diff options
author | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:24:06 +0300 |
---|---|---|
committer | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:41:34 +0300 |
commit | e0e3e1717e3d33762ce61950504f9637a6e669ed (patch) | |
tree | bca3ff6939b10ed60c3d5c12439963a1146b9711 /contrib/tools/python/src/Objects/stringlib | |
parent | 38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff) | |
download | ydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz |
add ydb deps
Diffstat (limited to 'contrib/tools/python/src/Objects/stringlib')
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/count.h | 30 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/ctype.h | 109 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/fastsearch.h | 160 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/find.h | 175 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/formatter.h | 1547 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/localeutil.h | 212 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/partition.h | 110 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/split.h | 394 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/string_format.h | 1361 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/stringdefs.h | 33 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/transmogrify.h | 264 | ||||
-rw-r--r-- | contrib/tools/python/src/Objects/stringlib/unicodedefs.h | 37 |
12 files changed, 4432 insertions, 0 deletions
diff --git a/contrib/tools/python/src/Objects/stringlib/count.h b/contrib/tools/python/src/Objects/stringlib/count.h new file mode 100644 index 0000000000..de34f96b3e --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/count.h @@ -0,0 +1,30 @@ +/* stringlib: count implementation */ + +#ifndef STRINGLIB_COUNT_H +#define STRINGLIB_COUNT_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t maxcount) +{ + Py_ssize_t count; + + if (str_len < 0) + return 0; /* start > len(str) */ + if (sub_len == 0) + return (str_len < maxcount) ? str_len + 1 : maxcount; + + count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT); + + if (count < 0) + return 0; /* no match */ + + return count; +} + +#endif diff --git a/contrib/tools/python/src/Objects/stringlib/ctype.h b/contrib/tools/python/src/Objects/stringlib/ctype.h new file mode 100644 index 0000000000..739cf3d9eb --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/ctype.h @@ -0,0 +1,109 @@ +/* NOTE: this API is -ONLY- for use with single byte character strings. */ +/* Do not use it with Unicode. */ + +#include "bytes_methods.h" + +static PyObject* +stringlib_isspace(PyObject *self) +{ + return _Py_bytes_isspace(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isalpha(PyObject *self) +{ + return _Py_bytes_isalpha(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isalnum(PyObject *self) +{ + return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isdigit(PyObject *self) +{ + return _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_islower(PyObject *self) +{ + return _Py_bytes_islower(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_isupper(PyObject *self) +{ + return _Py_bytes_isupper(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* +stringlib_istitle(PyObject *self) +{ + return _Py_bytes_istitle(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + + +/* functions that return a new object partially translated by ctype funcs: */ + +static PyObject* +stringlib_lower(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_lower(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_upper(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_upper(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_title(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_title(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_capitalize(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_capitalize(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} + +static PyObject* +stringlib_swapcase(PyObject *self) +{ + PyObject* newobj; + newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); + if (!newobj) + return NULL; + _Py_bytes_swapcase(STRINGLIB_STR(newobj), STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + return newobj; +} diff --git a/contrib/tools/python/src/Objects/stringlib/fastsearch.h b/contrib/tools/python/src/Objects/stringlib/fastsearch.h new file mode 100644 index 0000000000..e231c587e4 --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/fastsearch.h @@ -0,0 +1,160 @@ +/* stringlib: fastsearch implementation */ + +#ifndef STRINGLIB_FASTSEARCH_H +#define STRINGLIB_FASTSEARCH_H + +/* fast search/count implementation, based on a mix between boyer- + moore and horspool, with a few more bells and whistles on the top. + for some more background, see: http://effbot.org/zone/stringlib.htm */ + +/* note: fastsearch may access s[n], which isn't a problem when using + Python's ordinary string types, but may cause problems if you're + using this code in other contexts. also, the count mode returns -1 + if there cannot possible be a match in the target string, and 0 if + it has actually checked for matches, but didn't find any. callers + beware! */ + +#define FAST_COUNT 0 +#define FAST_SEARCH 1 +#define FAST_RSEARCH 2 + +#if LONG_BIT >= 128 +#define STRINGLIB_BLOOM_WIDTH 128 +#elif LONG_BIT >= 64 +#define STRINGLIB_BLOOM_WIDTH 64 +#elif LONG_BIT >= 32 +#define STRINGLIB_BLOOM_WIDTH 32 +#else +#error "LONG_BIT is smaller than 32" +#endif + +#define STRINGLIB_BLOOM_ADD(mask, ch) \ + ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) +#define STRINGLIB_BLOOM(mask, ch) \ + ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) + +Py_LOCAL_INLINE(Py_ssize_t) +fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, + const STRINGLIB_CHAR* p, Py_ssize_t m, + Py_ssize_t maxcount, int mode) +{ + unsigned long mask; + Py_ssize_t skip, count = 0; + Py_ssize_t i, j, mlast, w; + + w = n - m; + + if (w < 0 || (mode == FAST_COUNT && maxcount == 0)) + return -1; + + /* look for special cases */ + if (m <= 1) { + if (m <= 0) + return -1; + /* use special case for 1-character strings */ + if (mode == FAST_COUNT) { + for (i = 0; i < n; i++) + if (s[i] == p[0]) { + count++; + if (count == maxcount) + return maxcount; + } + return count; + } else if (mode == FAST_SEARCH) { + for (i = 0; i < n; i++) + if (s[i] == p[0]) + return i; + } else { /* FAST_RSEARCH */ + for (i = n - 1; i > -1; i--) + if (s[i] == p[0]) + return i; + } + return -1; + } + + mlast = m - 1; + skip = mlast - 1; + mask = 0; + + if (mode != FAST_RSEARCH) { + + /* create compressed boyer-moore delta 1 table */ + + /* process pattern[:-1] */ + for (i = 0; i < mlast; i++) { + STRINGLIB_BLOOM_ADD(mask, p[i]); + if (p[i] == p[mlast]) + skip = mlast - i - 1; + } + /* process pattern[-1] outside the loop */ + STRINGLIB_BLOOM_ADD(mask, p[mlast]); + + for (i = 0; i <= w; i++) { + /* note: using mlast in the skip path slows things down on x86 */ + if (s[i+m-1] == p[m-1]) { + /* candidate match */ + for (j = 0; j < mlast; j++) + if (s[i+j] != p[j]) + break; + if (j == mlast) { + /* got a match! */ + if (mode != FAST_COUNT) + return i; + count++; + if (count == maxcount) + return maxcount; + i = i + mlast; + continue; + } + /* miss: check if next character is part of pattern */ + if (!STRINGLIB_BLOOM(mask, s[i+m])) + i = i + m; + else + i = i + skip; + } else { + /* skip: check if next character is part of pattern */ + if (!STRINGLIB_BLOOM(mask, s[i+m])) + i = i + m; + } + } + } else { /* FAST_RSEARCH */ + + /* create compressed boyer-moore delta 1 table */ + + /* process pattern[0] outside the loop */ + STRINGLIB_BLOOM_ADD(mask, p[0]); + /* process pattern[:0:-1] */ + for (i = mlast; i > 0; i--) { + STRINGLIB_BLOOM_ADD(mask, p[i]); + if (p[i] == p[0]) + skip = i - 1; + } + + for (i = w; i >= 0; i--) { + if (s[i] == p[0]) { + /* candidate match */ + for (j = mlast; j > 0; j--) + if (s[i+j] != p[j]) + break; + if (j == 0) + /* got a match! */ + return i; + /* miss: check if previous character is part of pattern */ + if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) + i = i - m; + else + i = i - skip; + } else { + /* skip: check if previous character is part of pattern */ + if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) + i = i - m; + } + } + } + + if (mode != FAST_COUNT) + return -1; + return count; +} + +#endif diff --git a/contrib/tools/python/src/Objects/stringlib/find.h b/contrib/tools/python/src/Objects/stringlib/find.h new file mode 100644 index 0000000000..ce615dcb8a --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/find.h @@ -0,0 +1,175 @@ +/* stringlib: find/index implementation */ + +#ifndef STRINGLIB_FIND_H +#define STRINGLIB_FIND_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_find(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t offset) +{ + Py_ssize_t pos; + + if (str_len < 0) + return -1; + if (sub_len == 0) + return offset; + + pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH); + + if (pos >= 0) + pos += offset; + + return pos; +} + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_rfind(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t offset) +{ + Py_ssize_t pos; + + if (str_len < 0) + return -1; + if (sub_len == 0) + return str_len + offset; + + pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH); + + if (pos >= 0) + pos += offset; + + return pos; +} + +/* helper macro to fixup start/end slice values */ +#define ADJUST_INDICES(start, end, len) \ + if (end > len) \ + end = len; \ + else if (end < 0) { \ + end += len; \ + if (end < 0) \ + end = 0; \ + } \ + if (start < 0) { \ + start += len; \ + if (start < 0) \ + start = 0; \ + } + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_find_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t start, Py_ssize_t end) +{ + ADJUST_INDICES(start, end, str_len); + return stringlib_find(str + start, end - start, sub, sub_len, start); +} + +Py_LOCAL_INLINE(Py_ssize_t) +stringlib_rfind_slice(const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, + Py_ssize_t start, Py_ssize_t end) +{ + ADJUST_INDICES(start, end, str_len); + return stringlib_rfind(str + start, end - start, sub, sub_len, start); +} + +#ifdef STRINGLIB_WANT_CONTAINS_OBJ + +Py_LOCAL_INLINE(int) +stringlib_contains_obj(PyObject* str, PyObject* sub) +{ + return stringlib_find( + STRINGLIB_STR(str), STRINGLIB_LEN(str), + STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0 + ) != -1; +} + +#endif /* STRINGLIB_WANT_CONTAINS_OBJ */ + +/* +This function is a helper for the "find" family (find, rfind, index, +rindex) and for count, startswith and endswith, because they all have +the same behaviour for the arguments. + +It does not touch the variables received until it knows everything +is ok. +*/ + +#define FORMAT_BUFFER_SIZE 50 + +Py_LOCAL_INLINE(int) +stringlib_parse_args_finds(const char * function_name, PyObject *args, + PyObject **subobj, + Py_ssize_t *start, Py_ssize_t *end) +{ + PyObject *tmp_subobj; + Py_ssize_t tmp_start = 0; + Py_ssize_t tmp_end = PY_SSIZE_T_MAX; + PyObject *obj_start=Py_None, *obj_end=Py_None; + char format[FORMAT_BUFFER_SIZE] = "O|OO:"; + size_t len = strlen(format); + + strncpy(format + len, function_name, FORMAT_BUFFER_SIZE - len - 1); + format[FORMAT_BUFFER_SIZE - 1] = '\0'; + + if (!PyArg_ParseTuple(args, format, &tmp_subobj, &obj_start, &obj_end)) + return 0; + + /* To support None in "start" and "end" arguments, meaning + the same as if they were not passed. + */ + if (obj_start != Py_None) + if (!_PyEval_SliceIndex(obj_start, &tmp_start)) + return 0; + if (obj_end != Py_None) + if (!_PyEval_SliceIndex(obj_end, &tmp_end)) + return 0; + + *start = tmp_start; + *end = tmp_end; + *subobj = tmp_subobj; + return 1; +} + +#undef FORMAT_BUFFER_SIZE + +#if STRINGLIB_IS_UNICODE + +/* +Wraps stringlib_parse_args_finds() and additionally ensures that the +first argument is a unicode object. + +Note that we receive a pointer to the pointer of the substring object, +so when we create that object in this function we don't DECREF it, +because it continues living in the caller functions (those functions, +after finishing using the substring, must DECREF it). +*/ + +Py_LOCAL_INLINE(int) +stringlib_parse_args_finds_unicode(const char * function_name, PyObject *args, + PyUnicodeObject **substring, + Py_ssize_t *start, Py_ssize_t *end) +{ + PyObject *tmp_substring; + + if(stringlib_parse_args_finds(function_name, args, &tmp_substring, + start, end)) { + tmp_substring = PyUnicode_FromObject(tmp_substring); + if (!tmp_substring) + return 0; + *substring = (PyUnicodeObject *)tmp_substring; + return 1; + } + return 0; +} + +#endif /* STRINGLIB_IS_UNICODE */ + +#endif /* STRINGLIB_FIND_H */ diff --git a/contrib/tools/python/src/Objects/stringlib/formatter.h b/contrib/tools/python/src/Objects/stringlib/formatter.h new file mode 100644 index 0000000000..70f574c030 --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/formatter.h @@ -0,0 +1,1547 @@ +/* implements the string, long, and float formatters. that is, + string.__format__, etc. */ + +#include <locale.h> + +/* Before including this, you must include either: + stringlib/unicodedefs.h + stringlib/stringdefs.h + + Also, you should define the names: + FORMAT_STRING + FORMAT_LONG + FORMAT_FLOAT + FORMAT_COMPLEX + to be whatever you want the public names of these functions to + be. These are the only non-static functions defined here. +*/ + +/* Raises an exception about an unknown presentation type for this + * type. */ + +static void +unknown_presentation_type(STRINGLIB_CHAR presentation_type, + const char* type_name) +{ +#if STRINGLIB_IS_UNICODE + /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range, + hence the two cases. If it is char, gcc complains that the + condition below is always true, hence the ifdef. */ + if (presentation_type > 32 && presentation_type < 128) +#endif + PyErr_Format(PyExc_ValueError, + "Unknown format code '%c' " + "for object of type '%.200s'", + (char)presentation_type, + type_name); +#if STRINGLIB_IS_UNICODE + else + PyErr_Format(PyExc_ValueError, + "Unknown format code '\\x%x' " + "for object of type '%.200s'", + (unsigned int)presentation_type, + type_name); +#endif +} + +static void +invalid_comma_type(STRINGLIB_CHAR presentation_type) +{ +#if STRINGLIB_IS_UNICODE + /* See comment in unknown_presentation_type */ + if (presentation_type > 32 && presentation_type < 128) +#endif + PyErr_Format(PyExc_ValueError, + "Cannot specify ',' with '%c'.", + (char)presentation_type); +#if STRINGLIB_IS_UNICODE + else + PyErr_Format(PyExc_ValueError, + "Cannot specify ',' with '\\x%x'.", + (unsigned int)presentation_type); +#endif +} + +/* + get_integer consumes 0 or more decimal digit characters from an + input string, updates *result with the corresponding positive + integer, and returns the number of digits consumed. + + returns -1 on error. +*/ +static int +get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end, + Py_ssize_t *result) +{ + Py_ssize_t accumulator, digitval; + int numdigits; + accumulator = numdigits = 0; + for (;;(*ptr)++, numdigits++) { + if (*ptr >= end) + break; + digitval = STRINGLIB_TODECIMAL(**ptr); + if (digitval < 0) + break; + /* + Detect possible overflow before it happens: + + accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if + accumulator > (PY_SSIZE_T_MAX - digitval) / 10. + */ + if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { + PyErr_Format(PyExc_ValueError, + "Too many decimal digits in format string"); + return -1; + } + accumulator = accumulator * 10 + digitval; + } + *result = accumulator; + return numdigits; +} + +/************************************************************************/ +/*********** standard format specifier parsing **************************/ +/************************************************************************/ + +/* returns true if this character is a specifier alignment token */ +Py_LOCAL_INLINE(int) +is_alignment_token(STRINGLIB_CHAR c) +{ + switch (c) { + case '<': case '>': case '=': case '^': + return 1; + default: + return 0; + } +} + +/* returns true if this character is a sign element */ +Py_LOCAL_INLINE(int) +is_sign_element(STRINGLIB_CHAR c) +{ + switch (c) { + case ' ': case '+': case '-': + return 1; + default: + return 0; + } +} + + +typedef struct { + STRINGLIB_CHAR fill_char; + STRINGLIB_CHAR align; + int alternate; + STRINGLIB_CHAR sign; + Py_ssize_t width; + int thousands_separators; + Py_ssize_t precision; + STRINGLIB_CHAR type; +} InternalFormatSpec; + + +#if 0 +/* Occasionally useful for debugging. Should normally be commented out. */ +static void +DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format) +{ + printf("internal format spec: fill_char %d\n", format->fill_char); + printf("internal format spec: align %d\n", format->align); + printf("internal format spec: alternate %d\n", format->alternate); + printf("internal format spec: sign %d\n", format->sign); + printf("internal format spec: width %zd\n", format->width); + printf("internal format spec: thousands_separators %d\n", + format->thousands_separators); + printf("internal format spec: precision %zd\n", format->precision); + printf("internal format spec: type %c\n", format->type); + printf("\n"); +} +#endif + + +/* + ptr points to the start of the format_spec, end points just past its end. + fills in format with the parsed information. + returns 1 on success, 0 on failure. + if failure, sets the exception +*/ +static int +parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len, + InternalFormatSpec *format, + char default_type, + char default_align) +{ + STRINGLIB_CHAR *ptr = format_spec; + STRINGLIB_CHAR *end = format_spec + format_spec_len; + + /* end-ptr is used throughout this code to specify the length of + the input string */ + + Py_ssize_t consumed; + int align_specified = 0; + int fill_char_specified = 0; + + format->fill_char = ' '; + format->align = default_align; + format->alternate = 0; + format->sign = '\0'; + format->width = -1; + format->thousands_separators = 0; + format->precision = -1; + format->type = default_type; + + /* If the second char is an alignment token, + then parse the fill char */ + if (end-ptr >= 2 && is_alignment_token(ptr[1])) { + format->align = ptr[1]; + format->fill_char = ptr[0]; + fill_char_specified = 1; + align_specified = 1; + ptr += 2; + } + else if (end-ptr >= 1 && is_alignment_token(ptr[0])) { + format->align = ptr[0]; + align_specified = 1; + ++ptr; + } + + /* Parse the various sign options */ + if (end-ptr >= 1 && is_sign_element(ptr[0])) { + format->sign = ptr[0]; + ++ptr; + } + + /* If the next character is #, we're in alternate mode. This only + applies to integers. */ + if (end-ptr >= 1 && ptr[0] == '#') { + format->alternate = 1; + ++ptr; + } + + /* The special case for 0-padding (backwards compat) */ + if (!fill_char_specified && end-ptr >= 1 && ptr[0] == '0') { + format->fill_char = '0'; + if (!align_specified) { + format->align = '='; + } + ++ptr; + } + + consumed = get_integer(&ptr, end, &format->width); + if (consumed == -1) + /* Overflow error. Exception already set. */ + return 0; + + /* If consumed is 0, we didn't consume any characters for the + width. In that case, reset the width to -1, because + get_integer() will have set it to zero. -1 is how we record + that the width wasn't specified. */ + if (consumed == 0) + format->width = -1; + + /* Comma signifies add thousands separators */ + if (end-ptr && ptr[0] == ',') { + format->thousands_separators = 1; + ++ptr; + } + + /* Parse field precision */ + if (end-ptr && ptr[0] == '.') { + ++ptr; + + consumed = get_integer(&ptr, end, &format->precision); + if (consumed == -1) + /* Overflow error. Exception already set. */ + return 0; + + /* Not having a precision after a dot is an error. */ + if (consumed == 0) { + PyErr_Format(PyExc_ValueError, + "Format specifier missing precision"); + return 0; + } + + } + + /* Finally, parse the type field. */ + + if (end-ptr > 1) { + /* More than one char remain, invalid conversion spec. */ + PyErr_Format(PyExc_ValueError, "Invalid conversion specification"); + return 0; + } + + if (end-ptr == 1) { + format->type = ptr[0]; + ++ptr; + } + + /* Do as much validating as we can, just by looking at the format + specifier. Do not take into account what type of formatting + we're doing (int, float, string). */ + + if (format->thousands_separators) { + switch (format->type) { + case 'd': + case 'e': + case 'f': + case 'g': + case 'E': + case 'G': + case '%': + case 'F': + case '\0': + /* These are allowed. See PEP 378.*/ + break; + default: + invalid_comma_type(format->type); + return 0; + } + } + + return 1; +} + +/* Calculate the padding needed. */ +static void +calc_padding(Py_ssize_t nchars, Py_ssize_t width, STRINGLIB_CHAR align, + Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding, + Py_ssize_t *n_total) +{ + if (width >= 0) { + if (nchars > width) + *n_total = nchars; + else + *n_total = width; + } + else { + /* not specified, use all of the chars and no more */ + *n_total = nchars; + } + + /* Figure out how much leading space we need, based on the + aligning */ + if (align == '>') + *n_lpadding = *n_total - nchars; + else if (align == '^') + *n_lpadding = (*n_total - nchars) / 2; + else if (align == '<' || align == '=') + *n_lpadding = 0; + else { + /* We should never have an unspecified alignment. */ + *n_lpadding = 0; + assert(0); + } + + *n_rpadding = *n_total - nchars - *n_lpadding; +} + +/* Do the padding, and return a pointer to where the caller-supplied + content goes. */ +static STRINGLIB_CHAR * +fill_padding(STRINGLIB_CHAR *p, Py_ssize_t nchars, STRINGLIB_CHAR fill_char, + Py_ssize_t n_lpadding, Py_ssize_t n_rpadding) +{ + /* Pad on left. */ + if (n_lpadding) + STRINGLIB_FILL(p, fill_char, n_lpadding); + + /* Pad on right. */ + if (n_rpadding) + STRINGLIB_FILL(p + nchars + n_lpadding, fill_char, n_rpadding); + + /* Pointer to the user content. */ + return p + n_lpadding; +} + +#if defined FORMAT_FLOAT || defined FORMAT_LONG || defined FORMAT_COMPLEX +/************************************************************************/ +/*********** common routines for numeric formatting *********************/ +/************************************************************************/ + +/* Locale type codes. */ +#define LT_CURRENT_LOCALE 0 +#define LT_DEFAULT_LOCALE 1 +#define LT_NO_LOCALE 2 + +/* Locale info needed for formatting integers and the part of floats + before and including the decimal. Note that locales only support + 8-bit chars, not unicode. */ +typedef struct { + char *decimal_point; + char *thousands_sep; + char *grouping; +} LocaleInfo; + +/* describes the layout for an integer, see the comment in + calc_number_widths() for details */ +typedef struct { + Py_ssize_t n_lpadding; + Py_ssize_t n_prefix; + Py_ssize_t n_spadding; + Py_ssize_t n_rpadding; + char sign; + Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */ + Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including + any grouping chars. */ + Py_ssize_t n_decimal; /* 0 if only an integer */ + Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part, + excluding the decimal itself, if + present. */ + + /* These 2 are not the widths of fields, but are needed by + STRINGLIB_GROUPING. */ + Py_ssize_t n_digits; /* The number of digits before a decimal + or exponent. */ + Py_ssize_t n_min_width; /* The min_width we used when we computed + the n_grouped_digits width. */ +} NumberFieldWidths; + + +/* Given a number of the form: + digits[remainder] + where ptr points to the start and end points to the end, find where + the integer part ends. This could be a decimal, an exponent, both, + or neither. + If a decimal point is present, set *has_decimal and increment + remainder beyond it. + Results are undefined (but shouldn't crash) for improperly + formatted strings. +*/ +static void +parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len, + Py_ssize_t *n_remainder, int *has_decimal) +{ + STRINGLIB_CHAR *end = ptr + len; + STRINGLIB_CHAR *remainder; + + while (ptr<end && isdigit(*ptr)) + ++ptr; + remainder = ptr; + + /* Does remainder start with a decimal point? */ + *has_decimal = ptr<end && *remainder == '.'; + + /* Skip the decimal point. */ + if (*has_decimal) + remainder++; + + *n_remainder = end - remainder; +} + +/* not all fields of format are used. for example, precision is + unused. should this take discrete params in order to be more clear + about what it does? or is passing a single format parameter easier + and more efficient enough to justify a little obfuscation? */ +static Py_ssize_t +calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, + STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number, + Py_ssize_t n_number, Py_ssize_t n_remainder, + int has_decimal, const LocaleInfo *locale, + const InternalFormatSpec *format) +{ + Py_ssize_t n_non_digit_non_padding; + Py_ssize_t n_padding; + + spec->n_digits = n_number - n_remainder - (has_decimal?1:0); + spec->n_lpadding = 0; + spec->n_prefix = n_prefix; + spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0; + spec->n_remainder = n_remainder; + spec->n_spadding = 0; + spec->n_rpadding = 0; + spec->sign = '\0'; + spec->n_sign = 0; + + /* the output will look like: + | | + | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> | + | | + + sign is computed from format->sign and the actual + sign of the number + + prefix is given (it's for the '0x' prefix) + + digits is already known + + the total width is either given, or computed from the + actual digits + + only one of lpadding, spadding, and rpadding can be non-zero, + and it's calculated from the width and other fields + */ + + /* compute the various parts we're going to write */ + switch (format->sign) { + case '+': + /* always put a + or - */ + spec->n_sign = 1; + spec->sign = (sign_char == '-' ? '-' : '+'); + break; + case ' ': + spec->n_sign = 1; + spec->sign = (sign_char == '-' ? '-' : ' '); + break; + default: + /* Not specified, or the default (-) */ + if (sign_char == '-') { + spec->n_sign = 1; + spec->sign = '-'; + } + } + + /* The number of chars used for non-digits and non-padding. */ + n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal + + spec->n_remainder; + + /* min_width can go negative, that's okay. format->width == -1 means + we don't care. */ + if (format->fill_char == '0' && format->align == '=') + spec->n_min_width = format->width - n_non_digit_non_padding; + else + spec->n_min_width = 0; + + if (spec->n_digits == 0) + /* This case only occurs when using 'c' formatting, we need + to special case it because the grouping code always wants + to have at least one character. */ + spec->n_grouped_digits = 0; + else + spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL, + spec->n_digits, + spec->n_min_width, + locale->grouping, + locale->thousands_sep); + + /* Given the desired width and the total of digit and non-digit + space we consume, see if we need any padding. format->width can + be negative (meaning no padding), but this code still works in + that case. */ + n_padding = format->width - + (n_non_digit_non_padding + spec->n_grouped_digits); + if (n_padding > 0) { + /* Some padding is needed. Determine if it's left, space, or right. */ + switch (format->align) { + case '<': + spec->n_rpadding = n_padding; + break; + case '^': + spec->n_lpadding = n_padding / 2; + spec->n_rpadding = n_padding - spec->n_lpadding; + break; + case '=': + spec->n_spadding = n_padding; + break; + case '>': + spec->n_lpadding = n_padding; + break; + default: + /* Shouldn't get here, but treat it as '>' */ + spec->n_lpadding = n_padding; + assert(0); + break; + } + } + return spec->n_lpadding + spec->n_sign + spec->n_prefix + + spec->n_spadding + spec->n_grouped_digits + spec->n_decimal + + spec->n_remainder + spec->n_rpadding; +} + +/* Fill in the digit parts of a numbers's string representation, + as determined in calc_number_widths(). + No error checking, since we know the buffer is the correct size. */ +static void +fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec, + STRINGLIB_CHAR *digits, Py_ssize_t n_digits, + STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char, + LocaleInfo *locale, int toupper) +{ + /* Used to keep track of digits, decimal, and remainder. */ + STRINGLIB_CHAR *p = digits; + +#ifndef NDEBUG + Py_ssize_t r; +#endif + + if (spec->n_lpadding) { + STRINGLIB_FILL(buf, fill_char, spec->n_lpadding); + buf += spec->n_lpadding; + } + if (spec->n_sign == 1) { + *buf++ = spec->sign; + } + if (spec->n_prefix) { + memmove(buf, + prefix, + spec->n_prefix * sizeof(STRINGLIB_CHAR)); + if (toupper) { + Py_ssize_t t; + for (t = 0; t < spec->n_prefix; ++t) + buf[t] = STRINGLIB_TOUPPER(buf[t]); + } + buf += spec->n_prefix; + } + if (spec->n_spadding) { + STRINGLIB_FILL(buf, fill_char, spec->n_spadding); + buf += spec->n_spadding; + } + + /* Only for type 'c' special case, it has no digits. */ + if (spec->n_digits != 0) { + /* Fill the digits with InsertThousandsGrouping. */ +#ifndef NDEBUG + r = +#endif + STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits, + spec->n_digits, spec->n_min_width, + locale->grouping, locale->thousands_sep); +#ifndef NDEBUG + assert(r == spec->n_grouped_digits); +#endif + p += spec->n_digits; + } + if (toupper) { + Py_ssize_t t; + for (t = 0; t < spec->n_grouped_digits; ++t) + buf[t] = STRINGLIB_TOUPPER(buf[t]); + } + buf += spec->n_grouped_digits; + + if (spec->n_decimal) { + Py_ssize_t t; + for (t = 0; t < spec->n_decimal; ++t) + buf[t] = locale->decimal_point[t]; + buf += spec->n_decimal; + p += 1; + } + + if (spec->n_remainder) { + memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR)); + buf += spec->n_remainder; + p += spec->n_remainder; + } + + if (spec->n_rpadding) { + STRINGLIB_FILL(buf, fill_char, spec->n_rpadding); + buf += spec->n_rpadding; + } +} + +static char no_grouping[1] = {CHAR_MAX}; + +/* Find the decimal point character(s?), thousands_separator(s?), and + grouping description, either for the current locale if type is + LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or + none if LT_NO_LOCALE. */ +static void +get_locale_info(int type, LocaleInfo *locale_info) +{ + switch (type) { + case LT_CURRENT_LOCALE: { + struct lconv *locale_data = localeconv(); + locale_info->decimal_point = locale_data->decimal_point; + locale_info->thousands_sep = locale_data->thousands_sep; + locale_info->grouping = locale_data->grouping; + break; + } + case LT_DEFAULT_LOCALE: + locale_info->decimal_point = "."; + locale_info->thousands_sep = ","; + locale_info->grouping = "\3"; /* Group every 3 characters. The + (implicit) trailing 0 means repeat + infinitely. */ + break; + case LT_NO_LOCALE: + locale_info->decimal_point = "."; + locale_info->thousands_sep = ""; + locale_info->grouping = no_grouping; + break; + default: + assert(0); + } +} + +#endif /* FORMAT_FLOAT || FORMAT_LONG || FORMAT_COMPLEX */ + +/************************************************************************/ +/*********** string formatting ******************************************/ +/************************************************************************/ + +static PyObject * +format_string_internal(PyObject *value, const InternalFormatSpec *format) +{ + Py_ssize_t lpad; + Py_ssize_t rpad; + Py_ssize_t total; + STRINGLIB_CHAR *p; + Py_ssize_t len = STRINGLIB_LEN(value); + PyObject *result = NULL; + + /* sign is not allowed on strings */ + if (format->sign != '\0') { + PyErr_SetString(PyExc_ValueError, + "Sign not allowed in string format specifier"); + goto done; + } + + /* alternate is not allowed on strings */ + if (format->alternate) { + PyErr_SetString(PyExc_ValueError, + "Alternate form (#) not allowed in string format " + "specifier"); + goto done; + } + + /* '=' alignment not allowed on strings */ + if (format->align == '=') { + PyErr_SetString(PyExc_ValueError, + "'=' alignment not allowed " + "in string format specifier"); + goto done; + } + + /* if precision is specified, output no more that format.precision + characters */ + if (format->precision >= 0 && len >= format->precision) { + len = format->precision; + } + + calc_padding(len, format->width, format->align, &lpad, &rpad, &total); + + /* allocate the resulting string */ + result = STRINGLIB_NEW(NULL, total); + if (result == NULL) + goto done; + + /* Write into that space. First the padding. */ + p = fill_padding(STRINGLIB_STR(result), len, + format->fill_char, lpad, rpad); + + /* Then the source string. */ + memcpy(p, STRINGLIB_STR(value), len * sizeof(STRINGLIB_CHAR)); + +done: + return result; +} + + +/************************************************************************/ +/*********** long formatting ********************************************/ +/************************************************************************/ + +#if defined FORMAT_LONG || defined FORMAT_INT +typedef PyObject* +(*IntOrLongToString)(PyObject *value, int base); + +static PyObject * +format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, + IntOrLongToString tostring) +{ + PyObject *result = NULL; + PyObject *tmp = NULL; + STRINGLIB_CHAR *pnumeric_chars; + STRINGLIB_CHAR numeric_char; + STRINGLIB_CHAR sign_char = '\0'; + Py_ssize_t n_digits; /* count of digits need from the computed + string */ + Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which + produces non-digits */ + Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */ + Py_ssize_t n_total; + STRINGLIB_CHAR *prefix = NULL; + NumberFieldWidths spec; + long x; + + /* Locale settings, either from the actual locale or + from a hard-code pseudo-locale */ + LocaleInfo locale; + + /* no precision allowed on integers */ + if (format->precision != -1) { + PyErr_SetString(PyExc_ValueError, + "Precision not allowed in integer format specifier"); + goto done; + } + + /* special case for character formatting */ + if (format->type == 'c') { + /* error to specify a sign */ + if (format->sign != '\0') { + PyErr_SetString(PyExc_ValueError, + "Sign not allowed with integer" + " format specifier 'c'"); + goto done; + } + + /* Error to specify a comma. */ + if (format->thousands_separators) { + PyErr_SetString(PyExc_ValueError, + "Thousands separators not allowed with integer" + " format specifier 'c'"); + goto done; + } + + /* taken from unicodeobject.c formatchar() */ + /* Integer input truncated to a character */ +/* XXX: won't work for int */ + x = PyLong_AsLong(value); + if (x == -1 && PyErr_Occurred()) + goto done; +#if STRINGLIB_IS_UNICODE +#ifdef Py_UNICODE_WIDE + if (x < 0 || x > 0x10ffff) { + PyErr_SetString(PyExc_OverflowError, + "%c arg not in range(0x110000) " + "(wide Python build)"); + goto done; + } +#else + if (x < 0 || x > 0xffff) { + PyErr_SetString(PyExc_OverflowError, + "%c arg not in range(0x10000) " + "(narrow Python build)"); + goto done; + } +#endif +#else + if (x < 0 || x > 0xff) { + PyErr_SetString(PyExc_OverflowError, + "%c arg not in range(0x100)"); + goto done; + } +#endif + numeric_char = (STRINGLIB_CHAR)x; + pnumeric_chars = &numeric_char; + n_digits = 1; + + /* As a sort-of hack, we tell calc_number_widths that we only + have "remainder" characters. calc_number_widths thinks + these are characters that don't get formatted, only copied + into the output string. We do this for 'c' formatting, + because the characters are likely to be non-digits. */ + n_remainder = 1; + } + else { + int base; + int leading_chars_to_skip = 0; /* Number of characters added by + PyNumber_ToBase that we want to + skip over. */ + + /* Compute the base and how many characters will be added by + PyNumber_ToBase */ + switch (format->type) { + case 'b': + base = 2; + leading_chars_to_skip = 2; /* 0b */ + break; + case 'o': + base = 8; + leading_chars_to_skip = 2; /* 0o */ + break; + case 'x': + case 'X': + base = 16; + leading_chars_to_skip = 2; /* 0x */ + break; + default: /* shouldn't be needed, but stops a compiler warning */ + case 'd': + case 'n': + base = 10; + break; + } + + /* The number of prefix chars is the same as the leading + chars to skip */ + if (format->alternate) + n_prefix = leading_chars_to_skip; + + /* Do the hard part, converting to a string in a given base */ + tmp = tostring(value, base); + if (tmp == NULL) + goto done; + + pnumeric_chars = STRINGLIB_STR(tmp); + n_digits = STRINGLIB_LEN(tmp); + + prefix = pnumeric_chars; + + /* Remember not to modify what pnumeric_chars points to. it + might be interned. Only modify it after we copy it into a + newly allocated output buffer. */ + + /* Is a sign character present in the output? If so, remember it + and skip it */ + if (pnumeric_chars[0] == '-') { + sign_char = pnumeric_chars[0]; + ++prefix; + ++leading_chars_to_skip; + } + + /* Skip over the leading chars (0x, 0b, etc.) */ + n_digits -= leading_chars_to_skip; + pnumeric_chars += leading_chars_to_skip; + } + + /* Determine the grouping, separator, and decimal point, if any. */ + get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : + (format->thousands_separators ? + LT_DEFAULT_LOCALE : + LT_NO_LOCALE), + &locale); + + /* Calculate how much memory we'll need. */ + n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars, + n_digits, n_remainder, 0, &locale, format); + + /* Allocate the memory. */ + result = STRINGLIB_NEW(NULL, n_total); + if (!result) + goto done; + + /* Populate the memory. */ + fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits, + prefix, format->fill_char, &locale, format->type == 'X'); + +done: + Py_XDECREF(tmp); + return result; +} +#endif /* defined FORMAT_LONG || defined FORMAT_INT */ + +/************************************************************************/ +/*********** float formatting *******************************************/ +/************************************************************************/ + +#ifdef FORMAT_FLOAT +#if STRINGLIB_IS_UNICODE +static void +strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len) +{ + Py_ssize_t i; + for (i = 0; i < len; ++i) + buffer[i] = (Py_UNICODE)charbuffer[i]; +} +#endif + +/* much of this is taken from unicodeobject.c */ +static PyObject * +format_float_internal(PyObject *value, + const InternalFormatSpec *format) +{ + char *buf = NULL; /* buffer returned from PyOS_double_to_string */ + Py_ssize_t n_digits; + Py_ssize_t n_remainder; + Py_ssize_t n_total; + int has_decimal; + double val; + Py_ssize_t precision; + Py_ssize_t default_precision = 6; + STRINGLIB_CHAR type = format->type; + int add_pct = 0; + STRINGLIB_CHAR *p; + NumberFieldWidths spec; + int flags = 0; + PyObject *result = NULL; + STRINGLIB_CHAR sign_char = '\0'; + int float_type; /* Used to see if we have a nan, inf, or regular float. */ + +#if STRINGLIB_IS_UNICODE + Py_UNICODE *unicode_tmp = NULL; +#endif + + /* Locale settings, either from the actual locale or + from a hard-code pseudo-locale */ + LocaleInfo locale; + + if (format->precision > INT_MAX) { + PyErr_SetString(PyExc_ValueError, "precision too big"); + goto done; + } + precision = (int)format->precision; + + /* Alternate is not allowed on floats. */ + if (format->alternate) { + PyErr_SetString(PyExc_ValueError, + "Alternate form (#) not allowed in float format " + "specifier"); + goto done; + } + + if (type == '\0') { + /* Omitted type specifier. This is like 'g' but with at least one + digit after the decimal point, and different default precision.*/ + type = 'g'; + default_precision = PyFloat_STR_PRECISION; + flags |= Py_DTSF_ADD_DOT_0; + } + + if (type == 'n') + /* 'n' is the same as 'g', except for the locale used to + format the result. We take care of that later. */ + type = 'g'; + + val = PyFloat_AsDouble(value); + if (val == -1.0 && PyErr_Occurred()) + goto done; + + if (type == '%') { + type = 'f'; + val *= 100; + add_pct = 1; + } + + if (precision < 0) + precision = default_precision; + + /* Cast "type", because if we're in unicode we need to pass an + 8-bit char. This is safe, because we've restricted what "type" + can be. */ + buf = PyOS_double_to_string(val, (char)type, precision, flags, + &float_type); + if (buf == NULL) + goto done; + n_digits = strlen(buf); + + if (add_pct) { + /* We know that buf has a trailing zero (since we just called + strlen() on it), and we don't use that fact any more. So we + can just write over the trailing zero. */ + buf[n_digits] = '%'; + n_digits += 1; + } + + /* Since there is no unicode version of PyOS_double_to_string, + just use the 8 bit version and then convert to unicode. */ +#if STRINGLIB_IS_UNICODE + unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE)); + if (unicode_tmp == NULL) { + PyErr_NoMemory(); + goto done; + } + strtounicode(unicode_tmp, buf, n_digits); + p = unicode_tmp; +#else + p = buf; +#endif + + /* Is a sign character present in the output? If so, remember it + and skip it */ + if (*p == '-') { + sign_char = *p; + ++p; + --n_digits; + } + + /* Determine if we have any "remainder" (after the digits, might include + decimal or exponent or both (or neither)) */ + parse_number(p, n_digits, &n_remainder, &has_decimal); + + /* Determine the grouping, separator, and decimal point, if any. */ + get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : + (format->thousands_separators ? + LT_DEFAULT_LOCALE : + LT_NO_LOCALE), + &locale); + + /* Calculate how much memory we'll need. */ + n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits, + n_remainder, has_decimal, &locale, format); + + /* Allocate the memory. */ + result = STRINGLIB_NEW(NULL, n_total); + if (result == NULL) + goto done; + + /* Populate the memory. */ + fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL, + format->fill_char, &locale, 0); + +done: + PyMem_Free(buf); +#if STRINGLIB_IS_UNICODE + PyMem_Free(unicode_tmp); +#endif + return result; +} +#endif /* FORMAT_FLOAT */ + +/************************************************************************/ +/*********** complex formatting *****************************************/ +/************************************************************************/ + +#ifdef FORMAT_COMPLEX + +static PyObject * +format_complex_internal(PyObject *value, + const InternalFormatSpec *format) +{ + double re; + double im; + char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */ + char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */ + + InternalFormatSpec tmp_format = *format; + Py_ssize_t n_re_digits; + Py_ssize_t n_im_digits; + Py_ssize_t n_re_remainder; + Py_ssize_t n_im_remainder; + Py_ssize_t n_re_total; + Py_ssize_t n_im_total; + int re_has_decimal; + int im_has_decimal; + Py_ssize_t precision; + Py_ssize_t default_precision = 6; + STRINGLIB_CHAR type = format->type; + STRINGLIB_CHAR *p_re; + STRINGLIB_CHAR *p_im; + NumberFieldWidths re_spec; + NumberFieldWidths im_spec; + int flags = 0; + PyObject *result = NULL; + STRINGLIB_CHAR *p; + STRINGLIB_CHAR re_sign_char = '\0'; + STRINGLIB_CHAR im_sign_char = '\0'; + int re_float_type; /* Used to see if we have a nan, inf, or regular float. */ + int im_float_type; + int add_parens = 0; + int skip_re = 0; + Py_ssize_t lpad; + Py_ssize_t rpad; + Py_ssize_t total; + +#if STRINGLIB_IS_UNICODE + Py_UNICODE *re_unicode_tmp = NULL; + Py_UNICODE *im_unicode_tmp = NULL; +#endif + + /* Locale settings, either from the actual locale or + from a hard-code pseudo-locale */ + LocaleInfo locale; + + if (format->precision > INT_MAX) { + PyErr_SetString(PyExc_ValueError, "precision too big"); + goto done; + } + precision = (int)format->precision; + + /* Alternate is not allowed on complex. */ + if (format->alternate) { + PyErr_SetString(PyExc_ValueError, + "Alternate form (#) not allowed in complex format " + "specifier"); + goto done; + } + + /* Neither is zero pading. */ + if (format->fill_char == '0') { + PyErr_SetString(PyExc_ValueError, + "Zero padding is not allowed in complex format " + "specifier"); + goto done; + } + + /* Neither is '=' alignment . */ + if (format->align == '=') { + PyErr_SetString(PyExc_ValueError, + "'=' alignment flag is not allowed in complex format " + "specifier"); + goto done; + } + + re = PyComplex_RealAsDouble(value); + if (re == -1.0 && PyErr_Occurred()) + goto done; + im = PyComplex_ImagAsDouble(value); + if (im == -1.0 && PyErr_Occurred()) + goto done; + + if (type == '\0') { + /* Omitted type specifier. Should be like str(self). */ + type = 'g'; + default_precision = PyFloat_STR_PRECISION; + if (re == 0.0 && copysign(1.0, re) == 1.0) + skip_re = 1; + else + add_parens = 1; + } + + if (type == 'n') + /* 'n' is the same as 'g', except for the locale used to + format the result. We take care of that later. */ + type = 'g'; + + if (precision < 0) + precision = default_precision; + + /* Cast "type", because if we're in unicode we need to pass an + 8-bit char. This is safe, because we've restricted what "type" + can be. */ + re_buf = PyOS_double_to_string(re, (char)type, precision, flags, + &re_float_type); + if (re_buf == NULL) + goto done; + im_buf = PyOS_double_to_string(im, (char)type, precision, flags, + &im_float_type); + if (im_buf == NULL) + goto done; + + n_re_digits = strlen(re_buf); + n_im_digits = strlen(im_buf); + + /* Since there is no unicode version of PyOS_double_to_string, + just use the 8 bit version and then convert to unicode. */ +#if STRINGLIB_IS_UNICODE + re_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_re_digits)*sizeof(Py_UNICODE)); + if (re_unicode_tmp == NULL) { + PyErr_NoMemory(); + goto done; + } + strtounicode(re_unicode_tmp, re_buf, n_re_digits); + p_re = re_unicode_tmp; + + im_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_im_digits)*sizeof(Py_UNICODE)); + if (im_unicode_tmp == NULL) { + PyErr_NoMemory(); + goto done; + } + strtounicode(im_unicode_tmp, im_buf, n_im_digits); + p_im = im_unicode_tmp; +#else + p_re = re_buf; + p_im = im_buf; +#endif + + /* Is a sign character present in the output? If so, remember it + and skip it */ + if (*p_re == '-') { + re_sign_char = *p_re; + ++p_re; + --n_re_digits; + } + if (*p_im == '-') { + im_sign_char = *p_im; + ++p_im; + --n_im_digits; + } + + /* Determine if we have any "remainder" (after the digits, might include + decimal or exponent or both (or neither)) */ + parse_number(p_re, n_re_digits, &n_re_remainder, &re_has_decimal); + parse_number(p_im, n_im_digits, &n_im_remainder, &im_has_decimal); + + /* Determine the grouping, separator, and decimal point, if any. */ + get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : + (format->thousands_separators ? + LT_DEFAULT_LOCALE : + LT_NO_LOCALE), + &locale); + + /* Turn off any padding. We'll do it later after we've composed + the numbers without padding. */ + tmp_format.fill_char = '\0'; + tmp_format.align = '<'; + tmp_format.width = -1; + + /* Calculate how much memory we'll need. */ + n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, p_re, + n_re_digits, n_re_remainder, + re_has_decimal, &locale, &tmp_format); + + /* Same formatting, but always include a sign, unless the real part is + * going to be omitted, in which case we use whatever sign convention was + * requested by the original format. */ + if (!skip_re) + tmp_format.sign = '+'; + n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, p_im, + n_im_digits, n_im_remainder, + im_has_decimal, &locale, &tmp_format); + + if (skip_re) + n_re_total = 0; + + /* Add 1 for the 'j', and optionally 2 for parens. */ + calc_padding(n_re_total + n_im_total + 1 + add_parens * 2, + format->width, format->align, &lpad, &rpad, &total); + + result = STRINGLIB_NEW(NULL, total); + if (result == NULL) + goto done; + + /* Populate the memory. First, the padding. */ + p = fill_padding(STRINGLIB_STR(result), + n_re_total + n_im_total + 1 + add_parens * 2, + format->fill_char, lpad, rpad); + + if (add_parens) + *p++ = '('; + + if (!skip_re) { + fill_number(p, &re_spec, p_re, n_re_digits, NULL, 0, &locale, 0); + p += n_re_total; + } + fill_number(p, &im_spec, p_im, n_im_digits, NULL, 0, &locale, 0); + p += n_im_total; + *p++ = 'j'; + + if (add_parens) + *p++ = ')'; + +done: + PyMem_Free(re_buf); + PyMem_Free(im_buf); +#if STRINGLIB_IS_UNICODE + PyMem_Free(re_unicode_tmp); + PyMem_Free(im_unicode_tmp); +#endif + return result; +} +#endif /* FORMAT_COMPLEX */ + +/************************************************************************/ +/*********** built in formatters ****************************************/ +/************************************************************************/ +PyObject * +FORMAT_STRING(PyObject *obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len) +{ + InternalFormatSpec format; + PyObject *result = NULL; + + /* check for the special case of zero length format spec, make + it equivalent to str(obj) */ + if (format_spec_len == 0) { + result = STRINGLIB_TOSTR(obj); + goto done; + } + + /* parse the format_spec */ + if (!parse_internal_render_format_spec(format_spec, format_spec_len, + &format, 's', '<')) + goto done; + + /* type conversion? */ + switch (format.type) { + case 's': + /* no type conversion needed, already a string. do the formatting */ + result = format_string_internal(obj, &format); + break; + default: + /* unknown */ + unknown_presentation_type(format.type, obj->ob_type->tp_name); + goto done; + } + +done: + return result; +} + +#if defined FORMAT_LONG || defined FORMAT_INT +static PyObject* +format_int_or_long(PyObject* obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len, + IntOrLongToString tostring) +{ + PyObject *result = NULL; + PyObject *tmp = NULL; + InternalFormatSpec format; + + /* check for the special case of zero length format spec, make + it equivalent to str(obj) */ + if (format_spec_len == 0) { + result = STRINGLIB_TOSTR(obj); + goto done; + } + + /* parse the format_spec */ + if (!parse_internal_render_format_spec(format_spec, + format_spec_len, + &format, 'd', '>')) + goto done; + + /* type conversion? */ + switch (format.type) { + case 'b': + case 'c': + case 'd': + case 'o': + case 'x': + case 'X': + case 'n': + /* no type conversion needed, already an int (or long). do + the formatting */ + result = format_int_or_long_internal(obj, &format, tostring); + break; + + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case '%': + /* convert to float */ + tmp = PyNumber_Float(obj); + if (tmp == NULL) + goto done; + result = format_float_internal(tmp, &format); + break; + + default: + /* unknown */ + unknown_presentation_type(format.type, obj->ob_type->tp_name); + goto done; + } + +done: + Py_XDECREF(tmp); + return result; +} +#endif /* FORMAT_LONG || defined FORMAT_INT */ + +#ifdef FORMAT_LONG +/* Need to define long_format as a function that will convert a long + to a string. In 3.0, _PyLong_Format has the correct signature. In + 2.x, we need to fudge a few parameters */ +#if PY_VERSION_HEX >= 0x03000000 +#define long_format _PyLong_Format +#else +static PyObject* +long_format(PyObject* value, int base) +{ + /* Convert to base, don't add trailing 'L', and use the new octal + format. We already know this is a long object */ + assert(PyLong_Check(value)); + /* convert to base, don't add 'L', and use the new octal format */ + return _PyLong_Format(value, base, 0, 1); +} +#endif + +PyObject * +FORMAT_LONG(PyObject *obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len) +{ + return format_int_or_long(obj, format_spec, format_spec_len, + long_format); +} +#endif /* FORMAT_LONG */ + +#ifdef FORMAT_INT +/* this is only used for 2.x, not 3.0 */ +static PyObject* +int_format(PyObject* value, int base) +{ + /* Convert to base, and use the new octal format. We already + know this is an int object */ + assert(PyInt_Check(value)); + return _PyInt_Format((PyIntObject*)value, base, 1); +} + +PyObject * +FORMAT_INT(PyObject *obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len) +{ + return format_int_or_long(obj, format_spec, format_spec_len, + int_format); +} +#endif /* FORMAT_INT */ + +#ifdef FORMAT_FLOAT +PyObject * +FORMAT_FLOAT(PyObject *obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len) +{ + PyObject *result = NULL; + InternalFormatSpec format; + + /* check for the special case of zero length format spec, make + it equivalent to str(obj) */ + if (format_spec_len == 0) { + result = STRINGLIB_TOSTR(obj); + goto done; + } + + /* parse the format_spec */ + if (!parse_internal_render_format_spec(format_spec, + format_spec_len, + &format, '\0', '>')) + goto done; + + /* type conversion? */ + switch (format.type) { + case '\0': /* No format code: like 'g', but with at least one decimal. */ + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case 'n': + case '%': + /* no conversion, already a float. do the formatting */ + result = format_float_internal(obj, &format); + break; + + default: + /* unknown */ + unknown_presentation_type(format.type, obj->ob_type->tp_name); + goto done; + } + +done: + return result; +} +#endif /* FORMAT_FLOAT */ + +#ifdef FORMAT_COMPLEX +PyObject * +FORMAT_COMPLEX(PyObject *obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len) +{ + PyObject *result = NULL; + InternalFormatSpec format; + + /* check for the special case of zero length format spec, make + it equivalent to str(obj) */ + if (format_spec_len == 0) { + result = STRINGLIB_TOSTR(obj); + goto done; + } + + /* parse the format_spec */ + if (!parse_internal_render_format_spec(format_spec, + format_spec_len, + &format, '\0', '>')) + goto done; + + /* type conversion? */ + switch (format.type) { + case '\0': /* No format code: like 'g', but with at least one decimal. */ + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case 'n': + /* no conversion, already a complex. do the formatting */ + result = format_complex_internal(obj, &format); + break; + + default: + /* unknown */ + unknown_presentation_type(format.type, obj->ob_type->tp_name); + goto done; + } + +done: + return result; +} +#endif /* FORMAT_COMPLEX */ diff --git a/contrib/tools/python/src/Objects/stringlib/localeutil.h b/contrib/tools/python/src/Objects/stringlib/localeutil.h new file mode 100644 index 0000000000..f548133875 --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/localeutil.h @@ -0,0 +1,212 @@ +/* stringlib: locale related helpers implementation */ + +#ifndef STRINGLIB_LOCALEUTIL_H +#define STRINGLIB_LOCALEUTIL_H + +#include <locale.h> + +#define MAX(x, y) ((x) < (y) ? (y) : (x)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +typedef struct { + const char *grouping; + char previous; + Py_ssize_t i; /* Where we're currently pointing in grouping. */ +} GroupGenerator; + +static void +_GroupGenerator_init(GroupGenerator *self, const char *grouping) +{ + self->grouping = grouping; + self->i = 0; + self->previous = 0; +} + +/* Returns the next grouping, or 0 to signify end. */ +static Py_ssize_t +_GroupGenerator_next(GroupGenerator *self) +{ + /* Note that we don't really do much error checking here. If a + grouping string contains just CHAR_MAX, for example, then just + terminate the generator. That shouldn't happen, but at least we + fail gracefully. */ + switch (self->grouping[self->i]) { + case 0: + return self->previous; + case CHAR_MAX: + /* Stop the generator. */ + return 0; + default: { + char ch = self->grouping[self->i]; + self->previous = ch; + self->i++; + return (Py_ssize_t)ch; + } + } +} + +/* Fill in some digits, leading zeros, and thousands separator. All + are optional, depending on when we're called. */ +static void +fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end, + Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep, + Py_ssize_t thousands_sep_len) +{ +#if STRINGLIB_IS_UNICODE + Py_ssize_t i; +#endif + + if (thousands_sep) { + *buffer_end -= thousands_sep_len; + + /* Copy the thousands_sep chars into the buffer. */ +#if STRINGLIB_IS_UNICODE + /* Convert from the char's of the thousands_sep from + the locale into unicode. */ + for (i = 0; i < thousands_sep_len; ++i) + (*buffer_end)[i] = thousands_sep[i]; +#else + /* No conversion, just memcpy the thousands_sep. */ + memcpy(*buffer_end, thousands_sep, thousands_sep_len); +#endif + } + + *buffer_end -= n_chars; + *digits_end -= n_chars; + memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR)); + + *buffer_end -= n_zeros; + STRINGLIB_FILL(*buffer_end, '0', n_zeros); +} + +/** + * _Py_InsertThousandsGrouping: + * @buffer: A pointer to the start of a string. + * @n_buffer: Number of characters in @buffer. + * @digits: A pointer to the digits we're reading from. If count + * is non-NULL, this is unused. + * @n_digits: The number of digits in the string, in which we want + * to put the grouping chars. + * @min_width: The minimum width of the digits in the output string. + * Output will be zero-padded on the left to fill. + * @grouping: see definition in localeconv(). + * @thousands_sep: see definition in localeconv(). + * + * There are 2 modes: counting and filling. If @buffer is NULL, + * we are in counting mode, else filling mode. + * If counting, the required buffer size is returned. + * If filling, we know the buffer will be large enough, so we don't + * need to pass in the buffer size. + * Inserts thousand grouping characters (as defined by grouping and + * thousands_sep) into the string between buffer and buffer+n_digits. + * + * Return value: 0 on error, else 1. Note that no error can occur if + * count is non-NULL. + * + * This name won't be used, the includer of this file should define + * it to be the actual function name, based on unicode or string. + * + * As closely as possible, this code mimics the logic in decimal.py's + _insert_thousands_sep(). + **/ +Py_ssize_t +_Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer, + Py_ssize_t n_buffer, + STRINGLIB_CHAR *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + const char *thousands_sep) +{ + Py_ssize_t count = 0; + Py_ssize_t n_zeros; + int loop_broken = 0; + int use_separator = 0; /* First time through, don't append the + separator. They only go between + groups. */ + STRINGLIB_CHAR *buffer_end = NULL; + STRINGLIB_CHAR *digits_end = NULL; + Py_ssize_t l; + Py_ssize_t n_chars; + Py_ssize_t thousands_sep_len = strlen(thousands_sep); + Py_ssize_t remaining = n_digits; /* Number of chars remaining to + be looked at */ + /* A generator that returns all of the grouping widths, until it + returns 0. */ + GroupGenerator groupgen; + _GroupGenerator_init(&groupgen, grouping); + + if (buffer) { + buffer_end = buffer + n_buffer; + digits_end = digits + n_digits; + } + + while ((l = _GroupGenerator_next(&groupgen)) > 0) { + l = MIN(l, MAX(MAX(remaining, min_width), 1)); + n_zeros = MAX(0, l - remaining); + n_chars = MAX(0, MIN(remaining, l)); + + /* Use n_zero zero's and n_chars chars */ + + /* Count only, don't do anything. */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + + if (buffer) { + /* Copy into the output buffer. */ + fill(&digits_end, &buffer_end, n_chars, n_zeros, + use_separator ? thousands_sep : NULL, thousands_sep_len); + } + + /* Use a separator next time. */ + use_separator = 1; + + remaining -= n_chars; + min_width -= l; + + if (remaining <= 0 && min_width <= 0) { + loop_broken = 1; + break; + } + min_width -= thousands_sep_len; + } + if (!loop_broken) { + /* We left the loop without using a break statement. */ + + l = MAX(MAX(remaining, min_width), 1); + n_zeros = MAX(0, l - remaining); + n_chars = MAX(0, MIN(remaining, l)); + + /* Use n_zero zero's and n_chars chars */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + if (buffer) { + /* Copy into the output buffer. */ + fill(&digits_end, &buffer_end, n_chars, n_zeros, + use_separator ? thousands_sep : NULL, thousands_sep_len); + } + } + return count; +} + +/** + * _Py_InsertThousandsGroupingLocale: + * @buffer: A pointer to the start of a string. + * @n_digits: The number of digits in the string, in which we want + * to put the grouping chars. + * + * Reads thee current locale and calls _Py_InsertThousandsGrouping(). + **/ +Py_ssize_t +_Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer, + Py_ssize_t n_buffer, + STRINGLIB_CHAR *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width) +{ + struct lconv *locale_data = localeconv(); + const char *grouping = locale_data->grouping; + const char *thousands_sep = locale_data->thousands_sep; + + return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits, + min_width, grouping, thousands_sep); +} +#endif /* STRINGLIB_LOCALEUTIL_H */ diff --git a/contrib/tools/python/src/Objects/stringlib/partition.h b/contrib/tools/python/src/Objects/stringlib/partition.h new file mode 100644 index 0000000000..0170bddbf0 --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/partition.h @@ -0,0 +1,110 @@ +/* stringlib: partition implementation */ + +#ifndef STRINGLIB_PARTITION_H +#define STRINGLIB_PARTITION_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(PyObject*) +stringlib_partition(PyObject* str_obj, + const STRINGLIB_CHAR* str, Py_ssize_t str_len, + PyObject* sep_obj, + const STRINGLIB_CHAR* sep, Py_ssize_t sep_len) +{ + PyObject* out; + Py_ssize_t pos; + + if (sep_len == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + + out = PyTuple_New(3); + if (!out) + return NULL; + + pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH); + + if (pos < 0) { +#if STRINGLIB_MUTABLE + PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len)); + PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); + PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0)); +#else + Py_INCREF(str_obj); + PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); +#endif + return out; + } + + PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); + Py_INCREF(sep_obj); + PyTuple_SET_ITEM(out, 1, sep_obj); + pos += sep_len; + PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } + + return out; +} + +Py_LOCAL_INLINE(PyObject*) +stringlib_rpartition(PyObject* str_obj, + const STRINGLIB_CHAR* str, Py_ssize_t str_len, + PyObject* sep_obj, + const STRINGLIB_CHAR* sep, Py_ssize_t sep_len) +{ + PyObject* out; + Py_ssize_t pos; + + if (sep_len == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + + out = PyTuple_New(3); + if (!out) + return NULL; + + pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH); + + if (pos < 0) { +#if STRINGLIB_MUTABLE + PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0)); + PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); + PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len)); +#else + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); + Py_INCREF(STRINGLIB_EMPTY); + PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); + Py_INCREF(str_obj); + PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj); +#endif + return out; + } + + PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); + Py_INCREF(sep_obj); + PyTuple_SET_ITEM(out, 1, sep_obj); + pos += sep_len; + PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } + + return out; +} + +#endif diff --git a/contrib/tools/python/src/Objects/stringlib/split.h b/contrib/tools/python/src/Objects/stringlib/split.h new file mode 100644 index 0000000000..60e77674f0 --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/split.h @@ -0,0 +1,394 @@ +/* stringlib: split implementation */ + +#ifndef STRINGLIB_SPLIT_H +#define STRINGLIB_SPLIT_H + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +/* Overallocate the initial list to reduce the number of reallocs for small + split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three + resizes, to sizes 4, 8, then 16. Most observed string splits are for human + text (roughly 11 words per line) and field delimited data (usually 1-10 + fields). For large strings the split algorithms are bandwidth limited + so increasing the preallocation likely will not improve things.*/ + +#define MAX_PREALLOC 12 + +/* 5 splits gives 6 elements */ +#define PREALLOC_SIZE(maxsplit) \ + (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1) + +#define SPLIT_APPEND(data, left, right) \ + sub = STRINGLIB_NEW((data) + (left), \ + (right) - (left)); \ + if (sub == NULL) \ + goto onError; \ + if (PyList_Append(list, sub)) { \ + Py_DECREF(sub); \ + goto onError; \ + } \ + else \ + Py_DECREF(sub); + +#define SPLIT_ADD(data, left, right) { \ + sub = STRINGLIB_NEW((data) + (left), \ + (right) - (left)); \ + if (sub == NULL) \ + goto onError; \ + if (count < MAX_PREALLOC) { \ + PyList_SET_ITEM(list, count, sub); \ + } else { \ + if (PyList_Append(list, sub)) { \ + Py_DECREF(sub); \ + goto onError; \ + } \ + else \ + Py_DECREF(sub); \ + } \ + count++; } + + +/* Always force the list to the expected size. */ +#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count + +Py_LOCAL_INLINE(PyObject *) +stringlib_split_whitespace(PyObject* str_obj, + const STRINGLIB_CHAR* str, Py_ssize_t str_len, + Py_ssize_t maxcount) +{ + Py_ssize_t i, j, count=0; + PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); + PyObject *sub; + + if (list == NULL) + return NULL; + + i = j = 0; + while (maxcount-- > 0) { + while (i < str_len && STRINGLIB_ISSPACE(str[i])) + i++; + if (i == str_len) break; + j = i; i++; + while (i < str_len && !STRINGLIB_ISSPACE(str[i])) + i++; +#ifndef STRINGLIB_MUTABLE + if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { + /* No whitespace in str_obj, so just use it as list[0] */ + Py_INCREF(str_obj); + PyList_SET_ITEM(list, 0, (PyObject *)str_obj); + count++; + break; + } +#endif + SPLIT_ADD(str, j, i); + } + + if (i < str_len) { + /* Only occurs when maxcount was reached */ + /* Skip any remaining whitespace and copy to end of string */ + while (i < str_len && STRINGLIB_ISSPACE(str[i])) + i++; + if (i != str_len) + SPLIT_ADD(str, i, str_len); + } + FIX_PREALLOC_SIZE(list); + return list; + + onError: + Py_DECREF(list); + return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_split_char(PyObject* str_obj, + const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR ch, + Py_ssize_t maxcount) +{ + Py_ssize_t i, j, count=0; + PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); + PyObject *sub; + + if (list == NULL) + return NULL; + + i = j = 0; + while ((j < str_len) && (maxcount-- > 0)) { + for(; j < str_len; j++) { + /* I found that using memchr makes no difference */ + if (str[j] == ch) { + SPLIT_ADD(str, i, j); + i = j = j + 1; + break; + } + } + } +#ifndef STRINGLIB_MUTABLE + if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { + /* ch not in str_obj, so just use str_obj as list[0] */ + Py_INCREF(str_obj); + PyList_SET_ITEM(list, 0, (PyObject *)str_obj); + count++; + } else +#endif + if (i <= str_len) { + SPLIT_ADD(str, i, str_len); + } + FIX_PREALLOC_SIZE(list); + return list; + + onError: + Py_DECREF(list); + return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_split(PyObject* str_obj, + const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, + Py_ssize_t maxcount) +{ + Py_ssize_t i, j, pos, count=0; + PyObject *list, *sub; + + if (sep_len == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + else if (sep_len == 1) + return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount); + + list = PyList_New(PREALLOC_SIZE(maxcount)); + if (list == NULL) + return NULL; + + i = j = 0; + while (maxcount-- > 0) { + pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH); + if (pos < 0) + break; + j = i + pos; + SPLIT_ADD(str, i, j); + i = j + sep_len; + } +#ifndef STRINGLIB_MUTABLE + if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { + /* No match in str_obj, so just use it as list[0] */ + Py_INCREF(str_obj); + PyList_SET_ITEM(list, 0, (PyObject *)str_obj); + count++; + } else +#endif + { + SPLIT_ADD(str, i, str_len); + } + FIX_PREALLOC_SIZE(list); + return list; + + onError: + Py_DECREF(list); + return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_rsplit_whitespace(PyObject* str_obj, + const STRINGLIB_CHAR* str, Py_ssize_t str_len, + Py_ssize_t maxcount) +{ + Py_ssize_t i, j, count=0; + PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); + PyObject *sub; + + if (list == NULL) + return NULL; + + i = j = str_len - 1; + while (maxcount-- > 0) { + while (i >= 0 && STRINGLIB_ISSPACE(str[i])) + i--; + if (i < 0) break; + j = i; i--; + while (i >= 0 && !STRINGLIB_ISSPACE(str[i])) + i--; +#ifndef STRINGLIB_MUTABLE + if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) { + /* No whitespace in str_obj, so just use it as list[0] */ + Py_INCREF(str_obj); + PyList_SET_ITEM(list, 0, (PyObject *)str_obj); + count++; + break; + } +#endif + SPLIT_ADD(str, i + 1, j + 1); + } + + if (i >= 0) { + /* Only occurs when maxcount was reached */ + /* Skip any remaining whitespace and copy to beginning of string */ + while (i >= 0 && STRINGLIB_ISSPACE(str[i])) + i--; + if (i >= 0) + SPLIT_ADD(str, 0, i + 1); + } + FIX_PREALLOC_SIZE(list); + if (PyList_Reverse(list) < 0) + goto onError; + return list; + + onError: + Py_DECREF(list); + return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_rsplit_char(PyObject* str_obj, + const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR ch, + Py_ssize_t maxcount) +{ + Py_ssize_t i, j, count=0; + PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); + PyObject *sub; + + if (list == NULL) + return NULL; + + i = j = str_len - 1; + while ((i >= 0) && (maxcount-- > 0)) { + for(; i >= 0; i--) { + if (str[i] == ch) { + SPLIT_ADD(str, i + 1, j + 1); + j = i = i - 1; + break; + } + } + } +#ifndef STRINGLIB_MUTABLE + if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { + /* ch not in str_obj, so just use str_obj as list[0] */ + Py_INCREF(str_obj); + PyList_SET_ITEM(list, 0, (PyObject *)str_obj); + count++; + } else +#endif + if (j >= -1) { + SPLIT_ADD(str, 0, j + 1); + } + FIX_PREALLOC_SIZE(list); + if (PyList_Reverse(list) < 0) + goto onError; + return list; + + onError: + Py_DECREF(list); + return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_rsplit(PyObject* str_obj, + const STRINGLIB_CHAR* str, Py_ssize_t str_len, + const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, + Py_ssize_t maxcount) +{ + Py_ssize_t j, pos, count=0; + PyObject *list, *sub; + + if (sep_len == 0) { + PyErr_SetString(PyExc_ValueError, "empty separator"); + return NULL; + } + else if (sep_len == 1) + return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount); + + list = PyList_New(PREALLOC_SIZE(maxcount)); + if (list == NULL) + return NULL; + + j = str_len; + while (maxcount-- > 0) { + pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH); + if (pos < 0) + break; + SPLIT_ADD(str, pos + sep_len, j); + j = pos; + } +#ifndef STRINGLIB_MUTABLE + if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { + /* No match in str_obj, so just use it as list[0] */ + Py_INCREF(str_obj); + PyList_SET_ITEM(list, 0, (PyObject *)str_obj); + count++; + } else +#endif + { + SPLIT_ADD(str, 0, j); + } + FIX_PREALLOC_SIZE(list); + if (PyList_Reverse(list) < 0) + goto onError; + return list; + + onError: + Py_DECREF(list); + return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +stringlib_splitlines(PyObject* str_obj, + const STRINGLIB_CHAR* str, Py_ssize_t str_len, + int keepends) +{ + /* This does not use the preallocated list because splitlines is + usually run with hundreds of newlines. The overhead of + switching between PyList_SET_ITEM and append causes about a + 2-3% slowdown for that common case. A smarter implementation + could move the if check out, so the SET_ITEMs are done first + and the appends only done when the prealloc buffer is full. + That's too much work for little gain.*/ + + register Py_ssize_t i; + register Py_ssize_t j; + PyObject *list = PyList_New(0); + PyObject *sub; + + if (list == NULL) + return NULL; + + for (i = j = 0; i < str_len; ) { + Py_ssize_t eol; + + /* Find a line and append it */ + while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i])) + i++; + + /* Skip the line break reading CRLF as one line break */ + eol = i; + if (i < str_len) { + if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n') + i += 2; + else + i++; + if (keepends) + eol = i; + } +#ifndef STRINGLIB_MUTABLE + if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { + /* No linebreak in str_obj, so just use it as list[0] */ + if (PyList_Append(list, str_obj)) + goto onError; + break; + } +#endif + SPLIT_APPEND(str, j, eol); + j = i; + } + return list; + + onError: + Py_DECREF(list); + return NULL; +} + +#endif diff --git a/contrib/tools/python/src/Objects/stringlib/string_format.h b/contrib/tools/python/src/Objects/stringlib/string_format.h new file mode 100644 index 0000000000..2bd1839d72 --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/string_format.h @@ -0,0 +1,1361 @@ +/* + string_format.h -- implementation of string.format(). + + It uses the Objects/stringlib conventions, so that it can be + compiled for both unicode and string objects. +*/ + + +/* Defines for Python 2.6 compatibility */ +#if PY_VERSION_HEX < 0x03000000 +#define PyLong_FromSsize_t _PyLong_FromSsize_t +#endif + +/* Defines for more efficiently reallocating the string buffer */ +#define INITIAL_SIZE_INCREMENT 100 +#define SIZE_MULTIPLIER 2 +#define MAX_SIZE_INCREMENT 3200 + + +/************************************************************************/ +/*********** Global data structures and forward declarations *********/ +/************************************************************************/ + +/* + A SubString consists of the characters between two string or + unicode pointers. +*/ +typedef struct { + STRINGLIB_CHAR *ptr; + STRINGLIB_CHAR *end; +} SubString; + + +typedef enum { + ANS_INIT, + ANS_AUTO, + ANS_MANUAL +} AutoNumberState; /* Keep track if we're auto-numbering fields */ + +/* Keeps track of our auto-numbering state, and which number field we're on */ +typedef struct { + AutoNumberState an_state; + int an_field_number; +} AutoNumber; + + +/* forward declaration for recursion */ +static PyObject * +build_string(SubString *input, PyObject *args, PyObject *kwargs, + int recursion_depth, AutoNumber *auto_number); + + + +/************************************************************************/ +/************************** Utility functions ************************/ +/************************************************************************/ + +static void +AutoNumber_Init(AutoNumber *auto_number) +{ + auto_number->an_state = ANS_INIT; + auto_number->an_field_number = 0; +} + +/* fill in a SubString from a pointer and length */ +Py_LOCAL_INLINE(void) +SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len) +{ + str->ptr = p; + if (p == NULL) + str->end = NULL; + else + str->end = str->ptr + len; +} + +/* return a new string. if str->ptr is NULL, return None */ +Py_LOCAL_INLINE(PyObject *) +SubString_new_object(SubString *str) +{ + if (str->ptr == NULL) { + Py_INCREF(Py_None); + return Py_None; + } + return STRINGLIB_NEW(str->ptr, str->end - str->ptr); +} + +/* return a new string. if str->ptr is NULL, return None */ +Py_LOCAL_INLINE(PyObject *) +SubString_new_object_or_empty(SubString *str) +{ + if (str->ptr == NULL) { + return STRINGLIB_NEW(NULL, 0); + } + return STRINGLIB_NEW(str->ptr, str->end - str->ptr); +} + +/* Return 1 if an error has been detected switching between automatic + field numbering and manual field specification, else return 0. Set + ValueError on error. */ +static int +autonumber_state_error(AutoNumberState state, int field_name_is_empty) +{ + if (state == ANS_MANUAL) { + if (field_name_is_empty) { + PyErr_SetString(PyExc_ValueError, "cannot switch from " + "manual field specification to " + "automatic field numbering"); + return 1; + } + } + else { + if (!field_name_is_empty) { + PyErr_SetString(PyExc_ValueError, "cannot switch from " + "automatic field numbering to " + "manual field specification"); + return 1; + } + } + return 0; +} + + +/************************************************************************/ +/*********** Output string management functions ****************/ +/************************************************************************/ + +typedef struct { + STRINGLIB_CHAR *ptr; + STRINGLIB_CHAR *end; + PyObject *obj; + Py_ssize_t size_increment; +} OutputString; + +/* initialize an OutputString object, reserving size characters */ +static int +output_initialize(OutputString *output, Py_ssize_t size) +{ + output->obj = STRINGLIB_NEW(NULL, size); + if (output->obj == NULL) + return 0; + + output->ptr = STRINGLIB_STR(output->obj); + output->end = STRINGLIB_LEN(output->obj) + output->ptr; + output->size_increment = INITIAL_SIZE_INCREMENT; + + return 1; +} + +/* + output_extend reallocates the output string buffer. + It returns a status: 0 for a failed reallocation, + 1 for success. +*/ + +static int +output_extend(OutputString *output, Py_ssize_t count) +{ + STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj); + Py_ssize_t curlen = output->ptr - startptr; + Py_ssize_t maxlen = curlen + count + output->size_increment; + + if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0) + return 0; + startptr = STRINGLIB_STR(output->obj); + output->ptr = startptr + curlen; + output->end = startptr + maxlen; + if (output->size_increment < MAX_SIZE_INCREMENT) + output->size_increment *= SIZE_MULTIPLIER; + return 1; +} + +/* + output_data dumps characters into our output string + buffer. + + In some cases, it has to reallocate the string. + + It returns a status: 0 for a failed reallocation, + 1 for success. +*/ +static int +output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count) +{ + if ((count > output->end - output->ptr) && !output_extend(output, count)) + return 0; + memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR)); + output->ptr += count; + return 1; +} + +/************************************************************************/ +/*********** Format string parsing -- integers and identifiers *********/ +/************************************************************************/ + +static Py_ssize_t +get_integer(const SubString *str) +{ + Py_ssize_t accumulator = 0; + Py_ssize_t digitval; + STRINGLIB_CHAR *p; + + /* empty string is an error */ + if (str->ptr >= str->end) + return -1; + + for (p = str->ptr; p < str->end; p++) { + digitval = STRINGLIB_TODECIMAL(*p); + if (digitval < 0) + return -1; + /* + Detect possible overflow before it happens: + + accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if + accumulator > (PY_SSIZE_T_MAX - digitval) / 10. + */ + if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { + PyErr_Format(PyExc_ValueError, + "Too many decimal digits in format string"); + return -1; + } + accumulator = accumulator * 10 + digitval; + } + return accumulator; +} + +/************************************************************************/ +/******** Functions to get field objects and specification strings ******/ +/************************************************************************/ + +/* do the equivalent of obj.name */ +static PyObject * +getattr(PyObject *obj, SubString *name) +{ + PyObject *newobj; + PyObject *str = SubString_new_object(name); + if (str == NULL) + return NULL; + newobj = PyObject_GetAttr(obj, str); + Py_DECREF(str); + return newobj; +} + +/* do the equivalent of obj[idx], where obj is a sequence */ +static PyObject * +getitem_sequence(PyObject *obj, Py_ssize_t idx) +{ + return PySequence_GetItem(obj, idx); +} + +/* do the equivalent of obj[idx], where obj is not a sequence */ +static PyObject * +getitem_idx(PyObject *obj, Py_ssize_t idx) +{ + PyObject *newobj; + PyObject *idx_obj = PyLong_FromSsize_t(idx); + if (idx_obj == NULL) + return NULL; + newobj = PyObject_GetItem(obj, idx_obj); + Py_DECREF(idx_obj); + return newobj; +} + +/* do the equivalent of obj[name] */ +static PyObject * +getitem_str(PyObject *obj, SubString *name) +{ + PyObject *newobj; + PyObject *str = SubString_new_object(name); + if (str == NULL) + return NULL; + newobj = PyObject_GetItem(obj, str); + Py_DECREF(str); + return newobj; +} + +typedef struct { + /* the entire string we're parsing. we assume that someone else + is managing its lifetime, and that it will exist for the + lifetime of the iterator. can be empty */ + SubString str; + + /* pointer to where we are inside field_name */ + STRINGLIB_CHAR *ptr; +} FieldNameIterator; + + +static int +FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr, + Py_ssize_t len) +{ + SubString_init(&self->str, ptr, len); + self->ptr = self->str.ptr; + return 1; +} + +static int +_FieldNameIterator_attr(FieldNameIterator *self, SubString *name) +{ + STRINGLIB_CHAR c; + + name->ptr = self->ptr; + + /* return everything until '.' or '[' */ + while (self->ptr < self->str.end) { + switch (c = *self->ptr++) { + case '[': + case '.': + /* backup so that we this character will be seen next time */ + self->ptr--; + break; + default: + continue; + } + break; + } + /* end of string is okay */ + name->end = self->ptr; + return 1; +} + +static int +_FieldNameIterator_item(FieldNameIterator *self, SubString *name) +{ + int bracket_seen = 0; + STRINGLIB_CHAR c; + + name->ptr = self->ptr; + + /* return everything until ']' */ + while (self->ptr < self->str.end) { + switch (c = *self->ptr++) { + case ']': + bracket_seen = 1; + break; + default: + continue; + } + break; + } + /* make sure we ended with a ']' */ + if (!bracket_seen) { + PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); + return 0; + } + + /* end of string is okay */ + /* don't include the ']' */ + name->end = self->ptr-1; + return 1; +} + +/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ +static int +FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, + Py_ssize_t *name_idx, SubString *name) +{ + /* check at end of input */ + if (self->ptr >= self->str.end) + return 1; + + switch (*self->ptr++) { + case '.': + *is_attribute = 1; + if (_FieldNameIterator_attr(self, name) == 0) + return 0; + *name_idx = -1; + break; + case '[': + *is_attribute = 0; + if (_FieldNameIterator_item(self, name) == 0) + return 0; + *name_idx = get_integer(name); + if (*name_idx == -1 && PyErr_Occurred()) + return 0; + break; + default: + /* Invalid character follows ']' */ + PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " + "follow ']' in format field specifier"); + return 0; + } + + /* empty string is an error */ + if (name->ptr == name->end) { + PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); + return 0; + } + + return 2; +} + + +/* input: field_name + output: 'first' points to the part before the first '[' or '.' + 'first_idx' is -1 if 'first' is not an integer, otherwise + it's the value of first converted to an integer + 'rest' is an iterator to return the rest +*/ +static int +field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first, + Py_ssize_t *first_idx, FieldNameIterator *rest, + AutoNumber *auto_number) +{ + STRINGLIB_CHAR c; + STRINGLIB_CHAR *p = ptr; + STRINGLIB_CHAR *end = ptr + len; + int field_name_is_empty; + int using_numeric_index; + + /* find the part up until the first '.' or '[' */ + while (p < end) { + switch (c = *p++) { + case '[': + case '.': + /* backup so that we this character is available to the + "rest" iterator */ + p--; + break; + default: + continue; + } + break; + } + + /* set up the return values */ + SubString_init(first, ptr, p - ptr); + FieldNameIterator_init(rest, p, end - p); + + /* see if "first" is an integer, in which case it's used as an index */ + *first_idx = get_integer(first); + if (*first_idx == -1 && PyErr_Occurred()) + return 0; + + field_name_is_empty = first->ptr >= first->end; + + /* If the field name is omitted or if we have a numeric index + specified, then we're doing numeric indexing into args. */ + using_numeric_index = field_name_is_empty || *first_idx != -1; + + /* We always get here exactly one time for each field we're + processing. And we get here in field order (counting by left + braces). So this is the perfect place to handle automatic field + numbering if the field name is omitted. */ + + /* Check if we need to do the auto-numbering. It's not needed if + we're called from string.Format routines, because it's handled + in that class by itself. */ + if (auto_number) { + /* Initialize our auto numbering state if this is the first + time we're either auto-numbering or manually numbering. */ + if (auto_number->an_state == ANS_INIT && using_numeric_index) + auto_number->an_state = field_name_is_empty ? + ANS_AUTO : ANS_MANUAL; + + /* Make sure our state is consistent with what we're doing + this time through. Only check if we're using a numeric + index. */ + if (using_numeric_index) + if (autonumber_state_error(auto_number->an_state, + field_name_is_empty)) + return 0; + /* Zero length field means we want to do auto-numbering of the + fields. */ + if (field_name_is_empty) + *first_idx = (auto_number->an_field_number)++; + } + + return 1; +} + + +/* + get_field_object returns the object inside {}, before the + format_spec. It handles getindex and getattr lookups and consumes + the entire input string. +*/ +static PyObject * +get_field_object(SubString *input, PyObject *args, PyObject *kwargs, + AutoNumber *auto_number) +{ + PyObject *obj = NULL; + int ok; + int is_attribute; + SubString name; + SubString first; + Py_ssize_t index; + FieldNameIterator rest; + + if (!field_name_split(input->ptr, input->end - input->ptr, &first, + &index, &rest, auto_number)) { + goto error; + } + + if (index == -1) { + /* look up in kwargs */ + PyObject *key = SubString_new_object(&first); + if (key == NULL) + goto error; + if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) { + PyErr_SetObject(PyExc_KeyError, key); + Py_DECREF(key); + goto error; + } + Py_DECREF(key); + Py_INCREF(obj); + } + else { + /* look up in args */ + obj = PySequence_GetItem(args, index); + if (obj == NULL) + goto error; + } + + /* iterate over the rest of the field_name */ + while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, + &name)) == 2) { + PyObject *tmp; + + if (is_attribute) + /* getattr lookup "." */ + tmp = getattr(obj, &name); + else + /* getitem lookup "[]" */ + if (index == -1) + tmp = getitem_str(obj, &name); + else + if (PySequence_Check(obj)) + tmp = getitem_sequence(obj, index); + else + /* not a sequence */ + tmp = getitem_idx(obj, index); + if (tmp == NULL) + goto error; + + /* assign to obj */ + Py_DECREF(obj); + obj = tmp; + } + /* end of iterator, this is the non-error case */ + if (ok == 1) + return obj; +error: + Py_XDECREF(obj); + return NULL; +} + +/************************************************************************/ +/***************** Field rendering functions **************************/ +/************************************************************************/ + +/* + render_field() is the main function in this section. It takes the + field object and field specification string generated by + get_field_and_spec, and renders the field into the output string. + + render_field calls fieldobj.__format__(format_spec) method, and + appends to the output. +*/ +static int +render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output) +{ + int ok = 0; + PyObject *result = NULL; + PyObject *format_spec_object = NULL; + PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL; + STRINGLIB_CHAR* format_spec_start = format_spec->ptr ? + format_spec->ptr : NULL; + Py_ssize_t format_spec_len = format_spec->ptr ? + format_spec->end - format_spec->ptr : 0; + + /* If we know the type exactly, skip the lookup of __format__ and just + call the formatter directly. */ +#if STRINGLIB_IS_UNICODE + if (PyUnicode_CheckExact(fieldobj)) + formatter = _PyUnicode_FormatAdvanced; + /* Unfortunately, there's a problem with checking for int, long, + and float here. If we're being included as unicode, their + formatters expect string format_spec args. For now, just skip + this optimization for unicode. This could be fixed, but it's a + hassle. */ +#else + if (PyString_CheckExact(fieldobj)) + formatter = _PyBytes_FormatAdvanced; + else if (PyInt_CheckExact(fieldobj)) + formatter =_PyInt_FormatAdvanced; + else if (PyLong_CheckExact(fieldobj)) + formatter =_PyLong_FormatAdvanced; + else if (PyFloat_CheckExact(fieldobj)) + formatter = _PyFloat_FormatAdvanced; +#endif + + if (formatter) { + /* we know exactly which formatter will be called when __format__ is + looked up, so call it directly, instead. */ + result = formatter(fieldobj, format_spec_start, format_spec_len); + } + else { + /* We need to create an object out of the pointers we have, because + __format__ takes a string/unicode object for format_spec. */ + format_spec_object = STRINGLIB_NEW(format_spec_start, + format_spec_len); + if (format_spec_object == NULL) + goto done; + + result = PyObject_Format(fieldobj, format_spec_object); + } + if (result == NULL) + goto done; + +#if PY_VERSION_HEX >= 0x03000000 + assert(PyUnicode_Check(result)); +#else + assert(PyString_Check(result) || PyUnicode_Check(result)); + + /* Convert result to our type. We could be str, and result could + be unicode */ + { + PyObject *tmp = STRINGLIB_TOSTR(result); + if (tmp == NULL) + goto done; + Py_DECREF(result); + result = tmp; + } +#endif + + ok = output_data(output, + STRINGLIB_STR(result), STRINGLIB_LEN(result)); +done: + Py_XDECREF(format_spec_object); + Py_XDECREF(result); + return ok; +} + +static int +parse_field(SubString *str, SubString *field_name, SubString *format_spec, + STRINGLIB_CHAR *conversion) +{ + /* Note this function works if the field name is zero length, + which is good. Zero length field names are handled later, in + field_name_split. */ + + STRINGLIB_CHAR c = 0; + + /* initialize these, as they may be empty */ + *conversion = '\0'; + SubString_init(format_spec, NULL, 0); + + /* Search for the field name. it's terminated by the end of + the string, or a ':' or '!' */ + field_name->ptr = str->ptr; + while (str->ptr < str->end) { + switch (c = *(str->ptr++)) { + case ':': + case '!': + break; + default: + continue; + } + break; + } + + if (c == '!' || c == ':') { + /* we have a format specifier and/or a conversion */ + /* don't include the last character */ + field_name->end = str->ptr-1; + + /* the format specifier is the rest of the string */ + format_spec->ptr = str->ptr; + format_spec->end = str->end; + + /* see if there's a conversion specifier */ + if (c == '!') { + /* there must be another character present */ + if (format_spec->ptr >= format_spec->end) { + PyErr_SetString(PyExc_ValueError, + "end of format while looking for conversion " + "specifier"); + return 0; + } + *conversion = *(format_spec->ptr++); + + /* if there is another character, it must be a colon */ + if (format_spec->ptr < format_spec->end) { + c = *(format_spec->ptr++); + if (c != ':') { + PyErr_SetString(PyExc_ValueError, + "expected ':' after format specifier"); + return 0; + } + } + } + } + else + /* end of string, there's no format_spec or conversion */ + field_name->end = str->ptr; + + return 1; +} + +/************************************************************************/ +/******* Output string allocation and escape-to-markup processing ******/ +/************************************************************************/ + +/* MarkupIterator breaks the string into pieces of either literal + text, or things inside {} that need to be marked up. it is + designed to make it easy to wrap a Python iterator around it, for + use with the Formatter class */ + +typedef struct { + SubString str; +} MarkupIterator; + +static int +MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len) +{ + SubString_init(&self->str, ptr, len); + return 1; +} + +/* returns 0 on error, 1 on non-error termination, and 2 if it got a + string (or something to be expanded) */ +static int +MarkupIterator_next(MarkupIterator *self, SubString *literal, + int *field_present, SubString *field_name, + SubString *format_spec, STRINGLIB_CHAR *conversion, + int *format_spec_needs_expanding) +{ + int at_end; + STRINGLIB_CHAR c = 0; + STRINGLIB_CHAR *start; + int count; + Py_ssize_t len; + int markup_follows = 0; + + /* initialize all of the output variables */ + SubString_init(literal, NULL, 0); + SubString_init(field_name, NULL, 0); + SubString_init(format_spec, NULL, 0); + *conversion = '\0'; + *format_spec_needs_expanding = 0; + *field_present = 0; + + /* No more input, end of iterator. This is the normal exit + path. */ + if (self->str.ptr >= self->str.end) + return 1; + + start = self->str.ptr; + + /* First read any literal text. Read until the end of string, an + escaped '{' or '}', or an unescaped '{'. In order to never + allocate memory and so I can just pass pointers around, if + there's an escaped '{' or '}' then we'll return the literal + including the brace, but no format object. The next time + through, we'll return the rest of the literal, skipping past + the second consecutive brace. */ + while (self->str.ptr < self->str.end) { + switch (c = *(self->str.ptr++)) { + case '{': + case '}': + markup_follows = 1; + break; + default: + continue; + } + break; + } + + at_end = self->str.ptr >= self->str.end; + len = self->str.ptr - start; + + if ((c == '}') && (at_end || (c != *self->str.ptr))) { + PyErr_SetString(PyExc_ValueError, "Single '}' encountered " + "in format string"); + return 0; + } + if (at_end && c == '{') { + PyErr_SetString(PyExc_ValueError, "Single '{' encountered " + "in format string"); + return 0; + } + if (!at_end) { + if (c == *self->str.ptr) { + /* escaped } or {, skip it in the input. there is no + markup object following us, just this literal text */ + self->str.ptr++; + markup_follows = 0; + } + else + len--; + } + + /* record the literal text */ + literal->ptr = start; + literal->end = start + len; + + if (!markup_follows) + return 2; + + /* this is markup, find the end of the string by counting nested + braces. note that this prohibits escaped braces, so that + format_specs cannot have braces in them. */ + *field_present = 1; + count = 1; + + start = self->str.ptr; + + /* we know we can't have a zero length string, so don't worry + about that case */ + while (self->str.ptr < self->str.end) { + switch (c = *(self->str.ptr++)) { + case '{': + /* the format spec needs to be recursively expanded. + this is an optimization, and not strictly needed */ + *format_spec_needs_expanding = 1; + count++; + break; + case '}': + count--; + if (count <= 0) { + /* we're done. parse and get out */ + SubString s; + + SubString_init(&s, start, self->str.ptr - 1 - start); + if (parse_field(&s, field_name, format_spec, conversion) == 0) + return 0; + + /* success */ + return 2; + } + break; + } + } + + /* end of string while searching for matching '}' */ + PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); + return 0; +} + + +/* do the !r or !s conversion on obj */ +static PyObject * +do_conversion(PyObject *obj, STRINGLIB_CHAR conversion) +{ + /* XXX in pre-3.0, do we need to convert this to unicode, since it + might have returned a string? */ + switch (conversion) { + case 'r': + return PyObject_Repr(obj); + case 's': + return STRINGLIB_TOSTR(obj); + default: + if (conversion > 32 && conversion < 127) { + /* It's the ASCII subrange; casting to char is safe + (assuming the execution character set is an ASCII + superset). */ + PyErr_Format(PyExc_ValueError, + "Unknown conversion specifier %c", + (char)conversion); + } else + PyErr_Format(PyExc_ValueError, + "Unknown conversion specifier \\x%x", + (unsigned int)conversion); + return NULL; + } +} + +/* given: + + {field_name!conversion:format_spec} + + compute the result and write it to output. + format_spec_needs_expanding is an optimization. if it's false, + just output the string directly, otherwise recursively expand the + format_spec string. + + field_name is allowed to be zero length, in which case we + are doing auto field numbering. +*/ + +static int +output_markup(SubString *field_name, SubString *format_spec, + int format_spec_needs_expanding, STRINGLIB_CHAR conversion, + OutputString *output, PyObject *args, PyObject *kwargs, + int recursion_depth, AutoNumber *auto_number) +{ + PyObject *tmp = NULL; + PyObject *fieldobj = NULL; + SubString expanded_format_spec; + SubString *actual_format_spec; + int result = 0; + + /* convert field_name to an object */ + fieldobj = get_field_object(field_name, args, kwargs, auto_number); + if (fieldobj == NULL) + goto done; + + if (conversion != '\0') { + tmp = do_conversion(fieldobj, conversion); + if (tmp == NULL) + goto done; + + /* do the assignment, transferring ownership: fieldobj = tmp */ + Py_DECREF(fieldobj); + fieldobj = tmp; + tmp = NULL; + } + + /* if needed, recurively compute the format_spec */ + if (format_spec_needs_expanding) { + tmp = build_string(format_spec, args, kwargs, recursion_depth-1, + auto_number); + if (tmp == NULL) + goto done; + + /* note that in the case we're expanding the format string, + tmp must be kept around until after the call to + render_field. */ + SubString_init(&expanded_format_spec, + STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp)); + actual_format_spec = &expanded_format_spec; + } + else + actual_format_spec = format_spec; + + if (render_field(fieldobj, actual_format_spec, output) == 0) + goto done; + + result = 1; + +done: + Py_XDECREF(fieldobj); + Py_XDECREF(tmp); + + return result; +} + +/* + do_markup is the top-level loop for the format() method. It + searches through the format string for escapes to markup codes, and + calls other functions to move non-markup text to the output, + and to perform the markup to the output. +*/ +static int +do_markup(SubString *input, PyObject *args, PyObject *kwargs, + OutputString *output, int recursion_depth, AutoNumber *auto_number) +{ + MarkupIterator iter; + int format_spec_needs_expanding; + int result; + int field_present; + SubString literal; + SubString field_name; + SubString format_spec; + STRINGLIB_CHAR conversion; + + MarkupIterator_init(&iter, input->ptr, input->end - input->ptr); + while ((result = MarkupIterator_next(&iter, &literal, &field_present, + &field_name, &format_spec, + &conversion, + &format_spec_needs_expanding)) == 2) { + if (!output_data(output, literal.ptr, literal.end - literal.ptr)) + return 0; + if (field_present) + if (!output_markup(&field_name, &format_spec, + format_spec_needs_expanding, conversion, output, + args, kwargs, recursion_depth, auto_number)) + return 0; + } + return result; +} + + +/* + build_string allocates the output string and then + calls do_markup to do the heavy lifting. +*/ +static PyObject * +build_string(SubString *input, PyObject *args, PyObject *kwargs, + int recursion_depth, AutoNumber *auto_number) +{ + OutputString output; + PyObject *result = NULL; + Py_ssize_t count; + + output.obj = NULL; /* needed so cleanup code always works */ + + /* check the recursion level */ + if (recursion_depth <= 0) { + PyErr_SetString(PyExc_ValueError, + "Max string recursion exceeded"); + goto done; + } + + /* initial size is the length of the format string, plus the size + increment. seems like a reasonable default */ + if (!output_initialize(&output, + input->end - input->ptr + + INITIAL_SIZE_INCREMENT)) + goto done; + + if (!do_markup(input, args, kwargs, &output, recursion_depth, + auto_number)) { + goto done; + } + + count = output.ptr - STRINGLIB_STR(output.obj); + if (STRINGLIB_RESIZE(&output.obj, count) < 0) { + goto done; + } + + /* transfer ownership to result */ + result = output.obj; + output.obj = NULL; + +done: + Py_XDECREF(output.obj); + return result; +} + +/************************************************************************/ +/*********** main routine ***********************************************/ +/************************************************************************/ + +/* this is the main entry point */ +static PyObject * +do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) +{ + SubString input; + + /* PEP 3101 says only 2 levels, so that + "{0:{1}}".format('abc', 's') # works + "{0:{1:{2}}}".format('abc', 's', '') # fails + */ + int recursion_depth = 2; + + AutoNumber auto_number; + + AutoNumber_Init(&auto_number); + SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self)); + return build_string(&input, args, kwargs, recursion_depth, &auto_number); +} + + + +/************************************************************************/ +/*********** formatteriterator ******************************************/ +/************************************************************************/ + +/* This is used to implement string.Formatter.vparse(). It exists so + Formatter can share code with the built in unicode.format() method. + It's really just a wrapper around MarkupIterator that is callable + from Python. */ + +typedef struct { + PyObject_HEAD + + STRINGLIB_OBJECT *str; + + MarkupIterator it_markup; +} formatteriterobject; + +static void +formatteriter_dealloc(formatteriterobject *it) +{ + Py_XDECREF(it->str); + PyObject_FREE(it); +} + +/* returns a tuple: + (literal, field_name, format_spec, conversion) + + literal is any literal text to output. might be zero length + field_name is the string before the ':'. might be None + format_spec is the string after the ':'. mibht be None + conversion is either None, or the string after the '!' +*/ +static PyObject * +formatteriter_next(formatteriterobject *it) +{ + SubString literal; + SubString field_name; + SubString format_spec; + STRINGLIB_CHAR conversion; + int format_spec_needs_expanding; + int field_present; + int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, + &field_name, &format_spec, &conversion, + &format_spec_needs_expanding); + + /* all of the SubString objects point into it->str, so no + memory management needs to be done on them */ + assert(0 <= result && result <= 2); + if (result == 0 || result == 1) + /* if 0, error has already been set, if 1, iterator is empty */ + return NULL; + else { + PyObject *literal_str = NULL; + PyObject *field_name_str = NULL; + PyObject *format_spec_str = NULL; + PyObject *conversion_str = NULL; + PyObject *tuple = NULL; + + literal_str = SubString_new_object(&literal); + if (literal_str == NULL) + goto done; + + field_name_str = SubString_new_object(&field_name); + if (field_name_str == NULL) + goto done; + + /* if field_name is non-zero length, return a string for + format_spec (even if zero length), else return None */ + format_spec_str = (field_present ? + SubString_new_object_or_empty : + SubString_new_object)(&format_spec); + if (format_spec_str == NULL) + goto done; + + /* if the conversion is not specified, return a None, + otherwise create a one length string with the conversion + character */ + if (conversion == '\0') { + conversion_str = Py_None; + Py_INCREF(conversion_str); + } + else + conversion_str = STRINGLIB_NEW(&conversion, 1); + if (conversion_str == NULL) + goto done; + + tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, + conversion_str); + done: + Py_XDECREF(literal_str); + Py_XDECREF(field_name_str); + Py_XDECREF(format_spec_str); + Py_XDECREF(conversion_str); + return tuple; + } +} + +static PyMethodDef formatteriter_methods[] = { + {NULL, NULL} /* sentinel */ +}; + +static PyTypeObject PyFormatterIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "formatteriterator", /* tp_name */ + sizeof(formatteriterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)formatteriter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)formatteriter_next, /* tp_iternext */ + formatteriter_methods, /* tp_methods */ + 0, +}; + +/* unicode_formatter_parser is used to implement + string.Formatter.vformat. it parses a string and returns tuples + describing the parsed elements. It's a wrapper around + stringlib/string_format.h's MarkupIterator */ +static PyObject * +formatter_parser(STRINGLIB_OBJECT *self) +{ + formatteriterobject *it; + + it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); + if (it == NULL) + return NULL; + + /* take ownership, give the object to the iterator */ + Py_INCREF(self); + it->str = self; + + /* initialize the contained MarkupIterator */ + MarkupIterator_init(&it->it_markup, + STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + + return (PyObject *)it; +} + + +/************************************************************************/ +/*********** fieldnameiterator ******************************************/ +/************************************************************************/ + + +/* This is used to implement string.Formatter.vparse(). It parses the + field name into attribute and item values. It's a Python-callable + wrapper around FieldNameIterator */ + +typedef struct { + PyObject_HEAD + + STRINGLIB_OBJECT *str; + + FieldNameIterator it_field; +} fieldnameiterobject; + +static void +fieldnameiter_dealloc(fieldnameiterobject *it) +{ + Py_XDECREF(it->str); + PyObject_FREE(it); +} + +/* returns a tuple: + (is_attr, value) + is_attr is true if we used attribute syntax (e.g., '.foo') + false if we used index syntax (e.g., '[foo]') + value is an integer or string +*/ +static PyObject * +fieldnameiter_next(fieldnameiterobject *it) +{ + int result; + int is_attr; + Py_ssize_t idx; + SubString name; + + result = FieldNameIterator_next(&it->it_field, &is_attr, + &idx, &name); + if (result == 0 || result == 1) + /* if 0, error has already been set, if 1, iterator is empty */ + return NULL; + else { + PyObject* result = NULL; + PyObject* is_attr_obj = NULL; + PyObject* obj = NULL; + + is_attr_obj = PyBool_FromLong(is_attr); + if (is_attr_obj == NULL) + goto done; + + /* either an integer or a string */ + if (idx != -1) + obj = PyLong_FromSsize_t(idx); + else + obj = SubString_new_object(&name); + if (obj == NULL) + goto done; + + /* return a tuple of values */ + result = PyTuple_Pack(2, is_attr_obj, obj); + + done: + Py_XDECREF(is_attr_obj); + Py_XDECREF(obj); + return result; + } +} + +static PyMethodDef fieldnameiter_methods[] = { + {NULL, NULL} /* sentinel */ +}; + +static PyTypeObject PyFieldNameIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "fieldnameiterator", /* tp_name */ + sizeof(fieldnameiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)fieldnameiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)fieldnameiter_next, /* tp_iternext */ + fieldnameiter_methods, /* tp_methods */ + 0}; + +/* unicode_formatter_field_name_split is used to implement + string.Formatter.vformat. it takes a PEP 3101 "field name", and + returns a tuple of (first, rest): "first", the part before the + first '.' or '['; and "rest", an iterator for the rest of the field + name. it's a wrapper around stringlib/string_format.h's + field_name_split. The iterator it returns is a + FieldNameIterator */ +static PyObject * +formatter_field_name_split(STRINGLIB_OBJECT *self) +{ + SubString first; + Py_ssize_t first_idx; + fieldnameiterobject *it; + + PyObject *first_obj = NULL; + PyObject *result = NULL; + + it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); + if (it == NULL) + return NULL; + + /* take ownership, give the object to the iterator. this is + just to keep the field_name alive */ + Py_INCREF(self); + it->str = self; + + /* Pass in auto_number = NULL. We'll return an empty string for + first_obj in that case. */ + if (!field_name_split(STRINGLIB_STR(self), + STRINGLIB_LEN(self), + &first, &first_idx, &it->it_field, NULL)) + goto done; + + /* first becomes an integer, if possible; else a string */ + if (first_idx != -1) + first_obj = PyLong_FromSsize_t(first_idx); + else + /* convert "first" into a string object */ + first_obj = SubString_new_object(&first); + if (first_obj == NULL) + goto done; + + /* return a tuple of values */ + result = PyTuple_Pack(2, first_obj, it); + +done: + Py_XDECREF(it); + Py_XDECREF(first_obj); + return result; +} diff --git a/contrib/tools/python/src/Objects/stringlib/stringdefs.h b/contrib/tools/python/src/Objects/stringlib/stringdefs.h new file mode 100644 index 0000000000..84e461628e --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/stringdefs.h @@ -0,0 +1,33 @@ +#ifndef STRINGLIB_STRINGDEFS_H +#define STRINGLIB_STRINGDEFS_H + +/* this is sort of a hack. there's at least one place (formatting + floats) where some stringlib code takes a different path if it's + compiled as unicode. */ +#define STRINGLIB_IS_UNICODE 0 + +#define STRINGLIB_OBJECT PyStringObject +#define STRINGLIB_CHAR char +#define STRINGLIB_TYPE_NAME "string" +#define STRINGLIB_PARSE_CODE "S" +#define STRINGLIB_EMPTY nullstring +#define STRINGLIB_ISSPACE Py_ISSPACE +#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r')) +#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9')) +#define STRINGLIB_TODECIMAL(x) (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1) +#define STRINGLIB_TOUPPER Py_TOUPPER +#define STRINGLIB_TOLOWER Py_TOLOWER +#define STRINGLIB_FILL memset +#define STRINGLIB_STR PyString_AS_STRING +#define STRINGLIB_LEN PyString_GET_SIZE +#define STRINGLIB_NEW PyString_FromStringAndSize +#define STRINGLIB_RESIZE _PyString_Resize +#define STRINGLIB_CHECK PyString_Check +#define STRINGLIB_CHECK_EXACT PyString_CheckExact +#define STRINGLIB_TOSTR PyObject_Str +#define STRINGLIB_GROUPING _PyString_InsertThousandsGrouping +#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale + +#define STRINGLIB_WANT_CONTAINS_OBJ 1 + +#endif /* !STRINGLIB_STRINGDEFS_H */ diff --git a/contrib/tools/python/src/Objects/stringlib/transmogrify.h b/contrib/tools/python/src/Objects/stringlib/transmogrify.h new file mode 100644 index 0000000000..be595a62ef --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/transmogrify.h @@ -0,0 +1,264 @@ +/* NOTE: this API is -ONLY- for use with single byte character strings. */ +/* Do not use it with Unicode. */ + +/* the more complicated methods. parts of these should be pulled out into the + shared code in bytes_methods.c to cut down on duplicate code bloat. */ + +PyDoc_STRVAR(expandtabs__doc__, +"B.expandtabs([tabsize]) -> copy of B\n\ +\n\ +Return a copy of B where all tab characters are expanded using spaces.\n\ +If tabsize is not given, a tab size of 8 characters is assumed."); + +static PyObject* +stringlib_expandtabs(PyObject *self, PyObject *args) +{ + const char *e, *p; + char *q; + Py_ssize_t i, j; + PyObject *u; + int tabsize = 8; + + if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) + return NULL; + + /* First pass: determine size of output string */ + i = j = 0; + e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); + for (p = STRINGLIB_STR(self); p < e; p++) { + if (*p == '\t') { + if (tabsize > 0) { + Py_ssize_t incr = tabsize - (j % tabsize); + if (j > PY_SSIZE_T_MAX - incr) + goto overflow; + j += incr; + } + } + else { + if (j > PY_SSIZE_T_MAX - 1) + goto overflow; + j++; + if (*p == '\n' || *p == '\r') { + if (i > PY_SSIZE_T_MAX - j) + goto overflow; + i += j; + j = 0; + } + } + } + + if (i > PY_SSIZE_T_MAX - j) + goto overflow; + + /* Second pass: create output string and fill it */ + u = STRINGLIB_NEW(NULL, i + j); + if (!u) + return NULL; + + j = 0; + q = STRINGLIB_STR(u); + + for (p = STRINGLIB_STR(self); p < e; p++) { + if (*p == '\t') { + if (tabsize > 0) { + i = tabsize - (j % tabsize); + j += i; + while (i--) + *q++ = ' '; + } + } + else { + j++; + *q++ = *p; + if (*p == '\n' || *p == '\r') + j = 0; + } + } + + return u; + overflow: + PyErr_SetString(PyExc_OverflowError, "result too long"); + return NULL; +} + +Py_LOCAL_INLINE(PyObject *) +pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) +{ + PyObject *u; + + if (left < 0) + left = 0; + if (right < 0) + right = 0; + + if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject *)self; +#endif /* STRINGLIB_MUTABLE */ + } + + u = STRINGLIB_NEW(NULL, + left + STRINGLIB_LEN(self) + right); + if (u) { + if (left) + memset(STRINGLIB_STR(u), fill, left); + Py_MEMCPY(STRINGLIB_STR(u) + left, + STRINGLIB_STR(self), + STRINGLIB_LEN(self)); + if (right) + memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), + fill, right); + } + + return u; +} + +PyDoc_STRVAR(ljust__doc__, +"B.ljust(width[, fillchar]) -> copy of B\n" +"\n" +"Return B left justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)."); + +static PyObject * +stringlib_ljust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); +} + + +PyDoc_STRVAR(rjust__doc__, +"B.rjust(width[, fillchar]) -> copy of B\n" +"\n" +"Return B right justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)"); + +static PyObject * +stringlib_rjust(PyObject *self, PyObject *args) +{ + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); +} + + +PyDoc_STRVAR(center__doc__, +"B.center(width[, fillchar]) -> copy of B\n" +"\n" +"Return B centered in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)."); + +static PyObject * +stringlib_center(PyObject *self, PyObject *args) +{ + Py_ssize_t marg, left; + Py_ssize_t width; + char fillchar = ' '; + + if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) + return NULL; + + if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + + marg = width - STRINGLIB_LEN(self); + left = marg / 2 + (marg & width & 1); + + return pad(self, left, marg - left, fillchar); +} + +PyDoc_STRVAR(zfill__doc__, +"B.zfill(width) -> copy of B\n" +"\n" +"Pad a numeric string B with zeros on the left, to fill a field\n" +"of the specified width. B is never truncated."); + +static PyObject * +stringlib_zfill(PyObject *self, PyObject *args) +{ + Py_ssize_t fill; + PyObject *s; + char *p; + Py_ssize_t width; + + if (!PyArg_ParseTuple(args, "n:zfill", &width)) + return NULL; + + if (STRINGLIB_LEN(self) >= width) { + if (STRINGLIB_CHECK_EXACT(self)) { +#if STRINGLIB_MUTABLE + /* We're defined as returning a copy; If the object is mutable + * that means we must make an identical copy. */ + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +#else + Py_INCREF(self); + return (PyObject*) self; +#endif + } + else + return STRINGLIB_NEW( + STRINGLIB_STR(self), + STRINGLIB_LEN(self) + ); + } + + fill = width - STRINGLIB_LEN(self); + + s = pad(self, fill, 0, '0'); + + if (s == NULL) + return NULL; + + p = STRINGLIB_STR(s); + if (p[fill] == '+' || p[fill] == '-') { + /* move sign to beginning of string */ + p[0] = p[fill]; + p[fill] = '0'; + } + + return (PyObject*) s; +} diff --git a/contrib/tools/python/src/Objects/stringlib/unicodedefs.h b/contrib/tools/python/src/Objects/stringlib/unicodedefs.h new file mode 100644 index 0000000000..dd814f6c90 --- /dev/null +++ b/contrib/tools/python/src/Objects/stringlib/unicodedefs.h @@ -0,0 +1,37 @@ +#ifndef STRINGLIB_UNICODEDEFS_H +#define STRINGLIB_UNICODEDEFS_H + +/* this is sort of a hack. there's at least one place (formatting + floats) where some stringlib code takes a different path if it's + compiled as unicode. */ +#define STRINGLIB_IS_UNICODE 1 + +#define STRINGLIB_OBJECT PyUnicodeObject +#define STRINGLIB_CHAR Py_UNICODE +#define STRINGLIB_TYPE_NAME "unicode" +#define STRINGLIB_PARSE_CODE "U" +#define STRINGLIB_EMPTY unicode_empty +#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE +#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK +#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL +#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL +#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER +#define STRINGLIB_TOLOWER Py_UNICODE_TOLOWER +#define STRINGLIB_FILL Py_UNICODE_FILL +#define STRINGLIB_STR PyUnicode_AS_UNICODE +#define STRINGLIB_LEN PyUnicode_GET_SIZE +#define STRINGLIB_NEW PyUnicode_FromUnicode +#define STRINGLIB_RESIZE PyUnicode_Resize +#define STRINGLIB_CHECK PyUnicode_Check +#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact +#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping + +#if PY_VERSION_HEX < 0x03000000 +#define STRINGLIB_TOSTR PyObject_Unicode +#else +#define STRINGLIB_TOSTR PyObject_Str +#endif + +#define STRINGLIB_WANT_CONTAINS_OBJ 1 + +#endif /* !STRINGLIB_UNICODEDEFS_H */ |