diff options
| author | AlexSm <[email protected]> | 2024-03-05 10:40:59 +0100 |
|---|---|---|
| committer | GitHub <[email protected]> | 2024-03-05 12:40:59 +0300 |
| commit | 1ac13c847b5358faba44dbb638a828e24369467b (patch) | |
| tree | 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/src/Objects/stringlib/split.h | |
| parent | ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff) | |
Library import 16 (#2433)
Co-authored-by: robot-piglet <[email protected]>
Co-authored-by: deshevoy <[email protected]>
Co-authored-by: robot-contrib <[email protected]>
Co-authored-by: thegeorg <[email protected]>
Co-authored-by: robot-ya-builder <[email protected]>
Co-authored-by: svidyuk <[email protected]>
Co-authored-by: shadchin <[email protected]>
Co-authored-by: robot-ratatosk <[email protected]>
Co-authored-by: innokentii <[email protected]>
Co-authored-by: arkady-e1ppa <[email protected]>
Co-authored-by: snermolaev <[email protected]>
Co-authored-by: dimdim11 <[email protected]>
Co-authored-by: kickbutt <[email protected]>
Co-authored-by: abdullinsaid <[email protected]>
Co-authored-by: korsunandrei <[email protected]>
Co-authored-by: petrk <[email protected]>
Co-authored-by: miroslav2 <[email protected]>
Co-authored-by: serjflint <[email protected]>
Co-authored-by: akhropov <[email protected]>
Co-authored-by: prettyboy <[email protected]>
Co-authored-by: ilikepugs <[email protected]>
Co-authored-by: hiddenpath <[email protected]>
Co-authored-by: mikhnenko <[email protected]>
Co-authored-by: spreis <[email protected]>
Co-authored-by: andreyshspb <[email protected]>
Co-authored-by: dimaandreev <[email protected]>
Co-authored-by: rashid <[email protected]>
Co-authored-by: robot-ydb-importer <[email protected]>
Co-authored-by: r-vetrov <[email protected]>
Co-authored-by: ypodlesov <[email protected]>
Co-authored-by: zaverden <[email protected]>
Co-authored-by: vpozdyayev <[email protected]>
Co-authored-by: robot-cozmo <[email protected]>
Co-authored-by: v-korovin <[email protected]>
Co-authored-by: arikon <[email protected]>
Co-authored-by: khoden <[email protected]>
Co-authored-by: psydmm <[email protected]>
Co-authored-by: robot-javacom <[email protected]>
Co-authored-by: dtorilov <[email protected]>
Co-authored-by: sennikovmv <[email protected]>
Co-authored-by: hcpp <[email protected]>
Diffstat (limited to 'contrib/tools/python3/src/Objects/stringlib/split.h')
| -rw-r--r-- | contrib/tools/python3/src/Objects/stringlib/split.h | 390 |
1 files changed, 0 insertions, 390 deletions
diff --git a/contrib/tools/python3/src/Objects/stringlib/split.h b/contrib/tools/python3/src/Objects/stringlib/split.h deleted file mode 100644 index 0c11b7214e9..00000000000 --- a/contrib/tools/python3/src/Objects/stringlib/split.h +++ /dev/null @@ -1,390 +0,0 @@ -/* stringlib: split implementation */ - -#ifndef STRINGLIB_FASTSEARCH_H -#error must include "stringlib/fastsearch.h" before including this module -#endif - -/* Overallocate the initial list to reduce the number of reallocs for small - split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three - resizes, to sizes 4, 8, then 16. Most observed string splits are for human - text (roughly 11 words per line) and field delimited data (usually 1-10 - fields). For large strings the split algorithms are bandwidth limited - so increasing the preallocation likely will not improve things.*/ - -#define MAX_PREALLOC 12 - -/* 5 splits gives 6 elements */ -#define PREALLOC_SIZE(maxsplit) \ - (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1) - -#define SPLIT_APPEND(data, left, right) \ - sub = STRINGLIB_NEW((data) + (left), \ - (right) - (left)); \ - if (sub == NULL) \ - goto onError; \ - if (PyList_Append(list, sub)) { \ - Py_DECREF(sub); \ - goto onError; \ - } \ - else \ - Py_DECREF(sub); - -#define SPLIT_ADD(data, left, right) { \ - sub = STRINGLIB_NEW((data) + (left), \ - (right) - (left)); \ - if (sub == NULL) \ - goto onError; \ - if (count < MAX_PREALLOC) { \ - PyList_SET_ITEM(list, count, sub); \ - } else { \ - if (PyList_Append(list, sub)) { \ - Py_DECREF(sub); \ - goto onError; \ - } \ - else \ - Py_DECREF(sub); \ - } \ - count++; } - - -/* Always force the list to the expected size. */ -#define FIX_PREALLOC_SIZE(list) Py_SET_SIZE(list, count) - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(split_whitespace)(PyObject* str_obj, - const STRINGLIB_CHAR* str, Py_ssize_t str_len, - Py_ssize_t maxcount) -{ - Py_ssize_t i, j, count=0; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - PyObject *sub; - - if (list == NULL) - return NULL; - - i = j = 0; - while (maxcount-- > 0) { - while (i < str_len && STRINGLIB_ISSPACE(str[i])) - i++; - if (i == str_len) break; - j = i; i++; - while (i < str_len && !STRINGLIB_ISSPACE(str[i])) - i++; -#if !STRINGLIB_MUTABLE - if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { - /* No whitespace in str_obj, so just use it as list[0] */ - Py_INCREF(str_obj); - PyList_SET_ITEM(list, 0, (PyObject *)str_obj); - count++; - break; - } -#endif - SPLIT_ADD(str, j, i); - } - - if (i < str_len) { - /* Only occurs when maxcount was reached */ - /* Skip any remaining whitespace and copy to end of string */ - while (i < str_len && STRINGLIB_ISSPACE(str[i])) - i++; - if (i != str_len) - SPLIT_ADD(str, i, str_len); - } - FIX_PREALLOC_SIZE(list); - return list; - - onError: - Py_DECREF(list); - return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(split_char)(PyObject* str_obj, - const STRINGLIB_CHAR* str, Py_ssize_t str_len, - const STRINGLIB_CHAR ch, - Py_ssize_t maxcount) -{ - Py_ssize_t i, j, count=0; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - PyObject *sub; - - if (list == NULL) - return NULL; - - i = j = 0; - while ((j < str_len) && (maxcount-- > 0)) { - for(; j < str_len; j++) { - /* I found that using memchr makes no difference */ - if (str[j] == ch) { - SPLIT_ADD(str, i, j); - i = j = j + 1; - break; - } - } - } -#if !STRINGLIB_MUTABLE - if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { - /* ch not in str_obj, so just use str_obj as list[0] */ - Py_INCREF(str_obj); - PyList_SET_ITEM(list, 0, (PyObject *)str_obj); - count++; - } else -#endif - if (i <= str_len) { - SPLIT_ADD(str, i, str_len); - } - FIX_PREALLOC_SIZE(list); - return list; - - onError: - Py_DECREF(list); - return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(split)(PyObject* str_obj, - const STRINGLIB_CHAR* str, Py_ssize_t str_len, - const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, - Py_ssize_t maxcount) -{ - Py_ssize_t i, j, pos, count=0; - PyObject *list, *sub; - - if (sep_len == 0) { - PyErr_SetString(PyExc_ValueError, "empty separator"); - return NULL; - } - else if (sep_len == 1) - return STRINGLIB(split_char)(str_obj, str, str_len, sep[0], maxcount); - - list = PyList_New(PREALLOC_SIZE(maxcount)); - if (list == NULL) - return NULL; - - i = j = 0; - while (maxcount-- > 0) { - pos = FASTSEARCH(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH); - if (pos < 0) - break; - j = i + pos; - SPLIT_ADD(str, i, j); - i = j + sep_len; - } -#if !STRINGLIB_MUTABLE - if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { - /* No match in str_obj, so just use it as list[0] */ - Py_INCREF(str_obj); - PyList_SET_ITEM(list, 0, (PyObject *)str_obj); - count++; - } else -#endif - { - SPLIT_ADD(str, i, str_len); - } - FIX_PREALLOC_SIZE(list); - return list; - - onError: - Py_DECREF(list); - return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(rsplit_whitespace)(PyObject* str_obj, - const STRINGLIB_CHAR* str, Py_ssize_t str_len, - Py_ssize_t maxcount) -{ - Py_ssize_t i, j, count=0; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - PyObject *sub; - - if (list == NULL) - return NULL; - - i = j = str_len - 1; - while (maxcount-- > 0) { - while (i >= 0 && STRINGLIB_ISSPACE(str[i])) - i--; - if (i < 0) break; - j = i; i--; - while (i >= 0 && !STRINGLIB_ISSPACE(str[i])) - i--; -#if !STRINGLIB_MUTABLE - if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) { - /* No whitespace in str_obj, so just use it as list[0] */ - Py_INCREF(str_obj); - PyList_SET_ITEM(list, 0, (PyObject *)str_obj); - count++; - break; - } -#endif - SPLIT_ADD(str, i + 1, j + 1); - } - - if (i >= 0) { - /* Only occurs when maxcount was reached */ - /* Skip any remaining whitespace and copy to beginning of string */ - while (i >= 0 && STRINGLIB_ISSPACE(str[i])) - i--; - if (i >= 0) - SPLIT_ADD(str, 0, i + 1); - } - FIX_PREALLOC_SIZE(list); - if (PyList_Reverse(list) < 0) - goto onError; - return list; - - onError: - Py_DECREF(list); - return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(rsplit_char)(PyObject* str_obj, - const STRINGLIB_CHAR* str, Py_ssize_t str_len, - const STRINGLIB_CHAR ch, - Py_ssize_t maxcount) -{ - Py_ssize_t i, j, count=0; - PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); - PyObject *sub; - - if (list == NULL) - return NULL; - - i = j = str_len - 1; - while ((i >= 0) && (maxcount-- > 0)) { - for(; i >= 0; i--) { - if (str[i] == ch) { - SPLIT_ADD(str, i + 1, j + 1); - j = i = i - 1; - break; - } - } - } -#if !STRINGLIB_MUTABLE - if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { - /* ch not in str_obj, so just use str_obj as list[0] */ - Py_INCREF(str_obj); - PyList_SET_ITEM(list, 0, (PyObject *)str_obj); - count++; - } else -#endif - if (j >= -1) { - SPLIT_ADD(str, 0, j + 1); - } - FIX_PREALLOC_SIZE(list); - if (PyList_Reverse(list) < 0) - goto onError; - return list; - - onError: - Py_DECREF(list); - return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(rsplit)(PyObject* str_obj, - const STRINGLIB_CHAR* str, Py_ssize_t str_len, - const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, - Py_ssize_t maxcount) -{ - Py_ssize_t j, pos, count=0; - PyObject *list, *sub; - - if (sep_len == 0) { - PyErr_SetString(PyExc_ValueError, "empty separator"); - return NULL; - } - else if (sep_len == 1) - return STRINGLIB(rsplit_char)(str_obj, str, str_len, sep[0], maxcount); - - list = PyList_New(PREALLOC_SIZE(maxcount)); - if (list == NULL) - return NULL; - - j = str_len; - while (maxcount-- > 0) { - pos = FASTSEARCH(str, j, sep, sep_len, -1, FAST_RSEARCH); - if (pos < 0) - break; - SPLIT_ADD(str, pos + sep_len, j); - j = pos; - } -#if !STRINGLIB_MUTABLE - if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { - /* No match in str_obj, so just use it as list[0] */ - Py_INCREF(str_obj); - PyList_SET_ITEM(list, 0, (PyObject *)str_obj); - count++; - } else -#endif - { - SPLIT_ADD(str, 0, j); - } - FIX_PREALLOC_SIZE(list); - if (PyList_Reverse(list) < 0) - goto onError; - return list; - - onError: - Py_DECREF(list); - return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(splitlines)(PyObject* str_obj, - const STRINGLIB_CHAR* str, Py_ssize_t str_len, - int keepends) -{ - /* This does not use the preallocated list because splitlines is - usually run with hundreds of newlines. The overhead of - switching between PyList_SET_ITEM and append causes about a - 2-3% slowdown for that common case. A smarter implementation - could move the if check out, so the SET_ITEMs are done first - and the appends only done when the prealloc buffer is full. - That's too much work for little gain.*/ - - Py_ssize_t i; - Py_ssize_t j; - PyObject *list = PyList_New(0); - PyObject *sub; - - if (list == NULL) - return NULL; - - for (i = j = 0; i < str_len; ) { - Py_ssize_t eol; - - /* Find a line and append it */ - while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i])) - i++; - - /* Skip the line break reading CRLF as one line break */ - eol = i; - if (i < str_len) { - if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n') - i += 2; - else - i++; - if (keepends) - eol = i; - } -#if !STRINGLIB_MUTABLE - if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { - /* No linebreak in str_obj, so just use it as list[0] */ - if (PyList_Append(list, str_obj)) - goto onError; - break; - } -#endif - SPLIT_APPEND(str, j, eol); - j = i; - } - return list; - - onError: - Py_DECREF(list); - return NULL; -} - |
