diff options
author | thegeorg <thegeorg@yandex-team.com> | 2024-02-19 02:38:52 +0300 |
---|---|---|
committer | thegeorg <thegeorg@yandex-team.com> | 2024-02-19 02:50:43 +0300 |
commit | d96fa07134c06472bfee6718b5cfd1679196fc99 (patch) | |
tree | 31ec344fa9d3ff8dc038692516b6438dfbdb8a2d /contrib/tools/python3/Objects/stringlib/join.h | |
parent | 452cf9e068aef7110e35e654c5d47eb80111ef89 (diff) | |
download | ydb-d96fa07134c06472bfee6718b5cfd1679196fc99.tar.gz |
Sync contrib/tools/python3 layout with upstream
* Move src/ subdir contents to the top of the layout
* Rename self-written lib -> lib2 to avoid CaseFolding warning from the VCS
* Regenerate contrib/libs/python proxy-headers accordingly
4ccc62ac1511abcf0fed14ccade38e984e088f1e
Diffstat (limited to 'contrib/tools/python3/Objects/stringlib/join.h')
-rw-r--r-- | contrib/tools/python3/Objects/stringlib/join.h | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/contrib/tools/python3/Objects/stringlib/join.h b/contrib/tools/python3/Objects/stringlib/join.h new file mode 100644 index 0000000000..de6bd83ffe --- /dev/null +++ b/contrib/tools/python3/Objects/stringlib/join.h @@ -0,0 +1,162 @@ +/* stringlib: bytes joining implementation */ + +#if STRINGLIB_IS_UNICODE +#error join.h only compatible with byte-wise strings +#endif + +Py_LOCAL_INLINE(PyObject *) +STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) +{ + const char *sepstr = STRINGLIB_STR(sep); + Py_ssize_t seplen = STRINGLIB_LEN(sep); + PyObject *res = NULL; + char *p; + Py_ssize_t seqlen = 0; + Py_ssize_t sz = 0; + Py_ssize_t i, nbufs; + PyObject *seq, *item; + Py_buffer *buffers = NULL; +#define NB_STATIC_BUFFERS 10 + Py_buffer static_buffers[NB_STATIC_BUFFERS]; +#define GIL_THRESHOLD 1048576 + int drop_gil = 1; + PyThreadState *save = NULL; + + seq = PySequence_Fast(iterable, "can only join an iterable"); + if (seq == NULL) { + return NULL; + } + + seqlen = PySequence_Fast_GET_SIZE(seq); + if (seqlen == 0) { + Py_DECREF(seq); + return STRINGLIB_NEW(NULL, 0); + } +#if !STRINGLIB_MUTABLE + if (seqlen == 1) { + item = PySequence_Fast_GET_ITEM(seq, 0); + if (STRINGLIB_CHECK_EXACT(item)) { + Py_INCREF(item); + Py_DECREF(seq); + return item; + } + } +#endif + if (seqlen > NB_STATIC_BUFFERS) { + buffers = PyMem_NEW(Py_buffer, seqlen); + if (buffers == NULL) { + Py_DECREF(seq); + PyErr_NoMemory(); + return NULL; + } + } + else { + buffers = static_buffers; + } + + /* Here is the general case. Do a pre-pass to figure out the total + * amount of space we'll need (sz), and see whether all arguments are + * bytes-like. + */ + for (i = 0, nbufs = 0; i < seqlen; i++) { + Py_ssize_t itemlen; + item = PySequence_Fast_GET_ITEM(seq, i); + if (PyBytes_CheckExact(item)) { + /* Fast path. */ + buffers[i].obj = Py_NewRef(item); + buffers[i].buf = PyBytes_AS_STRING(item); + buffers[i].len = PyBytes_GET_SIZE(item); + } + else { + if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { + PyErr_Format(PyExc_TypeError, + "sequence item %zd: expected a bytes-like object, " + "%.80s found", + i, Py_TYPE(item)->tp_name); + goto error; + } + /* If the backing objects are mutable, then dropping the GIL + * opens up race conditions where another thread tries to modify + * the object which we hold a buffer on it. Such code has data + * races anyway, but this is a conservative approach that avoids + * changing the behaviour of that data race. + */ + drop_gil = 0; + } + nbufs = i + 1; /* for error cleanup */ + itemlen = buffers[i].len; + if (itemlen > PY_SSIZE_T_MAX - sz) { + PyErr_SetString(PyExc_OverflowError, + "join() result is too long"); + goto error; + } + sz += itemlen; + if (i != 0) { + if (seplen > PY_SSIZE_T_MAX - sz) { + PyErr_SetString(PyExc_OverflowError, + "join() result is too long"); + goto error; + } + sz += seplen; + } + if (seqlen != PySequence_Fast_GET_SIZE(seq)) { + PyErr_SetString(PyExc_RuntimeError, + "sequence changed size during iteration"); + goto error; + } + } + + /* Allocate result space. */ + res = STRINGLIB_NEW(NULL, sz); + if (res == NULL) + goto error; + + /* Catenate everything. */ + p = STRINGLIB_STR(res); + if (sz < GIL_THRESHOLD) { + drop_gil = 0; /* Benefits are likely outweighed by the overheads */ + } + if (drop_gil) { + save = PyEval_SaveThread(); + } + if (!seplen) { + /* fast path */ + for (i = 0; i < nbufs; i++) { + Py_ssize_t n = buffers[i].len; + char *q = buffers[i].buf; + memcpy(p, q, n); + p += n; + } + } + else { + for (i = 0; i < nbufs; i++) { + Py_ssize_t n; + char *q; + if (i) { + memcpy(p, sepstr, seplen); + p += seplen; + } + n = buffers[i].len; + q = buffers[i].buf; + memcpy(p, q, n); + p += n; + } + } + if (drop_gil) { + PyEval_RestoreThread(save); + } + goto done; + +error: + res = NULL; +done: + Py_DECREF(seq); + for (i = 0; i < nbufs; i++) + PyBuffer_Release(&buffers[i]); + if (buffers != static_buffers) + PyMem_Free(buffers); + return res; +} + +#undef NB_STATIC_BUFFERS +#undef GIL_THRESHOLD |