aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/src/Objects/stringlib/join.h
diff options
context:
space:
mode:
authornkozlovskiy <nmk@ydb.tech>2023-09-29 12:24:06 +0300
committernkozlovskiy <nmk@ydb.tech>2023-09-29 12:41:34 +0300
commite0e3e1717e3d33762ce61950504f9637a6e669ed (patch)
treebca3ff6939b10ed60c3d5c12439963a1146b9711 /contrib/tools/python3/src/Objects/stringlib/join.h
parent38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff)
downloadydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz
add ydb deps
Diffstat (limited to 'contrib/tools/python3/src/Objects/stringlib/join.h')
-rw-r--r--contrib/tools/python3/src/Objects/stringlib/join.h163
1 files changed, 163 insertions, 0 deletions
diff --git a/contrib/tools/python3/src/Objects/stringlib/join.h b/contrib/tools/python3/src/Objects/stringlib/join.h
new file mode 100644
index 0000000000..bb011f7db7
--- /dev/null
+++ b/contrib/tools/python3/src/Objects/stringlib/join.h
@@ -0,0 +1,163 @@
+/* stringlib: bytes joining implementation */
+
+#if STRINGLIB_IS_UNICODE /* compile-time guard: refuse to build when the unicode flavour is selected */
+#error join.h only compatible with byte-wise strings
+#endif /* STRINGLIB_IS_UNICODE */
+
+Py_LOCAL_INLINE(PyObject *) /* returns the joined object, or NULL on error */
+STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) /* concatenate the items of iterable, inserting sep's bytes between consecutive items */
+{
+ const char *sepstr = STRINGLIB_STR(sep); /* separator bytes, copied between items */
+ Py_ssize_t seplen = STRINGLIB_LEN(sep); /* separator length in bytes */
+ PyObject *res = NULL; /* result object; NULL on the error path */
+ char *p; /* write cursor into the result's storage */
+ Py_ssize_t seqlen = 0; /* number of items in seq */
+ Py_ssize_t sz = 0; /* running total size of the result, in bytes */
+ Py_ssize_t i, nbufs; /* nbufs: count of buffers[] entries currently held (must be released) */
+ PyObject *seq, *item; /* seq: reference from PySequence_Fast, dropped at done; item: element read from seq */
+ Py_buffer *buffers = NULL; /* points at static_buffers or at a PyMem_NEW'd array */
+#define NB_STATIC_BUFFERS 10 /* joins of up to this many items use the on-stack array below */
+ Py_buffer static_buffers[NB_STATIC_BUFFERS];
+#define GIL_THRESHOLD 1048576 /* results smaller than this many bytes are copied with the GIL held */
+ int drop_gil = 1; /* cleared when any item is not an exact bytes object, or when the result is small */
+ PyThreadState *save = NULL; /* value returned by PyEval_SaveThread(), set only if drop_gil */
+
+ seq = PySequence_Fast(iterable, "can only join an iterable"); /* the string is the error message passed to PySequence_Fast */
+ if (seq == NULL) {
+ return NULL;
+ }
+
+ seqlen = PySequence_Fast_GET_SIZE(seq);
+ if (seqlen == 0) { /* nothing to join: return an empty result */
+ Py_DECREF(seq);
+ return STRINGLIB_NEW(NULL, 0);
+ }
+#if !STRINGLIB_MUTABLE /* shortcut compiled only for the non-mutable flavour */
+ if (seqlen == 1) {
+ item = PySequence_Fast_GET_ITEM(seq, 0);
+ if (STRINGLIB_CHECK_EXACT(item)) { /* single exact-type item: hand it back instead of copying */
+ Py_INCREF(item); /* the caller receives its own reference */
+ Py_DECREF(seq);
+ return item;
+ }
+ }
+#endif
+ if (seqlen > NB_STATIC_BUFFERS) { /* too many items for the on-stack array */
+ buffers = PyMem_NEW(Py_buffer, seqlen);
+ if (buffers == NULL) {
+ Py_DECREF(seq);
+ PyErr_NoMemory();
+ return NULL;
+ }
+ }
+ else {
+ buffers = static_buffers;
+ }
+
+ /* Here is the general case. Do a pre-pass to figure out the total
+ * amount of space we'll need (sz), and see whether all arguments are
+ * bytes-like.
+ */
+ for (i = 0, nbufs = 0; i < seqlen; i++) {
+ Py_ssize_t itemlen;
+ item = PySequence_Fast_GET_ITEM(seq, i);
+ if (PyBytes_CheckExact(item)) {
+ /* Fast path: fill in obj/buf/len by hand instead of calling PyObject_GetBuffer. */
+ Py_INCREF(item); /* held via buffers[i].obj until PyBuffer_Release() in the cleanup loop */
+ buffers[i].obj = item;
+ buffers[i].buf = PyBytes_AS_STRING(item);
+ buffers[i].len = PyBytes_GET_SIZE(item);
+ }
+ else {
+ if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item %zd: expected a bytes-like object, "
+ "%.80s found",
+ i, Py_TYPE(item)->tp_name);
+ goto error; /* buffers[i] was not acquired; nbufs still excludes it */
+ }
+ /* If the backing objects are mutable, then dropping the GIL
+ * opens up race conditions where another thread tries to modify
+ * an object on which we hold a buffer. Such code has data
+ * races anyway, but this is a conservative approach that avoids
+ * changing the behaviour of that data race.
+ */
+ drop_gil = 0;
+ }
+ nbufs = i + 1; /* buffers[0..i] are now held; cleanup releases exactly these */
+ itemlen = buffers[i].len;
+ if (itemlen > PY_SSIZE_T_MAX - sz) { /* would sz + itemlen overflow? checked before adding */
+ PyErr_SetString(PyExc_OverflowError,
+ "join() result is too long");
+ goto error;
+ }
+ sz += itemlen;
+ if (i != 0) { /* one separator precedes every item except the first */
+ if (seplen > PY_SSIZE_T_MAX - sz) { /* same overflow check for the separator */
+ PyErr_SetString(PyExc_OverflowError,
+ "join() result is too long");
+ goto error;
+ }
+ sz += seplen;
+ }
+ if (seqlen != PySequence_Fast_GET_SIZE(seq)) { /* length is re-checked on every iteration */
+ PyErr_SetString(PyExc_RuntimeError,
+ "sequence changed size during iteration");
+ goto error;
+ }
+ }
+
+ /* Allocate result space. */
+ res = STRINGLIB_NEW(NULL, sz);
+ if (res == NULL)
+ goto error;
+
+ /* Catenate everything. */
+ p = STRINGLIB_STR(res);
+ if (sz < GIL_THRESHOLD) {
+ drop_gil = 0; /* Benefits are likely outweighed by the overheads */
+ }
+ if (drop_gil) { /* still set only if every item was an exact bytes object and sz >= GIL_THRESHOLD */
+ save = PyEval_SaveThread();
+ }
+ if (!seplen) {
+ /* fast path: empty separator, so just copy the items back to back */
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n = buffers[i].len;
+ char *q = buffers[i].buf;
+ memcpy(p, q, n);
+ p += n;
+ }
+ }
+ else {
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n;
+ char *q;
+ if (i) { /* separator goes before every item except the first */
+ memcpy(p, sepstr, seplen);
+ p += seplen;
+ }
+ n = buffers[i].len;
+ q = buffers[i].buf;
+ memcpy(p, q, n);
+ p += n;
+ }
+ }
+ if (drop_gil) { /* pairs with the PyEval_SaveThread() call above */
+ PyEval_RestoreThread(save);
+ }
+ goto done; /* success: skip the error label and fall into the shared cleanup */
+
+error:
+ res = NULL; /* every goto error happens before res is assigned or when it is already NULL */
+done: /* cleanup shared by the success and error paths */
+ Py_DECREF(seq);
+ for (i = 0; i < nbufs; i++) /* release only the buffers that were actually acquired */
+ PyBuffer_Release(&buffers[i]);
+ if (buffers != static_buffers) /* the array was heap-allocated only when seqlen > NB_STATIC_BUFFERS */
+ PyMem_Free(buffers);
+ return res;
+}
+
+#undef NB_STATIC_BUFFERS /* defined inside bytes_join above; removed so it does not outlive this header */
+#undef GIL_THRESHOLD /* defined inside bytes_join above; removed so it does not outlive this header */