Sync contrib/tools/python3 layout with upstream

* Move src/ subdir contents to the top of the layout * Rename self-written lib -> lib2 to avoid CaseFolding warning from the VCS * Regenerate contrib/libs/python proxy-headers accordingly 4ccc62ac1511abcf0fed14ccade38e984e088f1e
author: thegeorg <thegeorg@yandex-team.com> 2024-02-19 02:38:52 +0300
committer: thegeorg <thegeorg@yandex-team.com> 2024-02-19 02:50:43 +0300
commit: d96fa07134c06472bfee6718b5cfd1679196fc99 (patch)
tree: 31ec344fa9d3ff8dc038692516b6438dfbdb8a2d /contrib/tools/python3/Objects/stringlib/join.h
parent: 452cf9e068aef7110e35e654c5d47eb80111ef89 (diff)
download: ydb-d96fa07134c06472bfee6718b5cfd1679196fc99.tar.gz
1 files changed, 162 insertions, 0 deletions
diff --git a/contrib/tools/python3/Objects/stringlib/join.h b/contrib/tools/python3/Objects/stringlib/join.h
new file mode 100644
index 0000000000..de6bd83ffe
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/join.h
@@ -0,0 +1,162 @@
+/* stringlib: bytes joining implementation */
+
+#if STRINGLIB_IS_UNICODE
+#error join.h only compatible with byte-wise strings
+#endif
+
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
+{
+    const char *sepstr = STRINGLIB_STR(sep);
+    Py_ssize_t seplen = STRINGLIB_LEN(sep);
+    PyObject *res = NULL;
+    char *p;
+    Py_ssize_t seqlen = 0;
+    Py_ssize_t sz = 0;
+    Py_ssize_t i, nbufs;
+    PyObject *seq, *item;
+    Py_buffer *buffers = NULL;
+#define NB_STATIC_BUFFERS 10
+    Py_buffer static_buffers[NB_STATIC_BUFFERS];
+#define GIL_THRESHOLD 1048576
+    int drop_gil = 1;
+    PyThreadState *save = NULL;
+
+    seq = PySequence_Fast(iterable, "can only join an iterable");
+    if (seq == NULL) {
+        return NULL;
+    }
+
+    seqlen = PySequence_Fast_GET_SIZE(seq);
+    if (seqlen == 0) {
+        Py_DECREF(seq);
+        return STRINGLIB_NEW(NULL, 0);
+    }
+#if !STRINGLIB_MUTABLE
+    if (seqlen == 1) {
+        item = PySequence_Fast_GET_ITEM(seq, 0);
+        if (STRINGLIB_CHECK_EXACT(item)) {
+            Py_INCREF(item);
+            Py_DECREF(seq);
+            return item;
+        }
+    }
+#endif
+    if (seqlen > NB_STATIC_BUFFERS) {
+        buffers = PyMem_NEW(Py_buffer, seqlen);
+        if (buffers == NULL) {
+            Py_DECREF(seq);
+            PyErr_NoMemory();
+            return NULL;
+        }
+    }
+    else {
+        buffers = static_buffers;
+    }
+
+    /* Here is the general case.  Do a pre-pass to figure out the total
+     * amount of space we'll need (sz), and see whether all arguments are
+     * bytes-like.
+     */
+    for (i = 0, nbufs = 0; i < seqlen; i++) {
+        Py_ssize_t itemlen;
+        item = PySequence_Fast_GET_ITEM(seq, i);
+        if (PyBytes_CheckExact(item)) {
+            /* Fast path. */
+            buffers[i].obj = Py_NewRef(item);
+            buffers[i].buf = PyBytes_AS_STRING(item);
+            buffers[i].len = PyBytes_GET_SIZE(item);
+        }
+        else {
+            if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
+                PyErr_Format(PyExc_TypeError,
+                             "sequence item %zd: expected a bytes-like object, "
+                             "%.80s found",
+                             i, Py_TYPE(item)->tp_name);
+                goto error;
+            }
+            /* If the backing objects are mutable, then dropping the GIL
+             * opens up race conditions where another thread tries to modify
+             * the object which we hold a buffer on it. Such code has data
+             * races anyway, but this is a conservative approach that avoids
+             * changing the behaviour of that data race.
+             */
+            drop_gil = 0;
+        }
+        nbufs = i + 1;  /* for error cleanup */
+        itemlen = buffers[i].len;
+        if (itemlen > PY_SSIZE_T_MAX - sz) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "join() result is too long");
+            goto error;
+        }
+        sz += itemlen;
+        if (i != 0) {
+            if (seplen > PY_SSIZE_T_MAX - sz) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "join() result is too long");
+                goto error;
+            }
+            sz += seplen;
+        }
+        if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
+            PyErr_SetString(PyExc_RuntimeError,
+                            "sequence changed size during iteration");
+            goto error;
+        }
+    }
+
+    /* Allocate result space. */
+    res = STRINGLIB_NEW(NULL, sz);
+    if (res == NULL)
+        goto error;
+
+    /* Catenate everything. */
+    p = STRINGLIB_STR(res);
+    if (sz < GIL_THRESHOLD) {
+        drop_gil = 0;   /* Benefits are likely outweighed by the overheads */
+    }
+    if (drop_gil) {
+        save = PyEval_SaveThread();
+    }
+    if (!seplen) {
+        /* fast path */
+        for (i = 0; i < nbufs; i++) {
+            Py_ssize_t n = buffers[i].len;
+            char *q = buffers[i].buf;
+            memcpy(p, q, n);
+            p += n;
+        }
+    }
+    else {
+        for (i = 0; i < nbufs; i++) {
+            Py_ssize_t n;
+            char *q;
+            if (i) {
+                memcpy(p, sepstr, seplen);
+                p += seplen;
+            }
+            n = buffers[i].len;
+            q = buffers[i].buf;
+            memcpy(p, q, n);
+            p += n;
+        }
+    }
+    if (drop_gil) {
+        PyEval_RestoreThread(save);
+    }
+    goto done;
+
+error:
+    res = NULL;
+done:
+    Py_DECREF(seq);
+    for (i = 0; i < nbufs; i++)
+        PyBuffer_Release(&buffers[i]);
+    if (buffers != static_buffers)
+        PyMem_Free(buffers);
+    return res;
+}
+
+#undef NB_STATIC_BUFFERS
+#undef GIL_THRESHOLD
author	thegeorg <thegeorg@yandex-team.com>	2024-02-19 02:38:52 +0300
committer	thegeorg <thegeorg@yandex-team.com>	2024-02-19 02:50:43 +0300
commit	d96fa07134c06472bfee6718b5cfd1679196fc99 (patch)
tree	31ec344fa9d3ff8dc038692516b6438dfbdb8a2d /contrib/tools/python3/Objects/stringlib/join.h
parent	452cf9e068aef7110e35e654c5d47eb80111ef89 (diff)
download	ydb-d96fa07134c06472bfee6718b5cfd1679196fc99.tar.gz