aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/Modules/_io/stringio.c
diff options
context:
space:
mode:
authorAlexSm <alex@ydb.tech>2024-03-05 10:40:59 +0100
committerGitHub <noreply@github.com>2024-03-05 12:40:59 +0300
commit1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Modules/_io/stringio.c
parentffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
downloadydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz
Library import 16 (#2433)
Co-authored-by: robot-piglet <robot-piglet@yandex-team.com> Co-authored-by: deshevoy <deshevoy@yandex-team.com> Co-authored-by: robot-contrib <robot-contrib@yandex-team.com> Co-authored-by: thegeorg <thegeorg@yandex-team.com> Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com> Co-authored-by: svidyuk <svidyuk@yandex-team.com> Co-authored-by: shadchin <shadchin@yandex-team.com> Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com> Co-authored-by: innokentii <innokentii@yandex-team.com> Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com> Co-authored-by: snermolaev <snermolaev@yandex-team.com> Co-authored-by: dimdim11 <dimdim11@yandex-team.com> Co-authored-by: kickbutt <kickbutt@yandex-team.com> Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com> Co-authored-by: korsunandrei <korsunandrei@yandex-team.com> Co-authored-by: petrk <petrk@yandex-team.com> Co-authored-by: miroslav2 <miroslav2@yandex-team.com> Co-authored-by: serjflint <serjflint@yandex-team.com> Co-authored-by: akhropov <akhropov@yandex-team.com> Co-authored-by: prettyboy <prettyboy@yandex-team.com> Co-authored-by: ilikepugs <ilikepugs@yandex-team.com> Co-authored-by: hiddenpath <hiddenpath@yandex-team.com> Co-authored-by: mikhnenko <mikhnenko@yandex-team.com> Co-authored-by: spreis <spreis@yandex-team.com> Co-authored-by: andreyshspb <andreyshspb@yandex-team.com> Co-authored-by: dimaandreev <dimaandreev@yandex-team.com> Co-authored-by: rashid <rashid@yandex-team.com> Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com> Co-authored-by: r-vetrov <r-vetrov@yandex-team.com> Co-authored-by: ypodlesov <ypodlesov@yandex-team.com> Co-authored-by: zaverden <zaverden@yandex-team.com> Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com> Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com> Co-authored-by: v-korovin <v-korovin@yandex-team.com> Co-authored-by: arikon <arikon@yandex-team.com> Co-authored-by: khoden <khoden@yandex-team.com> 
Co-authored-by: psydmm <psydmm@yandex-team.com> Co-authored-by: robot-javacom <robot-javacom@yandex-team.com> Co-authored-by: dtorilov <dtorilov@yandex-team.com> Co-authored-by: sennikovmv <sennikovmv@yandex-team.com> Co-authored-by: hcpp <hcpp@ydb.tech>
Diffstat (limited to 'contrib/tools/python3/Modules/_io/stringio.c')
-rw-r--r--contrib/tools/python3/Modules/_io/stringio.c1037
1 files changed, 1037 insertions, 0 deletions
diff --git a/contrib/tools/python3/Modules/_io/stringio.c b/contrib/tools/python3/Modules/_io/stringio.c
new file mode 100644
index 0000000000..3eb25704b4
--- /dev/null
+++ b/contrib/tools/python3/Modules/_io/stringio.c
@@ -0,0 +1,1037 @@
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+#include <stddef.h> // offsetof()
+#include "pycore_object.h"
+#include "_iomodule.h"
+
+/* Implementation note: the buffer is always at least one character longer
+ than the enclosed string, for proper functioning of _PyIO_find_line_ending.
+*/
+
+/* Values stored in stringio.state.  They tell which of the two content
+   representations is currently live: the UCS4 buffer (realized) or the
+   embedded _PyUnicodeWriter (accumulating). */
+#define STATE_REALIZED 1
+#define STATE_ACCUMULATING 2
+
+/*[clinic input]
+module _io
+class _io.StringIO "stringio *" "clinic_state()->PyStringIO_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2693eada0658d470]*/
+
+/* Instance layout of _io.StringIO objects. */
+typedef struct {
+ PyObject_HEAD
+ Py_UCS4 *buf; /* character buffer, reallocated by resize_buffer() */
+ Py_ssize_t pos; /* current stream position, in characters */
+ Py_ssize_t string_size; /* logical length of the contents, in characters */
+ size_t buf_size; /* allocated capacity of buf, in Py_UCS4 elements */
+
+ /* The stringio object can be in two states: accumulating or realized.
+ In accumulating state, the internal buffer contains nothing and
+ the contents are given by the embedded _PyUnicodeWriter structure.
+ In realized state, the internal buffer is meaningful and the
+ _PyUnicodeWriter is destroyed.
+ */
+ int state; /* STATE_REALIZED or STATE_ACCUMULATING */
+ _PyUnicodeWriter writer;
+
+ char ok; /* initialized? */
+ char closed; /* set to 1 by close(), reset to 0 by __init__ */
+ char readuniversal; /* true when newline is None or "" */
+ char readtranslate; /* true when newline is None */
+ PyObject *decoder; /* newline decoder, created only if readuniversal */
+ PyObject *readnl; /* newline argument as a str; NULL for None */
+ PyObject *writenl; /* set only when newline starts with '\r' */
+
+ PyObject *dict; /* instance dictionary (see __dictoffset__) */
+ PyObject *weakreflist; /* weak reference list (see __weaklistoffset__) */
+ _PyIO_State *module_state; /* cached by __init__ via find_io_state_by_def() */
+} stringio;
+
+/* Forward declaration: stringio_setstate() calls this function before the
+   clinic header is included further down.  Presumably that header provides
+   the definition -- confirm against clinic/stringio.c.h. */
+static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
+
+#define CHECK_INITIALIZED(self) \
+ if (self->ok <= 0) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on uninitialized object"); \
+ return NULL; \
+ }
+
+#define CHECK_CLOSED(self) \
+ if (self->closed) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on closed file"); \
+ return NULL; \
+ }
+
+#define ENSURE_REALIZED(self) \
+ if (realize(self) < 0) { \
+ return NULL; \
+ }
+
+
+/* Grow or shrink the character buffer so that it can hold `size` characters
+   plus the one extra slot reserved for line ending detection.  The caller
+   must pass a non-negative `size`.  Returns 0 on success.  On failure an
+   exception is set (OverflowError or MemoryError), -1 is returned and the
+   existing buffer is left in place. */
+static int
+resize_buffer(stringio *self, size_t size)
+{
+    /* Unsigned arithmetic is used throughout because signed integer overflow
+       is undefined in C. */
+    size_t capacity = self->buf_size;
+    size_t needed;
+    Py_UCS4 *grown = NULL;
+
+    assert(self->buf != NULL);
+
+    /* Reserve one more char for line ending detection. */
+    needed = size + 1;
+    /* Stay in the range of the signed type; Python does not allow strings
+       longer than that anyway. */
+    if (needed > PY_SSIZE_T_MAX)
+        goto overflow;
+
+    if (needed >= capacity / 2) {
+        if (needed < capacity) {
+            /* Fits in the current allocation: nothing to do. */
+            return 0;
+        }
+        if (needed <= capacity * 1.125) {
+            /* Moderate growth: overallocate like list_resize() does. */
+            capacity = needed + (needed >> 3) + (needed < 9 ? 3 : 6);
+        }
+        else {
+            /* Large growth: allocate the exact size. */
+            capacity = needed + 1;
+        }
+    }
+    else {
+        /* Large shrink: allocate the exact size. */
+        capacity = needed + 1;
+    }
+
+    if (capacity > PY_SIZE_MAX / sizeof(Py_UCS4))
+        goto overflow;
+    grown = (Py_UCS4 *)PyMem_Realloc(self->buf, capacity * sizeof(Py_UCS4));
+    if (grown == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    self->buf_size = capacity;
+    self->buf = grown;
+
+    return 0;
+
+  overflow:
+    PyErr_SetString(PyExc_OverflowError,
+                    "new buffer size too large");
+    return -1;
+}
+
+/* Produce the accumulated contents as a str while staying in accumulating
+   state: the writer is finished to obtain the string, then re-created and
+   fed that same string so that later writes keep appending to it.
+   Returns a new reference, or NULL with an exception set; on failure the
+   object is left in STATE_REALIZED. */
+static PyObject *
+make_intermediate(stringio *self)
+{
+    PyObject *snapshot = _PyUnicodeWriter_Finish(&self->writer);
+
+    /* The writer is gone at this point, whether or not Finish succeeded. */
+    self->state = STATE_REALIZED;
+    if (snapshot != NULL) {
+        _PyUnicodeWriter_Init(&self->writer);
+        self->writer.overallocate = 1;
+        if (_PyUnicodeWriter_WriteStr(&self->writer, snapshot) == 0) {
+            self->state = STATE_ACCUMULATING;
+            return snapshot;
+        }
+        Py_DECREF(snapshot);
+    }
+    return NULL;
+}
+
+/* Switch from accumulating to realized state by copying the text held by
+   the writer into the UCS4 buffer.  Does nothing when already realized.
+   Returns 0 on success, -1 with an exception set on failure (the state is
+   STATE_REALIZED in both cases). */
+static int
+realize(stringio *self)
+{
+    PyObject *text;
+    Py_ssize_t nchars;
+    int status = -1;
+
+    if (self->state == STATE_REALIZED)
+        return 0;
+    assert(self->state == STATE_ACCUMULATING);
+    self->state = STATE_REALIZED;
+
+    text = _PyUnicodeWriter_Finish(&self->writer);
+    if (text == NULL)
+        return -1;
+
+    /* Make room for the accumulated text, then copy it into the buffer. */
+    nchars = PyUnicode_GET_LENGTH(text);
+    if (resize_buffer(self, nchars) >= 0
+        && PyUnicode_AsUCS4(text, self->buf, nchars, 0) != NULL)
+    {
+        status = 0;
+    }
+
+    Py_DECREF(text);
+    return status;
+}
+
+/* Internal routine for writing a whole PyUnicode object to the buffer of a
+   StringIO object, at the current position.
+
+   The string is first passed through the newline decoder (if any), then
+   "\n" is replaced by self->writenl (if set).  In accumulating state with
+   the position at the end of the contents, the text is appended to the
+   writer; otherwise the object is realized and the text is copied into the
+   buffer, zero-filling any gap left by a seek past the end.
+
+   On success self->pos is advanced by the length of the text actually
+   stored and self->string_size is raised if needed.
+   Returns 0 on success, or -1 on error with an exception set. */
+static Py_ssize_t
+write_str(stringio *self, PyObject *obj)
+{
+    Py_ssize_t len;
+    PyObject *decoded = NULL;
+
+    assert(self->buf != NULL);
+    assert(self->pos >= 0);
+
+    if (self->decoder != NULL) {
+        decoded = _PyIncrementalNewlineDecoder_decode(
+            self->decoder, obj, 1 /* always final */);
+    }
+    else {
+        decoded = Py_NewRef(obj);
+    }
+    /* Bail out before the translation step: PyUnicode_Replace() must never
+       be handed a NULL string. */
+    if (decoded == NULL)
+        return -1;
+
+    if (self->writenl) {
+        PyObject *translated = PyUnicode_Replace(
+            decoded, &_Py_STR(newline), self->writenl, -1);
+        Py_SETREF(decoded, translated);
+        if (decoded == NULL)
+            return -1;
+    }
+
+    assert(PyUnicode_Check(decoded));
+    if (PyUnicode_READY(decoded)) {
+        Py_DECREF(decoded);
+        return -1;
+    }
+    len = PyUnicode_GET_LENGTH(decoded);
+    assert(len >= 0);
+
+    /* This overflow check is not strictly necessary. However, it avoids us to
+       deal with funky things like comparing an unsigned and a signed
+       integer. */
+    if (self->pos > PY_SSIZE_T_MAX - len) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "new position too large");
+        goto fail;
+    }
+
+    if (self->state == STATE_ACCUMULATING) {
+        if (self->string_size == self->pos) {
+            /* Plain append: keep accumulating in the writer. */
+            if (_PyUnicodeWriter_WriteStr(&self->writer, decoded))
+                goto fail;
+            goto success;
+        }
+        /* Writing anywhere else needs the random-access buffer. */
+        if (realize(self))
+            goto fail;
+    }
+
+    if (self->pos + len > self->string_size) {
+        if (resize_buffer(self, self->pos + len) < 0)
+            goto fail;
+    }
+
+    if (self->pos > self->string_size) {
+        /* In case of overseek, pad with null bytes the buffer region between
+           the end of stream and the current position.
+
+          0   lo      string_size                           hi
+          |   |<---used--->|<----------available----------->|
+          |   |            <--to pad-->|<---to write--->    |
+          0   buf                   position
+
+        */
+        memset(self->buf + self->string_size, '\0',
+               (self->pos - self->string_size) * sizeof(Py_UCS4));
+    }
+
+    /* Copy the data to the internal buffer, overwriting some of the
+       existing data if self->pos < self->string_size. */
+    if (!PyUnicode_AsUCS4(decoded,
+                          self->buf + self->pos,
+                          self->buf_size - self->pos,
+                          0))
+        goto fail;
+
+success:
+    /* Set the new length of the internal string if it has changed. */
+    self->pos += len;
+    if (self->string_size < self->pos)
+        self->string_size = self->pos;
+
+    Py_DECREF(decoded);
+    return 0;
+
+fail:
+    Py_XDECREF(decoded);
+    return -1;
+}
+
+/*[clinic input]
+_io.StringIO.getvalue
+
+Retrieve the entire contents of the object.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_getvalue_impl(stringio *self)
+/*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/
+{
+    /* Returns a new str holding the whole contents; the stream position is
+       not touched.  Fails with ValueError on an uninitialized or closed
+       object. */
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    if (self->state != STATE_ACCUMULATING) {
+        /* Realized: build the str straight from the UCS4 buffer. */
+        return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
+                                         self->string_size);
+    }
+    /* Accumulating: snapshot the writer without leaving that state. */
+    return make_intermediate(self);
+}
+
+/*[clinic input]
+_io.StringIO.tell
+
+Tell the current file position.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_tell_impl(stringio *self)
+/*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/
+{
+    /* Returns the current position (in characters) as an int; fails with
+       ValueError on an uninitialized or closed object. */
+    Py_ssize_t where;
+
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    where = self->pos;
+    return PyLong_FromSsize_t(where);
+}
+
+/*[clinic input]
+_io.StringIO.read
+ size: Py_ssize_t(accept={int, NoneType}) = -1
+ /
+
+Read at most size characters, returned as a string.
+
+If the argument is negative or omitted, read until EOF
+is reached. Return an empty string at EOF.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_read_impl(stringio *self, Py_ssize_t size)
+/*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/
+{
+    /* Reads up to `size` characters from the current position and advances
+       the position by the number of characters returned.  A negative or
+       too large `size` means "everything that is left". */
+    Py_ssize_t remaining;
+    Py_UCS4 *chunk;
+
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    /* Clamp the request to what is available; `remaining` is negative when
+       the position lies beyond the end of the contents. */
+    remaining = self->string_size - self->pos;
+    if (size < 0 || size > remaining) {
+        size = (remaining < 0) ? 0 : remaining;
+    }
+
+    /* Optimization for seek(0); read(): hand out the accumulated string
+       without realizing the buffer. */
+    if (self->state == STATE_ACCUMULATING && self->pos == 0
+        && size == remaining)
+    {
+        PyObject *everything = make_intermediate(self);
+        self->pos = self->string_size;
+        return everything;
+    }
+
+    ENSURE_REALIZED(self);
+    chunk = self->buf + self->pos;
+    self->pos += size;
+    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, chunk, size);
+}
+
+/* Internal helper, used by stringio_readline and stringio_iternext.
+
+   Reads one line from the buffer starting at self->pos and advances
+   self->pos past it.  `limit` caps the number of characters considered; a
+   negative or too large limit means "up to the end of the contents".
+   Returns a new str, empty when the position is at or beyond the end.
+   The callers visible in this file realize the buffer before calling. */
+static PyObject *
+_stringio_readline(stringio *self, Py_ssize_t limit)
+{
+ Py_UCS4 *start, *end, old_char;
+ Py_ssize_t len, consumed;
+
+ /* In case of overseek, return the empty string */
+ if (self->pos >= self->string_size)
+ return PyUnicode_New(0, 0);
+
+ start = self->buf + self->pos;
+ if (limit < 0 || limit > self->string_size - self->pos)
+ limit = self->string_size - self->pos;
+
+ /* Temporarily store a NUL at the end of the search window and restore the
+ original character right after the scan.  `end` may point one past the
+ contents, which is why the buffer always keeps one spare character (see
+ the implementation note at the top of the file). */
+ end = start + limit;
+ old_char = *end;
+ *end = '\0';
+ len = _PyIO_find_line_ending(
+ self->readtranslate, self->readuniversal, self->readnl,
+ PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
+ *end = old_char;
+ /* If we haven't found any line ending, we just return everything
+ (`consumed` is ignored). */
+ if (len < 0)
+ len = limit;
+ self->pos += len;
+ return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
+}
+
+/*[clinic input]
+_io.StringIO.readline
+ size: Py_ssize_t(accept={int, NoneType}) = -1
+ /
+
+Read until newline or EOF.
+
+Returns an empty string if EOF is hit immediately.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
+/*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/
+{
+    /* Validates the object, realizes the buffer and delegates to
+       _stringio_readline() with `size` as the character limit. */
+    PyObject *line;
+
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+    ENSURE_REALIZED(self);
+
+    line = _stringio_readline(self, size);
+    return line;
+}
+
+/* tp_iternext slot: return the next line, or NULL without an exception at
+   end of stream.  Exact StringIO instances read straight from the buffer;
+   subclasses go through their (possibly overridden) readline() method,
+   whose result must be a str. */
+static PyObject *
+stringio_iternext(stringio *self)
+{
+    PyObject *line;
+
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+    ENSURE_REALIZED(self);
+
+    if (!Py_IS_TYPE(self, self->module_state->PyStringIO_Type)) {
+        /* XXX is subclassing StringIO really supported? */
+        line = PyObject_CallMethodNoArgs((PyObject *)self,
+                                         &_Py_ID(readline));
+        if (line != NULL && !PyUnicode_Check(line)) {
+            PyErr_Format(PyExc_OSError,
+                         "readline() should have returned a str object, "
+                         "not '%.200s'", Py_TYPE(line)->tp_name);
+            Py_DECREF(line);
+            return NULL;
+        }
+    }
+    else {
+        /* Exact type: skip the method call overhead. */
+        line = _stringio_readline(self, -1);
+    }
+
+    if (line == NULL)
+        return NULL;
+
+    if (PyUnicode_GET_LENGTH(line) != 0)
+        return line;
+
+    /* An empty line means EOF: stop the iteration. */
+    Py_DECREF(line);
+    return NULL;
+}
+
+/*[clinic input]
+_io.StringIO.truncate
+ pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
+ /
+
+Truncate size to pos.
+
+The pos argument defaults to the current file position, as
+returned by tell(). The current file position is unchanged.
+Returns the new absolute position.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
+/*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/
+{
+    /* Shrinks the contents to `size` characters when they are longer than
+       that; never extends them.  The stream position is left alone.
+       Returns `size` as an int. */
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    if (size < 0) {
+        PyErr_Format(PyExc_ValueError,
+                     "Negative size value %zd", size);
+        return NULL;
+    }
+
+    if (size >= self->string_size) {
+        /* Nothing to cut off. */
+        return PyLong_FromSsize_t(size);
+    }
+
+    ENSURE_REALIZED(self);
+    if (resize_buffer(self, size) < 0)
+        return NULL;
+    self->string_size = size;
+
+    return PyLong_FromSsize_t(size);
+}
+
+/*[clinic input]
+_io.StringIO.seek
+ pos: Py_ssize_t
+ whence: int = 0
+ /
+
+Change stream position.
+
+Seek to character offset pos relative to position indicated by whence:
+ 0 Start of stream (the default). pos should be >= 0;
+ 1 Current position - pos must be 0;
+ 2 End of stream - pos must be 0.
+Returns the new absolute position.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
+/*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/
+{
+    /* Moves the stream position and returns it as an int.
+         whence == 0: absolute position, which must be >= 0;
+         whence == 1: keep the current position (pos must be 0);
+         whence == 2: jump to the end of the contents (pos must be 0). */
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    switch (whence) {
+    case 0:
+        if (pos < 0) {
+            PyErr_Format(PyExc_ValueError,
+                         "Negative seek position %zd", pos);
+            return NULL;
+        }
+        break;
+    case 1:
+    case 2:
+        if (pos != 0) {
+            PyErr_SetString(PyExc_OSError,
+                            "Can't do nonzero cur-relative seeks");
+            return NULL;
+        }
+        pos = (whence == 1) ? self->pos : self->string_size;
+        break;
+    default:
+        PyErr_Format(PyExc_ValueError,
+                     "Invalid whence (%i, should be 0, 1 or 2)", whence);
+        return NULL;
+    }
+
+    self->pos = pos;
+
+    return PyLong_FromSsize_t(self->pos);
+}
+
+/*[clinic input]
+_io.StringIO.write
+ s as obj: object
+ /
+
+Write string to file.
+
+Returns the number of characters written, which is always equal to
+the length of the string.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_write(stringio *self, PyObject *obj)
+/*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/
+{
+    /* Writes the str `obj` at the current position and returns its length.
+       The argument type is validated before the closed check, so a non-str
+       argument raises TypeError even on a closed object. */
+    Py_ssize_t nchars;
+
+    CHECK_INITIALIZED(self);
+    if (!PyUnicode_Check(obj)) {
+        PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
+                     Py_TYPE(obj)->tp_name);
+        return NULL;
+    }
+    if (PyUnicode_READY(obj) != 0)
+        return NULL;
+    CHECK_CLOSED(self);
+
+    nchars = PyUnicode_GET_LENGTH(obj);
+    if (nchars > 0) {
+        /* Empty strings are a no-op and never reach write_str(). */
+        if (write_str(self, obj) < 0)
+            return NULL;
+    }
+
+    return PyLong_FromSsize_t(nchars);
+}
+
+/*[clinic input]
+_io.StringIO.close
+
+Close the IO object.
+
+Attempting any further operation after the object is closed
+will raise a ValueError.
+
+This method has no effect if the file is already closed.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_close_impl(stringio *self)
+/*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/
+{
+    /* Marks the object closed first, then releases what it holds: the
+       buffer is shrunk, the writer is torn down and the newline-related
+       objects are dropped.  Returns None. */
+    self->closed = 1;
+
+    /* Free up some memory */
+    if (resize_buffer(self, 0) >= 0) {
+        _PyUnicodeWriter_Dealloc(&self->writer);
+        Py_CLEAR(self->readnl);
+        Py_CLEAR(self->writenl);
+        Py_CLEAR(self->decoder);
+        Py_RETURN_NONE;
+    }
+    return NULL;
+}
+
+/* tp_traverse slot: report every object reference owned by the instance to
+   the garbage collector, including the type itself. */
+static int
+stringio_traverse(stringio *self, visitproc visit, void *arg)
+{
+ Py_VISIT(Py_TYPE(self));
+ Py_VISIT(self->readnl);
+ Py_VISIT(self->writenl);
+ Py_VISIT(self->decoder);
+ Py_VISIT(self->dict);
+ return 0;
+}
+
+/* tp_clear slot: drop the object references owned by the instance.  Also
+   called from stringio_dealloc().  Always returns 0. */
+static int
+stringio_clear(stringio *self)
+{
+ Py_CLEAR(self->readnl);
+ Py_CLEAR(self->writenl);
+ Py_CLEAR(self->decoder);
+ Py_CLEAR(self->dict);
+ return 0;
+}
+
+/* tp_dealloc slot: release the buffer, the writer, the owned references and
+   any weak references, then free the object. */
+static void
+stringio_dealloc(stringio *self)
+{
+ /* Keep the type pointer: it is still needed after the object is freed. */
+ PyTypeObject *tp = Py_TYPE(self);
+ _PyObject_GC_UNTRACK(self);
+ /* Mark the object as uninitialized while it is being torn down. */
+ self->ok = 0;
+ if (self->buf) {
+ PyMem_Free(self->buf);
+ self->buf = NULL;
+ }
+ _PyUnicodeWriter_Dealloc(&self->writer);
+ (void)stringio_clear(self);
+ if (self->weakreflist != NULL) {
+ PyObject_ClearWeakRefs((PyObject *) self);
+ }
+ tp->tp_free(self);
+ /* Drop the reference to the type held on behalf of the instance. */
+ Py_DECREF(tp);
+}
+
+/* tp_new slot: allocate an instance and give it an (empty) buffer so that
+   self->buf is never NULL afterwards.  `args` and `kwds` are not used here.
+   Returns a new reference, or NULL with an exception set. */
+static PyObject *
+stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    stringio *obj;
+
+    assert(type != NULL && type->tp_alloc != NULL);
+    obj = (stringio *)type->tp_alloc(type, 0);
+    if (obj != NULL) {
+        /* tp_alloc zero-fills the object, so the buffer is the only field
+           that needs to be set up explicitly. */
+        obj->buf = (Py_UCS4 *)PyMem_Malloc(0);
+        if (obj->buf != NULL) {
+            return (PyObject *)obj;
+        }
+        Py_DECREF(obj);
+        return PyErr_NoMemory();
+    }
+    return NULL;
+}
+
+/*[clinic input]
+_io.StringIO.__init__
+ initial_value as value: object(c_default="NULL") = ''
+ newline as newline_obj: object(c_default="NULL") = '\n'
+
+Text I/O implementation using an in-memory buffer.
+
+The initial_value argument sets the value of object. The newline
+argument is like the one of TextIOWrapper's constructor.
+[clinic start generated code]*/
+
+static int
+_io_StringIO___init___impl(stringio *self, PyObject *value,
+ PyObject *newline_obj)
+/*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
+{
+ /* (Re)initializes the object: validates both arguments, resets the
+ newline handling, then loads the initial value.  `value` and
+ `newline_obj` are NULL when the corresponding argument was omitted.
+ Returns 0 on success, -1 with an exception set on failure.  Once the
+ arguments are validated, self->ok stays 0 until everything succeeded. */
+ const char *newline = "\n";
+ Py_ssize_t value_len;
+
+ /* Parse the newline argument. We only want to allow unicode objects or
+ None. */
+ if (newline_obj == Py_None) {
+ newline = NULL;
+ }
+ else if (newline_obj) {
+ if (!PyUnicode_Check(newline_obj)) {
+ PyErr_Format(PyExc_TypeError,
+ "newline must be str or None, not %.200s",
+ Py_TYPE(newline_obj)->tp_name);
+ return -1;
+ }
+ newline = PyUnicode_AsUTF8(newline_obj);
+ if (newline == NULL)
+ return -1;
+ }
+
+ /* Accepted values: None, "", "\n", "\r" and "\r\n". */
+ if (newline && newline[0] != '\0'
+ && !(newline[0] == '\n' && newline[1] == '\0')
+ && !(newline[0] == '\r' && newline[1] == '\0')
+ && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
+ PyErr_Format(PyExc_ValueError,
+ "illegal newline value: %R", newline_obj);
+ return -1;
+ }
+ if (value && value != Py_None && !PyUnicode_Check(value)) {
+ PyErr_Format(PyExc_TypeError,
+ "initial_value must be str or None, not %.200s",
+ Py_TYPE(value)->tp_name);
+ return -1;
+ }
+
+ /* From here on the previous state is being discarded. */
+ self->ok = 0;
+
+ _PyUnicodeWriter_Dealloc(&self->writer);
+ Py_CLEAR(self->readnl);
+ Py_CLEAR(self->writenl);
+ Py_CLEAR(self->decoder);
+
+ assert((newline != NULL && newline_obj != Py_None) ||
+ (newline == NULL && newline_obj == Py_None));
+
+ if (newline) {
+ self->readnl = PyUnicode_FromString(newline);
+ if (self->readnl == NULL)
+ return -1;
+ }
+ self->readuniversal = (newline == NULL || newline[0] == '\0');
+ self->readtranslate = (newline == NULL);
+ /* If newline == "", we don't translate anything.
+ If newline == "\n" or newline == None, we translate to "\n", which is
+ a no-op.
+ (for newline == None, TextIOWrapper translates to os.linesep, but it
+ is pointless for StringIO)
+ */
+ if (newline != NULL && newline[0] == '\r') {
+ self->writenl = Py_NewRef(self->readnl);
+ }
+
+ _PyIO_State *module_state = find_io_state_by_def(Py_TYPE(self));
+ if (self->readuniversal) {
+ /* Called as decoder_type(None, translate). */
+ self->decoder = PyObject_CallFunctionObjArgs(
+ (PyObject *)module_state->PyIncrementalNewlineDecoder_Type,
+ Py_None, self->readtranslate ? Py_True : Py_False, NULL);
+ if (self->decoder == NULL)
+ return -1;
+ }
+
+ /* Now everything is set up, resize buffer to size of initial value,
+ and copy it */
+ self->string_size = 0;
+ if (value && value != Py_None)
+ value_len = PyUnicode_GetLength(value);
+ else
+ value_len = 0;
+ if (value_len > 0) {
+ /* This is a heuristic, for newline translation might change
+ the string length. */
+ if (resize_buffer(self, 0) < 0)
+ return -1;
+ /* Start realized so that write_str() stores the value in the buffer. */
+ self->state = STATE_REALIZED;
+ self->pos = 0;
+ if (write_str(self, value) < 0)
+ return -1;
+ }
+ else {
+ /* Empty stringio object, we can start by accumulating */
+ if (resize_buffer(self, 0) < 0)
+ return -1;
+ _PyUnicodeWriter_Init(&self->writer);
+ self->writer.overallocate = 1;
+ self->state = STATE_ACCUMULATING;
+ }
+ /* write_str() advanced the position; rewind to the start. */
+ self->pos = 0;
+ self->module_state = module_state;
+ self->closed = 0;
+ self->ok = 1;
+ return 0;
+}
+
+/* Properties and pseudo-properties */
+
+/*[clinic input]
+_io.StringIO.readable
+
+Returns True if the IO object can be read.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_readable_impl(stringio *self)
+/*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/
+{
+    /* Always True for an initialized, open object; ValueError otherwise. */
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    return Py_NewRef(Py_True);
+}
+
+/*[clinic input]
+_io.StringIO.writable
+
+Returns True if the IO object can be written.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_writable_impl(stringio *self)
+/*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/
+{
+    /* Always True for an initialized, open object; ValueError otherwise. */
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    return Py_NewRef(Py_True);
+}
+
+/*[clinic input]
+_io.StringIO.seekable
+
+Returns True if the IO object can be seeked.
+[clinic start generated code]*/
+
+static PyObject *
+_io_StringIO_seekable_impl(stringio *self)
+/*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/
+{
+    /* Always True for an initialized, open object; ValueError otherwise. */
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    return Py_NewRef(Py_True);
+}
+
+/* Pickling support.
+
+ The implementation of __getstate__ is similar to the one for BytesIO,
+ except that we also save the newline parameter. For __setstate__ and unlike
+ BytesIO, we call __init__ to restore the object's state. Doing so allows us
+ to avoid decoding the complex newline state while keeping the object
+ representation compact.
+
+ See comment in bytesio.c regarding why only pickle protocol 2 and onward
+ are supported.
+*/
+
+/* __getstate__(): return the tuple (contents, newline, position, dict).
+   `newline` is self->readnl or None, and `dict` is a copy of the instance
+   dictionary or None when there is none.  Returns NULL with an exception
+   set on failure (including an uninitialized or closed object). */
+static PyObject *
+stringio_getstate(stringio *self, PyObject *Py_UNUSED(ignored))
+{
+    PyObject *contents = _io_StringIO_getvalue_impl(self);
+    PyObject *attrs;
+    PyObject *result;
+
+    if (contents == NULL)
+        return NULL;
+
+    if (self->dict != NULL) {
+        attrs = PyDict_Copy(self->dict);
+        if (attrs == NULL) {
+            Py_DECREF(contents);
+            return NULL;
+        }
+    }
+    else {
+        attrs = Py_NewRef(Py_None);
+    }
+
+    /* "N" hands the reference to `attrs` over to the tuple; `contents` is
+       passed with "O" and therefore released here. */
+    result = Py_BuildValue("(OOnN)", contents,
+                           self->readnl ? self->readnl : Py_None,
+                           self->pos, attrs);
+    Py_DECREF(contents);
+    return result;
+}
+
+/* __setstate__(state): restore the object from a tuple laid out as
+   (initvalue, newline, position, dict), as produced by __getstate__().
+
+   The first two items are passed to __init__; the buffer is then reloaded
+   from `initvalue` verbatim, the position is set and the instance
+   dictionary is updated.  `initvalue` may be a str or None (both accepted
+   by __init__); None restores empty contents.
+
+   Returns None, or NULL with an exception set (ValueError if the object is
+   closed or the position is negative, TypeError for a malformed state). */
+static PyObject *
+stringio_setstate(stringio *self, PyObject *state)
+{
+    PyObject *initarg;
+    PyObject *position_obj;
+    PyObject *dict;
+    Py_ssize_t pos;
+
+    assert(state != NULL);
+    CHECK_CLOSED(self);
+
+    /* We allow the state tuple to be longer than 4, because we may need
+       someday to extend the object's state without breaking
+       backward-compatibility. */
+    if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
+        PyErr_Format(PyExc_TypeError,
+                     "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
+                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
+        return NULL;
+    }
+
+    /* Initialize the object's state. */
+    initarg = PyTuple_GetSlice(state, 0, 2);
+    if (initarg == NULL)
+        return NULL;
+    if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
+        Py_DECREF(initarg);
+        return NULL;
+    }
+    Py_DECREF(initarg);
+
+    /* Restore the buffer state. Even if __init__ did initialize the buffer,
+       we have to initialize it again since __init__ may translate the
+       newlines in the initial_value string. We clearly do not want that
+       because the string value in the state tuple has already been translated
+       once by __init__. So we do not take any chance and replace object's
+       buffer completely. */
+    {
+        PyObject *item = PyTuple_GET_ITEM(state, 0);
+
+        if (PyUnicode_Check(item)) {
+            Py_UCS4 *buf;
+            Py_ssize_t bufsize;
+
+            buf = PyUnicode_AsUCS4Copy(item);
+            if (buf == NULL)
+                return NULL;
+            bufsize = PyUnicode_GET_LENGTH(item);
+
+            if (resize_buffer(self, bufsize) < 0) {
+                PyMem_Free(buf);
+                return NULL;
+            }
+            memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
+            PyMem_Free(buf);
+            self->string_size = bufsize;
+        }
+        else {
+            /* __init__ only lets a str or None through.  None must not be
+               handed to the unicode API; it simply means "no contents". */
+            assert(item == Py_None);
+            self->string_size = 0;
+        }
+    }
+
+    /* Set carefully the position value. Alternatively, we could use the seek
+       method instead of modifying self->pos directly to better protect the
+       object internal state against erroneous (or malicious) inputs. */
+    position_obj = PyTuple_GET_ITEM(state, 2);
+    if (!PyLong_Check(position_obj)) {
+        PyErr_Format(PyExc_TypeError,
+                     "third item of state must be an integer, got %.200s",
+                     Py_TYPE(position_obj)->tp_name);
+        return NULL;
+    }
+    pos = PyLong_AsSsize_t(position_obj);
+    if (pos == -1 && PyErr_Occurred())
+        return NULL;
+    if (pos < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "position value cannot be negative");
+        return NULL;
+    }
+    self->pos = pos;
+
+    /* Set the dictionary of the instance variables. */
+    dict = PyTuple_GET_ITEM(state, 3);
+    if (dict != Py_None) {
+        if (!PyDict_Check(dict)) {
+            PyErr_Format(PyExc_TypeError,
+                         "fourth item of state should be a dict, got a %.200s",
+                         Py_TYPE(dict)->tp_name);
+            return NULL;
+        }
+        if (self->dict) {
+            /* Alternatively, we could replace the internal dictionary
+               completely. However, it seems more practical to just update it. */
+            if (PyDict_Update(self->dict, dict) < 0)
+                return NULL;
+        }
+        else {
+            self->dict = Py_NewRef(dict);
+        }
+    }
+
+    Py_RETURN_NONE;
+}
+
+
+/* Getter for the `closed` attribute: True once close() has been called.
+   Raises ValueError on an uninitialized object. */
+static PyObject *
+stringio_closed(stringio *self, void *context)
+{
+    long is_closed;
+
+    CHECK_INITIALIZED(self);
+
+    is_closed = self->closed;
+    return PyBool_FromLong(is_closed);
+}
+
+/* Getter for the `line_buffering` pseudo-attribute: always False on an
+   initialized, open object; ValueError otherwise. */
+static PyObject *
+stringio_line_buffering(stringio *self, void *context)
+{
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    return Py_NewRef(Py_False);
+}
+
+/* Getter for the `newlines` attribute: forwards to the `newlines` attribute
+   of the newline decoder, or returns None when no decoder is in use. */
+static PyObject *
+stringio_newlines(stringio *self, void *context)
+{
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    if (self->decoder != NULL) {
+        return PyObject_GetAttr(self->decoder, &_Py_ID(newlines));
+    }
+    Py_RETURN_NONE;
+}
+
+#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
+#include "clinic/stringio.c.h"
+#undef clinic_state
+
+/* Method table of _io.StringIO.  The _IO_STRINGIO_*_METHODDEF entries come
+   from the clinic header included above; the pickling hooks are declared by
+   hand. */
+static struct PyMethodDef stringio_methods[] = {
+ _IO_STRINGIO_CLOSE_METHODDEF
+ _IO_STRINGIO_GETVALUE_METHODDEF
+ _IO_STRINGIO_READ_METHODDEF
+ _IO_STRINGIO_READLINE_METHODDEF
+ _IO_STRINGIO_TELL_METHODDEF
+ _IO_STRINGIO_TRUNCATE_METHODDEF
+ _IO_STRINGIO_SEEK_METHODDEF
+ _IO_STRINGIO_WRITE_METHODDEF
+
+ _IO_STRINGIO_SEEKABLE_METHODDEF
+ _IO_STRINGIO_READABLE_METHODDEF
+ _IO_STRINGIO_WRITABLE_METHODDEF
+
+ {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
+ {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
+ {NULL, NULL} /* sentinel */
+};
+
+/* Computed attributes of _io.StringIO.  None of them has a setter. */
+static PyGetSetDef stringio_getset[] = {
+ {"closed", (getter)stringio_closed, NULL, NULL},
+ {"newlines", (getter)stringio_newlines, NULL, NULL},
+ /* (following comments straight off of the original Python wrapper:)
+ XXX Cruft to support the TextIOWrapper API. This would only
+ be meaningful if StringIO supported the buffer attribute.
+ Hopefully, a better solution, than adding these pseudo-attributes,
+ will be found.
+ */
+ {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
+ {NULL}
+};
+
+/* Special members giving the offsets of the weak reference list and of the
+   instance dictionary inside the stringio struct. */
+static struct PyMemberDef stringio_members[] = {
+ {"__weaklistoffset__", T_PYSSIZET, offsetof(stringio, weakreflist), READONLY},
+ {"__dictoffset__", T_PYSSIZET, offsetof(stringio, dict), READONLY},
+ {NULL},
+};
+
+/* Type slots of _io.StringIO, referenced by stringio_spec below. */
+static PyType_Slot stringio_slots[] = {
+ {Py_tp_dealloc, stringio_dealloc},
+ {Py_tp_doc, (void *)_io_StringIO___init____doc__},
+ {Py_tp_traverse, stringio_traverse},
+ {Py_tp_clear, stringio_clear},
+ {Py_tp_iternext, stringio_iternext},
+ {Py_tp_methods, stringio_methods},
+ {Py_tp_members, stringio_members},
+ {Py_tp_getset, stringio_getset},
+ {Py_tp_init, _io_StringIO___init__},
+ {Py_tp_new, stringio_new},
+ {0, NULL},
+};
+
+/* Type specification of _io.StringIO: a GC-tracked, subclassable type whose
+   type object is immutable.  Not static, so other translation units can
+   refer to it. */
+PyType_Spec stringio_spec = {
+ .name = "_io.StringIO",
+ .basicsize = sizeof(stringio),
+ .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
+ Py_TPFLAGS_IMMUTABLETYPE),
+ .slots = stringio_slots,
+};