diff options
author | AlexSm <alex@ydb.tech> | 2024-03-05 10:40:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-05 12:40:59 +0300 |
commit | 1ac13c847b5358faba44dbb638a828e24369467b (patch) | |
tree | 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Modules/_io/stringio.c | |
parent | ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff) | |
download | ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz |
Library import 16 (#2433)
Co-authored-by: robot-piglet <robot-piglet@yandex-team.com>
Co-authored-by: deshevoy <deshevoy@yandex-team.com>
Co-authored-by: robot-contrib <robot-contrib@yandex-team.com>
Co-authored-by: thegeorg <thegeorg@yandex-team.com>
Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com>
Co-authored-by: svidyuk <svidyuk@yandex-team.com>
Co-authored-by: shadchin <shadchin@yandex-team.com>
Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com>
Co-authored-by: innokentii <innokentii@yandex-team.com>
Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com>
Co-authored-by: snermolaev <snermolaev@yandex-team.com>
Co-authored-by: dimdim11 <dimdim11@yandex-team.com>
Co-authored-by: kickbutt <kickbutt@yandex-team.com>
Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com>
Co-authored-by: korsunandrei <korsunandrei@yandex-team.com>
Co-authored-by: petrk <petrk@yandex-team.com>
Co-authored-by: miroslav2 <miroslav2@yandex-team.com>
Co-authored-by: serjflint <serjflint@yandex-team.com>
Co-authored-by: akhropov <akhropov@yandex-team.com>
Co-authored-by: prettyboy <prettyboy@yandex-team.com>
Co-authored-by: ilikepugs <ilikepugs@yandex-team.com>
Co-authored-by: hiddenpath <hiddenpath@yandex-team.com>
Co-authored-by: mikhnenko <mikhnenko@yandex-team.com>
Co-authored-by: spreis <spreis@yandex-team.com>
Co-authored-by: andreyshspb <andreyshspb@yandex-team.com>
Co-authored-by: dimaandreev <dimaandreev@yandex-team.com>
Co-authored-by: rashid <rashid@yandex-team.com>
Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com>
Co-authored-by: r-vetrov <r-vetrov@yandex-team.com>
Co-authored-by: ypodlesov <ypodlesov@yandex-team.com>
Co-authored-by: zaverden <zaverden@yandex-team.com>
Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com>
Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com>
Co-authored-by: v-korovin <v-korovin@yandex-team.com>
Co-authored-by: arikon <arikon@yandex-team.com>
Co-authored-by: khoden <khoden@yandex-team.com>
Co-authored-by: psydmm <psydmm@yandex-team.com>
Co-authored-by: robot-javacom <robot-javacom@yandex-team.com>
Co-authored-by: dtorilov <dtorilov@yandex-team.com>
Co-authored-by: sennikovmv <sennikovmv@yandex-team.com>
Co-authored-by: hcpp <hcpp@ydb.tech>
Diffstat (limited to 'contrib/tools/python3/Modules/_io/stringio.c')
-rw-r--r-- | contrib/tools/python3/Modules/_io/stringio.c | 1037 |
1 files changed, 1037 insertions, 0 deletions
diff --git a/contrib/tools/python3/Modules/_io/stringio.c b/contrib/tools/python3/Modules/_io/stringio.c new file mode 100644 index 0000000000..3eb25704b4 --- /dev/null +++ b/contrib/tools/python3/Modules/_io/stringio.c @@ -0,0 +1,1037 @@ +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include <stddef.h> // offsetof() +#include "pycore_object.h" +#include "_iomodule.h" + +/* Implementation note: the buffer is always at least one character longer + than the enclosed string, for proper functioning of _PyIO_find_line_ending. +*/ + +#define STATE_REALIZED 1 +#define STATE_ACCUMULATING 2 + +/*[clinic input] +module _io +class _io.StringIO "stringio *" "clinic_state()->PyStringIO_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2693eada0658d470]*/ + +typedef struct { + PyObject_HEAD + Py_UCS4 *buf; + Py_ssize_t pos; + Py_ssize_t string_size; + size_t buf_size; + + /* The stringio object can be in two states: accumulating or realized. + In accumulating state, the internal buffer contains nothing and + the contents are given by the embedded _PyUnicodeWriter structure. + In realized state, the internal buffer is meaningful and the + _PyUnicodeWriter is destroyed. + */ + int state; + _PyUnicodeWriter writer; + + char ok; /* initialized? */ + char closed; + char readuniversal; + char readtranslate; + PyObject *decoder; + PyObject *readnl; + PyObject *writenl; + + PyObject *dict; + PyObject *weakreflist; + _PyIO_State *module_state; +} stringio; + +static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs); + +#define CHECK_INITIALIZED(self) \ + if (self->ok <= 0) { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on uninitialized object"); \ + return NULL; \ + } + +#define CHECK_CLOSED(self) \ + if (self->closed) { \ + PyErr_SetString(PyExc_ValueError, \ + "I/O operation on closed file"); \ + return NULL; \ + } + +#define ENSURE_REALIZED(self) \ + if (realize(self) < 0) { \ + return NULL; \ + } + + +/* Internal routine for changing the size, in terms of characters, of the + buffer of StringIO objects. The caller should ensure that the 'size' + argument is non-negative. Returns 0 on success, -1 otherwise. */ +static int +resize_buffer(stringio *self, size_t size) +{ + /* Here, unsigned types are used to avoid dealing with signed integer + overflow, which is undefined in C. */ + size_t alloc = self->buf_size; + Py_UCS4 *new_buf = NULL; + + assert(self->buf != NULL); + + /* Reserve one more char for line ending detection. */ + size = size + 1; + /* For simplicity, stay in the range of the signed type. Anyway, Python + doesn't allow strings to be longer than this. */ + if (size > PY_SSIZE_T_MAX) + goto overflow; + + if (size < alloc / 2) { + /* Major downsize; resize down to exact size. */ + alloc = size + 1; + } + else if (size < alloc) { + /* Within allocated size; quick exit */ + return 0; + } + else if (size <= alloc * 1.125) { + /* Moderate upsize; overallocate similar to list_resize() */ + alloc = size + (size >> 3) + (size < 9 ? 3 : 6); + } + else { + /* Major upsize; resize up to exact size */ + alloc = size + 1; + } + + if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4)) + goto overflow; + new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4)); + if (new_buf == NULL) { + PyErr_NoMemory(); + return -1; + } + self->buf_size = alloc; + self->buf = new_buf; + + return 0; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "new buffer size too large"); + return -1; +} + +static PyObject * +make_intermediate(stringio *self) +{ + PyObject *intermediate = _PyUnicodeWriter_Finish(&self->writer); + self->state = STATE_REALIZED; + if (intermediate == NULL) + return NULL; + + _PyUnicodeWriter_Init(&self->writer); + self->writer.overallocate = 1; + if (_PyUnicodeWriter_WriteStr(&self->writer, intermediate)) { + Py_DECREF(intermediate); + return NULL; + } + self->state = STATE_ACCUMULATING; + return intermediate; +} + +static int +realize(stringio *self) +{ + Py_ssize_t len; + PyObject *intermediate; + + if (self->state == STATE_REALIZED) + return 0; + assert(self->state == STATE_ACCUMULATING); + self->state = STATE_REALIZED; + + intermediate = _PyUnicodeWriter_Finish(&self->writer); + if (intermediate == NULL) + return -1; + + /* Append the intermediate string to the internal buffer. + The length should be equal to the current cursor position. + */ + len = PyUnicode_GET_LENGTH(intermediate); + if (resize_buffer(self, len) < 0) { + Py_DECREF(intermediate); + return -1; + } + if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) { + Py_DECREF(intermediate); + return -1; + } + + Py_DECREF(intermediate); + return 0; +} + +/* Internal routine for writing a whole PyUnicode object to the buffer of a + StringIO object. Returns 0 on success, or -1 on error. */ +static Py_ssize_t +write_str(stringio *self, PyObject *obj) +{ + Py_ssize_t len; + PyObject *decoded = NULL; + + assert(self->buf != NULL); + assert(self->pos >= 0); + + if (self->decoder != NULL) { + decoded = _PyIncrementalNewlineDecoder_decode( + self->decoder, obj, 1 /* always final */); + } + else { + decoded = Py_NewRef(obj); + } + if (self->writenl) { + PyObject *translated = PyUnicode_Replace( + decoded, &_Py_STR(newline), self->writenl, -1); + Py_SETREF(decoded, translated); + } + if (decoded == NULL) + return -1; + + assert(PyUnicode_Check(decoded)); + if (PyUnicode_READY(decoded)) { + Py_DECREF(decoded); + return -1; + } + len = PyUnicode_GET_LENGTH(decoded); + assert(len >= 0); + + /* This overflow check is not strictly necessary. However, it avoids us to + deal with funky things like comparing an unsigned and a signed + integer. */ + if (self->pos > PY_SSIZE_T_MAX - len) { + PyErr_SetString(PyExc_OverflowError, + "new position too large"); + goto fail; + } + + if (self->state == STATE_ACCUMULATING) { + if (self->string_size == self->pos) { + if (_PyUnicodeWriter_WriteStr(&self->writer, decoded)) + goto fail; + goto success; + } + if (realize(self)) + goto fail; + } + + if (self->pos + len > self->string_size) { + if (resize_buffer(self, self->pos + len) < 0) + goto fail; + } + + if (self->pos > self->string_size) { + /* In case of overseek, pad with null bytes the buffer region between + the end of stream and the current position. + + 0 lo string_size hi + | |<---used--->|<----------available----------->| + | | <--to pad-->|<---to write---> | + 0 buf position + + */ + memset(self->buf + self->string_size, '\0', + (self->pos - self->string_size) * sizeof(Py_UCS4)); + } + + /* Copy the data to the internal buffer, overwriting some of the + existing data if self->pos < self->string_size. */ + if (!PyUnicode_AsUCS4(decoded, + self->buf + self->pos, + self->buf_size - self->pos, + 0)) + goto fail; + +success: + /* Set the new length of the internal string if it has changed. */ + self->pos += len; + if (self->string_size < self->pos) + self->string_size = self->pos; + + Py_DECREF(decoded); + return 0; + +fail: + Py_XDECREF(decoded); + return -1; +} + +/*[clinic input] +_io.StringIO.getvalue + +Retrieve the entire contents of the object. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_getvalue_impl(stringio *self) +/*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/ +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + if (self->state == STATE_ACCUMULATING) + return make_intermediate(self); + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf, + self->string_size); +} + +/*[clinic input] +_io.StringIO.tell + +Tell the current file position. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_tell_impl(stringio *self) +/*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/ +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + return PyLong_FromSsize_t(self->pos); +} + +/*[clinic input] +_io.StringIO.read + size: Py_ssize_t(accept={int, NoneType}) = -1 + / + +Read at most size characters, returned as a string. + +If the argument is negative or omitted, read until EOF +is reached. Return an empty string at EOF. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_read_impl(stringio *self, Py_ssize_t size) +/*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/ +{ + Py_ssize_t n; + Py_UCS4 *output; + + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + + /* adjust invalid sizes */ + n = self->string_size - self->pos; + if (size < 0 || size > n) { + size = n; + if (size < 0) + size = 0; + } + + /* Optimization for seek(0); read() */ + if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) { + PyObject *result = make_intermediate(self); + self->pos = self->string_size; + return result; + } + + ENSURE_REALIZED(self); + output = self->buf + self->pos; + self->pos += size; + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size); +} + +/* Internal helper, used by stringio_readline and stringio_iternext */ +static PyObject * +_stringio_readline(stringio *self, Py_ssize_t limit) +{ + Py_UCS4 *start, *end, old_char; + Py_ssize_t len, consumed; + + /* In case of overseek, return the empty string */ + if (self->pos >= self->string_size) + return PyUnicode_New(0, 0); + + start = self->buf + self->pos; + if (limit < 0 || limit > self->string_size - self->pos) + limit = self->string_size - self->pos; + + end = start + limit; + old_char = *end; + *end = '\0'; + len = _PyIO_find_line_ending( + self->readtranslate, self->readuniversal, self->readnl, + PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed); + *end = old_char; + /* If we haven't found any line ending, we just return everything + (`consumed` is ignored). */ + if (len < 0) + len = limit; + self->pos += len; + return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len); +} + +/*[clinic input] +_io.StringIO.readline + size: Py_ssize_t(accept={int, NoneType}) = -1 + / + +Read until newline or EOF. + +Returns an empty string if EOF is hit immediately. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_readline_impl(stringio *self, Py_ssize_t size) +/*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/ +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + ENSURE_REALIZED(self); + + return _stringio_readline(self, size); +} + +static PyObject * +stringio_iternext(stringio *self) +{ + PyObject *line; + + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + ENSURE_REALIZED(self); + + if (Py_IS_TYPE(self, self->module_state->PyStringIO_Type)) { + /* Skip method call overhead for speed */ + line = _stringio_readline(self, -1); + } + else { + /* XXX is subclassing StringIO really supported? */ + line = PyObject_CallMethodNoArgs((PyObject *)self, + &_Py_ID(readline)); + if (line && !PyUnicode_Check(line)) { + PyErr_Format(PyExc_OSError, + "readline() should have returned a str object, " + "not '%.200s'", Py_TYPE(line)->tp_name); + Py_DECREF(line); + return NULL; + } + } + + if (line == NULL) + return NULL; + + if (PyUnicode_GET_LENGTH(line) == 0) { + /* Reached EOF */ + Py_DECREF(line); + return NULL; + } + + return line; +} + +/*[clinic input] +_io.StringIO.truncate + pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None + / + +Truncate size to pos. + +The pos argument defaults to the current file position, as +returned by tell(). The current file position is unchanged. +Returns the new absolute position. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_truncate_impl(stringio *self, Py_ssize_t size) +/*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/ +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + + if (size < 0) { + PyErr_Format(PyExc_ValueError, + "Negative size value %zd", size); + return NULL; + } + + if (size < self->string_size) { + ENSURE_REALIZED(self); + if (resize_buffer(self, size) < 0) + return NULL; + self->string_size = size; + } + + return PyLong_FromSsize_t(size); +} + +/*[clinic input] +_io.StringIO.seek + pos: Py_ssize_t + whence: int = 0 + / + +Change stream position. + +Seek to character offset pos relative to position indicated by whence: + 0 Start of stream (the default). pos should be >= 0; + 1 Current position - pos must be 0; + 2 End of stream - pos must be 0. +Returns the new absolute position. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence) +/*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/ +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + + if (whence != 0 && whence != 1 && whence != 2) { + PyErr_Format(PyExc_ValueError, + "Invalid whence (%i, should be 0, 1 or 2)", whence); + return NULL; + } + else if (pos < 0 && whence == 0) { + PyErr_Format(PyExc_ValueError, + "Negative seek position %zd", pos); + return NULL; + } + else if (whence != 0 && pos != 0) { + PyErr_SetString(PyExc_OSError, + "Can't do nonzero cur-relative seeks"); + return NULL; + } + + /* whence = 0: offset relative to beginning of the string. + whence = 1: no change to current position. + whence = 2: change position to end of file. */ + if (whence == 1) { + pos = self->pos; + } + else if (whence == 2) { + pos = self->string_size; + } + + self->pos = pos; + + return PyLong_FromSsize_t(self->pos); +} + +/*[clinic input] +_io.StringIO.write + s as obj: object + / + +Write string to file. + +Returns the number of characters written, which is always equal to +the length of the string. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_write(stringio *self, PyObject *obj) +/*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/ +{ + Py_ssize_t size; + + CHECK_INITIALIZED(self); + if (!PyUnicode_Check(obj)) { + PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'", + Py_TYPE(obj)->tp_name); + return NULL; + } + if (PyUnicode_READY(obj)) + return NULL; + CHECK_CLOSED(self); + size = PyUnicode_GET_LENGTH(obj); + + if (size > 0 && write_str(self, obj) < 0) + return NULL; + + return PyLong_FromSsize_t(size); +} + +/*[clinic input] +_io.StringIO.close + +Close the IO object. + +Attempting any further operation after the object is closed +will raise a ValueError. + +This method has no effect if the file is already closed. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_close_impl(stringio *self) +/*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/ +{ + self->closed = 1; + /* Free up some memory */ + if (resize_buffer(self, 0) < 0) + return NULL; + _PyUnicodeWriter_Dealloc(&self->writer); + Py_CLEAR(self->readnl); + Py_CLEAR(self->writenl); + Py_CLEAR(self->decoder); + Py_RETURN_NONE; +} + +static int +stringio_traverse(stringio *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + Py_VISIT(self->readnl); + Py_VISIT(self->writenl); + Py_VISIT(self->decoder); + Py_VISIT(self->dict); + return 0; +} + +static int +stringio_clear(stringio *self) +{ + Py_CLEAR(self->readnl); + Py_CLEAR(self->writenl); + Py_CLEAR(self->decoder); + Py_CLEAR(self->dict); + return 0; +} + +static void +stringio_dealloc(stringio *self) +{ + PyTypeObject *tp = Py_TYPE(self); + _PyObject_GC_UNTRACK(self); + self->ok = 0; + if (self->buf) { + PyMem_Free(self->buf); + self->buf = NULL; + } + _PyUnicodeWriter_Dealloc(&self->writer); + (void)stringio_clear(self); + if (self->weakreflist != NULL) { + PyObject_ClearWeakRefs((PyObject *) self); + } + tp->tp_free(self); + Py_DECREF(tp); +} + +static PyObject * +stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + stringio *self; + + assert(type != NULL && type->tp_alloc != NULL); + self = (stringio *)type->tp_alloc(type, 0); + if (self == NULL) + return NULL; + + /* tp_alloc initializes all the fields to zero. So we don't have to + initialize them here. */ + + self->buf = (Py_UCS4 *)PyMem_Malloc(0); + if (self->buf == NULL) { + Py_DECREF(self); + return PyErr_NoMemory(); + } + + return (PyObject *)self; +} + +/*[clinic input] +_io.StringIO.__init__ + initial_value as value: object(c_default="NULL") = '' + newline as newline_obj: object(c_default="NULL") = '\n' + +Text I/O implementation using an in-memory buffer. + +The initial_value argument sets the value of object. The newline +argument is like the one of TextIOWrapper's constructor. +[clinic start generated code]*/ + +static int +_io_StringIO___init___impl(stringio *self, PyObject *value, + PyObject *newline_obj) +/*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/ +{ + const char *newline = "\n"; + Py_ssize_t value_len; + + /* Parse the newline argument. We only want to allow unicode objects or + None. */ + if (newline_obj == Py_None) { + newline = NULL; + } + else if (newline_obj) { + if (!PyUnicode_Check(newline_obj)) { + PyErr_Format(PyExc_TypeError, + "newline must be str or None, not %.200s", + Py_TYPE(newline_obj)->tp_name); + return -1; + } + newline = PyUnicode_AsUTF8(newline_obj); + if (newline == NULL) + return -1; + } + + if (newline && newline[0] != '\0' + && !(newline[0] == '\n' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\0') + && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { + PyErr_Format(PyExc_ValueError, + "illegal newline value: %R", newline_obj); + return -1; + } + if (value && value != Py_None && !PyUnicode_Check(value)) { + PyErr_Format(PyExc_TypeError, + "initial_value must be str or None, not %.200s", + Py_TYPE(value)->tp_name); + return -1; + } + + self->ok = 0; + + _PyUnicodeWriter_Dealloc(&self->writer); + Py_CLEAR(self->readnl); + Py_CLEAR(self->writenl); + Py_CLEAR(self->decoder); + + assert((newline != NULL && newline_obj != Py_None) || + (newline == NULL && newline_obj == Py_None)); + + if (newline) { + self->readnl = PyUnicode_FromString(newline); + if (self->readnl == NULL) + return -1; + } + self->readuniversal = (newline == NULL || newline[0] == '\0'); + self->readtranslate = (newline == NULL); + /* If newline == "", we don't translate anything. + If newline == "\n" or newline == None, we translate to "\n", which is + a no-op. + (for newline == None, TextIOWrapper translates to os.linesep, but it + is pointless for StringIO) + */ + if (newline != NULL && newline[0] == '\r') { + self->writenl = Py_NewRef(self->readnl); + } + + _PyIO_State *module_state = find_io_state_by_def(Py_TYPE(self)); + if (self->readuniversal) { + self->decoder = PyObject_CallFunctionObjArgs( + (PyObject *)module_state->PyIncrementalNewlineDecoder_Type, + Py_None, self->readtranslate ? Py_True : Py_False, NULL); + if (self->decoder == NULL) + return -1; + } + + /* Now everything is set up, resize buffer to size of initial value, + and copy it */ + self->string_size = 0; + if (value && value != Py_None) + value_len = PyUnicode_GetLength(value); + else + value_len = 0; + if (value_len > 0) { + /* This is a heuristic, for newline translation might change + the string length. */ + if (resize_buffer(self, 0) < 0) + return -1; + self->state = STATE_REALIZED; + self->pos = 0; + if (write_str(self, value) < 0) + return -1; + } + else { + /* Empty stringio object, we can start by accumulating */ + if (resize_buffer(self, 0) < 0) + return -1; + _PyUnicodeWriter_Init(&self->writer); + self->writer.overallocate = 1; + self->state = STATE_ACCUMULATING; + } + self->pos = 0; + self->module_state = module_state; + self->closed = 0; + self->ok = 1; + return 0; +} + +/* Properties and pseudo-properties */ + +/*[clinic input] +_io.StringIO.readable + +Returns True if the IO object can be read. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_readable_impl(stringio *self) +/*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/ +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + Py_RETURN_TRUE; +} + +/*[clinic input] +_io.StringIO.writable + +Returns True if the IO object can be written. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_writable_impl(stringio *self) +/*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/ +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + Py_RETURN_TRUE; +} + +/*[clinic input] +_io.StringIO.seekable + +Returns True if the IO object can be seeked. +[clinic start generated code]*/ + +static PyObject * +_io_StringIO_seekable_impl(stringio *self) +/*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/ +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + Py_RETURN_TRUE; +} + +/* Pickling support. + + The implementation of __getstate__ is similar to the one for BytesIO, + except that we also save the newline parameter. For __setstate__ and unlike + BytesIO, we call __init__ to restore the object's state. Doing so allows us + to avoid decoding the complex newline state while keeping the object + representation compact. + + See comment in bytesio.c regarding why only pickle protocols and onward are + supported. +*/ + +static PyObject * +stringio_getstate(stringio *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *initvalue = _io_StringIO_getvalue_impl(self); + PyObject *dict; + PyObject *state; + + if (initvalue == NULL) + return NULL; + if (self->dict == NULL) { + dict = Py_NewRef(Py_None); + } + else { + dict = PyDict_Copy(self->dict); + if (dict == NULL) { + Py_DECREF(initvalue); + return NULL; + } + } + + state = Py_BuildValue("(OOnN)", initvalue, + self->readnl ? self->readnl : Py_None, + self->pos, dict); + Py_DECREF(initvalue); + return state; +} + +static PyObject * +stringio_setstate(stringio *self, PyObject *state) +{ + PyObject *initarg; + PyObject *position_obj; + PyObject *dict; + Py_ssize_t pos; + + assert(state != NULL); + CHECK_CLOSED(self); + + /* We allow the state tuple to be longer than 4, because we may need + someday to extend the object's state without breaking + backward-compatibility. */ + if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) { + PyErr_Format(PyExc_TypeError, + "%.200s.__setstate__ argument should be 4-tuple, got %.200s", + Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); + return NULL; + } + + /* Initialize the object's state. */ + initarg = PyTuple_GetSlice(state, 0, 2); + if (initarg == NULL) + return NULL; + if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) { + Py_DECREF(initarg); + return NULL; + } + Py_DECREF(initarg); + + /* Restore the buffer state. Even if __init__ did initialize the buffer, + we have to initialize it again since __init__ may translate the + newlines in the initial_value string. We clearly do not want that + because the string value in the state tuple has already been translated + once by __init__. So we do not take any chance and replace object's + buffer completely. */ + { + PyObject *item; + Py_UCS4 *buf; + Py_ssize_t bufsize; + + item = PyTuple_GET_ITEM(state, 0); + buf = PyUnicode_AsUCS4Copy(item); + if (buf == NULL) + return NULL; + bufsize = PyUnicode_GET_LENGTH(item); + + if (resize_buffer(self, bufsize) < 0) { + PyMem_Free(buf); + return NULL; + } + memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4)); + PyMem_Free(buf); + self->string_size = bufsize; + } + + /* Set carefully the position value. Alternatively, we could use the seek + method instead of modifying self->pos directly to better protect the + object internal state against erroneous (or malicious) inputs. */ + position_obj = PyTuple_GET_ITEM(state, 2); + if (!PyLong_Check(position_obj)) { + PyErr_Format(PyExc_TypeError, + "third item of state must be an integer, got %.200s", + Py_TYPE(position_obj)->tp_name); + return NULL; + } + pos = PyLong_AsSsize_t(position_obj); + if (pos == -1 && PyErr_Occurred()) + return NULL; + if (pos < 0) { + PyErr_SetString(PyExc_ValueError, + "position value cannot be negative"); + return NULL; + } + self->pos = pos; + + /* Set the dictionary of the instance variables. */ + dict = PyTuple_GET_ITEM(state, 3); + if (dict != Py_None) { + if (!PyDict_Check(dict)) { + PyErr_Format(PyExc_TypeError, + "fourth item of state should be a dict, got a %.200s", + Py_TYPE(dict)->tp_name); + return NULL; + } + if (self->dict) { + /* Alternatively, we could replace the internal dictionary + completely. However, it seems more practical to just update it. */ + if (PyDict_Update(self->dict, dict) < 0) + return NULL; + } + else { + self->dict = Py_NewRef(dict); + } + } + + Py_RETURN_NONE; +} + + +static PyObject * +stringio_closed(stringio *self, void *context) +{ + CHECK_INITIALIZED(self); + return PyBool_FromLong(self->closed); +} + +static PyObject * +stringio_line_buffering(stringio *self, void *context) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + Py_RETURN_FALSE; +} + +static PyObject * +stringio_newlines(stringio *self, void *context) +{ + CHECK_INITIALIZED(self); + CHECK_CLOSED(self); + if (self->decoder == NULL) + Py_RETURN_NONE; + return PyObject_GetAttr(self->decoder, &_Py_ID(newlines)); +} + +#define clinic_state() (find_io_state_by_def(Py_TYPE(self))) +#include "clinic/stringio.c.h" +#undef clinic_state + +static struct PyMethodDef stringio_methods[] = { + _IO_STRINGIO_CLOSE_METHODDEF + _IO_STRINGIO_GETVALUE_METHODDEF + _IO_STRINGIO_READ_METHODDEF + _IO_STRINGIO_READLINE_METHODDEF + _IO_STRINGIO_TELL_METHODDEF + _IO_STRINGIO_TRUNCATE_METHODDEF + _IO_STRINGIO_SEEK_METHODDEF + _IO_STRINGIO_WRITE_METHODDEF + + _IO_STRINGIO_SEEKABLE_METHODDEF + _IO_STRINGIO_READABLE_METHODDEF + _IO_STRINGIO_WRITABLE_METHODDEF + + {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS}, + {"__setstate__", (PyCFunction)stringio_setstate, METH_O}, + {NULL, NULL} /* sentinel */ +}; + +static PyGetSetDef stringio_getset[] = { + {"closed", (getter)stringio_closed, NULL, NULL}, + {"newlines", (getter)stringio_newlines, NULL, NULL}, + /* (following comments straight off of the original Python wrapper:) + XXX Cruft to support the TextIOWrapper API. This would only + be meaningful if StringIO supported the buffer attribute. + Hopefully, a better solution, than adding these pseudo-attributes, + will be found. + */ + {"line_buffering", (getter)stringio_line_buffering, NULL, NULL}, + {NULL} +}; + +static struct PyMemberDef stringio_members[] = { + {"__weaklistoffset__", T_PYSSIZET, offsetof(stringio, weakreflist), READONLY}, + {"__dictoffset__", T_PYSSIZET, offsetof(stringio, dict), READONLY}, + {NULL}, +}; + +static PyType_Slot stringio_slots[] = { + {Py_tp_dealloc, stringio_dealloc}, + {Py_tp_doc, (void *)_io_StringIO___init____doc__}, + {Py_tp_traverse, stringio_traverse}, + {Py_tp_clear, stringio_clear}, + {Py_tp_iternext, stringio_iternext}, + {Py_tp_methods, stringio_methods}, + {Py_tp_members, stringio_members}, + {Py_tp_getset, stringio_getset}, + {Py_tp_init, _io_StringIO___init__}, + {Py_tp_new, stringio_new}, + {0, NULL}, +}; + +PyType_Spec stringio_spec = { + .name = "_io.StringIO", + .basicsize = sizeof(stringio), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_IMMUTABLETYPE), + .slots = stringio_slots, +}; |