aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/zstandard/py3/c-ext/compressoriterator.c
blob: 9e4936caa8bb474b8ad7904352ba131086ded6ce (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
/**
 * Copyright (c) 2016-present, Gregory Szorc
 * All rights reserved.
 *
 * This software may be modified and distributed under the terms
 * of the BSD license. See the LICENSE file for details.
 */

#include "python-zstandard.h"

#define min(a, b) (((a) < (b)) ? (a) : (b))

extern PyObject *ZstdError;

static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator *self) {
    Py_XDECREF(self->readResult);
    Py_XDECREF(self->compressor);
    Py_XDECREF(self->reader);

    if (self->buffer.buf) {
        PyBuffer_Release(&self->buffer);
        memset(&self->buffer, 0, sizeof(self->buffer));
    }

    if (self->output.dst) {
        PyMem_Free(self->output.dst);
        self->output.dst = NULL;
    }

    PyObject_Del(self);
}

static PyObject *ZstdCompressorIterator_iter(PyObject *self) {
    Py_INCREF(self);
    return self;
}

static PyObject *ZstdCompressorIterator_iternext(ZstdCompressorIterator *self) {
    size_t zresult;
    PyObject *readResult = NULL;
    PyObject *chunk;
    char *readBuffer;
    Py_ssize_t readSize = 0;
    Py_ssize_t bufferRemaining;

    if (self->finishedOutput) {
        PyErr_SetString(PyExc_StopIteration, "output flushed");
        return NULL;
    }

feedcompressor:

    /* If we have data left in the input, consume it. */
    if (self->input.pos < self->input.size) {
        Py_BEGIN_ALLOW_THREADS zresult =
            ZSTD_compressStream2(self->compressor->cctx, &self->output,
                                 &self->input, ZSTD_e_continue);
        Py_END_ALLOW_THREADS

            /* Release the Python object holding the input buffer. */
            if (self->input.pos == self->input.size) {
            self->input.src = NULL;
            self->input.pos = 0;
            self->input.size = 0;
            Py_DECREF(self->readResult);
            self->readResult = NULL;
        }

        if (ZSTD_isError(zresult)) {
            PyErr_Format(ZstdError, "zstd compress error: %s",
                         ZSTD_getErrorName(zresult));
            return NULL;
        }

        /* If it produced output data, emit it. */
        if (self->output.pos) {
            chunk =
                PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
            self->output.pos = 0;
            return chunk;
        }
    }

    /* We should never have output data sitting around after a previous call. */
    assert(self->output.pos == 0);

    /* The code above should have either emitted a chunk and returned or
    consumed the entire input buffer. So the state of the input buffer is not
    relevant. */
    if (!self->finishedInput) {
        if (self->reader) {
            readResult =
                PyObject_CallMethod(self->reader, "read", "I", self->inSize);
            if (!readResult) {
                return NULL;
            }

            PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
        }
        else {
            assert(self->buffer.buf);

            /* Only support contiguous C arrays. */
            assert(self->buffer.strides == NULL &&
                   self->buffer.suboffsets == NULL);
            assert(self->buffer.itemsize == 1);

            readBuffer = (char *)self->buffer.buf + self->bufferOffset;
            bufferRemaining = self->buffer.len - self->bufferOffset;
            readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
            self->bufferOffset += readSize;
        }

        if (0 == readSize) {
            Py_XDECREF(readResult);
            self->finishedInput = 1;
        }
        else {
            self->readResult = readResult;
        }
    }

    /* EOF */
    if (0 == readSize) {
        self->input.src = NULL;
        self->input.size = 0;
        self->input.pos = 0;

        zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
                                       &self->input, ZSTD_e_end);
        if (ZSTD_isError(zresult)) {
            PyErr_Format(ZstdError, "error ending compression stream: %s",
                         ZSTD_getErrorName(zresult));
            return NULL;
        }

        assert(self->output.pos);

        if (0 == zresult) {
            self->finishedOutput = 1;
        }

        chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
        self->output.pos = 0;
        return chunk;
    }

    /* New data from reader. Feed into compressor. */
    self->input.src = readBuffer;
    self->input.size = readSize;
    self->input.pos = 0;

    Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2(
        self->compressor->cctx, &self->output, &self->input, ZSTD_e_continue);
    Py_END_ALLOW_THREADS

        /* The input buffer currently points to memory managed by Python
        (readBuffer). This object was allocated by this function. If it wasn't
        fully consumed, we need to release it in a subsequent function call.
        If it is fully consumed, do that now.
        */
        if (self->input.pos == self->input.size) {
        self->input.src = NULL;
        self->input.pos = 0;
        self->input.size = 0;
        Py_XDECREF(self->readResult);
        self->readResult = NULL;
    }

    if (ZSTD_isError(zresult)) {
        PyErr_Format(ZstdError, "zstd compress error: %s",
                     ZSTD_getErrorName(zresult));
        return NULL;
    }

    assert(self->input.pos <= self->input.size);

    /* If we didn't write anything, start the process over. */
    if (0 == self->output.pos) {
        goto feedcompressor;
    }

    chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
    self->output.pos = 0;
    return chunk;
}

PyType_Slot ZstdCompressorIteratorSlots[] = {
    {Py_tp_dealloc, ZstdCompressorIterator_dealloc},
    {Py_tp_iter, ZstdCompressorIterator_iter},
    {Py_tp_iternext, ZstdCompressorIterator_iternext},
    {Py_tp_new, PyType_GenericNew},
    {0, NULL},
};

PyType_Spec ZstdCompressorIteratorSpec = {
    "zstd.ZstdCompressorIterator",
    sizeof(ZstdCompressorIterator),
    0,
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    ZstdCompressorIteratorSlots,
};

PyTypeObject *ZstdCompressorIteratorType;

void compressoriterator_module_init(PyObject *mod) {
    ZstdCompressorIteratorType =
        (PyTypeObject *)PyType_FromSpec(&ZstdCompressorIteratorSpec);
    if (PyType_Ready(ZstdCompressorIteratorType) < 0) {
        return;
    }
}