1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
"""Internal classes used by the gzip, lzma and bz2 modules"""
import io
import sys
BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size
class BaseStream(io.BufferedIOBase):
"""Mode-checking helper functions."""
def _check_not_closed(self):
if self.closed:
raise ValueError("I/O operation on closed file")
def _check_can_read(self):
if not self.readable():
raise io.UnsupportedOperation("File not open for reading")
def _check_can_write(self):
if not self.writable():
raise io.UnsupportedOperation("File not open for writing")
def _check_can_seek(self):
if not self.readable():
raise io.UnsupportedOperation("Seeking is only supported "
"on files open for reading")
if not self.seekable():
raise io.UnsupportedOperation("The underlying file object "
"does not support seeking")
class DecompressReader(io.RawIOBase):
"""Adapts the decompressor API to a RawIOBase reader API"""
def readable(self):
return True
def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args):
self._fp = fp
self._eof = False
self._pos = 0 # Current offset in decompressed stream
# Set to size of decompressed stream once it is known, for SEEK_END
self._size = -1
# Save the decompressor factory and arguments.
# If the file contains multiple compressed streams, each
# stream will need a separate decompressor object. A new decompressor
# object is also needed when implementing a backwards seek().
self._decomp_factory = decomp_factory
self._decomp_args = decomp_args
self._decompressor = self._decomp_factory(**self._decomp_args)
# Exception class to catch from decompressor signifying invalid
# trailing data to ignore
self._trailing_error = trailing_error
def close(self):
self._decompressor = None
return super().close()
def seekable(self):
return self._fp.seekable()
def readinto(self, b):
with memoryview(b) as view, view.cast("B") as byte_view:
data = self.read(len(byte_view))
byte_view[:len(data)] = data
return len(data)
def read(self, size=-1):
if size < 0:
return self.readall()
if not size or self._eof:
return b""
data = None # Default if EOF is encountered
# Depending on the input data, our call to the decompressor may not
# return any data. In this case, try again after reading another block.
while True:
if self._decompressor.eof:
rawblock = (self._decompressor.unused_data or
self._fp.read(BUFFER_SIZE))
if not rawblock:
break
# Continue to next stream.
self._decompressor = self._decomp_factory(
**self._decomp_args)
try:
data = self._decompressor.decompress(rawblock, size)
except self._trailing_error:
# Trailing data isn't a valid compressed stream; ignore it.
break
else:
if self._decompressor.needs_input:
rawblock = self._fp.read(BUFFER_SIZE)
if not rawblock:
raise EOFError("Compressed file ended before the "
"end-of-stream marker was reached")
else:
rawblock = b""
data = self._decompressor.decompress(rawblock, size)
if data:
break
if not data:
self._eof = True
self._size = self._pos
return b""
self._pos += len(data)
return data
def readall(self):
chunks = []
# sys.maxsize means the max length of output buffer is unlimited,
# so that the whole input buffer can be decompressed within one
# .decompress() call.
while data := self.read(sys.maxsize):
chunks.append(data)
return b"".join(chunks)
# Rewind the file to the beginning of the data stream.
def _rewind(self):
self._fp.seek(0)
self._eof = False
self._pos = 0
self._decompressor = self._decomp_factory(**self._decomp_args)
def seek(self, offset, whence=io.SEEK_SET):
# Recalculate offset as an absolute file position.
if whence == io.SEEK_SET:
pass
elif whence == io.SEEK_CUR:
offset = self._pos + offset
elif whence == io.SEEK_END:
# Seeking relative to EOF - we need to know the file's size.
if self._size < 0:
while self.read(io.DEFAULT_BUFFER_SIZE):
pass
offset = self._size + offset
else:
raise ValueError("Invalid value for whence: {}".format(whence))
# Make it so that offset is the number of bytes to skip forward.
if offset < self._pos:
self._rewind()
else:
offset -= self._pos
# Read and discard data until we reach the desired position.
while offset > 0:
data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset))
if not data:
break
offset -= len(data)
return self._pos
def tell(self):
"""Return the current file position."""
return self._pos
|