summaryrefslogtreecommitdiffstats
path: root/contrib/python/zstandard/py2
diff options
context:
space:
mode:
authorvitalyisaev <[email protected]>2023-11-14 09:58:56 +0300
committervitalyisaev <[email protected]>2023-11-14 10:20:20 +0300
commitc2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
treecd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/python/zstandard/py2
parentd4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/python/zstandard/py2')
-rw-r--r--contrib/python/zstandard/py2/.dist-info/METADATA1637
-rw-r--r--contrib/python/zstandard/py2/.dist-info/top_level.txt2
-rw-r--r--contrib/python/zstandard/py2/LICENSE27
-rw-r--r--contrib/python/zstandard/py2/README.rst1615
-rw-r--r--contrib/python/zstandard/py2/c-ext/bufferutil.c792
-rw-r--r--contrib/python/zstandard/py2/c-ext/compressionchunker.c360
-rw-r--r--contrib/python/zstandard/py2/c-ext/compressiondict.c411
-rw-r--r--contrib/python/zstandard/py2/c-ext/compressionparams.c572
-rw-r--r--contrib/python/zstandard/py2/c-ext/compressionreader.c818
-rw-r--r--contrib/python/zstandard/py2/c-ext/compressionwriter.c372
-rw-r--r--contrib/python/zstandard/py2/c-ext/compressobj.c256
-rw-r--r--contrib/python/zstandard/py2/c-ext/compressor.c1676
-rw-r--r--contrib/python/zstandard/py2/c-ext/compressoriterator.c235
-rw-r--r--contrib/python/zstandard/py2/c-ext/constants.c109
-rw-r--r--contrib/python/zstandard/py2/c-ext/decompressionreader.c781
-rw-r--r--contrib/python/zstandard/py2/c-ext/decompressionwriter.c295
-rw-r--r--contrib/python/zstandard/py2/c-ext/decompressobj.c202
-rw-r--r--contrib/python/zstandard/py2/c-ext/decompressor.c1828
-rw-r--r--contrib/python/zstandard/py2/c-ext/decompressoriterator.c249
-rw-r--r--contrib/python/zstandard/py2/c-ext/frameparams.c138
-rw-r--r--contrib/python/zstandard/py2/c-ext/python-zstandard.h359
-rw-r--r--contrib/python/zstandard/py2/ya.make58
-rw-r--r--contrib/python/zstandard/py2/zstandard/__init__.py75
-rw-r--r--contrib/python/zstandard/py2/zstd.c344
24 files changed, 13211 insertions, 0 deletions
diff --git a/contrib/python/zstandard/py2/.dist-info/METADATA b/contrib/python/zstandard/py2/.dist-info/METADATA
new file mode 100644
index 00000000000..1c38fe3512d
--- /dev/null
+++ b/contrib/python/zstandard/py2/.dist-info/METADATA
@@ -0,0 +1,1637 @@
+Metadata-Version: 2.1
+Name: zstandard
+Version: 0.14.1
+Summary: Zstandard bindings for Python
+Home-page: https://github.com/indygreg/python-zstandard
+Author: Gregory Szorc
+Author-email: [email protected]
+License: BSD
+Keywords: zstandard zstd compression
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Programming Language :: C
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+
+================
+python-zstandard
+================
+
+This project provides Python bindings for interfacing with the
+`Zstandard <http://www.zstd.net>`_ compression library. A C extension
+and CFFI interface are provided.
+
+The primary goal of the project is to provide a rich interface to the
+underlying C API through a Pythonic interface while not sacrificing
+performance. This means exposing most of the features and flexibility
+of the C API while not sacrificing usability or safety that Python provides.
+
+The canonical home for this project lives in a Mercurial repository run by
+the author. For convenience, that repository is frequently synchronized to
+https://github.com/indygreg/python-zstandard.
+
+| |ci-status|
+
+Requirements
+============
+
+This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8
+on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
+x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
+
+Installing
+==========
+
+This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
+So, to install this package::
+
+ $ pip install zstandard
+
+Binary wheels are made available for some platforms. If you need to
+install from a source distribution, all you should need is a working C
+compiler and the Python development headers/libraries. On many Linux
+distributions, you can install a ``python-dev`` or ``python-devel``
+package to provide these dependencies.
+
+Packages are also uploaded to Anaconda Cloud at
+https://anaconda.org/indygreg/zstandard. See that URL for how to install
+this package with ``conda``.
+
+Legacy Format Support
+=====================
+
+To enable legacy zstd format support which is needed to handle files compressed
+with zstd < 1.0 you need to provide an installation option::
+
+ $ pip install zstandard --install-option="--legacy"
+
+and since pip 7.0 it is possible to have the following line in your
+requirements.txt::
+
+ zstandard --install-option="--legacy"
+
+Performance
+===========
+
+zstandard is a highly tunable compression algorithm. In its default settings
+(compression level 3), it will be faster at compression and decompression and
+will have better compression ratios than zlib on most data sets. When tuned
+for speed, it approaches lz4's speed and ratios. When tuned for compression
+ratio, it approaches lzma ratios and compression speed, but decompression
+speed is much faster. See the official zstandard documentation for more.
+
+zstandard and this library support multi-threaded compression. There is a
+mechanism to compress large inputs using multiple threads.
+
+The performance of this library is usually very similar to what the zstandard
+C API can deliver. Overhead in this library is due to general Python overhead
+and can't easily be avoided by *any* zstandard Python binding. This library
+exposes multiple APIs for performing compression and decompression so callers
+can pick an API suitable for their need. Contrast with the compression
+modules in Python's standard library (like ``zlib``), which only offer limited
+mechanisms for performing operations. The API flexibility means consumers can
+choose to use APIs that facilitate zero copying or minimize Python object
+creation and garbage collection overhead.
+
+This library is capable of single-threaded throughputs well over 1 GB/s. For
+exact numbers, measure yourself. The source code repository has a ``bench.py``
+script that can be used to measure things.
+
+API
+===
+
+To interface with Zstandard, simply import the ``zstandard`` module::
+
+ import zstandard
+
+It is a popular convention to alias the module as a different name for
+brevity::
+
+ import zstandard as zstd
+
+This module attempts to import and use either the C extension or CFFI
+implementation. On Python platforms known to support C extensions (like
+CPython), it raises an ImportError if the C extension cannot be imported.
+On Python platforms known to not support C extensions (like PyPy), it only
+attempts to import the CFFI implementation and raises ImportError if that
+can't be done. On other platforms, it first tries to import the C extension
+then falls back to CFFI if that fails and raises ImportError if CFFI fails.
+
+To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
+environment variable can be set. The following values are accepted:
+
+default
+ The behavior described above.
+cffi_fallback
+ Always try to import the C extension then fall back to CFFI if that
+ fails.
+cext
+ Only attempt to import the C extension.
+cffi
+ Only attempt to import the CFFI implementation.
+
+In addition, the ``zstandard`` module exports a ``backend`` attribute
+containing the string name of the backend being used. It will be one
+of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
+
+The types, functions, and attributes exposed by the ``zstandard`` module
+are documented in the sections below.
+
+.. note::
+
+ The documentation in this section makes references to various zstd
+ concepts and functionality. The source repository contains a
+ ``docs/concepts.rst`` file explaining these in more detail.
+
+ZstdCompressor
+--------------
+
+The ``ZstdCompressor`` class provides an interface for performing
+compression operations. Each instance is essentially a wrapper around a
+``ZSTD_CCtx`` from the C API.
+
+Each instance is associated with parameters that control compression
+behavior. These come from the following named arguments (all optional):
+
+level
+ Integer compression level. Valid values are between 1 and 22.
+dict_data
+ Compression dictionary to use.
+
+ Note: When using dictionary data and ``compress()`` is called multiple
+ times, the ``ZstdCompressionParameters`` derived from an integer
+ compression ``level`` and the first compressed data's size will be reused
+ for all subsequent operations. This may not be desirable if source data
+ size varies significantly.
+compression_params
+ A ``ZstdCompressionParameters`` instance defining compression settings.
+write_checksum
+ Whether a 4 byte checksum should be written with the compressed data.
+ Defaults to False. If True, the decompressor can verify that decompressed
+ data matches the original input data.
+write_content_size
+ Whether the size of the uncompressed data will be written into the
+ header of compressed data. Defaults to True. The data will only be
+ written if the compressor knows the size of the input data. This is
+ often not true for streaming compression.
+write_dict_id
+ Whether to write the dictionary ID into the compressed data.
+ Defaults to True. The dictionary ID is only written if a dictionary
+ is being used.
+threads
+ Enables and sets the number of threads to use for multi-threaded compression
+ operations. Defaults to 0, which means to use single-threaded compression.
+ Negative values will resolve to the number of logical CPUs in the system.
+ Read below for more info on multi-threaded compression. This argument only
+ controls thread count for operations that operate on individual pieces of
+ data. APIs that spawn multiple threads for working on multiple pieces of
+ data have their own ``threads`` argument.
+
+``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
+``write_content_size``, ``write_dict_id``, and ``threads``.
+
+Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
+instances can be called from multiple Python threads simultaneously. In other
+words, assume instances are not thread safe unless stated otherwise.
+
+Utility Methods
+^^^^^^^^^^^^^^^
+
+``frame_progression()`` returns a 3-tuple containing the number of bytes
+ingested, consumed, and produced by the current compression operation.
+
+``memory_size()`` obtains the memory utilization of the underlying zstd
+compression context, in bytes.::
+
+ cctx = zstd.ZstdCompressor()
+ memory = cctx.memory_size()
+
+Simple API
+^^^^^^^^^^
+
+``compress(data)`` compresses and returns data as a one-shot operation.::
+
+ cctx = zstd.ZstdCompressor()
+ compressed = cctx.compress(b'data to compress')
+
+The ``data`` argument can be any object that implements the *buffer protocol*.
+
+Stream Reader API
+^^^^^^^^^^^^^^^^^
+
+``stream_reader(source)`` can be used to obtain an object conforming to the
+``io.RawIOBase`` interface for reading compressed output as a stream::
+
+ with open(path, 'rb') as fh:
+ cctx = zstd.ZstdCompressor()
+ reader = cctx.stream_reader(fh)
+ while True:
+ chunk = reader.read(16384)
+ if not chunk:
+ break
+
+ # Do something with compressed chunk.
+
+Instances can also be used as context managers::
+
+ with open(path, 'rb') as fh:
+ with cctx.stream_reader(fh) as reader:
+ while True:
+ chunk = reader.read(16384)
+ if not chunk:
+ break
+
+ # Do something with compressed chunk.
+
+When the context manager exits or ``close()`` is called, the stream is closed,
+underlying resources are released, and future operations against the compression
+stream will fail.
+
+The ``source`` argument to ``stream_reader()`` can be any object with a
+``read(size)`` method or any object implementing the *buffer protocol*.
+
+``stream_reader()`` accepts a ``size`` argument specifying how large the input
+stream is. This is used to adjust compression parameters so they are
+tailored to the source size.::
+
+ with open(path, 'rb') as fh:
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
+ ...
+
+If the ``source`` is a stream, you can specify how large ``read()`` requests
+to that stream should be via the ``read_size`` argument. It defaults to
+``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
+
+ with open(path, 'rb') as fh:
+ cctx = zstd.ZstdCompressor()
+ # Will perform fh.read(8192) when obtaining data to feed into the
+ # compressor.
+ with cctx.stream_reader(fh, read_size=8192) as reader:
+ ...
+
+The stream returned by ``stream_reader()`` is neither writable nor seekable
+(even if the underlying source is seekable). ``readline()`` and
+``readlines()`` are not implemented because they don't make sense for
+compressed data. ``tell()`` returns the number of compressed bytes
+emitted so far.
+
+Streaming Input API
+^^^^^^^^^^^^^^^^^^^
+
+``stream_writer(fh)`` allows you to *stream* data into a compressor.
+
+Returned instances implement the ``io.RawIOBase`` interface. Only methods
+that involve writing will do useful things.
+
+The argument to ``stream_writer()`` must have a ``write(data)`` method. As
+compressed data is available, ``write()`` will be called with the compressed
+data as its argument. Many common Python types implement ``write()``, including
+open file handles and ``io.BytesIO``.
+
+The ``write(data)`` method is used to feed data into the compressor.
+
+The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever
+data remains within the compressor's internal state into the output object. This
+may result in 0 or more ``write()`` calls to the output object. This method
+accepts an optional ``flush_mode`` argument to control the flushing behavior.
+Its value can be any of the ``FLUSH_*`` constants.
+
+Both ``write()`` and ``flush()`` return the number of bytes written to the
+object's ``write()``. In many cases, small inputs do not accumulate enough
+data to cause a write and ``write()`` will return ``0``.
+
+Calling ``close()`` will mark the stream as closed and subsequent I/O
+operations will raise ``ValueError`` (per the documented behavior of
+``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
+stream if such a method exists.
+
+Typical usage is as follows::
+
+ cctx = zstd.ZstdCompressor(level=10)
+ compressor = cctx.stream_writer(fh)
+
+ compressor.write(b'chunk 0\n')
+ compressor.write(b'chunk 1\n')
+ compressor.flush()
+ # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point.
+ # Receiver is also expecting more data in the zstd *frame*.
+
+ compressor.write(b'chunk 2\n')
+ compressor.flush(zstd.FLUSH_FRAME)
+ # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2\n``.
+ # Receiver is expecting no more data, as the zstd frame is closed.
+ # Any future calls to ``write()`` at this point will construct a new
+ # zstd frame.
+
+Instances can be used as context managers. Exiting the context manager is
+the equivalent of calling ``close()``, which is equivalent to calling
+``flush(zstd.FLUSH_FRAME)``::
+
+ cctx = zstd.ZstdCompressor(level=10)
+ with cctx.stream_writer(fh) as compressor:
+ compressor.write(b'chunk 0')
+ compressor.write(b'chunk 1')
+ ...
+
+.. important::
+
+ If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute
+ a full zstd *frame* and consumers of this data may complain about malformed
+ input. It is recommended to use instances as a context manager to ensure
+ *frames* are properly finished.
+
+If the size of the data being fed to this streaming compressor is known,
+you can declare it before compression begins::
+
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_writer(fh, size=data_len) as compressor:
+ compressor.write(chunk0)
+ compressor.write(chunk1)
+ ...
+
+Declaring the size of the source data allows compression parameters to
+be tuned. And if ``write_content_size`` is used, it also results in the
+content size being written into the frame header of the output data.
+
+The size of chunks being ``write()`` to the destination can be specified::
+
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_writer(fh, write_size=32768) as compressor:
+ ...
+
+To see how much memory is being used by the streaming compressor::
+
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_writer(fh) as compressor:
+ ...
+ byte_size = compressor.memory_size()
+
+The total number of bytes written so far is exposed via ``tell()``::
+
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_writer(fh) as compressor:
+ ...
+ total_written = compressor.tell()
+
+``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
+the return value of ``write()``. When ``False`` (the default), ``write()`` returns
+the number of bytes that were ``write()``en to the underlying object. When
+``True``, ``write()`` returns the number of bytes read from the input that
+were subsequently written to the compressor. ``True`` is the *proper* behavior
+for ``write()`` as specified by the ``io.RawIOBase`` interface and will become
+the default value in a future release.
+
+Streaming Output API
+^^^^^^^^^^^^^^^^^^^^
+
+``read_to_iter(reader)`` provides a mechanism to stream data out of a
+compressor as an iterator of data chunks.::
+
+ cctx = zstd.ZstdCompressor()
+ for chunk in cctx.read_to_iter(fh):
+ # Do something with emitted data.
+
+``read_to_iter()`` accepts an object that has a ``read(size)`` method or
+conforms to the buffer protocol.
+
+Uncompressed data is fetched from the source either by calling ``read(size)``
+or by fetching a slice of data from the object directly (in the case where
+the buffer protocol is being used). The returned iterator consists of chunks
+of compressed data.
+
+If reading from the source via ``read()``, ``read()`` will be called until
+it raises or returns an empty bytes (``b''``). It is perfectly valid for
+the source to deliver fewer bytes than what was requested by ``read(size)``.
+
+Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
+declaring the size of the input stream::
+
+ cctx = zstd.ZstdCompressor()
+ for chunk in cctx.read_to_iter(fh, size=some_int):
+ pass
+
+You can also control the size that data is ``read()`` from the source and
+the ideal size of output chunks::
+
+ cctx = zstd.ZstdCompressor()
+ for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
+ pass
+
+Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
+over the sizes of chunks fed into the compressor. Instead, chunk sizes will
+be whatever the object being read from delivers. These will often be of a
+uniform size.
+
+Stream Copying API
+^^^^^^^^^^^^^^^^^^
+
+``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
+compressing it.::
+
+ cctx = zstd.ZstdCompressor()
+ cctx.copy_stream(ifh, ofh)
+
+For example, say you wish to compress a file::
+
+ cctx = zstd.ZstdCompressor()
+ with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
+ cctx.copy_stream(ifh, ofh)
+
+It is also possible to declare the size of the source stream::
+
+ cctx = zstd.ZstdCompressor()
+ cctx.copy_stream(ifh, ofh, size=len_of_input)
+
+You can also specify how large the chunks that are ``read()`` from and
+``write()`` to the streams should be::
+
+ cctx = zstd.ZstdCompressor()
+ cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
+
+The stream copier returns a 2-tuple of bytes read and written::
+
+ cctx = zstd.ZstdCompressor()
+ read_count, write_count = cctx.copy_stream(ifh, ofh)
+
+Compressor API
+^^^^^^^^^^^^^^
+
+``compressobj()`` returns an object that exposes ``compress(data)`` and
+``flush()`` methods. Each returns compressed data or an empty bytes.
+
+The purpose of ``compressobj()`` is to provide an API-compatible interface
+with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
+swap in different compressor objects while using the same API.
+
+``flush()`` accepts an optional argument indicating how to end the stream.
+``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
+Once this type of flush is performed, ``compress()`` and ``flush()`` can
+no longer be called. This type of flush **must** be called to end the
+compression context. If not called, returned data may be incomplete.
+
+A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
+zstd block. Flushes of this type can be performed multiple times. The next
+call to ``compress()`` will begin a new zstd block.
+
+Here is how this API should be used::
+
+ cctx = zstd.ZstdCompressor()
+ cobj = cctx.compressobj()
+ data = cobj.compress(b'raw input 0')
+ data = cobj.compress(b'raw input 1')
+ data = cobj.flush()
+
+Or to flush blocks::
+
+ cctx = zstd.ZstdCompressor()
+ cobj = cctx.compressobj()
+ data = cobj.compress(b'chunk in first block')
+ data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
+ data = cobj.compress(b'chunk in second block')
+ data = cobj.flush()
+
+For best performance results, keep input chunks under 256KB. This avoids
+extra allocations for a large output object.
+
+It is possible to declare the input size of the data that will be fed into
+the compressor::
+
+ cctx = zstd.ZstdCompressor()
+ cobj = cctx.compressobj(size=6)
+ data = cobj.compress(b'foobar')
+ data = cobj.flush()
+
+Chunker API
+^^^^^^^^^^^
+
+``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
+an object that can be used to iteratively feed chunks of data into a compressor
+and produce output chunks of a uniform size.
+
+The object returned by ``chunker()`` exposes the following methods:
+
+``compress(data)``
+ Feeds new input data into the compressor.
+
+``flush()``
+ Flushes all data currently in the compressor.
+
+``finish()``
+ Signals the end of input data. No new data can be compressed after this
+ method is called.
+
+``compress()``, ``flush()``, and ``finish()`` all return an iterator of
+``bytes`` instances holding compressed data. The iterator may be empty. Callers
+MUST iterate through all elements of the returned iterator before performing
+another operation on the object.
+
+All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
+
+``flush()`` and ``finish()`` may return a final chunk smaller than
+``chunk_size``.
+
+Here is how the API should be used::
+
+ cctx = zstd.ZstdCompressor()
+ chunker = cctx.chunker(chunk_size=32768)
+
+ with open(path, 'rb') as fh:
+ while True:
+ in_chunk = fh.read(32768)
+ if not in_chunk:
+ break
+
+ for out_chunk in chunker.compress(in_chunk):
+ # Do something with output chunk of size 32768.
+
+ for out_chunk in chunker.finish():
+ # Do something with output chunks that finalize the zstd frame.
+
+The ``chunker()`` API is often a better alternative to ``compressobj()``.
+
+``compressobj()`` will emit output data as it is available. This results in a
+*stream* of output chunks of varying sizes. The consistency of the output chunk
+size with ``chunker()`` is more appropriate for many usages, such as sending
+compressed data to a socket.
+
+``compressobj()`` may also perform extra memory reallocations in order to
+dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
+chunks are all the same size (except for flushed or final chunks), there is
+less memory allocation overhead.
+
+Batch Compression API
+^^^^^^^^^^^^^^^^^^^^^
+
+(Experimental. Not yet supported in CFFI bindings.)
+
+``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
+inputs as a single operation.
+
+Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
+``BufferWithSegments``, or a list containing byte like objects. Each element of
+the container will be compressed individually using the configured parameters
+on the ``ZstdCompressor`` instance.
+
+The ``threads`` argument controls how many threads to use for compression. The
+default is ``0`` which means to use a single thread. Negative values use the
+number of logical CPUs in the machine.
+
+The function returns a ``BufferWithSegmentsCollection``. This type represents
+N discrete memory allocations, each holding 1 or more compressed frames.
+
+Output data is written to shared memory buffers. This means that unlike
+regular Python objects, a reference to *any* object within the collection
+keeps the shared buffer and therefore memory backing it alive. This can have
+undesirable effects on process memory usage.
+
+The API and behavior of this function is experimental and will likely change.
+Known deficiencies include:
+
+* If asked to use multiple threads, it will always spawn that many threads,
+ even if the input is too small to use them. It should automatically lower
+ the thread count when the extra threads would just add overhead.
+* The buffer allocation strategy is fixed. There is room to make it dynamic,
+ perhaps even to allow one output buffer per input, facilitating a variation
+ of the API to return a list without the adverse effects of shared memory
+ buffers.
+
+ZstdDecompressor
+----------------
+
+The ``ZstdDecompressor`` class provides an interface for performing
+decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
+the C API.
+
+Each instance is associated with parameters that control decompression. These
+come from the following named arguments (all optional):
+
+dict_data
+ Compression dictionary to use.
+max_window_size
+ Sets an upper limit on the window size for decompression operations in
+ kibibytes. This setting can be used to prevent large memory allocations
+ for inputs using large compression windows.
+format
+ Set the format of data for the decoder. By default, this is
+ ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
+ allow decoding frames without the 4 byte magic header. Not all decompression
+ APIs support this mode.
+
+The interface of this class is very similar to ``ZstdCompressor`` (by design).
+
+Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
+instances can be called from multiple Python threads simultaneously. In other
+words, assume instances are not thread safe unless stated otherwise.
+
+Utility Methods
+^^^^^^^^^^^^^^^
+
+``memory_size()`` obtains the size of the underlying zstd decompression context,
+in bytes.::
+
+ dctx = zstd.ZstdDecompressor()
+ size = dctx.memory_size()
+
+Simple API
+^^^^^^^^^^
+
+``decompress(data)`` can be used to decompress an entire compressed zstd
+frame in a single operation.::
+
+ dctx = zstd.ZstdDecompressor()
+ decompressed = dctx.decompress(data)
+
+By default, ``decompress(data)`` will only work on data written with the content
+size encoded in its header (this is the default behavior of
+``ZstdCompressor().compress()`` but may not be true for streaming compression). If
+compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
+be raised.
+
+If the compressed data doesn't have its content size embedded within it,
+decompression can be attempted by specifying the ``max_output_size``
+argument.::
+
+ dctx = zstd.ZstdDecompressor()
+ uncompressed = dctx.decompress(data, max_output_size=1048576)
+
+Ideally, ``max_output_size`` will be identical to the decompressed output
+size.
+
+If ``max_output_size`` is too small to hold the decompressed data,
+``zstd.ZstdError`` will be raised.
+
+If ``max_output_size`` is larger than the decompressed data, the allocated
+output buffer will be resized to only use the space required.
+
+Please note that an allocation of the requested ``max_output_size`` will be
+performed every time the method is called. Setting to a very large value could
+result in a lot of work for the memory allocator and may result in
+``MemoryError`` being raised if the allocation fails.
+
+.. important::
+
+ If the exact size of decompressed data is unknown (not passed in explicitly
+ and not stored in the zstandard frame), for performance reasons it is
+ encouraged to use a streaming API.
+
+Stream Reader API
+^^^^^^^^^^^^^^^^^
+
+``stream_reader(source)`` can be used to obtain an object conforming to the
+``io.RawIOBase`` interface for reading decompressed output as a stream::
+
+ with open(path, 'rb') as fh:
+ dctx = zstd.ZstdDecompressor()
+ reader = dctx.stream_reader(fh)
+ while True:
+ chunk = reader.read(16384)
+ if not chunk:
+ break
+
+ # Do something with decompressed chunk.
+
+The stream can also be used as a context manager::
+
+ with open(path, 'rb') as fh:
+ dctx = zstd.ZstdDecompressor()
+ with dctx.stream_reader(fh) as reader:
+ ...
+
+When used as a context manager, the stream is closed and the underlying
+resources are released when the context manager exits. Future operations against
+the stream will fail.
+
+The ``source`` argument to ``stream_reader()`` can be any object with a
+``read(size)`` method or any object implementing the *buffer protocol*.
+
+If the ``source`` is a stream, you can specify how large ``read()`` requests
+to that stream should be via the ``read_size`` argument. It defaults to
+``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
+
+ with open(path, 'rb') as fh:
+ dctx = zstd.ZstdDecompressor()
+ # Will perform fh.read(8192) when obtaining data for the decompressor.
+ with dctx.stream_reader(fh, read_size=8192) as reader:
+ ...
+
+The stream returned by ``stream_reader()`` is not writable.
+
+The stream returned by ``stream_reader()`` is *partially* seekable.
+Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
+of the current position are allowed. Offsets behind the current read
+position and offsets relative to the end of stream are not allowed and
+will raise ``ValueError`` if attempted.
+
+``tell()`` returns the number of decompressed bytes read so far.
+
+Not all I/O methods are implemented. Notably missing is support for
+``readline()``, ``readlines()``, and linewise iteration support. This is
+because streams operate on binary data - not text data. If you want to
+convert decompressed output to text, you can chain an ``io.TextIOWrapper``
+to the stream::
+
+ with open(path, 'rb') as fh:
+ dctx = zstd.ZstdDecompressor()
+ stream_reader = dctx.stream_reader(fh)
+ text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
+
+ for line in text_stream:
+ ...
+
+The ``read_across_frames`` argument to ``stream_reader()`` controls the
+behavior of read operations when the end of a zstd *frame* is encountered.
+When ``False`` (the default), a read will complete when the end of a
+zstd *frame* is encountered. When ``True``, a read can potentially
+return data spanning multiple zstd *frames*.
+
+Streaming Input API
+^^^^^^^^^^^^^^^^^^^
+
+``stream_writer(fh)`` allows you to *stream* data into a decompressor.
+
+Returned instances implement the ``io.RawIOBase`` interface. Only methods
+that involve writing will do useful things.
+
+The argument to ``stream_writer()`` is typically an object that also implements
+``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many
+common Python types conform to this interface, including open file handles
+and ``io.BytesIO``.
+
+Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data
+is sent to the decompressor by calling ``write(data)`` and decompressed
+output is written to the underlying stream by calling its ``write(data)``
+method.::
+
+ dctx = zstd.ZstdDecompressor()
+ decompressor = dctx.stream_writer(fh)
+
+ decompressor.write(compressed_data)
+ ...
+
+
+Calls to ``write()`` will return the number of bytes written to the output
+object. Not all inputs will result in bytes being written, so return values
+of ``0`` are possible.
+
+Like the ``stream_writer()`` compressor, instances can be used as context
+managers. However, context managers add no extra special behavior and offer
+little to no benefit to being used.
+
+Calling ``close()`` will mark the stream as closed and subsequent I/O operations
+will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``).
+``close()`` will also call ``close()`` on the underlying stream if such a
+method exists.
+
+The size of chunks being ``write()`` to the destination can be specified::
+
+ dctx = zstd.ZstdDecompressor()
+ with dctx.stream_writer(fh, write_size=16384) as decompressor:
+ pass
+
+You can see how much memory is being used by the decompressor::
+
+ dctx = zstd.ZstdDecompressor()
+ with dctx.stream_writer(fh) as decompressor:
+ byte_size = decompressor.memory_size()
+
+``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
+the return value of ``write()``. When ``False`` (the default), ``write()``
+returns the number of bytes that were written to the underlying stream.
+When ``True``, ``write()`` returns the number of bytes read from the input.
+``True`` is the *proper* behavior for ``write()`` as specified by the
+``io.RawIOBase`` interface and will become the default in a future release.
+
+Streaming Output API
+^^^^^^^^^^^^^^^^^^^^
+
+``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
+compressed source as an iterator of data chunks.::
+
+ dctx = zstd.ZstdDecompressor()
+ for chunk in dctx.read_to_iter(fh):
+ # Do something with original data.
+
+``read_to_iter()`` accepts an object with a ``read(size)`` method that will
+return compressed bytes or an object conforming to the buffer protocol that
+can expose its data as a contiguous range of bytes.
+
+``read_to_iter()`` returns an iterator whose elements are chunks of the
+decompressed data.
+
+The size of requested ``read()`` from the source can be specified::
+
+ dctx = zstd.ZstdDecompressor()
+ for chunk in dctx.read_to_iter(fh, read_size=16384):
+ pass
+
+It is also possible to skip leading bytes in the input data::
+
+ dctx = zstd.ZstdDecompressor()
+ for chunk in dctx.read_to_iter(fh, skip_bytes=1):
+ pass
+
+.. tip::
+
+ Skipping leading bytes is useful if the source data contains extra
+ *header* data. Traditionally, you would need to create a slice or
+ ``memoryview`` of the data you want to decompress. This would create
+ overhead. It is more efficient to pass the offset into this API.
+
+Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
+controls when data is decompressed. If the iterator isn't consumed,
+decompression is put on hold.
+
+When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
+the behavior may seem similar to what occurs when the simple decompression
+API is used. However, this API works when the decompressed size is unknown.
+Furthermore, if feeding large inputs, the decompressor will work in chunks
+instead of performing a single operation.
+
+Stream Copying API
+^^^^^^^^^^^^^^^^^^
+
+``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
+performing decompression.::
+
+ dctx = zstd.ZstdDecompressor()
+ dctx.copy_stream(ifh, ofh)
+
+e.g. to decompress a file to another file::
+
+ dctx = zstd.ZstdDecompressor()
+ with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
+ dctx.copy_stream(ifh, ofh)
+
+The size of chunks being ``read()`` and ``write()`` from and to the streams
+can be specified::
+
+ dctx = zstd.ZstdDecompressor()
+ dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
+
+Decompressor API
+^^^^^^^^^^^^^^^^
+
+``decompressobj()`` returns an object that exposes a ``decompress(data)``
+method. Compressed data chunks are fed into ``decompress(data)`` and
+uncompressed output (or an empty bytes) is returned. Output from subsequent
+calls needs to be concatenated to reassemble the full decompressed byte
+sequence.
+
+The purpose of ``decompressobj()`` is to provide an API-compatible interface
+with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
+to swap in different decompressor objects while using the same API.
+
+Each object is single use: once an input frame is decoded, ``decompress()``
+can no longer be called.
+
+Here is how this API should be used::
+
+ dctx = zstd.ZstdDecompressor()
+ dobj = dctx.decompressobj()
+ data = dobj.decompress(compressed_chunk_0)
+ data = dobj.decompress(compressed_chunk_1)
+
+By default, calls to ``decompress()`` write output data in chunks of size
+``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
+before being returned to the caller. It is possible to define the size of
+these temporary chunks by passing ``write_size`` to ``decompressobj()``::
+
+ dctx = zstd.ZstdDecompressor()
+ dobj = dctx.decompressobj(write_size=1048576)
+
+.. note::
+
+ Because calls to ``decompress()`` may need to perform multiple
+ memory (re)allocations, this streaming decompression API isn't as
+ efficient as other APIs.
+
+For compatibility with the standard library APIs, instances expose a
+``flush([length=None])`` method. This method no-ops and has no meaningful
+side-effects, making it safe to call any time.
+
+Batch Decompression API
+^^^^^^^^^^^^^^^^^^^^^^^
+
+(Experimental. Not yet supported in CFFI bindings.)
+
+``multi_decompress_to_buffer()`` performs decompression of multiple
+frames as a single operation and returns a ``BufferWithSegmentsCollection``
+containing decompressed data for all inputs.
+
+Compressed frames can be passed to the function as a ``BufferWithSegments``,
+a ``BufferWithSegmentsCollection``, or as a list containing objects that
+conform to the buffer protocol. For best performance, pass a
+``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
+minimal input validation will be done for that type. If calling from
+Python (as opposed to C), constructing one of these instances may add
+overhead that cancels out the time saved by skipping validation of list
+inputs.::
+
+ dctx = zstd.ZstdDecompressor()
+ results = dctx.multi_decompress_to_buffer([b'...', b'...'])
+
+The decompressed size of each frame MUST be discoverable. It can either be
+embedded within the zstd frame (``write_content_size=True`` argument to
+``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
+
+The ``decompressed_sizes`` argument is an object conforming to the buffer
+protocol which holds an array of 64-bit unsigned integers in the machine's
+native format defining the decompressed sizes of each frame. If this argument
+is passed, it avoids having to scan each frame for its decompressed size.
+This frame scanning can add noticeable overhead in some scenarios.::
+
+ frames = [...]
+ sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
+
+ dctx = zstd.ZstdDecompressor()
+ results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
+
+The ``threads`` argument controls the number of threads to use to perform
+decompression operations. The default (``0``) or the value ``1`` means to
+use a single thread. Negative values use the number of logical CPUs in the
+machine.
+
+.. note::
+
+ It is possible to pass a ``mmap.mmap()`` instance into this function by
+ wrapping it with a ``BufferWithSegments`` instance (which will define the
+ offsets of frames within the memory mapped region).
+
+This function is logically equivalent to performing ``dctx.decompress()``
+on each input frame and returning the result.
+
+This function exists to perform decompression on multiple frames as fast
+as possible by having as little overhead as possible. Since decompression is
+performed as a single operation and since the decompressed output is stored in
+a single buffer, extra memory allocations, Python objects, and Python function
+calls are avoided. This is ideal for scenarios where callers know up front that
+they need to access data for multiple frames, such as when *delta chains* are
+being used.
+
+Currently, the implementation always spawns multiple threads when requested,
+even if the amount of work to do is small. In the future, it will be smarter
+about avoiding threads and their associated overhead when the amount of
+work to do is small.
+
+Prefix Dictionary Chain Decompression
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``decompress_content_dict_chain(frames)`` performs decompression of a list of
+zstd frames produced using chained *prefix* dictionary compression. Such
+a list of frames is produced by compressing discrete inputs where each
+non-initial input is compressed with a *prefix* dictionary consisting of the
+content of the previous input.
+
+For example, say you have the following inputs::
+
+ inputs = [b'input 1', b'input 2', b'input 3']
+
+The zstd frame chain consists of:
+
+1. ``b'input 1'`` compressed in standalone/discrete mode
+2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
+3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
+
+Each zstd frame **must** have the content size written.
+
+The following Python code can be used to produce a *prefix dictionary chain*::
+
+ def make_chain(inputs):
+ frames = []
+
+ # First frame is compressed in standalone/discrete mode.
+ zctx = zstd.ZstdCompressor()
+ frames.append(zctx.compress(inputs[0]))
+
+ # Subsequent frames use the previous fulltext as a prefix dictionary
+ for i, raw in enumerate(inputs[1:]):
+ dict_data = zstd.ZstdCompressionDict(
+ inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
+ zctx = zstd.ZstdCompressor(dict_data=dict_data)
+ frames.append(zctx.compress(raw))
+
+ return frames
+
+``decompress_content_dict_chain()`` returns the uncompressed data of the last
+element in the input chain.
+
+
+.. note::
+
+ It is possible to implement *prefix dictionary chain* decompression
+ on top of other APIs. However, this function will likely be faster -
+ especially for long input chains - as it avoids the overhead of instantiating
+ and passing around intermediate objects between C and Python.
+
+Multi-Threaded Compression
+--------------------------
+
+``ZstdCompressor`` accepts a ``threads`` argument that controls the number
+of threads to use for compression. The way this works is that input is split
+into segments and each segment is fed into a worker pool for compression. Once
+a segment is compressed, it is flushed/appended to the output.
+
+.. note::
+
+ These threads are created at the C layer and are not Python threads. So they
+ work outside the GIL. It is therefore possible to CPU saturate multiple cores
+ from Python.
+
+The segment size for multi-threaded compression is chosen from the window size
+of the compressor. This is derived from the ``window_log`` attribute of a
+``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
+range.
+
+If multi-threaded compression is requested and the input is smaller than the
+configured segment size, only a single compression thread will be used. If the
+input is smaller than the segment size multiplied by the thread pool size or
+if data cannot be delivered to the compressor fast enough, not all requested
+compressor threads may be active simultaneously.
+
+Compared to non-multi-threaded compression, multi-threaded compression has
+higher per-operation overhead. This includes extra memory operations,
+thread creation, lock acquisition, etc.
+
+Due to the nature of multi-threaded compression using *N* compression
+*states*, the output from multi-threaded compression will likely be larger
+than non-multi-threaded compression. The difference is usually small. But
+there is a CPU/wall time versus size trade off that may warrant investigation.
+
+Output from multi-threaded compression does not require any special handling
+on the decompression side. To the decompressor, data generated by a
+single-threaded compressor looks the same as data generated by a multi-threaded
+compressor and does not require any special handling or additional resource
+requirements.
+
+Dictionary Creation and Management
+----------------------------------
+
+Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
+
+Instances can be constructed from bytes::
+
+ dict_data = zstd.ZstdCompressionDict(data)
+
+It is possible to construct a dictionary from *any* data. If the data doesn't
+begin with a magic header, it will be treated as a *prefix* dictionary.
+*Prefix* dictionaries allow compression operations to reference raw data
+within the dictionary.
+
+It is possible to force the use of *prefix* dictionaries or to require a
+dictionary header::
+
+ dict_data = zstd.ZstdCompressionDict(data,
+ dict_type=zstd.DICT_TYPE_RAWCONTENT)
+
+ dict_data = zstd.ZstdCompressionDict(data,
+ dict_type=zstd.DICT_TYPE_FULLDICT)
+
+You can see how many bytes are in the dictionary by calling ``len()``::
+
+ dict_data = zstd.train_dictionary(size, samples)
+ dict_size = len(dict_data) # will not be larger than ``size``
+
+Once you have a dictionary, you can pass it to the objects performing
+compression and decompression::
+
+ dict_data = zstd.train_dictionary(131072, samples)
+
+ cctx = zstd.ZstdCompressor(dict_data=dict_data)
+ for source_data in input_data:
+ compressed = cctx.compress(source_data)
+ # Do something with compressed data.
+
+ dctx = zstd.ZstdDecompressor(dict_data=dict_data)
+ for compressed_data in input_data:
+ buffer = io.BytesIO()
+ with dctx.stream_writer(buffer) as decompressor:
+ decompressor.write(compressed_data)
+ # Do something with raw data in ``buffer``.
+
+Dictionaries have unique integer IDs. You can retrieve this ID via::
+
+ dict_id = zstd.dictionary_id(dict_data)
+
+You can obtain the raw data in the dict (useful for persisting and constructing
+a ``ZstdCompressionDict`` later) via ``as_bytes()``::
+
+ dict_data = zstd.train_dictionary(size, samples)
+ raw_data = dict_data.as_bytes()
+
+By default, when a ``ZstdCompressionDict`` is *attached* to a
+``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
+dictionary for use. This is fine if only 1 compression operation is being
+performed or if the ``ZstdCompressor`` is being reused for multiple operations.
+But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
+this can add overhead.
+
+It is possible to *precompute* the dictionary so it can readily be consumed
+by multiple ``ZstdCompressor`` instances::
+
+ d = zstd.ZstdCompressionDict(data)
+
+ # Precompute for compression level 3.
+ d.precompute_compress(level=3)
+
+ # Precompute with specific compression parameters.
+ params = zstd.ZstdCompressionParameters(...)
+ d.precompute_compress(compression_params=params)
+
+.. note::
+
+ When a dictionary is precomputed, the compression parameters used to
+ precompute the dictionary overwrite some of the compression parameters
+ specified to ``ZstdCompressor.__init__``.
+
+Training Dictionaries
+^^^^^^^^^^^^^^^^^^^^^
+
+Unless using *prefix* dictionaries, dictionary data is produced by *training*
+on existing data::
+
+ dict_data = zstd.train_dictionary(size, samples)
+
+This takes a target dictionary size and list of bytes instances and creates and
+returns a ``ZstdCompressionDict``.
+
+The dictionary training mechanism is known as *cover*. More details about it are
+available in the paper *Effective Construction of Relative Lempel-Ziv
+Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
+
+The cover algorithm takes parameters ``k`` and ``d``. These are the
+*segment size* and *dmer size*, respectively. The returned dictionary
+instance created by this function has ``k`` and ``d`` attributes
+containing the values for these parameters. If a ``ZstdCompressionDict``
+is constructed from raw bytes data (a content-only dictionary), the
+``k`` and ``d`` attributes will be ``0``.
+
+The segment and dmer size parameters to the cover algorithm can either be
+specified manually or ``train_dictionary()`` can try multiple values
+and pick the best one, where *best* means the smallest compressed data size.
+This latter mode is called *optimization* mode.
+
+If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
+or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
+struct) are defined, *optimization* mode is used with default parameter
+values.
+
+If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
+with explicit control over those parameters. Specifying ``threads=0`` or
+``threads=1`` can be used to engage *optimization* mode if other parameters
+are not defined.
+
+Otherwise, non-*optimization* mode is used with the parameters specified.
+
+This function takes the following arguments:
+
+dict_size
+ Target size in bytes of the dictionary to generate.
+samples
+ A list of bytes holding samples the dictionary will be trained from.
+k
+ Parameter to cover algorithm defining the segment size. A reasonable range
+ is [16, 2048+].
+d
+ Parameter to cover algorithm defining the dmer size. A reasonable range is
+ [6, 16]. ``d`` must be less than or equal to ``k``.
+dict_id
+ Integer dictionary ID for the produced dictionary. Default is 0, which uses
+ a random value.
+steps
+ Number of steps through ``k`` values to perform when trying parameter
+ variations.
+threads
+ Number of threads to use when trying parameter variations. Default is 0,
+ which means to use a single thread. A negative value can be specified to
+ use as many threads as there are detected logical CPUs.
+level
+ Integer target compression level when trying parameter variations.
+notifications
+ Controls writing of informational messages to ``stderr``. ``0`` (the
+ default) means to write nothing. ``1`` writes errors. ``2`` writes
+ progression info. ``3`` writes more details. And ``4`` writes all info.
+
+Explicit Compression Parameters
+-------------------------------
+
+Zstandard offers a high-level *compression level* that maps to lower-level
+compression parameters. For many consumers, this numeric level is the only
+compression setting you'll need to touch.
+
+But for advanced use cases, it might be desirable to tweak these lower-level
+settings.
+
+The ``ZstdCompressionParameters`` type represents these low-level compression
+settings.
+
+Instances of this type can be constructed from a myriad of keyword arguments
+(defined below) for complete low-level control over each adjustable
+compression setting.
+
+From a higher level, one can construct a ``ZstdCompressionParameters`` instance
+given a desired compression level and target input and dictionary size
+using ``ZstdCompressionParameters.from_level()``. e.g.::
+
+ # Derive compression settings for compression level 7.
+ params = zstd.ZstdCompressionParameters.from_level(7)
+
+ # With an input size of 1MB
+ params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
+
+Using ``from_level()``, it is also possible to override individual compression
+parameters or to define additional settings that aren't automatically derived.
+e.g.::
+
+ params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
+ params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
+
+Or you can define low-level compression settings directly::
+
+ params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
+
+Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
+configure a compressor::
+
+ cctx = zstd.ZstdCompressor(compression_params=params)
+
+The named arguments and attributes of ``ZstdCompressionParameters`` are as
+follows:
+
+* format
+* compression_level
+* window_log
+* hash_log
+* chain_log
+* search_log
+* min_match
+* target_length
+* strategy
+* compression_strategy (deprecated: same as ``strategy``)
+* write_content_size
+* write_checksum
+* write_dict_id
+* job_size
+* overlap_log
+* overlap_size_log (deprecated: same as ``overlap_log``)
+* force_max_window
+* enable_ldm
+* ldm_hash_log
+* ldm_min_match
+* ldm_bucket_size_log
+* ldm_hash_rate_log
+* ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``)
+* threads
+
+Some of these are very low-level settings. It may help to consult the official
+zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
+in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
+
+Frame Inspection
+----------------
+
+Data emitted from zstd compression is encapsulated in a *frame*. This frame
+begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
+the frame in more detail. For more info, see
+https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
+
+``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
+instance and returns a ``FrameParameters`` object describing the frame.
+
+Depending on which fields are present in the frame and their values, the
+length of the frame parameters varies. If insufficient bytes are passed
+in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
+frame parameters can be parsed, pass in at least 18 bytes.
+
+``FrameParameters`` instances have the following attributes:
+
+content_size
+ Integer size of original, uncompressed content. This will be ``0`` if the
+ original content size isn't written to the frame (controlled with the
+ ``write_content_size`` argument to ``ZstdCompressor``) or if the input
+ content size was ``0``.
+
+window_size
+ Integer size of maximum back-reference distance in compressed data.
+
+dict_id
+ Integer of dictionary ID used for compression. ``0`` if no dictionary
+ ID was used or if the dictionary ID was ``0``.
+
+has_checksum
+ Bool indicating whether a 4 byte content checksum is stored at the end
+ of the frame.
+
+``zstd.frame_header_size(data)`` returns the size of the zstandard frame
+header.
+
+``zstd.frame_content_size(data)`` returns the content size as parsed from
+the frame header. ``-1`` means the content size is unknown. ``0`` means
+an empty frame. The content size is usually correct. However, it may not
+be accurate.
+
+Misc Functionality
+------------------
+
+estimate_decompression_context_size()
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Estimate the memory size requirements for a decompressor instance.
+
+Constants
+---------
+
+The following module constants/attributes are exposed:
+
+ZSTD_VERSION
+ This module attribute exposes a 3-tuple of the Zstandard version. e.g.
+ ``(1, 0, 0)``
+MAX_COMPRESSION_LEVEL
+ Integer max compression level accepted by compression functions
+COMPRESSION_RECOMMENDED_INPUT_SIZE
+ Recommended chunk size to feed to compressor functions
+COMPRESSION_RECOMMENDED_OUTPUT_SIZE
+ Recommended chunk size for compression output
+DECOMPRESSION_RECOMMENDED_INPUT_SIZE
+ Recommended chunk size to feed into decompressor functions
+DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
+ Recommended chunk size for decompression output
+
+FRAME_HEADER
+ bytes containing header of the Zstandard frame
+MAGIC_NUMBER
+ Frame header as an integer
+
+FLUSH_BLOCK
+ Flushing behavior that denotes to flush a zstd block. A decompressor will
+ be able to decode all data fed into the compressor so far.
+FLUSH_FRAME
+ Flushing behavior that denotes to end a zstd frame. Any new data fed
+ to the compressor will start a new frame.
+
+CONTENTSIZE_UNKNOWN
+ Value for content size when the content size is unknown.
+CONTENTSIZE_ERROR
+ Value for content size when content size couldn't be determined.
+
+WINDOWLOG_MIN
+ Minimum value for compression parameter
+WINDOWLOG_MAX
+ Maximum value for compression parameter
+CHAINLOG_MIN
+ Minimum value for compression parameter
+CHAINLOG_MAX
+ Maximum value for compression parameter
+HASHLOG_MIN
+ Minimum value for compression parameter
+HASHLOG_MAX
+ Maximum value for compression parameter
+SEARCHLOG_MIN
+ Minimum value for compression parameter
+SEARCHLOG_MAX
+ Maximum value for compression parameter
+MINMATCH_MIN
+ Minimum value for compression parameter
+MINMATCH_MAX
+ Maximum value for compression parameter
+SEARCHLENGTH_MIN
+ Minimum value for compression parameter
+
+ Deprecated: use ``MINMATCH_MIN``
+SEARCHLENGTH_MAX
+ Maximum value for compression parameter
+
+ Deprecated: use ``MINMATCH_MAX``
+TARGETLENGTH_MIN
+ Minimum value for compression parameter
+STRATEGY_FAST
+ Compression strategy
+STRATEGY_DFAST
+ Compression strategy
+STRATEGY_GREEDY
+ Compression strategy
+STRATEGY_LAZY
+ Compression strategy
+STRATEGY_LAZY2
+ Compression strategy
+STRATEGY_BTLAZY2
+ Compression strategy
+STRATEGY_BTOPT
+ Compression strategy
+STRATEGY_BTULTRA
+ Compression strategy
+STRATEGY_BTULTRA2
+ Compression strategy
+
+FORMAT_ZSTD1
+ Zstandard frame format
+FORMAT_ZSTD1_MAGICLESS
+ Zstandard frame format without magic header
+
+Performance Considerations
+--------------------------
+
+The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
+persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
+or ``ZstdDecompressor`` instance for multiple operations is faster than
+instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
+operation. The differences are magnified as the size of data decreases. For
+example, the difference between *context* reuse and non-reuse for 100,000
+100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
+whereas 10 100,000,000 byte inputs will be more similar in speed (because the
+time spent doing compression dwarfs time spent creating new *contexts*).
+
+Buffer Types
+------------
+
+The API exposes a handful of custom types for interfacing with memory buffers.
+The primary goal of these types is to facilitate efficient multi-object
+operations.
+
+The essential idea is to have a single memory allocation provide backing
+storage for multiple logical objects. This has 2 main advantages: fewer
+allocations and optimal memory access patterns. This avoids having to allocate
+a Python object for each logical object and furthermore ensures that access of
+data for objects can be sequential (read: fast) in memory.
+
+BufferWithSegments
+^^^^^^^^^^^^^^^^^^
+
+The ``BufferWithSegments`` type represents a memory buffer containing N
+discrete items of known lengths (segments). It is essentially a fixed size
+memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
+unsigned native endian integers defining the byte offset and length of each
+segment within the buffer.
+
+Instances behave like containers.
+
+``len()`` returns the number of segments within the instance.
+
+``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
+individual segment within the backing buffer. That returned object references
+(not copies) memory. This means that iterating all objects doesn't copy
+data within the buffer.
+
+The ``.size`` attribute contains the total size in bytes of the backing
+buffer.
+
+Instances conform to the buffer protocol. So a reference to the backing bytes
+can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
+be obtained via ``.tobytes()``.
+
+The ``.segments`` attribute exposes the array of ``(offset, length)`` for
+segments within the buffer. It is a ``BufferSegments`` type.
+
+BufferSegment
+^^^^^^^^^^^^^
+
+The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
+It is essentially a reference to N bytes within a ``BufferWithSegments``.
+
+``len()`` returns the length of the segment in bytes.
+
+``.offset`` contains the byte offset of this segment within its parent
+``BufferWithSegments`` instance.
+
+The object conforms to the buffer protocol. ``.tobytes()`` can be called to
+obtain a ``bytes`` instance with a copy of the backing bytes.
+
+BufferSegments
+^^^^^^^^^^^^^^
+
+This type represents an array of ``(offset, length)`` integers defining segments
+within a ``BufferWithSegments``.
+
+The array members are 64-bit unsigned integers using host/native byte order.
+
+Instances conform to the buffer protocol.
+
+BufferWithSegmentsCollection
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
+of multiple ``BufferWithSegments`` instances.
+
+Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
+resulting object behaves like an ordered sequence whose members are the
+segments within each ``BufferWithSegments``.
+
+``len()`` returns the number of segments within all ``BufferWithSegments``
+instances.
+
+``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
+that offset as if all ``BufferWithSegments`` instances were a single
+entity.
+
+If the object is composed of 2 ``BufferWithSegments`` instances with the
+first having 2 segments and the second having 3 segments, then ``b[0]``
+and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
+and ``b[4]`` access segments from the second.
+
+Choosing an API
+===============
+
+There are multiple APIs for performing compression and decompression. This is
+because different applications have different needs and the library wants to
+facilitate optimal use in as many use cases as possible.
+
+From a high-level, APIs are divided into *one-shot* and *streaming*: either you
+are operating on all data at once or you operate on it piecemeal.
+
+The *one-shot* APIs are useful for small data, where the input or output
+size is known. (The size can come from a buffer length, a file size, or the
+zstd frame header.) A limitation of the *one-shot* APIs is that
+input and output must fit in memory simultaneously. For say a 4 GB input,
+this is often not feasible.
+
+The *one-shot* APIs also perform all work as a single operation. So, if you
+feed it large input, it could take a long time for the function to return.
+
+The streaming APIs do not have the limitations of the simple API. But the
+price you pay for this flexibility is that they are more complex than a
+single function call.
+
+The streaming APIs put the caller in control of compression and decompression
+behavior by allowing them to directly control either the input or output side
+of the operation.
+
+With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
+has full control over the input to the compression or decompression stream.
+They can directly choose when new data is operated on.
+
+With the *streaming output* APIs, the caller has full control over the output
+of the compression or decompression stream. It can choose when to receive
+new data.
+
+When using the *streaming* APIs that operate on file-like or stream objects,
+it is important to consider what happens in that object when I/O is requested.
+There is potential for long pauses as data is read or written from the
+underlying stream (say from interacting with a filesystem or network). This
+could add considerable overhead.
+
+Thread Safety
+=============
+
+``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
+about thread safety. Do not operate on the same ``ZstdCompressor`` and
+``ZstdDecompressor`` instance simultaneously from different threads. It is
+fine to have different threads call into a single instance, just not at the
+same time.
+
+Some operations require multiple function calls to complete. e.g. streaming
+operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
+for simultaneously active operations. e.g. you must not start a streaming
+operation when another streaming operation is already active.
+
+The C extension releases the GIL during non-trivial calls into the zstd C
+API. Non-trivial calls are notably compression and decompression. Trivial
+calls are things like parsing frame parameters. Where the GIL is released
+is considered an implementation detail and can change in any release.
+
+APIs that accept bytes-like objects don't enforce that the underlying object
+is read-only. However, it is assumed that the passed object is read-only for
+the duration of the function call. It is possible to pass a mutable object
+(like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
+released, and mutate the object from another thread. Such a race condition
+is a bug in the consumer of python-zstandard. Most Python data types are
+immutable, so unless you are doing something fancy, you don't need to
+worry about this.
+
+Note on Zstandard's *Experimental* API
+======================================
+
+Many of the Zstandard APIs used by this module are marked as *experimental*
+within the Zstandard project.
+
+It is unclear how Zstandard's C API will evolve over time, especially with
+regards to this *experimental* functionality. We will try to maintain
+backwards compatibility at the Python API level. However, we cannot
+guarantee this for things not under our control.
+
+Since a copy of the Zstandard source code is distributed with this
+module and since we compile against it, the behavior of a specific
+version of this module should be constant for all of time. So if you
+pin the version of this module used in your projects (which is a Python
+best practice), you should be shielded from unwanted future changes.
+
+Donate
+======
+
+A lot of time has been invested into this project by the author.
+
+If you find this project useful and would like to thank the author for
+their work, consider donating some money. Any amount is appreciated.
+
+.. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
+ :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
+ :alt: Donate via PayPal
+
+.. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
+ :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
+
+
diff --git a/contrib/python/zstandard/py2/.dist-info/top_level.txt b/contrib/python/zstandard/py2/.dist-info/top_level.txt
new file mode 100644
index 00000000000..8ed261e4995
--- /dev/null
+++ b/contrib/python/zstandard/py2/.dist-info/top_level.txt
@@ -0,0 +1,2 @@
+zstandard
+zstd
diff --git a/contrib/python/zstandard/py2/LICENSE b/contrib/python/zstandard/py2/LICENSE
new file mode 100644
index 00000000000..dcec4760996
--- /dev/null
+++ b/contrib/python/zstandard/py2/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2016, Gregory Szorc
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+may be used to endorse or promote products derived from this software without
+specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/contrib/python/zstandard/py2/README.rst b/contrib/python/zstandard/py2/README.rst
new file mode 100644
index 00000000000..52bcf32503e
--- /dev/null
+++ b/contrib/python/zstandard/py2/README.rst
@@ -0,0 +1,1615 @@
+================
+python-zstandard
+================
+
+This project provides Python bindings for interfacing with the
+`Zstandard <http://www.zstd.net>`_ compression library. A C extension
+and CFFI interface are provided.
+
+The primary goal of the project is to provide a rich interface to the
+underlying C API through a Pythonic interface while not sacrificing
+performance. This means exposing most of the features and flexibility
+of the C API while not sacrificing usability or safety that Python provides.
+
+The canonical home for this project lives in a Mercurial repository run by
+the author. For convenience, that repository is frequently synchronized to
+https://github.com/indygreg/python-zstandard.
+
+| |ci-status|
+
+Requirements
+============
+
+This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8
+on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
+x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
+
+Installing
+==========
+
+This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
+So, to install this package::
+
+ $ pip install zstandard
+
+Binary wheels are made available for some platforms. If you need to
+install from a source distribution, all you should need is a working C
+compiler and the Python development headers/libraries. On many Linux
+distributions, you can install a ``python-dev`` or ``python-devel``
+package to provide these dependencies.
+
+Packages are also uploaded to Anaconda Cloud at
+https://anaconda.org/indygreg/zstandard. See that URL for how to install
+this package with ``conda``.
+
+Legacy Format Support
+=====================
+
+To enable legacy zstd format support which is needed to handle files compressed
+with zstd < 1.0 you need to provide an installation option::
+
+ $ pip install zstandard --install-option="--legacy"
+
+and since pip 7.0 it is possible to have the following line in your
+requirements.txt::
+
+ zstandard --install-option="--legacy"
+
+Performance
+===========
+
+zstandard is a highly tunable compression algorithm. In its default settings
+(compression level 3), it will be faster at compression and decompression and
+will have better compression ratios than zlib on most data sets. When tuned
+for speed, it approaches lz4's speed and ratios. When tuned for compression
+ratio, it approaches lzma ratios and compression speed, but decompression
+speed is much faster. See the official zstandard documentation for more.
+
+zstandard and this library support multi-threaded compression. There is a
+mechanism to compress large inputs using multiple threads.
+
+The performance of this library is usually very similar to what the zstandard
+C API can deliver. Overhead in this library is due to general Python overhead
+and can't easily be avoided by *any* zstandard Python binding. This library
+exposes multiple APIs for performing compression and decompression so callers
+can pick an API suitable for their need. Contrast with the compression
+modules in Python's standard library (like ``zlib``), which only offer limited
+mechanisms for performing operations. The API flexibility means consumers can
+choose to use APIs that facilitate zero copying or minimize Python object
+creation and garbage collection overhead.
+
+This library is capable of single-threaded throughputs well over 1 GB/s. For
+exact numbers, measure yourself. The source code repository has a ``bench.py``
+script that can be used to measure things.
+
+API
+===
+
+To interface with Zstandard, simply import the ``zstandard`` module::
+
+ import zstandard
+
+It is a popular convention to alias the module as a different name for
+brevity::
+
+ import zstandard as zstd
+
+This module attempts to import and use either the C extension or CFFI
+implementation. On Python platforms known to support C extensions (like
+CPython), it raises an ImportError if the C extension cannot be imported.
+On Python platforms known to not support C extensions (like PyPy), it only
+attempts to import the CFFI implementation and raises ImportError if that
+can't be done. On other platforms, it first tries to import the C extension
+then falls back to CFFI if that fails and raises ImportError if CFFI fails.
+
+To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
+environment variable can be set. The following values are accepted:
+
+default
+ The behavior described above.
+cffi_fallback
+ Always try to import the C extension then fall back to CFFI if that
+ fails.
+cext
+ Only attempt to import the C extension.
+cffi
+ Only attempt to import the CFFI implementation.
+
+In addition, the ``zstandard`` module exports a ``backend`` attribute
+containing the string name of the backend being used. It will be one
+of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
+
+The types, functions, and attributes exposed by the ``zstandard`` module
+are documented in the sections below.
+
+.. note::
+
+ The documentation in this section makes references to various zstd
+ concepts and functionality. The source repository contains a
+ ``docs/concepts.rst`` file explaining these in more detail.
+
+ZstdCompressor
+--------------
+
+The ``ZstdCompressor`` class provides an interface for performing
+compression operations. Each instance is essentially a wrapper around a
+``ZSTD_CCtx`` from the C API.
+
+Each instance is associated with parameters that control compression
+behavior. These come from the following named arguments (all optional):
+
+level
+ Integer compression level. Valid values are between 1 and 22.
+dict_data
+ Compression dictionary to use.
+
+ Note: When using dictionary data and ``compress()`` is called multiple
+ times, the ``ZstdCompressionParameters`` derived from an integer
+ compression ``level`` and the first compressed data's size will be reused
+ for all subsequent operations. This may not be desirable if source data
+ size varies significantly.
+compression_params
+ A ``ZstdCompressionParameters`` instance defining compression settings.
+write_checksum
+ Whether a 4 byte checksum should be written with the compressed data.
+ Defaults to False. If True, the decompressor can verify that decompressed
+ data matches the original input data.
+write_content_size
+ Whether the size of the uncompressed data will be written into the
+ header of compressed data. Defaults to True. The data will only be
+ written if the compressor knows the size of the input data. This is
+ often not true for streaming compression.
+write_dict_id
+ Whether to write the dictionary ID into the compressed data.
+ Defaults to True. The dictionary ID is only written if a dictionary
+ is being used.
+threads
+ Enables and sets the number of threads to use for multi-threaded compression
+ operations. Defaults to 0, which means to use single-threaded compression.
+ Negative values will resolve to the number of logical CPUs in the system.
+ Read below for more info on multi-threaded compression. This argument only
+ controls thread count for operations that operate on individual pieces of
+ data. APIs that spawn multiple threads for working on multiple pieces of
+ data have their own ``threads`` argument.
+
+``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
+``write_content_size``, ``write_dict_id``, and ``threads``.
+
+Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
+instances can be called from multiple Python threads simultaneously. In other
+words, assume instances are not thread safe unless stated otherwise.
+
+Utility Methods
+^^^^^^^^^^^^^^^
+
+``frame_progression()`` returns a 3-tuple containing the number of bytes
+ingested, consumed, and produced by the current compression operation.
+
+``memory_size()`` obtains the memory utilization of the underlying zstd
+compression context, in bytes.::
+
+ cctx = zstd.ZstdCompressor()
+ memory = cctx.memory_size()
+
+Simple API
+^^^^^^^^^^
+
+``compress(data)`` compresses and returns data as a one-shot operation.::
+
+ cctx = zstd.ZstdCompressor()
+ compressed = cctx.compress(b'data to compress')
+
+The ``data`` argument can be any object that implements the *buffer protocol*.
+
+Stream Reader API
+^^^^^^^^^^^^^^^^^
+
+``stream_reader(source)`` can be used to obtain an object conforming to the
+``io.RawIOBase`` interface for reading compressed output as a stream::
+
+ with open(path, 'rb') as fh:
+ cctx = zstd.ZstdCompressor()
+ reader = cctx.stream_reader(fh)
+ while True:
+ chunk = reader.read(16384)
+ if not chunk:
+ break
+
+ # Do something with compressed chunk.
+
+Instances can also be used as context managers::
+
+ with open(path, 'rb') as fh:
+ with cctx.stream_reader(fh) as reader:
+ while True:
+ chunk = reader.read(16384)
+ if not chunk:
+ break
+
+ # Do something with compressed chunk.
+
+When the context manager exits or ``close()`` is called, the stream is closed,
+underlying resources are released, and future operations against the compression
+stream will fail.
+
+The ``source`` argument to ``stream_reader()`` can be any object with a
+``read(size)`` method or any object implementing the *buffer protocol*.
+
+``stream_reader()`` accepts a ``size`` argument specifying how large the input
+stream is. This is used to adjust compression parameters so they are
+tailored to the source size.::
+
+ with open(path, 'rb') as fh:
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
+ ...
+
+If the ``source`` is a stream, you can specify how large ``read()`` requests
+to that stream should be via the ``read_size`` argument. It defaults to
+``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
+
+ with open(path, 'rb') as fh:
+ cctx = zstd.ZstdCompressor()
+ # Will perform fh.read(8192) when obtaining data to feed into the
+ # compressor.
+ with cctx.stream_reader(fh, read_size=8192) as reader:
+ ...
+
+The stream returned by ``stream_reader()`` is neither writable nor seekable
+(even if the underlying source is seekable). ``readline()`` and
+``readlines()`` are not implemented because they don't make sense for
+compressed data. ``tell()`` returns the number of compressed bytes
+emitted so far.
+
+Streaming Input API
+^^^^^^^^^^^^^^^^^^^
+
+``stream_writer(fh)`` allows you to *stream* data into a compressor.
+
+Returned instances implement the ``io.RawIOBase`` interface. Only methods
+that involve writing will do useful things.
+
+The argument to ``stream_writer()`` must have a ``write(data)`` method. As
+compressed data is available, ``write()`` will be called with the compressed
+data as its argument. Many common Python types implement ``write()``, including
+open file handles and ``io.BytesIO``.
+
+The ``write(data)`` method is used to feed data into the compressor.
+
+The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever
+data remains within the compressor's internal state into the output object. This
+may result in 0 or more ``write()`` calls to the output object. This method
+accepts an optional ``flush_mode`` argument to control the flushing behavior.
+Its value can be any of the ``FLUSH_*`` constants.
+
+Both ``write()`` and ``flush()`` return the number of bytes written to the
+object's ``write()``. In many cases, small inputs do not accumulate enough
+data to cause a write and ``write()`` will return ``0``.
+
+Calling ``close()`` will mark the stream as closed and subsequent I/O
+operations will raise ``ValueError`` (per the documented behavior of
+``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
+stream if such a method exists.
+
+Typical usage is as follows::
+
+ cctx = zstd.ZstdCompressor(level=10)
+ compressor = cctx.stream_writer(fh)
+
+ compressor.write(b'chunk 0\n')
+ compressor.write(b'chunk 1\n')
+ compressor.flush()
+ # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point.
+ # Receiver is also expecting more data in the zstd *frame*.
+
+ compressor.write(b'chunk 2\n')
+ compressor.flush(zstd.FLUSH_FRAME)
+ # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2\n``.
+ # Receiver is expecting no more data, as the zstd frame is closed.
+ # Any future calls to ``write()`` at this point will construct a new
+ # zstd frame.
+
+Instances can be used as context managers. Exiting the context manager is
+the equivalent of calling ``close()``, which is equivalent to calling
+``flush(zstd.FLUSH_FRAME)``::
+
+ cctx = zstd.ZstdCompressor(level=10)
+ with cctx.stream_writer(fh) as compressor:
+ compressor.write(b'chunk 0')
+ compressor.write(b'chunk 1')
+ ...
+
+.. important::
+
+ If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute
+ a full zstd *frame* and consumers of this data may complain about malformed
+ input. It is recommended to use instances as a context manager to ensure
+ *frames* are properly finished.
+
+If the size of the data being fed to this streaming compressor is known,
+you can declare it before compression begins::
+
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_writer(fh, size=data_len) as compressor:
+ compressor.write(chunk0)
+ compressor.write(chunk1)
+ ...
+
+Declaring the size of the source data allows compression parameters to
+be tuned. And if ``write_content_size`` is used, it also results in the
+content size being written into the frame header of the output data.
+
+The size of chunks being ``write()`` to the destination can be specified::
+
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_writer(fh, write_size=32768) as compressor:
+ ...
+
+To see how much memory is being used by the streaming compressor::
+
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_writer(fh) as compressor:
+ ...
+ byte_size = compressor.memory_size()
+
+The total number of bytes written so far is exposed via ``tell()``::
+
+ cctx = zstd.ZstdCompressor()
+ with cctx.stream_writer(fh) as compressor:
+ ...
+ total_written = compressor.tell()
+
+``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
+the return value of ``write()``. When ``False`` (the default), ``write()`` returns
+the number of bytes that were ``write()``en to the underlying object. When
+``True``, ``write()`` returns the number of bytes read from the input that
+were subsequently written to the compressor. ``True`` is the *proper* behavior
+for ``write()`` as specified by the ``io.RawIOBase`` interface and will become
+the default value in a future release.
+
+Streaming Output API
+^^^^^^^^^^^^^^^^^^^^
+
+``read_to_iter(reader)`` provides a mechanism to stream data out of a
+compressor as an iterator of data chunks.::
+
+ cctx = zstd.ZstdCompressor()
+ for chunk in cctx.read_to_iter(fh):
+ # Do something with emitted data.
+
+``read_to_iter()`` accepts an object that has a ``read(size)`` method or
+conforms to the buffer protocol.
+
+Uncompressed data is fetched from the source either by calling ``read(size)``
+or by fetching a slice of data from the object directly (in the case where
+the buffer protocol is being used). The returned iterator consists of chunks
+of compressed data.
+
+If reading from the source via ``read()``, ``read()`` will be called until
+it raises or returns an empty bytes (``b''``). It is perfectly valid for
+the source to deliver fewer bytes than were requested by ``read(size)``.
+
+Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
+declaring the size of the input stream::
+
+ cctx = zstd.ZstdCompressor()
+ for chunk in cctx.read_to_iter(fh, size=some_int):
+ pass
+
+You can also control the size that data is ``read()`` from the source and
+the ideal size of output chunks::
+
+ cctx = zstd.ZstdCompressor()
+ for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
+ pass
+
+Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
+over the sizes of chunks fed into the compressor. Instead, chunk sizes will
+be whatever the object being read from delivers. These will often be of a
+uniform size.
+
+Stream Copying API
+^^^^^^^^^^^^^^^^^^
+
+``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
+compressing it.::
+
+ cctx = zstd.ZstdCompressor()
+ cctx.copy_stream(ifh, ofh)
+
+For example, say you wish to compress a file::
+
+ cctx = zstd.ZstdCompressor()
+ with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
+ cctx.copy_stream(ifh, ofh)
+
+It is also possible to declare the size of the source stream::
+
+ cctx = zstd.ZstdCompressor()
+ cctx.copy_stream(ifh, ofh, size=len_of_input)
+
+You can also specify how large the chunks that are ``read()`` from and
+``write()`` to the streams should be::
+
+ cctx = zstd.ZstdCompressor()
+ cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
+
+The stream copier returns a 2-tuple of bytes read and written::
+
+ cctx = zstd.ZstdCompressor()
+ read_count, write_count = cctx.copy_stream(ifh, ofh)
+
+Compressor API
+^^^^^^^^^^^^^^
+
+``compressobj()`` returns an object that exposes ``compress(data)`` and
+``flush()`` methods. Each returns compressed data or an empty bytes.
+
+The purpose of ``compressobj()`` is to provide an API-compatible interface
+with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
+swap in different compressor objects while using the same API.
+
+``flush()`` accepts an optional argument indicating how to end the stream.
+``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
+Once this type of flush is performed, ``compress()`` and ``flush()`` can
+no longer be called. This type of flush **must** be called to end the
+compression context. If not called, returned data may be incomplete.
+
+A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
+zstd block. Flushes of this type can be performed multiple times. The next
+call to ``compress()`` will begin a new zstd block.
+
+Here is how this API should be used::
+
+ cctx = zstd.ZstdCompressor()
+ cobj = cctx.compressobj()
+ data = cobj.compress(b'raw input 0')
+ data = cobj.compress(b'raw input 1')
+ data = cobj.flush()
+
+Or to flush blocks::
+
+ cctx = zstd.ZstdCompressor()
+ cobj = cctx.compressobj()
+ data = cobj.compress(b'chunk in first block')
+ data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
+ data = cobj.compress(b'chunk in second block')
+ data = cobj.flush()
+
+For best performance results, keep input chunks under 256KB. This avoids
+extra allocations for a large output object.
+
+It is possible to declare the input size of the data that will be fed into
+the compressor::
+
+ cctx = zstd.ZstdCompressor()
+ cobj = cctx.compressobj(size=6)
+ data = cobj.compress(b'foobar')
+ data = cobj.flush()
+
+Chunker API
+^^^^^^^^^^^
+
+``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
+an object that can be used to iteratively feed chunks of data into a compressor
+and produce output chunks of a uniform size.
+
+The object returned by ``chunker()`` exposes the following methods:
+
+``compress(data)``
+ Feeds new input data into the compressor.
+
+``flush()``
+ Flushes all data currently in the compressor.
+
+``finish()``
+ Signals the end of input data. No new data can be compressed after this
+ method is called.
+
+``compress()``, ``flush()``, and ``finish()`` all return an iterator of
+``bytes`` instances holding compressed data. The iterator may be empty. Callers
+MUST iterate through all elements of the returned iterator before performing
+another operation on the object.
+
+All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
+
+``flush()`` and ``finish()`` may return a final chunk smaller than
+``chunk_size``.
+
+Here is how the API should be used::
+
+ cctx = zstd.ZstdCompressor()
+ chunker = cctx.chunker(chunk_size=32768)
+
+ with open(path, 'rb') as fh:
+ while True:
+ in_chunk = fh.read(32768)
+ if not in_chunk:
+ break
+
+ for out_chunk in chunker.compress(in_chunk):
+ # Do something with output chunk of size 32768.
+
+ for out_chunk in chunker.finish():
+ # Do something with output chunks that finalize the zstd frame.
+
+The ``chunker()`` API is often a better alternative to ``compressobj()``.
+
+``compressobj()`` will emit output data as it is available. This results in a
+*stream* of output chunks of varying sizes. The consistency of the output chunk
+size with ``chunker()`` is more appropriate for many usages, such as sending
+compressed data to a socket.
+
+``compressobj()`` may also perform extra memory reallocations in order to
+dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
+chunks are all the same size (except for flushed or final chunks), there is
+less memory allocation overhead.
+
+Batch Compression API
+^^^^^^^^^^^^^^^^^^^^^
+
+(Experimental. Not yet supported in CFFI bindings.)
+
+``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
+inputs as a single operation.
+
+Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
+``BufferWithSegments``, or a list containing byte like objects. Each element of
+the container will be compressed individually using the configured parameters
+on the ``ZstdCompressor`` instance.
+
+The ``threads`` argument controls how many threads to use for compression. The
+default is ``0`` which means to use a single thread. Negative values use the
+number of logical CPUs in the machine.
+
+The function returns a ``BufferWithSegmentsCollection``. This type represents
+N discrete memory allocations, each holding 1 or more compressed frames.
+
+Output data is written to shared memory buffers. This means that unlike
+regular Python objects, a reference to *any* object within the collection
+keeps the shared buffer and therefore memory backing it alive. This can have
+undesirable effects on process memory usage.
+
+The API and behavior of this function are experimental and will likely change.
+Known deficiencies include:
+
+* If asked to use multiple threads, it will always spawn that many threads,
+ even if the input is too small to use them. It should automatically lower
+ the thread count when the extra threads would just add overhead.
+* The buffer allocation strategy is fixed. There is room to make it dynamic,
+ perhaps even to allow one output buffer per input, facilitating a variation
+ of the API to return a list without the adverse effects of shared memory
+ buffers.
+
+ZstdDecompressor
+----------------
+
+The ``ZstdDecompressor`` class provides an interface for performing
+decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
+the C API.
+
+Each instance is associated with parameters that control decompression. These
+come from the following named arguments (all optional):
+
+dict_data
+ Compression dictionary to use.
+max_window_size
+ Sets an upper limit on the window size for decompression operations in
+ kibibytes. This setting can be used to prevent large memory allocations
+ for inputs using large compression windows.
+format
+ Set the format of data for the decoder. By default, this is
+ ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
+ allow decoding frames without the 4 byte magic header. Not all decompression
+ APIs support this mode.
+
+The interface of this class is very similar to ``ZstdCompressor`` (by design).
+
+Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
+instances can be called from multiple Python threads simultaneously. In other
+words, assume instances are not thread safe unless stated otherwise.
+
+Utility Methods
+^^^^^^^^^^^^^^^
+
+``memory_size()`` obtains the size of the underlying zstd decompression context,
+in bytes.::
+
+ dctx = zstd.ZstdDecompressor()
+ size = dctx.memory_size()
+
+Simple API
+^^^^^^^^^^
+
+``decompress(data)`` can be used to decompress an entire compressed zstd
+frame in a single operation.::
+
+ dctx = zstd.ZstdDecompressor()
+ decompressed = dctx.decompress(data)
+
+By default, ``decompress(data)`` will only work on data written with the content
+size encoded in its header (this is the default behavior of
+``ZstdCompressor().compress()`` but may not be true for streaming compression). If
+compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
+be raised.
+
+If the compressed data doesn't have its content size embedded within it,
+decompression can be attempted by specifying the ``max_output_size``
+argument.::
+
+ dctx = zstd.ZstdDecompressor()
+ uncompressed = dctx.decompress(data, max_output_size=1048576)
+
+Ideally, ``max_output_size`` will be identical to the decompressed output
+size.
+
+If ``max_output_size`` is too small to hold the decompressed data,
+``zstd.ZstdError`` will be raised.
+
+If ``max_output_size`` is larger than the decompressed data, the allocated
+output buffer will be resized to only use the space required.
+
+Please note that an allocation of the requested ``max_output_size`` will be
+performed every time the method is called. Setting to a very large value could
+result in a lot of work for the memory allocator and may result in
+``MemoryError`` being raised if the allocation fails.
+
+.. important::
+
+ If the exact size of decompressed data is unknown (not passed in explicitly
+ and not stored in the zstandard frame), for performance reasons it is
+ encouraged to use a streaming API.
+
+Stream Reader API
+^^^^^^^^^^^^^^^^^
+
+``stream_reader(source)`` can be used to obtain an object conforming to the
+``io.RawIOBase`` interface for reading decompressed output as a stream::
+
+ with open(path, 'rb') as fh:
+ dctx = zstd.ZstdDecompressor()
+ reader = dctx.stream_reader(fh)
+ while True:
+ chunk = reader.read(16384)
+ if not chunk:
+ break
+
+ # Do something with decompressed chunk.
+
+The stream can also be used as a context manager::
+
+ with open(path, 'rb') as fh:
+ dctx = zstd.ZstdDecompressor()
+ with dctx.stream_reader(fh) as reader:
+ ...
+
+When used as a context manager, the stream is closed and the underlying
+resources are released when the context manager exits. Future operations against
+the stream will fail.
+
+The ``source`` argument to ``stream_reader()`` can be any object with a
+``read(size)`` method or any object implementing the *buffer protocol*.
+
+If the ``source`` is a stream, you can specify how large ``read()`` requests
+to that stream should be via the ``read_size`` argument. It defaults to
+``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
+
+ with open(path, 'rb') as fh:
+ dctx = zstd.ZstdDecompressor()
+ # Will perform fh.read(8192) when obtaining data for the decompressor.
+ with dctx.stream_reader(fh, read_size=8192) as reader:
+ ...
+
+The stream returned by ``stream_reader()`` is not writable.
+
+The stream returned by ``stream_reader()`` is *partially* seekable.
+Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
+of the current position are allowed. Offsets behind the current read
+position and offsets relative to the end of stream are not allowed and
+will raise ``ValueError`` if attempted.
+
+``tell()`` returns the number of decompressed bytes read so far.
+
+Not all I/O methods are implemented. Notably missing is support for
+``readline()``, ``readlines()``, and linewise iteration support. This is
+because streams operate on binary data - not text data. If you want to
+convert decompressed output to text, you can chain an ``io.TextIOWrapper``
+to the stream::
+
+ with open(path, 'rb') as fh:
+ dctx = zstd.ZstdDecompressor()
+ stream_reader = dctx.stream_reader(fh)
+ text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
+
+ for line in text_stream:
+ ...
+
+The ``read_across_frames`` argument to ``stream_reader()`` controls the
+behavior of read operations when the end of a zstd *frame* is encountered.
+When ``False`` (the default), a read will complete when the end of a
+zstd *frame* is encountered. When ``True``, a read can potentially
+return data spanning multiple zstd *frames*.
+
+Streaming Input API
+^^^^^^^^^^^^^^^^^^^
+
+``stream_writer(fh)`` allows you to *stream* data into a decompressor.
+
+Returned instances implement the ``io.RawIOBase`` interface. Only methods
+that involve writing will do useful things.
+
+The argument to ``stream_writer()`` is typically an object that also implements
+``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many
+common Python types conform to this interface, including open file handles
+and ``io.BytesIO``.
+
+Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data
+is sent to the decompressor by calling ``write(data)`` and decompressed
+output is written to the underlying stream by calling its ``write(data)``
+method.::
+
+ dctx = zstd.ZstdDecompressor()
+ decompressor = dctx.stream_writer(fh)
+
+ decompressor.write(compressed_data)
+ ...
+
+
+Calls to ``write()`` will return the number of bytes written to the output
+object. Not all inputs will result in bytes being written, so return values
+of ``0`` are possible.
+
+Like the ``stream_writer()`` compressor, instances can be used as context
+managers. However, context managers add no extra special behavior and offer
+little to no benefit to being used.
+
+Calling ``close()`` will mark the stream as closed and subsequent I/O operations
+will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``).
+``close()`` will also call ``close()`` on the underlying stream if such a
+method exists.
+
+The size of chunks being ``write()`` to the destination can be specified::
+
+ dctx = zstd.ZstdDecompressor()
+ with dctx.stream_writer(fh, write_size=16384) as decompressor:
+ pass
+
+You can see how much memory is being used by the decompressor::
+
+ dctx = zstd.ZstdDecompressor()
+ with dctx.stream_writer(fh) as decompressor:
+ byte_size = decompressor.memory_size()
+
+``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
+the return value of ``write()``. When ``False`` (the default), ``write()``
+returns the number of bytes that were ``write()``en to the underlying stream.
+When ``True``, ``write()`` returns the number of bytes read from the input.
+``True`` is the *proper* behavior for ``write()`` as specified by the
+``io.RawIOBase`` interface and will become the default in a future release.
+
+Streaming Output API
+^^^^^^^^^^^^^^^^^^^^
+
+``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
+compressed source as an iterator of data chunks.::
+
+ dctx = zstd.ZstdDecompressor()
+ for chunk in dctx.read_to_iter(fh):
+ # Do something with original data.
+
+``read_to_iter()`` accepts an object with a ``read(size)`` method that will
+return compressed bytes or an object conforming to the buffer protocol that
+can expose its data as a contiguous range of bytes.
+
+``read_to_iter()`` returns an iterator whose elements are chunks of the
+decompressed data.
+
+The size of requested ``read()`` from the source can be specified::
+
+ dctx = zstd.ZstdDecompressor()
+ for chunk in dctx.read_to_iter(fh, read_size=16384):
+ pass
+
+It is also possible to skip leading bytes in the input data::
+
+ dctx = zstd.ZstdDecompressor()
+ for chunk in dctx.read_to_iter(fh, skip_bytes=1):
+ pass
+
+.. tip::
+
+ Skipping leading bytes is useful if the source data contains extra
+ *header* data. Traditionally, you would need to create a slice or
+ ``memoryview`` of the data you want to decompress. This would create
+ overhead. It is more efficient to pass the offset into this API.
+
+Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
+controls when data is decompressed. If the iterator isn't consumed,
+decompression is put on hold.
+
+When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
+the behavior may seem similar to what occurs when the simple decompression
+API is used. However, this API works when the decompressed size is unknown.
+Furthermore, if feeding large inputs, the decompressor will work in chunks
+instead of performing a single operation.
+
+Stream Copying API
+^^^^^^^^^^^^^^^^^^
+
+``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
+performing decompression.::
+
+ dctx = zstd.ZstdDecompressor()
+ dctx.copy_stream(ifh, ofh)
+
+e.g. to decompress a file to another file::
+
+ dctx = zstd.ZstdDecompressor()
+ with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
+ dctx.copy_stream(ifh, ofh)
+
+The size of chunks being ``read()`` and ``write()`` from and to the streams
+can be specified::
+
+ dctx = zstd.ZstdDecompressor()
+ dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
+
+Decompressor API
+^^^^^^^^^^^^^^^^
+
+``decompressobj()`` returns an object that exposes a ``decompress(data)``
+method. Compressed data chunks are fed into ``decompress(data)`` and
+uncompressed output (or an empty bytes) is returned. Output from subsequent
+calls needs to be concatenated to reassemble the full decompressed byte
+sequence.
+
+The purpose of ``decompressobj()`` is to provide an API-compatible interface
+with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
+to swap in different decompressor objects while using the same API.
+
+Each object is single use: once an input frame is decoded, ``decompress()``
+can no longer be called.
+
+Here is how this API should be used::
+
+ dctx = zstd.ZstdDecompressor()
+ dobj = dctx.decompressobj()
+ data = dobj.decompress(compressed_chunk_0)
+ data = dobj.decompress(compressed_chunk_1)
+
+By default, calls to ``decompress()`` write output data in chunks of size
+``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
+before being returned to the caller. It is possible to define the size of
+these temporary chunks by passing ``write_size`` to ``decompressobj()``::
+
+ dctx = zstd.ZstdDecompressor()
+ dobj = dctx.decompressobj(write_size=1048576)
+
+.. note::
+
+ Because calls to ``decompress()`` may need to perform multiple
+ memory (re)allocations, this streaming decompression API isn't as
+ efficient as other APIs.
+
+For compatibility with the standard library APIs, instances expose a
+``flush([length=None])`` method. This method no-ops and has no meaningful
+side-effects, making it safe to call any time.
+
+Batch Decompression API
+^^^^^^^^^^^^^^^^^^^^^^^
+
+(Experimental. Not yet supported in CFFI bindings.)
+
+``multi_decompress_to_buffer()`` performs decompression of multiple
+frames as a single operation and returns a ``BufferWithSegmentsCollection``
+containing decompressed data for all inputs.
+
+Compressed frames can be passed to the function as a ``BufferWithSegments``,
+a ``BufferWithSegmentsCollection``, or as a list containing objects that
+conform to the buffer protocol. For best performance, pass a
+``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
+minimal input validation will be done for that type. If calling from
+Python (as opposed to C), constructing one of these instances may add
+overhead that cancels out the performance gained by skipping validation of
+list inputs.::
+
+ dctx = zstd.ZstdDecompressor()
+ results = dctx.multi_decompress_to_buffer([b'...', b'...'])
+
+The decompressed size of each frame MUST be discoverable. It can either be
+embedded within the zstd frame (``write_content_size=True`` argument to
+``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
+
+The ``decompressed_sizes`` argument is an object conforming to the buffer
+protocol which holds an array of 64-bit unsigned integers in the machine's
+native format defining the decompressed sizes of each frame. If this argument
+is passed, it avoids having to scan each frame for its decompressed size.
+This frame scanning can add noticeable overhead in some scenarios.::
+
+ frames = [...]
+ sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
+
+ dctx = zstd.ZstdDecompressor()
+ results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
+
+The ``threads`` argument controls the number of threads to use to perform
+decompression operations. The default (``0``) or the value ``1`` means to
+use a single thread. Negative values use the number of logical CPUs in the
+machine.
+
+.. note::
+
+ It is possible to pass a ``mmap.mmap()`` instance into this function by
+ wrapping it with a ``BufferWithSegments`` instance (which will define the
+ offsets of frames within the memory mapped region).
+
+This function is logically equivalent to performing ``dctx.decompress()``
+on each input frame and returning the result.
+
+This function exists to perform decompression on multiple frames as fast
+as possible by having as little overhead as possible. Since decompression is
+performed as a single operation and since the decompressed output is stored in
+a single buffer, extra memory allocations, Python objects, and Python function
+calls are avoided. This is ideal for scenarios where callers know up front that
+they need to access data for multiple frames, such as when *delta chains* are
+being used.
+
+Currently, the implementation always spawns multiple threads when requested,
+even if the amount of work to do is small. In the future, it will be smarter
+about avoiding threads and their associated overhead when the amount of
+work to do is small.
+
+Prefix Dictionary Chain Decompression
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``decompress_content_dict_chain(frames)`` performs decompression of a list of
+zstd frames produced using chained *prefix* dictionary compression. Such
+a list of frames is produced by compressing discrete inputs where each
+non-initial input is compressed with a *prefix* dictionary consisting of the
+content of the previous input.
+
+For example, say you have the following inputs::
+
+ inputs = [b'input 1', b'input 2', b'input 3']
+
+The zstd frame chain consists of:
+
+1. ``b'input 1'`` compressed in standalone/discrete mode
+2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
+3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
+
+Each zstd frame **must** have the content size written.
+
+The following Python code can be used to produce a *prefix dictionary chain*::
+
+ def make_chain(inputs):
+ frames = []
+
+ # First frame is compressed in standalone/discrete mode.
+ zctx = zstd.ZstdCompressor()
+ frames.append(zctx.compress(inputs[0]))
+
+ # Subsequent frames use the previous fulltext as a prefix dictionary
+ for i, raw in enumerate(inputs[1:]):
+ dict_data = zstd.ZstdCompressionDict(
+ inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
+ zctx = zstd.ZstdCompressor(dict_data=dict_data)
+ frames.append(zctx.compress(raw))
+
+ return frames
+
+``decompress_content_dict_chain()`` returns the uncompressed data of the last
+element in the input chain.
+
+
+.. note::
+
+ It is possible to implement *prefix dictionary chain* decompression
+ on top of other APIs. However, this function will likely be faster -
+ especially for long input chains - as it avoids the overhead of instantiating
+ and passing around intermediate objects between C and Python.
+
+Multi-Threaded Compression
+--------------------------
+
+``ZstdCompressor`` accepts a ``threads`` argument that controls the number
+of threads to use for compression. The way this works is that input is split
+into segments and each segment is fed into a worker pool for compression. Once
+a segment is compressed, it is flushed/appended to the output.
+
+.. note::
+
+ These threads are created at the C layer and are not Python threads. So they
+ work outside the GIL. It is therefore possible to CPU saturate multiple cores
+ from Python.
+
+The segment size for multi-threaded compression is chosen from the window size
+of the compressor. This is derived from the ``window_log`` attribute of a
+``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
+range.
+
+If multi-threaded compression is requested and the input is smaller than the
+configured segment size, only a single compression thread will be used. If the
+input is smaller than the segment size multiplied by the thread pool size or
+if data cannot be delivered to the compressor fast enough, not all requested
+compressor threads may be active simultaneously.
+
+Compared to non-multi-threaded compression, multi-threaded compression has
+higher per-operation overhead. This includes extra memory operations,
+thread creation, lock acquisition, etc.
+
+Due to the nature of multi-threaded compression using *N* compression
+*states*, the output from multi-threaded compression will likely be larger
+than non-multi-threaded compression. The difference is usually small. But
+there is a CPU/wall time versus size trade off that may warrant investigation.
+
+Output from multi-threaded compression does not require any special handling
+on the decompression side. To the decompressor, data generated by a
+single-threaded compressor looks the same as data generated by a
+multi-threaded compressor and does not require any special handling or
+additional resources.
+
+Dictionary Creation and Management
+----------------------------------
+
+Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
+
+Instances can be constructed from bytes::
+
+ dict_data = zstd.ZstdCompressionDict(data)
+
+It is possible to construct a dictionary from *any* data. If the data doesn't
+begin with a magic header, it will be treated as a *prefix* dictionary.
+*Prefix* dictionaries allow compression operations to reference raw data
+within the dictionary.
+
+It is possible to force the use of *prefix* dictionaries or to require a
+dictionary header::
+
+ dict_data = zstd.ZstdCompressionDict(data,
+ dict_type=zstd.DICT_TYPE_RAWCONTENT)
+
+ dict_data = zstd.ZstdCompressionDict(data,
+ dict_type=zstd.DICT_TYPE_FULLDICT)
+
+You can see how many bytes are in the dictionary by calling ``len()``::
+
+ dict_data = zstd.train_dictionary(size, samples)
+ dict_size = len(dict_data) # will not be larger than ``size``
+
+Once you have a dictionary, you can pass it to the objects performing
+compression and decompression::
+
+ dict_data = zstd.train_dictionary(131072, samples)
+
+ cctx = zstd.ZstdCompressor(dict_data=dict_data)
+ for source_data in input_data:
+ compressed = cctx.compress(source_data)
+ # Do something with compressed data.
+
+ dctx = zstd.ZstdDecompressor(dict_data=dict_data)
+ for compressed_data in input_data:
+ buffer = io.BytesIO()
+ with dctx.stream_writer(buffer) as decompressor:
+ decompressor.write(compressed_data)
+ # Do something with raw data in ``buffer``.
+
+Dictionaries have unique integer IDs. You can retrieve this ID via::
+
+ dict_id = zstd.dictionary_id(dict_data)
+
+You can obtain the raw data in the dict (useful for persisting and constructing
+a ``ZstdCompressionDict`` later) via ``as_bytes()``::
+
+ dict_data = zstd.train_dictionary(size, samples)
+ raw_data = dict_data.as_bytes()
+
+By default, when a ``ZstdCompressionDict`` is *attached* to a
+``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
+dictionary for use. This is fine if only 1 compression operation is being
+performed or if the ``ZstdCompressor`` is being reused for multiple operations.
+But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
+this can add overhead.
+
+It is possible to *precompute* the dictionary so it can readily be consumed
+by multiple ``ZstdCompressor`` instances::
+
+ d = zstd.ZstdCompressionDict(data)
+
+ # Precompute for compression level 3.
+ d.precompute_compress(level=3)
+
+ # Precompute with specific compression parameters.
+ params = zstd.ZstdCompressionParameters(...)
+ d.precompute_compress(compression_params=params)
+
+.. note::
+
+ When a dictionary is precomputed, the compression parameters used to
+ precompute the dictionary overwrite some of the compression parameters
+ specified to ``ZstdCompressor.__init__``.
+
+Training Dictionaries
+^^^^^^^^^^^^^^^^^^^^^
+
+Unless using *prefix* dictionaries, dictionary data is produced by *training*
+on existing data::
+
+ dict_data = zstd.train_dictionary(size, samples)
+
+This takes a target dictionary size and list of bytes instances and creates and
+returns a ``ZstdCompressionDict``.
+
+The dictionary training mechanism is known as *cover*. More details about it are
+available in the paper *Effective Construction of Relative Lempel-Ziv
+Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
+
+The cover algorithm takes parameters ``k`` and ``d``. These are the
+*segment size* and *dmer size*, respectively. The returned dictionary
+instance created by this function has ``k`` and ``d`` attributes
+containing the values for these parameters. If a ``ZstdCompressionDict``
+is constructed from raw bytes data (a content-only dictionary), the
+``k`` and ``d`` attributes will be ``0``.
+
+The segment and dmer size parameters to the cover algorithm can either be
+specified manually or ``train_dictionary()`` can try multiple values
+and pick the best one, where *best* means the smallest compressed data size.
+This latter mode is called *optimization* mode.
+
+If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
+or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
+struct) are defined, *optimization* mode is used with default parameter
+values.
+
+If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
+with explicit control over those parameters. Specifying ``threads=0`` or
+``threads=1`` can be used to engage *optimization* mode if other parameters
+are not defined.
+
+Otherwise, non-*optimization* mode is used with the parameters specified.
+
+This function takes the following arguments:
+
+dict_size
+ Target size in bytes of the dictionary to generate.
+samples
+ A list of bytes holding samples the dictionary will be trained from.
+k
+ Parameter to cover algorithm defining the segment size. A reasonable range
+ is [16, 2048+].
+d
+ Parameter to cover algorithm defining the dmer size. A reasonable range is
+ [6, 16]. ``d`` must be less than or equal to ``k``.
+dict_id
+ Integer dictionary ID for the produced dictionary. Default is 0, which uses
+ a random value.
+steps
+ Number of steps through ``k`` values to perform when trying parameter
+ variations.
+threads
+ Number of threads to use when trying parameter variations. Default is 0,
+ which means to use a single thread. A negative value can be specified to
+ use as many threads as there are detected logical CPUs.
+level
+ Integer target compression level when trying parameter variations.
+notifications
+ Controls writing of informational messages to ``stderr``. ``0`` (the
+ default) means to write nothing. ``1`` writes errors. ``2`` writes
+ progression info. ``3`` writes more details. And ``4`` writes all info.
+
+Explicit Compression Parameters
+-------------------------------
+
+Zstandard offers a high-level *compression level* that maps to lower-level
+compression parameters. For many consumers, this numeric level is the only
+compression setting you'll need to touch.
+
+But for advanced use cases, it might be desirable to tweak these lower-level
+settings.
+
+The ``ZstdCompressionParameters`` type represents these low-level compression
+settings.
+
+Instances of this type can be constructed from a myriad of keyword arguments
+(defined below) for complete low-level control over each adjustable
+compression setting.
+
+From a higher level, one can construct a ``ZstdCompressionParameters`` instance
+given a desired compression level and target input and dictionary size
+using ``ZstdCompressionParameters.from_level()``. e.g.::
+
+ # Derive compression settings for compression level 7.
+ params = zstd.ZstdCompressionParameters.from_level(7)
+
+ # With an input size of 1MB
+ params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
+
+Using ``from_level()``, it is also possible to override individual compression
+parameters or to define additional settings that aren't automatically derived.
+e.g.::
+
+ params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
+ params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
+
+Or you can define low-level compression settings directly::
+
+ params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
+
+Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
+configure a compressor::
+
+ cctx = zstd.ZstdCompressor(compression_params=params)
+
+The named arguments and attributes of ``ZstdCompressionParameters`` are as
+follows:
+
+* format
+* compression_level
+* window_log
+* hash_log
+* chain_log
+* search_log
+* min_match
+* target_length
+* strategy
+* compression_strategy (deprecated: same as ``strategy``)
+* write_content_size
+* write_checksum
+* write_dict_id
+* job_size
+* overlap_log
+* overlap_size_log (deprecated: same as ``overlap_log``)
+* force_max_window
+* enable_ldm
+* ldm_hash_log
+* ldm_min_match
+* ldm_bucket_size_log
+* ldm_hash_rate_log
+* ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``)
+* threads
+
+Some of these are very low-level settings. It may help to consult the official
+zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
+in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
+
+Frame Inspection
+----------------
+
+Data emitted from zstd compression is encapsulated in a *frame*. This frame
+begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
+the frame in more detail. For more info, see
+https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
+
+``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
+instance and returns a ``FrameParameters`` object describing the frame.
+
+Depending on which fields are present in the frame and their values, the
+length of the frame parameters varies. If insufficient bytes are passed
+in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
+frame parameters can be parsed, pass in at least 18 bytes.
+
+``FrameParameters`` instances have the following attributes:
+
+content_size
+ Integer size of original, uncompressed content. This will be ``0`` if the
+ original content size isn't written to the frame (controlled with the
+ ``write_content_size`` argument to ``ZstdCompressor``) or if the input
+ content size was ``0``.
+
+window_size
+ Integer size of maximum back-reference distance in compressed data.
+
+dict_id
+ Integer of dictionary ID used for compression. ``0`` if no dictionary
+ ID was used or if the dictionary ID was ``0``.
+
+has_checksum
+ Bool indicating whether a 4 byte content checksum is stored at the end
+ of the frame.
+
+``zstd.frame_header_size(data)`` returns the size of the zstandard frame
+header.
+
+``zstd.frame_content_size(data)`` returns the content size as parsed from
+the frame header. ``-1`` means the content size is unknown. ``0`` means
+an empty frame. The content size is usually correct. However, it may not
+be accurate.
+
+Misc Functionality
+------------------
+
+estimate_decompression_context_size()
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Estimate the memory size requirements for a decompressor instance.
+
+Constants
+---------
+
+The following module constants/attributes are exposed:
+
+ZSTD_VERSION
+ This module attribute exposes a 3-tuple of the Zstandard version. e.g.
+ ``(1, 0, 0)``
+MAX_COMPRESSION_LEVEL
+ Integer max compression level accepted by compression functions
+COMPRESSION_RECOMMENDED_INPUT_SIZE
+ Recommended chunk size to feed to compressor functions
+COMPRESSION_RECOMMENDED_OUTPUT_SIZE
+ Recommended chunk size for compression output
+DECOMPRESSION_RECOMMENDED_INPUT_SIZE
+ Recommended chunk size to feed into decompressor functions
+DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
+ Recommended chunk size for decompression output
+
+FRAME_HEADER
+ bytes containing header of the Zstandard frame
+MAGIC_NUMBER
+ Frame header as an integer
+
+FLUSH_BLOCK
+ Flushing behavior that denotes to flush a zstd block. A decompressor will
+ be able to decode all data fed into the compressor so far.
+FLUSH_FRAME
+ Flushing behavior that denotes to end a zstd frame. Any new data fed
+ to the compressor will start a new frame.
+
+CONTENTSIZE_UNKNOWN
+ Value for content size when the content size is unknown.
+CONTENTSIZE_ERROR
+ Value for content size when content size couldn't be determined.
+
+WINDOWLOG_MIN
+ Minimum value for compression parameter
+WINDOWLOG_MAX
+ Maximum value for compression parameter
+CHAINLOG_MIN
+ Minimum value for compression parameter
+CHAINLOG_MAX
+ Maximum value for compression parameter
+HASHLOG_MIN
+ Minimum value for compression parameter
+HASHLOG_MAX
+ Maximum value for compression parameter
+SEARCHLOG_MIN
+ Minimum value for compression parameter
+SEARCHLOG_MAX
+ Maximum value for compression parameter
+MINMATCH_MIN
+ Minimum value for compression parameter
+MINMATCH_MAX
+ Maximum value for compression parameter
+SEARCHLENGTH_MIN
+ Minimum value for compression parameter
+
+ Deprecated: use ``MINMATCH_MIN``
+SEARCHLENGTH_MAX
+ Maximum value for compression parameter
+
+ Deprecated: use ``MINMATCH_MAX``
+TARGETLENGTH_MIN
+ Minimum value for compression parameter
+STRATEGY_FAST
+ Compression strategy
+STRATEGY_DFAST
+ Compression strategy
+STRATEGY_GREEDY
+ Compression strategy
+STRATEGY_LAZY
+ Compression strategy
+STRATEGY_LAZY2
+ Compression strategy
+STRATEGY_BTLAZY2
+ Compression strategy
+STRATEGY_BTOPT
+ Compression strategy
+STRATEGY_BTULTRA
+ Compression strategy
+STRATEGY_BTULTRA2
+ Compression strategy
+
+FORMAT_ZSTD1
+ Zstandard frame format
+FORMAT_ZSTD1_MAGICLESS
+ Zstandard frame format without magic header
+
+Performance Considerations
+--------------------------
+
+The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
+persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
+or ``ZstdDecompressor`` instance for multiple operations is faster than
+instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
+operation. The differences are magnified as the size of data decreases. For
+example, the difference between *context* reuse and non-reuse for 100,000
+100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
+whereas 10 100,000,000 byte inputs will be more similar in speed (because the
+time spent doing compression dwarfs time spent creating new *contexts*).
+
+Buffer Types
+------------
+
+The API exposes a handful of custom types for interfacing with memory buffers.
+The primary goal of these types is to facilitate efficient multi-object
+operations.
+
+The essential idea is to have a single memory allocation provide backing
+storage for multiple logical objects. This has 2 main advantages: fewer
+allocations and optimal memory access patterns. This avoids having to allocate
+a Python object for each logical object and furthermore ensures that access of
+data for objects can be sequential (read: fast) in memory.
+
+BufferWithSegments
+^^^^^^^^^^^^^^^^^^
+
+The ``BufferWithSegments`` type represents a memory buffer containing N
+discrete items of known lengths (segments). It is essentially a fixed size
+memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
+unsigned native endian integers defining the byte offset and length of each
+segment within the buffer.
+
+Instances behave like containers.
+
+``len()`` returns the number of segments within the instance.
+
+``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
+individual segment within the backing buffer. That returned object references
+(not copies) memory. This means that iterating all objects doesn't copy
+data within the buffer.
+
+The ``.size`` attribute contains the total size in bytes of the backing
+buffer.
+
+Instances conform to the buffer protocol. So a reference to the backing bytes
+can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
+be obtained via ``.tobytes()``.
+
+The ``.segments`` attribute exposes the array of ``(offset, length)`` for
+segments within the buffer. It is a ``BufferSegments`` type.
+
+BufferSegment
+^^^^^^^^^^^^^
+
+The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
+It is essentially a reference to N bytes within a ``BufferWithSegments``.
+
+``len()`` returns the length of the segment in bytes.
+
+``.offset`` contains the byte offset of this segment within its parent
+``BufferWithSegments`` instance.
+
+The object conforms to the buffer protocol. ``.tobytes()`` can be called to
+obtain a ``bytes`` instance with a copy of the backing bytes.
+
+BufferSegments
+^^^^^^^^^^^^^^
+
+This type represents an array of ``(offset, length)`` integers defining segments
+within a ``BufferWithSegments``.
+
+The array members are 64-bit unsigned integers using host/native byte order.
+
+Instances conform to the buffer protocol.
+
+BufferWithSegmentsCollection
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
+of multiple ``BufferWithSegments`` instances.
+
+Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
+resulting object behaves like an ordered sequence whose members are the
+segments within each ``BufferWithSegments``.
+
+``len()`` returns the number of segments within all ``BufferWithSegments``
+instances.
+
+``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
+that offset as if all ``BufferWithSegments`` instances were a single
+entity.
+
+If the object is composed of 2 ``BufferWithSegments`` instances with the
+first having 2 segments and the second having 3 segments, then ``b[0]``
+and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
+and ``b[4]`` access segments from the second.
+
+Choosing an API
+===============
+
+There are multiple APIs for performing compression and decompression. This is
+because different applications have different needs and the library wants to
+facilitate optimal use in as many use cases as possible.
+
+From a high level, APIs are divided into *one-shot* and *streaming*: either you
+are operating on all data at once or you operate on it piecemeal.
+
+The *one-shot* APIs are useful for small data, where the input or output
+size is known. (The size can come from a buffer length, a file size, or
+the zstd frame header.) A limitation of the *one-shot* APIs is that
+input and output must fit in memory simultaneously. For say a 4 GB input,
+this is often not feasible.
+
+The *one-shot* APIs also perform all work as a single operation. So, if you
+feed it large input, it could take a long time for the function to return.
+
+The streaming APIs do not have the limitations of the simple API. But the
+price you pay for this flexibility is that they are more complex than a
+single function call.
+
+The streaming APIs put the caller in control of compression and decompression
+behavior by allowing them to directly control either the input or output side
+of the operation.
+
+With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
+has full control over the input to the compression or decompression stream.
+They can directly choose when new data is operated on.
+
+With the *streaming output* APIs, the caller has full control over the output
+of the compression or decompression stream. It can choose when to receive
+new data.
+
+When using the *streaming* APIs that operate on file-like or stream objects,
+it is important to consider what happens in that object when I/O is requested.
+There is potential for long pauses as data is read or written from the
+underlying stream (say from interacting with a filesystem or network). This
+could add considerable overhead.
+
+Thread Safety
+=============
+
+``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
+about thread safety. Do not operate on the same ``ZstdCompressor`` and
+``ZstdDecompressor`` instance simultaneously from different threads. It is
+fine to have different threads call into a single instance, just not at the
+same time.
+
+Some operations require multiple function calls to complete. e.g. streaming
+operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
+for simultaneously active operations. e.g. you must not start a streaming
+operation when another streaming operation is already active.
+
+The C extension releases the GIL during non-trivial calls into the zstd C
+API. Non-trivial calls are notably compression and decompression. Trivial
+calls are things like parsing frame parameters. Where the GIL is released
+is considered an implementation detail and can change in any release.
+
+APIs that accept bytes-like objects don't enforce that the underlying object
+is read-only. However, it is assumed that the passed object is read-only for
+the duration of the function call. It is possible to pass a mutable object
+(like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
+released, and mutate the object from another thread. Such a race condition
+is a bug in the consumer of python-zstandard. Most Python data types are
+immutable, so unless you are doing something fancy, you don't need to
+worry about this.
+
+Note on Zstandard's *Experimental* API
+======================================
+
+Many of the Zstandard APIs used by this module are marked as *experimental*
+within the Zstandard project.
+
+It is unclear how Zstandard's C API will evolve over time, especially with
+regards to this *experimental* functionality. We will try to maintain
+backwards compatibility at the Python API level. However, we cannot
+guarantee this for things not under our control.
+
+Since a copy of the Zstandard source code is distributed with this
+module and since we compile against it, the behavior of a specific
+version of this module should be constant for all of time. So if you
+pin the version of this module used in your projects (which is a Python
+best practice), you should be shielded from unwanted future changes.
+
+Donate
+======
+
+A lot of time has been invested into this project by the author.
+
+If you find this project useful and would like to thank the author for
+their work, consider donating some money. Any amount is appreciated.
+
+.. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
+ :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
+ :alt: Donate via PayPal
+
+.. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
+ :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
diff --git a/contrib/python/zstandard/py2/c-ext/bufferutil.c b/contrib/python/zstandard/py2/c-ext/bufferutil.c
new file mode 100644
index 00000000000..5094a4bb92f
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/bufferutil.c
@@ -0,0 +1,792 @@
+/**
+* Copyright (c) 2017-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+/* Class docstring for BufferWithSegments (exposed as tp_doc). */
+PyDoc_STRVAR(BufferWithSegments__doc__,
+"BufferWithSegments - A memory buffer holding known sub-segments.\n"
+"\n"
+"This type represents a contiguous chunk of memory containing N discrete\n"
+"items within sub-segments of that memory.\n"
+"\n"
+"Segments within the buffer are stored as an array of\n"
+"``(offset, length)`` pairs, where each element is an unsigned 64-bit\n"
+"integer using the host/native byte order representation.\n"
+"\n"
+"The type exists to facilitate operations against N>1 items without the\n"
+"overhead of Python object creation and management.\n"
+);
+
+/**
+ * tp_dealloc for BufferWithSegments.
+ *
+ * The data pointer is disposed of according to who holds it: when a
+ * Py_buffer view is present it is released, otherwise the raw allocation is
+ * handed to free() or PyMem_Free() depending on the useFree flag. The
+ * segments array always follows the useFree flag.
+ */
+static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) {
+    const int viaLibcFree = (self->useFree != 0);
+
+    if (self->parent.buf) {
+        /* The data is reached through a buffer view; drop the view only. */
+        PyBuffer_Release(&self->parent);
+    }
+    else {
+        if (viaLibcFree) {
+            free(self->data);
+        }
+        else {
+            PyMem_Free(self->data);
+        }
+    }
+    self->data = NULL;
+
+    if (viaLibcFree) {
+        free(self->segments);
+    }
+    else {
+        PyMem_Free(self->segments);
+    }
+    self->segments = NULL;
+
+    PyObject_Del(self);
+}
+
+/**
+ * tp_init for BufferWithSegments: ``BufferWithSegments(data, segments)``.
+ *
+ * Acquires a buffer view on ``data`` (kept in self->parent) and stores a
+ * private copy of ``segments``, which must be a packed array of
+ * BufferSegment records. Every record is checked against the length of
+ * ``data`` before anything is stored.
+ *
+ * Returns 0 on success. On failure returns -1 with a Python exception set
+ * (ValueError for malformed input, MemoryError if the copy cannot be
+ * allocated); both buffer views are released on those paths.
+ */
+static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        "segments",
+        NULL
+    };
+
+    Py_buffer segments;
+    Py_ssize_t segmentCount;
+    Py_ssize_t i;
+    unsigned long long dataLen;
+
+    memset(&self->parent, 0, sizeof(self->parent));
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments",
+#endif
+        kwlist, &self->parent, &segments)) {
+        return -1;
+    }
+
+    if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension");
+        goto except;
+    }
+
+    if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension");
+        goto except;
+    }
+
+    if (segments.len % sizeof(BufferSegment)) {
+        PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %zu",
+            sizeof(BufferSegment));
+        goto except;
+    }
+
+    segmentCount = segments.len / sizeof(BufferSegment);
+    dataLen = (unsigned long long)self->parent.len;
+
+    /* Validate segments data, as blindly trusting it could lead to arbitrary
+       memory access. offset and length are never added together: their sum
+       can wrap around in unsigned arithmetic and slip past a naive
+       ``offset + length > size`` comparison. */
+    for (i = 0; i < segmentCount; i++) {
+        const BufferSegment* segment = &((const BufferSegment*)(segments.buf))[i];
+
+        if (segment->offset > dataLen || segment->length > dataLen - segment->offset) {
+            PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer");
+            goto except;
+        }
+    }
+
+    /* Make a copy of the segments data. It is cheap to do so and is a guard
+       against caller changing offsets, which has security implications. */
+    self->segments = PyMem_Malloc(segments.len);
+    if (!self->segments) {
+        PyErr_NoMemory();
+        goto except;
+    }
+
+    memcpy(self->segments, segments.buf, segments.len);
+    PyBuffer_Release(&segments);
+
+    self->data = self->parent.buf;
+    self->dataSize = self->parent.len;
+    self->segmentCount = segmentCount;
+
+    return 0;
+
+except:
+    PyBuffer_Release(&self->parent);
+    PyBuffer_Release(&segments);
+    return -1;
+}
+
+/**
+ * Construct a BufferWithSegments from existing memory and offsets.
+ *
+ * Ownership of the backing memory and BufferSegments will be transferred to
+ * the created object and freed when the BufferWithSegments is destroyed.
+ * The new object is created with useFree cleared, so the destructor hands
+ * both pointers to PyMem_Free().
+ *
+ * Returns NULL with a Python exception set if either pointer is NULL, if any
+ * segment does not fit inside ``dataSize`` bytes, or if the object cannot be
+ * allocated. Nothing is freed on those paths, so the caller still owns
+ * ``data`` and ``segments`` when NULL is returned.
+ */
+ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize,
+    BufferSegment* segments, Py_ssize_t segmentsSize) {
+    ZstdBufferWithSegments* result = NULL;
+    Py_ssize_t i;
+
+    if (NULL == data) {
+        PyErr_SetString(PyExc_ValueError, "data is NULL");
+        return NULL;
+    }
+
+    if (NULL == segments) {
+        PyErr_SetString(PyExc_ValueError, "segments is NULL");
+        return NULL;
+    }
+
+    for (i = 0; i < segmentsSize; i++) {
+        const BufferSegment* segment = &segments[i];
+
+        /* Compare without computing offset + length, which can wrap around
+           in unsigned arithmetic and defeat the bounds check. */
+        if (segment->offset > dataSize || segment->length > dataSize - segment->offset) {
+            PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size");
+            return NULL;
+        }
+    }
+
+    result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType);
+    if (NULL == result) {
+        return NULL;
+    }
+
+    result->useFree = 0;
+
+    /* No Py_buffer is involved; a zeroed parent tells the destructor so. */
+    memset(&result->parent, 0, sizeof(result->parent));
+    result->data = data;
+    result->dataSize = dataSize;
+    result->segments = segments;
+    result->segmentCount = segmentsSize;
+
+    return result;
+}
+
+/* sq_length slot: the number of segments described by this buffer. */
+static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) {
+    const Py_ssize_t count = self->segmentCount;
+
+    return count;
+}
+
+/**
+ * sq_item slot: create a BufferSegment object describing segment ``i``.
+ *
+ * The returned object points into this buffer's memory and holds a strong
+ * reference to ``self``. Raises IndexError for an out-of-range index and
+ * ValueError when the segment length does not fit in Py_ssize_t.
+ */
+static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) {
+    const BufferSegment* entry;
+    ZstdBufferSegment* segmentObject;
+
+    if (i < 0) {
+        PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
+        return NULL;
+    }
+
+    if (i >= self->segmentCount) {
+        PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount);
+        return NULL;
+    }
+
+    /* Index is known to be valid from here on. */
+    entry = &self->segments[i];
+
+    if (entry->length > PY_SSIZE_T_MAX) {
+        PyErr_Format(PyExc_ValueError,
+            "item at offset %zd is too large for this platform", i);
+        return NULL;
+    }
+
+    segmentObject = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
+    if (NULL == segmentObject) {
+        return NULL;
+    }
+
+    /* The segment borrows our memory, so it keeps us alive. */
+    Py_INCREF(self);
+    segmentObject->parent = (PyObject*)self;
+
+    segmentObject->data = (char*)self->data + entry->offset;
+    segmentObject->dataSize = (Py_ssize_t)entry->length;
+    segmentObject->offset = entry->offset;
+
+    return segmentObject;
+}
+
+#if PY_MAJOR_VERSION >= 3
+/* bf_getbuffer slot: expose the whole backing buffer as a read-only view. */
+static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
+    if (self->dataSize <= PY_SSIZE_T_MAX) {
+        return PyBuffer_FillInfo(view, (PyObject*)self, self->data, (Py_ssize_t)self->dataSize, 1, flags);
+    }
+
+    view->obj = NULL;
+    PyErr_SetString(PyExc_BufferError, "buffer is too large for this platform");
+    return -1;
+}
+#else
+/* bf_getreadbuffer slot: hand out the backing buffer; only segment 0 exists. */
+static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
+    if (0 != segment) {
+        PyErr_SetString(PyExc_ValueError, "segment number must be 0");
+        return -1;
+    }
+
+    if (self->dataSize <= PY_SSIZE_T_MAX) {
+        *ptrptr = self->data;
+        return (Py_ssize_t)self->dataSize;
+    }
+
+    PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
+    return -1;
+}
+
+/* bf_getsegcount slot: always reports a single segment. */
+static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
+    if (NULL != len) {
+        *len = 1;
+    }
+
+    return 1;
+}
+#endif
+
+PyDoc_STRVAR(BufferWithSegments_tobytes__doc__,
+"Obtain a bytes instance for this buffer.\n"
+);
+
+/* Method tobytes(): copy the entire backing buffer into a new bytes object. */
+static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
+    Py_ssize_t byteCount;
+
+    if (self->dataSize > PY_SSIZE_T_MAX) {
+        PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
+        return NULL;
+    }
+
+    byteCount = (Py_ssize_t)self->dataSize;
+
+    return PyBytes_FromStringAndSize(self->data, byteCount);
+}
+
+PyDoc_STRVAR(BufferWithSegments_segments__doc__,
+"Obtain a BufferSegments describing segments in this instance.\n"
+);
+
+/**
+ * Method segments(): create a BufferSegments object that refers to this
+ * buffer's segments array.
+ *
+ * The array is shared, not copied, so the new object holds a strong
+ * reference to ``self``. Returns NULL with an exception set if the object
+ * cannot be created.
+ */
+static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) {
+    ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL);
+    if (NULL == result) {
+        return NULL;
+    }
+
+    result->parent = (PyObject*)self;
+    Py_INCREF(self);
+    result->segments = self->segments;
+    result->segmentCount = self->segmentCount;
+
+    return result;
+}
+
+static PySequenceMethods BufferWithSegments_sq = {
+ (lenfunc)BufferWithSegments_length, /* sq_length */
+ 0, /* sq_concat */
+ 0, /* sq_repeat */
+ (ssizeargfunc)BufferWithSegments_item, /* sq_item */
+ 0, /* sq_ass_item */
+ 0, /* sq_contains */
+ 0, /* sq_inplace_concat */
+ 0 /* sq_inplace_repeat */
+};
+
+static PyBufferProcs BufferWithSegments_as_buffer = {
+#if PY_MAJOR_VERSION >= 3
+ (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
+ 0 /* bf_releasebuffer */
+#else
+ (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */
+ 0, /* bf_getwritebuffer */
+ (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */
+ 0 /* bf_getcharbuffer */
+#endif
+};
+
+static PyMethodDef BufferWithSegments_methods[] = {
+ { "segments", (PyCFunction)BufferWithSegments_segments,
+ METH_NOARGS, BufferWithSegments_segments__doc__ },
+ { "tobytes", (PyCFunction)BufferWithSegments_tobytes,
+ METH_NOARGS, BufferWithSegments_tobytes__doc__ },
+ { NULL, NULL }
+};
+
+static PyMemberDef BufferWithSegments_members[] = {
+ { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize),
+ READONLY, "total size of the buffer in bytes" },
+ { NULL }
+};
+
+PyTypeObject ZstdBufferWithSegmentsType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.BufferWithSegments", /* tp_name */
+ sizeof(ZstdBufferWithSegments),/* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)BufferWithSegments_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ &BufferWithSegments_sq, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ &BufferWithSegments_as_buffer, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ BufferWithSegments__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ BufferWithSegments_methods, /* tp_methods */
+ BufferWithSegments_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)BufferWithSegments_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+PyDoc_STRVAR(BufferSegments__doc__,
+"BufferSegments - Represents segments/offsets within a BufferWithSegments\n"
+);
+
+/* tp_dealloc: drop the reference to the parent object, then free ourselves. */
+static void BufferSegments_dealloc(ZstdBufferSegments* self) {
+    PyObject* owner = self->parent;
+
+    /* Clear the field before releasing, as Py_CLEAR would. */
+    self->parent = NULL;
+    Py_XDECREF(owner);
+
+    PyObject_Del(self);
+}
+
+#if PY_MAJOR_VERSION >= 3
+/* bf_getbuffer slot: expose the raw BufferSegment array as a read-only view. */
+static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) {
+    void* rawSegments = (void*)self->segments;
+    Py_ssize_t byteLength = self->segmentCount * sizeof(BufferSegment);
+
+    return PyBuffer_FillInfo(view, (PyObject*)self, rawSegments, byteLength, 1, flags);
+}
+#else
+/* bf_getreadbuffer slot: hand out the raw BufferSegment array (segment 0 only). */
+static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) {
+    if (0 != segment) {
+        PyErr_SetString(PyExc_ValueError, "segment number must be 0");
+        return -1;
+    }
+
+    *ptrptr = (void*)self->segments;
+
+    return self->segmentCount * sizeof(BufferSegment);
+}
+
+/* bf_getsegcount slot: always reports a single segment. */
+static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) {
+    if (NULL != len) {
+        *len = 1;
+    }
+
+    return 1;
+}
+#endif
+
+static PyBufferProcs BufferSegments_as_buffer = {
+#if PY_MAJOR_VERSION >= 3
+ (getbufferproc)BufferSegments_getbuffer,
+ 0
+#else
+ (readbufferproc)BufferSegments_getreadbuffer,
+ 0,
+ (segcountproc)BufferSegments_getsegcount,
+ 0
+#endif
+};
+
+PyTypeObject ZstdBufferSegmentsType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.BufferSegments", /* tp_name */
+ sizeof(ZstdBufferSegments),/* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)BufferSegments_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ &BufferSegments_as_buffer, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ BufferSegments__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+PyDoc_STRVAR(BufferSegment__doc__,
+ "BufferSegment - Represents a segment within a BufferWithSegments\n"
+);
+
+/* tp_dealloc: drop the reference to the parent object, then free ourselves. */
+static void BufferSegment_dealloc(ZstdBufferSegment* self) {
+    PyObject* owner = self->parent;
+
+    /* Clear the field before releasing, as Py_CLEAR would. */
+    self->parent = NULL;
+    Py_XDECREF(owner);
+
+    PyObject_Del(self);
+}
+
+/* sq_length slot: size of this segment in bytes. */
+static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) {
+    const Py_ssize_t byteCount = self->dataSize;
+
+    return byteCount;
+}
+
+#if PY_MAJOR_VERSION >= 3
+/* bf_getbuffer slot: expose this segment's bytes as a read-only view. */
+static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) {
+    PyObject* exporter = (PyObject*)self;
+
+    return PyBuffer_FillInfo(view, exporter, self->data, self->dataSize, 1, flags);
+}
+#else
+/* bf_getreadbuffer slot: hand out this segment's bytes (segment 0 only). */
+static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) {
+    if (0 != segment) {
+        PyErr_SetString(PyExc_ValueError, "segment number must be 0");
+        return -1;
+    }
+
+    *ptrptr = self->data;
+
+    return self->dataSize;
+}
+
+/* bf_getsegcount slot: always reports a single segment. */
+static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) {
+    if (NULL != len) {
+        *len = 1;
+    }
+
+    return 1;
+}
+#endif
+
+PyDoc_STRVAR(BufferSegment_tobytes__doc__,
+"Obtain a bytes instance for this segment.\n"
+);
+
+/* Method tobytes(): copy this segment's bytes into a new bytes object. */
+static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) {
+    PyObject* copy = PyBytes_FromStringAndSize(self->data, self->dataSize);
+
+    return copy;
+}
+
+static PySequenceMethods BufferSegment_sq = {
+ (lenfunc)BufferSegment_length, /* sq_length */
+ 0, /* sq_concat */
+ 0, /* sq_repeat */
+ 0, /* sq_item */
+ 0, /* sq_ass_item */
+ 0, /* sq_contains */
+ 0, /* sq_inplace_concat */
+ 0 /* sq_inplace_repeat */
+};
+
+static PyBufferProcs BufferSegment_as_buffer = {
+#if PY_MAJOR_VERSION >= 3
+ (getbufferproc)BufferSegment_getbuffer,
+ 0
+#else
+ (readbufferproc)BufferSegment_getreadbuffer,
+ 0,
+ (segcountproc)BufferSegment_getsegcount,
+ 0
+#endif
+};
+
+static PyMethodDef BufferSegment_methods[] = {
+ { "tobytes", (PyCFunction)BufferSegment_tobytes,
+ METH_NOARGS, BufferSegment_tobytes__doc__ },
+ { NULL, NULL }
+};
+
+static PyMemberDef BufferSegment_members[] = {
+ { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
+ "offset of segment within parent buffer" },
+ { NULL }
+};
+
+PyTypeObject ZstdBufferSegmentType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.BufferSegment", /* tp_name */
+ sizeof(ZstdBufferSegment),/* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)BufferSegment_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ &BufferSegment_sq, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ &BufferSegment_as_buffer, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ BufferSegment__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ BufferSegment_methods, /* tp_methods */
+ BufferSegment_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+PyDoc_STRVAR(BufferWithSegmentsCollection__doc__,
+"Represents a collection of BufferWithSegments.\n"
+);
+
+/**
+ * tp_dealloc: free the cumulative-offset table, release every referenced
+ * BufferWithSegments and free the pointer array, then free the object.
+ */
+static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) {
+    Py_ssize_t remaining;
+
+    /* PyMem_Free() accepts NULL, so no guard is needed here. */
+    PyMem_Free(self->firstElements);
+    self->firstElements = NULL;
+
+    if (NULL != self->buffers) {
+        remaining = 0;
+        while (remaining < self->bufferCount) {
+            Py_CLEAR(self->buffers[remaining]);
+            remaining++;
+        }
+
+        PyMem_Free(self->buffers);
+        self->buffers = NULL;
+    }
+
+    PyObject_Del(self);
+}
+
+/**
+ * tp_init for BufferWithSegmentsCollection:
+ * ``BufferWithSegmentsCollection(buffer, ...)``.
+ *
+ * Every positional argument must be a non-empty BufferWithSegments. The
+ * collection keeps a strong reference to each one and fills
+ * self->firstElements so that entry ``i`` holds the total number of
+ * segments in buffers ``0..i`` (the last entry is therefore the collection
+ * length).
+ *
+ * The function takes the full three-argument initproc signature because it
+ * is installed in tp_init and invoked with (self, args, kwargs); keyword
+ * arguments are accepted but not used.
+ *
+ * Returns 0 on success, -1 with a Python exception set on failure.
+ */
+static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args, PyObject* kwargs) {
+    Py_ssize_t size;
+    Py_ssize_t i;
+    Py_ssize_t offset = 0;
+
+    (void)kwargs;
+
+    size = PyTuple_Size(args);
+    if (-1 == size) {
+        return -1;
+    }
+
+    if (0 == size) {
+        PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
+        return -1;
+    }
+
+    /* Validate everything up front so nothing is allocated or referenced
+       when an argument is rejected. */
+    for (i = 0; i < size; i++) {
+        PyObject* item = PyTuple_GET_ITEM(args, i);
+        if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) {
+            PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances");
+            return -1;
+        }
+
+        if (0 == ((ZstdBufferWithSegments*)item)->segmentCount ||
+            0 == ((ZstdBufferWithSegments*)item)->dataSize) {
+            PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty");
+            return -1;
+        }
+    }
+
+    self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*));
+    if (NULL == self->buffers) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
+    if (NULL == self->firstElements) {
+        PyMem_Free(self->buffers);
+        self->buffers = NULL;
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    self->bufferCount = size;
+
+    for (i = 0; i < size; i++) {
+        ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i);
+
+        self->buffers[i] = item;
+        Py_INCREF(item);
+
+        /* Running total of segments up to and including buffer i. */
+        offset += item->segmentCount;
+        self->firstElements[i] = offset;
+    }
+
+    return 0;
+}
+
+/* Method size(): sum of the lengths of every segment in every buffer. */
+static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) {
+    unsigned long long total = 0;
+    Py_ssize_t bufferIndex;
+
+    for (bufferIndex = 0; bufferIndex < self->bufferCount; bufferIndex++) {
+        ZstdBufferWithSegments* buffer = self->buffers[bufferIndex];
+        Py_ssize_t segmentIndex = 0;
+
+        while (segmentIndex < buffer->segmentCount) {
+            total += buffer->segments[segmentIndex].length;
+            segmentIndex++;
+        }
+    }
+
+    return PyLong_FromUnsignedLongLong(total);
+}
+
+/**
+ * sq_length slot: total number of segments across all buffers.
+ *
+ * The last entry of firstElements holds that total. An instance whose
+ * initializer never completed has no firstElements table (and no buffers);
+ * it reports a length of 0 instead of reading through a NULL pointer at
+ * index -1.
+ */
+Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) {
+    if (NULL == self->firstElements || self->bufferCount <= 0) {
+        return 0;
+    }
+
+    return self->firstElements[self->bufferCount - 1];
+}
+
+/**
+ * sq_item slot: resolve collection-wide index ``i`` to a segment.
+ *
+ * Walks the cumulative segment counts in firstElements to find the buffer
+ * that contains index ``i`` and delegates to BufferWithSegments_item() with
+ * the index rebased to that buffer. Raises IndexError for an out-of-range
+ * index.
+ */
+static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) {
+    Py_ssize_t bufferIndex;
+    Py_ssize_t rangeStart = 0;
+
+    if (i < 0) {
+        PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
+        return NULL;
+    }
+
+    if (i >= BufferWithSegmentsCollection_length(self)) {
+        PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
+            BufferWithSegmentsCollection_length(self));
+        return NULL;
+    }
+
+    for (bufferIndex = 0; bufferIndex < self->bufferCount; bufferIndex++) {
+        const Py_ssize_t rangeEnd = self->firstElements[bufferIndex];
+
+        if (i < rangeEnd) {
+            return BufferWithSegments_item(self->buffers[bufferIndex], i - rangeStart);
+        }
+
+        /* The next buffer's first segment follows this buffer's last one. */
+        rangeStart = rangeEnd;
+    }
+
+    PyErr_SetString(ZstdError, "error resolving segment; this should not happen");
+    return NULL;
+}
+
+static PySequenceMethods BufferWithSegmentsCollection_sq = {
+ (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */
+ 0, /* sq_concat */
+ 0, /* sq_repeat */
+ (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */
+ 0, /* sq_ass_item */
+ 0, /* sq_contains */
+ 0, /* sq_inplace_concat */
+ 0 /* sq_inplace_repeat */
+};
+
+static PyMethodDef BufferWithSegmentsCollection_methods[] = {
+ { "size", (PyCFunction)BufferWithSegmentsCollection_size,
+ METH_NOARGS, PyDoc_STR("total size in bytes of all segments") },
+ { NULL, NULL }
+};
+
+PyTypeObject ZstdBufferWithSegmentsCollectionType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.BufferWithSegmentsCollection", /* tp_name */
+ sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ &BufferWithSegmentsCollection_sq, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ BufferWithSegmentsCollection__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ /* TODO implement iterator for performance. */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ BufferWithSegmentsCollection_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)BufferWithSegmentsCollection_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/**
+ * Ready the four buffer types and publish each one on ``mod``.
+ *
+ * Types are processed in a fixed order; if PyType_Ready() fails for one of
+ * them the function returns immediately and the remaining types are neither
+ * readied nor added.
+ */
+void bufferutil_module_init(PyObject* mod) {
+    struct {
+        const char* exportName;
+        PyTypeObject* typeObject;
+    } exported[] = {
+        { "BufferWithSegments", &ZstdBufferWithSegmentsType },
+        { "BufferSegments", &ZstdBufferSegmentsType },
+        { "BufferSegment", &ZstdBufferSegmentType },
+        { "BufferWithSegmentsCollection", &ZstdBufferWithSegmentsCollectionType },
+    };
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(exported) / sizeof(exported[0]); idx++) {
+        PyTypeObject* type = exported[idx].typeObject;
+
+        Py_TYPE(type) = &PyType_Type;
+        if (PyType_Ready(type) < 0) {
+            return;
+        }
+
+        Py_INCREF(type);
+        PyModule_AddObject(mod, exported[idx].exportName, (PyObject*)type);
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/compressionchunker.c b/contrib/python/zstandard/py2/c-ext/compressionchunker.c
new file mode 100644
index 00000000000..6677ebe59f1
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/compressionchunker.c
@@ -0,0 +1,360 @@
+/**
+* Copyright (c) 2018-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__,
+ "Iterator of output chunks from ZstdCompressionChunker.\n"
+);
+
+/* tp_dealloc: release the chunker reference (if any) and free the iterator. */
+static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) {
+    if (NULL != self->chunker) {
+        Py_DECREF(self->chunker);
+    }
+
+    PyObject_Del(self);
+}
+
+/* tp_iter slot: the iterator is its own iterable; return a new reference. */
+static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) {
+    PyObject* same = self;
+
+    Py_INCREF(same);
+
+    return same;
+}
+
+static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) { /*
+ * tp_iternext: produce the next output chunk as a bytes object.
+ *
+ * Works on the state of self->chunker. While input remains it is compressed
+ * with ZSTD_e_continue and a chunk is returned each time the output buffer
+ * fills completely. Once input is exhausted, normal mode ends the iteration;
+ * flush and finish modes run one more compressor call per invocation and
+ * return whatever output that call left in the buffer.
+ *
+ * Returns a new bytes object, or NULL. NULL with a ZstdError set reports a
+ * failure; NULL without an exception set ends the iteration.
+ */
+ size_t zresult;
+ PyObject* chunk;
+ ZstdCompressionChunker* chunker = self->chunker;
+ ZSTD_EndDirective zFlushMode;
+
+ if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) { /* flush/finish iterators refuse to run over unconsumed input */
+ PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()");
+ return NULL;
+ }
+
+ if (chunker->finished) { /* set below once a finish-mode call reports zresult == 0 */
+ return NULL;
+ }
+
+ /* If we have data left in the input, consume it. The compressor call runs
+ * between Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS. */
+ while (chunker->input.pos < chunker->input.size) {
+ Py_BEGIN_ALLOW_THREADS
+ zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
+ &chunker->input, ZSTD_e_continue);
+ Py_END_ALLOW_THREADS
+
+ /* Input is fully consumed. The input state is reset and the buffer view
+ * released here, before the error check, so this also happens when the
+ * call that consumed the last input reported an error. */
+ if (chunker->input.pos == chunker->input.size) {
+ chunker->input.src = NULL;
+ chunker->input.pos = 0;
+ chunker->input.size = 0;
+ PyBuffer_Release(&chunker->inBuffer);
+ }
+
+ if (ZSTD_isError(zresult)) {
+ PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+ return NULL;
+ }
+
+ /* If it produced a full output chunk, emit it. Only a completely filled
+ * output buffer is emitted from this loop. */
+ if (chunker->output.pos == chunker->output.size) {
+ chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
+ if (!chunk) {
+ return NULL;
+ }
+
+ chunker->output.pos = 0; /* the output buffer is reused for the next chunk */
+
+ return chunk;
+ }
+
+ /* Else continue to compress available input data. */
+ }
+
+ /* We also need this here for the special case of an empty input buffer. */
+ if (chunker->input.pos == chunker->input.size) {
+ chunker->input.src = NULL;
+ chunker->input.pos = 0;
+ chunker->input.size = 0;
+ PyBuffer_Release(&chunker->inBuffer);
+ }
+
+ /* No more input data. A partial chunk may be in chunker->output.
+ * If we're in normal compression mode, we're done. Otherwise if we're in
+ * flush or finish mode, we need to emit what data remains.
+ */
+ if (self->mode == compressionchunker_mode_normal) {
+ /* We don't need to set StopIteration. */
+ return NULL;
+ }
+
+ if (self->mode == compressionchunker_mode_flush) {
+ zFlushMode = ZSTD_e_flush;
+ }
+ else if (self->mode == compressionchunker_mode_finish) {
+ zFlushMode = ZSTD_e_end;
+ }
+ else {
+ PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen");
+ return NULL;
+ }
+
+ Py_BEGIN_ALLOW_THREADS
+ zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
+ &chunker->input, zFlushMode);
+ Py_END_ALLOW_THREADS
+
+ if (ZSTD_isError(zresult)) {
+ PyErr_Format(ZstdError, "zstd compress error: %s",
+ ZSTD_getErrorName(zresult));
+ return NULL;
+ }
+
+ if (!zresult && chunker->output.pos == 0) { /* zero result and no output bytes: end the iteration */
+ return NULL;
+ }
+
+ chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos); /* may be shorter than a full chunk */
+ if (!chunk) {
+ return NULL;
+ }
+
+ chunker->output.pos = 0;
+
+ if (!zresult && self->mode == compressionchunker_mode_finish) { /* a zero result in finish mode marks the chunker as finished */
+ chunker->finished = 1;
+ }
+
+ return chunk;
+}
+
+PyTypeObject ZstdCompressionChunkerIteratorType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdCompressionChunkerIterator", /* tp_name */
+ sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdCompressionChunkerIterator__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ ZstdCompressionChunkerIterator_iter, /* tp_iter */
+ (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+PyDoc_STRVAR(ZstdCompressionChunker__doc__,
+ "Compress chunks iteratively into exact chunk sizes.\n"
+);
+
+/**
+ * tp_dealloc: release the held input buffer view, free the output buffer,
+ * drop the compressor reference and free the object.
+ */
+static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) {
+    PyBuffer_Release(&self->inBuffer);
+    self->input.src = NULL;
+
+    PyMem_Free(self->output.dst);
+    self->output.dst = NULL;
+
+    if (NULL != self->compressor) {
+        Py_DECREF(self->compressor);
+    }
+
+    PyObject_Del(self);
+}
+
+/**
+ * Method compress(data): stage ``data`` as the chunker's input and return an
+ * iterator (normal mode) that yields the resulting output chunks.
+ *
+ * Raises ZstdError if compression has already finished or if
+ * self->inBuffer.obj is still set from an earlier call, and ValueError if
+ * ``data`` is not a contiguous buffer of at most one dimension. The buffer
+ * view stays held in self->inBuffer on success and is released on every
+ * failure path after it was acquired.
+ */
+static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        NULL
+    };
+#if PY_MAJOR_VERSION >= 3
+    const char* argFormat = "y*:compress";
+#else
+    const char* argFormat = "s*:compress";
+#endif
+    PyObject* created;
+    ZstdCompressionChunkerIterator* iterator;
+
+    if (self->finished) {
+        PyErr_SetString(ZstdError, "cannot call compress() after compression finished");
+        return NULL;
+    }
+
+    if (self->inBuffer.obj) {
+        PyErr_SetString(ZstdError,
+            "cannot perform operation before consuming output from previous operation");
+        return NULL;
+    }
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, argFormat, kwlist, &self->inBuffer)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        PyBuffer_Release(&self->inBuffer);
+        return NULL;
+    }
+
+    created = PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
+    if (NULL == created) {
+        PyBuffer_Release(&self->inBuffer);
+        return NULL;
+    }
+    iterator = (ZstdCompressionChunkerIterator*)created;
+
+    /* Point the zstd input descriptor at the freshly acquired view. */
+    self->input.pos = 0;
+    self->input.size = self->inBuffer.len;
+    self->input.src = self->inBuffer.buf;
+
+    iterator->mode = compressionchunker_mode_normal;
+    Py_INCREF(self);
+    iterator->chunker = self;
+
+    return iterator;
+}
+
+/**
+ * Method finish(): return an iterator in finish mode bound to this chunker.
+ *
+ * Raises ZstdError if compression has already finished or if
+ * self->inBuffer.obj is still set from an earlier compress() call.
+ */
+static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) {
+    PyObject* created;
+    ZstdCompressionChunkerIterator* iterator;
+
+    if (self->finished) {
+        PyErr_SetString(ZstdError, "cannot call finish() after compression finished");
+        return NULL;
+    }
+
+    if (self->inBuffer.obj) {
+        PyErr_SetString(ZstdError,
+            "cannot call finish() before consuming output from previous operation");
+        return NULL;
+    }
+
+    created = PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
+    if (NULL == created) {
+        return NULL;
+    }
+    iterator = (ZstdCompressionChunkerIterator*)created;
+
+    iterator->mode = compressionchunker_mode_finish;
+    Py_INCREF(self);
+    iterator->chunker = self;
+
+    return iterator;
+}
+
+/**
+ * Method flush(): return an iterator in flush mode bound to this chunker.
+ *
+ * ``args`` and ``kwargs`` are received but not inspected. Raises ZstdError
+ * if compression has already finished or if self->inBuffer.obj is still set
+ * from an earlier compress() call.
+ */
+static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
+    PyObject* created;
+    ZstdCompressionChunkerIterator* iterator;
+
+    if (self->finished) {
+        PyErr_SetString(ZstdError, "cannot call flush() after compression finished");
+        return NULL;
+    }
+
+    if (self->inBuffer.obj) {
+        PyErr_SetString(ZstdError,
+            "cannot call flush() before consuming output from previous operation");
+        return NULL;
+    }
+
+    created = PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
+    if (NULL == created) {
+        return NULL;
+    }
+    iterator = (ZstdCompressionChunkerIterator*)created;
+
+    iterator->mode = compressionchunker_mode_flush;
+    Py_INCREF(self);
+    iterator->chunker = self;
+
+    return iterator;
+}
+
+/* Method table for ZstdCompressionChunker; the docstrings are what help()
+   shows for each method. */
+static PyMethodDef ZstdCompressionChunker_methods[] = {
+    { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS,
+    PyDoc_STR("compress data") },
+    { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
+    PyDoc_STR("finish compression operation") },
+    { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS,
+    PyDoc_STR("flush compression operation") },
+    { NULL, NULL }
+};
+
+PyTypeObject ZstdCompressionChunkerType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdCompressionChunkerType", /* tp_name */
+ sizeof(ZstdCompressionChunker), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdCompressionChunker__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ ZstdCompressionChunker_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/**
+ * Ready the chunker iterator type and then the chunker type. Stops at the
+ * first PyType_Ready() failure. Nothing is added to ``module`` here.
+ */
+void compressionchunker_module_init(PyObject* module) {
+    PyTypeObject* types[2];
+    size_t idx;
+
+    types[0] = &ZstdCompressionChunkerIteratorType;
+    types[1] = &ZstdCompressionChunkerType;
+
+    for (idx = 0; idx < sizeof(types) / sizeof(types[0]); idx++) {
+        Py_TYPE(types[idx]) = &PyType_Type;
+        if (PyType_Ready(types[idx]) < 0) {
+            return;
+        }
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/compressiondict.c b/contrib/python/zstandard/py2/c-ext/compressiondict.c
new file mode 100644
index 00000000000..1379861648e
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/compressiondict.c
@@ -0,0 +1,411 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+/**
+ * Train a compression dictionary from a list of bytes samples.
+ *
+ * Python arguments: dict_size (int) and samples (list of bytes), followed by
+ * the optional k, d, notifications, dict_id, level, steps and threads values.
+ * A negative threads value is replaced by the result of cpu_count().
+ *
+ * All samples are copied into one contiguous buffer before training, so the
+ * GIL can be released while zstd runs.
+ *
+ * Returns a new ZstdCompressionDict owning the trained dictionary bytes, or
+ * NULL with a Python exception set: ValueError for a negative dict_size or a
+ * non-bytes sample, MemoryError for a failed allocation, ZstdError when the
+ * training call reports an error.
+ */
+ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "dict_size",
+        "samples",
+        "k",
+        "d",
+        "notifications",
+        "dict_id",
+        "level",
+        "steps",
+        "threads",
+        NULL
+    };
+
+    Py_ssize_t requestedSize;
+    size_t capacity;
+    PyObject* samples;
+    unsigned k = 0;
+    unsigned d = 0;
+    unsigned notifications = 0;
+    unsigned dictID = 0;
+    int level = 0;
+    unsigned steps = 0;
+    int threads = 0;
+    ZDICT_cover_params_t params;
+    Py_ssize_t samplesLen;
+    Py_ssize_t i;
+    size_t samplesSize = 0;
+    void* sampleBuffer = NULL;
+    size_t* sampleSizes = NULL;
+    void* sampleOffset;
+    Py_ssize_t sampleSize;
+    void* dict = NULL;
+    size_t zresult;
+    ZstdCompressionDict* result = NULL;
+
+    /* The "n" format unit stores a signed Py_ssize_t, so parse into one and
+       convert explicitly instead of writing through a size_t pointer. */
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiIi:train_dictionary",
+        kwlist, &requestedSize, &PyList_Type, &samples,
+        &k, &d, &notifications, &dictID, &level, &steps, &threads)) {
+        return NULL;
+    }
+
+    /* A negative size would otherwise wrap to an enormous allocation request
+       and surface as a misleading MemoryError. */
+    if (requestedSize < 0) {
+        PyErr_SetString(PyExc_ValueError, "dict_size must be non-negative");
+        return NULL;
+    }
+    capacity = (size_t)requestedSize;
+
+    if (threads < 0) {
+        threads = cpu_count();
+    }
+
+    memset(&params, 0, sizeof(params));
+    params.k = k;
+    params.d = d;
+    params.steps = steps;
+    params.nbThreads = threads;
+    params.zParams.notificationLevel = notifications;
+    params.zParams.dictID = dictID;
+    params.zParams.compressionLevel = level;
+
+    /* Figure out total size of input samples. */
+    samplesLen = PyList_Size(samples);
+    for (i = 0; i < samplesLen; i++) {
+        PyObject* sampleItem = PyList_GET_ITEM(samples, i);
+
+        if (!PyBytes_Check(sampleItem)) {
+            PyErr_SetString(PyExc_ValueError, "samples must be bytes");
+            goto finally;
+        }
+        samplesSize += PyBytes_GET_SIZE(sampleItem);
+    }
+
+    sampleBuffer = PyMem_Malloc(samplesSize);
+    if (!sampleBuffer) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+
+    sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
+    if (!sampleSizes) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+
+    /* Pack the samples back to back and record each length. */
+    sampleOffset = sampleBuffer;
+    for (i = 0; i < samplesLen; i++) {
+        PyObject* sampleItem = PyList_GET_ITEM(samples, i);
+        sampleSize = PyBytes_GET_SIZE(sampleItem);
+        sampleSizes[i] = sampleSize;
+        memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
+        sampleOffset = (char*)sampleOffset + sampleSize;
+    }
+
+    dict = PyMem_Malloc(capacity);
+    if (!dict) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    /* No parameters uses the default function, which will use default params
+       and call ZDICT_optimizeTrainFromBuffer_cover under the hood. */
+    if (!params.k && !params.d && !params.zParams.compressionLevel
+        && !params.zParams.notificationLevel && !params.zParams.dictID) {
+        zresult = ZDICT_trainFromBuffer(dict, capacity, sampleBuffer,
+            sampleSizes, (unsigned)samplesLen);
+    }
+    /* Use optimize mode if user controlled steps or threads explicitly. */
+    else if (params.steps || params.nbThreads) {
+        zresult = ZDICT_optimizeTrainFromBuffer_cover(dict, capacity,
+            sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
+    }
+    /* Non-optimize mode with explicit control. */
+    else {
+        zresult = ZDICT_trainFromBuffer_cover(dict, capacity,
+            sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
+    }
+    Py_END_ALLOW_THREADS
+
+    if (ZDICT_isError(zresult)) {
+        PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
+        goto finally;
+    }
+
+    result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
+    if (!result) {
+        goto finally;
+    }
+
+    /* Ownership of `dict` moves to the new object from here on. */
+    result->dictData = dict;
+    result->dictSize = zresult;
+    result->dictType = ZSTD_dct_fullDict;
+    result->d = params.d;
+    result->k = params.k;
+    result->cdict = NULL;
+    result->ddict = NULL;
+
+finally:
+    /* PyMem_Free(NULL) is a no-op, so every exit path can share this. */
+    if (!result) {
+        PyMem_Free(dict);
+    }
+    PyMem_Free(sampleBuffer);
+    PyMem_Free(sampleSizes);
+
+    return result;
+}
+
+/* Makes sure dict->ddict holds a decompression dictionary, creating it on
+ * first use with the GIL released. The DDict is created with ZSTD_dlm_byRef
+ * over dict->dictData.
+ *
+ * Returns 0 when dict->ddict is available, 1 with ZstdError set otherwise. */
+int ensure_ddict(ZstdCompressionDict* dict) {
+    if (!dict->ddict) {
+        Py_BEGIN_ALLOW_THREADS
+        dict->ddict = ZSTD_createDDict_advanced(dict->dictData, dict->dictSize,
+            ZSTD_dlm_byRef, dict->dictType, ZSTD_defaultCMem);
+        Py_END_ALLOW_THREADS
+
+        if (!dict->ddict) {
+            PyErr_SetString(ZstdError, "could not create decompression dict");
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+PyDoc_STRVAR(ZstdCompressionDict__doc__,
+"ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
+"\n"
+"This type holds the results of a computed Zstandard compression dictionary.\n"
+"Instances are obtained by calling ``train_dictionary()`` or by passing\n"
+"bytes obtained from another source into the constructor.\n"
+);
+
+/* tp_init for ZstdCompressionDict: ZstdCompressionDict(data, dict_type=...).
+ *
+ * Copies the bytes exposed by `data` into memory owned by the object and
+ * records the requested dictionary type. `data` must be a C-contiguous buffer
+ * with at most one dimension, and dict_type must be one of ZSTD_dct_auto,
+ * ZSTD_dct_rawContent or ZSTD_dct_fullDict.
+ *
+ * Returns 0 on success, -1 with an exception set on failure. */
+static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        "dict_type",
+        NULL
+    };
+
+    int result = -1;
+    Py_buffer source;
+    unsigned dictType = ZSTD_dct_auto;
+
+    /* NOTE(review): the fields are reset without freeing previous values, so
+       calling __init__ a second time on a live object would drop the old
+       buffers - confirm whether re-initialization needs to be supported. */
+    self->dictData = NULL;
+    self->dictSize = 0;
+    self->cdict = NULL;
+    self->ddict = NULL;
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|I:ZstdCompressionDict",
+#endif
+        kwlist, &source, &dictType)) {
+        return -1;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent
+        && dictType != ZSTD_dct_fullDict) {
+        /* dictType is unsigned; %u keeps large values from printing as
+           negative numbers. */
+        PyErr_Format(PyExc_ValueError,
+            "invalid dictionary load mode: %u; must use DICT_TYPE_* constants",
+            dictType);
+        goto finally;
+    }
+
+    self->dictType = dictType;
+
+    /* Keep a private copy so the object does not depend on the lifetime of
+       the caller's buffer. */
+    self->dictData = PyMem_Malloc(source.len);
+    if (!self->dictData) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+
+    memcpy(self->dictData, source.buf, source.len);
+    self->dictSize = source.len;
+
+    result = 0;
+
+finally:
+    PyBuffer_Release(&source);
+    return result;
+}
+
+/* tp_dealloc: frees the precomputed compression and decompression
+ * dictionaries (if any), the owned dictionary bytes, and finally the object
+ * itself. */
+static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
+    if (self->cdict != NULL) {
+        ZSTD_freeCDict(self->cdict);
+        self->cdict = NULL;
+    }
+
+    if (self->ddict != NULL) {
+        ZSTD_freeDDict(self->ddict);
+        self->ddict = NULL;
+    }
+
+    /* PyMem_Free() accepts NULL, so no guard is needed for the raw bytes. */
+    PyMem_Free(self->dictData);
+    self->dictData = NULL;
+
+    PyObject_Del(self);
+}
+
+PyDoc_STRVAR(ZstdCompressionDict_precompute_compress__doc__,
+"Precompute a dictionary so it can be used by multiple compressors.\n"
+);
+
+/* precompute_compress(level=0, compression_params=None)
+ *
+ * Builds self->cdict from the stored dictionary bytes, using parameters
+ * derived either from `level` or from a ZstdCompressionParameters object.
+ * Exactly one of the two must be given. Any previously computed CDict is
+ * freed first.
+ *
+ * Returns None on success, NULL with ValueError/ZstdError set on failure. */
+static PyObject* ZstdCompressionDict_precompute_compress(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "level",
+        "compression_params",
+        NULL
+    };
+
+    ZSTD_compressionParameters cParams;
+    ZstdCompressionParametersObject* compressionParams = NULL;
+    int level = 0;
+    size_t freeStatus;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!:precompute_compress", kwlist,
+        &level, &ZstdCompressionParametersType, &compressionParams)) {
+        return NULL;
+    }
+
+    if (level) {
+        if (compressionParams) {
+            PyErr_SetString(PyExc_ValueError,
+                "must only specify one of level or compression_params");
+            return NULL;
+        }
+    }
+    else if (!compressionParams) {
+        PyErr_SetString(PyExc_ValueError,
+            "must specify one of level or compression_params");
+        return NULL;
+    }
+
+    /* Drop any CDict left over from an earlier call. */
+    if (self->cdict) {
+        freeStatus = ZSTD_freeCDict(self->cdict);
+        self->cdict = NULL;
+        if (ZSTD_isError(freeStatus)) {
+            PyErr_Format(ZstdError, "unable to free CDict: %s",
+                ZSTD_getErrorName(freeStatus));
+            return NULL;
+        }
+    }
+
+    if (!level) {
+        if (to_cparams(compressionParams, &cParams)) {
+            return NULL;
+        }
+    }
+    else {
+        cParams = ZSTD_getCParams(level, 0, self->dictSize);
+    }
+
+    assert(!self->cdict);
+    /* ZSTD_dlm_byRef: the CDict references self->dictData instead of copying. */
+    self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize,
+        ZSTD_dlm_byRef, self->dictType, cParams, ZSTD_defaultCMem);
+
+    if (self->cdict == NULL) {
+        PyErr_SetString(ZstdError, "unable to precompute dictionary");
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+/* dict_id() -- returns the dictionary ID reported by ZDICT_getDictID() for
+ * the stored dictionary bytes as a Python integer. */
+static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
+    unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
+
+    /* The ID is unsigned: converting through a signed long would turn values
+       above LONG_MAX negative on platforms where long is 32 bits wide. */
+    return PyLong_FromUnsignedLong(dictID);
+}
+
+/* as_bytes() -- returns a new bytes object holding a copy of the stored
+ * dictionary data. */
+static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
+    const char* raw = self->dictData;
+    Py_ssize_t rawLen = self->dictSize;
+
+    return PyBytes_FromStringAndSize(raw, rawLen);
+}
+
+/* Method table for ZstdCompressionDict. dict_id() and as_bytes() take no
+ * arguments; precompute_compress() accepts positional and keyword arguments.
+ * The table is terminated by the all-NULL sentinel entry. */
+static PyMethodDef ZstdCompressionDict_methods[] = {
+ { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
+ PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
+ { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
+ PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
+ { "precompute_compress", (PyCFunction)ZstdCompressionDict_precompute_compress,
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressionDict_precompute_compress__doc__ },
+ { NULL, NULL }
+};
+
+/* Read-only attributes exposing the `k` and `d` fields of the struct as
+ * unsigned integers. train_dictionary() fills them from the cover
+ * parameters it used. */
+static PyMemberDef ZstdCompressionDict_members[] = {
+ { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY,
+ "segment size" },
+ { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY,
+ "dmer size" },
+ { NULL }
+};
+
+/* sq_length: len(dict) is the size in bytes of the stored dictionary data. */
+static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
+    const Py_ssize_t byteCount = self->dictSize;
+
+    return byteCount;
+}
+
+/* Sequence protocol for ZstdCompressionDict: only sq_length is implemented,
+ * so len() works but indexing, concatenation and containment are not
+ * provided by this table. */
+static PySequenceMethods ZstdCompressionDict_sq = {
+ (lenfunc)ZstdCompressionDict_length, /* sq_length */
+ 0, /* sq_concat */
+ 0, /* sq_repeat */
+ 0, /* sq_item */
+ 0, /* sq_ass_item */
+ 0, /* sq_contains */
+ 0, /* sq_inplace_concat */
+ 0 /* sq_inplace_repeat */
+};
+
+/* Type object for ZstdCompressionDict.
+ *
+ * Wires up the deallocator, the length-only sequence protocol, the method
+ * and member tables, and ZstdCompressionDict_init as tp_init. Instances are
+ * allocated through PyType_GenericNew; Py_TPFLAGS_BASETYPE allows
+ * subclassing. */
+PyTypeObject ZstdCompressionDictType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdCompressionDict", /* tp_name */
+ sizeof(ZstdCompressionDict), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ &ZstdCompressionDict_sq, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdCompressionDict__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ ZstdCompressionDict_methods, /* tp_methods */
+ ZstdCompressionDict_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)ZstdCompressionDict_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/* Finalizes ZstdCompressionDictType and publishes it on `mod` as
+ * "ZstdCompressionDict". On failure the function returns early and leaves
+ * the Python exception set for the caller to inspect. */
+void compressiondict_module_init(PyObject* mod) {
+    PyObject* typeObj = (PyObject*)&ZstdCompressionDictType;
+
+    Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
+    if (PyType_Ready(&ZstdCompressionDictType) < 0) {
+        return;
+    }
+
+    /* PyModule_AddObject() steals the reference only when it succeeds, so
+       the extra reference must be released by hand on failure. */
+    Py_INCREF(typeObj);
+    if (PyModule_AddObject(mod, "ZstdCompressionDict", typeObj) < 0) {
+        Py_DECREF(typeObj);
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/compressionparams.c b/contrib/python/zstandard/py2/c-ext/compressionparams.c
new file mode 100644
index 00000000000..e5e8c55fea2
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/compressionparams.c
@@ -0,0 +1,572 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+/* Sets a single parameter on `params`.
+ *
+ * Returns 0 on success. On failure sets ZstdError with the zstd error name
+ * and returns 1. */
+int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
+    const size_t status = ZSTD_CCtxParams_setParameter(params, param, value);
+
+    if (!ZSTD_isError(status)) {
+        return 0;
+    }
+
+    PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
+        ZSTD_getErrorName(status));
+    return 1;
+}
+
+#define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
+
+#define TRY_COPY_PARAMETER(source, dest, param) { \
+ int result; \
+ size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \
+ if (ZSTD_isError(zresult)) { \
+ return 1; \
+ } \
+ zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \
+ if (ZSTD_isError(zresult)) { \
+ return 1; \
+ } \
+}
+
+int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);
+
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
+ TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);
+
+ return 0;
+}
+
+/* Puts params->params into a freshly reset state, allocating the underlying
+ * ZSTD_CCtx_params on first use, and then runs set_parameters() with the
+ * object as both source and destination.
+ *
+ * Returns 0 on success and non-zero on failure (MemoryError is set when the
+ * allocation fails). */
+int reset_params(ZstdCompressionParametersObject* params) {
+    if (!params->params) {
+        params->params = ZSTD_createCCtxParams();
+        if (!params->params) {
+            PyErr_NoMemory();
+            return 1;
+        }
+    }
+    else {
+        ZSTD_CCtxParams_reset(params->params);
+    }
+
+    return set_parameters(params->params, params);
+}
+
+#define TRY_GET_PARAMETER(params, param, value) { \
+ size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \
+ if (ZSTD_isError(zresult)) { \
+ PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \
+ return 1; \
+ } \
+}
+
+/* Fills `cparams` from the values stored in params->params.
+ *
+ * Fields are written one at a time in the order windowLog, chainLog, hashLog,
+ * searchLog, minMatch, targetLength, strategy. If a lookup fails the function
+ * returns 1 with ZstdError set and the remaining fields are left untouched.
+ * Returns 0 on success. */
+int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
+    int fetched;
+
+    TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &fetched);
+    cparams->windowLog = fetched;
+
+    TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &fetched);
+    cparams->chainLog = fetched;
+
+    TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &fetched);
+    cparams->hashLog = fetched;
+
+    TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &fetched);
+    cparams->searchLog = fetched;
+
+    TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &fetched);
+    cparams->minMatch = fetched;
+
+    TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &fetched);
+    cparams->targetLength = fetched;
+
+    TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &fetched);
+    cparams->strategy = fetched;
+
+    return 0;
+}
+
+/* tp_init for ZstdCompressionParameters.
+ *
+ * Every argument is an optional int. Three settings have a deprecated alias
+ * (compression_strategy/strategy, overlap_log/overlap_size_log,
+ * ldm_hash_rate_log/ldm_hash_every_log); giving both names of a pair raises
+ * ValueError. A negative `threads` is replaced by the result of cpu_count().
+ *
+ * Parameters are applied in a fixed order, and the alias checks run at the
+ * point where the corresponding parameter is applied.
+ *
+ * Returns 0 on success, -1 with an exception set on failure. */
+static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "format",
+        "compression_level",
+        "window_log",
+        "hash_log",
+        "chain_log",
+        "search_log",
+        "min_match",
+        "target_length",
+        "compression_strategy",
+        "strategy",
+        "write_content_size",
+        "write_checksum",
+        "write_dict_id",
+        "job_size",
+        "overlap_log",
+        "overlap_size_log",
+        "force_max_window",
+        "enable_ldm",
+        "ldm_hash_log",
+        "ldm_min_match",
+        "ldm_bucket_size_log",
+        "ldm_hash_rate_log",
+        "ldm_hash_every_log",
+        "threads",
+        NULL
+    };
+
+    int format = 0;
+    int compressionLevel = 0;
+    int windowLog = 0;
+    int hashLog = 0;
+    int chainLog = 0;
+    int searchLog = 0;
+    int minMatch = 0;
+    int targetLength = 0;
+    /* -1 marks "not supplied" for every aliased argument. */
+    int compressionStrategy = -1;
+    int strategy = -1;
+    int contentSizeFlag = 1;
+    int checksumFlag = 0;
+    int dictIDFlag = 0;
+    int jobSize = 0;
+    int overlapLog = -1;
+    int overlapSizeLog = -1;
+    int forceMaxWindow = 0;
+    int enableLDM = 0;
+    int ldmHashLog = 0;
+    int ldmMinMatch = 0;
+    int ldmBucketSizeLog = 0;
+    int ldmHashRateLog = -1;
+    int ldmHashEveryLog = -1;
+    int threads = 0;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
+        "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
+        kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
+        &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
+        &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
+        &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch,
+        &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) {
+        return -1;
+    }
+
+    if (reset_params(self)) {
+        return -1;
+    }
+
+    if (threads < 0) {
+        threads = cpu_count();
+    }
+
+    /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
+     * because setting ZSTD_c_nbWorkers resets the other parameters. */
+    TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
+
+    TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
+
+    /* compression_strategy / strategy */
+    if (compressionStrategy != -1 && strategy != -1) {
+        PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
+        return -1;
+    }
+
+    if (compressionStrategy != -1) {
+        strategy = compressionStrategy;
+    }
+    else if (strategy == -1) {
+        strategy = 0;
+    }
+
+    TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
+
+    /* overlap_log / overlap_size_log */
+    if (overlapLog != -1 && overlapSizeLog != -1) {
+        PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
+        return -1;
+    }
+
+    if (overlapSizeLog != -1) {
+        overlapLog = overlapSizeLog;
+    }
+    else if (overlapLog == -1) {
+        overlapLog = 0;
+    }
+
+    TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
+    TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
+
+    /* ldm_hash_rate_log / ldm_hash_every_log. The message names the keyword
+       exactly as the caller must spell it. */
+    if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
+        PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_every_log");
+        return -1;
+    }
+
+    if (ldmHashEveryLog != -1) {
+        ldmHashRateLog = ldmHashEveryLog;
+    }
+    else if (ldmHashRateLog == -1) {
+        ldmHashRateLog = 0;
+    }
+
+    TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
+
+    return 0;
+}
+
+PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
+"Create a CompressionParameters from a compression level and target sizes\n"
+);
+
+/* Stores `value` under `key` in `kwargs` unless the key is already present.
+ * Returns 0 on success, -1 with an exception set on failure. */
+static int from_level_set_default(PyObject* kwargs, const char* key, unsigned long value) {
+    PyObject* boxed;
+    int status;
+
+    if (PyDict_GetItemString(kwargs, key)) {
+        return 0;
+    }
+
+    boxed = PyLong_FromUnsignedLong(value);
+    if (!boxed) {
+        return -1;
+    }
+
+    status = PyDict_SetItemString(kwargs, key, boxed);
+    Py_DECREF(boxed);
+
+    return status;
+}
+
+/* from_level(level, source_size=0, dict_size=0, **kwargs)
+ *
+ * Asks ZSTD_getCParams() for the parameters matching `level` and the optional
+ * size hints, then builds a ZstdCompressionParameters object from them.
+ * Keyword arguments supplied by the caller take precedence over the derived
+ * values. `source_size` and `dict_size` are removed from `kwargs` before the
+ * remaining keywords are handed to ZstdCompressionParameters_init().
+ *
+ * Returns a new object, or NULL with an exception set. */
+ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
+    int managedKwargs = 0;
+    int level;
+    PyObject* sourceSize = NULL;
+    PyObject* dictSize = NULL;
+    unsigned PY_LONG_LONG iSourceSize = 0;
+    Py_ssize_t iDictSize = 0;
+    PyObject* emptyArgs;
+    ZSTD_compressionParameters params;
+    ZstdCompressionParametersObject* result = NULL;
+    int res;
+
+    if (!PyArg_ParseTuple(args, "i:from_level",
+        &level)) {
+        return NULL;
+    }
+
+    /* Work on a dict we own when the caller passed no keywords. */
+    if (!kwargs) {
+        kwargs = PyDict_New();
+        if (!kwargs) {
+            return NULL;
+        }
+        managedKwargs = 1;
+    }
+
+    sourceSize = PyDict_GetItemString(kwargs, "source_size");
+    if (sourceSize) {
+#if PY_MAJOR_VERSION >= 3
+        iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
+#else
+        iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
+#endif
+        /* The all-ones value is also a legitimate result, so it is only an
+           error when an exception is actually pending. */
+        if (iSourceSize == (unsigned PY_LONG_LONG)(-1) && PyErr_Occurred()) {
+            goto cleanup;
+        }
+
+        if (PyDict_DelItemString(kwargs, "source_size") < 0) {
+            goto cleanup;
+        }
+    }
+
+    dictSize = PyDict_GetItemString(kwargs, "dict_size");
+    if (dictSize) {
+#if PY_MAJOR_VERSION >= 3
+        iDictSize = PyLong_AsSsize_t(dictSize);
+#else
+        iDictSize = PyInt_AsSsize_t(dictSize);
+#endif
+        /* Returning NULL without an exception set would surface as a
+           SystemError, so only bail out on a real conversion failure. */
+        if (iDictSize == -1 && PyErr_Occurred()) {
+            goto cleanup;
+        }
+
+        if (PyDict_DelItemString(kwargs, "dict_size") < 0) {
+            goto cleanup;
+        }
+    }
+
+    params = ZSTD_getCParams(level, iSourceSize, iDictSize);
+
+    /* Values derived from the input level and sizes are passed along to the
+       constructor. But only if a value doesn't already exist. */
+    if (from_level_set_default(kwargs, "window_log", params.windowLog)
+        || from_level_set_default(kwargs, "chain_log", params.chainLog)
+        || from_level_set_default(kwargs, "hash_log", params.hashLog)
+        || from_level_set_default(kwargs, "search_log", params.searchLog)
+        || from_level_set_default(kwargs, "min_match", params.minMatch)
+        || from_level_set_default(kwargs, "target_length", params.targetLength)) {
+        goto cleanup;
+    }
+
+    /* `strategy` is an alias of `compression_strategy`. Injecting the derived
+       value while the caller supplied the alias would make the constructor
+       reject the call for specifying both. */
+    if (!PyDict_GetItemString(kwargs, "strategy")
+        && from_level_set_default(kwargs, "compression_strategy", params.strategy)) {
+        goto cleanup;
+    }
+
+    result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
+    if (!result) {
+        goto cleanup;
+    }
+
+    /* PyObject_New() does not zero the struct and the deallocator inspects
+       this field. */
+    result->params = NULL;
+
+    emptyArgs = PyTuple_New(0);
+    if (!emptyArgs) {
+        Py_CLEAR(result);
+        goto cleanup;
+    }
+
+    res = ZstdCompressionParameters_init(result, emptyArgs, kwargs);
+    Py_DECREF(emptyArgs);
+
+    if (res) {
+        Py_CLEAR(result);
+        goto cleanup;
+    }
+
+cleanup:
+    if (managedKwargs) {
+        Py_DECREF(kwargs);
+    }
+
+    return result;
+}
+
+PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
+"Estimate the size in bytes of a compression context for compression parameters\n"
+);
+
+/* Returns, as a Python integer, the value reported by
+ * ZSTD_estimateCCtxSize_usingCCtxParams() for this object's parameters. */
+PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
+    const size_t estimate = ZSTD_estimateCCtxSize_usingCCtxParams(self->params);
+
+    return PyLong_FromSize_t(estimate);
+}
+
+PyDoc_STRVAR(ZstdCompressionParameters__doc__,
+"ZstdCompressionParameters: low-level control over zstd compression");
+
+/* tp_dealloc: releases the owned ZSTD_CCtx_params (if one was allocated)
+ * and then the object itself. */
+static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
+    if (self->params != NULL) {
+        ZSTD_freeCCtxParams(self->params);
+        self->params = NULL;
+    }
+
+    PyObject_Del(self);
+}
+
+/* Defines ZstdCompressionParameters_get_<name>(), an attribute getter that
+ * reads `param` from the object's ZSTD_CCtx_params and returns it as a Python
+ * integer. On a zstd error the getter sets ZstdError and returns NULL. The
+ * `unused` closure argument is ignored. */
+#define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
+ int result; \
+ size_t zresult; \
+ ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \
+ zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \
+ if (ZSTD_isError(zresult)) { \
+ PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
+ ZSTD_getErrorName(zresult)); \
+ return NULL; \
+ } \
+ return PyLong_FromLong(result); \
+}
+
+/* One getter per exposed attribute; the first argument becomes the suffix of
+ * the generated function name. */
+PARAM_GETTER(format, ZSTD_c_format)
+PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
+PARAM_GETTER(window_log, ZSTD_c_windowLog)
+PARAM_GETTER(hash_log, ZSTD_c_hashLog)
+PARAM_GETTER(chain_log, ZSTD_c_chainLog)
+PARAM_GETTER(search_log, ZSTD_c_searchLog)
+PARAM_GETTER(min_match, ZSTD_c_minMatch)
+PARAM_GETTER(target_length, ZSTD_c_targetLength)
+PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
+PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
+PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
+PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
+PARAM_GETTER(job_size, ZSTD_c_jobSize)
+PARAM_GETTER(overlap_log, ZSTD_c_overlapLog)
+PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow)
+PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching)
+PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog)
+PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch)
+PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog)
+PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog)
+PARAM_GETTER(threads, ZSTD_c_nbWorkers)
+
+/* Method table for ZstdCompressionParameters. from_level() is registered
+ * with METH_STATIC, so it is callable on the type without an instance;
+ * estimated_compression_context_size() is an instance method taking no
+ * arguments. The table ends with the all-NULL sentinel entry. */
+static PyMethodDef ZstdCompressionParameters_methods[] = {
+ {
+ "from_level",
+ (PyCFunction)CompressionParameters_from_level,
+ METH_VARARGS | METH_KEYWORDS | METH_STATIC,
+ ZstdCompressionParameters_from_level__doc__
+ },
+ {
+ "estimated_compression_context_size",
+ (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
+ METH_NOARGS,
+ ZstdCompressionParameters_estimated_compression_context_size__doc__
+ },
+ { NULL, NULL }
+};
+
+/* Builds a PyGetSetDef entry named after `name` that uses the matching
+ * PARAM_GETTER-generated function. The setter slot is NULL, so the attribute
+ * is read-only; no doc string or closure is attached. */
+#define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
+
+/* Read-only attributes of ZstdCompressionParameters. The two hand-written
+ * entries are deprecated aliases that reuse the getter of their replacement:
+ * overlap_size_log -> overlap_log, ldm_hash_every_log -> ldm_hash_rate_log. */
+static PyGetSetDef ZstdCompressionParameters_getset[] = {
+ GET_SET_ENTRY(format),
+ GET_SET_ENTRY(compression_level),
+ GET_SET_ENTRY(window_log),
+ GET_SET_ENTRY(hash_log),
+ GET_SET_ENTRY(chain_log),
+ GET_SET_ENTRY(search_log),
+ GET_SET_ENTRY(min_match),
+ GET_SET_ENTRY(target_length),
+ GET_SET_ENTRY(compression_strategy),
+ GET_SET_ENTRY(write_content_size),
+ GET_SET_ENTRY(write_checksum),
+ GET_SET_ENTRY(write_dict_id),
+ GET_SET_ENTRY(threads),
+ GET_SET_ENTRY(job_size),
+ GET_SET_ENTRY(overlap_log),
+ /* TODO remove this deprecated attribute */
+ { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
+ GET_SET_ENTRY(force_max_window),
+ GET_SET_ENTRY(enable_ldm),
+ GET_SET_ENTRY(ldm_hash_log),
+ GET_SET_ENTRY(ldm_min_match),
+ GET_SET_ENTRY(ldm_bucket_size_log),
+ GET_SET_ENTRY(ldm_hash_rate_log),
+ /* TODO remove this deprecated attribute */
+ { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
+ { NULL }
+};
+
+/* Type object for ZstdCompressionParameters.
+ *
+ * Wires up the deallocator, the method table, the read-only attribute table
+ * and ZstdCompressionParameters_init as tp_init. Instances are allocated
+ * through PyType_GenericNew; Py_TPFLAGS_BASETYPE allows subclassing.
+ * NOTE(review): tp_name has no "zstd." module prefix, unlike
+ * "zstd.ZstdCompressionDict" - confirm whether that is intentional. */
+PyTypeObject ZstdCompressionParametersType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "ZstdCompressionParameters", /* tp_name */
+ sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdCompressionParameters__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ ZstdCompressionParameters_methods, /* tp_methods */
+ 0, /* tp_members */
+ ZstdCompressionParameters_getset, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)ZstdCompressionParameters_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/* Finalizes ZstdCompressionParametersType and publishes it on `mod` under
+ * two names: "ZstdCompressionParameters" and the deprecated alias
+ * "CompressionParameters". On failure the function returns early and leaves
+ * the Python exception set for the caller to inspect. */
+void compressionparams_module_init(PyObject* mod) {
+    PyObject* typeObj = (PyObject*)&ZstdCompressionParametersType;
+
+    Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type;
+    if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
+        return;
+    }
+
+    /* PyModule_AddObject() steals the reference only when it succeeds, so
+       the extra reference must be released by hand on failure. */
+    Py_INCREF(typeObj);
+    if (PyModule_AddObject(mod, "ZstdCompressionParameters", typeObj) < 0) {
+        Py_DECREF(typeObj);
+        return;
+    }
+
+    /* TODO remove deprecated alias. */
+    Py_INCREF(typeObj);
+    if (PyModule_AddObject(mod, "CompressionParameters", typeObj) < 0) {
+        Py_DECREF(typeObj);
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/compressionreader.c b/contrib/python/zstandard/py2/c-ext/compressionreader.c
new file mode 100644
index 00000000000..47bd3d77053
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/compressionreader.c
@@ -0,0 +1,818 @@
+/**
+* Copyright (c) 2017-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+/**
+ * Set io.UnsupportedOperation (with no value) as the current Python error.
+ *
+ * If importing "io" or looking up the attribute fails, the function returns
+ * without setting UnsupportedOperation; whatever error those calls set stays
+ * pending.
+ */
+static void set_unsupported_operation(void) {
+    PyObject* ioModule = PyImport_ImportModule("io");
+    PyObject* unsupported;
+
+    if (ioModule == NULL) {
+        return;
+    }
+
+    unsupported = PyObject_GetAttrString(ioModule, "UnsupportedOperation");
+    if (unsupported != NULL) {
+        PyErr_SetNone(unsupported);
+        Py_DECREF(unsupported);
+    }
+
+    Py_DECREF(ioModule);
+}
+
+/**
+ * Destructor for ZstdCompressionReader.
+ *
+ * Drops the references held on the compressor, the source stream and any
+ * pending read() result, releases the source buffer if one is held, then
+ * frees the object.
+ */
+static void reader_dealloc(ZstdCompressionReader* self) {
+    Py_XDECREF(self->compressor);
+    Py_XDECREF(self->reader);
+    /* read_compressor_input() stores the chunk returned by reader.read() here
+     * and it is only cleared once fully consumed; release it if the reader is
+     * destroyed while a chunk is still pending. */
+    Py_XDECREF(self->readResult);
+
+    if (self->buffer.buf) {
+        PyBuffer_Release(&self->buffer);
+        memset(&self->buffer, 0, sizeof(self->buffer));
+    }
+
+    PyObject_Del(self);
+}
+
+/**
+ * __enter__(): mark the reader as entered and return a new reference to it.
+ *
+ * Raises ValueError if the reader is already entered.
+ */
+static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
+    if (!self->entered) {
+        self->entered = 1;
+        Py_INCREF(self);
+        return self;
+    }
+
+    PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
+    return NULL;
+}
+
+static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) { /* __exit__(exc_type, exc_value, exc_tb): close the reader and drop its resources; returns False. */
+ PyObject* exc_type;
+ PyObject* exc_value;
+ PyObject* exc_tb;
+
+ if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) { /* the three values are parsed but not otherwise used */
+ return NULL;
+ }
+
+ self->entered = 0;
+ self->closed = 1; /* read()/read1()/readinto()/readinto1() check this flag and raise ValueError */
+
+ /* Release resources associated with source. */
+ Py_CLEAR(self->reader);
+ if (self->buffer.buf) {
+ PyBuffer_Release(&self->buffer);
+ memset(&self->buffer, 0, sizeof(self->buffer));
+ }
+
+ Py_CLEAR(self->compressor);
+
+ Py_RETURN_FALSE; /* False: an in-flight exception is not suppressed */
+}
+
+static PyObject* reader_readable(ZstdCompressionReader* self) { /* readable(): always True */
+ Py_RETURN_TRUE;
+}
+
+static PyObject* reader_writable(ZstdCompressionReader* self) { /* writable(): always False */
+ Py_RETURN_FALSE;
+}
+
+static PyObject* reader_seekable(ZstdCompressionReader* self) { /* seekable(): always False */
+ Py_RETURN_FALSE;
+}
+
+static PyObject* reader_readline(PyObject* self, PyObject* args) { /* readline(): always fails; arguments are ignored */
+ set_unsupported_operation(); /* sets io.UnsupportedOperation when it can be looked up */
+ return NULL;
+}
+
+static PyObject* reader_readlines(PyObject* self, PyObject* args) { /* readlines(): always fails; arguments are ignored */
+ set_unsupported_operation(); /* sets io.UnsupportedOperation when it can be looked up */
+ return NULL;
+}
+
+static PyObject* reader_write(PyObject* self, PyObject* args) { /* write(): always raises OSError; arguments are ignored */
+ PyErr_SetString(PyExc_OSError, "stream is not writable");
+ return NULL;
+}
+
+static PyObject* reader_writelines(PyObject* self, PyObject* args) { /* writelines(): always raises OSError; arguments are ignored */
+ PyErr_SetString(PyExc_OSError, "stream is not writable");
+ return NULL;
+}
+
+static PyObject* reader_isatty(PyObject* self) { /* isatty(): always False */
+ Py_RETURN_FALSE;
+}
+
+static PyObject* reader_flush(PyObject* self) { /* flush(): does nothing and returns None */
+ Py_RETURN_NONE;
+}
+
+static PyObject* reader_close(ZstdCompressionReader* self) { /* close(): mark the stream closed; returns None */
+ self->closed = 1; /* only the flag is set here; source and compressor references are not released by close() */
+ Py_RETURN_NONE;
+}
+
+static PyObject* reader_tell(ZstdCompressionReader* self) { /* tell(): return the running bytesCompressed counter as a Python integer */
+ /* TODO should this raise OSError since stream isn't seekable? */
+ return PyLong_FromUnsignedLongLong(self->bytesCompressed);
+}
+
+/**
+ * Make more input available to the compressor when the current input has
+ * been fully consumed.
+ *
+ * Returns:
+ *   0  nothing was done: input is already finished, or self->input still
+ *      has unconsumed bytes.
+ *   1  an attempt to load input was made. Either self->input now describes
+ *      new data, or (reader source returned an empty result) finishedInput
+ *      was set.
+ *  -1  a Python exception is set (reader.read() failed or its result does
+ *      not provide a contiguous buffer).
+ */
+int read_compressor_input(ZstdCompressionReader* self) {
+    if (self->finishedInput) {
+        return 0;
+    }
+
+    if (self->input.pos != self->input.size) {
+        return 0;
+    }
+
+    if (self->reader) {
+        Py_buffer buffer;
+
+        assert(self->readResult == NULL);
+
+        self->readResult = PyObject_CallMethod(self->reader, "read",
+            "k", self->readSize);
+
+        if (NULL == self->readResult) {
+            return -1;
+        }
+
+        memset(&buffer, 0, sizeof(buffer));
+
+        if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
+            /* Drop the unusable object. Keeping it would leak the reference
+             * and violate the readResult == NULL precondition on the next
+             * call. */
+            Py_CLEAR(self->readResult);
+            return -1;
+        }
+
+        /* EOF */
+        if (0 == buffer.len) {
+            self->finishedInput = 1;
+            Py_CLEAR(self->readResult);
+        }
+        else {
+            /* self->input keeps pointing at this memory after the buffer
+             * view is released below; self->readResult holds the owning
+             * object alive until the input is consumed.
+             * NOTE(review): this relies on the object's memory staying valid
+             * after PyBuffer_Release (true for bytes) - confirm for other
+             * read() return types. */
+            self->input.src = buffer.buf;
+            self->input.size = buffer.len;
+            self->input.pos = 0;
+        }
+
+        PyBuffer_Release(&buffer);
+    }
+    else {
+        assert(self->buffer.buf);
+
+        self->input.src = self->buffer.buf;
+        self->input.size = self->buffer.len;
+        self->input.pos = 0;
+    }
+
+    return 1;
+}
+
+/**
+ * Feed any pending bytes in self->input to the compressor, writing into
+ * `output`.
+ *
+ * Returns 1 when `output` is non-empty and completely full, 0 otherwise,
+ * and -1 (with ZstdError set) if zstd reports an error.
+ */
+int compress_input(ZstdCompressionReader* self, ZSTD_outBuffer* output) {
+    /* If we have data left over, consume it. */
+    if (self->input.pos < self->input.size) {
+        size_t zresult;
+        size_t startPos = output->pos;
+
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_compressStream2(self->compressor->cctx,
+            output, &self->input, ZSTD_e_continue);
+        Py_END_ALLOW_THREADS
+
+        self->bytesCompressed += output->pos - startPos;
+
+        /* Input exhausted. Clear out state tracking. */
+        if (self->input.pos == self->input.size) {
+            memset(&self->input, 0, sizeof(self->input));
+            Py_CLEAR(self->readResult);
+
+            /* A buffer source is consumed in one piece, so it is done now. */
+            if (self->buffer.buf) {
+                self->finishedInput = 1;
+            }
+        }
+
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+            return -1;
+        }
+    }
+
+    return (output->pos && output->pos == output->size) ? 1 : 0;
+}
+
+/**
+ * read(size=-1): return up to `size` bytes of compressed data as bytes.
+ *
+ * size == -1 delegates to readall(); size == 0 or an already finished
+ * stream returns b"". Otherwise input is pulled from the source and
+ * compressed until the output buffer is full or the source is exhausted,
+ * at which point the zstd frame is ended and the result is trimmed to the
+ * bytes actually produced.
+ *
+ * Raises ValueError if the stream is closed or size < -1, ZstdError on a
+ * compression failure, and propagates errors from the source's read().
+ */
+static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "size",
+        NULL
+    };
+
+    Py_ssize_t size = -1;
+    PyObject* result = NULL;
+    char* resultBuffer;
+    Py_ssize_t resultSize;
+    size_t zresult;
+    size_t oldPos;
+    int readResult, compressResult;
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
+        return NULL;
+    }
+
+    if (size < -1) {
+        PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
+        return NULL;
+    }
+
+    if (size == -1) {
+        return PyObject_CallMethod((PyObject*)self, "readall", NULL);
+    }
+
+    if (self->finishedOutput || size == 0) {
+        return PyBytes_FromStringAndSize("", 0);
+    }
+
+    result = PyBytes_FromStringAndSize(NULL, size);
+    if (NULL == result) {
+        return NULL;
+    }
+
+    PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
+
+    /* Compress directly into the bytes object that will be returned. */
+    self->output.dst = resultBuffer;
+    self->output.size = resultSize;
+    self->output.pos = 0;
+
+readinput:
+
+    compressResult = compress_input(self, &self->output);
+
+    if (-1 == compressResult) {
+        goto except;
+    }
+    else if (0 == compressResult) {
+        /* There is room in the output. We fall through to below, which will
+         * either get more input for us or will attempt to end the stream.
+         */
+    }
+    else if (1 == compressResult) {
+        /* Output is exactly full: no resize needed. */
+        memset(&self->output, 0, sizeof(self->output));
+        return result;
+    }
+    else {
+        assert(0);
+    }
+
+    readResult = read_compressor_input(self);
+
+    if (-1 == readResult) {
+        /* Previously returned NULL directly here and leaked `result`. */
+        goto except;
+    }
+    else if (0 == readResult) { }
+    else if (1 == readResult) { }
+    else {
+        assert(0);
+    }
+
+    if (self->input.size) {
+        goto readinput;
+    }
+
+    /* Else EOF */
+    oldPos = self->output.pos;
+
+    zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
+        &self->input, ZSTD_e_end);
+
+    self->bytesCompressed += self->output.pos - oldPos;
+
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error ending compression stream: %s",
+            ZSTD_getErrorName(zresult));
+        goto except;
+    }
+
+    assert(self->output.pos);
+
+    /* A zero return means the frame epilogue was fully written. */
+    if (0 == zresult) {
+        self->finishedOutput = 1;
+    }
+
+    if (safe_pybytes_resize(&result, self->output.pos)) {
+        goto except;
+    }
+
+    memset(&self->output, 0, sizeof(self->output));
+
+    return result;
+
+except:
+    /* Single error exit: release the partially filled result and stop
+     * self->output from pointing into it. */
+    Py_XDECREF(result);
+    memset(&self->output, 0, sizeof(self->output));
+    return NULL;
+}
+
+static PyObject* reader_read1(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) { /* read1(size=-1): return compressed bytes as soon as any output is available. */
+ static char* kwlist[] = {
+ "size",
+ NULL
+ };
+
+ Py_ssize_t size = -1;
+ PyObject* result = NULL;
+ char* resultBuffer;
+ Py_ssize_t resultSize;
+ ZSTD_outBuffer output; /* local output descriptor; self->output is not used by this function */
+ int compressResult;
+ size_t oldPos;
+ size_t zresult;
+
+ if (self->closed) {
+ PyErr_SetString(PyExc_ValueError, "stream is closed");
+ return NULL;
+ }
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) {
+ return NULL;
+ }
+
+ if (size < -1) {
+ PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
+ return NULL;
+ }
+
+ if (self->finishedOutput || size == 0) {
+ return PyBytes_FromStringAndSize("", 0);
+ }
+
+ if (size == -1) {
+ size = ZSTD_CStreamOutSize(); /* default chunk size when the caller gives none */
+ }
+
+ result = PyBytes_FromStringAndSize(NULL, size);
+ if (NULL == result) {
+ return NULL;
+ }
+
+ PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
+
+ output.dst = resultBuffer;
+ output.size = resultSize;
+ output.pos = 0;
+
+ /* read1() is supposed to use at most 1 read() from the underlying stream.
+ However, we can't satisfy this requirement with compression because
+ not every input will generate output. We /could/ flush the compressor,
+ but this may not be desirable. We allow multiple read() from the
+ underlying stream. But unlike read(), we return as soon as output data
+ is available.
+ */
+
+ compressResult = compress_input(self, &output); /* first drain input left over from a previous call */
+
+ if (-1 == compressResult) {
+ Py_XDECREF(result);
+ return NULL;
+ }
+ else if (0 == compressResult || 1 == compressResult) { }
+ else {
+ assert(0);
+ }
+
+ if (output.pos) {
+ goto finally;
+ }
+
+ while (!self->finishedInput) { /* keep pulling input until some output appears or the source is exhausted */
+ int readResult = read_compressor_input(self);
+
+ if (-1 == readResult) {
+ Py_XDECREF(result);
+ return NULL;
+ }
+ else if (0 == readResult || 1 == readResult) { }
+ else {
+ assert(0);
+ }
+
+ compressResult = compress_input(self, &output);
+
+ if (-1 == compressResult) {
+ Py_XDECREF(result);
+ return NULL;
+ }
+ else if (0 == compressResult || 1 == compressResult) { }
+ else {
+ assert(0);
+ }
+
+ if (output.pos) {
+ goto finally;
+ }
+ }
+
+ /* EOF */
+ oldPos = output.pos;
+
+ zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
+ ZSTD_e_end);
+
+ self->bytesCompressed += output.pos - oldPos;
+
+ if (ZSTD_isError(zresult)) {
+ PyErr_Format(ZstdError, "error ending compression stream: %s",
+ ZSTD_getErrorName(zresult));
+ Py_XDECREF(result);
+ return NULL;
+ }
+
+ if (zresult == 0) {
+ self->finishedOutput = 1; /* later calls return b"" immediately */
+ }
+
+finally:
+ if (result) {
+ if (safe_pybytes_resize(&result, output.pos)) { /* trim the bytes object to the bytes actually produced */
+ Py_XDECREF(result);
+ return NULL;
+ }
+ }
+
+ return result;
+}
+
+/**
+ * readall(): read until the stream is exhausted and return one bytes object.
+ *
+ * Calls self.read(1048576) repeatedly, collecting the non-empty chunks in a
+ * list, then joins them with b"".join(). Returns NULL with the exception of
+ * the failing call set if any step fails.
+ */
+static PyObject* reader_readall(PyObject* self) {
+    PyObject* pieces;
+    PyObject* separator;
+    PyObject* joined = NULL;
+
+    /* Collecting chunks in a list and joining once avoids repeatedly
+     * reallocating one large buffer.
+     */
+    pieces = PyList_New(0);
+    if (NULL == pieces) {
+        return NULL;
+    }
+
+    for (;;) {
+        int appendFailed;
+        PyObject* piece = PyObject_CallMethod(self, "read", "i", 1048576);
+
+        if (NULL == piece) {
+            goto done;
+        }
+
+        /* An empty chunk signals end of stream. */
+        if (!PyBytes_Size(piece)) {
+            Py_DECREF(piece);
+            break;
+        }
+
+        /* The list takes its own reference; ours is dropped either way. */
+        appendFailed = PyList_Append(pieces, piece);
+        Py_DECREF(piece);
+
+        if (appendFailed) {
+            goto done;
+        }
+    }
+
+    separator = PyBytes_FromStringAndSize("", 0);
+    if (NULL == separator) {
+        goto done;
+    }
+
+    joined = PyObject_CallMethod(separator, "join", "O", pieces);
+    Py_DECREF(separator);
+
+done:
+    Py_DECREF(pieces);
+
+    return joined;
+}
+
+/**
+ * readinto(b): compress into the writable buffer `b` and return the number
+ * of bytes written as a Python integer.
+ *
+ * Input is pulled from the source until `b` is full or the source is
+ * exhausted; on exhaustion the zstd frame is ended. Returns 0 once all
+ * output has been emitted.
+ *
+ * Raises ValueError if the stream is closed or `b` is not a contiguous
+ * buffer of at most one dimension, ZstdError on a compression failure, and
+ * propagates errors from the source's read().
+ */
+static PyObject* reader_readinto(ZstdCompressionReader* self, PyObject* args) {
+    Py_buffer dest;
+    ZSTD_outBuffer output;
+    int readResult, compressResult;
+    PyObject* result = NULL;
+    size_t zresult;
+    size_t oldPos;
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    if (self->finishedOutput) {
+        return PyLong_FromLong(0);
+    }
+
+    if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "destination buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    output.dst = dest.buf;
+    output.size = dest.len;
+    output.pos = 0;
+
+    /* Drain input left over from a previous call first. */
+    compressResult = compress_input(self, &output);
+
+    if (-1 == compressResult) {
+        goto finally;
+    }
+    else if (0 == compressResult) { }
+    else if (1 == compressResult) {
+        result = PyLong_FromSize_t(output.pos);
+        goto finally;
+    }
+    else {
+        assert(0);
+    }
+
+    while (!self->finishedInput) {
+        readResult = read_compressor_input(self);
+
+        if (-1 == readResult) {
+            goto finally;
+        }
+        else if (0 == readResult || 1 == readResult) {}
+        else {
+            assert(0);
+        }
+
+        compressResult = compress_input(self, &output);
+
+        if (-1 == compressResult) {
+            goto finally;
+        }
+        else if (0 == compressResult) { }
+        else if (1 == compressResult) {
+            result = PyLong_FromSize_t(output.pos);
+            goto finally;
+        }
+        else {
+            assert(0);
+        }
+    }
+
+    /* EOF */
+    oldPos = output.pos;
+
+    zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
+        ZSTD_e_end);
+
+    /* Account against the local `output` that was just written. The
+     * previous code read self->output.pos, which this function never
+     * updates, so tell() drifted after the final chunk. */
+    self->bytesCompressed += output.pos - oldPos;
+
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error ending compression stream: %s",
+            ZSTD_getErrorName(zresult));
+        goto finally;
+    }
+
+    assert(output.pos);
+
+    /* A zero return means the frame epilogue was fully written. */
+    if (0 == zresult) {
+        self->finishedOutput = 1;
+    }
+
+    result = PyLong_FromSize_t(output.pos);
+
+finally:
+    PyBuffer_Release(&dest);
+
+    return result;
+}
+
+/**
+ * readinto1(b): like readinto(), but return as soon as any compressed
+ * output has been written into `b` while input remains.
+ *
+ * Returns the number of bytes written as a Python integer, or 0 once all
+ * output has been emitted.
+ *
+ * Raises ValueError if the stream is closed or `b` is not a contiguous
+ * buffer of at most one dimension, ZstdError on a compression failure, and
+ * propagates errors from the source's read().
+ */
+static PyObject* reader_readinto1(ZstdCompressionReader* self, PyObject* args) {
+    Py_buffer dest;
+    PyObject* result = NULL;
+    ZSTD_outBuffer output;
+    int compressResult;
+    size_t oldPos;
+    size_t zresult;
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    if (self->finishedOutput) {
+        return PyLong_FromLong(0);
+    }
+
+    if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "destination buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    output.dst = dest.buf;
+    output.size = dest.len;
+    output.pos = 0;
+
+    /* Drain input left over from a previous call first. */
+    compressResult = compress_input(self, &output);
+
+    if (-1 == compressResult) {
+        goto finally;
+    }
+    else if (0 == compressResult || 1 == compressResult) { }
+    else {
+        assert(0);
+    }
+
+    if (output.pos) {
+        result = PyLong_FromSize_t(output.pos);
+        goto finally;
+    }
+
+    while (!self->finishedInput) {
+        int readResult = read_compressor_input(self);
+
+        if (-1 == readResult) {
+            goto finally;
+        }
+        else if (0 == readResult || 1 == readResult) { }
+        else {
+            assert(0);
+        }
+
+        compressResult = compress_input(self, &output);
+
+        if (-1 == compressResult) {
+            goto finally;
+        }
+        else if (0 == compressResult) { }
+        else if (1 == compressResult) {
+            result = PyLong_FromSize_t(output.pos);
+            goto finally;
+        }
+        else {
+            assert(0);
+        }
+
+        /* If we produced output and we're not done with input, emit
+         * that output now, as we've hit restrictions of read1().
+         */
+        if (output.pos && !self->finishedInput) {
+            result = PyLong_FromSize_t(output.pos);
+            goto finally;
+        }
+
+        /* Otherwise we either have no output or we've exhausted the
+         * input. Either we try to get more input or we fall through
+         * to EOF below */
+    }
+
+    /* EOF */
+    oldPos = output.pos;
+
+    zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
+        ZSTD_e_end);
+
+    /* Account against the local `output` that was just written. The
+     * previous code read self->output.pos, which this function never
+     * updates, so tell() drifted after the final chunk. */
+    self->bytesCompressed += output.pos - oldPos;
+
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error ending compression stream: %s",
+            ZSTD_getErrorName(zresult));
+        goto finally;
+    }
+
+    assert(output.pos);
+
+    /* A zero return means the frame epilogue was fully written. */
+    if (0 == zresult) {
+        self->finishedOutput = 1;
+    }
+
+    result = PyLong_FromSize_t(output.pos);
+
+finally:
+    PyBuffer_Release(&dest);
+
+    return result;
+}
+
+static PyObject* reader_iter(PyObject* self) { /* tp_iter slot: iteration always fails */
+ set_unsupported_operation(); /* sets io.UnsupportedOperation when it can be looked up */
+ return NULL;
+}
+
+static PyObject* reader_iternext(PyObject* self) { /* tp_iternext slot: always fails */
+ set_unsupported_operation(); /* sets io.UnsupportedOperation when it can be looked up */
+ return NULL;
+}
+
+/* Method table for ZstdCompressionReader (installed as tp_methods).
+ * The docstrings for readall and writelines previously said "Not
+ * implemented"; readall is implemented and writelines raises OSError, so
+ * the text now matches the functions they point at. */
+static PyMethodDef reader_methods[] = {
+    { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
+    PyDoc_STR("Enter a compression context") },
+    { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
+    PyDoc_STR("Exit a compression context") },
+    { "close", (PyCFunction)reader_close, METH_NOARGS,
+    PyDoc_STR("Close the stream so it cannot perform any more operations") },
+    { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
+    { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
+    { "readable", (PyCFunction)reader_readable, METH_NOARGS,
+    PyDoc_STR("Returns True") },
+    { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
+    { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Read and return all remaining compressed data") },
+    { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
+    { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
+    { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
+    { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
+    { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
+    PyDoc_STR("Returns False") },
+    { "tell", (PyCFunction)reader_tell, METH_NOARGS,
+    PyDoc_STR("Returns current number of bytes compressed") },
+    { "writable", (PyCFunction)reader_writable, METH_NOARGS,
+    PyDoc_STR("Returns False") },
+    { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
+    { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Raises OSError") },
+    { NULL, NULL }
+};
+
+static PyMemberDef reader_members[] = { /* Attributes exposed directly from the ZstdCompressionReader struct */
+ { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed),
+ READONLY, "whether stream is closed" }, /* read-only view of the `closed` flag */
+ { NULL } /* sentinel */
+};
+
+PyTypeObject ZstdCompressionReaderType = { /* Type object for ZstdCompressionReader; slots are positional, see per-line labels. */
+ PyVarObject_HEAD_INIT(NULL, 0) /* ob_type starts NULL; compressionreader_module_init() assigns it */
+ "zstd.ZstdCompressionReader", /* tp_name */
+ sizeof(ZstdCompressionReader), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)reader_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags: no BASETYPE flag, unlike ZstdCompressionWriterType */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ reader_iter, /* tp_iter: always fails via set_unsupported_operation() */
+ reader_iternext, /* tp_iternext: always fails via set_unsupported_operation() */
+ reader_methods, /* tp_methods */
+ reader_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+void compressionreader_module_init(PyObject* mod) { /* Ready ZstdCompressionReaderType; `mod` is unused and the type is not added to the module here. */
+ /* TODO make reader a sub-class of io.RawIOBase */
+
+ Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type;
+ if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
+ return; /* void function: the failure is not reported through a return value */
+ }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/compressionwriter.c b/contrib/python/zstandard/py2/c-ext/compressionwriter.c
new file mode 100644
index 00000000000..fe8e55a0fe0
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/compressionwriter.c
@@ -0,0 +1,372 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+PyDoc_STRVAR(ZstdCompresssionWriter__doc__, /* docstring used as tp_doc of ZstdCompressionWriterType; note the identifier is spelled with three s's */
+"""A context manager used for writing compressed output to a writer.\n" /* the leading "" is an empty literal concatenated with the text */
+);
+
+static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) { /* Destructor: drop references, free the output buffer, free the object. */
+ Py_XDECREF(self->compressor);
+ Py_XDECREF(self->writer);
+
+ PyMem_Free(self->output.dst); /* NOTE(review): presumably allocated with PyMem_Malloc where the writer is constructed - not visible here */
+ self->output.dst = NULL;
+
+ PyObject_Del(self);
+}
+
+static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) { /* __enter__(): mark the writer entered and return a new reference to it. */
+ if (self->closed) {
+ PyErr_SetString(PyExc_ValueError, "stream is closed");
+ return NULL;
+ }
+
+ if (self->entered) {
+ PyErr_SetString(ZstdError, "cannot __enter__ multiple times"); /* ZstdError here; reader_enter raises ValueError for the same condition */
+ return NULL;
+ }
+
+ self->entered = 1;
+
+ Py_INCREF(self); /* the caller receives its own reference */
+ return (PyObject*)self;
+}
+
+/**
+ * __exit__(exc_type, exc_value, exc_tb): leave the context manager.
+ *
+ * When no exception is in flight (all three arguments are None) the writer
+ * is closed via its close() method; a failure of close() is propagated.
+ * Returns False so an in-flight exception is never suppressed.
+ */
+static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
+    PyObject* exc_type;
+    PyObject* exc_value;
+    PyObject* exc_tb;
+
+    if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
+        return NULL;
+    }
+
+    self->entered = 0;
+
+    if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
+        PyObject* result = PyObject_CallMethod((PyObject*)self, "close", NULL);
+
+        if (NULL == result) {
+            return NULL;
+        }
+
+        /* PyObject_CallMethod returns a new reference; it was leaked before. */
+        Py_DECREF(result);
+    }
+
+    Py_RETURN_FALSE;
+}
+
+static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) { /* memory_size(): ZSTD_sizeof_CCtx() of the compressor's context as a Python integer */
+ return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
+}
+
+/**
+ * write(data): compress `data` and forward any produced output to the
+ * underlying writer's write() method.
+ *
+ * Returns the number of input bytes consumed when self->writeReturnRead is
+ * set, otherwise the number of compressed bytes handed to the writer.
+ *
+ * Raises ValueError if the stream is closed or `data` is not a contiguous
+ * buffer of at most one dimension, ZstdError on a compression failure, and
+ * propagates any exception raised by the underlying writer.
+ */
+static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        NULL
+    };
+
+    PyObject* result = NULL;
+    Py_buffer source;
+    size_t zresult;
+    ZSTD_inBuffer input;
+    PyObject* res;
+    Py_ssize_t totalWrite = 0;
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
+#endif
+        kwlist, &source)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        /* `source` is already acquired: leave through the cleanup label
+         * instead of returning directly, which leaked the buffer view. */
+        goto finally;
+    }
+
+    self->output.pos = 0;
+
+    input.src = source.buf;
+    input.size = source.len;
+    input.pos = 0;
+
+    while (input.pos < (size_t)source.len) {
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, ZSTD_e_continue);
+        Py_END_ALLOW_THREADS
+
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+            goto finally;
+        }
+
+        /* Copy data from output buffer to writer. */
+        if (self->output.pos) {
+#if PY_MAJOR_VERSION >= 3
+            res = PyObject_CallMethod(self->writer, "write", "y#",
+#else
+            res = PyObject_CallMethod(self->writer, "write", "s#",
+#endif
+                self->output.dst, self->output.pos);
+            if (NULL == res) {
+                /* The writer raised. Stop here so the exception reaches the
+                 * caller rather than returning a value with an error set. */
+                goto finally;
+            }
+            Py_DECREF(res);
+            totalWrite += self->output.pos;
+            self->bytesCompressed += self->output.pos;
+        }
+        self->output.pos = 0;
+    }
+
+    if (self->writeReturnRead) {
+        result = PyLong_FromSize_t(input.pos);
+    }
+    else {
+        result = PyLong_FromSsize_t(totalWrite);
+    }
+
+finally:
+    PyBuffer_Release(&source);
+    return result;
+}
+
+/**
+ * flush(flush_mode=0): push buffered compressor state to the underlying
+ * writer.
+ *
+ * flush_mode 0 maps to ZSTD_e_flush and 1 to ZSTD_e_end; any other value
+ * raises ValueError. Loops until zstd reports nothing left to emit and
+ * returns the number of compressed bytes handed to the writer.
+ *
+ * Raises ValueError if the stream is closed, ZstdError on a compression
+ * failure, and propagates any exception raised by the underlying writer.
+ */
+static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "flush_mode",
+        NULL
+    };
+
+    size_t zresult;
+    ZSTD_inBuffer input;
+    PyObject* res;
+    Py_ssize_t totalWrite = 0;
+    unsigned flush_mode = 0;
+    ZSTD_EndDirective flush;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush",
+        kwlist, &flush_mode)) {
+        return NULL;
+    }
+
+    switch (flush_mode) {
+        case 0:
+            flush = ZSTD_e_flush;
+            break;
+        case 1:
+            flush = ZSTD_e_end;
+            break;
+        default:
+            PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode);
+            return NULL;
+    }
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    self->output.pos = 0;
+
+    /* No new input: only already-buffered data is emitted. */
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
+    while (1) {
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, flush);
+        Py_END_ALLOW_THREADS
+
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+            return NULL;
+        }
+
+        /* Copy data from output buffer to writer. */
+        if (self->output.pos) {
+#if PY_MAJOR_VERSION >= 3
+            res = PyObject_CallMethod(self->writer, "write", "y#",
+#else
+            res = PyObject_CallMethod(self->writer, "write", "s#",
+#endif
+                self->output.dst, self->output.pos);
+            if (NULL == res) {
+                /* The writer raised. Propagate instead of continuing with
+                 * an exception pending. */
+                return NULL;
+            }
+            Py_DECREF(res);
+            totalWrite += self->output.pos;
+            self->bytesCompressed += self->output.pos;
+        }
+
+        self->output.pos = 0;
+
+        /* zresult == 0 means nothing remains to be flushed. */
+        if (!zresult) {
+            break;
+        }
+    }
+
+    return PyLong_FromSsize_t(totalWrite);
+}
+
+/**
+ * close(): end the zstd frame and close the underlying writer.
+ *
+ * Does nothing if already closed. Otherwise calls self.flush(1), marks the
+ * stream closed (even if the flush failed), and then calls close() on the
+ * underlying writer when it has one. Returns None, or whatever the
+ * underlying close() returns.
+ */
+static PyObject* ZstdCompressionWriter_close(ZstdCompressionWriter* self) {
+    PyObject* result;
+
+    if (self->closed) {
+        Py_RETURN_NONE;
+    }
+
+    result = PyObject_CallMethod((PyObject*)self, "flush", "I", 1);
+    self->closed = 1;
+
+    if (NULL == result) {
+        return NULL;
+    }
+
+    /* Only success of the flush matters; its return value was leaked before. */
+    Py_DECREF(result);
+
+    /* Call close on underlying stream as well. */
+    if (PyObject_HasAttrString(self->writer, "close")) {
+        return PyObject_CallMethod(self->writer, "close", NULL);
+    }
+
+    Py_RETURN_NONE;
+}
+
+/**
+ * fileno(): delegate to the underlying writer's fileno().
+ *
+ * Raises OSError when the underlying writer has no "fileno" attribute.
+ */
+static PyObject* ZstdCompressionWriter_fileno(ZstdCompressionWriter* self) {
+    if (!PyObject_HasAttrString(self->writer, "fileno")) {
+        PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
+        return NULL;
+    }
+
+    return PyObject_CallMethod(self->writer, "fileno", NULL);
+}
+
+static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) { /* tell(): return the running bytesCompressed counter as a Python integer */
+ return PyLong_FromUnsignedLongLong(self->bytesCompressed);
+}
+
+static PyObject* ZstdCompressionWriter_writelines(PyObject* self, PyObject* args) { /* writelines(): always raises NotImplementedError; arguments are ignored */
+ PyErr_SetNone(PyExc_NotImplementedError);
+ return NULL;
+}
+
+static PyObject* ZstdCompressionWriter_false(PyObject* self, PyObject* args) { /* shared by isatty(), readable() and seekable() in the method table: always False */
+ Py_RETURN_FALSE;
+}
+
+static PyObject* ZstdCompressionWriter_true(PyObject* self, PyObject* args) { /* used for writable() in the method table: always True */
+ Py_RETURN_TRUE;
+}
+
+/**
+ * Shared implementation for stream methods the writer does not support.
+ *
+ * Always returns NULL. io.UnsupportedOperation is set as the error when it
+ * can be looked up; otherwise the error from the failed import or attribute
+ * lookup stays pending. All arguments are ignored.
+ */
+static PyObject* ZstdCompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
+    PyObject* ioModule = PyImport_ImportModule("io");
+    PyObject* unsupported;
+
+    if (ioModule != NULL) {
+        unsupported = PyObject_GetAttrString(ioModule, "UnsupportedOperation");
+        if (unsupported != NULL) {
+            PyErr_SetNone(unsupported);
+            Py_DECREF(unsupported);
+        }
+
+        Py_DECREF(ioModule);
+    }
+
+    return NULL;
+}
+
+static PyMethodDef ZstdCompressionWriter_methods[] = { /* Method table for ZstdCompressionWriter (installed as tp_methods) */
+ { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
+ PyDoc_STR("Enter a compression context.") },
+ { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
+ PyDoc_STR("Exit a compression context.") },
+ { "close", (PyCFunction)ZstdCompressionWriter_close, METH_NOARGS, NULL },
+ { "fileno", (PyCFunction)ZstdCompressionWriter_fileno, METH_NOARGS, NULL },
+ { "isatty", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
+ { "readable", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
+ { "readline", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL }, /* read-side methods all fail via ZstdCompressionWriter_unsupported */
+ { "readlines", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+ { "seek", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+ { "seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL },
+ { "truncate", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+ { "writable", ZstdCompressionWriter_true, METH_NOARGS, NULL },
+ { "writelines", ZstdCompressionWriter_writelines, METH_VARARGS, NULL }, /* raises NotImplementedError */
+ { "read", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+ { "readall", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+ { "readinto", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+ { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
+ PyDoc_STR("Obtain the memory size of the underlying compressor") },
+ { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
+ PyDoc_STR("Compress data") },
+ { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_VARARGS | METH_KEYWORDS,
+ PyDoc_STR("Flush data and finish a zstd frame") }, /* NOTE(review): the frame is only ended when flush_mode is 1; the default mode 0 uses ZSTD_e_flush */
+ { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
+ PyDoc_STR("Returns current number of bytes compressed") },
+ { NULL, NULL } /* sentinel */
+};
+
+static PyMemberDef ZstdCompressionWriter_members[] = { /* Attributes exposed directly from the ZstdCompressionWriter struct */
+ { "closed", T_BOOL, offsetof(ZstdCompressionWriter, closed), READONLY, NULL }, /* read-only view of the `closed` flag, no docstring */
+ { NULL } /* sentinel */
+};
+
+PyTypeObject ZstdCompressionWriterType = { /* Type object for ZstdCompressionWriter; slots are positional, see per-line labels. */
+ PyVarObject_HEAD_INIT(NULL, 0) /* ob_type starts NULL; compressionwriter_module_init() assigns it */
+ "zstd.ZstdCompressionWriter", /* tp_name */
+ sizeof(ZstdCompressionWriter), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags: BASETYPE permits subclassing from Python */
+ ZstdCompresssionWriter__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ ZstdCompressionWriter_methods, /* tp_methods */
+ ZstdCompressionWriter_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+void compressionwriter_module_init(PyObject* mod) { /* Ready ZstdCompressionWriterType; `mod` is unused and the type is not added to the module here. */
+ Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
+ if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
+ return; /* void function: the failure is not reported through a return value */
+ }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/compressobj.c b/contrib/python/zstandard/py2/c-ext/compressobj.c
new file mode 100644
index 00000000000..0c33dffc7af
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/compressobj.c
@@ -0,0 +1,256 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+PyDoc_STRVAR(ZstdCompressionObj__doc__,
+"Perform compression using a standard library compatible API.\n"
+);
+
+/**
+ * tp_dealloc for ZstdCompressionObj: drops the compressor reference,
+ * frees the output scratch buffer and releases the object itself.
+ */
+static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
+    /* Py_XDECREF tolerates a compressor that was never attached. */
+    Py_XDECREF(self->compressor);
+
+    PyMem_Free(self->output.dst);
+    self->output.dst = NULL;
+
+    PyObject_Del(self);
+}
+
+/**
+ * compress(data) -> bytes
+ *
+ * Feeds `data` to the underlying compression context with ZSTD_e_continue
+ * and returns whatever compressed bytes were produced (possibly empty).
+ *
+ * Raises ZstdError if the object was already finished or zstd reports an
+ * error, and ValueError if the buffer is not C-contiguous or has more than
+ * one dimension.
+ */
+static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = { "data", NULL };
+
+    Py_buffer source;
+    ZSTD_inBuffer input;
+    PyObject* result = NULL;
+
+    if (self->finished) {
+        PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
+        return NULL;
+    }
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
+#endif
+        kwlist, &source)) {
+        return NULL;
+    }
+
+    if (source.ndim > 1 || !PyBuffer_IsContiguous(&source, 'C')) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    input.src = source.buf;
+    input.size = source.len;
+    input.pos = 0;
+
+    while (input.pos < (size_t)source.len) {
+        size_t zresult;
+        Py_ssize_t oldSize;
+
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
+            &input, ZSTD_e_continue);
+        Py_END_ALLOW_THREADS
+
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+            Py_CLEAR(result);
+            goto finally;
+        }
+
+        /* Nothing was emitted on this pass; keep feeding input. */
+        if (0 == self->output.pos) {
+            continue;
+        }
+
+        if (NULL == result) {
+            /* First chunk: start the result from the scratch buffer. */
+            result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
+            if (!result) {
+                goto finally;
+            }
+        }
+        else {
+            /* Later chunks: grow the result and append. */
+            oldSize = PyBytes_GET_SIZE(result);
+
+            if (safe_pybytes_resize(&result, oldSize + self->output.pos)) {
+                Py_CLEAR(result);
+                goto finally;
+            }
+
+            memcpy(PyBytes_AS_STRING(result) + oldSize,
+                self->output.dst, self->output.pos);
+        }
+
+        /* The scratch buffer has been drained. */
+        self->output.pos = 0;
+    }
+
+    if (!result) {
+        result = PyBytes_FromString("");
+    }
+
+finally:
+    PyBuffer_Release(&source);
+
+    return result;
+}
+
+/**
+ * flush([flush_mode]) -> bytes
+ *
+ * Drains the compression context and returns the bytes it emits (possibly
+ * empty). `flush_mode` must be compressorobj_flush_finish (the default),
+ * which ends the frame and marks the object finished, or
+ * compressorobj_flush_block, which flushes with ZSTD_e_flush.
+ *
+ * Raises ValueError for an unrecognized mode and ZstdError if the object
+ * is already finished or zstd reports an error.
+ */
+static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "flush_mode",
+        NULL
+    };
+
+    int flushMode = compressorobj_flush_finish;
+    size_t zresult;
+    PyObject* result = NULL;
+    Py_ssize_t resultSize = 0;
+    ZSTD_inBuffer input;
+    ZSTD_EndDirective zFlushMode;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
+        return NULL;
+    }
+
+    if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
+        PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
+        return NULL;
+    }
+
+    if (self->finished) {
+        PyErr_SetString(ZstdError, "compressor object already finished");
+        return NULL;
+    }
+
+    switch (flushMode) {
+        case compressorobj_flush_block:
+            zFlushMode = ZSTD_e_flush;
+            break;
+
+        case compressorobj_flush_finish:
+            zFlushMode = ZSTD_e_end;
+            self->finished = 1;
+            break;
+
+        default:
+            PyErr_SetString(ZstdError, "unhandled flush mode");
+            return NULL;
+    }
+
+    assert(self->output.pos == 0);
+
+    /* No new input is supplied; only buffered data is drained. */
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
+    while (1) {
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
+            &input, zFlushMode);
+        Py_END_ALLOW_THREADS
+
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "error ending compression stream: %s",
+                ZSTD_getErrorName(zresult));
+            /* Release any chunks gathered by earlier iterations. */
+            Py_XDECREF(result);
+            return NULL;
+        }
+
+        if (self->output.pos) {
+            if (result) {
+                resultSize = PyBytes_GET_SIZE(result);
+
+                if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
+                    Py_CLEAR(result);
+                    return NULL;
+                }
+
+                memcpy(PyBytes_AS_STRING(result) + resultSize,
+                    self->output.dst, self->output.pos);
+            }
+            else {
+                result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
+                if (!result) {
+                    return NULL;
+                }
+            }
+
+            self->output.pos = 0;
+        }
+
+        /* A zero return value ends the loop: the flush is complete. */
+        if (!zresult) {
+            break;
+        }
+    }
+
+    if (result) {
+        return result;
+    }
+    else {
+        return PyBytes_FromString("");
+    }
+}
+
+static PyMethodDef ZstdCompressionObj_methods[] = { /* methods exposed on ZstdCompressionObj instances */
+ { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS,
+ PyDoc_STR("compress data") },
+ { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS,
+ PyDoc_STR("finish compression operation") },
+ { NULL, NULL } /* sentinel terminating the table */
+};
+
+PyTypeObject ZstdCompressionObjType = { /* type object for zstd.ZstdCompressionObj; slots set to 0 are left unset here */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdCompressionObj", /* tp_name */
+ sizeof(ZstdCompressionObj), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdCompressionObj__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ ZstdCompressionObj_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init; no initializer is registered for this type */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/**
+ * Finalizes the ZstdCompressionObj type object.
+ *
+ * `module` is accepted but not used. A PyType_Ready() failure is not
+ * reported through a return value; whatever exception it set is left in place.
+ */
+void compressobj_module_init(PyObject* module) {
+    PyTypeObject* type = &ZstdCompressionObjType;
+
+    Py_TYPE(type) = &PyType_Type;
+
+    /* Nothing further happens on either outcome. */
+    (void)PyType_Ready(type);
+}
diff --git a/contrib/python/zstandard/py2/c-ext/compressor.c b/contrib/python/zstandard/py2/c-ext/compressor.c
new file mode 100644
index 00000000000..fb729c17c4e
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/compressor.c
@@ -0,0 +1,1676 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+#include "pool.h"
+
+extern PyObject* ZstdError;
+
+/**
+ * Applies the compressor's stored parameters, and its dictionary if one is
+ * attached, to its compression context.
+ *
+ * Returns 0 on success. Returns 1 with ZstdError set if zstd rejects the
+ * parameters or the dictionary.
+ */
+int setup_cctx(ZstdCompressor* compressor) {
+    size_t zresult;
+
+    assert(compressor);
+    assert(compressor->cctx);
+    assert(compressor->params);
+
+    zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "could not set compression parameters: %s",
+            ZSTD_getErrorName(zresult));
+        return 1;
+    }
+
+    /* Without a dictionary there is nothing left to configure. */
+    if (!compressor->dict) {
+        return 0;
+    }
+
+    /* Use the digested dictionary when present, else load the raw data by reference. */
+    zresult = compressor->dict->cdict
+        ? ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict)
+        : ZSTD_CCtx_loadDictionary_advanced(compressor->cctx,
+            compressor->dict->dictData, compressor->dict->dictSize,
+            ZSTD_dlm_byRef, compressor->dict->dictType);
+
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "could not load compression dictionary: %s",
+            ZSTD_getErrorName(zresult));
+        return 1;
+    }
+
+    return 0;
+}
+
+/**
+ * Builds the 3-tuple (ingested, consumed, produced) from
+ * ZSTD_getFrameProgression() for `cctx`.
+ *
+ * Returns a new reference, or NULL with an exception set if an allocation fails.
+ */
+static PyObject* frame_progression(ZSTD_CCtx* cctx) {
+    PyObject* tuple;
+    ZSTD_frameProgression progression;
+    unsigned long long counters[3];
+    Py_ssize_t i;
+
+    tuple = PyTuple_New(3);
+    if (!tuple) {
+        return NULL;
+    }
+
+    progression = ZSTD_getFrameProgression(cctx);
+
+    counters[0] = progression.ingested;
+    counters[1] = progression.consumed;
+    counters[2] = progression.produced;
+
+    for (i = 0; i < 3; i++) {
+        PyObject* item = PyLong_FromUnsignedLongLong(counters[i]);
+        if (!item) {
+            Py_DECREF(tuple);
+            return NULL;
+        }
+
+        /* PyTuple_SET_ITEM takes over the reference to item. */
+        PyTuple_SET_ITEM(tuple, i, item);
+    }
+
+    return tuple;
+}
+
+PyDoc_STRVAR(ZstdCompressor__doc__,
+"ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
+"\n"
+"Create an object used to perform Zstandard compression.\n"
+"\n"
+"An instance can compress data various ways. Instances can be used multiple\n"
+"times. Each compression operation will use the compression parameters\n"
+"defined at construction time.\n"
+"\n"
+"Compression can be configured via the following named arguments:\n"
+"\n"
+"level\n"
+" Integer compression level.\n"
+"dict_data\n"
+" A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
+"compression_params\n"
+" A ``CompressionParameters`` instance defining low-level compression\n"
+" parameters. If defined, this will overwrite the ``level`` argument.\n"
+"write_checksum\n"
+" If True, a 4 byte content checksum will be written with the compressed\n"
+" data, allowing the decompressor to perform content verification.\n"
+"write_content_size\n"
+" If True (the default), the decompressed content size will be included in\n"
+" the header of the compressed data. This data will only be written if the\n"
+" compressor knows the size of the input data.\n"
+"write_dict_id\n"
+" Determines whether the dictionary ID will be written into the compressed\n"
+" data. Defaults to True. Only adds content to the compressed data if\n"
+" a dictionary is being used.\n"
+"threads\n"
+" Number of threads to use to compress data concurrently. When set,\n"
+" compression operations are performed on multiple threads. The default\n"
+" value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
+" set the number of threads to the number of detected logical CPUs.\n"
+);
+
+/**
+ * tp_init for ZstdCompressor.
+ *
+ * Parses the constructor arguments, creates the compression context and
+ * its parameter set, records the optional dictionary and applies everything
+ * through setup_cctx().
+ *
+ * `compression_params` cannot be combined with write_checksum,
+ * write_content_size, write_dict_id or threads. A negative `threads` value
+ * is replaced by cpu_count().
+ *
+ * Returns 0 on success, -1 with an exception set on failure.
+ */
+static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "level",
+        "dict_data",
+        "compression_params",
+        "write_checksum",
+        "write_content_size",
+        "write_dict_id",
+        "threads",
+        NULL
+    };
+
+    int level = 3;
+    ZstdCompressionDict* dict = NULL;
+    ZstdCompressionParametersObject* params = NULL;
+    PyObject* writeChecksum = NULL;
+    PyObject* writeContentSize = NULL;
+    PyObject* writeDictID = NULL;
+    int threads = 0;
+    int flag;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
+        kwlist, &level, &ZstdCompressionDictType, &dict,
+        &ZstdCompressionParametersType, &params,
+        &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
+        return -1;
+    }
+
+    if (level > ZSTD_maxCLevel()) {
+        PyErr_Format(PyExc_ValueError, "level must be less than %d",
+            ZSTD_maxCLevel() + 1);
+        return -1;
+    }
+
+    if (threads < 0) {
+        threads = cpu_count();
+    }
+
+    /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
+       overhead of each compression operation. */
+    self->cctx = ZSTD_createCCtx();
+    if (!self->cctx) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    /* TODO stuff the original parameters away somewhere so we can reset later. This
+       will allow us to do things like automatically adjust cparams based on input
+       size (assuming zstd isn't doing that internally). */
+
+    self->params = ZSTD_createCCtxParams();
+    if (!self->params) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    if (params && writeChecksum) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and write_checksum");
+        return -1;
+    }
+
+    if (params && writeContentSize) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and write_content_size");
+        return -1;
+    }
+
+    if (params && writeDictID) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and write_dict_id");
+        return -1;
+    }
+
+    if (params && threads) {
+        PyErr_SetString(PyExc_ValueError,
+            "cannot define compression_params and threads");
+        return -1;
+    }
+
+    if (params) {
+        if (set_parameters(self->params, params)) {
+            return -1;
+        }
+    }
+    else {
+        if (set_parameter(self->params, ZSTD_c_compressionLevel, level)) {
+            return -1;
+        }
+
+        /* PyObject_IsTrue() returns -1 with an exception set when the
+           truth test itself fails; propagate that instead of handing -1
+           to zstd as a parameter value. */
+        flag = writeContentSize ? PyObject_IsTrue(writeContentSize) : 1;
+        if (flag < 0) {
+            return -1;
+        }
+        if (set_parameter(self->params, ZSTD_c_contentSizeFlag, flag)) {
+            return -1;
+        }
+
+        flag = writeChecksum ? PyObject_IsTrue(writeChecksum) : 0;
+        if (flag < 0) {
+            return -1;
+        }
+        if (set_parameter(self->params, ZSTD_c_checksumFlag, flag)) {
+            return -1;
+        }
+
+        flag = writeDictID ? PyObject_IsTrue(writeDictID) : 1;
+        if (flag < 0) {
+            return -1;
+        }
+        if (set_parameter(self->params, ZSTD_c_dictIDFlag, flag)) {
+            return -1;
+        }
+
+        if (threads) {
+            if (set_parameter(self->params, ZSTD_c_nbWorkers, threads)) {
+                return -1;
+            }
+        }
+    }
+
+    if (dict) {
+        self->dict = dict;
+        Py_INCREF(dict);
+    }
+
+    if (setup_cctx(self)) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * tp_dealloc for ZstdCompressor: frees the compression context and the
+ * parameter set if present, drops the dictionary reference and releases
+ * the object.
+ */
+static void ZstdCompressor_dealloc(ZstdCompressor* self) {
+    if (NULL != self->cctx) {
+        ZSTD_freeCCtx(self->cctx);
+        self->cctx = NULL;
+    }
+
+    if (NULL != self->params) {
+        ZSTD_freeCCtxParams(self->params);
+        self->params = NULL;
+    }
+
+    /* The dictionary reference is dropped only after the context is gone. */
+    Py_XDECREF(self->dict);
+
+    PyObject_Del(self);
+}
+
+PyDoc_STRVAR(ZstdCompressor_memory_size__doc__,
+"memory_size()\n"
+"\n"
+"Obtain the memory usage of this compressor, in bytes.\n"
+);
+
+/**
+ * memory_size() -> int
+ *
+ * Returns ZSTD_sizeof_CCtx() for this compressor's context. Raises
+ * ZstdError if the compressor has no context.
+ */
+static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) {
+    if (!self->cctx) {
+        PyErr_SetString(ZstdError, "no compressor context found; this should never happen");
+        return NULL;
+    }
+
+    return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
+}
+
+PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__,
+"frame_progression()\n"
+"\n"
+"Return information on how much work the compressor has done.\n"
+"\n"
+"Returns a 3-tuple of (ingested, consumed, produced).\n"
+);
+
+/**
+ * frame_progression() -> (ingested, consumed, produced)
+ *
+ * Thin wrapper delegating to frame_progression() on this compressor's context.
+ */
+static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) {
+    PyObject* progress = frame_progression(self->cctx);
+
+    return progress;
+}
+
+PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
+"copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
+"compress data between streams\n"
+"\n"
+"Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
+"``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
+"method.\n"
+"\n"
+"An optional ``size`` argument specifies the size of the source stream.\n"
+"If defined, compression parameters will be tuned based on the size.\n"
+"\n"
+"Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
+"of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
+"the default compression stream input and output sizes, respectively.\n"
+);
+
+/**
+ * copy_stream(ifh, ofh[, size, read_size, write_size]) -> (read, written)
+ *
+ * Repeatedly calls ifh.read(read_size), compresses what it returns and
+ * passes each filled output buffer to ofh.write(). An empty read ends the
+ * input; the frame is then finished with ZSTD_e_end.
+ *
+ * Returns a 2-tuple of total bytes read and total bytes written, or NULL
+ * with an exception set. Errors raised by write(), a non-bytes return value
+ * from read() and allocation failures are all propagated rather than
+ * ignored.
+ */
+static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "ifh",
+        "ofh",
+        "size",
+        "read_size",
+        "write_size",
+        NULL
+    };
+
+    PyObject* source;
+    PyObject* dest;
+    unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    size_t inSize = ZSTD_CStreamInSize();
+    size_t outSize = ZSTD_CStreamOutSize();
+    ZSTD_inBuffer input;
+    ZSTD_outBuffer output;
+    Py_ssize_t totalRead = 0;
+    Py_ssize_t totalWrite = 0;
+    char* readBuffer;
+    Py_ssize_t readSize;
+    PyObject* readResult = NULL;
+    PyObject* res = NULL;
+    size_t zresult;
+    PyObject* writeResult;
+    PyObject* totalReadPy;
+    PyObject* totalWritePy;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
+        &source, &dest, &sourceSize, &inSize, &outSize)) {
+        return NULL;
+    }
+
+    if (!PyObject_HasAttrString(source, "read")) {
+        PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
+        return NULL;
+    }
+
+    if (!PyObject_HasAttrString(dest, "write")) {
+        PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
+        return NULL;
+    }
+
+    ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
+    }
+
+    /* output.dst is assigned before any jump to finally, so the cleanup
+       there never sees an uninitialized pointer. */
+    output.dst = PyMem_Malloc(outSize);
+    if (!output.dst) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+    output.size = outSize;
+    output.pos = 0;
+
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
+    while (1) {
+        /* Try to read from source stream. */
+        readResult = PyObject_CallMethod(source, "read", "n", inSize);
+        if (!readResult) {
+            PyErr_SetString(ZstdError, "could not read() from source");
+            goto finally;
+        }
+
+        /* Fails with TypeError when read() did not return bytes; without
+           this check readBuffer/readSize would be used uninitialized. */
+        if (PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize) < 0) {
+            goto finally;
+        }
+
+        /* If no data was read, we're at EOF. */
+        if (0 == readSize) {
+            break;
+        }
+
+        totalRead += readSize;
+
+        /* Send data to compressor */
+        input.src = readBuffer;
+        input.size = readSize;
+        input.pos = 0;
+
+        while (input.pos < input.size) {
+            Py_BEGIN_ALLOW_THREADS
+            zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_continue);
+            Py_END_ALLOW_THREADS
+
+            if (ZSTD_isError(zresult)) {
+                PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+                goto finally;
+            }
+
+            if (output.pos) {
+#if PY_MAJOR_VERSION >= 3
+                writeResult = PyObject_CallMethod(dest, "write", "y#",
+#else
+                writeResult = PyObject_CallMethod(dest, "write", "s#",
+#endif
+                    output.dst, output.pos);
+                /* Stop on a failed write instead of continuing with an
+                   exception pending. */
+                if (!writeResult) {
+                    goto finally;
+                }
+                Py_DECREF(writeResult);
+                totalWrite += output.pos;
+                output.pos = 0;
+            }
+        }
+
+        Py_CLEAR(readResult);
+    }
+
+    /* We've finished reading. Now flush the compressor stream. */
+    assert(input.pos == input.size);
+
+    /* The last chunk's bytes object has been released, so do not carry a
+       pointer into it through the flush. */
+    input.src = NULL;
+    input.size = 0;
+    input.pos = 0;
+
+    while (1) {
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_end);
+        Py_END_ALLOW_THREADS
+
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "error ending compression stream: %s",
+                ZSTD_getErrorName(zresult));
+            goto finally;
+        }
+
+        if (output.pos) {
+#if PY_MAJOR_VERSION >= 3
+            writeResult = PyObject_CallMethod(dest, "write", "y#",
+#else
+            writeResult = PyObject_CallMethod(dest, "write", "s#",
+#endif
+                output.dst, output.pos);
+            if (!writeResult) {
+                goto finally;
+            }
+            Py_DECREF(writeResult);
+            totalWrite += output.pos;
+            output.pos = 0;
+        }
+
+        /* A zero return value ends the loop: the frame is complete. */
+        if (!zresult) {
+            break;
+        }
+    }
+
+    totalReadPy = PyLong_FromSsize_t(totalRead);
+    totalWritePy = PyLong_FromSsize_t(totalWrite);
+    /* Either conversion may fail; only build the tuple when both succeeded. */
+    if (totalReadPy && totalWritePy) {
+        res = PyTuple_Pack(2, totalReadPy, totalWritePy);
+    }
+    Py_XDECREF(totalReadPy);
+    Py_XDECREF(totalWritePy);
+
+finally:
+    if (output.dst) {
+        PyMem_Free(output.dst);
+    }
+
+    Py_XDECREF(readResult);
+
+    return res;
+}
+
+PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__,
+"stream_reader(source, [size=0])\n"
+"\n"
+"Obtain an object that behaves like an I/O stream.\n"
+"\n"
+"The source object can be any object with a ``read(size)`` method\n"
+"or an object that conforms to the buffer protocol.\n"
+);
+
+/**
+ * stream_reader(source[, size, read_size]) -> ZstdCompressionReader
+ *
+ * `source` is either an object with a read() method, in which case it is
+ * stored along with `read_size`, or an object supporting the buffer
+ * protocol, in which case a contiguous read-only view is taken and its
+ * length replaces `size`.
+ *
+ * The compression context is reset and the source size pledged before the
+ * reader is returned. Raises TypeError for an unsupported source and
+ * ZstdError if the size cannot be pledged.
+ */
+static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "source",
+        "size",
+        "read_size",
+        NULL
+    };
+
+    PyObject* source;
+    unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    size_t readSize = ZSTD_CStreamInSize();
+    ZstdCompressionReader* result = NULL;
+    size_t zresult;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
+        &source, &sourceSize, &readSize)) {
+        return NULL;
+    }
+
+    result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL);
+    if (!result) {
+        return NULL;
+    }
+
+    result->entered = 0;
+    result->closed = 0;
+
+    if (PyObject_HasAttrString(source, "read")) {
+        result->reader = source;
+        Py_INCREF(source);
+        result->readSize = readSize;
+    }
+    else if (1 == PyObject_CheckBuffer(source)) {
+        if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
+            goto except;
+        }
+
+        assert(result->buffer.len >= 0);
+
+        /* For buffers the real length overrides any size argument. */
+        sourceSize = result->buffer.len;
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+            "must pass an object with a read() method or that conforms to the buffer protocol");
+        goto except;
+    }
+
+    ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        goto except;
+    }
+
+    result->compressor = self;
+    Py_INCREF(self);
+
+    return result;
+
+except:
+    Py_CLEAR(result);
+
+    return NULL;
+}
+
+PyDoc_STRVAR(ZstdCompressor_compress__doc__,
+"compress(data)\n"
+"\n"
+"Compress data in a single operation.\n"
+"\n"
+"This is the simplest mechanism to perform compression: simply pass in a\n"
+"value and get a compressed value back. It is also the most prone to abuse.\n"
+"The input and output values must fit in memory, so passing in very large\n"
+"values can result in excessive memory usage. For this reason, one of the\n"
+"streaming based APIs is preferred for larger values.\n"
+);
+
+/**
+ * compress(data) -> bytes
+ *
+ * Compresses the whole buffer in one ZSTD_compressStream2(ZSTD_e_end) call
+ * into a bytes object sized by ZSTD_compressBound(), then shrinks that
+ * object to the number of bytes actually written.
+ *
+ * Raises ValueError if the buffer is not C-contiguous or has more than one
+ * dimension, and ZstdError if zstd reports an error or does not finish the
+ * frame in that single call.
+ */
+static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        NULL
+    };
+
+    Py_buffer source;
+    size_t destSize;
+    PyObject* output = NULL;
+    size_t zresult;
+    ZSTD_outBuffer outBuffer;
+    ZSTD_inBuffer inBuffer;
+
+    /* Exactly one argument is accepted, so the format holds exactly one unit. */
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
+#endif
+        kwlist, &source)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+
+    destSize = ZSTD_compressBound(source.len);
+    output = PyBytes_FromStringAndSize(NULL, destSize);
+    if (!output) {
+        goto finally;
+    }
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        Py_CLEAR(output);
+        goto finally;
+    }
+
+    inBuffer.src = source.buf;
+    inBuffer.size = source.len;
+    inBuffer.pos = 0;
+
+    outBuffer.dst = PyBytes_AsString(output);
+    outBuffer.size = destSize;
+    outBuffer.pos = 0;
+
+    Py_BEGIN_ALLOW_THREADS
+    /* By avoiding ZSTD_compress(), we don't necessarily write out content
+       size. This means the argument to ZstdCompressor to control frame
+       parameters is honored. */
+    zresult = ZSTD_compressStream2(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
+    Py_END_ALLOW_THREADS
+
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
+        Py_CLEAR(output);
+        goto finally;
+    }
+    else if (zresult) {
+        PyErr_SetString(ZstdError, "unexpected partial frame flush");
+        Py_CLEAR(output);
+        goto finally;
+    }
+
+    /* Shrink in place rather than reallocating. */
+    Py_SIZE(output) = outBuffer.pos;
+    /* Keep the bytes object NUL-terminated at its new length; pos never
+       exceeds destSize, and the allocation holds destSize + 1 bytes. */
+    PyBytes_AS_STRING(output)[outBuffer.pos] = '\0';
+
+finally:
+    PyBuffer_Release(&source);
+    return output;
+}
+
+PyDoc_STRVAR(ZstdCompressionObj__doc__,
+"compressobj()\n"
+"\n"
+"Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
+"\n"
+"The returned object exposes an API similar to ``zlib.compressobj`` and\n"
+"``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
+"without changing how compression is performed.\n"
+);
+
+/**
+ * compressobj([size]) -> ZstdCompressionObj
+ *
+ * Resets the compression context, pledges `size` as the source size and
+ * returns a new ZstdCompressionObj that holds a reference to this
+ * compressor and owns an output buffer of ZSTD_CStreamOutSize() bytes.
+ *
+ * Raises ZstdError if the size cannot be pledged and MemoryError if the
+ * buffer cannot be allocated.
+ */
+static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = { "size", NULL };
+
+    unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    size_t bufferSize = ZSTD_CStreamOutSize();
+    size_t zresult;
+    ZstdCompressionObj* cobj;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
+        return NULL;
+    }
+
+    ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
+    }
+
+    cobj = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
+    if (NULL == cobj) {
+        return NULL;
+    }
+
+    cobj->output.dst = PyMem_Malloc(bufferSize);
+    if (NULL == cobj->output.dst) {
+        PyErr_NoMemory();
+        Py_DECREF(cobj);
+        return NULL;
+    }
+    cobj->output.size = bufferSize;
+
+    /* The new object keeps this compressor alive. */
+    Py_INCREF(self);
+    cobj->compressor = self;
+
+    return cobj;
+}
+
+PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__,
+"read_to_iter(reader, [size=0, read_size=default, write_size=default])\n"
+"Read uncompressed data from a reader and return an iterator\n"
+"\n"
+"Returns an iterator of compressed data produced from reading from ``reader``.\n"
+"\n"
+"Uncompressed data will be obtained from ``reader`` by calling the\n"
+"``read(size)`` method of it. The source data will be streamed into a\n"
+"compressor. As compressed data is available, it will be exposed to the\n"
+"iterator.\n"
+"\n"
+"Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
+"are at most ``write_size`` bytes. Both values default to the zstd input\n"
+"and output defaults, respectively.\n"
+"\n"
+"The caller is partially in control of how fast data is fed into the\n"
+"compressor by how it consumes the returned iterator. The compressor will\n"
+"not consume from the reader unless the caller consumes from the iterator.\n"
+);
+
+/**
+ * read_to_iter(reader[, size, read_size, write_size]) -> ZstdCompressorIterator
+ *
+ * `reader` is either an object with a read() method or an object
+ * supporting the buffer protocol; for buffers the buffer length replaces
+ * `size`. The compression context is reset and the source size pledged,
+ * then the iterator is given a reference to this compressor and an output
+ * buffer of `write_size` bytes.
+ *
+ * Every failure after the iterator is created releases it before
+ * returning NULL. Raises ValueError for an unsupported reader, ZstdError
+ * if the size cannot be pledged and MemoryError on allocation failure.
+ */
+static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "reader",
+        "size",
+        "read_size",
+        "write_size",
+        NULL
+    };
+
+    PyObject* reader;
+    unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    size_t inSize = ZSTD_CStreamInSize();
+    size_t outSize = ZSTD_CStreamOutSize();
+    ZstdCompressorIterator* result;
+    size_t zresult;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist,
+        &reader, &sourceSize, &inSize, &outSize)) {
+        return NULL;
+    }
+
+    result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
+    if (!result) {
+        return NULL;
+    }
+    if (PyObject_HasAttrString(reader, "read")) {
+        result->reader = reader;
+        Py_INCREF(result->reader);
+    }
+    else if (1 == PyObject_CheckBuffer(reader)) {
+        if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
+            goto except;
+        }
+
+        /* For buffers the real length overrides any size argument. */
+        sourceSize = result->buffer.len;
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError,
+            "must pass an object with a read() method or conforms to buffer protocol");
+        goto except;
+    }
+
+    ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        /* Go through the shared cleanup so the iterator is not leaked. */
+        goto except;
+    }
+
+    result->compressor = self;
+    Py_INCREF(result->compressor);
+
+    result->inSize = inSize;
+    result->outSize = outSize;
+
+    result->output.dst = PyMem_Malloc(outSize);
+    if (!result->output.dst) {
+        PyErr_NoMemory();
+        goto except;
+    }
+    result->output.size = outSize;
+
+    return result;
+
+except:
+    Py_CLEAR(result);
+
+    return NULL;
+}
+
+PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__,
+"Create a context manager to write compressed data to an object.\n"
+"\n"
+"The passed object must have a ``write()`` method.\n"
+"\n"
+"The caller feeds input data to the object by calling ``compress(data)``.\n"
+"Compressed data is written to the argument given to this function.\n"
+"\n"
+"The function takes an optional ``size`` argument indicating the total size\n"
+"of the eventual input. If specified, the size will influence compression\n"
+"parameter tuning and could result in the size being written into the\n"
+"header of the compressed data.\n"
+"\n"
+"An optional ``write_size`` argument is also accepted. It defines the maximum\n"
+"byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
+"for a compressor output stream.\n"
+);
+
+/**
+ * stream_writer(writer[, size, write_size, write_return_read])
+ *     -> ZstdCompressionWriter
+ *
+ * Checks that `writer` has a write attribute, resets the compression
+ * context, pledges `size` as the source size and returns a new writer
+ * object holding references to this compressor and to `writer`, with an
+ * output buffer of `write_size` bytes.
+ *
+ * Raises ValueError if `writer` lacks write(), ZstdError if the size cannot
+ * be pledged and MemoryError if the buffer cannot be allocated.
+ */
+static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = { "writer", "size", "write_size", "write_return_read", NULL };
+
+    PyObject* writer;
+    PyObject* writeReturnRead = NULL;
+    unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    size_t outSize = ZSTD_CStreamOutSize();
+    size_t zresult;
+    ZstdCompressionWriter* stream;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkO:stream_writer", kwlist,
+        &writer, &sourceSize, &outSize, &writeReturnRead)) {
+        return NULL;
+    }
+
+    if (!PyObject_HasAttrString(writer, "write")) {
+        PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
+        return NULL;
+    }
+
+    ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
+    }
+
+    stream = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
+    if (NULL == stream) {
+        return NULL;
+    }
+
+    stream->entered = 0;
+    stream->closed = 0;
+
+    stream->output.dst = PyMem_Malloc(outSize);
+    if (NULL == stream->output.dst) {
+        Py_DECREF(stream);
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    stream->output.pos = 0;
+    stream->output.size = outSize;
+
+    /* The writer object keeps both the compressor and the sink alive. */
+    Py_INCREF(self);
+    stream->compressor = self;
+
+    Py_INCREF(writer);
+    stream->writer = writer;
+
+    stream->outSize = outSize;
+    stream->bytesCompressed = 0;
+
+    if (writeReturnRead) {
+        stream->writeReturnRead = PyObject_IsTrue(writeReturnRead);
+    }
+    else {
+        stream->writeReturnRead = 0;
+    }
+
+    return stream;
+}
+
+PyDoc_STRVAR(ZstdCompressor_chunker__doc__,
+"Create an object for iterative compressing to same-sized chunks.\n"
+);
+
+/**
+ * chunker([size, chunk_size]) -> ZstdCompressionChunker
+ *
+ * Resets the compression context, pledges `size` as the source size and
+ * returns a new chunker that holds a reference to this compressor and owns
+ * an output buffer of `chunk_size` bytes (ZSTD_CStreamOutSize() by default).
+ *
+ * Raises ZstdError if the size cannot be pledged and MemoryError if the
+ * buffer cannot be allocated.
+ */
+static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = { "size", "chunk_size", NULL };
+
+    unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    size_t chunkSize = ZSTD_CStreamOutSize();
+    size_t zresult;
+    ZstdCompressionChunker* created;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
+        &sourceSize, &chunkSize)) {
+        return NULL;
+    }
+
+    ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+
+    zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "error setting source size: %s",
+            ZSTD_getErrorName(zresult));
+        return NULL;
+    }
+
+    created = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL);
+    if (NULL == created) {
+        return NULL;
+    }
+
+    created->output.dst = PyMem_Malloc(chunkSize);
+    if (NULL == created->output.dst) {
+        PyErr_NoMemory();
+        Py_DECREF(created);
+        return NULL;
+    }
+    created->output.size = chunkSize;
+    created->output.pos = 0;
+
+    /* The chunker keeps this compressor alive. */
+    Py_INCREF(self);
+    created->compressor = self;
+
+    created->chunkSize = chunkSize;
+
+    return created;
+}
+
+typedef struct { /* one input item to compress */
+ void* sourceData; /* presumably the start of the item's bytes - confirm against the code that fills this in */
+ size_t sourceSize; /* item length; passed to ZSTD_compressBound() by compress_worker */
+} DataSource;
+
+typedef struct { /* a collection of DataSource items */
+ DataSource* sources; /* array of items */
+ Py_ssize_t sourcesSize; /* presumably the number of entries in `sources` - confirm */
+ unsigned long long totalSourceSize; /* presumably the sum of all sourceSize values - confirm */
+} DataSources;
+
+typedef struct { /* one output allocation plus its segment table */
+ void* dest; /* output memory; malloc'd by compress_worker */
+ Py_ssize_t destSize; /* NOTE(review): presumably the size of `dest` in bytes - confirm */
+ BufferSegment* segments; /* segment array; calloc'd by compress_worker */
+ Py_ssize_t segmentsSize; /* number of entries allocated in `segments` */
+} DestBuffer;
+
+typedef enum { /* values stored in WorkerState.error */
+ WorkerError_none = 0, /* no error recorded */
+ WorkerError_zstd = 1, /* NOTE(review): presumably a zstd call failed, with the code kept in WorkerState.zresult - confirm */
+ WorkerError_no_memory = 2, /* set by compress_worker when an allocation fails */
+ WorkerError_nospace = 3, /* NOTE(review): meaning not shown in this section - confirm */
+} WorkerError;
+
+/**
+ * Holds state for an individual worker performing multi_compress_to_buffer work.
+ */
+typedef struct {
+ /* Used for compression. */
+ ZSTD_CCtx* cctx;
+
+ /* What to compress. */
+ DataSource* sources; /* item array, indexed by the offsets below */
+ Py_ssize_t sourcesSize;
+ Py_ssize_t startOffset; /* first index this worker handles */
+ Py_ssize_t endOffset; /* last index handled, inclusive: the item count is end - start + 1 */
+ unsigned long long totalSourceSize; /* used by compress_worker to size its first output allocation */
+
+ /* Result storage. */
+ DestBuffer* destBuffers; /* must be NULL on entry to compress_worker, which allocates it */
+ Py_ssize_t destCount; /* must be 0 on entry to compress_worker */
+
+ /* Error tracking. */
+ WorkerError error;
+ size_t zresult;
+ Py_ssize_t errorOffset;
+} WorkerState;
+
+/**
+ * Compress sources[startOffset..endOffset] (inclusive) using state->cctx.
+ *
+ * Every input becomes one zstd frame. Frames are packed back to back into
+ * malloc()ed DestBuffer instances recorded in state->destBuffers and
+ * state->destCount, with one BufferSegment describing each frame.
+ *
+ * Nothing is returned. On failure state->error is set (together with
+ * state->zresult and state->errorOffset where they apply) and whatever was
+ * allocated so far stays reachable through state so the caller can free() it.
+ *
+ * Only malloc()/calloc()/realloc() and zstd functions are called here; no
+ * Python API is used.
+ */
+static void compress_worker(WorkerState* state) {
+    Py_ssize_t inputOffset = state->startOffset;
+    Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
+    Py_ssize_t currentBufferStartOffset = state->startOffset;
+    size_t zresult;
+    void* newDest;
+    size_t allocationSize;
+    size_t boundSize;
+    Py_ssize_t destOffset = 0;
+    DataSource* sources = state->sources;
+    DestBuffer* destBuffer;
+
+    assert(!state->destBuffers);
+    assert(0 == state->destCount);
+
+    /*
+     * The total size of the compressed data is unknown until we actually
+     * compress data. That means we can't pre-allocate the exact size we need.
+     *
+     * There is a cost to every allocation and reallocation. So, it is in our
+     * interest to minimize the number of allocations.
+     *
+     * There is also a cost to too few allocations. If allocations are too
+     * large they may fail. If buffers are shared and all inputs become
+     * irrelevant at different lifetimes, then a reference to one segment
+     * in the buffer will keep the entire buffer alive. This leads to excessive
+     * memory usage.
+     *
+     * Our current strategy is to assume a compression ratio of 16:1 and
+     * allocate buffers of that size, rounded up to the nearest power of 2
+     * (because computers like round numbers). That ratio is greater than what
+     * most inputs achieve. This is by design: we don't want to over-allocate.
+     * But we don't want to under-allocate and lead to too many buffers either.
+     */
+
+    state->destCount = 1;
+
+    state->destBuffers = calloc(1, sizeof(DestBuffer));
+    if (NULL == state->destBuffers) {
+        state->error = WorkerError_no_memory;
+        return;
+    }
+
+    destBuffer = &state->destBuffers[state->destCount - 1];
+
+    /*
+     * Rather than track bounds and grow the segments buffer, allocate space
+     * to hold remaining items then truncate when we're done with it.
+     */
+    destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
+    if (NULL == destBuffer->segments) {
+        state->error = WorkerError_no_memory;
+        return;
+    }
+
+    destBuffer->segmentsSize = remainingItems;
+
+    assert(state->totalSourceSize <= SIZE_MAX);
+    allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
+
+    /* If the maximum size of the output is larger than that, round up. */
+    boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
+
+    if (boundSize > allocationSize) {
+        allocationSize = roundpow2(boundSize);
+    }
+
+    destBuffer->dest = malloc(allocationSize);
+    if (NULL == destBuffer->dest) {
+        state->error = WorkerError_no_memory;
+        return;
+    }
+
+    destBuffer->destSize = allocationSize;
+
+    for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
+        void* source = sources[inputOffset].sourceData;
+        size_t sourceSize = sources[inputOffset].sourceSize;
+        size_t destAvailable;
+        void* dest;
+        ZSTD_outBuffer opOutBuffer;
+        ZSTD_inBuffer opInBuffer;
+
+        destAvailable = destBuffer->destSize - destOffset;
+        boundSize = ZSTD_compressBound(sourceSize);
+
+        /*
+         * Not enough space in current buffer to hold largest compressed output.
+         * So allocate and switch to a new output buffer.
+         */
+        if (boundSize > destAvailable) {
+            /*
+             * The downsizing of the existing buffer is optional. It should be cheap
+             * (unlike growing). So we just do it.
+             */
+            if (destAvailable) {
+                newDest = realloc(destBuffer->dest, destOffset);
+                if (NULL == newDest) {
+                    state->error = WorkerError_no_memory;
+                    return;
+                }
+
+                destBuffer->dest = newDest;
+                destBuffer->destSize = destOffset;
+            }
+
+            /* Truncate segments buffer. */
+            newDest = realloc(destBuffer->segments,
+                (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
+            if (NULL == newDest) {
+                state->error = WorkerError_no_memory;
+                return;
+            }
+
+            destBuffer->segments = newDest;
+            destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
+
+            /* Grow space for new struct. */
+            /* TODO consider over-allocating so we don't do this every time. */
+            newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
+            if (NULL == newDest) {
+                state->error = WorkerError_no_memory;
+                return;
+            }
+
+            state->destBuffers = newDest;
+            state->destCount++;
+
+            destBuffer = &state->destBuffers[state->destCount - 1];
+
+            /* Don't take any chances with non-NULL pointers. */
+            memset(destBuffer, 0, sizeof(DestBuffer));
+
+            /**
+             * We could dynamically update allocation size based on work done so far.
+             * For now, keep it simple.
+             */
+            assert(state->totalSourceSize <= SIZE_MAX);
+            allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
+
+            if (boundSize > allocationSize) {
+                allocationSize = roundpow2(boundSize);
+            }
+
+            destBuffer->dest = malloc(allocationSize);
+            if (NULL == destBuffer->dest) {
+                state->error = WorkerError_no_memory;
+                return;
+            }
+
+            destBuffer->destSize = allocationSize;
+            destAvailable = allocationSize;
+            destOffset = 0;
+
+            destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
+            if (NULL == destBuffer->segments) {
+                state->error = WorkerError_no_memory;
+                return;
+            }
+
+            destBuffer->segmentsSize = remainingItems;
+            currentBufferStartOffset = inputOffset;
+        }
+
+        dest = (char*)destBuffer->dest + destOffset;
+
+        opInBuffer.src = source;
+        opInBuffer.size = sourceSize;
+        opInBuffer.pos = 0;
+
+        opOutBuffer.dst = dest;
+        opOutBuffer.size = destAvailable;
+        opOutBuffer.pos = 0;
+
+        zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
+        if (ZSTD_isError(zresult)) {
+            state->error = WorkerError_zstd;
+            state->zresult = zresult;
+            state->errorOffset = inputOffset;
+            break;
+        }
+
+        zresult = ZSTD_compressStream2(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
+        if (ZSTD_isError(zresult)) {
+            state->error = WorkerError_zstd;
+            state->zresult = zresult;
+            state->errorOffset = inputOffset;
+            break;
+        }
+        else if (zresult) {
+            /* Non-zero means the frame could not be completed in the space given. */
+            state->error = WorkerError_nospace;
+            state->errorOffset = inputOffset;
+            break;
+        }
+
+        destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
+        destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos;
+
+        destOffset += opOutBuffer.pos;
+        remainingItems--;
+    }
+
+    /*
+     * Give back the unused tail of the final buffer. This is skipped when an
+     * error was recorded or nothing was written: the caller discards the output
+     * on error, and realloc(ptr, 0) is allowed to free ptr and return NULL,
+     * which would replace the real error with WorkerError_no_memory and leave
+     * destBuffer->dest dangling for the caller to free() a second time.
+     */
+    if (WorkerError_none == state->error && destOffset > 0
+        && destBuffer->destSize > destOffset) {
+        newDest = realloc(destBuffer->dest, destOffset);
+        if (NULL == newDest) {
+            state->error = WorkerError_no_memory;
+            return;
+        }
+
+        destBuffer->dest = newDest;
+        destBuffer->destSize = destOffset;
+    }
+}
+
+/**
+ * Compress every entry of `sources` using `compressor`'s parameters (and its
+ * dictionary, when one is set), spreading the entries over at most
+ * `threadCount` workers.
+ *
+ * Each worker gets its own ZSTD_CCtx and a contiguous run of inputs holding
+ * roughly totalSourceSize / threadCount bytes. The work runs between
+ * Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS: inline when a single worker
+ * is used, otherwise queued on a zstd POOL.
+ *
+ * The caller must pass at least one source, a non-zero total size and
+ * threadCount >= 1 (asserted below).
+ *
+ * Returns a new ZstdBufferWithSegmentsCollection built from the workers'
+ * buffers in worker order (and therefore input order), or NULL on failure.
+ */
+ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
+    DataSources* sources, Py_ssize_t threadCount) {
+    unsigned long long bytesPerWorker;
+    POOL_ctx* pool = NULL;
+    WorkerState* workerStates = NULL;
+    Py_ssize_t i;
+    unsigned long long workerBytes = 0;
+    Py_ssize_t workerStartOffset = 0;
+    Py_ssize_t currentThread = 0;
+    int errored = 0;
+    Py_ssize_t segmentsCount = 0;
+    Py_ssize_t segmentIndex;
+    PyObject* segmentsArg = NULL;
+    ZstdBufferWithSegments* buffer;
+    ZstdBufferWithSegmentsCollection* result = NULL;
+
+    assert(sources->sourcesSize > 0);
+    assert(sources->totalSourceSize > 0);
+    assert(threadCount >= 1);
+
+    /* More threads than inputs makes no sense. */
+    threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize
+                                                     : threadCount;
+
+    /* TODO lower thread count when input size is too small and threads would add
+       overhead. */
+
+    workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
+    if (NULL == workerStates) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+
+    /* Zero-fill so the cleanup code can run safely on partially set up states. */
+    memset(workerStates, 0, threadCount * sizeof(WorkerState));
+
+    if (threadCount > 1) {
+        pool = POOL_create(threadCount, 1);
+        if (NULL == pool) {
+            PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
+            goto finally;
+        }
+    }
+
+    bytesPerWorker = sources->totalSourceSize / threadCount;
+
+    for (i = 0; i < threadCount; i++) {
+        size_t zresult;
+
+        workerStates[i].cctx = ZSTD_createCCtx();
+        if (!workerStates[i].cctx) {
+            PyErr_NoMemory();
+            goto finally;
+        }
+
+        zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
+            compressor->params);
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "could not set compression parameters: %s",
+                ZSTD_getErrorName(zresult));
+            goto finally;
+        }
+
+        if (compressor->dict) {
+            if (compressor->dict->cdict) {
+                zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict);
+            }
+            else {
+                zresult = ZSTD_CCtx_loadDictionary_advanced(
+                    workerStates[i].cctx,
+                    compressor->dict->dictData,
+                    compressor->dict->dictSize,
+                    ZSTD_dlm_byRef,
+                    compressor->dict->dictType);
+            }
+
+            if (ZSTD_isError(zresult)) {
+                PyErr_Format(ZstdError, "could not load compression dictionary: %s",
+                    ZSTD_getErrorName(zresult));
+                goto finally;
+            }
+
+        }
+
+        workerStates[i].sources = sources->sources;
+        workerStates[i].sourcesSize = sources->sourcesSize;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    for (i = 0; i < sources->sourcesSize; i++) {
+        workerBytes += sources->sources[i].sourceSize;
+
+        /*
+         * The last worker/thread needs to handle all remaining work. Don't
+         * trigger it prematurely. Defer to the block outside of the loop
+         * to run the last worker/thread. But do still process this loop
+         * so workerBytes is correct.
+         */
+        if (currentThread == threadCount - 1) {
+            continue;
+        }
+
+        if (workerBytes >= bytesPerWorker) {
+            assert(currentThread < threadCount);
+            workerStates[currentThread].totalSourceSize = workerBytes;
+            workerStates[currentThread].startOffset = workerStartOffset;
+            workerStates[currentThread].endOffset = i;
+
+            if (threadCount > 1) {
+                POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
+            }
+            else {
+                compress_worker(&workerStates[currentThread]);
+            }
+
+            currentThread++;
+            workerStartOffset = i + 1;
+            workerBytes = 0;
+        }
+    }
+
+    /*
+     * Hand whatever is left to one more worker. Test for remaining items
+     * rather than remaining bytes: trailing zero-length inputs contribute no
+     * bytes but must still be compressed, otherwise the result would hold
+     * fewer segments than there were inputs.
+     */
+    if (workerStartOffset < sources->sourcesSize) {
+        assert(currentThread < threadCount);
+        workerStates[currentThread].totalSourceSize = workerBytes;
+        workerStates[currentThread].startOffset = workerStartOffset;
+        workerStates[currentThread].endOffset = sources->sourcesSize - 1;
+
+        if (threadCount > 1) {
+            POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
+        }
+        else {
+            compress_worker(&workerStates[currentThread]);
+        }
+    }
+
+    /* Freeing the pool here happens before any worker state is inspected below. */
+    if (threadCount > 1) {
+        POOL_free(pool);
+        pool = NULL;
+    }
+
+    Py_END_ALLOW_THREADS
+
+    /* Turn the first recorded worker failure into a Python exception. */
+    for (i = 0; i < threadCount; i++) {
+        switch (workerStates[i].error) {
+        case WorkerError_no_memory:
+            PyErr_NoMemory();
+            errored = 1;
+            break;
+
+        case WorkerError_zstd:
+            PyErr_Format(ZstdError, "error compressing item %zd: %s",
+                workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
+            errored = 1;
+            break;
+
+        case WorkerError_nospace:
+            PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output",
+                workerStates[i].errorOffset);
+            errored = 1;
+            break;
+
+        default:
+            ;
+        }
+
+        if (errored) {
+            break;
+        }
+
+    }
+
+    if (errored) {
+        goto finally;
+    }
+
+    segmentsCount = 0;
+    for (i = 0; i < threadCount; i++) {
+        WorkerState* state = &workerStates[i];
+        segmentsCount += state->destCount;
+    }
+
+    segmentsArg = PyTuple_New(segmentsCount);
+    if (NULL == segmentsArg) {
+        goto finally;
+    }
+
+    segmentIndex = 0;
+
+    for (i = 0; i < threadCount; i++) {
+        Py_ssize_t j;
+        WorkerState* state = &workerStates[i];
+
+        for (j = 0; j < state->destCount; j++) {
+            DestBuffer* destBuffer = &state->destBuffers[j];
+            buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
+                destBuffer->segments, destBuffer->segmentsSize);
+
+            if (NULL == buffer) {
+                goto finally;
+            }
+
+            /* Tell instance to use free() instead of PyMem_Free(). */
+            buffer->useFree = 1;
+
+            /*
+             * BufferWithSegments_FromMemory takes ownership of the backing memory.
+             * Unset it here so it doesn't get freed below.
+             */
+            destBuffer->dest = NULL;
+            destBuffer->segments = NULL;
+
+            PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
+        }
+    }
+
+    result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
+        (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
+
+finally:
+    Py_CLEAR(segmentsArg);
+
+    if (pool) {
+        POOL_free(pool);
+    }
+
+    if (workerStates) {
+        Py_ssize_t j;
+
+        for (i = 0; i < threadCount; i++) {
+            WorkerState state = workerStates[i];
+
+            if (state.cctx) {
+                ZSTD_freeCCtx(state.cctx);
+            }
+
+            /* malloc() is used in worker thread. */
+
+            for (j = 0; j < state.destCount; j++) {
+                if (state.destBuffers) {
+                    free(state.destBuffers[j].dest);
+                    free(state.destBuffers[j].segments);
+                }
+            }
+
+
+            free(state.destBuffers);
+        }
+
+        PyMem_Free(workerStates);
+    }
+
+    return result;
+}
+
+PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__, /* docstring attached to the multi_compress_to_buffer method */
+"Compress multiple pieces of data as a single operation\n"
+"\n"
+"Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
+"a list of bytes like objects holding data to compress.\n"
+"\n"
+"Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
+"\n"
+"This function is optimized to perform multiple compression operations as\n"
+"quickly as possible with as little overhead as possible.\n"
+);
+
+static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { /* Python entry point: flatten `data` into a DataSources array and hand it to compress_from_datasources(). Returns NULL on error. */
+ static char* kwlist[] = {
+ "data",
+ "threads",
+ NULL
+ };
+
+ PyObject* data;
+ int threads = 0;
+ Py_buffer* dataBuffers = NULL; /* only allocated for the list input form */
+ DataSources sources;
+ Py_ssize_t i;
+ Py_ssize_t sourceCount = 0; /* also bounds the PyBuffer_Release() loop during cleanup */
+ ZstdBufferWithSegmentsCollection* result = NULL;
+
+ memset(&sources, 0, sizeof(sources)); /* keeps sources.sources NULL so cleanup is safe on every path */
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
+ &data, &threads)) {
+ return NULL;
+ }
+
+ if (threads < 0) { /* negative means "use the value reported by cpu_count()" */
+ threads = cpu_count();
+ }
+
+ if (threads < 2) { /* 0 (the default) and 1 both mean a single worker */
+ threads = 1;
+ }
+
+ if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) { /* input form 1: one BufferWithSegments, one source per segment */
+ ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
+
+ sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
+ if (NULL == sources.sources) {
+ PyErr_NoMemory();
+ goto finally;
+ }
+
+ for (i = 0; i < buffer->segmentCount; i++) {
+ if (buffer->segments[i].length > SIZE_MAX) {
+ PyErr_Format(PyExc_ValueError,
+ "buffer segment %zd is too large for this platform", i);
+ goto finally;
+ }
+
+ sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset; /* points into the buffer's memory; nothing is copied */
+ sources.sources[i].sourceSize = (size_t)buffer->segments[i].length;
+ sources.totalSourceSize += buffer->segments[i].length;
+ }
+
+ sources.sourcesSize = buffer->segmentCount;
+ }
+ else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) { /* input form 2: a collection, flattened buffer by buffer */
+ Py_ssize_t j;
+ Py_ssize_t offset = 0; /* next free index in sources.sources */
+ ZstdBufferWithSegments* buffer;
+ ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
+
+ sourceCount = BufferWithSegmentsCollection_length(collection); /* presumably the total segment count across all buffers; confirm against its definition */
+
+ sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
+ if (NULL == sources.sources) {
+ PyErr_NoMemory();
+ goto finally;
+ }
+
+ for (i = 0; i < collection->bufferCount; i++) {
+ buffer = collection->buffers[i];
+
+ for (j = 0; j < buffer->segmentCount; j++) {
+ if (buffer->segments[j].length > SIZE_MAX) {
+ PyErr_Format(PyExc_ValueError,
+ "buffer segment %zd in buffer %zd is too large for this platform",
+ j, i);
+ goto finally;
+ }
+
+ sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
+ sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length;
+ sources.totalSourceSize += buffer->segments[j].length;
+
+ offset++;
+ }
+ }
+
+ sources.sourcesSize = sourceCount;
+ }
+ else if (PyList_Check(data)) { /* input form 3: a list of objects exposing the buffer protocol */
+ sourceCount = PyList_GET_SIZE(data);
+
+ sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
+ if (NULL == sources.sources) {
+ PyErr_NoMemory();
+ goto finally;
+ }
+
+ dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
+ if (NULL == dataBuffers) {
+ PyErr_NoMemory();
+ goto finally;
+ }
+
+ memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer)); /* zeroed so cleanup can release every slot, including ones never acquired */
+
+ for (i = 0; i < sourceCount; i++) {
+ if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
+ &dataBuffers[i], PyBUF_CONTIG_RO)) {
+ PyErr_Clear(); /* replace the buffer-protocol error with a message naming the item */
+ PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
+ goto finally;
+ }
+
+ sources.sources[i].sourceData = dataBuffers[i].buf;
+ sources.sources[i].sourceSize = dataBuffers[i].len;
+ sources.totalSourceSize += dataBuffers[i].len;
+ }
+
+ sources.sourcesSize = sourceCount;
+ }
+ else {
+ PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
+ goto finally;
+ }
+
+ if (0 == sources.sourcesSize) {
+ PyErr_SetString(PyExc_ValueError, "no source elements found");
+ goto finally;
+ }
+
+ if (0 == sources.totalSourceSize) {
+ PyErr_SetString(PyExc_ValueError, "source elements are empty");
+ goto finally;
+ }
+
+ if (sources.totalSourceSize > SIZE_MAX) {
+ PyErr_SetString(PyExc_ValueError, "sources are too large for this platform");
+ goto finally;
+ }
+
+ result = compress_from_datasources(self, &sources, threads);
+
+finally:
+ PyMem_Free(sources.sources); /* frees only the DataSource array, never the bytes it points at */
+
+ if (dataBuffers) {
+ for (i = 0; i < sourceCount; i++) {
+ PyBuffer_Release(&dataBuffers[i]);
+ }
+
+ PyMem_Free(dataBuffers);
+ }
+
+ return result;
+}
+
+static PyMethodDef ZstdCompressor_methods[] = { /* method table referenced from the tp_methods slot of ZstdCompressorType */
+ { "chunker", (PyCFunction)ZstdCompressor_chunker,
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ },
+ { "compress", (PyCFunction)ZstdCompressor_compress,
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
+ { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
+ { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
+ { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ },
+ { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
+ { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
+ /* TODO Remove deprecated API */
+ { "read_from", (PyCFunction)ZstdCompressor_read_to_iter, /* same C function as read_to_iter */
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
+ /* TODO remove deprecated API */
+ { "write_to", (PyCFunction)ZstdCompressor_stream_writer, /* same C function as stream_writer */
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
+ { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
+ METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
+ { "memory_size", (PyCFunction)ZstdCompressor_memory_size,
+ METH_NOARGS, ZstdCompressor_memory_size__doc__ },
+ { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
+ METH_NOARGS, ZstdCompressor_frame_progression__doc__ },
+ { NULL, NULL } /* sentinel terminating the table */
+};
+
+PyTypeObject ZstdCompressorType = { /* type object for zstd.ZstdCompressor; slots are filled positionally, so their order must not change */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdCompressor", /* tp_name */
+ sizeof(ZstdCompressor), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdCompressor__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ ZstdCompressor_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)ZstdCompressor_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/**
+ * Finish initializing ZstdCompressorType and publish it on `mod` under the
+ * name "ZstdCompressor". Does nothing further if PyType_Ready() fails.
+ */
+void compressor_module_init(PyObject* mod) {
+    PyObject* typeObject = (PyObject*)&ZstdCompressorType;
+
+    Py_TYPE(&ZstdCompressorType) = &PyType_Type;
+
+    if (PyType_Ready(&ZstdCompressorType) >= 0) {
+        /* Take a reference before handing the type to the module. */
+        Py_INCREF(typeObject);
+        PyModule_AddObject(mod, "ZstdCompressor", typeObject);
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/compressoriterator.c b/contrib/python/zstandard/py2/c-ext/compressoriterator.c
new file mode 100644
index 00000000000..24e31c5a4be
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/compressoriterator.c
@@ -0,0 +1,235 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+#define min(a, b) (((a) < (b)) ? (a) : (b)) /* smaller of two values; the selected argument is evaluated twice, so pass expressions without side effects */
+
+extern PyObject* ZstdError;
+
+PyDoc_STRVAR(ZstdCompressorIterator__doc__, /* docstring stored in the tp_doc slot of ZstdCompressorIteratorType */
+"Represents an iterator of compressed data.\n"
+);
+
+/**
+ * tp_dealloc: drop the references held by the iterator, release the input
+ * buffer view and the output memory, then free the object itself.
+ */
+static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
+    /* Any of these may be unset, hence the NULL-tolerant decref. */
+    Py_XDECREF(self->readResult);
+    Py_XDECREF(self->compressor);
+    Py_XDECREF(self->reader);
+
+    if (NULL != self->buffer.buf) {
+        PyBuffer_Release(&self->buffer);
+        memset(&self->buffer, 0, sizeof(self->buffer));
+    }
+
+    /* PyMem_Free() accepts NULL, so no guard is needed here. */
+    PyMem_Free(self->output.dst);
+    self->output.dst = NULL;
+
+    PyObject_Del(self);
+}
+
+/** tp_iter: the object is its own iterator, so return a new reference to it. */
+static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
+    PyObject* iterator = self;
+
+    Py_INCREF(iterator);
+    return iterator;
+}
+
+/**
+ * tp_iternext: return the next chunk of compressed output as a bytes object.
+ *
+ * Input is pulled self->inSize bytes at a time, either by calling read() on
+ * self->reader or by walking self->buffer. When the input is exhausted the
+ * frame is ended with ZSTD_e_end; once zstd reports nothing left to flush,
+ * the following call raises StopIteration.
+ *
+ * Returns NULL with an exception set on error or when iteration is finished.
+ */
+static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
+    size_t zresult;
+    PyObject* readResult = NULL;
+    PyObject* chunk;
+    char* readBuffer;
+    Py_ssize_t readSize = 0;
+    Py_ssize_t bufferRemaining;
+
+    if (self->finishedOutput) {
+        PyErr_SetString(PyExc_StopIteration, "output flushed");
+        return NULL;
+    }
+
+feedcompressor:
+
+    /* If we have data left in the input, consume it. */
+    if (self->input.pos < self->input.size) {
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
+            &self->input, ZSTD_e_continue);
+        Py_END_ALLOW_THREADS
+
+        /* Release the Python object holding the input buffer. */
+        if (self->input.pos == self->input.size) {
+            self->input.src = NULL;
+            self->input.pos = 0;
+            self->input.size = 0;
+            /* readResult is NULL when the input comes from self->buffer
+               rather than a reader, so the decref must tolerate NULL. */
+            Py_XDECREF(self->readResult);
+            self->readResult = NULL;
+        }
+
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+            return NULL;
+        }
+
+        /* If it produced output data, emit it. */
+        if (self->output.pos) {
+            chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
+            self->output.pos = 0;
+            return chunk;
+        }
+    }
+
+    /* We should never have output data sitting around after a previous call. */
+    assert(self->output.pos == 0);
+
+    /* The code above should have either emitted a chunk and returned or consumed
+       the entire input buffer. So the state of the input buffer is not
+       relevant. */
+    if (!self->finishedInput) {
+        if (self->reader) {
+            readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
+            if (!readResult) {
+                PyErr_SetString(ZstdError, "could not read() from source");
+                return NULL;
+            }
+
+            /* A failed conversion leaves readBuffer unset and readSize at 0,
+               which would otherwise be mistaken for end of input while an
+               exception is pending. Propagate the error instead. */
+            if (PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize) < 0) {
+                Py_DECREF(readResult);
+                return NULL;
+            }
+        }
+        else {
+            assert(self->buffer.buf);
+
+            /* Only support contiguous C arrays. */
+            assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
+            assert(self->buffer.itemsize == 1);
+
+            readBuffer = (char*)self->buffer.buf + self->bufferOffset;
+            bufferRemaining = self->buffer.len - self->bufferOffset;
+            readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
+            self->bufferOffset += readSize;
+        }
+
+        if (0 == readSize) {
+            Py_XDECREF(readResult);
+            self->finishedInput = 1;
+        }
+        else {
+            /* Keep the bytes object alive while zstd reads from its memory. */
+            self->readResult = readResult;
+        }
+    }
+
+    /* EOF */
+    if (0 == readSize) {
+        self->input.src = NULL;
+        self->input.size = 0;
+        self->input.pos = 0;
+
+        zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
+            &self->input, ZSTD_e_end);
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "error ending compression stream: %s",
+                ZSTD_getErrorName(zresult));
+            return NULL;
+        }
+
+        assert(self->output.pos);
+
+        /* Zero means the frame is fully flushed; otherwise the next call
+           comes back here to drain more output. */
+        if (0 == zresult) {
+            self->finishedOutput = 1;
+        }
+
+        chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
+        self->output.pos = 0;
+        return chunk;
+    }
+
+    /* New data from reader. Feed into compressor. */
+    self->input.src = readBuffer;
+    self->input.size = readSize;
+    self->input.pos = 0;
+
+    Py_BEGIN_ALLOW_THREADS
+    zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
+        &self->input, ZSTD_e_continue);
+    Py_END_ALLOW_THREADS
+
+    /* The input buffer currently points to memory managed by Python
+       (readBuffer). This object was allocated by this function. If it wasn't
+       fully consumed, we need to release it in a subsequent function call.
+       If it is fully consumed, do that now.
+    */
+    if (self->input.pos == self->input.size) {
+        self->input.src = NULL;
+        self->input.pos = 0;
+        self->input.size = 0;
+        Py_XDECREF(self->readResult);
+        self->readResult = NULL;
+    }
+
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+        return NULL;
+    }
+
+    assert(self->input.pos <= self->input.size);
+
+    /* If we didn't write anything, start the process over. */
+    if (0 == self->output.pos) {
+        goto feedcompressor;
+    }
+
+    chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
+    self->output.pos = 0;
+    return chunk;
+}
+
+PyTypeObject ZstdCompressorIteratorType = { /* type object for zstd.ZstdCompressorIterator; slots are filled positionally, so their order must not change */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdCompressorIterator", /* tp_name */
+ sizeof(ZstdCompressorIterator), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdCompressorIterator__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ ZstdCompressorIterator_iter, /* tp_iter */
+ (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/**
+ * Finish initializing ZstdCompressorIteratorType. The type is not added to
+ * `mod`; the PyType_Ready() result is not acted upon.
+ */
+void compressoriterator_module_init(PyObject* mod) {
+    Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type;
+
+    /* Nothing follows the readiness check, so its outcome changes nothing here. */
+    (void)PyType_Ready(&ZstdCompressorIteratorType);
+}
diff --git a/contrib/python/zstandard/py2/c-ext/constants.c b/contrib/python/zstandard/py2/c-ext/constants.c
new file mode 100644
index 00000000000..bafdf1e469a
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/constants.c
@@ -0,0 +1,109 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+static char frame_header[] = { /* four bytes exported to Python as FRAME_HEADER; presumably the zstd frame magic number in little-endian order, confirm against zstd.h */
+ '\x28',
+ '\xb5',
+ '\x2f',
+ '\xfd',
+};
+
+/**
+ * Populate `mod` with the module-level constants: the package version, the
+ * ZstdError exception type, flush flags, the zstd library version tuple, the
+ * frame header bytes, and the numeric limits and enum values taken from zstd.
+ *
+ * Also initializes the global ZstdError used by the rest of the extension.
+ * Failures of the individual PyModule_Add* calls are not checked.
+ */
+void constants_module_init(PyObject* mod) {
+    PyObject* version;
+    PyObject* zstdVersion;
+    PyObject* frameHeader;
+
+#if PY_MAJOR_VERSION >= 3
+    version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION);
+#else
+    version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
+#endif
+    PyModule_AddObject(mod, "__version__", version);
+
+    ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
+    if (ZstdError) {
+        /*
+         * PyModule_AddObject() steals a reference on success, but the global
+         * ZstdError keeps being used by the extension. Hold a reference of our
+         * own so the exception type stays alive even if the module attribute
+         * is deleted or replaced.
+         */
+        Py_INCREF(ZstdError);
+        PyModule_AddObject(mod, "ZstdError", ZstdError);
+    }
+
+    PyModule_AddIntConstant(mod, "FLUSH_BLOCK", 0);
+    PyModule_AddIntConstant(mod, "FLUSH_FRAME", 1);
+
+    PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
+    PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
+
+    /* For now, the version is a simple tuple instead of a dedicated type. */
+    zstdVersion = PyTuple_New(3);
+    PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
+    PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
+    PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
+    PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
+
+    frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
+    if (frameHeader) {
+        PyModule_AddObject(mod, "FRAME_HEADER", frameHeader);
+    }
+    else {
+        PyErr_Format(PyExc_ValueError, "could not create frame header object");
+    }
+
+    PyModule_AddObject(mod, "CONTENTSIZE_UNKNOWN",
+        PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_UNKNOWN));
+    PyModule_AddObject(mod, "CONTENTSIZE_ERROR",
+        PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_ERROR));
+
+    PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
+    PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
+        (long)ZSTD_CStreamInSize());
+    PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
+        (long)ZSTD_CStreamOutSize());
+    PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
+        (long)ZSTD_DStreamInSize());
+    PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
+        (long)ZSTD_DStreamOutSize());
+
+    PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
+    PyModule_AddIntConstant(mod, "BLOCKSIZELOG_MAX", ZSTD_BLOCKSIZELOG_MAX);
+    PyModule_AddIntConstant(mod, "BLOCKSIZE_MAX", ZSTD_BLOCKSIZE_MAX);
+    PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
+    PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
+    PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
+    PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX);
+    PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN);
+    PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX);
+    PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
+    PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
+    PyModule_AddIntConstant(mod, "MINMATCH_MIN", ZSTD_MINMATCH_MIN);
+    PyModule_AddIntConstant(mod, "MINMATCH_MAX", ZSTD_MINMATCH_MAX);
+    /* TODO SEARCHLENGTH_* is deprecated. */
+    PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_MINMATCH_MIN);
+    PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_MINMATCH_MAX);
+    PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
+    PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
+    PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
+    PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX);
+    PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX);
+
+    PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
+    PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
+    PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy);
+    PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy);
+    PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
+    PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
+    PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
+    PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra);
+    PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA2", ZSTD_btultra2);
+
+    PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto);
+    PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent);
+    PyModule_AddIntConstant(mod, "DICT_TYPE_FULLDICT", ZSTD_dct_fullDict);
+
+    PyModule_AddIntConstant(mod, "FORMAT_ZSTD1", ZSTD_f_zstd1);
+    PyModule_AddIntConstant(mod, "FORMAT_ZSTD1_MAGICLESS", ZSTD_f_zstd1_magicless);
+}
diff --git a/contrib/python/zstandard/py2/c-ext/decompressionreader.c b/contrib/python/zstandard/py2/c-ext/decompressionreader.c
new file mode 100644
index 00000000000..792852f2abe
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/decompressionreader.c
@@ -0,0 +1,781 @@
+/**
+* Copyright (c) 2017-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+/**
+ * Set io.UnsupportedOperation as the current Python exception.
+ *
+ * The exception class is looked up on the io module on every call. If the
+ * import or the attribute lookup fails, whatever error that step raised is
+ * left in place instead.
+ */
+static void set_unsupported_operation(void) {
+    PyObject* io_module = PyImport_ImportModule("io");
+    PyObject* exc_class;
+
+    if (!io_module) {
+        return;
+    }
+
+    exc_class = PyObject_GetAttrString(io_module, "UnsupportedOperation");
+    if (exc_class) {
+        PyErr_SetNone(exc_class);
+        Py_DECREF(exc_class);
+    }
+
+    Py_DECREF(io_module);
+}
+
+/**
+ * Destructor for ZstdDecompressionReader.
+ *
+ * Drops every object reference held by the reader, releases the source
+ * buffer view if one is held, and frees the object.
+ */
+static void reader_dealloc(ZstdDecompressionReader* self) {
+    Py_XDECREF(self->decompressor);
+    Py_XDECREF(self->reader);
+    /* The object returned by the last reader.read() call is kept in
+       readResult until its bytes are fully consumed. Drop it here so that
+       destroying a reader mid-stream does not leak it. */
+    Py_XDECREF(self->readResult);
+
+    if (self->buffer.buf) {
+        PyBuffer_Release(&self->buffer);
+    }
+
+    PyObject_Del(self);
+}
+
+/**
+ * __enter__: flag the reader as entered and return a new reference to it.
+ *
+ * Raises ValueError if the entered flag is already set.
+ */
+static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
+    if (!self->entered) {
+        self->entered = 1;
+
+        Py_INCREF(self);
+        return self;
+    }
+
+    PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
+    return NULL;
+}
+
+/**
+ * __exit__(exc_type, exc_value, exc_tb): close the reader and release what
+ * it holds.
+ *
+ * The three arguments are parsed but otherwise ignored. Returns False so an
+ * exception raised inside the with block keeps propagating.
+ */
+static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
+    PyObject* exc_type;
+    PyObject* exc_value;
+    PyObject* exc_tb;
+
+    if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
+        return NULL;
+    }
+
+    self->entered = 0;
+    self->closed = 1;
+
+    /* Release resources. */
+    Py_CLEAR(self->reader);
+    if (self->buffer.buf) {
+        PyBuffer_Release(&self->buffer);
+        memset(&self->buffer, 0, sizeof(self->buffer));
+    }
+
+    /* input.src points either into the buffer released above or into the
+       object held by readResult. Reset it before dropping readResult so no
+       dangling pointer survives; every read path checks `closed` first. */
+    memset(&self->input, 0, sizeof(self->input));
+    Py_CLEAR(self->readResult);
+
+    Py_CLEAR(self->decompressor);
+
+    Py_RETURN_FALSE;
+}
+
+/** readable(): always returns True. */
+static PyObject* reader_readable(PyObject* self) {
+    (void)self;
+
+    Py_RETURN_TRUE;
+}
+
+/** writable(): always returns False. */
+static PyObject* reader_writable(PyObject* self) {
+    (void)self;
+
+    Py_RETURN_FALSE;
+}
+
+/**
+ * seekable(): always returns True.
+ *
+ * Note that reader_seek() only moves forward; it rejects backwards seeks and
+ * SEEK_END.
+ */
+static PyObject* reader_seekable(PyObject* self) {
+    (void)self;
+
+    Py_RETURN_TRUE;
+}
+
+/**
+ * close(): set the closed flag and return None.
+ *
+ * Only the flag changes here; held references are not released.
+ */
+static PyObject* reader_close(ZstdDecompressionReader* self) {
+    self->closed = 1;
+
+    Py_RETURN_NONE;
+}
+
+/** flush(): does nothing and returns None. */
+static PyObject* reader_flush(PyObject* self) {
+    (void)self;
+
+    Py_RETURN_NONE;
+}
+
+/** isatty(): always returns False. */
+static PyObject* reader_isatty(PyObject* self) {
+    (void)self;
+
+    Py_RETURN_FALSE;
+}
+
+/**
+ * Refill self->input once the previous input has been fully consumed.
+ *
+ * With a stream source, calls reader.read(readSize) and points self->input
+ * at the returned object's buffer; the object is kept in self->readResult.
+ * An empty read marks the input as finished. With a buffer source, points
+ * self->input at the whole source buffer.
+ *
+ * Returns 0 without touching anything if input is already finished or
+ * unconsumed input remains.
+ * Returns 1 after the source was consulted. This includes the EOF case, so
+ * callers check self->input.size to learn whether data is available.
+ * Returns -1 on error and sets a Python exception as a side-effect.
+ */
+int read_decompressor_input(ZstdDecompressionReader* self) {
+    if (self->finishedInput) {
+        return 0;
+    }
+
+    if (self->input.pos != self->input.size) {
+        return 0;
+    }
+
+    if (self->reader) {
+        Py_buffer buffer;
+
+        assert(self->readResult == NULL);
+        self->readResult = PyObject_CallMethod(self->reader, "read",
+            "k", self->readSize);
+        if (NULL == self->readResult) {
+            return -1;
+        }
+
+        memset(&buffer, 0, sizeof(buffer));
+
+        if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
+            /* read() returned an object we cannot view as bytes. Drop it so
+               a later call neither leaks it nor trips the assert above. */
+            Py_CLEAR(self->readResult);
+            return -1;
+        }
+
+        /* EOF */
+        if (0 == buffer.len) {
+            self->finishedInput = 1;
+            Py_CLEAR(self->readResult);
+        }
+        else {
+            /* The view is released below while input.src keeps pointing at
+               the data; readResult keeps the owning object alive.
+               NOTE(review): this assumes the object's storage does not move
+               after the view is released (true for bytes). */
+            self->input.src = buffer.buf;
+            self->input.size = buffer.len;
+            self->input.pos = 0;
+        }
+
+        PyBuffer_Release(&buffer);
+    }
+    else {
+        assert(self->buffer.buf);
+        /*
+         * We should only get here once since expectation is we always
+         * exhaust input buffer before reading again.
+         */
+        assert(self->input.src == NULL);
+
+        self->input.src = self->buffer.buf;
+        self->input.size = self->buffer.len;
+        self->input.pos = 0;
+    }
+
+    return 1;
+}
+
+/**
+ * Run one ZSTD_decompressStream() step from self->input into `output`.
+ *
+ * The GIL is released around the zstd call. When the step consumes all
+ * pending input, the input tracking state is reset and readResult is
+ * dropped; for a buffer source this also marks the input as finished.
+ *
+ * Returns 0 if the caller should supply more input (nothing pending, or room
+ * is left in the output).
+ * Returns 1 if the output should be handed back now: it is full, or a frame
+ * just ended with output pending and reads may not span frames.
+ * Returns -1 on a zstd error and sets ZstdError.
+ */
+int decompress_input(ZstdDecompressionReader* self, ZSTD_outBuffer* output) {
+    size_t zresult;
+    int inputDrained;
+
+    if (self->input.pos >= self->input.size) {
+        return 0;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    zresult = ZSTD_decompressStream(self->decompressor->dctx, output, &self->input);
+    Py_END_ALLOW_THREADS
+
+    /* State is reset before the error check, as the input is gone either way. */
+    inputDrained = self->input.pos == self->input.size;
+    if (inputDrained) {
+        memset(&self->input, 0, sizeof(self->input));
+        Py_CLEAR(self->readResult);
+
+        if (self->buffer.buf) {
+            self->finishedInput = 1;
+        }
+    }
+
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
+        return -1;
+    }
+
+    /* Nothing produced yet: always ask for more input. */
+    if (!output->pos) {
+        return 0;
+    }
+
+    /* The read request is completely satisfied. */
+    if (output->pos == output->size) {
+        return 1;
+    }
+
+    /* A frame ended and output must not span frames. */
+    if (0 == zresult && !self->readAcrossFrames) {
+        return 1;
+    }
+
+    /* Room is left in the output; keep collecting. */
+    return 0;
+}
+
+/**
+ * read(size=-1): return up to `size` bytes of decompressed data.
+ *
+ * size == -1 delegates to readall(). size == 0, or a reader whose output is
+ * finished, yields an empty bytes object. Otherwise a bytes object of `size`
+ * bytes is allocated and filled by alternating decompress_input() and
+ * read_decompressor_input() until decompress_input() asks to emit or no more
+ * input is available; the object is then shrunk to the bytes produced.
+ *
+ * Raises ValueError if the stream is closed or size < -1.
+ */
+static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "size",
+        NULL
+    };
+
+    Py_ssize_t size = -1;
+    PyObject* chunk = NULL;
+    char* chunkData;
+    Py_ssize_t chunkCapacity;
+    ZSTD_outBuffer output;
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
+        return NULL;
+    }
+
+    if (size < -1) {
+        PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
+        return NULL;
+    }
+
+    if (size == -1) {
+        return PyObject_CallMethod((PyObject*)self, "readall", NULL);
+    }
+
+    if (self->finishedOutput || size == 0) {
+        return PyBytes_FromStringAndSize("", 0);
+    }
+
+    chunk = PyBytes_FromStringAndSize(NULL, size);
+    if (NULL == chunk) {
+        return NULL;
+    }
+
+    PyBytes_AsStringAndSize(chunk, &chunkData, &chunkCapacity);
+
+    output.dst = chunkData;
+    output.size = chunkCapacity;
+    output.pos = 0;
+
+    for (;;) {
+        int decompressStatus = decompress_input(self, &output);
+        int inputStatus;
+
+        if (-1 == decompressStatus) {
+            Py_XDECREF(chunk);
+            return NULL;
+        }
+
+        if (1 == decompressStatus) {
+            /* Emit now; shrink only when the buffer was not filled. */
+            self->bytesDecompressed += output.pos;
+
+            if (output.pos != output.size && safe_pybytes_resize(&chunk, output.pos)) {
+                Py_XDECREF(chunk);
+                return NULL;
+            }
+            return chunk;
+        }
+
+        assert(0 == decompressStatus);
+
+        inputStatus = read_decompressor_input(self);
+        if (-1 == inputStatus) {
+            Py_XDECREF(chunk);
+            return NULL;
+        }
+
+        assert(0 == inputStatus || 1 == inputStatus);
+
+        /* No pending input after a refill attempt means EOF. */
+        if (!self->input.size) {
+            break;
+        }
+    }
+
+    /* EOF */
+    self->bytesDecompressed += output.pos;
+
+    if (safe_pybytes_resize(&chunk, output.pos)) {
+        Py_XDECREF(chunk);
+        return NULL;
+    }
+
+    return chunk;
+}
+
+/**
+ * read1(size=-1): return decompressed data as soon as any is available.
+ *
+ * size == -1 uses ZSTD_DStreamOutSize() as the buffer size. size == 0, or a
+ * reader whose output is finished, yields an empty bytes object.
+ *
+ * Raises ValueError if the stream is closed or size < -1.
+ */
+static PyObject* reader_read1(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "size",
+        NULL
+    };
+
+    Py_ssize_t size = -1;
+    PyObject* chunk = NULL;
+    char* chunkData;
+    Py_ssize_t chunkCapacity;
+    ZSTD_outBuffer output;
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
+        return NULL;
+    }
+
+    if (size < -1) {
+        PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
+        return NULL;
+    }
+
+    if (self->finishedOutput || size == 0) {
+        return PyBytes_FromStringAndSize("", 0);
+    }
+
+    if (size == -1) {
+        size = ZSTD_DStreamOutSize();
+    }
+
+    chunk = PyBytes_FromStringAndSize(NULL, size);
+    if (NULL == chunk) {
+        return NULL;
+    }
+
+    PyBytes_AsStringAndSize(chunk, &chunkData, &chunkCapacity);
+
+    output.dst = chunkData;
+    output.size = chunkCapacity;
+    output.pos = 0;
+
+    /* read1() is meant to issue at most one read() on the underlying stream.
+     * Decompression cannot guarantee that, since one compressed read may
+     * produce no output at all. Instead we read + decompress until the first
+     * bytes of output appear (or input ends) and return at that point.
+     */
+    while (!self->finishedInput && 0 == output.pos) {
+        int status = read_decompressor_input(self);
+
+        if (-1 == status) {
+            Py_XDECREF(chunk);
+            return NULL;
+        }
+
+        assert(0 == status || 1 == status);
+
+        status = decompress_input(self, &output);
+
+        if (-1 == status) {
+            Py_XDECREF(chunk);
+            return NULL;
+        }
+
+        assert(0 == status || 1 == status);
+    }
+
+    self->bytesDecompressed += output.pos;
+    if (safe_pybytes_resize(&chunk, output.pos)) {
+        Py_XDECREF(chunk);
+        return NULL;
+    }
+
+    return chunk;
+}
+
+/**
+ * readinto(b): decompress into the writable buffer `b`.
+ *
+ * Returns the number of bytes written as an int. Returns 0 right away if the
+ * output is already finished. Filling alternates decompress_input() and
+ * read_decompressor_input() until decompress_input() asks to emit or no more
+ * input is available.
+ *
+ * Raises ValueError if the stream is closed or the destination is not a
+ * contiguous buffer of at most one dimension.
+ */
+static PyObject* reader_readinto(ZstdDecompressionReader* self, PyObject* args) {
+    Py_buffer dest;
+    ZSTD_outBuffer output;
+    PyObject* written = NULL;
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    if (self->finishedOutput) {
+        return PyLong_FromLong(0);
+    }
+
+    if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "destination buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    output.dst = dest.buf;
+    output.size = dest.len;
+    output.pos = 0;
+
+    for (;;) {
+        int status = decompress_input(self, &output);
+
+        if (-1 == status) {
+            goto finally;
+        }
+
+        /* Output is ready to be reported. */
+        if (1 == status) {
+            break;
+        }
+
+        assert(0 == status);
+
+        status = read_decompressor_input(self);
+
+        if (-1 == status) {
+            goto finally;
+        }
+
+        assert(0 == status || 1 == status);
+
+        /* No pending input after a refill attempt means EOF. */
+        if (!self->input.size) {
+            break;
+        }
+    }
+
+    self->bytesDecompressed += output.pos;
+    written = PyLong_FromSize_t(output.pos);
+
+finally:
+    PyBuffer_Release(&dest);
+
+    return written;
+}
+
+/**
+ * readinto1(b): decompress into the writable buffer `b`, returning as soon
+ * as any output has been produced or the input is finished.
+ *
+ * Returns the number of bytes written as an int. Returns 0 right away if the
+ * output is already finished.
+ *
+ * Raises ValueError if the stream is closed or the destination is not a
+ * contiguous buffer of at most one dimension.
+ */
+static PyObject* reader_readinto1(ZstdDecompressionReader* self, PyObject* args) {
+    Py_buffer dest;
+    ZSTD_outBuffer output;
+    PyObject* written = NULL;
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    if (self->finishedOutput) {
+        return PyLong_FromLong(0);
+    }
+
+    if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "destination buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    output.dst = dest.buf;
+    output.size = dest.len;
+    output.pos = 0;
+
+    /* Keep feeding the decompressor until the first output bytes appear. */
+    while (!self->finishedInput && !self->finishedOutput && 0 == output.pos) {
+        int status = read_decompressor_input(self);
+
+        if (-1 == status) {
+            goto finally;
+        }
+
+        assert(0 == status || 1 == status);
+
+        status = decompress_input(self, &output);
+
+        if (-1 == status) {
+            goto finally;
+        }
+
+        assert(0 == status || 1 == status);
+    }
+
+    self->bytesDecompressed += output.pos;
+    written = PyLong_FromSize_t(output.pos);
+
+finally:
+    PyBuffer_Release(&dest);
+
+    return written;
+}
+
+/**
+ * readall(): read until EOF and return everything as one bytes object.
+ *
+ * Calls self.read(1048576) repeatedly, collecting non-empty chunks in a list,
+ * then joins them with b"".join(). Using a list avoids repeatedly
+ * reallocating one large buffer.
+ */
+static PyObject* reader_readall(PyObject* self) {
+    PyObject* joined = NULL;
+    PyObject* separator;
+    PyObject* pieces = PyList_New(0);
+
+    if (NULL == pieces) {
+        return NULL;
+    }
+
+    for (;;) {
+        int appendStatus;
+        PyObject* piece = PyObject_CallMethod(self, "read", "i", 1048576);
+
+        if (NULL == piece) {
+            goto done;
+        }
+
+        /* An empty read signals EOF. */
+        if (!PyBytes_Size(piece)) {
+            Py_DECREF(piece);
+            break;
+        }
+
+        /* The list takes its own reference; ours is dropped either way. */
+        appendStatus = PyList_Append(pieces, piece);
+        Py_DECREF(piece);
+
+        if (appendStatus) {
+            goto done;
+        }
+    }
+
+    separator = PyBytes_FromStringAndSize("", 0);
+    if (NULL == separator) {
+        goto done;
+    }
+
+    joined = PyObject_CallMethod(separator, "join", "O", pieces);
+    Py_DECREF(separator);
+
+done:
+    Py_DECREF(pieces);
+
+    return joined;
+}
+
+/** readline(): not supported; raises io.UnsupportedOperation. */
+static PyObject* reader_readline(PyObject* self) {
+    (void)self;
+
+    set_unsupported_operation();
+    return NULL;
+}
+
+/** readlines(): not supported; raises io.UnsupportedOperation. */
+static PyObject* reader_readlines(PyObject* self) {
+    (void)self;
+
+    set_unsupported_operation();
+    return NULL;
+}
+
+/**
+ * seek(pos, whence=0): advance the stream by reading and discarding output.
+ *
+ * SEEK_SET accepts only positions at or beyond the current offset, SEEK_CUR
+ * accepts only non-negative offsets, and SEEK_END is rejected. Any other
+ * whence value is not validated and results in no movement. The skip is done
+ * through self.read() in chunks of at most ZSTD_DStreamOutSize() bytes and
+ * stops early on EOF.
+ *
+ * Returns the resulting offset (total bytes decompressed so far).
+ * Raises ValueError if the stream is closed or the request is unsupported.
+ */
+static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
+    Py_ssize_t pos;
+    int whence = 0;
+    unsigned long long remaining = 0;
+    size_t chunkLimit = ZSTD_DStreamOutSize();
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
+        return NULL;
+    }
+
+    switch (whence) {
+    case SEEK_SET:
+        if (pos < 0) {
+            PyErr_SetString(PyExc_ValueError,
+                "cannot seek to negative position with SEEK_SET");
+            return NULL;
+        }
+
+        if ((unsigned long long)pos < self->bytesDecompressed) {
+            PyErr_SetString(PyExc_ValueError,
+                "cannot seek zstd decompression stream backwards");
+            return NULL;
+        }
+
+        remaining = pos - self->bytesDecompressed;
+        break;
+
+    case SEEK_CUR:
+        if (pos < 0) {
+            PyErr_SetString(PyExc_ValueError,
+                "cannot seek zstd decompression stream backwards");
+            return NULL;
+        }
+
+        remaining = pos;
+        break;
+
+    case SEEK_END:
+        /* This /could/ be supported for pos==0, but nobody has needed it. */
+        PyErr_SetString(PyExc_ValueError,
+            "zstd decompression streams cannot be seeked with SEEK_END");
+        return NULL;
+
+    default:
+        /* Unknown whence values are accepted and skip nothing. */
+        break;
+    }
+
+    /* Going through the Python-level read() is a bit inefficient, but it
+       reuses all of the reader's state tracking. */
+    while (remaining) {
+        unsigned long long request = remaining < chunkLimit ? remaining : chunkLimit;
+        Py_ssize_t got;
+        PyObject* chunk = PyObject_CallMethod((PyObject*)self, "read", "K", request);
+
+        if (!chunk) {
+            return NULL;
+        }
+
+        got = PyBytes_GET_SIZE(chunk);
+        Py_DECREF(chunk);
+
+        /* Empty read means EOF. */
+        if (!got) {
+            break;
+        }
+
+        remaining -= got;
+    }
+
+    return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
+}
+
+/** tell(): return the total number of bytes decompressed so far. */
+static PyObject* reader_tell(ZstdDecompressionReader* self) {
+    /* TODO should this raise OSError since stream isn't seekable? */
+    unsigned long long offset = self->bytesDecompressed;
+
+    return PyLong_FromUnsignedLongLong(offset);
+}
+
+/** write(): not supported; raises io.UnsupportedOperation. */
+static PyObject* reader_write(PyObject* self, PyObject* args) {
+    (void)self;
+    (void)args;
+
+    set_unsupported_operation();
+    return NULL;
+}
+
+/** writelines(): not supported; raises io.UnsupportedOperation. */
+static PyObject* reader_writelines(PyObject* self, PyObject* args) {
+    (void)self;
+    (void)args;
+
+    set_unsupported_operation();
+    return NULL;
+}
+
+/** tp_iter slot: iteration is not supported; raises io.UnsupportedOperation. */
+static PyObject* reader_iter(PyObject* self) {
+    (void)self;
+
+    set_unsupported_operation();
+    return NULL;
+}
+
+/** tp_iternext slot: iteration is not supported; raises io.UnsupportedOperation. */
+static PyObject* reader_iternext(PyObject* self) {
+    (void)self;
+
+    set_unsupported_operation();
+    return NULL;
+}
+
+/* Method table for ZstdDecompressionReader. Docstrings describe the
+   decompression side of the API. */
+static PyMethodDef reader_methods[] = {
+    { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
+    PyDoc_STR("Enter a decompression context") },
+    { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
+    PyDoc_STR("Exit a decompression context") },
+    { "close", (PyCFunction)reader_close, METH_NOARGS,
+    PyDoc_STR("Close the stream so it cannot perform any more operations") },
+    { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
+    { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
+    { "readable", (PyCFunction)reader_readable, METH_NOARGS,
+    PyDoc_STR("Returns True") },
+    { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
+    PyDoc_STR("read decompressed data") },
+    { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS,
+    PyDoc_STR("read decompressed data, returning as soon as any is available") },
+    { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
+    { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
+    { "readall", (PyCFunction)reader_readall, METH_NOARGS,
+    PyDoc_STR("read all remaining decompressed data") },
+    { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
+    { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
+    { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
+    { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
+    PyDoc_STR("Returns True") },
+    { "tell", (PyCFunction)reader_tell, METH_NOARGS,
+    PyDoc_STR("Returns current number of bytes decompressed") },
+    { "writable", (PyCFunction)reader_writable, METH_NOARGS,
+    PyDoc_STR("Returns False") },
+    { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
+    { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
+    { NULL, NULL }
+};
+
+/* Attributes exposed on ZstdDecompressionReader instances: a read-only
+   boolean `closed` backed by the struct field of the same name. */
+static PyMemberDef reader_members[] = {
+ { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
+ READONLY, "whether stream is closed" },
+ { NULL }
+};
+
+/*
+ * Type object for zstd.ZstdDecompressionReader.
+ *
+ * Slots are filled positionally, so the order of entries below is
+ * significant. tp_iter and tp_iternext point at functions that raise
+ * io.UnsupportedOperation. There is no tp_init; instances come from
+ * PyType_GenericNew.
+ */
+PyTypeObject ZstdDecompressionReaderType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdDecompressionReader", /* tp_name */
+ sizeof(ZstdDecompressionReader), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)reader_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ reader_iter, /* tp_iter */
+ reader_iternext, /* tp_iternext */
+ reader_methods, /* tp_methods */
+ reader_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+
+/**
+ * Finalize ZstdDecompressionReaderType.
+ *
+ * The type is readied but not added to `mod` here. If PyType_Ready() fails,
+ * the function returns with the Python error left set.
+ */
+void decompressionreader_module_init(PyObject* mod) {
+    /* TODO make reader a sub-class of io.RawIOBase */
+    (void)mod;
+
+    Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
+
+    if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
+        return;
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/decompressionwriter.c b/contrib/python/zstandard/py2/c-ext/decompressionwriter.c
new file mode 100644
index 00000000000..7df750ab06e
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/decompressionwriter.c
@@ -0,0 +1,295 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+/* Docstring used as tp_doc for the ZstdDecompressionWriter type. */
+PyDoc_STRVAR(ZstdDecompressionWriter__doc,
+"A context manager used for writing decompressed output.\n"
+);
+
+/**
+ * Destructor: drop the references to the decompressor and the underlying
+ * writer, then free the object.
+ */
+static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
+    Py_XDECREF(self->decompressor);
+    Py_XDECREF(self->writer);
+
+    PyObject_Del(self);
+}
+
+/**
+ * __enter__: flag the writer as entered and return a new reference to it.
+ *
+ * Raises ValueError if the stream is closed, and ZstdError if the entered
+ * flag is already set.
+ */
+static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
+    const char* problem = NULL;
+    PyObject* excType = NULL;
+
+    /* The closed check takes precedence over the re-entry check. */
+    if (self->closed) {
+        excType = PyExc_ValueError;
+        problem = "stream is closed";
+    }
+    else if (self->entered) {
+        excType = ZstdError;
+        problem = "cannot __enter__ multiple times";
+    }
+
+    if (problem) {
+        PyErr_SetString(excType, problem);
+        return NULL;
+    }
+
+    self->entered = 1;
+
+    Py_INCREF(self);
+    return (PyObject*)self;
+}
+
+/**
+ * __exit__: clear the entered flag and close the writer via self.close().
+ *
+ * `args` (the exception triple) is not inspected. Returns False so an
+ * exception raised inside the with block keeps propagating; returns NULL if
+ * close() raised.
+ */
+static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
+    PyObject* closeResult;
+
+    self->entered = 0;
+
+    closeResult = PyObject_CallMethod((PyObject*)self, "close", NULL);
+    if (NULL == closeResult) {
+        return NULL;
+    }
+
+    /* PyObject_CallMethod() returns a new reference; release it so the value
+       returned by close() is not leaked. */
+    Py_DECREF(closeResult);
+
+    Py_RETURN_FALSE;
+}
+
+/** memory_size(): return ZSTD_sizeof_DCtx() for the decompressor's context as an int. */
+static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
+    size_t contextBytes = ZSTD_sizeof_DCtx(self->decompressor->dctx);
+
+    return PyLong_FromSize_t(contextBytes);
+}
+
+/**
+ * write(data): decompress `data` and pass the output to writer.write().
+ *
+ * Output is produced in chunks of at most self->outSize bytes; each
+ * non-empty chunk is handed to the underlying writer's write() method.
+ *
+ * Returns the number of input bytes consumed when writeReturnRead is set,
+ * otherwise the number of decompressed bytes written.
+ *
+ * Raises ValueError if the stream is closed or `data` is not a contiguous
+ * buffer of at most one dimension, MemoryError if the output buffer cannot
+ * be allocated, ZstdError on a decompression error, and whatever
+ * writer.write() raises.
+ */
+static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        NULL
+    };
+
+    PyObject* result = NULL;
+    Py_buffer source;
+    size_t zresult = 0;
+    ZSTD_inBuffer input;
+    ZSTD_outBuffer output;
+    PyObject* res;
+    Py_ssize_t totalWrite = 0;
+
+    /* Lets the shared cleanup path free the buffer unconditionally. */
+    output.dst = NULL;
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
+#endif
+        kwlist, &source)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        /* `source` is already acquired, so leave through the cleanup path. */
+        goto finally;
+    }
+
+    output.dst = PyMem_Malloc(self->outSize);
+    if (!output.dst) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+    output.size = self->outSize;
+    output.pos = 0;
+
+    input.src = source.buf;
+    input.size = source.len;
+    input.pos = 0;
+
+    while (input.pos < (size_t)source.len) {
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
+        Py_END_ALLOW_THREADS
+
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "zstd decompress error: %s",
+                ZSTD_getErrorName(zresult));
+            goto finally;
+        }
+
+        if (output.pos) {
+#if PY_MAJOR_VERSION >= 3
+            res = PyObject_CallMethod(self->writer, "write", "y#",
+#else
+            res = PyObject_CallMethod(self->writer, "write", "s#",
+#endif
+                output.dst, output.pos);
+            if (NULL == res) {
+                /* Propagate the writer's exception instead of continuing
+                   with an error pending. */
+                goto finally;
+            }
+            Py_DECREF(res);
+            totalWrite += output.pos;
+            output.pos = 0;
+        }
+    }
+
+    if (self->writeReturnRead) {
+        result = PyLong_FromSize_t(input.pos);
+    }
+    else {
+        result = PyLong_FromSsize_t(totalWrite);
+    }
+
+finally:
+    /* PyMem_Free(NULL) is a no-op, so this is safe on every path. */
+    PyMem_Free(output.dst);
+    PyBuffer_Release(&source);
+    return result;
+}
+
+/**
+ * close(): flush, mark the writer closed, and close the underlying writer
+ * if it has a close() method.
+ *
+ * Calling close() on an already-closed writer returns None. The closed flag
+ * is set even when flush() raises. Returns the value of writer.close() when
+ * that method exists, otherwise None.
+ */
+static PyObject* ZstdDecompressionWriter_close(ZstdDecompressionWriter* self) {
+    PyObject* result;
+
+    if (self->closed) {
+        Py_RETURN_NONE;
+    }
+
+    result = PyObject_CallMethod((PyObject*)self, "flush", NULL);
+    self->closed = 1;
+
+    if (NULL == result) {
+        return NULL;
+    }
+
+    /* flush() returned a new reference that is not needed; release it so it
+       is not leaked. */
+    Py_DECREF(result);
+
+    /* Call close on underlying stream as well. */
+    if (PyObject_HasAttrString(self->writer, "close")) {
+        return PyObject_CallMethod(self->writer, "close", NULL);
+    }
+
+    Py_RETURN_NONE;
+}
+
+/**
+ * fileno(): forward to the underlying writer's fileno().
+ *
+ * Raises OSError if the underlying writer has no fileno attribute.
+ */
+static PyObject* ZstdDecompressionWriter_fileno(ZstdDecompressionWriter* self) {
+    if (!PyObject_HasAttrString(self->writer, "fileno")) {
+        PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
+        return NULL;
+    }
+
+    return PyObject_CallMethod(self->writer, "fileno", NULL);
+}
+
+/**
+ * flush(): forward to the underlying writer's flush() when it has one,
+ * otherwise return None.
+ *
+ * Raises ValueError if the stream is closed.
+ */
+static PyObject* ZstdDecompressionWriter_flush(ZstdDecompressionWriter* self) {
+    if (self->closed) {
+        PyErr_SetString(PyExc_ValueError, "stream is closed");
+        return NULL;
+    }
+
+    if (!PyObject_HasAttrString(self->writer, "flush")) {
+        Py_RETURN_NONE;
+    }
+
+    return PyObject_CallMethod(self->writer, "flush", NULL);
+}
+
+/** Shared implementation for the methods that always return False. */
+static PyObject* ZstdDecompressionWriter_false(PyObject* self, PyObject* args) {
+    (void)self;
+    (void)args;
+
+    Py_RETURN_FALSE;
+}
+
+/** Shared implementation for the methods that always return True. */
+static PyObject* ZstdDecompressionWriter_true(PyObject* self, PyObject* args) {
+    (void)self;
+    (void)args;
+
+    Py_RETURN_TRUE;
+}
+
+/**
+ * Shared implementation for the unsupported methods: always returns NULL
+ * with io.UnsupportedOperation set.
+ *
+ * If importing io or looking up the exception class fails, that error is
+ * the one left set.
+ */
+static PyObject* ZstdDecompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
+    PyObject* io_module = PyImport_ImportModule("io");
+    PyObject* exc_class;
+
+    (void)self;
+    (void)args;
+    (void)kwargs;
+
+    if (io_module) {
+        exc_class = PyObject_GetAttrString(io_module, "UnsupportedOperation");
+        if (exc_class) {
+            PyErr_SetNone(exc_class);
+            Py_DECREF(exc_class);
+        }
+
+        Py_DECREF(io_module);
+    }
+
+    return NULL;
+}
+
+/* Method table for ZstdDecompressionWriter. Read-side and positioning
+   methods map to the shared "unsupported" implementation. */
+static PyMethodDef ZstdDecompressionWriter_methods[] = {
+    { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
+    PyDoc_STR("Enter a decompression context.") },
+    { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
+    PyDoc_STR("Exit a decompression context.") },
+    { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
+    PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
+    { "close", (PyCFunction)ZstdDecompressionWriter_close, METH_NOARGS, NULL },
+    { "fileno", (PyCFunction)ZstdDecompressionWriter_fileno, METH_NOARGS, NULL },
+    { "flush", (PyCFunction)ZstdDecompressionWriter_flush, METH_NOARGS, NULL },
+    { "isatty", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
+    { "readable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
+    { "readline", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "readlines", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "seek", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "seekable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
+    { "tell", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "truncate", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "writable", ZstdDecompressionWriter_true, METH_NOARGS, NULL },
+    { "writelines" , (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "read", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "readall", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "readinto", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
+    { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
+    PyDoc_STR("Decompress data and write the output to the underlying stream") },
+    { NULL, NULL }
+};
+
+/* Attributes exposed on ZstdDecompressionWriter instances: a read-only
+   boolean `closed` backed by the struct field of the same name. */
+static PyMemberDef ZstdDecompressionWriter_members[] = {
+ { "closed", T_BOOL, offsetof(ZstdDecompressionWriter, closed), READONLY, NULL },
+ { NULL }
+};
+
+/*
+ * Type object for zstd.ZstdDecompressionWriter.
+ *
+ * Slots are filled positionally, so the order of entries below is
+ * significant. The type allows subclassing (Py_TPFLAGS_BASETYPE), has no
+ * tp_init, and creates instances through PyType_GenericNew.
+ */
+PyTypeObject ZstdDecompressionWriterType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdDecompressionWriter", /* tp_name */
+ sizeof(ZstdDecompressionWriter),/* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdDecompressionWriter__doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ ZstdDecompressionWriter_methods,/* tp_methods */
+ ZstdDecompressionWriter_members,/* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/**
+ * Finalize ZstdDecompressionWriterType.
+ *
+ * The type is readied but not added to `mod` here. If PyType_Ready() fails,
+ * the function returns with the Python error left set.
+ */
+void decompressionwriter_module_init(PyObject* mod) {
+    (void)mod;
+
+    Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type;
+
+    if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
+        return;
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/decompressobj.c b/contrib/python/zstandard/py2/c-ext/decompressobj.c
new file mode 100644
index 00000000000..2a55e61f18b
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/decompressobj.c
@@ -0,0 +1,202 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+/* Docstring used as tp_doc for the ZstdDecompressionObj type. */
+PyDoc_STRVAR(DecompressionObj__doc__,
+"Perform decompression using a standard library compatible API.\n"
+);
+
+/** Destructor: drop the reference to the decompressor and free the object. */
+static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
+    Py_XDECREF(self->decompressor);
+
+    PyObject_Del(self);
+}
+
+/**
+ * decompress(data): feed `data` to the decompressor and return all output
+ * produced as a bytes object.
+ *
+ * Output is produced through a scratch buffer of self->outSize bytes and
+ * accumulated into the result. Once zstd reports the end of a frame the
+ * object is marked finished, and later calls raise ZstdError. Empty input
+ * returns empty bytes without touching the decompressor.
+ *
+ * Raises ValueError if `data` is not a contiguous buffer of at most one
+ * dimension, MemoryError on allocation failure, and ZstdError on a
+ * decompression error or reuse of a finished object.
+ */
+static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        NULL
+    };
+
+    Py_buffer source;
+    size_t zresult;
+    ZSTD_inBuffer input;
+    ZSTD_outBuffer output;
+    PyObject* result = NULL;
+    Py_ssize_t resultSize = 0;
+
+    /* Lets the cleanup path free the buffer unconditionally. */
+    output.dst = NULL;
+
+    if (self->finished) {
+        PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
+        return NULL;
+    }
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:decompress",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:decompress",
+#endif
+        kwlist, &source)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    /* Special case of empty input. Output will always be empty. */
+    if (source.len == 0) {
+        result = PyBytes_FromString("");
+        goto finally;
+    }
+
+    input.src = source.buf;
+    input.size = source.len;
+    input.pos = 0;
+
+    output.dst = PyMem_Malloc(self->outSize);
+    if (!output.dst) {
+        PyErr_NoMemory();
+        goto except;
+    }
+    output.size = self->outSize;
+    output.pos = 0;
+
+    while (1) {
+        Py_BEGIN_ALLOW_THREADS
+        zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
+        Py_END_ALLOW_THREADS
+
+        if (ZSTD_isError(zresult)) {
+            PyErr_Format(ZstdError, "zstd decompressor error: %s",
+                ZSTD_getErrorName(zresult));
+            goto except;
+        }
+
+        if (0 == zresult) {
+            self->finished = 1;
+        }
+
+        if (output.pos) {
+            if (result) {
+                resultSize = PyBytes_GET_SIZE(result);
+                if (-1 == safe_pybytes_resize(&result, resultSize + output.pos)) {
+                    /* The except path does Py_CLEAR(result). Releasing the
+                       reference here as well would drop it twice whenever the
+                       helper fails without resetting `result` to NULL. */
+                    goto except;
+                }
+
+                memcpy(PyBytes_AS_STRING(result) + resultSize,
+                    output.dst, output.pos);
+            }
+            else {
+                result = PyBytes_FromStringAndSize(output.dst, output.pos);
+                if (!result) {
+                    goto except;
+                }
+            }
+        }
+
+        /* Stop at the end of a frame, or once the input is used up and the
+           last step produced nothing more. */
+        if (zresult == 0 || (input.pos == input.size && output.pos == 0)) {
+            break;
+        }
+
+        output.pos = 0;
+    }
+
+    if (!result) {
+        result = PyBytes_FromString("");
+    }
+
+    goto finally;
+
+except:
+    Py_CLEAR(result);
+
+finally:
+    PyMem_Free(output.dst);
+    PyBuffer_Release(&source);
+
+    return result;
+}
+
+/**
+ * flush(length=None): accepted for API compatibility; does nothing.
+ *
+ * The optional `length` argument is parsed and ignored. Returns None.
+ */
+static PyObject* DecompressionObj_flush(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "length",
+        NULL
+    };
+
+    PyObject* length = NULL;
+
+    if (PyArg_ParseTupleAndKeywords(args, kwargs, "|O:flush", kwlist, &length)) {
+        Py_RETURN_NONE;
+    }
+
+    return NULL;
+}
+
+/* Method table for ZstdDecompressionObj: decompress() does the work and
+   flush() is a no-op. Both accept positional and keyword arguments. */
+static PyMethodDef DecompressionObj_methods[] = {
+ { "decompress", (PyCFunction)DecompressionObj_decompress,
+ METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") },
+ { "flush", (PyCFunction)DecompressionObj_flush,
+ METH_VARARGS | METH_KEYWORDS, PyDoc_STR("no-op") },
+ { NULL, NULL }
+};
+
+/*
+ * Type object for zstd.ZstdDecompressionObj.
+ *
+ * Slots are filled positionally, so the order of entries below is
+ * significant. The type allows subclassing (Py_TPFLAGS_BASETYPE), exposes no
+ * members, has no tp_init, and creates instances through PyType_GenericNew.
+ */
+PyTypeObject ZstdDecompressionObjType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdDecompressionObj", /* tp_name */
+ sizeof(ZstdDecompressionObj), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)DecompressionObj_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ DecompressionObj__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ DecompressionObj_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/**
+ * Finalize ZstdDecompressionObjType.
+ *
+ * The type is readied but not added to `module` here. If PyType_Ready()
+ * fails, the function returns with the Python error left set.
+ */
+void decompressobj_module_init(PyObject* module) {
+    (void)module;
+
+    Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
+
+    if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
+        return;
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/decompressor.c b/contrib/python/zstandard/py2/c-ext/decompressor.c
new file mode 100644
index 00000000000..b1ef959ec58
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/decompressor.c
@@ -0,0 +1,1828 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+#include "pool.h"
+
+extern PyObject* ZstdError;
+
+/**
+ * Reset the session of a decompressor's ZSTD_DCtx and re-apply its settings.
+ *
+ * The max window size is applied only when it is non-zero; the frame format
+ * is always applied. When ``loadDict`` is non-zero and the decompressor has a
+ * dictionary, the prepared dictionary is referenced on the context.
+ *
+ * Returns 0 on success and 1 on failure. A ZstdError is set for the zstd
+ * parameter failures reported here.
+ */
+int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
+    ZSTD_DCtx* const dctx = decompressor->dctx;
+    size_t status;
+
+    ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
+
+    if (decompressor->maxWindowSize) {
+        status = ZSTD_DCtx_setMaxWindowSize(dctx, decompressor->maxWindowSize);
+        if (ZSTD_isError(status)) {
+            PyErr_Format(ZstdError, "unable to set max window size: %s",
+                ZSTD_getErrorName(status));
+            return 1;
+        }
+    }
+
+    status = ZSTD_DCtx_setFormat(dctx, decompressor->format);
+    if (ZSTD_isError(status)) {
+        PyErr_Format(ZstdError, "unable to set decoding format: %s",
+            ZSTD_getErrorName(status));
+        return 1;
+    }
+
+    /* Nothing more to do unless a dictionary was requested and is present. */
+    if (!loadDict || !decompressor->dict) {
+        return 0;
+    }
+
+    if (ensure_ddict(decompressor->dict)) {
+        return 1;
+    }
+
+    status = ZSTD_DCtx_refDDict(dctx, decompressor->dict->ddict);
+    if (ZSTD_isError(status)) {
+        PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
+            ZSTD_getErrorName(status));
+        return 1;
+    }
+
+    return 0;
+}
+
+PyDoc_STRVAR(Decompressor__doc__,
+"ZstdDecompressor(dict_data=None)\n"
+"\n"
+"Create an object used to perform Zstandard decompression.\n"
+"\n"
+"An instance can perform multiple decompression operations."
+);
+
+/**
+ * tp_init for ZstdDecompressor.
+ *
+ * Accepts the optional keyword arguments ``dict_data`` (a compression
+ * dictionary object), ``max_window_size`` and ``format``. Creates the
+ * ZSTD_DCtx, records the settings, takes a reference on the dictionary and
+ * applies everything through ensure_dctx().
+ *
+ * Returns 0 on success. Returns -1 on failure, after releasing whatever was
+ * acquired by this call.
+ */
+static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* keywords[] = { "dict_data", "max_window_size", "format", NULL };
+    ZstdCompressionDict* dictData = NULL;
+    Py_ssize_t windowSize = 0;
+    ZSTD_format_e frameFormat = ZSTD_f_zstd1;
+
+    self->dctx = NULL;
+    self->dict = NULL;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!nI:ZstdDecompressor", keywords,
+            &ZstdCompressionDictType, &dictData, &windowSize, &frameFormat)) {
+        return -1;
+    }
+
+    self->dctx = ZSTD_createDCtx();
+    if (NULL == self->dctx) {
+        PyErr_NoMemory();
+        goto except;
+    }
+
+    self->maxWindowSize = windowSize;
+    self->format = frameFormat;
+
+    if (NULL != dictData) {
+        Py_INCREF(dictData);
+        self->dict = dictData;
+    }
+
+    if (0 == ensure_dctx(self, 1)) {
+        return 0;
+    }
+
+except:
+    /* Undo partial initialization so dealloc sees a consistent object. */
+    Py_CLEAR(self->dict);
+
+    if (NULL != self->dctx) {
+        ZSTD_freeDCtx(self->dctx);
+        self->dctx = NULL;
+    }
+
+    return -1;
+}
+
+/**
+ * tp_dealloc for ZstdDecompressor: drops the dictionary reference, frees the
+ * zstd context if one exists, then releases the object itself.
+ */
+static void Decompressor_dealloc(ZstdDecompressor* self) {
+    ZSTD_DCtx* ctx;
+
+    Py_CLEAR(self->dict);
+
+    ctx = self->dctx;
+    if (NULL != ctx) {
+        ZSTD_freeDCtx(ctx);
+        self->dctx = NULL;
+    }
+
+    PyObject_Del(self);
+}
+
+PyDoc_STRVAR(Decompressor_memory_size__doc__,
+"memory_size() -- Size of decompression context, in bytes\n"
+);
+
+/**
+ * Return ZSTD_sizeof_DCtx() of the decompressor's context as a Python
+ * integer. Raises ZstdError if the context is missing.
+ */
+static PyObject* Decompressor_memory_size(ZstdDecompressor* self) {
+    if (!self->dctx) {
+        PyErr_SetString(ZstdError, "no decompressor context found; this should never happen");
+        return NULL;
+    }
+
+    return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx));
+}
+
+PyDoc_STRVAR(Decompressor_copy_stream__doc__,
+    "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
+    "\n"
+    "Compressed data will be read from ``ifh``, decompressed, and written to\n"
+    "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
+    "``write(data)`` method.\n"
+    "\n"
+    "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
+    "size of data that is ``read()`` and ``write()`` between streams. They default\n"
+    "to the default input and output sizes of zstd decompressor streams.\n"
+);
+
+/**
+ * Implementation of ZstdDecompressor.copy_stream().
+ *
+ * Repeatedly calls ``ifh.read(read_size)``, feeds each chunk to the streaming
+ * decompressor and hands every batch of output to ``ofh.write()``. Stops when
+ * ``read()`` returns an empty bytes object.
+ *
+ * Returns a new ``(bytes_read, bytes_written)`` tuple, or NULL with an
+ * exception set when argument validation, reading, decompression, writing or
+ * allocation fails.
+ */
+static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "ifh",
+        "ofh",
+        "read_size",
+        "write_size",
+        NULL
+    };
+
+    PyObject* source;
+    PyObject* dest;
+    size_t inSize = ZSTD_DStreamInSize();
+    size_t outSize = ZSTD_DStreamOutSize();
+    ZSTD_inBuffer input;
+    ZSTD_outBuffer output;
+    Py_ssize_t totalRead = 0;
+    Py_ssize_t totalWrite = 0;
+    char* readBuffer;
+    Py_ssize_t readSize;
+    PyObject* readResult = NULL;
+    PyObject* res = NULL;
+    size_t zresult = 0;
+    PyObject* writeResult;
+    PyObject* totalReadPy;
+    PyObject* totalWritePy;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
+        &source, &dest, &inSize, &outSize)) {
+        return NULL;
+    }
+
+    if (!PyObject_HasAttrString(source, "read")) {
+        PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
+        return NULL;
+    }
+
+    if (!PyObject_HasAttrString(dest, "write")) {
+        PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
+        return NULL;
+    }
+
+    /* Prevent free on uninitialized memory in finally. */
+    output.dst = NULL;
+
+    if (ensure_dctx(self, 1)) {
+        goto finally;
+    }
+
+    output.dst = PyMem_Malloc(outSize);
+    if (!output.dst) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+    output.size = outSize;
+    output.pos = 0;
+
+    /* Read source stream until EOF */
+    while (1) {
+        readResult = PyObject_CallMethod(source, "read", "n", inSize);
+        if (!readResult) {
+            PyErr_SetString(ZstdError, "could not read() from source");
+            goto finally;
+        }
+
+        /* read() may return something that is not bytes; in that case
+         * readBuffer/readSize are not filled in and must not be used. */
+        if (PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize) < 0) {
+            goto finally;
+        }
+
+        /* If no data was read, we're at EOF. */
+        if (0 == readSize) {
+            break;
+        }
+
+        totalRead += readSize;
+
+        /* Send data to decompressor */
+        input.src = readBuffer;
+        input.size = readSize;
+        input.pos = 0;
+
+        while (input.pos < input.size) {
+            Py_BEGIN_ALLOW_THREADS
+            zresult = ZSTD_decompressStream(self->dctx, &output, &input);
+            Py_END_ALLOW_THREADS
+
+            if (ZSTD_isError(zresult)) {
+                PyErr_Format(ZstdError, "zstd decompressor error: %s",
+                    ZSTD_getErrorName(zresult));
+                goto finally;
+            }
+
+            if (output.pos) {
+#if PY_MAJOR_VERSION >= 3
+                writeResult = PyObject_CallMethod(dest, "write", "y#",
+#else
+                writeResult = PyObject_CallMethod(dest, "write", "s#",
+#endif
+                    output.dst, output.pos);
+
+                /* Propagate a failing write() instead of carrying on with
+                 * an exception pending. */
+                if (!writeResult) {
+                    goto finally;
+                }
+
+                Py_DECREF(writeResult);
+                totalWrite += output.pos;
+                output.pos = 0;
+            }
+        }
+
+        Py_CLEAR(readResult);
+    }
+
+    /* Source stream is exhausted. Finish up. */
+
+    totalReadPy = PyLong_FromSsize_t(totalRead);
+    totalWritePy = PyLong_FromSsize_t(totalWrite);
+    /* Only build the tuple when both integers exist; res stays NULL otherwise. */
+    if (totalReadPy && totalWritePy) {
+        res = PyTuple_Pack(2, totalReadPy, totalWritePy);
+    }
+    Py_XDECREF(totalReadPy);
+    Py_XDECREF(totalWritePy);
+
+finally:
+    if (output.dst) {
+        PyMem_Free(output.dst);
+    }
+
+    Py_XDECREF(readResult);
+
+    return res;
+}
+
+PyDoc_STRVAR(Decompressor_decompress__doc__,
+"decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
+"\n"
+"This method will decompress the entirety of the argument and return the\n"
+"result.\n"
+"\n"
+"The input bytes are expected to contain a full Zstandard frame (something\n"
+"compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
+"not contain a full frame, an exception will be raised.\n"
+"\n"
+"If the frame header of the compressed data does not contain the content size\n"
+"``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
+"allocation of size ``max_output_size`` will be performed and an attempt will\n"
+"be made to perform decompression into that buffer. If the buffer is too\n"
+"small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
+"be resized if it is too large.\n"
+"\n"
+"Uncompressed data could be much larger than compressed data. As a result,\n"
+"calling this function could result in a very large memory allocation being\n"
+"performed to hold the uncompressed data. Therefore it is **highly**\n"
+"recommended to use a streaming decompression method instead of this one.\n"
+);
+
+/**
+ * Implementation of ZstdDecompressor.decompress().
+ *
+ * Sizes the output from the frame header when it records the content size,
+ * otherwise from ``max_output_size``, and decompresses the frame with a
+ * single ZSTD_decompressStream() call.
+ *
+ * Returns a new bytes object, or NULL with an exception set. The acquired
+ * buffer view on ``data`` is released on every path after parsing succeeds.
+ */
+PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "data",
+        "max_output_size",
+        NULL
+    };
+
+    Py_buffer source;
+    Py_ssize_t maxOutputSize = 0;
+    unsigned long long decompressedSize;
+    size_t destCapacity;
+    PyObject* result = NULL;
+    size_t zresult;
+    ZSTD_outBuffer outBuffer;
+    ZSTD_inBuffer inBuffer;
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress",
+#endif
+        kwlist, &source, &maxOutputSize)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    if (ensure_dctx(self, 1)) {
+        goto finally;
+    }
+
+    decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);
+
+    if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
+        PyErr_SetString(ZstdError, "error determining content size from frame header");
+        goto finally;
+    }
+    /* Special case of empty frame. */
+    else if (0 == decompressedSize) {
+        result = PyBytes_FromStringAndSize("", 0);
+        goto finally;
+    }
+    /* Missing content size in frame header. */
+    if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
+        if (0 == maxOutputSize) {
+            PyErr_SetString(ZstdError, "could not determine content size in frame header");
+            goto finally;
+        }
+
+        result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
+        destCapacity = maxOutputSize;
+        /* 0 marks "expected size unknown" for the mismatch check below. */
+        decompressedSize = 0;
+    }
+    /* Size is recorded in frame header. */
+    else {
+        assert(SIZE_MAX >= PY_SSIZE_T_MAX);
+        if (decompressedSize > PY_SSIZE_T_MAX) {
+            PyErr_SetString(ZstdError, "frame is too large to decompress on this platform");
+            goto finally;
+        }
+
+        result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize);
+        destCapacity = (size_t)decompressedSize;
+    }
+
+    if (!result) {
+        goto finally;
+    }
+
+    outBuffer.dst = PyBytes_AsString(result);
+    outBuffer.size = destCapacity;
+    outBuffer.pos = 0;
+
+    inBuffer.src = source.buf;
+    inBuffer.size = source.len;
+    inBuffer.pos = 0;
+
+    Py_BEGIN_ALLOW_THREADS
+    zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
+    Py_END_ALLOW_THREADS
+
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
+        Py_CLEAR(result);
+        goto finally;
+    }
+    else if (zresult) {
+        PyErr_Format(ZstdError, "decompression error: did not decompress full frame");
+        Py_CLEAR(result);
+        goto finally;
+    }
+    else if (decompressedSize && outBuffer.pos != decompressedSize) {
+        /* Report the number of bytes actually produced; zresult is always 0
+         * in this branch and would make the message meaningless. */
+        PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
+            outBuffer.pos, decompressedSize);
+        Py_CLEAR(result);
+        goto finally;
+    }
+    else if (outBuffer.pos < destCapacity) {
+        /* Output was sized from max_output_size; trim to what was written. */
+        if (safe_pybytes_resize(&result, outBuffer.pos)) {
+            Py_CLEAR(result);
+            goto finally;
+        }
+    }
+
+finally:
+    PyBuffer_Release(&source);
+    return result;
+}
+
+PyDoc_STRVAR(Decompressor_decompressobj__doc__,
+"decompressobj([write_size=default])\n"
+"\n"
+"Incrementally feed data into a decompressor.\n"
+"\n"
+"The returned object exposes a ``decompress(data)`` method. This makes it\n"
+"compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
+"callers can swap in the zstd decompressor while using the same API.\n"
+);
+
+/**
+ * Implementation of ZstdDecompressor.decompressobj().
+ *
+ * Creates a ZstdDecompressionObj bound to this decompressor (holding a new
+ * reference to it) with the requested output chunk size. ``write_size`` must
+ * be non-zero.
+ *
+ * Returns the new object, or NULL with an exception set.
+ */
+static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* keywords[] = { "write_size", NULL };
+    size_t writeSize = ZSTD_DStreamOutSize();
+    ZstdDecompressionObj* obj = NULL;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", keywords, &writeSize)) {
+        return NULL;
+    }
+
+    if (0 == writeSize) {
+        PyErr_SetString(PyExc_ValueError, "write_size must be positive");
+        return NULL;
+    }
+
+    obj = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
+    if (NULL == obj) {
+        return NULL;
+    }
+
+    if (ensure_dctx(self, 1)) {
+        Py_DECREF(obj);
+        return NULL;
+    }
+
+    Py_INCREF(self);
+    obj->decompressor = self;
+    obj->outSize = writeSize;
+
+    return obj;
+}
+
+PyDoc_STRVAR(Decompressor_read_to_iter__doc__,
+"read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
+"Read compressed data and return an iterator\n"
+"\n"
+"Returns an iterator of decompressed data chunks produced from reading from\n"
+"the ``reader``.\n"
+"\n"
+"Compressed data will be obtained from ``reader`` by calling the\n"
+"``read(size)`` method of it. The source data will be streamed into a\n"
+"decompressor. As decompressed data is available, it will be exposed to the\n"
+"returned iterator.\n"
+"\n"
+"Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
+"iterator in chunks of size ``write_size``. The default values are the input\n"
+"and output sizes for a zstd streaming decompressor.\n"
+"\n"
+"There is also support for skipping the first ``skip_bytes`` of data from\n"
+"the source.\n"
+);
+
+/**
+ * Implementation of ZstdDecompressor.read_to_iter().
+ *
+ * ``reader`` is used through its ``read()`` method when it has one, otherwise
+ * through the buffer protocol. ``skip_bytes`` must be smaller than
+ * ``read_size``. The iterator holds a new reference to this decompressor and
+ * owns an input buffer of ``read_size`` bytes.
+ *
+ * Returns the new iterator, or NULL with an exception set.
+ */
+static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* keywords[] = { "reader", "read_size", "write_size", "skip_bytes", NULL };
+    PyObject* reader;
+    size_t inSize = ZSTD_DStreamInSize();
+    size_t outSize = ZSTD_DStreamOutSize();
+    size_t skipBytes = 0;
+    ZstdDecompressorIterator* iterator;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", keywords,
+            &reader, &inSize, &outSize, &skipBytes)) {
+        return NULL;
+    }
+
+    if (skipBytes >= inSize) {
+        PyErr_SetString(PyExc_ValueError,
+            "skip_bytes must be smaller than read_size");
+        return NULL;
+    }
+
+    iterator = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
+    if (NULL == iterator) {
+        return NULL;
+    }
+
+    if (PyObject_HasAttrString(reader, "read")) {
+        Py_INCREF(reader);
+        iterator->reader = reader;
+    }
+    else if (1 == PyObject_CheckBuffer(reader)) {
+        /* Object claims it is a buffer. Try to get a handle to it. */
+        if (0 != PyObject_GetBuffer(reader, &iterator->buffer, PyBUF_CONTIG_RO)) {
+            goto except;
+        }
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError,
+            "must pass an object with a read() method or conforms to buffer protocol");
+        goto except;
+    }
+
+    Py_INCREF(self);
+    iterator->decompressor = self;
+
+    iterator->inSize = inSize;
+    iterator->outSize = outSize;
+    iterator->skipBytes = skipBytes;
+
+    if (ensure_dctx(self, 1)) {
+        goto except;
+    }
+
+    iterator->input.src = PyMem_Malloc(inSize);
+    if (NULL == iterator->input.src) {
+        PyErr_NoMemory();
+        goto except;
+    }
+
+    return iterator;
+
+except:
+    /* Dropping the only reference discards the partially built iterator. */
+    Py_DECREF(iterator);
+    return NULL;
+}
+
+PyDoc_STRVAR(Decompressor_stream_reader__doc__,
+"stream_reader(source, [read_size=default, [read_across_frames=False]])\n"
+"\n"
+"Obtain an object that behaves like an I/O stream that can be used for\n"
+"reading decompressed output from an object.\n"
+"\n"
+"The source object can be any object with a ``read(size)`` method or that\n"
+"conforms to the buffer protocol.\n"
+"\n"
+"``read_across_frames`` controls the behavior of ``read()`` when the end\n"
+"of a zstd frame is reached. When ``True``, ``read()`` can potentially\n"
+"return data belonging to multiple zstd frames. When ``False``, ``read()``\n"
+"will return when the end of a frame is reached.\n"
+);
+
+/**
+ * Implementation of ZstdDecompressor.stream_reader().
+ *
+ * ``source`` is used through its ``read()`` method when it has one, otherwise
+ * through the buffer protocol; anything else raises TypeError. The reader
+ * holds a new reference to this decompressor.
+ *
+ * Returns the new reader, or NULL with an exception set.
+ */
+static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "source",
+        "read_size",
+        "read_across_frames",
+        NULL
+    };
+
+    PyObject* source;
+    size_t readSize = ZSTD_DStreamInSize();
+    PyObject* readAcrossFrames = NULL;
+    int acrossFrames = 0;
+    ZstdDecompressionReader* result;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_reader", kwlist,
+        &source, &readSize, &readAcrossFrames)) {
+        return NULL;
+    }
+
+    /* Truth testing can raise (PyObject_IsTrue() returns -1). Resolve the
+     * flag before anything is allocated so the failure is reported instead
+     * of being stored as a truthy value with an exception pending. */
+    if (readAcrossFrames) {
+        acrossFrames = PyObject_IsTrue(readAcrossFrames);
+        if (acrossFrames < 0) {
+            return NULL;
+        }
+    }
+
+    if (ensure_dctx(self, 1)) {
+        return NULL;
+    }
+
+    result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
+    if (NULL == result) {
+        return NULL;
+    }
+
+    result->entered = 0;
+    result->closed = 0;
+
+    if (PyObject_HasAttrString(source, "read")) {
+        result->reader = source;
+        Py_INCREF(source);
+        result->readSize = readSize;
+    }
+    else if (1 == PyObject_CheckBuffer(source)) {
+        if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
+            Py_CLEAR(result);
+            return NULL;
+        }
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+            "must pass an object with a read() method or that conforms to the buffer protocol");
+        Py_CLEAR(result);
+        return NULL;
+    }
+
+    result->decompressor = self;
+    Py_INCREF(self);
+    result->readAcrossFrames = acrossFrames;
+
+    return result;
+}
+
+PyDoc_STRVAR(Decompressor_stream_writer__doc__,
+"Create a context manager to write decompressed data to an object.\n"
+"\n"
+"The passed object must have a ``write()`` method.\n"
+"\n"
+"The caller feeds input data to the object by calling ``write(data)``.\n"
+"Decompressed data is written to the argument given as it is decompressed.\n"
+"\n"
+"An optional ``write_size`` argument defines the size of chunks to\n"
+"``write()`` to the writer. It defaults to the default output size for a zstd\n"
+"streaming decompressor.\n"
+);
+
+/**
+ * Implementation of ZstdDecompressor.stream_writer().
+ *
+ * ``writer`` must have a ``write`` attribute. The returned object holds new
+ * references to both this decompressor and ``writer``.
+ *
+ * Returns the new writer, or NULL with an exception set.
+ */
+static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "writer",
+        "write_size",
+        "write_return_read",
+        NULL
+    };
+
+    PyObject* writer;
+    size_t outSize = ZSTD_DStreamOutSize();
+    PyObject* writeReturnRead = NULL;
+    int returnRead = 0;
+    ZstdDecompressionWriter* result;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_writer", kwlist,
+        &writer, &outSize, &writeReturnRead)) {
+        return NULL;
+    }
+
+    if (!PyObject_HasAttrString(writer, "write")) {
+        PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
+        return NULL;
+    }
+
+    /* Truth testing can raise (PyObject_IsTrue() returns -1). Resolve the
+     * flag before anything is allocated so the failure is reported instead
+     * of being stored as a truthy value with an exception pending. */
+    if (writeReturnRead) {
+        returnRead = PyObject_IsTrue(writeReturnRead);
+        if (returnRead < 0) {
+            return NULL;
+        }
+    }
+
+    if (ensure_dctx(self, 1)) {
+        return NULL;
+    }
+
+    result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
+    if (!result) {
+        return NULL;
+    }
+
+    result->entered = 0;
+    result->closed = 0;
+
+    result->decompressor = self;
+    Py_INCREF(result->decompressor);
+
+    result->writer = writer;
+    Py_INCREF(result->writer);
+
+    result->outSize = outSize;
+    result->writeReturnRead = returnRead;
+
+    return result;
+}
+
+PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
+"Decompress a series of chunks using the content dictionary chaining technique\n"
+);
+
+/**
+ * Check that ``chunk`` (item ``chunkIndex`` of the chain) is a bytes object
+ * holding a zstd frame whose header records a content size no larger than
+ * PY_SSIZE_T_MAX.
+ *
+ * On success points ``inBuffer`` at the whole chunk, stores the declared
+ * content size in ``*contentSize`` and returns 0. On failure sets ValueError
+ * and returns -1.
+ */
+static int chain_prepare_chunk(PyObject* chunk, Py_ssize_t chunkIndex,
+                               ZSTD_inBuffer* inBuffer, size_t* contentSize) {
+    char* chunkData;
+    Py_ssize_t chunkSize;
+    ZSTD_frameHeader frameHeader;
+    size_t zresult;
+
+    if (!PyBytes_Check(chunk)) {
+        PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
+        return -1;
+    }
+
+    /* We require that all chunks be zstd frames and that they have content size set. */
+    PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
+    zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
+        return -1;
+    }
+    else if (zresult) {
+        PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
+        return -1;
+    }
+
+    if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
+        PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
+        return -1;
+    }
+
+    assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
+
+    /* We check against PY_SSIZE_T_MAX here because we ultimately cast the
+     * result to a Python object and its length can be no greater than
+     * Py_ssize_t. In theory, we could have an intermediate frame that is
+     * larger. But a) why would this API be used for frames that large b)
+     * it isn't worth the complexity to support. */
+    assert(SIZE_MAX >= PY_SSIZE_T_MAX);
+    if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
+        PyErr_Format(PyExc_ValueError,
+            "chunk %zd is too large to decompress on this platform", chunkIndex);
+        return -1;
+    }
+
+    inBuffer->src = chunkData;
+    inBuffer->size = chunkSize;
+    inBuffer->pos = 0;
+    *contentSize = (size_t)frameHeader.frameContentSize;
+
+    return 0;
+}
+
+/**
+ * Decompress a non-initial chain link into ``*dest``, using ``prefix`` (the
+ * previous fulltext) as a raw-content prefix dictionary.
+ *
+ * ``*dest`` is grown to ``contentSize`` bytes when it is smaller; it is never
+ * shrunk. On success stores the number of bytes produced in
+ * ``*destContentSize`` and returns 0. On failure sets an exception and
+ * returns -1; ``*dest`` remains owned by the caller either way.
+ */
+static int chain_decompress_link(ZstdDecompressor* self, Py_ssize_t chunkIndex,
+                                 ZSTD_inBuffer* inBuffer, size_t contentSize,
+                                 const void* prefix, size_t prefixSize,
+                                 void** dest, size_t* destSize, size_t* destContentSize) {
+    ZSTD_outBuffer outBuffer;
+    size_t zresult;
+    void* grown;
+
+    /* Resize destination buffer to hold larger content. */
+    if (*destSize < contentSize) {
+        grown = PyMem_Realloc(*dest, contentSize);
+        if (!grown) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        *dest = grown;
+        *destSize = contentSize;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
+        prefix, prefixSize, ZSTD_dct_rawContent);
+    Py_END_ALLOW_THREADS
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError,
+            "failed to load prefix dictionary at chunk %zd", chunkIndex);
+        return -1;
+    }
+
+    outBuffer.dst = *dest;
+    outBuffer.size = *destSize;
+    outBuffer.pos = 0;
+
+    Py_BEGIN_ALLOW_THREADS
+    zresult = ZSTD_decompressStream(self->dctx, &outBuffer, inBuffer);
+    Py_END_ALLOW_THREADS
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
+            chunkIndex, ZSTD_getErrorName(zresult));
+        return -1;
+    }
+    else if (zresult) {
+        PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
+            chunkIndex);
+        return -1;
+    }
+
+    *destContentSize = outBuffer.pos;
+
+    return 0;
+}
+
+/**
+ * Implementation of ZstdDecompressor.decompress_content_dict_chain().
+ *
+ * ``frames`` must be a non-empty list of bytes objects. The first frame is
+ * decompressed without a dictionary; each following frame is decompressed
+ * with the previous frame's fulltext as its prefix dictionary.
+ *
+ * Returns the fulltext of the last frame as a new bytes object, or NULL with
+ * an exception set.
+ */
+static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "frames",
+        NULL
+    };
+
+    PyObject* chunks;
+    Py_ssize_t chunksLen;
+    Py_ssize_t chunkIndex;
+    char parity = 0;
+    size_t contentSize = 0;
+    size_t zresult;
+    void* buffer1 = NULL;
+    size_t buffer1Size = 0;
+    size_t buffer1ContentSize = 0;
+    void* buffer2 = NULL;
+    size_t buffer2Size = 0;
+    size_t buffer2ContentSize = 0;
+    PyObject* result = NULL;
+    ZSTD_outBuffer outBuffer;
+    ZSTD_inBuffer inBuffer;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
+        kwlist, &PyList_Type, &chunks)) {
+        return NULL;
+    }
+
+    chunksLen = PyList_Size(chunks);
+    if (!chunksLen) {
+        PyErr_SetString(PyExc_ValueError, "empty input chain");
+        return NULL;
+    }
+
+    /* The first chunk should not be using a dictionary. We handle it specially. */
+    if (chain_prepare_chunk(PyList_GetItem(chunks, 0), 0, &inBuffer, &contentSize)) {
+        return NULL;
+    }
+
+    if (ensure_dctx(self, 0)) {
+        goto finally;
+    }
+
+    buffer1Size = contentSize;
+    buffer1 = PyMem_Malloc(buffer1Size);
+    if (!buffer1) {
+        /* Without this the caller would see NULL with no exception set. */
+        PyErr_NoMemory();
+        goto finally;
+    }
+
+    outBuffer.dst = buffer1;
+    outBuffer.size = buffer1Size;
+    outBuffer.pos = 0;
+
+    Py_BEGIN_ALLOW_THREADS
+    zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
+    Py_END_ALLOW_THREADS
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
+        goto finally;
+    }
+    else if (zresult) {
+        PyErr_Format(ZstdError, "chunk 0 did not decompress full frame");
+        goto finally;
+    }
+
+    buffer1ContentSize = outBuffer.pos;
+
+    /* Special case of a simple chain. Return what was actually produced,
+     * consistent with the multi-chunk path below. */
+    if (1 == chunksLen) {
+        result = PyBytes_FromStringAndSize(buffer1, buffer1ContentSize);
+        goto finally;
+    }
+
+    /* This should ideally look at next chunk. But this is slightly simpler. */
+    buffer2Size = contentSize;
+    buffer2 = PyMem_Malloc(buffer2Size);
+    if (!buffer2) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+
+    /* For each subsequent chunk, use the previous fulltext as a content dictionary.
+       Our strategy is to have 2 buffers. One holds the previous fulltext (to be
+       used as a content dictionary) and the other holds the new fulltext. The
+       buffers grow when needed but never decrease in size. This limits the
+       memory allocator overhead.
+    */
+    for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
+        if (chain_prepare_chunk(PyList_GetItem(chunks, chunkIndex), chunkIndex,
+                &inBuffer, &contentSize)) {
+            goto finally;
+        }
+
+        parity = chunkIndex % 2;
+
+        /* Odd chunks land in buffer2 (prefix: buffer1); even chunks land in
+         * buffer1 (prefix: buffer2). */
+        if (parity) {
+            if (chain_decompress_link(self, chunkIndex, &inBuffer, contentSize,
+                    buffer1, buffer1ContentSize,
+                    &buffer2, &buffer2Size, &buffer2ContentSize)) {
+                goto finally;
+            }
+        }
+        else {
+            if (chain_decompress_link(self, chunkIndex, &inBuffer, contentSize,
+                    buffer2, buffer2ContentSize,
+                    &buffer1, &buffer1Size, &buffer1ContentSize)) {
+                goto finally;
+            }
+        }
+    }
+
+    result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
+        parity ? buffer2ContentSize : buffer1ContentSize);
+
+finally:
+    if (buffer2) {
+        PyMem_Free(buffer2);
+    }
+    if (buffer1) {
+        PyMem_Free(buffer1);
+    }
+
+    return result;
+}
+
+typedef struct { /* Describes one compressed frame processed by decompress_worker(). */
+ void* sourceData; /* start of the compressed frame */
+ size_t sourceSize; /* length of the compressed frame, in bytes */
+ size_t destSize; /* decompressed size; when 0 the worker reads it from the frame header */
+} FramePointer;
+
+typedef struct { /* Set of frames passed to decompress_from_framesources(). */
+ FramePointer* frames; /* array of frame descriptors */
+ Py_ssize_t framesSize; /* presumably the number of entries in frames -- TODO confirm against caller */
+ unsigned long long compressedSize; /* presumably total compressed bytes across frames -- TODO confirm */
+} FrameSources;
+
+typedef struct { /* One output allocation produced by decompress_worker(). */
+ void* dest; /* malloc()ed memory holding decompressed frames back to back */
+ Py_ssize_t destSize; /* size of dest, in bytes */
+ BufferSegment* segments; /* one offset/length record into dest per decompressed frame */
+ Py_ssize_t segmentsSize; /* number of entries in segments */
+} DestBuffer;
+
+typedef enum { /* Failure categories stored in WorkerState.error by decompress_worker(). */
+ WorkerError_none = 0, /* presumably "no error"; decompress_worker() itself never assigns it */
+ WorkerError_zstd = 1, /* zstd reported an error; its code is saved in WorkerState.zresult */
+ WorkerError_memory = 2, /* an allocation failed or a size exceeds SIZE_MAX */
+ WorkerError_sizeMismatch = 3, /* frame not fully decoded, or output length differs from the expected size */
+ WorkerError_unknownSize = 4, /* the decompressed size of a frame could not be determined */
+} WorkerError;
+
+typedef struct { /* Inputs, settings and results of one decompress_worker() call. */
+ /* Source records; indexed with the offsets below. */
+ FramePointer* framePointers;
+ /* Which records to process: an inclusive index range. */
+ Py_ssize_t startOffset;
+ Py_ssize_t endOffset;
+ unsigned long long totalSourceSize; /* the worker sizes its output allocations from this value */
+
+ /* Decompression state and settings. */
+ ZSTD_DCtx* dctx;
+ int requireOutputSizes; /* non-zero: a frame without a recorded content size is an error */
+
+ /* Output storage, allocated by the worker. */
+ DestBuffer* destBuffers;
+ Py_ssize_t destCount;
+
+ /* Item that error occurred on. */
+ Py_ssize_t errorOffset;
+ /* If an error occurred. */
+ WorkerError error;
+ /* Result from zstd decompression operation; on a size mismatch it holds the bytes produced instead. */
+ size_t zresult;
+} WorkerState;
+
+/**
+ * Decompress frames ``startOffset..endOffset`` (inclusive) of
+ * ``state->framePointers`` into one or more DestBuffer allocations.
+ *
+ * On success ``state->destBuffers`` / ``state->destCount`` describe the
+ * output, with one BufferSegment per frame. On failure ``state->error`` is
+ * set (plus ``errorOffset`` and ``zresult`` where applicable) and the
+ * function returns immediately; buffers allocated so far stay attached to
+ * ``state`` and are presumably released by the caller, as the inline comment
+ * below states.
+ */
+static void decompress_worker(WorkerState* state) {
+    size_t allocationSize;
+    DestBuffer* destBuffer;
+    Py_ssize_t frameIndex;
+    Py_ssize_t localOffset = 0;
+    Py_ssize_t currentBufferStartIndex = state->startOffset;
+    Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
+    void* tmpBuf;
+    Py_ssize_t destOffset = 0;
+    FramePointer* framePointers = state->framePointers;
+    size_t zresult;
+
+    assert(NULL == state->destBuffers);
+    assert(0 == state->destCount);
+    assert(state->endOffset - state->startOffset >= 0);
+
+    /* We could get here due to the way work is allocated. Ideally we wouldn't
+       get here. But that would require a bit of a refactor in the caller. */
+    if (state->totalSourceSize > SIZE_MAX) {
+        state->error = WorkerError_memory;
+        state->errorOffset = 0;
+        return;
+    }
+
+    /*
+     * We need to allocate a buffer to hold decompressed data. How we do this
+     * depends on what we know about the output. The following scenarios are
+     * possible:
+     *
+     * 1. All structs defining frames declare the output size.
+     * 2. The decompressed size is embedded within the zstd frame.
+     * 3. The decompressed size is not stored anywhere.
+     *
+     * For now, we only support #1 and #2.
+     */
+
+    /* Resolve output segments. */
+    for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
+        FramePointer* fp = &framePointers[frameIndex];
+        unsigned long long decompressedSize;
+
+        if (0 == fp->destSize) {
+            decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize);
+
+            if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
+                state->error = WorkerError_unknownSize;
+                state->errorOffset = frameIndex;
+                return;
+            }
+            else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
+                if (state->requireOutputSizes) {
+                    state->error = WorkerError_unknownSize;
+                    state->errorOffset = frameIndex;
+                    return;
+                }
+
+                /* This will fail the assert for .destSize > 0 below. */
+                decompressedSize = 0;
+            }
+
+            if (decompressedSize > SIZE_MAX) {
+                state->error = WorkerError_memory;
+                state->errorOffset = frameIndex;
+                return;
+            }
+
+            fp->destSize = (size_t)decompressedSize;
+        }
+    }
+
+    state->destBuffers = calloc(1, sizeof(DestBuffer));
+    if (NULL == state->destBuffers) {
+        state->error = WorkerError_memory;
+        return;
+    }
+
+    state->destCount = 1;
+
+    destBuffer = &state->destBuffers[state->destCount - 1];
+
+    assert(framePointers[state->startOffset].destSize > 0); /* For now. */
+
+    /* Start from the compressed size as an estimate, but make sure the
+     * first frame is guaranteed to fit. */
+    allocationSize = roundpow2((size_t)state->totalSourceSize);
+
+    if (framePointers[state->startOffset].destSize > allocationSize) {
+        allocationSize = roundpow2(framePointers[state->startOffset].destSize);
+    }
+
+    destBuffer->dest = malloc(allocationSize);
+    if (NULL == destBuffer->dest) {
+        state->error = WorkerError_memory;
+        return;
+    }
+
+    destBuffer->destSize = allocationSize;
+
+    destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
+    if (NULL == destBuffer->segments) {
+        /* Caller will free state->destBuffers as part of cleanup. */
+        state->error = WorkerError_memory;
+        return;
+    }
+
+    destBuffer->segmentsSize = remainingItems;
+
+    for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
+        ZSTD_outBuffer outBuffer;
+        ZSTD_inBuffer inBuffer;
+        const void* source = framePointers[frameIndex].sourceData;
+        const size_t sourceSize = framePointers[frameIndex].sourceSize;
+        void* dest;
+        const size_t decompressedSize = framePointers[frameIndex].destSize;
+        size_t destAvailable = destBuffer->destSize - destOffset;
+
+        assert(decompressedSize > 0); /* For now. */
+
+        /*
+         * Not enough space in current buffer. Finish the current buffer and
+         * allocate and switch to a new one.
+         */
+        if (decompressedSize > destAvailable) {
+            /*
+             * Shrinking the destination buffer is optional. But it should be cheap,
+             * so we just do it.
+             */
+            if (destAvailable) {
+                /* realloc() to zero bytes is not portable (it may free the
+                 * block and return NULL), so only shrink non-empty buffers. */
+                if (destOffset) {
+                    tmpBuf = realloc(destBuffer->dest, destOffset);
+                    if (NULL == tmpBuf) {
+                        state->error = WorkerError_memory;
+                        return;
+                    }
+
+                    destBuffer->dest = tmpBuf;
+                }
+
+                destBuffer->destSize = destOffset;
+            }
+
+            /* Truncate segments buffer. */
+            tmpBuf = realloc(destBuffer->segments,
+                (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
+            if (NULL == tmpBuf) {
+                state->error = WorkerError_memory;
+                return;
+            }
+
+            destBuffer->segments = tmpBuf;
+            destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;
+
+            /* Grow space for new DestBuffer. */
+            tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
+            if (NULL == tmpBuf) {
+                state->error = WorkerError_memory;
+                return;
+            }
+
+            state->destBuffers = tmpBuf;
+            state->destCount++;
+
+            destBuffer = &state->destBuffers[state->destCount - 1];
+
+            /* Don't take any chances with non-NULL pointers. */
+            memset(destBuffer, 0, sizeof(DestBuffer));
+
+            allocationSize = roundpow2((size_t)state->totalSourceSize);
+
+            if (decompressedSize > allocationSize) {
+                allocationSize = roundpow2(decompressedSize);
+            }
+
+            destBuffer->dest = malloc(allocationSize);
+            if (NULL == destBuffer->dest) {
+                state->error = WorkerError_memory;
+                return;
+            }
+
+            destBuffer->destSize = allocationSize;
+            destAvailable = allocationSize;
+            destOffset = 0;
+            localOffset = 0;
+
+            destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
+            if (NULL == destBuffer->segments) {
+                state->error = WorkerError_memory;
+                return;
+            }
+
+            destBuffer->segmentsSize = remainingItems;
+            currentBufferStartIndex = frameIndex;
+        }
+
+        dest = (char*)destBuffer->dest + destOffset;
+
+        /* Cap the output at the expected size so a larger frame is reported
+         * as a mismatch rather than overrunning into the next segment. */
+        outBuffer.dst = dest;
+        outBuffer.size = decompressedSize;
+        outBuffer.pos = 0;
+
+        inBuffer.src = source;
+        inBuffer.size = sourceSize;
+        inBuffer.pos = 0;
+
+        zresult = ZSTD_decompressStream(state->dctx, &outBuffer, &inBuffer);
+        if (ZSTD_isError(zresult)) {
+            state->error = WorkerError_zstd;
+            state->zresult = zresult;
+            state->errorOffset = frameIndex;
+            return;
+        }
+        else if (zresult || outBuffer.pos != decompressedSize) {
+            state->error = WorkerError_sizeMismatch;
+            state->zresult = outBuffer.pos;
+            state->errorOffset = frameIndex;
+            return;
+        }
+
+        destBuffer->segments[localOffset].offset = destOffset;
+        destBuffer->segments[localOffset].length = outBuffer.pos;
+        destOffset += outBuffer.pos;
+        localOffset++;
+        remainingItems--;
+    }
+
+    /* Trim the last buffer to the bytes actually written. */
+    if (destBuffer->destSize > destOffset) {
+        /* Reachable with destOffset == 0 when assertions are compiled out
+         * and only empty frames were written; skip the zero-size realloc()
+         * for the same portability reason as above. */
+        if (destOffset) {
+            tmpBuf = realloc(destBuffer->dest, destOffset);
+            if (NULL == tmpBuf) {
+                state->error = WorkerError_memory;
+                return;
+            }
+
+            destBuffer->dest = tmpBuf;
+        }
+
+        destBuffer->destSize = destOffset;
+    }
+}
+
+/**
+ * Decompress every frame described by ``frames`` into newly allocated buffers.
+ *
+ * Frames are partitioned into contiguous runs holding roughly the same number
+ * of compressed bytes, one run per worker. Every worker receives its own
+ * ZSTD_DCtx copied from the decompressor's context. With more than one worker
+ * the runs are queued on a zstd thread pool; with a single worker the run is
+ * processed inline. The GIL is released while the workers run.
+ *
+ * @param decompressor  Supplies the ZSTD_DCtx to copy and the optional dictionary.
+ * @param frames        Frame pointers plus the total compressed size. Must
+ *                      describe at least one frame.
+ * @param threadCount   Requested worker count; must be >= 1. Clamped to the
+ *                      number of frames.
+ * @return A new BufferWithSegmentsCollection, or NULL with a Python exception set.
+ */
+ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
+    Py_ssize_t threadCount) {
+    Py_ssize_t i = 0;
+    int errored = 0;
+    Py_ssize_t segmentsCount;
+    ZstdBufferWithSegments* bws = NULL;
+    PyObject* resultArg = NULL;
+    Py_ssize_t resultIndex;
+    ZstdBufferWithSegmentsCollection* result = NULL;
+    FramePointer* framePointers = frames->frames;
+    unsigned long long workerBytes = 0;
+    Py_ssize_t currentThread = 0;
+    Py_ssize_t workerStartOffset = 0;
+    POOL_ctx* pool = NULL;
+    WorkerState* workerStates = NULL;
+    unsigned long long bytesPerWorker;
+
+    /* Caller should normalize 0 and negative values to 1 or larger. */
+    assert(threadCount >= 1);
+
+    /*
+     * With no frames the clamp below would set threadCount to 0 and the
+     * bytesPerWorker computation would divide by zero. Reject the input
+     * before anything is allocated.
+     */
+    if (frames->framesSize < 1) {
+        PyErr_SetString(PyExc_ValueError, "no source elements found");
+        return NULL;
+    }
+
+    /* More threads than inputs makes no sense under any conditions. */
+    threadCount = frames->framesSize < threadCount ? frames->framesSize
+                                                   : threadCount;
+
+    /* TODO lower thread count if input size is too small and threads would just
+       add overhead. */
+
+    if (decompressor->dict) {
+        if (ensure_ddict(decompressor->dict)) {
+            return NULL;
+        }
+    }
+
+    /* If threadCount==1, we don't start a thread pool. But we do leverage the
+       same API for dispatching work. */
+    workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
+    if (NULL == workerStates) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+
+    /* Zeroed state means: no dctx, no dest buffers, WorkerError_none is only
+       valid if that enumerator is 0 -- NOTE(review): confirm in the enum. */
+    memset(workerStates, 0, threadCount * sizeof(WorkerState));
+
+    if (threadCount > 1) {
+        pool = POOL_create(threadCount, 1);
+        if (NULL == pool) {
+            PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
+            goto finally;
+        }
+    }
+
+    bytesPerWorker = frames->compressedSize / threadCount;
+
+    if (bytesPerWorker > SIZE_MAX) {
+        PyErr_SetString(ZstdError, "too much data per worker for this platform");
+        goto finally;
+    }
+
+    for (i = 0; i < threadCount; i++) {
+        size_t zresult;
+
+        workerStates[i].dctx = ZSTD_createDCtx();
+        if (NULL == workerStates[i].dctx) {
+            PyErr_NoMemory();
+            goto finally;
+        }
+
+        ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
+
+        if (decompressor->dict) {
+            zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict);
+            if (zresult) {
+                PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
+                    ZSTD_getErrorName(zresult));
+                goto finally;
+            }
+        }
+
+        workerStates[i].framePointers = framePointers;
+        workerStates[i].requireOutputSizes = 1;
+    }
+
+    Py_BEGIN_ALLOW_THREADS
+    /* There are many ways to split work among workers.
+
+       For now, we take a simple approach of splitting work so each worker
+       gets roughly the same number of input bytes. This will result in more
+       starvation than running N>threadCount jobs. But it avoids complications
+       around state tracking, which could involve extra locking.
+    */
+    for (i = 0; i < frames->framesSize; i++) {
+        workerBytes += frames->frames[i].sourceSize;
+
+        /*
+         * The last worker/thread needs to handle all remaining work. Don't
+         * trigger it prematurely. Defer to the block outside of the loop.
+         * (But still process this loop so workerBytes is correct.)
+         */
+        if (currentThread == threadCount - 1) {
+            continue;
+        }
+
+        if (workerBytes >= bytesPerWorker) {
+            workerStates[currentThread].startOffset = workerStartOffset;
+            workerStates[currentThread].endOffset = i;
+            workerStates[currentThread].totalSourceSize = workerBytes;
+
+            if (threadCount > 1) {
+                POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
+            }
+            else {
+                decompress_worker(&workerStates[currentThread]);
+            }
+            currentThread++;
+            workerStartOffset = i + 1;
+            workerBytes = 0;
+        }
+    }
+
+    /* Whatever has not been handed out yet goes to the current (last) worker. */
+    if (workerBytes) {
+        workerStates[currentThread].startOffset = workerStartOffset;
+        workerStates[currentThread].endOffset = frames->framesSize - 1;
+        workerStates[currentThread].totalSourceSize = workerBytes;
+
+        if (threadCount > 1) {
+            POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
+        }
+        else {
+            decompress_worker(&workerStates[currentThread]);
+        }
+    }
+
+    /* NOTE(review): this relies on POOL_free() returning only after queued
+       jobs have completed -- confirm against the zstd pool implementation. */
+    if (threadCount > 1) {
+        POOL_free(pool);
+        pool = NULL;
+    }
+    Py_END_ALLOW_THREADS
+
+    /* Translate the first worker failure, if any, into a Python exception. */
+    for (i = 0; i < threadCount; i++) {
+        switch (workerStates[i].error) {
+        case WorkerError_none:
+            break;
+
+        case WorkerError_zstd:
+            PyErr_Format(ZstdError, "error decompressing item %zd: %s",
+                workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
+            errored = 1;
+            break;
+
+        case WorkerError_memory:
+            PyErr_NoMemory();
+            errored = 1;
+            break;
+
+        case WorkerError_sizeMismatch:
+            PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu",
+                workerStates[i].errorOffset, workerStates[i].zresult,
+                framePointers[workerStates[i].errorOffset].destSize);
+            errored = 1;
+            break;
+
+        case WorkerError_unknownSize:
+            PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
+                workerStates[i].errorOffset);
+            errored = 1;
+            break;
+
+        default:
+            PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
+                workerStates[i].error);
+            errored = 1;
+            break;
+        }
+
+        if (errored) {
+            break;
+        }
+    }
+
+    if (errored) {
+        goto finally;
+    }
+
+    /* One BufferWithSegments is produced per destination buffer per worker. */
+    segmentsCount = 0;
+    for (i = 0; i < threadCount; i++) {
+        segmentsCount += workerStates[i].destCount;
+    }
+
+    resultArg = PyTuple_New(segmentsCount);
+    if (NULL == resultArg) {
+        goto finally;
+    }
+
+    resultIndex = 0;
+
+    for (i = 0; i < threadCount; i++) {
+        Py_ssize_t bufferIndex;
+        WorkerState* state = &workerStates[i];
+
+        for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
+            DestBuffer* destBuffer = &state->destBuffers[bufferIndex];
+
+            bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
+                destBuffer->segments, destBuffer->segmentsSize);
+            if (NULL == bws) {
+                goto finally;
+            }
+
+            /*
+             * Memory for buffer and segments was allocated using malloc() in worker
+             * and the memory is transferred to the BufferWithSegments instance. So
+             * tell instance to use free() and NULL the reference in the state struct
+             * so it isn't freed below.
+             */
+            bws->useFree = 1;
+            destBuffer->dest = NULL;
+            destBuffer->segments = NULL;
+
+            PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
+        }
+    }
+
+    result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
+        (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);
+
+finally:
+    Py_CLEAR(resultArg);
+
+    if (workerStates) {
+        for (i = 0; i < threadCount; i++) {
+            Py_ssize_t bufferIndex;
+            WorkerState* state = &workerStates[i];
+
+            if (state->dctx) {
+                ZSTD_freeDCtx(state->dctx);
+            }
+
+            for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
+                if (state->destBuffers) {
+                    /*
+                     * Will be NULL if memory transferred to a BufferWithSegments.
+                     * Otherwise it is left over after an error occurred.
+                     */
+                    free(state->destBuffers[bufferIndex].dest);
+                    free(state->destBuffers[bufferIndex].segments);
+                }
+            }
+
+            free(state->destBuffers);
+        }
+
+        PyMem_Free(workerStates);
+    }
+
+    POOL_free(pool);
+
+    return result;
+}
+
+/*
+ * Python-visible docstring for ZstdDecompressor.multi_decompress_to_buffer().
+ *
+ * NOTE(review): the text below says all output lives "in a single memory
+ * buffer", but decompress_from_framesources() builds one BufferWithSegments
+ * per destination buffer per worker, so the returned collection can hold
+ * several buffers. The string is left unchanged here because it is emitted
+ * at runtime.
+ */
+PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
+"Decompress multiple frames to output buffers\n"
+"\n"
+"Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
+"list of bytes-like objects. Each item in the passed collection should be a\n"
+"compressed zstd frame.\n"
+"\n"
+"Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
+"written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
+"it is an object conforming to the buffer protocol that represents an array\n"
+"of 64-bit unsigned integers in the machine's native format. Specifying\n"
+"``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
+"output size.\n"
+"\n"
+"Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
+"data. All decompressed data is allocated in a single memory buffer. The\n"
+"``BufferWithSegments`` instance tracks which objects are at which offsets\n"
+"and their respective lengths.\n"
+"\n"
+"The ``threads`` argument controls how many threads to use for operations.\n"
+"Negative values will use the same number of threads as logical CPUs on the\n"
+"machine.\n"
+);
+
+/**
+ * Implementation of ZstdDecompressor.multi_decompress_to_buffer().
+ *
+ * Python arguments:
+ *   frames              BufferWithSegments, BufferWithSegmentsCollection or
+ *                       list of objects supporting the buffer protocol.
+ *   decompressed_sizes  Optional contiguous, one-dimensional buffer that is
+ *                       read as an array of ``unsigned long long``, one entry
+ *                       per frame.
+ *   threads             Optional int. Negative means cpu_count(); anything
+ *                       below 2 is normalized to 1.
+ *
+ * Builds a FramePointer array describing every input frame and hands it to
+ * decompress_from_framesources().
+ *
+ * @return New BufferWithSegmentsCollection, or NULL with a Python exception set.
+ */
+static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "frames",
+        "decompressed_sizes",
+        "threads",
+        NULL
+    };
+
+    PyObject* frames;
+    Py_buffer frameSizes;
+    int threads = 0;
+    /* Zero so the value is defined on every path that reaches cleanup. */
+    Py_ssize_t frameCount = 0;
+    Py_buffer* frameBuffers = NULL;
+    FramePointer* framePointers = NULL;
+    unsigned long long* frameSizesP = NULL;
+    unsigned long long totalInputSize = 0;
+    FrameSources frameSources;
+    ZstdBufferWithSegmentsCollection* result = NULL;
+    Py_ssize_t i;
+
+    /* frameSizes.buf == NULL is how "decompressed_sizes not passed" is detected. */
+    memset(&frameSizes, 0, sizeof(frameSizes));
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
+#endif
+        kwlist, &frames, &frameSizes, &threads)) {
+        return NULL;
+    }
+
+    if (frameSizes.buf) {
+        if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
+            PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
+            goto finally;
+        }
+
+        frameSizesP = (unsigned long long*)frameSizes.buf;
+    }
+
+    if (threads < 0) {
+        threads = cpu_count();
+    }
+
+    if (threads < 2) {
+        threads = 1;
+    }
+
+    if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
+        ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
+        frameCount = buffer->segmentCount;
+
+        if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
+            PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
+                frameCount * (Py_ssize_t)sizeof(unsigned long long), frameSizes.len);
+            goto finally;
+        }
+
+        framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
+        if (!framePointers) {
+            PyErr_NoMemory();
+            goto finally;
+        }
+
+        for (i = 0; i < frameCount; i++) {
+            void* sourceData;
+            unsigned long long sourceSize;
+            unsigned long long decompressedSize = 0;
+
+            if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
+                PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
+                goto finally;
+            }
+
+            sourceData = (char*)buffer->data + buffer->segments[i].offset;
+            sourceSize = buffer->segments[i].length;
+            totalInputSize += sourceSize;
+
+            if (frameSizesP) {
+                decompressedSize = frameSizesP[i];
+            }
+
+            if (sourceSize > SIZE_MAX) {
+                PyErr_Format(PyExc_ValueError,
+                    "item %zd is too large for this platform", i);
+                goto finally;
+            }
+
+            if (decompressedSize > SIZE_MAX) {
+                PyErr_Format(PyExc_ValueError,
+                    "decompressed size of item %zd is too large for this platform", i);
+                goto finally;
+            }
+
+            framePointers[i].sourceData = sourceData;
+            framePointers[i].sourceSize = (size_t)sourceSize;
+            framePointers[i].destSize = (size_t)decompressedSize;
+        }
+    }
+    else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
+        Py_ssize_t offset = 0;
+        ZstdBufferWithSegments* buffer;
+        ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
+
+        frameCount = BufferWithSegmentsCollection_length(collection);
+
+        /*
+         * frameSizes.len is in bytes, so it must be compared against the byte
+         * size of frameCount entries (as the other two branches do), not the
+         * entry count. Otherwise frameSizesP[offset] below can read past the
+         * end of the caller's buffer.
+         */
+        if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
+            PyErr_Format(PyExc_ValueError,
+                "decompressed_sizes size mismatch; expected %zd; got %zd",
+                frameCount * (Py_ssize_t)sizeof(unsigned long long), frameSizes.len);
+            goto finally;
+        }
+
+        framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
+        if (NULL == framePointers) {
+            PyErr_NoMemory();
+            goto finally;
+        }
+
+        /* Iterate the data structure directly because it is faster. */
+        for (i = 0; i < collection->bufferCount; i++) {
+            Py_ssize_t segmentIndex;
+            buffer = collection->buffers[i];
+
+            for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
+                unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0;
+
+                if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
+                    PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
+                        offset);
+                    goto finally;
+                }
+
+                if (buffer->segments[segmentIndex].length > SIZE_MAX) {
+                    PyErr_Format(PyExc_ValueError,
+                        "item %zd in buffer %zd is too large for this platform",
+                        segmentIndex, i);
+                    goto finally;
+                }
+
+                if (decompressedSize > SIZE_MAX) {
+                    PyErr_Format(PyExc_ValueError,
+                        "decompressed size of item %zd in buffer %zd is too large for this platform",
+                        segmentIndex, i);
+                    goto finally;
+                }
+
+                totalInputSize += buffer->segments[segmentIndex].length;
+
+                framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
+                framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length;
+                framePointers[offset].destSize = (size_t)decompressedSize;
+
+                offset++;
+            }
+        }
+    }
+    else if (PyList_Check(frames)) {
+        frameCount = PyList_GET_SIZE(frames);
+
+        if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
+            PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
+                frameCount * (Py_ssize_t)sizeof(unsigned long long), frameSizes.len);
+            goto finally;
+        }
+
+        framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
+        if (!framePointers) {
+            PyErr_NoMemory();
+            goto finally;
+        }
+
+        frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
+        if (NULL == frameBuffers) {
+            PyErr_NoMemory();
+            goto finally;
+        }
+
+        /* Zeroed so cleanup can release every slot, acquired or not. */
+        memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
+
+        /* Do a pass to assemble info about our input buffers and output sizes. */
+        for (i = 0; i < frameCount; i++) {
+            unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0;
+
+            if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
+                &frameBuffers[i], PyBUF_CONTIG_RO)) {
+                PyErr_Clear();
+                PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
+                goto finally;
+            }
+
+            if (decompressedSize > SIZE_MAX) {
+                PyErr_Format(PyExc_ValueError,
+                    "decompressed size of item %zd is too large for this platform", i);
+                goto finally;
+            }
+
+            totalInputSize += frameBuffers[i].len;
+
+            framePointers[i].sourceData = frameBuffers[i].buf;
+            framePointers[i].sourceSize = frameBuffers[i].len;
+            framePointers[i].destSize = (size_t)decompressedSize;
+        }
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
+        goto finally;
+    }
+
+    /* We now have an array with info about our inputs and outputs. Feed it into
+       our generic decompression function. */
+    frameSources.frames = framePointers;
+    frameSources.framesSize = frameCount;
+    frameSources.compressedSize = totalInputSize;
+
+    result = decompress_from_framesources(self, &frameSources, threads);
+
+finally:
+    if (frameSizes.buf) {
+        PyBuffer_Release(&frameSizes);
+    }
+    PyMem_Free(framePointers);
+
+    if (frameBuffers) {
+        for (i = 0; i < frameCount; i++) {
+            PyBuffer_Release(&frameBuffers[i]);
+        }
+
+        PyMem_Free(frameBuffers);
+    }
+
+    return result;
+}
+
+/*
+ * Method table installed as tp_methods of ZstdDecompressorType.
+ *
+ * "read_from" and "write_to" are deprecated aliases: they are bound to the
+ * same C functions and docstrings as "read_to_iter" and "stream_writer".
+ * The { NULL, NULL } entry terminates the table.
+ */
+static PyMethodDef Decompressor_methods[] = {
+ { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
+ Decompressor_copy_stream__doc__ },
+ { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
+ Decompressor_decompress__doc__ },
+ { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS,
+ Decompressor_decompressobj__doc__ },
+ { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
+ Decompressor_read_to_iter__doc__ },
+ /* TODO Remove deprecated API */
+ { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
+ Decompressor_read_to_iter__doc__ },
+ { "stream_reader", (PyCFunction)Decompressor_stream_reader,
+ METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ },
+ { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
+ Decompressor_stream_writer__doc__ },
+ /* TODO remove deprecated API */
+ { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
+ Decompressor_stream_writer__doc__ },
+ { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
+ METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
+ { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
+ METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
+ { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS,
+ Decompressor_memory_size__doc__ },
+ { NULL, NULL }
+};
+
+/*
+ * Static type object for "zstd.ZstdDecompressor".
+ *
+ * The type is subclassable (Py_TPFLAGS_BASETYPE). Instances are allocated by
+ * PyType_GenericNew, initialized by Decompressor_init and torn down by
+ * Decompressor_dealloc; methods come from Decompressor_methods. All other
+ * slots are left at 0. ob_type is filled in by decompressor_module_init().
+ */
+PyTypeObject ZstdDecompressorType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdDecompressor", /* tp_name */
+ sizeof(ZstdDecompressor), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)Decompressor_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ Decompressor__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ Decompressor_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)Decompressor_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/**
+ * Finalize ZstdDecompressorType and publish it on ``mod`` under the name
+ * "ZstdDecompressor".
+ *
+ * The function returns void, so failures are not reported to the caller
+ * directly: it simply returns early, leaving any Python exception raised by
+ * the failing call pending.
+ */
+void decompressor_module_init(PyObject* mod) {
+    Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
+    if (PyType_Ready(&ZstdDecompressorType) < 0) {
+        return;
+    }
+
+    Py_INCREF((PyObject*)&ZstdDecompressorType);
+    if (PyModule_AddObject(mod, "ZstdDecompressor",
+                           (PyObject*)&ZstdDecompressorType) < 0) {
+        /* PyModule_AddObject() steals the reference only on success; give
+           back the one taken above so it is not leaked. */
+        Py_DECREF((PyObject*)&ZstdDecompressorType);
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/decompressoriterator.c b/contrib/python/zstandard/py2/c-ext/decompressoriterator.c
new file mode 100644
index 00000000000..54b56581de1
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/decompressoriterator.c
@@ -0,0 +1,249 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+/* Smaller of a and b. Function-like macro: the selected argument is evaluated
+   twice, so do not pass expressions with side effects. */
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+extern PyObject* ZstdError;
+
+PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
+"Represents an iterator of decompressed data.\n"
+);
+
+/*
+ * tp_dealloc for ZstdDecompressorIterator.
+ *
+ * Drops the references to the decompressor and the reader, releases the
+ * source Py_buffer if one is held, frees the staging buffer behind
+ * input.src, and finally frees the object itself.
+ */
+static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
+    void* staging;
+
+    Py_XDECREF(self->decompressor);
+    Py_XDECREF(self->reader);
+
+    if (NULL != self->buffer.buf) {
+        PyBuffer_Release(&self->buffer);
+        /* Wipe the view so no stale pointers remain in the struct. */
+        memset(&self->buffer, 0, sizeof(self->buffer));
+    }
+
+    /* input.src is declared const for zstd's benefit; cast it away to free. */
+    staging = (void*)self->input.src;
+    if (NULL != staging) {
+        PyMem_Free(staging);
+        self->input.src = NULL;
+    }
+
+    PyObject_Del(self);
+}
+
+/* tp_iter: the object is its own iterator, so return a new reference to self. */
+static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
+ Py_INCREF(self);
+ return self;
+}
+
+/*
+ * Run one ZSTD_decompressStream() call over the iterator's pending input.
+ *
+ * A fresh bytes object of outSize bytes receives the output. On return:
+ *   - errored == 1, chunk == NULL: a Python exception has been set;
+ *   - errored == 0, chunk != NULL: chunk holds the bytes produced, trimmed to
+ *     the amount actually written;
+ *   - errored == 0, chunk == NULL: the call produced no output.
+ *
+ * readCount is advanced by the number of input bytes consumed. When zstd
+ * reports the frame complete (return value 0), both finishedInput and
+ * finishedOutput are set.
+ */
+static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
+    DecompressorIteratorResult outcome;
+    PyObject* bytesObj;
+    size_t status;
+    const size_t inputPosBefore = self->input.pos;
+
+    /* Start from the failure shape; it is overwritten once everything worked. */
+    outcome.chunk = NULL;
+    outcome.errored = 1;
+
+    bytesObj = PyBytes_FromStringAndSize(NULL, self->outSize);
+    if (NULL == bytesObj) {
+        return outcome;
+    }
+
+    self->output.dst = PyBytes_AsString(bytesObj);
+    self->output.size = self->outSize;
+    self->output.pos = 0;
+
+    Py_BEGIN_ALLOW_THREADS
+    status = ZSTD_decompressStream(self->decompressor->dctx, &self->output, &self->input);
+    Py_END_ALLOW_THREADS
+
+    /* We're done with the pointer. Nullify to prevent anyone from getting a
+       handle on a Python object. */
+    self->output.dst = NULL;
+
+    if (ZSTD_isError(status)) {
+        Py_DECREF(bytesObj);
+        PyErr_Format(ZstdError, "zstd decompress error: %s",
+            ZSTD_getErrorName(status));
+        return outcome;
+    }
+
+    self->readCount += self->input.pos - inputPosBefore;
+
+    /* Frame is fully decoded. Input exhausted and output sitting in buffer. */
+    if (0 == status) {
+        self->finishedInput = 1;
+        self->finishedOutput = 1;
+    }
+
+    if (0 == self->output.pos) {
+        /* Nothing was produced: hand back "no chunk" rather than empty bytes. */
+        Py_DECREF(bytesObj);
+        bytesObj = NULL;
+    }
+    else if (self->output.pos < self->outSize
+             && safe_pybytes_resize(&bytesObj, self->output.pos)) {
+        /* Shrinking to the written size failed. */
+        Py_XDECREF(bytesObj);
+        return outcome;
+    }
+
+    outcome.errored = 0;
+    outcome.chunk = bytesObj;
+
+    return outcome;
+}
+
+/*
+ * tp_iternext: produce the next chunk of decompressed data.
+ *
+ * Order of work:
+ *   1. If output was already flagged finished, raise StopIteration.
+ *   2. If previously loaded input is not fully consumed, decompress from it.
+ *   3. Otherwise pull up to inSize bytes from the source -- either
+ *      reader.read(inSize) or the next slice of the held Py_buffer -- copy
+ *      them into the staging buffer and decompress. Repeat until a chunk is
+ *      produced or the source is exhausted.
+ *
+ * On the very first read, skipBytes leading bytes are dropped; a first read
+ * that is not longer than skipBytes raises ValueError.
+ *
+ * Returns a new bytes object, or NULL with an exception set (StopIteration
+ * when iteration is complete).
+ */
+static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
+    PyObject* readResult = NULL;
+    char* readBuffer;
+    Py_ssize_t readSize;
+    Py_ssize_t bufferRemaining;
+    DecompressorIteratorResult result;
+
+    if (self->finishedOutput) {
+        PyErr_SetString(PyExc_StopIteration, "output flushed");
+        return NULL;
+    }
+
+    /* If we have data left in the input, consume it. */
+    if (self->input.pos < self->input.size) {
+        result = read_decompressor_iterator(self);
+        if (result.chunk || result.errored) {
+            return result.chunk;
+        }
+
+        /* Else fall through to get more data from input. */
+    }
+
+read_from_source:
+
+    if (!self->finishedInput) {
+        if (self->reader) {
+            readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
+            if (!readResult) {
+                return NULL;
+            }
+
+            /*
+             * read() may hand back something that is not a bytes object. In
+             * that case readBuffer/readSize are never written, so bail out
+             * with the exception PyBytes_AsStringAndSize() raised instead of
+             * using them uninitialized.
+             */
+            if (PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize) < 0) {
+                Py_DECREF(readResult);
+                return NULL;
+            }
+        }
+        else {
+            assert(self->buffer.buf);
+
+            /* Only support contiguous C arrays for now */
+            assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
+            assert(self->buffer.itemsize == 1);
+
+            /* TODO avoid memcpy() below */
+            readBuffer = (char *)self->buffer.buf + self->bufferOffset;
+            bufferRemaining = self->buffer.len - self->bufferOffset;
+            readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
+            self->bufferOffset += readSize;
+        }
+
+        if (readSize) {
+            if (!self->readCount && self->skipBytes) {
+                assert(self->skipBytes < self->inSize);
+                if ((Py_ssize_t)self->skipBytes >= readSize) {
+                    PyErr_SetString(PyExc_ValueError,
+                        "skip_bytes larger than first input chunk; "
+                        "this scenario is currently unsupported");
+                    Py_XDECREF(readResult);
+                    return NULL;
+                }
+
+                readBuffer = readBuffer + self->skipBytes;
+                readSize -= self->skipBytes;
+            }
+
+            /* Copy input into previously allocated buffer because it can live longer
+               than a single function call and we don't want to keep a ref to a Python
+               object around. This could be changed... */
+            /* NOTE(review): assumes the staging buffer holds at least inSize
+               bytes and that read() never returns more than it was asked
+               for -- confirm where input.src is allocated. */
+            memcpy((void*)self->input.src, readBuffer, readSize);
+            self->input.size = readSize;
+            self->input.pos = 0;
+        }
+        /* No bytes on first read must mean an empty input stream. */
+        else if (!self->readCount) {
+            self->finishedInput = 1;
+            self->finishedOutput = 1;
+            Py_XDECREF(readResult);
+            PyErr_SetString(PyExc_StopIteration, "empty input");
+            return NULL;
+        }
+        else {
+            self->finishedInput = 1;
+        }
+
+        /* The data has been copied into our own buffer. Discard the Python object. */
+        Py_XDECREF(readResult);
+    }
+
+    result = read_decompressor_iterator(self);
+    if (result.errored || result.chunk) {
+        return result.chunk;
+    }
+
+    /* No new output data. Try again unless we know there is no more data. */
+    if (!self->finishedInput) {
+        goto read_from_source;
+    }
+
+    PyErr_SetString(PyExc_StopIteration, "input exhausted");
+    return NULL;
+}
+
+/*
+ * Static type object for "zstd.ZstdDecompressorIterator".
+ *
+ * Implements the iterator protocol through tp_iter
+ * (ZstdDecompressorIterator_iter) and tp_iternext
+ * (ZstdDecompressorIterator_iternext). It defines no methods, members or
+ * tp_init; tp_new is PyType_GenericNew. ob_type is filled in by
+ * decompressoriterator_module_init().
+ */
+PyTypeObject ZstdDecompressorIteratorType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdDecompressorIterator", /* tp_name */
+ sizeof(ZstdDecompressorIterator), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ ZstdDecompressorIterator__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ ZstdDecompressorIterator_iter, /* tp_iter */
+ (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/*
+ * Finalize ZstdDecompressorIteratorType. The type is not added to ``mod``;
+ * the parameter is unused here.
+ */
+void decompressoriterator_module_init(PyObject* mod) {
+    int readyStatus;
+
+    Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
+
+    readyStatus = PyType_Ready(&ZstdDecompressorIteratorType);
+    if (readyStatus < 0) {
+        /* Nothing else to undo; any pending exception is left for the caller. */
+        return;
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/frameparams.c b/contrib/python/zstandard/py2/c-ext/frameparams.c
new file mode 100644
index 00000000000..35ca3ca5900
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/frameparams.c
@@ -0,0 +1,138 @@
+/**
+* Copyright (c) 2017-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+PyDoc_STRVAR(FrameParameters__doc__,
+ "FrameParameters: information about a zstd frame");
+
+/*
+ * Parse the zstd frame header at the start of ``data`` and return it as a
+ * new FrameParameters object.
+ *
+ * ``data`` must be a C-contiguous buffer with at most one dimension
+ * (ValueError otherwise). ZstdError is raised when zstd rejects the header
+ * or when it reports that more bytes are needed to parse it.
+ *
+ * Returns NULL with a Python exception set on failure. ``self`` is unused.
+ */
+FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = { "data", NULL };
+
+    /* Bytes-like argument: "y*" on Python 3, "s*" on Python 2. */
+#if PY_MAJOR_VERSION >= 3
+    static const char argFormat[] = "y*:get_frame_parameters";
+#else
+    static const char argFormat[] = "s*:get_frame_parameters";
+#endif
+
+    Py_buffer source;
+    ZSTD_frameHeader header;
+    FrameParametersObject* params = NULL;
+    size_t status;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, argFormat, kwlist, &source)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        PyBuffer_Release(&source);
+        return NULL;
+    }
+
+    status = ZSTD_getFrameHeader(&header, source.buf, source.len);
+
+    if (ZSTD_isError(status)) {
+        PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(status));
+    }
+    else if (status) {
+        /* A non-zero, non-error result is the number of bytes zstd wants. */
+        PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", status);
+    }
+    else {
+        params = PyObject_New(FrameParametersObject, &FrameParametersType);
+        if (params) {
+            params->frameContentSize = header.frameContentSize;
+            params->windowSize = header.windowSize;
+            params->dictID = header.dictID;
+            params->checksumFlag = header.checksumFlag ? 1 : 0;
+        }
+    }
+
+    PyBuffer_Release(&source);
+    return params;
+}
+
+/* tp_dealloc for FrameParameters: the object holds only plain C fields, so
+   freeing the object itself is all that is done. */
+static void FrameParameters_dealloc(PyObject* self) {
+ PyObject_Del(self);
+}
+
+/*
+ * Read-only attributes exposed on FrameParameters instances. Each entry maps
+ * a Python attribute name onto a field of FrameParametersObject; the
+ * { NULL } entry terminates the table.
+ */
+static PyMemberDef FrameParameters_members[] = {
+ { "content_size", T_ULONGLONG,
+ offsetof(FrameParametersObject, frameContentSize), READONLY,
+ "frame content size" },
+ { "window_size", T_ULONGLONG,
+ offsetof(FrameParametersObject, windowSize), READONLY,
+ "window size" },
+ { "dict_id", T_UINT,
+ offsetof(FrameParametersObject, dictID), READONLY,
+ "dictionary ID" },
+ { "has_checksum", T_BOOL,
+ offsetof(FrameParametersObject, checksumFlag), READONLY,
+ "checksum flag" },
+ { NULL }
+};
+
+/*
+ * Static type object for "FrameParameters".
+ *
+ * Only tp_dealloc, tp_doc and tp_members are populated. tp_new and tp_init
+ * are left at 0; within this file instances are created from C with
+ * PyObject_New() in get_frame_parameters(). Unlike the decompressor types,
+ * Py_TPFLAGS_BASETYPE is not set. ob_type is filled in by
+ * frameparams_module_init().
+ */
+PyTypeObject FrameParametersType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "FrameParameters", /* tp_name */
+ sizeof(FrameParametersObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)FrameParameters_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ FrameParameters__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ 0, /* tp_methods */
+ FrameParameters_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ 0, /* tp_new */
+};
+
+/**
+ * Finalize FrameParametersType and publish it on ``mod`` under the name
+ * "FrameParameters".
+ *
+ * The function returns void, so failures are not reported to the caller
+ * directly: it simply returns early, leaving any Python exception raised by
+ * the failing call pending.
+ */
+void frameparams_module_init(PyObject* mod) {
+    Py_TYPE(&FrameParametersType) = &PyType_Type;
+    if (PyType_Ready(&FrameParametersType) < 0) {
+        return;
+    }
+
+    Py_INCREF(&FrameParametersType);
+    if (PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType) < 0) {
+        /* PyModule_AddObject() steals the reference only on success; give
+           back the one taken above so it is not leaked. */
+        Py_DECREF(&FrameParametersType);
+    }
+}
diff --git a/contrib/python/zstandard/py2/c-ext/python-zstandard.h b/contrib/python/zstandard/py2/c-ext/python-zstandard.h
new file mode 100644
index 00000000000..bd1cb4dcad8
--- /dev/null
+++ b/contrib/python/zstandard/py2/c-ext/python-zstandard.h
@@ -0,0 +1,359 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "structmember.h"
+
+#define ZSTD_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+#include <zstd.h>
+#include <zdict.h>
+
+/* Remember to change the string in zstandard/__init__ as well */
+#define PYTHON_ZSTANDARD_VERSION "0.14.1"
+
+/*
+ Flush mode selector for compression objects.
+
+ NOTE(review): meaning inferred from the enumerator names (finish the frame
+ vs. flush only the current block); the code consuming these values is not
+ part of this header -- confirm there.
+*/
+typedef enum {
+ compressorobj_flush_finish,
+ compressorobj_flush_block,
+} CompressorObj_Flush;
+
+/*
+ Represents a ZstdCompressionParameters type.
+
+ This type holds all the low-level compression parameters that can be set.
+*/
+typedef struct {
+ PyObject_HEAD
+ /* zstd parameter set carrying the values for this instance.
+    NOTE(review): presumably allocated and freed by this object -- confirm
+    in compressionparams.c. */
+ ZSTD_CCtx_params* params;
+} ZstdCompressionParametersObject;
+
+/* Type object for the struct above; defined in another translation unit. */
+extern PyTypeObject ZstdCompressionParametersType;
+
+/*
+ Represents a FrameParameters type.
+
+ This type is basically a wrapper around ZSTD_frameParams.
+
+ The fields are published to Python as read-only attributes by the
+ FrameParameters_members table in frameparams.c.
+*/
+typedef struct {
+ PyObject_HEAD
+ /* Exposed as the "content_size" attribute (T_ULONGLONG). */
+ unsigned long long frameContentSize;
+ /* Exposed as the "window_size" attribute (T_ULONGLONG). */
+ unsigned long long windowSize;
+ /* Exposed as the "dict_id" attribute (T_UINT). */
+ unsigned dictID;
+ /* Exposed as the "has_checksum" attribute (T_BOOL). */
+ char checksumFlag;
+} FrameParametersObject;
+
+/* Type object for the struct above; defined in frameparams.c. */
+extern PyTypeObject FrameParametersType;
+
+/*
+ Represents a ZstdCompressionDict type.
+
+ Instances hold data used for a zstd compression dictionary.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Pointer to dictionary data. Owned by self. */
+ void* dictData;
+ /* Size of dictionary data. */
+ size_t dictSize;
+ /* zstd content-type tag for the dictionary data. */
+ ZSTD_dictContentType_e dictType;
+ /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
+ unsigned k;
+ /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
+ unsigned d;
+ /* Digested dictionary, suitable for reuse. */
+ ZSTD_CDict* cdict;
+ /* Decompression-side digested dictionary.
+    NOTE(review): presumably populated on demand by ensure_ddict() --
+    confirm at its definition. */
+ ZSTD_DDict* ddict;
+} ZstdCompressionDict;
+
+/* Type object for the struct above; defined in another translation unit. */
+extern PyTypeObject ZstdCompressionDictType;
+
+/*
+ Represents a ZstdCompressor type.
+
+ Several other structs in this header (compression objects, writers,
+ readers, iterators and chunkers) hold a pointer to an instance of this
+ struct.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Number of threads to use for operations. */
+ unsigned int threads;
+ /* Pointer to compression dictionary to use. NULL if not using dictionary
+ compression. */
+ ZstdCompressionDict* dict;
+ /* Compression context to use. Populated during object construction. */
+ ZSTD_CCtx* cctx;
+ /* Compression parameters in use. */
+ ZSTD_CCtx_params* params;
+} ZstdCompressor;
+
+/* Type object for the struct above; defined in another translation unit. */
+extern PyTypeObject ZstdCompressorType;
+
+/*
+ Instance struct paired with ZstdCompressionObjType.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Compressor this object works with. */
+ ZstdCompressor* compressor;
+ /* zstd output buffer descriptor. */
+ ZSTD_outBuffer output;
+ /* NOTE(review): presumably set once the stream has been finished --
+    confirm in compressobj.c. */
+ int finished;
+} ZstdCompressionObj;
+
+extern PyTypeObject ZstdCompressionObjType;
+
+/*
+ Instance struct paired with ZstdCompressionWriterType.
+
+ NOTE(review): field roles are inferred from their names and from the
+ documented, similarly named fields of ZstdDecompressionReader; confirm
+ against compressionwriter.c.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Compressor this writer works with. */
+ ZstdCompressor* compressor;
+ /* Python object; presumably the destination whose write() receives the
+    compressed data. */
+ PyObject* writer;
+ /* zstd output buffer descriptor. */
+ ZSTD_outBuffer output;
+ size_t outSize;
+ /* Presumably context-manager / stream-closed flags. */
+ int entered;
+ int closed;
+ int writeReturnRead;
+ /* Running byte counter; which side of the stream it counts is not
+    visible from this header. */
+ unsigned long long bytesCompressed;
+} ZstdCompressionWriter;
+
+extern PyTypeObject ZstdCompressionWriterType;
+
+/*
+ Instance struct paired with ZstdCompressorIteratorType.
+
+ NOTE(review): field roles are inferred from their names and from the
+ documented, similarly named fields of ZstdDecompressionReader; confirm
+ against compressoriterator.c.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Compressor this iterator works with. */
+ ZstdCompressor* compressor;
+ /* Input source: presumably either a Python object to read() from or a
+    buffer, with bufferOffset tracking progress through the buffer. */
+ PyObject* reader;
+ Py_buffer buffer;
+ Py_ssize_t bufferOffset;
+ /* Presumably the chunk sizes used for reading input and emitting output. */
+ size_t inSize;
+ size_t outSize;
+
+ /* zstd stream buffer descriptors. */
+ ZSTD_inBuffer input;
+ ZSTD_outBuffer output;
+ int finishedOutput;
+ int finishedInput;
+ PyObject* readResult;
+} ZstdCompressorIterator;
+
+extern PyTypeObject ZstdCompressorIteratorType;
+
+/*
+ Instance struct paired with ZstdCompressionReaderType.
+
+ NOTE(review): this mirrors the documented ZstdDecompressionReader struct
+ later in this header; the field roles are presumably the compression-side
+ equivalents -- confirm against compressionreader.c.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Compressor this reader works with. */
+ ZstdCompressor* compressor;
+ /* Input source: presumably a Python object to read() from, or a buffer. */
+ PyObject* reader;
+ Py_buffer buffer;
+ size_t readSize;
+
+ int entered;
+ int closed;
+ unsigned long long bytesCompressed;
+
+ /* zstd stream buffer descriptors. */
+ ZSTD_inBuffer input;
+ ZSTD_outBuffer output;
+ int finishedInput;
+ int finishedOutput;
+ PyObject* readResult;
+} ZstdCompressionReader;
+
+extern PyTypeObject ZstdCompressionReaderType;
+
+/*
+ Instance struct paired with ZstdCompressionChunkerType.
+
+ ZstdCompressionChunkerIterator instances hold a pointer to this struct.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Compressor this chunker works with. */
+ ZstdCompressor* compressor;
+ /* zstd stream buffer descriptors. */
+ ZSTD_inBuffer input;
+ ZSTD_outBuffer output;
+ /* Python buffer view; presumably the caller data currently being fed
+    through `input` -- confirm in compressionchunker.c. */
+ Py_buffer inBuffer;
+ int finished;
+ /* NOTE(review): presumably the size of each emitted chunk -- confirm. */
+ size_t chunkSize;
+} ZstdCompressionChunker;
+
+extern PyTypeObject ZstdCompressionChunkerType;
+
+/*
+ Operating mode stored in ZstdCompressionChunkerIterator.mode.
+
+ NOTE(review): the three modes presumably correspond to regular
+ compression, flushing and finishing the stream -- confirm in
+ compressionchunker.c.
+*/
+typedef enum {
+ compressionchunker_mode_normal,
+ compressionchunker_mode_flush,
+ compressionchunker_mode_finish,
+} CompressionChunkerMode;
+
+/*
+ Instance struct paired with ZstdCompressionChunkerIteratorType.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Chunker this iterator belongs to. */
+ ZstdCompressionChunker* chunker;
+ /* Mode this iterator runs in; see CompressionChunkerMode. */
+ CompressionChunkerMode mode;
+} ZstdCompressionChunkerIterator;
+
+extern PyTypeObject ZstdCompressionChunkerIteratorType;
+
+/*
+ Instance struct paired with ZstdDecompressorType.
+
+ The decompression objects, readers, writers and iterators declared in this
+ header each hold a pointer to an instance of this struct.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* zstd decompression context.
+    NOTE(review): presumably created or reset by ensure_dctx() -- confirm. */
+ ZSTD_DCtx* dctx;
+ /* Dictionary object associated with this decompressor; presumably NULL
+    when no dictionary is used. */
+ ZstdCompressionDict* dict;
+ size_t maxWindowSize;
+ /* zstd frame format selector. */
+ ZSTD_format_e format;
+} ZstdDecompressor;
+
+extern PyTypeObject ZstdDecompressorType;
+
+/*
+ Instance struct paired with ZstdDecompressionObjType.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Decompressor this object works with. */
+ ZstdDecompressor* decompressor;
+ /* NOTE(review): presumably the output buffer size used per call --
+    confirm in decompressobj.c. */
+ size_t outSize;
+ int finished;
+} ZstdDecompressionObj;
+
+extern PyTypeObject ZstdDecompressionObjType;
+
+/*
+ Instance struct paired with ZstdDecompressionReaderType.
+
+ Input comes either from `reader` (an object to read() from) or from
+ `buffer`, as the per-field comments below describe.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Parent decompressor to which this object is associated. */
+ ZstdDecompressor* decompressor;
+ /* Object to read() from (if reading from a stream). */
+ PyObject* reader;
+ /* Size for read() operations on reader. */
+ size_t readSize;
+ /* Whether a read() can return data spanning multiple zstd frames. */
+ int readAcrossFrames;
+ /* Buffer to read from (if reading from a buffer). */
+ Py_buffer buffer;
+
+ /* Whether the context manager is active. */
+ int entered;
+ /* Whether we've closed the stream. */
+ int closed;
+
+ /* Number of bytes decompressed and returned to user. */
+ unsigned long long bytesDecompressed;
+
+ /* Tracks data going into decompressor. */
+ ZSTD_inBuffer input;
+
+ /* Holds output from read() operation on reader. */
+ PyObject* readResult;
+
+ /* Whether all input has been sent to the decompressor. */
+ int finishedInput;
+ /* Whether all output has been flushed from the decompressor. */
+ int finishedOutput;
+} ZstdDecompressionReader;
+
+extern PyTypeObject ZstdDecompressionReaderType;
+
+/*
+ Instance struct paired with ZstdDecompressionWriterType.
+
+ NOTE(review): field roles are inferred from their names and from the
+ documented fields of ZstdDecompressionReader; confirm against
+ decompressionwriter.c.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Decompressor this writer works with. */
+ ZstdDecompressor* decompressor;
+ /* Python object; presumably the destination whose write() receives the
+    decompressed data. */
+ PyObject* writer;
+ size_t outSize;
+ /* Presumably context-manager / stream-closed flags. */
+ int entered;
+ int closed;
+ int writeReturnRead;
+} ZstdDecompressionWriter;
+
+extern PyTypeObject ZstdDecompressionWriterType;
+
+/*
+ Instance struct paired with ZstdDecompressorIteratorType.
+
+ NOTE(review): field roles are inferred from their names and from the
+ documented fields of ZstdDecompressionReader; confirm against
+ decompressoriterator.c.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Decompressor this iterator works with. */
+ ZstdDecompressor* decompressor;
+ /* Input source: presumably either a Python object to read() from or a
+    buffer, with bufferOffset tracking progress through the buffer. */
+ PyObject* reader;
+ Py_buffer buffer;
+ Py_ssize_t bufferOffset;
+ /* Presumably the chunk sizes used for reading input and emitting output. */
+ size_t inSize;
+ size_t outSize;
+ /* Presumably a count of leading input bytes to discard -- confirm. */
+ size_t skipBytes;
+ /* zstd stream buffer descriptors. */
+ ZSTD_inBuffer input;
+ ZSTD_outBuffer output;
+ Py_ssize_t readCount;
+ int finishedInput;
+ int finishedOutput;
+} ZstdDecompressorIterator;
+
+extern PyTypeObject ZstdDecompressorIteratorType;
+
+/*
+ Plain C result pair (not a Python object: there is no PyObject_HEAD).
+
+ NOTE(review): presumably returned by a helper of the decompressor
+ iterator, with `errored` non-zero on failure and `chunk` holding the
+ produced data otherwise -- confirm in decompressoriterator.c.
+*/
+typedef struct {
+ int errored;
+ PyObject* chunk;
+} DecompressorIteratorResult;
+
+/*
+ Describes one segment as an (offset, length) pair. Arrays of these are
+ referenced by ZstdBufferSegments and ZstdBufferWithSegments.
+
+ NOTE(review): offset is presumably relative to the start of the owning
+ buffer's data, in bytes -- confirm in bufferutil.c.
+*/
+typedef struct {
+ /* The public API is that these are 64-bit unsigned integers. So these can't
+ * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
+ * be nonsensical for this platform. */
+ unsigned long long offset;
+ unsigned long long length;
+} BufferSegment;
+
+/*
+ Instance struct paired with ZstdBufferSegmentsType.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Python object this instance is derived from.
+    NOTE(review): presumably kept referenced so `segments` stays valid --
+    confirm in bufferutil.c. */
+ PyObject* parent;
+ /* Array of segment descriptors and the number of entries in it. */
+ BufferSegment* segments;
+ Py_ssize_t segmentCount;
+} ZstdBufferSegments;
+
+extern PyTypeObject ZstdBufferSegmentsType;
+
+/*
+ Instance struct paired with ZstdBufferSegmentType (a single segment).
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Python object this instance is derived from.
+    NOTE(review): presumably kept referenced so `data` stays valid --
+    confirm in bufferutil.c. */
+ PyObject* parent;
+ /* Start of the segment's bytes and their length. */
+ void* data;
+ Py_ssize_t dataSize;
+ /* NOTE(review): presumably this segment's offset within the parent's
+    data -- confirm. */
+ unsigned long long offset;
+} ZstdBufferSegment;
+
+extern PyTypeObject ZstdBufferSegmentType;
+
+/*
+ Instance struct paired with ZstdBufferWithSegmentsType: a block of data
+ plus an array of BufferSegment entries describing it.
+*/
+typedef struct {
+ PyObject_HEAD
+
+ /* Python buffer view.
+    NOTE(review): presumably only populated when the instance wraps a
+    Python object rather than raw memory -- confirm in bufferutil.c. */
+ Py_buffer parent;
+ /* The data block and its size. */
+ void* data;
+ unsigned long long dataSize;
+ /* Array of segment descriptors and the number of entries in it. */
+ BufferSegment* segments;
+ Py_ssize_t segmentCount;
+ /* NOTE(review): presumably selects free() vs. another deallocator when
+    releasing memory -- confirm in bufferutil.c. */
+ int useFree;
+} ZstdBufferWithSegments;
+
+extern PyTypeObject ZstdBufferWithSegmentsType;
+
+/**
+ * An ordered collection of BufferWithSegments exposed as a squashed collection.
+ *
+ * This type provides a virtual view spanning multiple BufferWithSegments
+ * instances. It allows multiple instances to be "chained" together and
+ * exposed as a single collection. e.g. if there are 2 buffers holding
+ * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
+ */
+typedef struct {
+ PyObject_HEAD
+
+ /* An array of buffers that should be exposed through this instance. */
+ ZstdBufferWithSegments** buffers;
+ /* Number of elements in buffers array. */
+ Py_ssize_t bufferCount;
+ /* Array of first offset in each buffer instance. 0th entry corresponds
+ to number of elements in the 0th buffer. 1st entry corresponds to the
+ sum of elements in 0th and 1st buffers. */
+ /* In other words, entry i is the running total of segments in buffers
+    0..i; for the two 10-segment buffers in the example above the array
+    would hold {10, 20}. */
+ Py_ssize_t* firstElements;
+} ZstdBufferWithSegmentsCollection;
+
+extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
+
+/*
+ Functions shared between the extension's translation units.
+
+ get_frame_parameters() and train_dictionary() are registered as module-level
+ functions in zstd.c; cpu_count(), roundpow2() and safe_pybytes_resize() are
+ defined in zstd.c. The remaining functions are defined in other files.
+*/
+int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
+int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
+int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
+FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
+int ensure_ddict(ZstdCompressionDict* dict);
+int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
+ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
+ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
+Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
+/* Number of CPUs, or 0 on platforms where it cannot be determined. */
+int cpu_count(void);
+/* Round up to a power of two. */
+size_t roundpow2(size_t);
+/* Resize a bytes object even when it is shared; returns 0 on success. */
+int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
diff --git a/contrib/python/zstandard/py2/ya.make b/contrib/python/zstandard/py2/ya.make
new file mode 100644
index 00000000000..2efc543fe85
--- /dev/null
+++ b/contrib/python/zstandard/py2/ya.make
@@ -0,0 +1,58 @@
+# Generated by devtools/yamaker (pypi).
+
+PY2_LIBRARY()
+
+VERSION(0.14.1)
+
+LICENSE(BSD-3-Clause)
+
+PEERDIR(
+ contrib/libs/zstd
+)
+
+ADDINCL(
+ contrib/libs/zstd/include
+ contrib/libs/zstd/lib/common
+ contrib/python/zstandard/py2/c-ext
+)
+
+NO_COMPILER_WARNINGS()
+
+NO_LINT()
+
+SRCS(
+ c-ext/bufferutil.c
+ c-ext/compressionchunker.c
+ c-ext/compressiondict.c
+ c-ext/compressionparams.c
+ c-ext/compressionreader.c
+ c-ext/compressionwriter.c
+ c-ext/compressobj.c
+ c-ext/compressor.c
+ c-ext/compressoriterator.c
+ c-ext/constants.c
+ c-ext/decompressionreader.c
+ c-ext/decompressionwriter.c
+ c-ext/decompressobj.c
+ c-ext/decompressor.c
+ c-ext/decompressoriterator.c
+ c-ext/frameparams.c
+ zstd.c
+)
+
+PY_REGISTER(
+ zstd
+)
+
+PY_SRCS(
+ TOP_LEVEL
+ zstandard/__init__.py
+)
+
+RESOURCE_FILES(
+ PREFIX contrib/python/zstandard/py2/
+ .dist-info/METADATA
+ .dist-info/top_level.txt
+)
+
+END()
diff --git a/contrib/python/zstandard/py2/zstandard/__init__.py b/contrib/python/zstandard/py2/zstandard/__init__.py
new file mode 100644
index 00000000000..5b0a9318761
--- /dev/null
+++ b/contrib/python/zstandard/py2/zstandard/__init__.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2017-present, Gregory Szorc
+# All rights reserved.
+#
+# This software may be modified and distributed under the terms
+# of the BSD license. See the LICENSE file for details.
+
+"""Python interface to the Zstandard (zstd) compression library."""
+
+from __future__ import absolute_import, unicode_literals
+
+# This module has two jobs:
+#
+# 1) Re-export whichever "backend" (C extension or CFFI) gets selected.
+# 2) Host additional functionality layered on top of that backend.
+
+import os
+import platform
+
+# Backend selection.
+#
+# Not every Python implementation can load C extensions, which is why a CFFI
+# backend exists at all. The PYTHON_ZSTANDARD_IMPORT_POLICY environment
+# variable picks the strategy:
+#
+#   "cext"          - C extension only.
+#   "cffi"          - CFFI backend only.
+#   "cffi_fallback" - C extension, falling back to CFFI if it cannot be
+#                     imported.
+#   "default"       - CPython requires the C extension (for performance),
+#                     PyPy uses CFFI, and any other implementation behaves
+#                     like "cffi_fallback".
+#
+# An environment variable is admittedly hacky; a build-time generated policy
+# module would be cleaner, but nobody has needed that yet.
+_module_policy = os.environ.get("PYTHON_ZSTANDARD_IMPORT_POLICY", "default")
+
+if _module_policy == "cext":
+    from zstd import *
+
+    backend = "cext"
+elif _module_policy == "cffi":
+    from .cffi import *
+
+    backend = "cffi"
+elif _module_policy == "cffi_fallback":
+    try:
+        from zstd import *
+
+        backend = "cext"
+    except ImportError:
+        from .cffi import *
+
+        backend = "cffi"
+elif _module_policy == "default":
+    if platform.python_implementation() == "CPython":
+        from zstd import *
+
+        backend = "cext"
+    elif platform.python_implementation() == "PyPy":
+        from .cffi import *
+
+        backend = "cffi"
+    else:
+        # Unknown implementation: prefer the C extension, tolerate its absence.
+        try:
+            from zstd import *
+
+            backend = "cext"
+        except ImportError:
+            from .cffi import *
+
+            backend = "cffi"
+else:
+    raise ImportError(
+        "unknown module import policy: %s; use default, cffi_fallback, "
+        "cext, or cffi" % _module_policy
+    )
+
+# Must match PYTHON_ZSTANDARD_VERSION in python-zstandard.h.
+__version__ = "0.14.1"
diff --git a/contrib/python/zstandard/py2/zstd.c b/contrib/python/zstandard/py2/zstd.c
new file mode 100644
index 00000000000..3ab69a31139
--- /dev/null
+++ b/contrib/python/zstandard/py2/zstd.c
@@ -0,0 +1,344 @@
+/**
+ * Copyright (c) 2016-present, Gregory Szorc
+ * All rights reserved.
+ *
+ * This software may be modified and distributed under the terms
+ * of the BSD license. See the LICENSE file for details.
+ */
+
+/* A Python C extension for Zstandard. */
+
+#if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h>
+#elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+
+#include "python-zstandard.h"
+
+/* Exception object raised by this file for zstd-level failures. It is not
+   assigned anywhere in this file.
+   NOTE(review): presumably created during constants_module_init() --
+   confirm in c-ext/constants.c. */
+PyObject *ZstdError;
+
+PyDoc_STRVAR(estimate_decompression_context_size__doc__,
+"estimate_decompression_context_size()\n"
+"\n"
+"Estimate the amount of memory allocated to a decompression context.\n"
+);
+
+/*
+ * Module function: return ZSTD_estimateDCtxSize() as a Python integer.
+ *
+ * The function is registered with METH_NOARGS, so it is invoked through a
+ * PyCFunction pointer and receives two arguments (the second is unused).
+ * Declaring both parameters keeps the definition compatible with the pointer
+ * type it is called through; calling a one-parameter function through a
+ * two-parameter pointer is undefined behavior in C.
+ */
+static PyObject* estimate_decompression_context_size(PyObject* self, PyObject* unused) {
+    (void)self;
+    (void)unused;
+
+    return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
+}
+
+/* The documented argument name matches the keyword accepted below
+   ("source"). */
+PyDoc_STRVAR(frame_content_size__doc__,
+"frame_content_size(source)\n"
+"\n"
+"Obtain the decompressed size of a frame."
+);
+
+/*
+ * Module function frame_content_size(source).
+ *
+ * `source` is parsed as a buffer ("y*" on Python 3, "s*" on Python 2) and
+ * must be C-contiguous with at most one dimension.
+ *
+ * Returns a new reference to an integer: the content size reported by
+ * ZSTD_getFrameContentSize(), or -1 when zstd reports the size as unknown.
+ * Returns NULL with an exception set on failure: ValueError for an
+ * unsuitable buffer, ZstdError when zstd reports an error.
+ */
+static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "source",
+        NULL
+    };
+
+    Py_buffer source;
+    PyObject* result = NULL;
+    unsigned long long size;
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size",
+#endif
+        kwlist, &source)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    size = ZSTD_getFrameContentSize(source.buf, source.len);
+
+    if (size == ZSTD_CONTENTSIZE_ERROR) {
+        PyErr_SetString(ZstdError, "error when determining content size");
+    }
+    else if (size == ZSTD_CONTENTSIZE_UNKNOWN) {
+        result = PyLong_FromLong(-1);
+    }
+    else {
+        result = PyLong_FromUnsignedLongLong(size);
+    }
+
+finally:
+    /* The buffer was acquired by the argument parser and must be released on
+       every path after a successful parse. */
+    PyBuffer_Release(&source);
+
+    return result;
+}
+
+/* The documented argument name matches the keyword accepted below
+   ("source"). */
+PyDoc_STRVAR(frame_header_size__doc__,
+"frame_header_size(source)\n"
+"\n"
+"Obtain the size of a frame header.\n"
+);
+
+/*
+ * Module function frame_header_size(source).
+ *
+ * `source` is parsed as a buffer ("y*" on Python 3, "s*" on Python 2) and
+ * must be C-contiguous with at most one dimension.
+ *
+ * Returns a new reference to an integer holding the value of
+ * ZSTD_frameHeaderSize(). Returns NULL with an exception set on failure:
+ * ValueError for an unsuitable buffer, ZstdError (carrying zstd's error
+ * name) when zstd reports an error.
+ */
+static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) {
+    static char* kwlist[] = {
+        "source",
+        NULL
+    };
+
+    Py_buffer source;
+    PyObject* result = NULL;
+    size_t zresult;
+
+#if PY_MAJOR_VERSION >= 3
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size",
+#else
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size",
+#endif
+        kwlist, &source)) {
+        return NULL;
+    }
+
+    if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
+        PyErr_SetString(PyExc_ValueError,
+            "data buffer should be contiguous and have at most one dimension");
+        goto finally;
+    }
+
+    zresult = ZSTD_frameHeaderSize(source.buf, source.len);
+    if (ZSTD_isError(zresult)) {
+        PyErr_Format(ZstdError, "could not determine frame header size: %s",
+            ZSTD_getErrorName(zresult));
+    }
+    else {
+        result = PyLong_FromSize_t(zresult);
+    }
+
+finally:
+    /* The buffer was acquired by the argument parser and must be released on
+       every path after a successful parse. */
+    PyBuffer_Release(&source);
+
+    return result;
+}
+
+/* Docstring for get_frame_parameters(). The function itself is declared in
+   python-zstandard.h and defined outside this file. */
+PyDoc_STRVAR(get_frame_parameters__doc__,
+"get_frame_parameters(data)\n"
+"\n"
+"Obtains a ``FrameParameters`` instance by parsing data.\n");
+
+/* Docstring for train_dictionary(). The function itself is declared in
+   python-zstandard.h and defined outside this file.
+   NOTE(review): the defaults shown in the signature line are documentation
+   only; verify them against the argument parsing in the implementation. */
+PyDoc_STRVAR(train_dictionary__doc__,
+"train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
+" threads=None,notifications=0, dict_id=0, level=0)\n"
+"\n"
+"Train a dictionary from sample data using the COVER algorithm.\n"
+"\n"
+"A compression dictionary of size ``dict_size`` will be created from the\n"
+"iterable of ``samples``. The raw dictionary bytes will be returned.\n"
+"\n"
+"The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n"
+"*segment size* and *dmer size*. A reasonable range for ``k`` is\n"
+"``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
+"``d`` must be less than or equal to ``k``.\n"
+"\n"
+"``steps`` can be specified to control the number of steps through potential\n"
+"values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n"
+"those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n"
+"will be varied in this mode.\n"
+"\n"
+"``threads`` can specify how many threads to use to test various ``k`` and\n"
+"``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n"
+"a single thread is used.\n"
+"\n"
+"When ``k`` and ``d`` are not defined, default values are used and the\n"
+"algorithm will perform multiple iterations - or steps - to try to find\n"
+"ideal parameters. If both ``k`` and ``d`` are specified, then those values\n"
+"will be used. ``steps`` or ``threads`` triggers optimization mode to test\n"
+"multiple ``k`` and ``d`` variations.\n"
+);
+
+/* Module docstring, used by both the Python 2 and Python 3 entry points. */
+static char zstd_doc[] = "Interface to zstandard";
+
+/*
+ * Module-level functions. Each entry gives the Python-visible name, the C
+ * implementation, its calling convention flags and its docstring.
+ * get_frame_parameters and train_dictionary are implemented outside this
+ * file; the other three are defined above.
+ */
+static PyMethodDef zstd_methods[] = {
+ { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
+ METH_NOARGS, estimate_decompression_context_size__doc__ },
+ { "frame_content_size", (PyCFunction)frame_content_size,
+ METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ },
+ { "frame_header_size", (PyCFunction)frame_header_size,
+ METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ },
+ { "get_frame_parameters", (PyCFunction)get_frame_parameters,
+ METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ },
+ { "train_dictionary", (PyCFunction)train_dictionary,
+ METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
+ /* Sentinel entry terminating the table. */
+ { NULL, NULL }
+};
+
+/*
+ * Per-component initializers, each defined in its own file under c-ext/ and
+ * called once from zstd_module_init() below. They return void, so a failure
+ * can only be signalled through a pending Python exception.
+ */
+void bufferutil_module_init(PyObject* mod);
+void compressobj_module_init(PyObject* mod);
+void compressor_module_init(PyObject* mod);
+void compressionparams_module_init(PyObject* mod);
+void constants_module_init(PyObject* mod);
+void compressionchunker_module_init(PyObject* mod);
+void compressiondict_module_init(PyObject* mod);
+void compressionreader_module_init(PyObject* mod);
+void compressionwriter_module_init(PyObject* mod);
+void compressoriterator_module_init(PyObject* mod);
+void decompressor_module_init(PyObject* mod);
+void decompressobj_module_init(PyObject* mod);
+void decompressionreader_module_init(PyObject *mod);
+void decompressionwriter_module_init(PyObject* mod);
+void decompressoriterator_module_init(PyObject* mod);
+void frameparams_module_init(PyObject* mod);
+
+/*
+ * Populate module `m` with every type, constant and helper of the extension.
+ *
+ * Returns nothing; on failure a Python exception is left set, which the
+ * module entry points (or the import machinery) detect.
+ */
+void zstd_module_init(PyObject* m) {
+    /* Component initializers, run in exactly this order. */
+    static void (*const init_steps[])(PyObject*) = {
+        bufferutil_module_init,
+        compressionparams_module_init,
+        compressiondict_module_init,
+        compressobj_module_init,
+        compressor_module_init,
+        compressionchunker_module_init,
+        compressionreader_module_init,
+        compressionwriter_module_init,
+        compressoriterator_module_init,
+        constants_module_init,
+        decompressor_module_init,
+        decompressobj_module_init,
+        decompressionreader_module_init,
+        decompressionwriter_module_init,
+        decompressoriterator_module_init,
+        frameparams_module_init,
+    };
+    size_t i;
+
+    /* python-zstandard relies on unstable zstd C API features. This means
+       that changes in zstd may break expectations in python-zstandard.
+
+       python-zstandard is distributed with a copy of the zstd sources.
+       python-zstandard is only guaranteed to work with the bundled version
+       of zstd.
+
+       However, downstream redistributors or packagers may unbundle zstd
+       from python-zstandard. This can result in a mismatch between zstd
+       versions and API semantics. This essentially "voids the warranty"
+       of python-zstandard and may cause undefined behavior.
+
+       We detect this mismatch here and refuse to load the module if this
+       scenario is detected.
+    */
+    if (ZSTD_VERSION_NUMBER != 10505 || ZSTD_versionNumber() != 10505) {
+        PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
+        return;
+    }
+
+    for (i = 0; i < sizeof(init_steps) / sizeof(init_steps[0]); i++) {
+        init_steps[i](m);
+
+        /* The initializers return void, so a pending exception is the only
+           failure signal. Stop at the first one instead of calling further
+           Python C API functions with an exception already set. */
+        if (PyErr_Occurred()) {
+            return;
+        }
+    }
+}
+
+/* Attribute applied to the module entry point below: default symbol
+   visibility on GCC-compatible compilers (version 4 and later), nothing
+   elsewhere. */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+# define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default")))
+#else
+# define PYTHON_ZSTD_VISIBILITY
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+/* Python 3 module definition. */
+static struct PyModuleDef zstd_module = {
+    PyModuleDef_HEAD_INIT,
+    "zstd",         /* m_name */
+    zstd_doc,       /* m_doc */
+    -1,             /* m_size */
+    zstd_methods    /* m_methods */
+};
+
+/*
+ * Python 3 entry point. Returns the initialized module, or NULL when the
+ * module could not be created or initialization left an exception pending.
+ */
+PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) {
+    PyObject *module = PyModule_Create(&zstd_module);
+
+    if (module == NULL) {
+        return NULL;
+    }
+
+    zstd_module_init(module);
+    if (!PyErr_Occurred()) {
+        return module;
+    }
+
+    /* Initialization failed part-way through: discard the module. */
+    Py_DECREF(module);
+    return NULL;
+}
+#else
+/*
+ * Python 2 entry point. Nothing is returned; any pending exception is left
+ * for the import machinery to report.
+ */
+PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) {
+    PyObject *module = Py_InitModule3("zstd", zstd_methods, zstd_doc);
+
+    if (module != NULL) {
+        zstd_module_init(module);
+    }
+}
+#endif
+
+/*
+ * Attempt to resolve the number of CPUs in the system.
+ *
+ * Returns the count reported by the platform, or 0 when the platform is not
+ * handled below or the query fails. The result is never negative, so callers
+ * can treat 0 as "unknown" without also checking for error sentinels.
+ */
+int cpu_count(void) {
+    int count = 0;
+
+#if defined(_WIN32)
+    SYSTEM_INFO si;
+    si.dwNumberOfProcessors = 0;
+    GetSystemInfo(&si);
+    count = si.dwNumberOfProcessors;
+#elif defined(__APPLE__)
+    int num;
+    size_t size = sizeof(int);
+
+    if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
+        count = num;
+    }
+#elif defined(__linux__)
+    /* sysconf() returns a long, and -1 when the value cannot be determined. */
+    long online = sysconf(_SC_NPROCESSORS_ONLN);
+
+    if (online > 0) {
+        count = (int)online;
+    }
+#elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
+    int mib[2];
+    size_t len = sizeof(count);
+    mib[0] = CTL_HW;
+    mib[1] = HW_NCPU;
+    if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
+        count = 0;
+    }
+#elif defined(__hpux)
+    count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
+#endif
+
+    /* Normalize any platform error sentinel (e.g. -1) to "unknown". */
+    if (count < 0) {
+        count = 0;
+    }
+
+    return count;
+}
+
+/*
+ * Round `i` up to the nearest power of two. Values that already are a power
+ * of two are returned unchanged.
+ *
+ * Works for the full width of size_t: the highest set bit of (i - 1) is
+ * smeared into every lower bit by doubling shifts until the shift reaches
+ * the type's width, then 1 is added.
+ *
+ * Edge cases, both a consequence of unsigned wrap-around: 0 yields 0, and a
+ * value above the largest power of two representable in size_t yields 0.
+ */
+size_t roundpow2(size_t i) {
+    size_t shift;
+
+    i--;
+    /* Shifts of 1, 2, 4, ... stay strictly below the bit width of size_t, so
+       no shift is ever out of range for the type. */
+    for (shift = 1; shift < sizeof(i) * 8; shift <<= 1) {
+        i |= i >> shift;
+    }
+    i++;
+
+    return i;
+}
+
+/* Safer version of _PyBytes_Resize().
+ *
+ * _PyBytes_Resize() only works if the refcount is 1. In some scenarios,
+ * we can get an object with a refcount > 1, even if it was just created
+ * with PyBytes_FromStringAndSize()! That's because (at least) CPython
+ * pre-allocates PyBytes instances of size 1 for every possible byte value.
+ *
+ * When the object is shared, a new bytes object of `size` bytes is created,
+ * the overlapping prefix of the old contents is copied into it, and *obj is
+ * replaced (the reference to the old object is released).
+ *
+ * Returns 0 on success and non-0 on failure.
+ * If non-0 is returned, obj may or may not be NULL.
+ */
+int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) {
+    PyObject* tmp;
+    Py_ssize_t copySize;
+
+    if (Py_REFCNT(*obj) == 1) {
+        return _PyBytes_Resize(obj, size);
+    }
+
+    tmp = PyBytes_FromStringAndSize(NULL, size);
+    if (!tmp) {
+        return -1;
+    }
+
+    /* Copy only what fits: when shrinking, the old object holds more bytes
+       than the new one, and copying its full length would write past the
+       end of the new buffer. */
+    copySize = PyBytes_GET_SIZE(*obj);
+    if (copySize > size) {
+        copySize = size;
+    }
+
+    memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj), copySize);
+
+    Py_DECREF(*obj);
+    *obj = tmp;
+
+    return 0;
+} \ No newline at end of file