diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 09:58:56 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 10:20:20 +0300 |
commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/python/lz4 | |
parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
download | ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/python/lz4')
75 files changed, 13538 insertions, 0 deletions
diff --git a/contrib/python/lz4/py2/.dist-info/METADATA b/contrib/python/lz4/py2/.dist-info/METADATA new file mode 100644 index 0000000000..a896017502 --- /dev/null +++ b/contrib/python/lz4/py2/.dist-info/METADATA @@ -0,0 +1,104 @@ +Metadata-Version: 2.1 +Name: lz4 +Version: 2.2.1 +Summary: LZ4 Bindings for Python +Home-page: https://github.com/python-lz4/python-lz4 +Author: Jonathan Underwood +Author-email: jonathan.underwood@gmail.com +License: UNKNOWN +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: License :: OSI Approved :: BSD License +Classifier: Intended Audience :: Developers +Classifier: Programming Language :: C +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* +Provides-Extra: tests +Provides-Extra: flake8 +Provides-Extra: docs +Provides-Extra: docs +Requires-Dist: sphinx (>=1.6.0); extra == 'docs' +Requires-Dist: sphinx-bootstrap-theme; extra == 'docs' +Provides-Extra: flake8 +Requires-Dist: flake8; extra == 'flake8' +Provides-Extra: tests +Requires-Dist: pytest (!=3.3.0); extra == 'tests' +Requires-Dist: psutil; extra == 'tests' +Requires-Dist: pytest-cov; extra == 'tests' + +========== +python-lz4 +========== + +Status +====== + +.. image:: https://travis-ci.org/python-lz4/python-lz4.svg?branch=master + :target: https://travis-ci.org/python-lz4/python-lz4 + :alt: Build Status + +.. image:: https://ci.appveyor.com/api/projects/status/r2qvw9mlfo63lklo/branch/master?svg=true + :target: https://ci.appveyor.com/project/jonathanunderwood/python-lz4 + :alt: Build Status Windows + +.. 
image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +.. image:: https://codecov.io/gh/python-lz4/python-lz4/branch/codecov/graph/badge.svg + :target: https://codecov.io/gh/python-lz4/python-lz4 + :alt: CodeCov + + +Introduction +============ +This package provides python bindings for the `LZ4 compression library +<https://lz4.github.io/lz4/>`_. + +The bindings provided in this package cover the `frame format +<https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md>`_ and the `block format +<https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md>`_ specifications. The frame +format bindings are the recommended ones to use, as this guarantees +interoperability with other implementations and language bindings. + +The API provided by the frame format bindings follows that of the LZMA, zlib, +gzip and bzip2 compression libraries which are provided with the Python standard +library. As such, these LZ4 bindings should provide a drop-in alternative to the +compression libraries shipped with Python. The package provides context managers +and file handler support. + +The bindings drop the GIL when calling in to the underlying LZ4 library, and is +thread safe. An extensive test suite is included. + +Documenation +============ + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +Full documentation is included with the project. The documentation is +generated using Sphinx. Documentation is also hosted on readthedocs. + +:master: http://python-lz4.readthedocs.io/en/stable/ +:development: http://python-lz4.readthedocs.io/en/latest/ + +Homepage +======== + +The `project homepage <https://www.github.com/python-lz4/python-lz4>`_ is hosted +on Github. Please report any issues you find using the `issue tracker +<https://github.com/python-lz4/python-lz4/issues>`_. 
+ +Licensing +========= +Code specific to this project is covered by the `BSD 3-Clause License +<http://opensource.org/licenses/BSD-3-Clause>`_ + + + diff --git a/contrib/python/lz4/py2/.dist-info/top_level.txt b/contrib/python/lz4/py2/.dist-info/top_level.txt new file mode 100644 index 0000000000..4ef6877a79 --- /dev/null +++ b/contrib/python/lz4/py2/.dist-info/top_level.txt @@ -0,0 +1 @@ +lz4 diff --git a/contrib/python/lz4/py2/LICENSE b/contrib/python/lz4/py2/LICENSE new file mode 100644 index 0000000000..518770111c --- /dev/null +++ b/contrib/python/lz4/py2/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2012-2013, Steeve Morin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of Steeve Morin nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/python/lz4/py2/README.rst b/contrib/python/lz4/py2/README.rst new file mode 100644 index 0000000000..18fb95f14c --- /dev/null +++ b/contrib/python/lz4/py2/README.rst @@ -0,0 +1,69 @@ +========== +python-lz4 +========== + +Status +====== + +.. image:: https://travis-ci.org/python-lz4/python-lz4.svg?branch=master + :target: https://travis-ci.org/python-lz4/python-lz4 + :alt: Build Status + +.. image:: https://ci.appveyor.com/api/projects/status/r2qvw9mlfo63lklo/branch/master?svg=true + :target: https://ci.appveyor.com/project/jonathanunderwood/python-lz4 + :alt: Build Status Windows + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +.. image:: https://codecov.io/gh/python-lz4/python-lz4/branch/codecov/graph/badge.svg + :target: https://codecov.io/gh/python-lz4/python-lz4 + :alt: CodeCov + + +Introduction +============ +This package provides python bindings for the `LZ4 compression library +<https://lz4.github.io/lz4/>`_. + +The bindings provided in this package cover the `frame format +<https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md>`_ and the `block format +<https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md>`_ specifications. The frame +format bindings are the recommended ones to use, as this guarantees +interoperability with other implementations and language bindings. 
+ +The API provided by the frame format bindings follows that of the LZMA, zlib, +gzip and bzip2 compression libraries which are provided with the Python standard +library. As such, these LZ4 bindings should provide a drop-in alternative to the +compression libraries shipped with Python. The package provides context managers +and file handler support. + +The bindings drop the GIL when calling in to the underlying LZ4 library, and is +thread safe. An extensive test suite is included. + +Documenation +============ + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +Full documentation is included with the project. The documentation is +generated using Sphinx. Documentation is also hosted on readthedocs. + +:master: http://python-lz4.readthedocs.io/en/stable/ +:development: http://python-lz4.readthedocs.io/en/latest/ + +Homepage +======== + +The `project homepage <https://www.github.com/python-lz4/python-lz4>`_ is hosted +on Github. Please report any issues you find using the `issue tracker +<https://github.com/python-lz4/python-lz4/issues>`_. + +Licensing +========= +Code specific to this project is covered by the `BSD 3-Clause License +<http://opensource.org/licenses/BSD-3-Clause>`_ + diff --git a/contrib/python/lz4/py2/lz4/__init__.py b/contrib/python/lz4/py2/lz4/__init__.py new file mode 100644 index 0000000000..af0fa05b3f --- /dev/null +++ b/contrib/python/lz4/py2/lz4/__init__.py @@ -0,0 +1,20 @@ +# Although the canonical way to get the package version is using pkg_resources +# as below, this turns out to be very slow on systems with lots of packages. +# So, until that is remedied, we'll import the version from a local file +# created by setuptools_scm. 
+ +# from pkg_resources import get_distribution, DistributionNotFound +# try: +# __version__ = get_distribution(__name__).version +# except DistributionNotFound: +# # package is not installed +# pass + +from .version import version as __version__ +VERSION = __version__ + + +from ._version import ( # noqa: F401 + library_version_number, + library_version_string, +) diff --git a/contrib/python/lz4/py2/lz4/_version.c b/contrib/python/lz4/py2/lz4/_version.c new file mode 100644 index 0000000000..d477add649 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/_version.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016 Jonathan Underwood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Steeve Morin nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#if defined(_WIN32) && defined(_MSC_VER) +#define inline __inline +#elif defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX) +#define inline +#endif + +#include <py3c.h> +#include <py3c/capsulethunk.h> + +#include <stdlib.h> +#include <lz4.h> +#include <lz4hc.h> + +#ifndef Py_UNUSED /* This is already defined for Python 3.4 onwards */ +#ifdef __GNUC__ +#define Py_UNUSED(name) _unused_ ## name __attribute__((unused)) +#else +#define Py_UNUSED(name) _unused_ ## name +#endif +#endif + +static PyObject * +library_version_number (PyObject * Py_UNUSED (self), PyObject * Py_UNUSED (args)) +{ + return Py_BuildValue ("i", LZ4_versionNumber ()); +} + +static PyObject * +library_version_string (PyObject * Py_UNUSED (self), PyObject * Py_UNUSED (args)) +{ + return Py_BuildValue ("s", LZ4_versionString ()); +} + +PyDoc_STRVAR +( + library_version_number__doc, + "library_version_number()\n\n" \ + "Returns the version number of the LZ4 library.\n" \ + "\n" \ + "Args:\n" \ + " None\n" \ + "\n" \ + "Returns:\n" \ + " int: version number eg. 10705" + ); + +PyDoc_STRVAR +( + library_version_string__doc, + "library_version_string()\n\n" \ + "Returns the version number of the LZ4 library as a string\n" \ + "containing the semantic version.\n" \ + "\n" \ + "Args:\n" \ + " None\n" \ + "\n" \ + "Returns:\n" \ + " str: version number eg. 
\"1.7.5\"" + ); + +static PyMethodDef module_methods[] = { + { + "library_version_number", + (PyCFunction) library_version_number, + METH_VARARGS, + library_version_number__doc + }, + { + "library_version_string", + (PyCFunction) library_version_string, + METH_VARARGS, + library_version_string__doc + }, + { + /* Sentinel */ + NULL, + NULL, + 0, + NULL + } +}; + +static struct PyModuleDef moduledef = + { + PyModuleDef_HEAD_INIT, + "_version", + NULL, + -1, + module_methods + }; + +MODULE_INIT_FUNC (_version) +{ + PyObject *module = PyModule_Create (&moduledef); + + if (module == NULL) + return NULL; + + return module; +} diff --git a/contrib/python/lz4/py2/lz4/block/__init__.py b/contrib/python/lz4/py2/lz4/block/__init__.py new file mode 100644 index 0000000000..6662bab4de --- /dev/null +++ b/contrib/python/lz4/py2/lz4/block/__init__.py @@ -0,0 +1 @@ +from ._block import compress, decompress, LZ4BlockError # noqa: F401 diff --git a/contrib/python/lz4/py2/lz4/block/_block.c b/contrib/python/lz4/py2/lz4/block/_block.c new file mode 100644 index 0000000000..d55b1d92d4 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/block/_block.c @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2012-2018, Steeve Morin, Jonathan Underwood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Steeve Morin nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#if defined(_WIN32) && defined(_MSC_VER) +#define inline __inline +#elif defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX) +#define inline +#endif + +#include <py3c.h> +#include <py3c/capsulethunk.h> + +#include <stdlib.h> +#include <math.h> +#include <lz4.h> +#include <lz4hc.h> + +#ifndef Py_UNUSED /* This is already defined for Python 3.4 onwards */ +#ifdef __GNUC__ +#define Py_UNUSED(name) _unused_ ## name __attribute__((unused)) +#else +#define Py_UNUSED(name) _unused_ ## name +#endif +#endif + +#if defined(_WIN32) && defined(_MSC_VER) +#if _MSC_VER >= 1600 +#include <stdint.h> +#else /* _MSC_VER >= 1600 */ +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +#endif /* _MSC_VER >= 1600 */ +#endif + +static inline void +store_le32 (char *c, uint32_t x) +{ + c[0] = x & 0xff; + c[1] = (x >> 8) & 0xff; + c[2] = (x >> 16) & 0xff; + c[3] = (x >> 24) & 0xff; +} + +static inline uint32_t +load_le32 (const char *c) +{ + const uint8_t *d = (const uint8_t *) c; + return d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); +} + +static const 
size_t hdr_size = sizeof (uint32_t); + +typedef enum +{ + DEFAULT, + FAST, + HIGH_COMPRESSION +} compression_type; + +static PyObject * LZ4BlockError; + +static inline int +lz4_compress_generic (int comp, char* source, char* dest, int source_size, int dest_size, + char* dict, int dict_size, int acceleration, int compression) +{ + if (comp != HIGH_COMPRESSION) + { + LZ4_stream_t lz4_state; + LZ4_resetStream (&lz4_state); + if (dict) + { + LZ4_loadDict (&lz4_state, dict, dict_size); + } + if (comp != FAST) + { + acceleration = 1; + } + return LZ4_compress_fast_continue (&lz4_state, source, dest, source_size, dest_size, acceleration); + } + else + { + LZ4_streamHC_t lz4_state; + LZ4_resetStreamHC (&lz4_state, compression); + if (dict) + { + LZ4_loadDictHC (&lz4_state, dict, dict_size); + } + return LZ4_compress_HC_continue (&lz4_state, source, dest, source_size, dest_size); + } +} + +#ifdef inline +#undef inline +#endif + +static PyObject * +compress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs) +{ + const char *mode = "default"; + size_t dest_size, total_size; + int acceleration = 1; + int compression = 9; + int store_size = 1; + PyObject *py_dest; + char *dest, *dest_start; + compression_type comp; + int output_size; + Py_buffer source; + int source_size; + int return_bytearray = 0; + Py_buffer dict = {0}; + static char *argnames[] = { + "source", + "mode", + "store_size", + "acceleration", + "compression", + "return_bytearray", + "dict", + NULL + }; + + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "y*|spiipz*", argnames, + &source, + &mode, &store_size, &acceleration, &compression, + &return_bytearray, &dict)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "s*|siiiiz*", argnames, + &source, + &mode, &store_size, &acceleration, &compression, + &return_bytearray, &dict)) + { + return NULL; + } +#endif + + if (source.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + 
PyErr_Format(PyExc_OverflowError, + "Input too large for LZ4 API"); + return NULL; + } + + if (dict.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Dictionary too large for LZ4 API"); + return NULL; + } + + source_size = (int) source.len; + + if (!strncmp (mode, "default", sizeof ("default"))) + { + comp = DEFAULT; + } + else if (!strncmp (mode, "fast", sizeof ("fast"))) + { + comp = FAST; + } + else if (!strncmp (mode, "high_compression", sizeof ("high_compression"))) + { + comp = HIGH_COMPRESSION; + } + else + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format (PyExc_ValueError, + "Invalid mode argument: %s. Must be one of: standard, fast, high_compression", + mode); + return NULL; + } + + dest_size = LZ4_compressBound (source_size); + + if (store_size) + { + total_size = dest_size + hdr_size; + } + else + { + total_size = dest_size; + } + + dest = PyMem_Malloc (total_size * sizeof * dest); + if (dest == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + + if (store_size) + { + store_le32 (dest, source_size); + dest_start = dest + hdr_size; + } + else + { + dest_start = dest; + } + + output_size = lz4_compress_generic (comp, source.buf, dest_start, source_size, + (int) dest_size, dict.buf, (int) dict.len, + acceleration, compression); + + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + + if (output_size <= 0) + { + PyErr_SetString (LZ4BlockError, "Compression failed"); + PyMem_Free (dest); + return NULL; + } + + if (store_size) + { + output_size += (int) hdr_size; + } + + if (return_bytearray) + { + py_dest = PyByteArray_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + else + { + py_dest = PyBytes_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + + PyMem_Free (dest); + + if (py_dest == NULL) + { + return PyErr_NoMemory (); + } + + return py_dest; +} + +static PyObject * +decompress (PyObject * Py_UNUSED 
(self), PyObject * args, PyObject * kwargs) +{ + Py_buffer source; + const char * source_start; + size_t source_size; + PyObject *py_dest; + char *dest; + int output_size; + size_t dest_size; + int uncompressed_size = -1; + int return_bytearray = 0; + Py_buffer dict = {0}; + static char *argnames[] = { + "source", + "uncompressed_size", + "return_bytearray", + "dict", + NULL + }; + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "y*|ipz*", argnames, + &source, &uncompressed_size, + &return_bytearray, &dict)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "s*|iiz*", argnames, + &source, &uncompressed_size, + &return_bytearray, &dict)) + { + return NULL; + } +#endif + + if (source.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Input too large for LZ4 API"); + return NULL; + } + + if (dict.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Dictionary too large for LZ4 API"); + return NULL; + } + + source_start = (const char *) source.buf; + source_size = (int) source.len; + + if (uncompressed_size >= 0) + { + dest_size = uncompressed_size; + } + else + { + if (source_size < hdr_size) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_SetString (PyExc_ValueError, "Input source data size too small"); + return NULL; + } + dest_size = load_le32 (source_start); + source_start += hdr_size; + source_size -= hdr_size; + } + + if (dest_size > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format (PyExc_ValueError, "Invalid size: 0x%zu", + dest_size); + return NULL; + } + + dest = PyMem_Malloc (dest_size * sizeof * dest); + if (dest == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + + output_size = + LZ4_decompress_safe_usingDict (source_start, dest, source_size, (int) dest_size, + dict.buf, (int) dict.len); + + Py_END_ALLOW_THREADS + + 
PyBuffer_Release(&source); + PyBuffer_Release(&dict); + + if (output_size < 0) + { + PyErr_Format (LZ4BlockError, + "Decompression failed: corrupt input or insufficient space in destination buffer. Error code: %u", + -output_size); + PyMem_Free (dest); + return NULL; + } + else if (((size_t)output_size != dest_size) && (uncompressed_size < 0)) + { + PyErr_Format (LZ4BlockError, + "Decompressor wrote %u bytes, but %zu bytes expected from header", + output_size, dest_size); + PyMem_Free (dest); + return NULL; + } + + if (return_bytearray) + { + py_dest = PyByteArray_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + else + { + py_dest = PyBytes_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + + PyMem_Free (dest); + + if (py_dest == NULL) + { + return PyErr_NoMemory (); + } + + return py_dest; +} + +PyDoc_STRVAR(compress__doc, + "compress(source, mode='default', acceleration=1, compression=0, return_bytearray=False)\n\n" \ + "Compress source, returning the compressed data as a string.\n" \ + "Raises an exception if any error occurs.\n" \ + "\n" \ + "Args:\n" \ + " source (str, bytes or buffer-compatible object): Data to compress\n" \ + "\n" \ + "Keyword Args:\n" \ + " mode (str): If ``'default'`` or unspecified use the default LZ4\n" \ + " compression mode. Set to ``'fast'`` to use the fast compression\n" \ + " LZ4 mode at the expense of compression. Set to\n" \ + " ``'high_compression'`` to use the LZ4 high-compression mode at\n" \ + " the exepense of speed.\n" \ + " acceleration (int): When mode is set to ``'fast'`` this argument\n" \ + " specifies the acceleration. The larger the acceleration, the\n" \ + " faster the but the lower the compression. The default\n" \ + " compression corresponds to a value of ``1``.\n" \ + " compression (int): When mode is set to ``high_compression`` this\n" \ + " argument specifies the compression. Valid values are between\n" \ + " ``1`` and ``12``. 
Values between ``4-9`` are recommended, and\n" \ + " ``9`` is the default.\n" + " store_size (bool): If ``True`` (the default) then the size of the\n" \ + " uncompressed data is stored at the start of the compressed\n" \ + " block.\n" \ + " return_bytearray (bool): If ``False`` (the default) then the function\n" \ + " will return a bytes object. If ``True``, then the function will\n" \ + " return a bytearray object.\n\n" \ + " dict (str, bytes or buffer-compatible object): If specified, perform\n" \ + " compression using this initial dictionary.\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data.\n"); + +PyDoc_STRVAR(decompress__doc, + "decompress(source, uncompressed_size=-1, return_bytearray=False)\n\n" \ + "Decompress source, returning the uncompressed data as a string.\n" \ + "Raises an exception if any error occurs.\n" \ + "\n" \ + "Args:\n" \ + " source (str, bytes or buffer-compatible object): Data to decompress.\n" \ + "\n" \ + "Keyword Args:\n" \ + " uncompressed_size (int): If not specified or negative, the uncompressed\n" \ + " data size is read from the start of the source block. If specified,\n" \ + " it is assumed that the full source data is compressed data. If this\n" \ + " argument is specified, it is considered to be a maximum possible size\n" \ + " for the buffer used to hold the uncompressed data, and so less data\n" \ + " may be returned. If `uncompressed_size` is too small, `LZ4BlockError`\n" \ + " will be raised. By catching `LZ4BlockError` it is possible to increase\n" \ + " `uncompressed_size` and try again.\n" \ + " return_bytearray (bool): If ``False`` (the default) then the function\n" \ + " will return a bytes object. 
If ``True``, then the function will\n" \ + " return a bytearray object.\n\n" \ + " dict (str, bytes or buffer-compatible object): If specified, perform\n" \ + " decompression using this initial dictionary.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Decompressed data.\n" \ + "\n" \ + "Raises:\n" \ + " LZ4BlockError: raised if the call to the LZ4 library fails. This can be\n" \ + " caused by `uncompressed_size` being too small, or invalid data.\n"); + +PyDoc_STRVAR(lz4block__doc, + "A Python wrapper for the LZ4 block protocol" + ); + +static PyMethodDef module_methods[] = { + { + "compress", + (PyCFunction) compress, + METH_VARARGS | METH_KEYWORDS, + compress__doc + }, + { + "decompress", + (PyCFunction) decompress, + METH_VARARGS | METH_KEYWORDS, + decompress__doc + }, + { + /* Sentinel */ + NULL, + NULL, + 0, + NULL + } +}; + +static struct PyModuleDef moduledef = +{ + PyModuleDef_HEAD_INIT, + "_block", + lz4block__doc, + -1, + module_methods +}; + +MODULE_INIT_FUNC (_block) +{ + PyObject *module = PyModule_Create (&moduledef); + + if (module == NULL) + return NULL; + + PyModule_AddIntConstant (module, "HC_LEVEL_MIN", LZ4HC_CLEVEL_MIN); + PyModule_AddIntConstant (module, "HC_LEVEL_DEFAULT", LZ4HC_CLEVEL_DEFAULT); + PyModule_AddIntConstant (module, "HC_LEVEL_OPT_MIN", LZ4HC_CLEVEL_OPT_MIN); + PyModule_AddIntConstant (module, "HC_LEVEL_MAX", LZ4HC_CLEVEL_MAX); + + LZ4BlockError = PyErr_NewExceptionWithDoc("_block.LZ4BlockError", "Call to LZ4 library failed.", NULL, NULL); + if (LZ4BlockError == NULL) + { + return NULL; + } + Py_INCREF(LZ4BlockError); + PyModule_AddObject(module, "LZ4BlockError", LZ4BlockError); + + return module; +} diff --git a/contrib/python/lz4/py2/lz4/frame/__init__.py b/contrib/python/lz4/py2/lz4/frame/__init__.py new file mode 100644 index 0000000000..5fa03ce673 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/frame/__init__.py @@ -0,0 +1,837 @@ +import lz4 +import io +import os +import builtins +import sys +from ._frame import ( # 
noqa: F401 + compress, + decompress, + create_compression_context, + compress_begin, + compress_chunk, + compress_flush, + create_decompression_context, + reset_decompression_context, + decompress_chunk, + get_frame_info, + BLOCKSIZE_DEFAULT as _BLOCKSIZE_DEFAULT, + BLOCKSIZE_MAX64KB as _BLOCKSIZE_MAX64KB, + BLOCKSIZE_MAX256KB as _BLOCKSIZE_MAX256KB, + BLOCKSIZE_MAX1MB as _BLOCKSIZE_MAX1MB, + BLOCKSIZE_MAX4MB as _BLOCKSIZE_MAX4MB, + __doc__ as _doc +) + +__doc__ = _doc + +try: + import _compression # Python 3.6 and later +except ImportError: + from . import _compression + + +BLOCKSIZE_DEFAULT = _BLOCKSIZE_DEFAULT +"""Specifier for the default block size. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_DEFAULT`` will instruct the LZ4 +library to use the default maximum blocksize. This is currently equivalent to +`lz4.frame.BLOCKSIZE_MAX64KB` + +""" + +BLOCKSIZE_MAX64KB = _BLOCKSIZE_MAX64KB +"""Specifier for a maximum block size of 64 kB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX64KB`` will instruct the LZ4 +library to create blocks containing a maximum of 64 kB of uncompressed data. + +""" + +BLOCKSIZE_MAX256KB = _BLOCKSIZE_MAX256KB +"""Specifier for a maximum block size of 256 kB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX256KB`` will instruct the LZ4 +library to create blocks containing a maximum of 256 kB of uncompressed data. + +""" + +BLOCKSIZE_MAX1MB = _BLOCKSIZE_MAX1MB +"""Specifier for a maximum block size of 1 MB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX1MB`` will instruct the LZ4 +library to create blocks containing a maximum of 1 MB of uncompressed data. + +""" + +BLOCKSIZE_MAX4MB = _BLOCKSIZE_MAX4MB +"""Specifier for a maximum block size of 4 MB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX4MB`` will instruct the LZ4 +library to create blocks containing a maximum of 4 MB of uncompressed data. + +""" + +COMPRESSIONLEVEL_MIN = 0 +"""Specifier for the minimum compression level. 
+ +Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MIN`` will +instruct the LZ4 library to use a compression level of 0 + +""" + +COMPRESSIONLEVEL_MINHC = 3 +"""Specifier for the minimum compression level for high compression mode. + +Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MINHC`` will +instruct the LZ4 library to use a compression level of 3, the minimum for the +high compression mode. + +""" + +COMPRESSIONLEVEL_MAX = 16 +"""Specifier for the maximum compression level. + +Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MAX`` will +instruct the LZ4 library to use a compression level of 16, the highest +compression level available. + +""" + + +class LZ4FrameCompressor(object): + """Create a LZ4 frame compressor object. + + This object can be used to compress data incrementally. + + Args: + block_size (int): Specifies the maximum blocksize to use. + Options: + + - `lz4.frame.BLOCKSIZE_DEFAULT`: the lz4 library default + - `lz4.frame.BLOCKSIZE_MAX64KB`: 64 kB + - `lz4.frame.BLOCKSIZE_MAX256KB`: 256 kB + - `lz4.frame.BLOCKSIZE_MAX1MB`: 1 MB + - `lz4.frame.BLOCKSIZE_MAX4MB`: 4 MB + + If unspecified, will default to `lz4.frame.BLOCKSIZE_DEFAULT` which + is equal to `lz4.frame.BLOCKSIZE_MAX64KB`. + block_linked (bool): Specifies whether to use block-linked + compression. If ``True``, the compression ratio is improved, + especially for small block sizes. If ``False`` the blocks are + compressed independently. The default is ``True``. + compression_level (int): Specifies the level of compression used. + Values between 0-16 are valid, with 0 (default) being the + lowest compression (0-2 are the same value), and 16 the highest. + Values above 16 will be treated as 16. + Values between 4-9 are recommended. 0 is the default. 
+ The following module constants are provided as a convenience: + + - `lz4.frame.COMPRESSIONLEVEL_MIN`: Minimum compression (0) + - `lz4.frame.COMPRESSIONLEVEL_MINHC`: Minimum high-compression (3) + - `lz4.frame.COMPRESSIONLEVEL_MAX`: Maximum compression (16) + + content_checksum (bool): Specifies whether to enable checksumming of + the payload content. If ``True``, a checksum of the uncompressed + data is stored at the end of the compressed frame which is checked + during decompression. The default is ``False``. + block_checksum (bool): Specifies whether to enable checksumming of + the content of each block. If ``True`` a checksum of the + uncompressed data in each block in the frame is stored at the end + of each block. If present, these checksums will be used to + validate the data during decompression. The default is ``False``, + meaning block checksums are not calculated and stored. This + functionality is only supported if the underlying LZ4 library has + version >= 1.8.0. Attempting to set this value to ``True`` with a + version of LZ4 < 1.8.0 will cause a ``RuntimeError`` to be raised. + auto_flush (bool): When ``False``, the LZ4 library may buffer data + until a block is full. When ``True`` no buffering occurs, and + partially full blocks may be returned. The default is ``False``. + return_bytearray (bool): When ``False`` a ``bytes`` object is returned + from the calls to methods of this class. When ``True`` a + ``bytearray`` object will be returned. The default is ``False``. 
+ + """ + + def __init__(self, + block_size=BLOCKSIZE_DEFAULT, + block_linked=True, + compression_level=COMPRESSIONLEVEL_MIN, + content_checksum=False, + block_checksum=False, + auto_flush=False, + return_bytearray=False): + self.block_size = block_size + self.block_linked = block_linked + self.compression_level = compression_level + self.content_checksum = content_checksum + if block_checksum and lz4.library_version_number() < 10800: + raise RuntimeError( + 'Attempt to set block_checksum to True with LZ4 library' + 'version < 10800' + ) + self.block_checksum = block_checksum + self.auto_flush = auto_flush + self.return_bytearray = return_bytearray + self._context = None + self._started = False + + def __enter__(self): + # All necessary initialization is done in __init__ + return self + + def __exit__(self, exception_type, exception, traceback): + self.block_size = None + self.block_linked = None + self.compression_level = None + self.content_checksum = None + self.block_checksum = None + self.auto_flush = None + self.return_bytearray = None + self._context = None + self._started = False + + def begin(self, source_size=0): + """Begin a compression frame. + + The returned data contains frame header information. The data returned + from subsequent calls to ``compress()`` should be concatenated with + this header. + + Keyword Args: + source_size (int): Optionally specify the total size of the + uncompressed data. If specified, will be stored in the + compressed frame header as an 8-byte field for later use + during decompression. Default is 0 (no size stored). 
+ + Returns: + bytes or bytearray: frame header data + + """ + + if self._started is False: + self._context = create_compression_context() + result = compress_begin( + self._context, + block_size=self.block_size, + block_linked=self.block_linked, + compression_level=self.compression_level, + content_checksum=self.content_checksum, + block_checksum=self.block_checksum, + auto_flush=self.auto_flush, + return_bytearray=self.return_bytearray, + source_size=source_size + ) + self._started = True + return result + else: + raise RuntimeError( + 'LZ4FrameCompressor.begin() called after already initialized' + ) + + def compress(self, data): # noqa: F811 + """Compresses data and returns it. + + This compresses ``data`` (a ``bytes`` object), returning a bytes or + bytearray object containing compressed data the input. + + If ``auto_flush`` has been set to ``False``, some of ``data`` may be + buffered internally, for use in later calls to + `LZ4FrameCompressor.compress()` and `LZ4FrameCompressor.flush()`. + + The returned data should be concatenated with the output of any + previous calls to `compress()` and a single call to + `compress_begin()`. + + Args: + data (str, bytes or buffer-compatible object): data to compress + + Returns: + bytes or bytearray: compressed data + + """ + if self._context is None: + raise RuntimeError('compress called after flush()') + + if self._started is False: + raise RuntimeError('compress called before compress_begin()') + + result = compress_chunk( + self._context, data, + return_bytearray=self.return_bytearray + ) + + return result + + def flush(self): + """Finish the compression process. + + This returns a ``bytes`` or ``bytearray`` object containing any data + stored in the compressor's internal buffers and a frame footer. + + The LZ4FrameCompressor instance may be re-used after this method has + been called to create a new frame of compressed data. + + Returns: + bytes or bytearray: compressed data and frame footer. 
+ + """ + result = compress_flush( + self._context, + end_frame=True, + return_bytearray=self.return_bytearray + ) + self._context = None + self._started = False + return result + + def reset(self): + """Reset the `LZ4FrameCompressor` instance. + + This allows the `LZ4FrameCompression` instance to be re-used after an + error. + + """ + self._context = None + self._started = False + + +class LZ4FrameDecompressor(object): + """Create a LZ4 frame decompressor object. + + This can be used to decompress data incrementally. + + For a more convenient way of decompressing an entire compressed frame at + once, see `lz4.frame.decompress()`. + + Args: + return_bytearray (bool): When ``False`` a bytes object is returned from + the calls to methods of this class. When ``True`` a bytearray + object will be returned. The default is ``False``. + + Attributes: + eof (bool): ``True`` if the end-of-stream marker has been reached. + ``False`` otherwise. + unused_data (bytes): Data found after the end of the compressed stream. + Before the end of the frame is reached, this will be ``b''``. + needs_input (bool): ``False`` if the ``decompress()`` method can + provide more decompressed data before requiring new uncompressed + input. ``True`` otherwise. + + """ + + def __init__(self, return_bytearray=False): + self._context = create_decompression_context() + self.eof = False + self.needs_input = True + self.unused_data = None + self._unconsumed_data = b'' + self._return_bytearray = return_bytearray + + def __enter__(self): + # All necessary initialization is done in __init__ + return self + + def __exit__(self, exception_type, exception, traceback): + self._context = None + self.eof = None + self.needs_input = None + self.unused_data = None + self._unconsumed_data = None + self._return_bytearray = None + + def reset(self): + """Reset the decompressor state. + + This is useful after an error occurs, allowing re-use of the instance. 
+ + """ + reset_decompression_context(self._context) + self.eof = False + self.needs_input = True + self.unused_data = None + self._unconsumed_data = b'' + + def decompress(self, data, max_length=-1): # noqa: F811 + """Decompresses part or all of an LZ4 frame of compressed data. + + The returned data should be concatenated with the output of any + previous calls to `decompress()`. + + If ``max_length`` is non-negative, returns at most ``max_length`` bytes + of decompressed data. If this limit is reached and further output can + be produced, the `needs_input` attribute will be set to ``False``. In + this case, the next call to `decompress()` may provide data as + ``b''`` to obtain more of the output. In all cases, any unconsumed data + from previous calls will be prepended to the input data. + + If all of the input ``data`` was decompressed and returned (either + because this was less than ``max_length`` bytes, or because + ``max_length`` was negative), the `needs_input` attribute will be set + to ``True``. + + If an end of frame marker is encountered in the data during + decompression, decompression will stop at the end of the frame, and any + data after the end of frame is available from the `unused_data` + attribute. In this case, the `LZ4FrameDecompressor` instance is reset + and can be used for further decompression. + + Args: + data (str, bytes or buffer-compatible object): compressed data to + decompress + + Keyword Args: + max_length (int): If this is non-negative, this method returns at + most ``max_length`` bytes of decompressed data. 
+ + Returns: + bytes: Uncompressed data + + """ + + if self._unconsumed_data: + data = self._unconsumed_data + data + + decompressed, bytes_read, eoframe = decompress_chunk( + self._context, + data, + max_length=max_length, + return_bytearray=self._return_bytearray, + ) + + if bytes_read < len(data): + if eoframe: + self.unused_data = data[bytes_read:] + else: + self._unconsumed_data = data[bytes_read:] + self.needs_input = False + else: + self._unconsumed_data = b'' + self.needs_input = True + self.unused_data = None + + self.eof = eoframe + + return decompressed + + +_MODE_CLOSED = 0 +_MODE_READ = 1 +# Value 2 no longer used +_MODE_WRITE = 3 + + +class LZ4FrameFile(_compression.BaseStream): + """A file object providing transparent LZ4F (de)compression. + + An LZ4FFile can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + Note that LZ4FFile provides a *binary* file interface - data read is + returned as bytes, and data to be written must be given as bytes. + + When opening a file for writing, the settings used by the compressor can be + specified. The underlying compressor object is + `lz4.frame.LZ4FrameCompressor`. See the docstrings for that class for + details on compression options. + + Args: + filename(str, bytes, PathLike, file object): can be either an actual + file name (given as a str, bytes, or + PathLike object), in which case the named file is opened, or it + can be an existing file object to read from or write to. + + Keyword Args: + mode(str): mode can be ``'r'`` for reading (default), ``'w'`` for + (over)writing, ``'x'`` for creating exclusively, or ``'a'`` + for appending. These can equivalently be given as ``'rb'``, + ``'wb'``, ``'xb'`` and ``'ab'`` respectively. + return_bytearray (bool): When ``False`` a bytes object is returned from + the calls to methods of this class. When ``True`` a ``bytearray`` + object will be returned. The default is ``False``. 
+ source_size (int): Optionally specify the total size of the + uncompressed data. If specified, will be stored in the compressed + frame header as an 8-byte field for later use during decompression. + Default is ``0`` (no size stored). Only used for writing + compressed files. + block_size (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_linked (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + compression_level (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + content_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + auto_flush (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + + """ + + def __init__(self, filename=None, mode='r', + block_size=BLOCKSIZE_DEFAULT, + block_linked=True, + compression_level=COMPRESSIONLEVEL_MIN, + content_checksum=False, + block_checksum=False, + auto_flush=False, + return_bytearray=False, + source_size=0): + + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + if mode in ('r', 'rb'): + mode_code = _MODE_READ + elif mode in ('w', 'wb', 'a', 'ab', 'x', 'xb'): + mode_code = _MODE_WRITE + self._compressor = LZ4FrameCompressor( + block_size=block_size, + block_linked=block_linked, + compression_level=compression_level, + content_checksum=content_checksum, + block_checksum=block_checksum, + auto_flush=auto_flush, + return_bytearray=return_bytearray, + ) + self._pos = 0 + else: + raise ValueError('Invalid mode: {!r}'.format(mode)) + + if sys.version_info > (3, 6): + path_test = isinstance(filename, (str, bytes, os.PathLike)) + else: + path_test = isinstance(filename, (str, bytes)) + + if path_test is True: + if 'b' not in mode: + mode += 'b' + self._fp = builtins.open(filename, mode) + self._closefp = True + self._mode = mode_code + elif hasattr(filename, 'read') or hasattr(filename, 'write'): + self._fp = filename + self._mode = 
mode_code + else: + raise TypeError( + 'filename must be a str, bytes, file or PathLike object' + ) + + if self._mode == _MODE_READ: + raw = _compression.DecompressReader(self._fp, LZ4FrameDecompressor) + self._buffer = io.BufferedReader(raw) + + if self._mode == _MODE_WRITE: + self._fp.write( + self._compressor.begin(source_size=source_size) + ) + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. + """ + if self._mode == _MODE_CLOSED: + return + try: + if self._mode == _MODE_READ: + self._buffer.close() + self._buffer = None + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + @property + def closed(self): + """Returns ``True`` if this file is closed. + + Returns: + bool: ``True`` if the file is closed, ``False`` otherwise. + + """ + return self._mode == _MODE_CLOSED + + def fileno(self): + """Return the file descriptor for the underlying file. + + Returns: + file object: file descriptor for file. + + """ + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking. + + Returns: + bool: ``True`` if the file supports seeking, ``False`` otherwise. + + """ + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading. + + Returns: + bool: ``True`` if the file was opened for reading, ``False`` + otherwise. + + """ + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing. + + Returns: + bool: ``True`` if the file was opened for writing, ``False`` + otherwise. 
+ + """ + self._check_not_closed() + return self._mode == _MODE_WRITE + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. The exact + number of bytes returned is unspecified. + + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + # Relies on the undocumented fact that BufferedReader.peek() always + # returns at least one byte (except at EOF) + return self._buffer.peek(size) + + def read(self, size=-1): + """Read up to ``size`` uncompressed bytes from the file. + + If ``size`` is negative or omitted, read until ``EOF`` is reached. + Returns ``b''`` if the file is already at ``EOF``. + + Args: + size(int): If non-negative, specifies the maximum number of + uncompressed bytes to return. + + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to ``size`` uncompressed bytes. + + This method tries to avoid making multiple reads from the underlying + stream. + + This method reads up to a buffer's worth of data if ``size`` is + negative. + + Returns ``b''`` if the file is at EOF. + + Args: + size(int): If non-negative, specifies the maximum number of + uncompressed bytes to return. + + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + if size < 0: + size = io.DEFAULT_BUFFER_SIZE + return self._buffer.read1(size) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which case the + line may be incomplete). Returns b'' if already at EOF. + + Args: + size(int): If non-negative, specifies the maximum number of + uncompressed bytes to return. 
+ + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + return self._buffer.readline(size) + + def write(self, data): + """Write a bytes object to the file. + + Returns the number of uncompressed bytes written, which is always + ``len(data)``. Note that due to buffering, the file on disk may not + reflect the data written until close() is called. + + Args: + data(bytes): uncompressed data to compress and write to the file + + Returns: + int: the number of uncompressed bytes written to the file + + """ + self._check_can_write() + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += len(data) + return len(data) + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by ``offset``, relative to the position + indicated by ``whence``. Possible values for ``whence`` are: + + - ``io.SEEK_SET`` or 0: start of stream (default): offset must not be + negative + - ``io.SEEK_CUR`` or 1: current stream position + - ``io.SEEK_END`` or 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the parameters, this + operation may be extremely slow. + + Args: + offset(int): new position in the file + whence(int): position with which ``offset`` is measured. Allowed + values are 0, 1, 2. The default is 0 (start of stream). + + Returns: + int: new file position + + """ + self._check_can_seek() + return self._buffer.seek(offset, whence) + + def tell(self): + """Return the current file position. 
+ + Args: + None + + Returns: + int: file position + + """ + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + return self._pos + + +def open(filename, mode="rb", + encoding=None, + errors=None, + newline=None, + block_size=BLOCKSIZE_DEFAULT, + block_linked=True, + compression_level=COMPRESSIONLEVEL_MIN, + content_checksum=False, + block_checksum=False, + auto_flush=False, + return_bytearray=False, + source_size=0): + """Open an LZ4Frame-compressed file in binary or text mode. + + ``filename`` can be either an actual file name (given as a str, bytes, or + PathLike object), in which case the named file is opened, or it can be an + existing file object to read from or write to. + + The ``mode`` argument can be ``'r'``, ``'rb'`` (default), ``'w'``, + ``'wb'``, ``'x'``, ``'xb'``, ``'a'``, or ``'ab'`` for binary mode, or + ``'rt'``, ``'wt'``, ``'xt'``, or ``'at'`` for text mode. + + For binary mode, this function is equivalent to the `LZ4FrameFile` + constructor: `LZ4FrameFile(filename, mode, ...)`. + + For text mode, an `LZ4FrameFile` object is created, and wrapped in an + ``io.TextIOWrapper`` instance with the specified encoding, error handling + behavior, and line ending(s). + + Args: + filename (str, bytes, os.PathLike): file name or file object to open + + Keyword Args: + mode (str): mode for opening the file + encoding (str): the name of the encoding that will be used for + encoding/deconging the stream. It defaults to + ``locale.getpreferredencoding(False)``. See ``io.TextIOWrapper`` + for further details. + errors (str): specifies how encoding and decoding errors are to be + handled. See ``io.TextIOWrapper`` for further details. + newline (str): controls how line endings are handled. See + ``io.TextIOWrapper`` for further details. + return_bytearray (bool): When ``False`` a bytes object is returned + from the calls to methods of this class. When ``True`` a bytearray + object will be returned. The default is ``False``. 
+ source_size (int): Optionally specify the total size of the + uncompressed data. If specified, will be stored in the compressed + frame header as an 8-byte field for later use during decompression. + Default is 0 (no size stored). Only used for writing compressed + files. + block_size (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_linked (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + compression_level (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + content_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + auto_flush (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + + """ + if 't' in mode: + if 'b' in mode: + raise ValueError('Invalid mode: %r' % (mode,)) + else: + if encoding is not None: + raise ValueError( + "Argument 'encoding' not supported in binary mode" + ) + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + _mode = mode.replace('t', '') + + binary_file = LZ4FrameFile( + filename, + mode=_mode, + block_size=block_size, + block_linked=block_linked, + compression_level=compression_level, + content_checksum=content_checksum, + block_checksum=block_checksum, + auto_flush=auto_flush, + return_bytearray=return_bytearray, + ) + + if 't' in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file diff --git a/contrib/python/lz4/py2/lz4/frame/_compression.py b/contrib/python/lz4/py2/lz4/frame/_compression.py new file mode 100644 index 0000000000..3c68904b98 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/frame/_compression.py @@ -0,0 +1,170 @@ +# Local python-lz4 copy of this file taken from the CPython standard library +# for earlier Python versions that don't ship with this file. 
This file has +# been modified to work on Python < 3.0. + +"""Internal classes used by the gzip, lzma and bz2 modules""" + +import sys +import io +# Ensure super has Python 3 semantics even on Python 2 +from builtins import super + + +BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size + + +class BaseStream(io.BufferedIOBase): + """Mode-checking helper functions.""" + + def _check_not_closed(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def _check_can_read(self): + if not self.readable(): + raise io.UnsupportedOperation("File not open for reading") + + def _check_can_write(self): + if not self.writable(): + raise io.UnsupportedOperation("File not open for writing") + + def _check_can_seek(self): + if not self.readable(): + raise io.UnsupportedOperation("Seeking is only supported " + "on files open for reading") + if not self.seekable(): + raise io.UnsupportedOperation("The underlying file object " + "does not support seeking") + + +class DecompressReader(io.RawIOBase): + """Adapts the decompressor API to a RawIOBase reader API""" + + def readable(self): + return True + + def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args): + self._fp = fp + self._eof = False + self._pos = 0 # Current offset in decompressed stream + + # Set to size of decompressed stream once it is known, for SEEK_END + self._size = -1 + + # Save the decompressor factory and arguments. + # If the file contains multiple compressed streams, each + # stream will need a separate decompressor object. A new decompressor + # object is also needed when implementing a backwards seek(). 
+ self._decomp_factory = decomp_factory + self._decomp_args = decomp_args + self._decompressor = self._decomp_factory(**self._decomp_args) + + # Exception class to catch from decompressor signifying invalid + # trailing data to ignore + self._trailing_error = trailing_error + + def close(self): + self._decompressor = None + return super().close() + + def seekable(self): + return self._fp.seekable() + + def readinto(self, b): + with memoryview(b) as view, view.cast("B") as byte_view: + data = self.read(len(byte_view)) + byte_view[:len(data)] = data + return len(data) + + def read(self, size=-1): + if size < 0: + return self.readall() + + if not size or self._eof: + return b"" + data = None # Default if EOF is encountered + # Depending on the input data, our call to the decompressor may not + # return any data. In this case, try again after reading another block. + while True: + if self._decompressor.eof: + rawblock = ( + self._decompressor.unused_data or self._fp.read(BUFFER_SIZE) + ) + if not rawblock: + break + # Continue to next stream. + self._decompressor = self._decomp_factory( + **self._decomp_args) + try: + data = self._decompressor.decompress(rawblock, size) + except self._trailing_error: + # Trailing data isn't a valid compressed stream; ignore it. + break + else: + if self._decompressor.needs_input: + rawblock = self._fp.read(BUFFER_SIZE) + if not rawblock: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + else: + rawblock = b"" + data = self._decompressor.decompress(rawblock, size) + if data: + break + if not data: + self._eof = True + self._size = self._pos + return b"" + self._pos += len(data) + return data + + # Rewind the file to the beginning of the data stream. + def _rewind(self): + self._fp.seek(0) + self._eof = False + self._pos = 0 + self._decompressor = self._decomp_factory(**self._decomp_args) + + def seek(self, offset, whence=io.SEEK_SET): + # Recalculate offset as an absolute file position. 
+ if whence == io.SEEK_SET: + pass + elif whence == io.SEEK_CUR: + offset = self._pos + offset + elif whence == io.SEEK_END: + # Seeking relative to EOF - we need to know the file's size. + if self._size < 0: + while self.read(io.DEFAULT_BUFFER_SIZE): + pass + offset = self._size + offset + else: + raise ValueError("Invalid value for whence: {}".format(whence)) + + # Make it so that offset is the number of bytes to skip forward. + if offset < self._pos: + self._rewind() + else: + offset -= self._pos + + # Read and discard data until we reach the desired position. + while offset > 0: + data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset)) + if not data: + break + offset -= len(data) + + return self._pos + + def tell(self): + """Return the current file position.""" + return self._pos + + +if sys.version_info < (3, 3): + # memoryview.cast is added in 3.3 + def readinto(self, b): + data = self.read(len(b)) + b[:len(data)] = data + return len(data) + + DecompressReader.readinto = readinto diff --git a/contrib/python/lz4/py2/lz4/frame/_frame.c b/contrib/python/lz4/py2/lz4/frame/_frame.c new file mode 100644 index 0000000000..99ebe02d02 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/frame/_frame.c @@ -0,0 +1,1761 @@ +/* + * Copyright (c) 2015, 2016 Jerry Ryle and Jonathan G. Underwood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +#if defined(_WIN32) && defined(_MSC_VER) +#define inline __inline +#elif defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX) +#define inline +#endif + +#include <py3c.h> +#include <py3c/capsulethunk.h> + +#include <stdlib.h> +#include <lz4.h> /* Needed for LZ4_VERSION_NUMBER only. 
*/ +#include <lz4frame.h> + +#ifndef Py_UNUSED /* This is already defined for Python 3.4 onwards */ +#ifdef __GNUC__ +#define Py_UNUSED(name) _unused_ ## name __attribute__((unused)) +#else +#define Py_UNUSED(name) _unused_ ## name +#endif +#endif + +static const char * compression_context_capsule_name = "_frame.LZ4F_cctx"; +static const char * decompression_context_capsule_name = "_frame.LZ4F_dctx"; + +struct compression_context +{ + LZ4F_cctx * context; + LZ4F_preferences_t preferences; +}; + +/***************************** +* create_compression_context * +******************************/ +static void +destroy_compression_context (PyObject * py_context) +{ +#ifndef PyCapsule_Type + struct compression_context *context = + PyCapsule_GetPointer (py_context, compression_context_capsule_name); +#else + /* Compatibility with 2.6 via capsulethunk. */ + struct compression_context *context = py_context; +#endif + Py_BEGIN_ALLOW_THREADS + LZ4F_freeCompressionContext (context->context); + Py_END_ALLOW_THREADS + + PyMem_Free (context); +} + +static PyObject * +create_compression_context (PyObject * Py_UNUSED (self)) +{ + struct compression_context * context; + LZ4F_errorCode_t result; + + context = + (struct compression_context *) + PyMem_Malloc (sizeof (struct compression_context)); + + if (!context) + { + return PyErr_NoMemory (); + } + + Py_BEGIN_ALLOW_THREADS + + result = + LZ4F_createCompressionContext (&context->context, + LZ4F_VERSION); + Py_END_ALLOW_THREADS + + if (LZ4F_isError (result)) + { + LZ4F_freeCompressionContext (context->context); + PyMem_Free (context); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createCompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + return PyCapsule_New (context, compression_context_capsule_name, + destroy_compression_context); +} + +/************ + * compress * + ************/ +static PyObject * +compress (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + Py_buffer 
source; + Py_ssize_t source_size; + int store_size = 1; + int return_bytearray = 0; + int content_checksum = 0; + int block_checksum = 0; + int block_linked = 1; + LZ4F_preferences_t preferences; + size_t destination_size; + size_t compressed_size; + PyObject *py_destination; + char *destination; + + static char *kwlist[] = { "data", + "compression_level", + "block_size", + "content_checksum", + "block_checksum", + "block_linked", + "store_size", + "return_bytearray", + NULL + }; + + + memset (&preferences, 0, sizeof preferences); + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "y*|iippppp", kwlist, + &source, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &store_size, + &return_bytearray)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "s*|iiiiiii", kwlist, + &source, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &store_size, + &return_bytearray)) + { + return NULL; + } +#endif + + if (content_checksum) + { + preferences.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + } + else + { + preferences.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + } + + if (block_linked) + { + preferences.frameInfo.blockMode = LZ4F_blockLinked; + } + else + { + preferences.frameInfo.blockMode = LZ4F_blockIndependent; + } + + if (LZ4_versionNumber() >= 10800) + { + if (block_checksum) + { + preferences.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; + } + else + { + preferences.frameInfo.blockChecksumFlag = LZ4F_noBlockChecksum; + } + } + else if (block_checksum) + { + PyErr_SetString (PyExc_RuntimeError, + "block_checksum specified but not supported by LZ4 library version"); + return NULL; + } + + source_size = source.len; + + preferences.autoFlush = 0; + if (store_size) + { + preferences.frameInfo.contentSize = source_size; + } + else + { + 
preferences.frameInfo.contentSize = 0; + } + + Py_BEGIN_ALLOW_THREADS + destination_size = + LZ4F_compressFrameBound (source_size, &preferences); + Py_END_ALLOW_THREADS + + if (destination_size > PY_SSIZE_T_MAX) + { + PyBuffer_Release(&source); + PyErr_Format (PyExc_ValueError, + "Input data could require %zu bytes, which is larger than the maximum supported size of %zd bytes", + destination_size, PY_SSIZE_T_MAX); + return NULL; + } + + destination = PyMem_Malloc (destination_size * sizeof * destination); + if (destination == NULL) + { + PyBuffer_Release(&source); + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + compressed_size = + LZ4F_compressFrame (destination, destination_size, source.buf, source_size, + &preferences); + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + + if (LZ4F_isError (compressed_size)) + { + PyMem_Free (destination); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressFrame failed with code: %s", + LZ4F_getErrorName (compressed_size)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) compressed_size); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) compressed_size); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * compress_begin * + ******************/ +static PyObject * +compress_begin (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + PyObject *py_context = NULL; + Py_ssize_t source_size = (Py_ssize_t) 0; + int return_bytearray = 0; + int content_checksum = 0; + int block_checksum = 0; + int block_linked = 1; + LZ4F_preferences_t preferences; + PyObject *py_destination; + char * destination; + /* The destination buffer needs to be large enough for a header, which is 15 + * bytes. Unfortunately, the lz4 library doesn't provide a #define for this. 
+ * We over-allocate to allow for larger headers in the future. */ + const size_t header_size = 32; + struct compression_context *context; + size_t result; + static char *kwlist[] = { "context", + "source_size", + "compression_level", + "block_size", + "content_checksum", + "block_checksum", + "block_linked", + "auto_flush", + "return_bytearray", + NULL + }; + + memset (&preferences, 0, sizeof preferences); + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiippppp", kwlist, + &py_context, + &source_size, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &preferences.autoFlush, + &return_bytearray + )) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiiiiiii", kwlist, + &py_context, + &source_size, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &preferences.autoFlush, + &return_bytearray + )) + { + return NULL; + } +#endif + if (content_checksum) + { + preferences.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + } + else + { + preferences.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + } + + if (block_linked) + { + preferences.frameInfo.blockMode = LZ4F_blockLinked; + } + else + { + preferences.frameInfo.blockMode = LZ4F_blockIndependent; + } + + if (LZ4_versionNumber() >= 10800) + { + if (block_checksum) + { + preferences.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; + } + else + { + preferences.frameInfo.blockChecksumFlag = LZ4F_noBlockChecksum; + } + } + else if (block_checksum) + { + PyErr_SetString (PyExc_RuntimeError, + "block_checksum specified but not supported by LZ4 library version"); + return NULL; + } + + if (block_linked) + { + preferences.frameInfo.blockMode = LZ4F_blockLinked; + } + else + { + preferences.frameInfo.blockMode = LZ4F_blockIndependent; + } + + + preferences.frameInfo.contentSize = 
source_size; + + context = + (struct compression_context *) PyCapsule_GetPointer (py_context, compression_context_capsule_name); + + if (!context || !context->context) + { + PyErr_SetString (PyExc_ValueError, "No valid compression context supplied"); + return NULL; + } + + context->preferences = preferences; + + destination = PyMem_Malloc (header_size * sizeof * destination); + if (destination == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + result = LZ4F_compressBegin (context->context, + destination, + header_size, + &context->preferences); + Py_END_ALLOW_THREADS + + if (LZ4F_isError (result)) + { + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressBegin failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) result); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) result); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * compress_chunk * + ******************/ +static PyObject * +compress_chunk (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + PyObject *py_context = NULL; + Py_buffer source; + Py_ssize_t source_size; + struct compression_context *context; + size_t compressed_bound; + PyObject *py_destination; + char *destination; + LZ4F_compressOptions_t compress_options; + size_t result; + int return_bytearray = 0; + static char *kwlist[] = { "context", + "data", + "return_bytearray", + NULL + }; + + memset (&compress_options, 0, sizeof compress_options); + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Oy*|p", kwlist, + &py_context, + &source, + &return_bytearray)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Os*|i", kwlist, + &py_context, + &source, + &return_bytearray)) + { + return NULL; + } 
+#endif + + source_size = source.len; + + context = + (struct compression_context *) PyCapsule_GetPointer (py_context, compression_context_capsule_name); + if (!context || !context->context) + { + PyBuffer_Release(&source); + PyErr_Format (PyExc_ValueError, "No compression context supplied"); + return NULL; + } + + /* If autoFlush is enabled, then the destination buffer only needs to be as + big as LZ4F_compressFrameBound specifies for this source size. However, if + autoFlush is disabled, previous calls may have resulted in buffered data, + and so we need instead to use LZ4F_compressBound to find the size required + for the destination buffer. This means that with autoFlush disabled we may + frequently allocate more memory than needed. */ + Py_BEGIN_ALLOW_THREADS + if (context->preferences.autoFlush == 1) + { + compressed_bound = + LZ4F_compressFrameBound (source_size, &context->preferences); + } + else + { + compressed_bound = + LZ4F_compressBound (source_size, &context->preferences); + } + Py_END_ALLOW_THREADS + + if (compressed_bound > PY_SSIZE_T_MAX) + { + PyBuffer_Release(&source); + PyErr_Format (PyExc_ValueError, + "input data could require %zu bytes, which is larger than the maximum supported size of %zd bytes", + compressed_bound, PY_SSIZE_T_MAX); + return NULL; + } + + destination = PyMem_Malloc (compressed_bound * sizeof * destination); + if (destination == NULL) + { + PyBuffer_Release(&source); + return PyErr_NoMemory(); + } + + compress_options.stableSrc = 0; + + Py_BEGIN_ALLOW_THREADS + result = + LZ4F_compressUpdate (context->context, destination, + compressed_bound, source.buf, source_size, + &compress_options); + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + + if (LZ4F_isError (result)) + { + PyMem_Free (destination); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressUpdate failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, 
(Py_ssize_t) result); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) result); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * compress_flush * + ******************/ +static PyObject * +compress_flush (PyObject * Py_UNUSED (self), PyObject * args, PyObject * keywds) +{ + PyObject *py_context = NULL; + LZ4F_compressOptions_t compress_options; + struct compression_context *context; + size_t destination_size; + int return_bytearray = 0; + int end_frame = 1; + PyObject *py_destination; + char * destination; + size_t result; + static char *kwlist[] = { "context", + "end_frame", + "return_bytearray", + NULL + }; + + memset (&compress_options, 0, sizeof compress_options); + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|pp", kwlist, + &py_context, + &end_frame, + &return_bytearray)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|ii", kwlist, + &py_context, + &end_frame, + &return_bytearray)) + { + return NULL; + } +#endif + if (!end_frame && LZ4_versionNumber() < 10800) + { + PyErr_SetString (PyExc_RuntimeError, + "Flush without ending a frame is not supported with this version of the LZ4 library"); + return NULL; + } + + context = + (struct compression_context *) PyCapsule_GetPointer (py_context, compression_context_capsule_name); + if (!context || !context->context) + { + PyErr_SetString (PyExc_ValueError, "No compression context supplied"); + return NULL; + } + + compress_options.stableSrc = 0; + + /* Calling LZ4F_compressBound with srcSize equal to 0 returns a size + sufficient to fit (i) any remaining buffered data (when autoFlush is + disabled) and the footer size, which is either 4 or 8 bytes depending on + whether checksums are enabled. See: https://github.com/lz4/lz4/issues/280 + and https://github.com/lz4/lz4/issues/290. 
Prior to 1.7.5, it was necessary + to call LZ4F_compressBound with srcSize equal to 1. Since we now require a + minimum version to 1.7.5 we'll call this with srcSize equal to 0. */ + Py_BEGIN_ALLOW_THREADS + destination_size = LZ4F_compressBound (0, &(context->preferences)); + Py_END_ALLOW_THREADS + + destination = PyMem_Malloc (destination_size * sizeof * destination); + if (destination == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + if (end_frame) + { + result = + LZ4F_compressEnd (context->context, destination, + destination_size, &compress_options); + } + else + { + result = + LZ4F_flush (context->context, destination, + destination_size, &compress_options); + } + Py_END_ALLOW_THREADS + + if (LZ4F_isError (result)) + { + PyMem_Free (destination); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressEnd failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) result); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) result); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * get_frame_info * + ******************/ +static PyObject * +get_frame_info (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + Py_buffer py_source; + char *source; + size_t source_size; + LZ4F_decompressionContext_t context; + LZ4F_frameInfo_t frame_info; + size_t result; + unsigned int block_size; + unsigned int block_size_id; + int block_linked; + int content_checksum; + int block_checksum; + int skippable; + + static char *kwlist[] = { "data", + NULL + }; + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "y*", kwlist, + &py_source)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "s*", kwlist, + &py_source)) + { + return NULL; + } 
+#endif + + Py_BEGIN_ALLOW_THREADS + + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + PyBuffer_Release (&py_source); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + source = (char *) py_source.buf; + source_size = (size_t) py_source.len; + + result = + LZ4F_getFrameInfo (context, &frame_info, source, &source_size); + + if (LZ4F_isError (result)) + { + LZ4F_freeDecompressionContext (context); + Py_BLOCK_THREADS + PyBuffer_Release (&py_source); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_getFrameInfo failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + result = LZ4F_freeDecompressionContext (context); + + Py_END_ALLOW_THREADS + + PyBuffer_Release (&py_source); + + if (LZ4F_isError (result)) + { + PyErr_Format (PyExc_RuntimeError, + "LZ4F_freeDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + +#define KB *(1<<10) +#define MB *(1<<20) + switch (frame_info.blockSizeID) + { + case LZ4F_default: + case LZ4F_max64KB: + block_size = 64 KB; + block_size_id = LZ4F_max64KB; + break; + case LZ4F_max256KB: + block_size = 256 KB; + block_size_id = LZ4F_max256KB; + break; + case LZ4F_max1MB: + block_size = 1 MB; + block_size_id = LZ4F_max1MB; + break; + case LZ4F_max4MB: + block_size = 4 MB; + block_size_id = LZ4F_max4MB; + break; + default: + PyErr_Format (PyExc_RuntimeError, + "Unrecognized blockSizeID in get_frame_info: %d", + frame_info.blockSizeID); + return NULL; + } +#undef KB +#undef MB + + if (frame_info.blockMode == LZ4F_blockLinked) + { + block_linked = 1; + } + else if (frame_info.blockMode == LZ4F_blockIndependent) + { + block_linked = 0; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized blockMode in get_frame_info: %d", + frame_info.blockMode); + return NULL; + } + + if (frame_info.contentChecksumFlag == 
LZ4F_noContentChecksum) + { + content_checksum = 0; + } + else if (frame_info.contentChecksumFlag == LZ4F_contentChecksumEnabled) + { + content_checksum = 1; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized contentChecksumFlag in get_frame_info: %d", + frame_info.contentChecksumFlag); + return NULL; + } + + if (LZ4_versionNumber() >= 10800) + { + if (frame_info.blockChecksumFlag == LZ4F_noBlockChecksum) + { + block_checksum = 0; + } + else if (frame_info.blockChecksumFlag == LZ4F_blockChecksumEnabled) + { + block_checksum = 1; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized blockChecksumFlag in get_frame_info: %d", + frame_info.blockChecksumFlag); + return NULL; + } + } + else + { + /* Prior to LZ4 1.8.0 the blockChecksum functionality wasn't exposed in the + frame API, and blocks weren't checksummed, so we'll always return 0 + here. */ + block_checksum = 0; + } + + if (frame_info.frameType == LZ4F_frame) + { + skippable = 0; + } + else if (frame_info.frameType == LZ4F_skippableFrame) + { + skippable = 1; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized frameType in get_frame_info: %d", + frame_info.frameType); + return NULL; + } + + return Py_BuildValue ("{s:I,s:I,s:O,s:O,s:O,s:O,s:K}", + "block_size", block_size, + "block_size_id", block_size_id, + "block_linked", block_linked ? Py_True : Py_False, + "content_checksum", content_checksum ? Py_True : Py_False, + "block_checksum", block_checksum ? Py_True : Py_False, + "skippable", skippable ? Py_True : Py_False, + "content_size", frame_info.contentSize); +} + +/******************************** + * create_decompression_context * + ********************************/ +static void +destroy_decompression_context (PyObject * py_context) +{ +#ifndef PyCapsule_Type + LZ4F_dctx * context = + PyCapsule_GetPointer (py_context, decompression_context_capsule_name); +#else + /* Compatibility with 2.6 via capsulethunk. 
*/ + LZ4F_dctx * context = py_context; +#endif + Py_BEGIN_ALLOW_THREADS + LZ4F_freeDecompressionContext (context); + Py_END_ALLOW_THREADS +} + +static PyObject * +create_decompression_context (PyObject * Py_UNUSED (self)) +{ + LZ4F_dctx * context; + LZ4F_errorCode_t result; + + Py_BEGIN_ALLOW_THREADS + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + LZ4F_freeDecompressionContext (context); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + Py_END_ALLOW_THREADS + + return PyCapsule_New (context, decompression_context_capsule_name, + destroy_decompression_context); +} + +/******************************* + * reset_decompression_context * + *******************************/ +static PyObject * +reset_decompression_context (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + LZ4F_dctx * context; + PyObject * py_context = NULL; + static char *kwlist[] = { "context", + NULL + }; + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O", kwlist, + &py_context + )) + { + return NULL; + } + + context = (LZ4F_dctx *) + PyCapsule_GetPointer (py_context, decompression_context_capsule_name); + + if (!context) + { + PyErr_SetString (PyExc_ValueError, + "No valid decompression context supplied"); + return NULL; + } + + if (LZ4_versionNumber() >= 10800) /* LZ4 >= v1.8.0 has LZ4F_resetDecompressionContext */ + { + /* No error checking possible here - this is always successful. */ + Py_BEGIN_ALLOW_THREADS + LZ4F_resetDecompressionContext (context); + Py_END_ALLOW_THREADS + } + else + { + /* No resetDecompressionContext available, so we'll destroy the context + and create a new one. 
*/ + int result; + + Py_BEGIN_ALLOW_THREADS + LZ4F_freeDecompressionContext (context); + + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + if (LZ4F_isError (result)) + { + LZ4F_freeDecompressionContext (context); + Py_BLOCK_THREADS + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + Py_END_ALLOW_THREADS + + result = PyCapsule_SetPointer(py_context, context); + if (result) + { + LZ4F_freeDecompressionContext (context); + PyErr_SetString (PyExc_RuntimeError, + "PyCapsule_SetPointer failed with code: %s"); + return NULL; + } + } + + Py_RETURN_NONE; +} + +static inline PyObject * +__decompress(LZ4F_dctx * context, char * source, size_t source_size, + Py_ssize_t max_length, int full_frame, + int return_bytearray, int return_bytes_read) +{ + size_t source_remain; + size_t source_read; + char * source_cursor; + char * source_end; + char * destination; + size_t destination_write; + char * destination_cursor; + size_t destination_written; + size_t destination_size; + PyObject * py_destination; + size_t result = 0; + LZ4F_frameInfo_t frame_info; + LZ4F_decompressOptions_t options; + int end_of_frame = 0; + + memset(&options, 0, sizeof options); + + Py_BEGIN_ALLOW_THREADS + + source_cursor = source; + source_end = source + source_size; + source_remain = source_size; + + if (full_frame) + { + source_read = source_size; + + result = + LZ4F_getFrameInfo (context, &frame_info, + source_cursor, &source_read); + + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + PyErr_Format (PyExc_RuntimeError, + "LZ4F_getFrameInfo failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + /* Advance the source_cursor pointer past the header - the call to + getFrameInfo above replaces the passed source_read value with the + number of bytes read. Also reduce source_remain accordingly. 
*/ + source_cursor += source_read; + source_remain -= source_read; + + /* If the uncompressed content size is available, we'll use that to size + the destination buffer. Otherwise, guess at twice the remaining source + source as a starting point, and adjust if needed. */ + if (frame_info.contentSize > 0) + { + destination_size = frame_info.contentSize; + } + else + { + destination_size = 2 * source_remain; + } + } + else + { + if (max_length >= (Py_ssize_t) 0) + { + destination_size = (size_t) max_length; + } + else + { + /* Choose an initial destination size as twice the source size, and we'll + grow the allocation as needed. */ + destination_size = 2 * source_remain; + } + } + + Py_BLOCK_THREADS + + destination = PyMem_Malloc (destination_size * sizeof * destination); + if (destination == NULL) + { + return PyErr_NoMemory(); + } + + Py_UNBLOCK_THREADS + + /* Only set stableDst = 1 if we are sure no PyMem_Realloc will be called since + when stableDst = 1 the LZ4 library stores a pointer to the last compressed + data, which may be invalid after a PyMem_Realloc. */ + if (full_frame && max_length >= (Py_ssize_t) 0) + { + options.stableDst = 1; + } + else + { + options.stableDst = 0; + } + + source_read = source_remain; + + destination_write = destination_size; + destination_cursor = destination; + destination_written = 0; + + while (1) + { + /* Decompress from the source string and write to the destination + until there's no more source string to read, or until we've reached the + frame end. + + On calling LZ4F_decompress, source_read is set to the remaining length + of source available to read. On return, source_read is set to the + actual number of bytes read from source, which may be less than + available. NB: LZ4F_decompress does not explicitly fail on empty input. + + On calling LZ4F_decompress, destination_write is the number of bytes in + destination available for writing. 
On exit, destination_write is set to + the actual number of bytes written to destination. */ + result = LZ4F_decompress (context, + destination_cursor, + &destination_write, + source_cursor, + &source_read, + &options); + + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + PyErr_Format (PyExc_RuntimeError, + "LZ4F_decompress failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + destination_written += destination_write; + source_cursor += source_read; + source_read = source_end - source_cursor; + + if (result == 0) + { + /* We've reached the end of the frame. */ + end_of_frame = 1; + break; + } + else if (source_cursor == source_end) + { + /* We've reached end of input. */ + break; + } + else if (destination_written == destination_size) + { + /* Destination buffer is full. So, stop decompressing if + max_length is set. Otherwise expand the destination + buffer. */ + if (max_length >= (Py_ssize_t) 0) + { + break; + } + else + { + /* Expand destination buffer. result is an indication of number of + source bytes remaining, so we'll use this to estimate the new + size of the destination buffer. */ + char * buff; + destination_size += 3 * result; + + Py_BLOCK_THREADS + buff = PyMem_Realloc (destination, destination_size); + if (buff == NULL) + { + PyErr_SetString (PyExc_RuntimeError, + "Failed to resize buffer"); + return NULL; + } + else + { + destination = buff; + } + Py_UNBLOCK_THREADS + } + } + /* Data still remaining to be decompressed, so increment the destination + cursor location, and reset destination_write ready for the next + iteration. Important to re-initialize destination_cursor here (as + opposed to simply incrementing it) so we're pointing to the realloc'd + memory location. */ + destination_cursor = destination + destination_written; + destination_write = destination_size - destination_written; + } + + Py_END_ALLOW_THREADS + + if (result > 0 && full_frame) + { + PyErr_Format (PyExc_RuntimeError, + "Frame incomplete. 
LZ4F_decompress returned: %zu", result); + PyMem_Free (destination); + return NULL; + } + + if (LZ4F_isError (result)) + { + PyErr_Format (PyExc_RuntimeError, + "LZ4F_freeDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + PyMem_Free (destination); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) destination_written); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) destination_written); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + if (full_frame) + { + if (return_bytes_read) + { + return Py_BuildValue ("Ni", + py_destination, + source_cursor - source); + } + else + { + return py_destination; + } + } + else + { + return Py_BuildValue ("NiO", + py_destination, + source_cursor - source, + end_of_frame ? Py_True : Py_False); + } +} + +/************** + * decompress * + **************/ +static PyObject * +decompress (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + LZ4F_dctx * context; + LZ4F_errorCode_t result; + Py_buffer py_source; + char * source; + size_t source_size; + PyObject * ret; + int return_bytearray = 0; + int return_bytes_read = 0; + static char *kwlist[] = { "data", + "return_bytearray", + "return_bytes_read", + NULL + }; + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "y*|pp", kwlist, + &py_source, + &return_bytearray, + &return_bytes_read + )) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "s*|ii", kwlist, + &py_source, + &return_bytearray, + &return_bytes_read + )) + { + return NULL; + } +#endif + + Py_BEGIN_ALLOW_THREADS + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + if (LZ4F_isError (result)) + { + LZ4F_freeDecompressionContext (context); + Py_BLOCK_THREADS + PyBuffer_Release(&py_source); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext 
failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + Py_END_ALLOW_THREADS + + /* MSVC can't do pointer arithmetic on void * pointers, so cast to char * */ + source = (char *) py_source.buf; + source_size = py_source.len; + + ret = __decompress (context, + source, + source_size, + -1, + 1, + return_bytearray, + return_bytes_read); + + PyBuffer_Release(&py_source); + + Py_BEGIN_ALLOW_THREADS + LZ4F_freeDecompressionContext (context); + Py_END_ALLOW_THREADS + + return ret; +} + +/******************** + * decompress_chunk * + ********************/ +static PyObject * +decompress_chunk (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + PyObject * py_context = NULL; + PyObject * ret; + LZ4F_dctx * context; + Py_buffer py_source; + char * source; + size_t source_size; + Py_ssize_t max_length = (Py_ssize_t) -1; + int return_bytearray = 0; + static char *kwlist[] = { "context", + "data", + "max_length", + "return_bytearray", + NULL + }; + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Oy*|np", kwlist, + &py_context, + &py_source, + &max_length, + &return_bytearray + )) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Os*|ni", kwlist, + &py_context, + &py_source, + &max_length, + &return_bytearray + )) + { + return NULL; + } +#endif + + context = (LZ4F_dctx *) + PyCapsule_GetPointer (py_context, decompression_context_capsule_name); + + if (!context) + { + PyBuffer_Release(&py_source); + PyErr_SetString (PyExc_ValueError, + "No valid decompression context supplied"); + return NULL; + } + + /* MSVC can't do pointer arithmetic on void * pointers, so cast to char * */ + source = (char *) py_source.buf; + source_size = py_source.len; + + ret = __decompress (context, + source, + source_size, + max_length, + 0, + return_bytearray, + 0); + + PyBuffer_Release(&py_source); + + return ret; +} + +PyDoc_STRVAR( + create_compression_context__doc, + "create_compression_context()\n" \ + "\n" \ + 
"Creates a compression context object.\n" \ + "\n" \ + "The compression object is required for compression operations.\n" \ + "\n" \ + "Returns:\n" \ + " cCtx: A compression context\n" + ); + +#define COMPRESS_KWARGS_DOCSTRING \ + " block_size (int): Sepcifies the maximum blocksize to use.\n" \ + " Options:\n\n" \ + " - `lz4.frame.BLOCKSIZE_DEFAULT`: the lz4 library default\n" \ + " - `lz4.frame.BLOCKSIZE_MAX64KB`: 64 kB\n" \ + " - `lz4.frame.BLOCKSIZE_MAX256KB`: 256 kB\n" \ + " - `lz4.frame.BLOCKSIZE_MAX1MB`: 1 MB\n" \ + " - `lz4.frame.BLOCKSIZE_MAX4MB`: 4 MB\n\n" \ + " If unspecified, will default to `lz4.frame.BLOCKSIZE_DEFAULT`\n" \ + " which is currently equal to `lz4.frame.BLOCKSIZE_MAX64KB`.\n" \ + " block_linked (bool): Specifies whether to use block-linked\n" \ + " compression. If ``True``, the compression ratio is improved,\n" \ + " particularly for small block sizes. Default is ``True``.\n" \ + " compression_level (int): Specifies the level of compression used.\n" \ + " Values between 0-16 are valid, with 0 (default) being the\n" \ + " lowest compression (0-2 are the same value), and 16 the highest.\n" \ + " Values below 0 will enable \"fast acceleration\", proportional\n" \ + " to the value. Values above 16 will be treated as 16.\n" \ + " The following module constants are provided as a convenience:\n\n" \ + " - `lz4.frame.COMPRESSIONLEVEL_MIN`: Minimum compression (0, the\n" \ + " default)\n" \ + " - `lz4.frame.COMPRESSIONLEVEL_MINHC`: Minimum high-compression\n" \ + " mode (3)\n" \ + " - `lz4.frame.COMPRESSIONLEVEL_MAX`: Maximum compression (16)\n\n" \ + " content_checksum (bool): Specifies whether to enable checksumming\n" \ + " of the uncompressed content. If True, a checksum is stored at the\n" \ + " end of the frame, and checked during decompression. Default is\n" \ + " ``False``.\n" \ + " block_checksum (bool): Specifies whether to enable checksumming of\n" \ + " the uncompressed content of each block. 
If `True` a checksum of\n" \ + " the uncompressed data in each block in the frame is stored at\n\n" \ + " the end of each block. If present, these checksums will be used\n\n" \ + " to validate the data during decompression. The default is\n" \ + " ``False`` meaning block checksums are not calculated and stored.\n" \ + " This functionality is only supported if the underlying LZ4\n" \ + " library has version >= 1.8.0. Attempting to set this value\n" \ + " to ``True`` with a version of LZ4 < 1.8.0 will cause a\n" \ + " ``RuntimeError`` to be raised.\n" \ + " return_bytearray (bool): If ``True`` a ``bytearray`` object will be\n" \ + " returned. If ``False``, a string of bytes is returned. The default\n" \ + " is ``False``.\n" \ + +PyDoc_STRVAR( + compress__doc, + "compress(data, compression_level=0, block_size=0, content_checksum=0,\n" \ + "block_linked=True, store_size=True, return_bytearray=False)\n" \ + "\n" \ + "Compresses ``data`` returning the compressed data as a complete frame.\n" \ + "\n" \ + "The returned data includes a header and endmark and so is suitable\n" \ + "for writing to a file.\n" \ + "\n" \ + "Args:\n" \ + " data (str, bytes or buffer-compatible object): data to compress\n" \ + "\n" \ + "Keyword Args:\n" \ + COMPRESS_KWARGS_DOCSTRING \ + " store_size (bool): If ``True`` then the frame will include an 8-byte\n" \ + " header field that is the uncompressed size of data included\n" \ + " within the frame. Default is ``True``.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data\n" + ); +PyDoc_STRVAR +( + compress_begin__doc, + "compress_begin(context, source_size=0, compression_level=0, block_size=0,\n" \ + "content_checksum=0, content_size=1, block_mode=0, frame_type=0,\n" \ + "auto_flush=1)\n" \ + "\n" \ + "Creates a frame header from a compression context.\n\n" \ + "Args:\n" \ + " context (cCtx): A compression context.\n\n" \ + "Keyword Args:\n" \ + COMPRESS_KWARGS_DOCSTRING \ + " auto_flush (bool): Enable or disable autoFlush. 
When autoFlush is disabled\n" \ + " the LZ4 library may buffer data internally until a block is full.\n" \ + " Default is ``False`` (autoFlush disabled).\n\n" \ + " source_size (int): This optionally specifies the uncompressed size\n" \ + " of the data to be compressed. If specified, the size will be stored\n" \ + " in the frame header for use during decompression. Default is ``True``\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be returned.\n" \ + " If ``False``, a string of bytes is returned. Default is ``False``.\n\n" \ + "Returns:\n" \ + " bytes or bytearray: Frame header.\n" + ); + +#undef COMPRESS_KWARGS_DOCSTRING + +PyDoc_STRVAR +( + compress_chunk__doc, + "compress_chunk(context, data)\n" \ + "\n" \ + "Compresses blocks of data and returns the compressed data.\n" \ + "\n" \ + "The returned data should be concatenated with the data returned from\n" \ + "`lz4.frame.compress_begin` and any subsequent calls to\n" \ + "`lz4.frame.compress_chunk`.\n" \ + "\n" \ + "Args:\n" \ + " context (cCtx): compression context\n" \ + " data (str, bytes or buffer-compatible object): data to compress\n" \ + "\n" \ + "Keyword Args:\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be\n" \ + " returned. If ``False``, a string of bytes is returned. The\n" \ + " default is False.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data.\n\n" \ + "Notes:\n" \ + " If auto flush is disabled (``auto_flush=False`` when calling\n" \ + " `lz4.frame.compress_begin`) this function may buffer and retain\n" \ + " some or all of the compressed data for future calls to\n" \ + " `lz4.frame.compress`.\n" + ); + +PyDoc_STRVAR +( + compress_flush__doc, + "compress_flush(context, end_frame=True, return_bytearray=False)\n" \ + "\n" \ + "Flushes any buffered data held in the compression context.\n" \ + "\n" \ + "This flushes any data buffed in the compression context, returning it as\n" \ + "compressed data. 
The returned data should be appended to the output of\n" \ + "previous calls to ``lz4.frame.compress_chunk``.\n" \ + "\n" \ + "The ``end_frame`` argument specifies whether or not the frame should be\n" \ + "ended. If this is ``True`` and end of frame marker will be appended to\n" \ + "the returned data. In this case, if ``content_checksum`` was ``True``\n" \ + "when calling `lz4.frame.compress_begin`, then a checksum of the uncompressed\n" \ + "data will also be included in the returned data.\n" \ + "\n" \ + "If the ``end_frame`` argument is ``True``, the compression context will be\n" \ + "reset and can be re-used.\n" \ + "\n" \ + "Args:\n" \ + " context (cCtx): Compression context\n" \ + "\n" \ + "Keyword Args:\n" \ + " end_frame (bool): If ``True`` the frame will be ended. Default is\n" \ + " ``True``.\n" \ + " return_bytearray (bool): If ``True`` a ``bytearray`` object will\n" \ + " be returned. If ``False``, a ``bytes`` object is returned.\n" \ + " The default is ``False``.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: compressed data.\n" \ + "\n" \ + "Notes:\n" \ + " If ``end_frame`` is ``False`` but the underlying LZ4 library does not" \ + " support flushing without ending the frame, a ``RuntimeError`` will be\n" \ + " raised.\n" + ); + +PyDoc_STRVAR +( + get_frame_info__doc, + "get_frame_info(frame)\n\n" \ + "Given a frame of compressed data, returns information about the frame.\n" \ + "\n" \ + "Args:\n" \ + " frame (str, bytes or buffer-compatible object): LZ4 compressed frame\n" \ + "\n" \ + "Returns:\n" \ + " dict: Dictionary with keys:\n" \ + "\n" \ + " - ``block_size`` (int): the maximum size (in bytes) of each block\n" \ + " - ``block_size_id`` (int): identifier for maximum block size\n" \ + " - ``content_checksum`` (bool): specifies whether the frame\n" \ + " contains a checksum of the uncompressed content\n" \ + " - ``content_size`` (int): uncompressed size in bytes of\n" \ + " frame content\n" \ + " - ``block_linked`` (bool): specifies 
whether the frame contains\n" \ + " blocks which are independently compressed (``False``) or linked\n" \ + " linked (``True``)\n" \ + " - ``block_checksum`` (bool): specifies whether each block contains a\n" \ + " checksum of its contents\n" \ + " - ``skippable`` (bool): whether the block is skippable (``True``) or\n" \ + " not (``False``)\n" + ); + +PyDoc_STRVAR +( + create_decompression_context__doc, + "create_decompression_context()\n" \ + "\n" \ + "Creates a decompression context object.\n" \ + "\n" \ + "A decompression context is needed for decompression operations.\n" \ + "\n" \ + "Returns:\n" \ + " dCtx: A decompression context\n" + ); + +PyDoc_STRVAR +( + reset_decompression_context__doc, + "reset_decompression_context(context)\n" \ + "\n" \ + "Resets a decompression context object.\n" \ + "\n" \ + "This is useful for recovering from an error or for stopping an unfinished\n" \ + "decompression and starting a new one with the same context\n" \ + "\n" \ + "Args:\n" \ + " context (dCtx): A decompression context\n" + ); + +PyDoc_STRVAR +( + decompress__doc, + "decompress(data, return_bytearray=False, return_bytes_read=False)\n" \ + "\n" \ + "Decompresses a frame of data and returns it as a string of bytes.\n" \ + "\n" \ + "Args:\n" \ + " data (str, bytes or buffer-compatible object): data to decompress.\n" \ + " This should contain a complete LZ4 frame of compressed data.\n" \ + "\n" \ + "Keyword Args:\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be\n" \ + " returned. If ``False``, a string of bytes is returned. The\n" \ + " default is ``False``.\n" \ + " return_bytes_read (bool): If ``True`` then the number of bytes read\n" \ + " from ``data`` will also be returned. 
Default is ``False``\n" \ + "\n" \ + "Returns:\n" \ + " bytes/bytearray or tuple: Uncompressed data and optionally the number" \ + " of bytes read\n" \ + "\n" \ + " If the ``return_bytes_read`` argument is ``True`` this function\n" \ + " returns a tuple consisting of:\n" \ + "\n" \ + " - bytes or bytearray: Uncompressed data\n" \ + " - int: Number of bytes consumed from ``data``\n" + ); + +PyDoc_STRVAR +( + decompress_chunk__doc, + "decompress_chunk(context, data, max_length=-1)\n" \ + "\n" \ + "Decompresses part of a frame of compressed data.\n" \ + "\n" \ + "The returned uncompressed data should be concatenated with the data\n" \ + "returned from previous calls to `lz4.frame.decompress_chunk`\n" \ + "\n" \ + "Args:\n" \ + " context (dCtx): decompression context\n" \ + " data (str, bytes or buffer-compatible object): part of a LZ4\n" \ + " frame of compressed data\n" \ + "\n" \ + "Keyword Args:\n" \ + " max_length (int): if non-negative this specifies the maximum number\n" \ + " of bytes of uncompressed data to return. Default is ``-1``.\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be\n" \ + " returned.If ``False``, a string of bytes is returned. 
The\n" \ + " default is ``False``.\n" \ + "\n" \ + "Returns:\n" \ + " tuple: uncompressed data, bytes read, end of frame indicator\n" \ + "\n" \ + " This function returns a tuple consisting of:\n" \ + "\n" \ + " - The uncompressed data as a ``bytes`` or ``bytearray`` object\n" \ + " - The number of bytes consumed from input ``data`` as an ``int``\n" \ + " - The end of frame indicator as a ``bool``.\n" \ + "\n" + "The end of frame indicator is ``True`` if the end of the compressed\n" \ + "frame has been reached, or ``False`` otherwise\n" + ); + +static PyMethodDef module_methods[] = +{ + { + "create_compression_context", (PyCFunction) create_compression_context, + METH_NOARGS, create_compression_context__doc + }, + { + "compress", (PyCFunction) compress, + METH_VARARGS | METH_KEYWORDS, compress__doc + }, + { + "compress_begin", (PyCFunction) compress_begin, + METH_VARARGS | METH_KEYWORDS, compress_begin__doc + }, + { + "compress_chunk", (PyCFunction) compress_chunk, + METH_VARARGS | METH_KEYWORDS, compress_chunk__doc + }, + { + "compress_flush", (PyCFunction) compress_flush, + METH_VARARGS | METH_KEYWORDS, compress_flush__doc + }, + { + "get_frame_info", (PyCFunction) get_frame_info, + METH_VARARGS | METH_KEYWORDS, get_frame_info__doc + }, + { + "create_decompression_context", (PyCFunction) create_decompression_context, + METH_NOARGS, create_decompression_context__doc + }, + { + "reset_decompression_context", (PyCFunction) reset_decompression_context, + METH_VARARGS | METH_KEYWORDS, reset_decompression_context__doc + }, + { + "decompress", (PyCFunction) decompress, + METH_VARARGS | METH_KEYWORDS, decompress__doc + }, + { + "decompress_chunk", (PyCFunction) decompress_chunk, + METH_VARARGS | METH_KEYWORDS, decompress_chunk__doc + }, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +PyDoc_STRVAR(lz4frame__doc, + "A Python wrapper for the LZ4 frame protocol" + ); + +static struct PyModuleDef moduledef = +{ + PyModuleDef_HEAD_INIT, + "_frame", + lz4frame__doc, + -1, + 
module_methods +}; + +MODULE_INIT_FUNC (_frame) +{ + PyObject *module = PyModule_Create (&moduledef); + + if (module == NULL) + return NULL; + + PyModule_AddIntConstant (module, "BLOCKSIZE_DEFAULT", LZ4F_default); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX64KB", LZ4F_max64KB); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX256KB", LZ4F_max256KB); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX1MB", LZ4F_max1MB); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX4MB", LZ4F_max4MB); + + return module; +} diff --git a/contrib/python/lz4/py2/lz4/version.py b/contrib/python/lz4/py2/lz4/version.py new file mode 100644 index 0000000000..895ced4fdf --- /dev/null +++ b/contrib/python/lz4/py2/lz4/version.py @@ -0,0 +1,4 @@ +# coding: utf-8 +# file generated by setuptools_scm +# don't change, don't track in version control +version = '2.2.1' diff --git a/contrib/python/lz4/py2/tests/block/conftest.py b/contrib/python/lz4/py2/tests/block/conftest.py new file mode 100644 index 0000000000..089ce0f83c --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/conftest.py @@ -0,0 +1,111 @@ +import pytest +import os +import sys + + +test_data = [ + (b''), + (os.urandom(8 * 1024)), + (b'0' * 8 * 1024), + (bytearray(b'')), + (bytearray(os.urandom(8 * 1024))), + #(bytearray(open(os.path.join(os.path.dirname(__file__), 'numpy_byte_array.bin'), 'rb').read())) +] + +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b'')), + (memoryview(os.urandom(8 * 1024))) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'store_size': True + } + ), + ( + { + 'store_size': False + } + ), + ] +) +def store_size(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'return_bytearray': True + } + ), + ( + { + 'return_bytearray': False + } + ), + ] +) +def return_bytearray(request): + return request.param + + 
+@pytest.fixture +def c_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture +def d_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture( + params=[ + ('fast', None) + ] + [ + ('fast', {'acceleration': s}) for s in range(10) + ] + [ + ('high_compression', None) + ] + [ + ('high_compression', {'compression': s}) for s in range(17) + ] + [ + (None, None) + ] +) +def mode(request): + return request.param + + +dictionary = [ + None, + (0, 0), + (100, 200), + (0, 8 * 1024), + os.urandom(8 * 1024) +] + + +@pytest.fixture( + params=dictionary, + ids=[ + 'dictionary' + str(i) for i in range(len(dictionary)) + ] +) +def dictionary(request): + return request.param diff --git a/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin Binary files differnew file mode 100644 index 0000000000..49537e2d90 --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin diff --git a/contrib/python/lz4/py2/tests/block/test_block_0.py b/contrib/python/lz4/py2/tests/block/test_block_0.py new file mode 100644 index 0000000000..cca3e65b61 --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/test_block_0.py @@ -0,0 +1,92 @@ +import lz4.block +from multiprocessing.pool import ThreadPool +import sys +from functools import partial +if sys.version_info <= (3, 2): + import struct + + +def get_stored_size(buff): + if sys.version_info > (2, 7): + if isinstance(buff, memoryview): + b = buff.tobytes() + else: + b = bytes(buff) + else: + b = bytes(buff) + + if len(b) < 4: + return None + + if sys.version_info > (3, 2): + return int.from_bytes(b[:4], 'little') + else: + # This would not work on a memoryview object, hence buff.tobytes call + # above + return struct.unpack('<I', b[:4])[0] + + +def roundtrip(x, c_kwargs, d_kwargs, dictionary): + if dictionary: + if isinstance(dictionary, tuple): + d = x[dictionary[0]:dictionary[1]] + else: + d = dictionary + 
c_kwargs['dict'] = d + d_kwargs['dict'] = d + + c = lz4.block.compress(x, **c_kwargs) + + if c_kwargs['store_size']: + assert get_stored_size(c) == len(x) + else: + d_kwargs['uncompressed_size'] = len(x) + + return lz4.block.decompress(c, **d_kwargs) + + +def setup_kwargs(mode, store_size, c_return_bytearray=None, d_return_bytearray=None): + c_kwargs = {} + + if mode[0] is not None: + c_kwargs['mode'] = mode[0] + if mode[1] is not None: + c_kwargs.update(mode[1]) + + c_kwargs.update(store_size) + + if(c_return_bytearray): + c_kwargs.update(c_return_bytearray) + + d_kwargs = {} + + if(d_return_bytearray): + d_kwargs.update(d_return_bytearray) + + return (c_kwargs, d_kwargs) + + +# Test single threaded usage with all valid variations of input +def test_1(data, mode, store_size, c_return_bytearray, d_return_bytearray, dictionary): + (c_kwargs, d_kwargs) = setup_kwargs( + mode, store_size, c_return_bytearray, d_return_bytearray) + + d = roundtrip(data, c_kwargs, d_kwargs, dictionary) + + assert d == data + if d_return_bytearray['return_bytearray']: + assert isinstance(d, bytearray) + + +# Test multi threaded usage with all valid variations of input +def test_2(data, mode, store_size, dictionary): + (c_kwargs, d_kwargs) = setup_kwargs(mode, store_size) + + data_in = [data for i in range(32)] + + pool = ThreadPool(8) + rt = partial(roundtrip, c_kwargs=c_kwargs, + d_kwargs=d_kwargs, dictionary=dictionary) + data_out = pool.map(rt, data_in) + pool.close() + assert data_in == data_out diff --git a/contrib/python/lz4/py2/tests/block/test_block_1.py b/contrib/python/lz4/py2/tests/block/test_block_1.py new file mode 100644 index 0000000000..4392bb332c --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/test_block_1.py @@ -0,0 +1,149 @@ +import lz4.block +import pytest +import sys +import os + + +def test_decompress_ui32_overflow(): + data = lz4.block.compress(b'A' * 64) + with pytest.raises(OverflowError): + lz4.block.decompress(data[4:], uncompressed_size=((1 << 32) + 64)) 
+ + +def test_decompress_without_leak(): + # Verify that hand-crafted packet does not leak uninitialized(?) memory. + data = lz4.block.compress(b'A' * 64) + message = r'^Decompressor wrote 64 bytes, but 79 bytes expected from header$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(b'\x4f' + data[1:]) + + +def test_decompress_with_small_buffer(): + data = lz4.block.compress(b'A' * 64, store_size=False) + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(data[4:], uncompressed_size=64) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(data, uncompressed_size=60) + + +def test_decompress_truncated(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + compressed = lz4.block.compress(input_data) + # for i in range(len(compressed)): + # try: + # lz4.block.decompress(compressed[:i]) + # except: + # print(i, sys.exc_info()[0], sys.exc_info()[1]) + with pytest.raises(ValueError, match='Input source data size too small'): + lz4.block.decompress(compressed[:0]) + for n in [0, 1]: + with pytest.raises(ValueError, match='Input source data size too small'): + lz4.block.decompress(compressed[:n]) + for n in [24, 25, -2, 27, 67, 85]: + with pytest.raises(lz4.block.LZ4BlockError): + lz4.block.decompress(compressed[:n]) + + +def test_decompress_with_trailer(): + data = b'A' * 64 + comp = lz4.block.compress(data) + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. 
Error code: \d+$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + b'A') + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + comp) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + comp[4:]) + + +def test_unicode(): + if sys.version_info < (3,): + return # skip + DATA = b'x' + with pytest.raises(TypeError): + lz4.block.compress(DATA.decode('latin1')) + lz4.block.decompress(lz4.block.compress(DATA).decode('latin1')) + +# These next two are probably redundant given test_1 above but we'll keep them +# for now + + +def test_return_bytearray(): + if sys.version_info < (3,): + return # skip + data = os.urandom(128 * 1024) # Read 128kb + compressed = lz4.block.compress(data) + b = lz4.block.compress(data, return_bytearray=True) + assert isinstance(b, bytearray) + assert bytes(b) == compressed + b = lz4.block.decompress(compressed, return_bytearray=True) + assert isinstance(b, bytearray) + assert bytes(b) == data + + +def test_memoryview(): + if sys.version_info < (2, 7): + return # skip + data = os.urandom(128 * 1024) # Read 128kb + compressed = lz4.block.compress(data) + assert lz4.block.compress(memoryview(data)) == compressed + assert lz4.block.decompress(memoryview(compressed)) == data + + +def test_with_dict_none(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + for mode in ['default', 'high_compression']: + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode, dict=None)) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict=None) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode, dict=b'')) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict=b'') == input_data + assert lz4.block.decompress(lz4.block.compress( + 
input_data, mode=mode, dict='')) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict='') == input_data + + +def test_with_dict(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + dict1 = input_data[10:30] + dict2 = input_data[20:40] + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$' + for mode in ['default', 'high_compression']: + compressed = lz4.block.compress(input_data, mode=mode, dict=dict1) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(compressed) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(compressed, dict=dict1[:2]) + assert lz4.block.decompress(compressed, dict=dict2) != input_data + assert lz4.block.decompress(compressed, dict=dict1) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data), dict=dict1) == input_data + + +def test_known_decompress_1(): + input = b'\x00\x00\x00\x00\x00' + output = b'' + assert lz4.block.decompress(input) == output + + +def test_known_decompress_2(): + input = b'\x01\x00\x00\x00\x10 ' + output = b' ' + assert lz4.block.decompress(input) == output + + +def test_known_decompress_3(): + input = b'h\x00\x00\x00\xff\x0bLorem ipsum dolor sit amet\x1a\x006P amet' + output = b'Lorem ipsum dolor sit amet' * 4 + assert lz4.block.decompress(input) == output + + +def test_known_decompress_4(): + input = b'\xb0\xb3\x00\x00\xff\x1fExcepteur sint occaecat cupidatat non proident.\x00' + (b'\xff' * 180) + b'\x1ePident' + output = b'Excepteur sint occaecat cupidatat non proident' * 1000 + assert lz4.block.decompress(input) == output diff --git a/contrib/python/lz4/py2/tests/block/test_block_2.py b/contrib/python/lz4/py2/tests/block/test_block_2.py new file mode 100644 index 0000000000..87ceefb728 --- /dev/null +++ 
b/contrib/python/lz4/py2/tests/block/test_block_2.py @@ -0,0 +1,62 @@ +import pytest +import sys +import lz4.block +import psutil +import os + +# This test requires allocating a big lump of memory. In order to +# avoid a massive memory allocation during byte compilation, we have +# to declare a variable for the size of the buffer we're going to +# create outside the scope of the function below. See: +# https://bugs.python.org/issue21074 +_4GB = 0x100000000 # 4GB + +# This test will be killed on Travis due to the 3GB memory limit +# there. Unfortunately psutil reports the host memory, not the memory +# available to the container, and so can't be used to detect available +# memory, so instead, as an ugly hack for detecting we're on Travis we +# check for the TRAVIS environment variable being set. This is quite +# fragile. + + +@pytest.mark.skipif( + os.environ.get('TRAVIS') is not None, + reason='Skipping test on Travis due to insufficient memory' +) +@pytest.mark.skipif( + sys.maxsize < 0xffffffff, + reason='Py_ssize_t too small for this test' +) +@pytest.mark.skipif( + psutil.virtual_memory().total < _4GB, + reason='Insufficient system memory for this test' +) +def test_huge(): + try: + huge = b'\0' * _4GB + except MemoryError: + pytest.skip('Insufficient system memory for this test') + + with pytest.raises( + OverflowError, match='Input too large for LZ4 API' + ): + lz4.block.compress(huge) + + with pytest.raises( + OverflowError, match='Dictionary too large for LZ4 API' + ): + lz4.block.compress(b'', dict=huge) + + with pytest.raises( + OverflowError, match='Input too large for LZ4 API' + ): + lz4.block.decompress(huge) + + with pytest.raises( + OverflowError, match='Dictionary too large for LZ4 API' + ): + lz4.block.decompress(b'', dict=huge) + + +def test_dummy(): + pass diff --git a/contrib/python/lz4/py2/tests/block/test_block_3.py b/contrib/python/lz4/py2/tests/block/test_block_3.py new file mode 100644 index 0000000000..0c3fb0821d --- /dev/null +++ 
b/contrib/python/lz4/py2/tests/block/test_block_3.py @@ -0,0 +1,38 @@ +import lz4.block +import pytest + + +test_data = [ + (b'a' * 1024 * 1024), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_block_decompress_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + + tracemalloc.start() + + compressed = lz4.block.compress(data) + prev_snapshot = None + + for i in range(1000): + decompressed = lz4.block.decompress(compressed) # noqa: F841 + + if i % 100 == 0: + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < (1024 * 4) + + prev_snapshot = snapshot diff --git a/contrib/python/lz4/py2/tests/frame/__init__.py b/contrib/python/lz4/py2/tests/frame/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/__init__.py diff --git a/contrib/python/lz4/py2/tests/frame/conftest.py b/contrib/python/lz4/py2/tests/frame/conftest.py new file mode 100644 index 0000000000..5ab52c0ada --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/conftest.py @@ -0,0 +1,95 @@ +import pytest +import lz4.frame as lz4frame +import lz4 + + +@pytest.fixture( + params=[ + # (lz4frame.BLOCKSIZE_DEFAULT), + (lz4frame.BLOCKSIZE_MAX64KB), + (lz4frame.BLOCKSIZE_MAX256KB), + (lz4frame.BLOCKSIZE_MAX1MB), + (lz4frame.BLOCKSIZE_MAX4MB), + ] +) +def block_size(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False), + ] +) +def block_linked(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False), + ] +) +def content_checksum(request): + return request.param + + +if lz4.library_version_number() >= 10800: + p = [True, False] +else: + p = [False, ] + + +@pytest.fixture( + params=[ + (pp) for pp in p + ] +) +def block_checksum(request): + return request.param + + 
+compression_levels = [ + (lz4frame.COMPRESSIONLEVEL_MIN), + (lz4frame.COMPRESSIONLEVEL_MINHC), + (lz4frame.COMPRESSIONLEVEL_MAX), +] + + +@pytest.fixture( + params=compression_levels +) +def compression_level(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False) + ] +) +def auto_flush(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False) + ] +) +def store_size(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False), + ] +) +def return_bytearray(request): + return request.param diff --git a/contrib/python/lz4/py2/tests/frame/helpers.py b/contrib/python/lz4/py2/tests/frame/helpers.py new file mode 100644 index 0000000000..e6cb0c9ef0 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/helpers.py @@ -0,0 +1,44 @@ +import lz4.frame as lz4frame + + +def get_frame_info_check(compressed_data, + source_size, + store_size, + block_size, + block_linked, + content_checksum, + block_checksum): + + frame_info = lz4frame.get_frame_info(compressed_data) + + assert frame_info["content_checksum"] == content_checksum + assert frame_info["block_checksum"] == block_checksum + + assert frame_info["skippable"] is False + + if store_size is True: + assert frame_info["content_size"] == source_size + else: + assert frame_info["content_size"] == 0 + + if source_size > frame_info['block_size']: + # More than a single block + assert frame_info["block_linked"] == block_linked + + if block_size == lz4frame.BLOCKSIZE_DEFAULT: + assert frame_info["block_size_id"] == lz4frame.BLOCKSIZE_MAX64KB + else: + assert frame_info["block_size_id"] == block_size + + +def get_chunked(data, nchunks): + size = len(data) + # stride = int(math.ceil(float(size)/nchunks)) # no // on py 2.6 + stride = size // nchunks + start = 0 + end = start + stride + while end < size: + yield data[start:end] + start += stride + end += stride + yield data[start:] diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_0.py 
b/contrib/python/lz4/py2/tests/frame/test_frame_0.py new file mode 100644 index 0000000000..f03431d412 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_0.py @@ -0,0 +1,172 @@ +import lz4.frame as lz4frame +import lz4 +import re + + +def test_library_version_number(): + v = lz4.library_version_number() + assert isinstance(v, int) + assert v > 10000 + + +def test_library_version_string(): + v = lz4.library_version_string() + assert isinstance(v, str) + assert v.count('.') == 2 + r = re.compile(r'^[0-9]*\.[0-9]*\.[0-9]*$') + assert r.match(v) is not None + + +def test_create_compression_context(): + context = lz4frame.create_compression_context() + assert context is not None + + +def test_create_decompression_context(): + context = lz4frame.create_decompression_context() + assert context is not None + + +def test_reset_decompression_context_1(): + if lz4.library_version_number() >= 10800: + context = lz4frame.create_decompression_context() + r = lz4frame.reset_decompression_context(context) + assert r is None + else: + pass + + +def test_reset_decompression_context_2(): + if lz4.library_version_number() >= 10800: + c = lz4frame.compress(b'1234', return_bytearray=False) + context = lz4frame.create_decompression_context() + try: + # Simulate an error by passing junk to decompress + d = lz4frame.decompress_chunk(context, c[4:]) + except RuntimeError: + pass + r = lz4frame.reset_decompression_context(context) + assert r is None + # And confirm we can use the context after reset + d, bytes_read, eof = lz4frame.decompress_chunk(context, c) + assert d == b'1234' + assert bytes_read == len(c) + assert eof is True + else: + pass + + +def test_compress_return_type_1(): + r = lz4frame.compress(b'', return_bytearray=False) + assert isinstance(r, bytes) + + +def test_compress_return_type_2(): + r = lz4frame.compress(b'', return_bytearray=True) + assert isinstance(r, bytearray) + + +def test_decompress_return_type_1(): + c = lz4frame.compress(b'', 
return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=False, + return_bytes_read=False + ) + assert isinstance(r, bytes) + + +def test_decompress_return_type_2(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=True, + return_bytes_read=False + ) + assert isinstance(r, bytearray) + + +def test_decompress_return_type_3(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=False, + return_bytes_read=True + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytes) + assert isinstance(r[1], int) + + +def test_decompress_return_type_4(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=True, + return_bytes_read=True + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytearray) + assert isinstance(r[1], int) + + +def test_decompress_chunk_return_type_1(): + c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r, b, e = lz4frame.decompress_chunk( + d, + c, + return_bytearray=False, + ) + assert isinstance(r, bytes) + assert isinstance(b, int) + assert isinstance(e, bool) + + +def test_decompress_chunk_return_type_2(): + c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r, b, e = lz4frame.decompress_chunk( + d, + c, + return_bytearray=True, + ) + assert isinstance(r, bytearray) + assert isinstance(b, int) + assert isinstance(e, bool) + + +def test_decompress_chunk_return_type_3(): + c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r = lz4frame.decompress_chunk( + d, + c, + return_bytearray=False, + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytes) + assert isinstance(r[1], int) + assert isinstance(r[2], bool) + + +def test_decompress_chunk_return_type_4(): + c = lz4frame.compress(b'', return_bytearray=False) + d = 
lz4frame.create_decompression_context() + r = lz4frame.decompress_chunk( + d, + c, + return_bytearray=True, + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytearray) + assert isinstance(r[1], int) + assert isinstance(r[2], bool) + + +def test_block_size_constants(): + assert lz4frame.BLOCKSIZE_DEFAULT == 0 + assert lz4frame.BLOCKSIZE_MAX64KB == 4 + assert lz4frame.BLOCKSIZE_MAX256KB == 5 + assert lz4frame.BLOCKSIZE_MAX1MB == 6 + assert lz4frame.BLOCKSIZE_MAX4MB == 7 diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_1.py b/contrib/python/lz4/py2/tests/frame/test_frame_1.py new file mode 100644 index 0000000000..35110c44f1 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_1.py @@ -0,0 +1,111 @@ +import lz4.frame as lz4frame +import os +import sys +import pytest +from .helpers import get_frame_info_check + + +test_data = [ + (b''), + (os.urandom(8 * 1024)), + (b'0' * 8 * 1024), + (bytearray(b'')), + (bytearray(os.urandom(8 * 1024))), + (os.urandom(128 * 1024)), + (os.urandom(256 * 1024)), + (os.urandom(512 * 1024)), +] +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b'')), + (memoryview(os.urandom(8 * 1024))) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_roundtrip_1( + data, + block_size, + block_linked, + content_checksum, + block_checksum, + compression_level, + store_size): + + compressed = lz4frame.compress( + data, + store_size=store_size, + compression_level=compression_level, + block_size=block_size, + block_linked=block_linked, + content_checksum=content_checksum, + block_checksum=block_checksum, + ) + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + decompressed, bytes_read = lz4frame.decompress( + compressed, return_bytes_read=True) + assert bytes_read == len(compressed) + assert decompressed == data 
+ + +def test_roundtrip_2(data, + block_size, + block_linked, + content_checksum, + block_checksum, + compression_level, + auto_flush, + store_size): + + c_context = lz4frame.create_compression_context() + + kwargs = {} + kwargs['compression_level'] = compression_level + kwargs['block_size'] = block_size + kwargs['block_linked'] = block_linked + kwargs['content_checksum'] = content_checksum + kwargs['block_checksum'] = block_checksum + kwargs['auto_flush'] = auto_flush + if store_size is True: + kwargs['source_size'] = len(data) + + compressed = lz4frame.compress_begin( + c_context, + **kwargs + ) + compressed += lz4frame.compress_chunk( + c_context, + data + ) + compressed += lz4frame.compress_flush(c_context) + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + decompressed, bytes_read = lz4frame.decompress( + compressed, return_bytes_read=True) + assert bytes_read == len(compressed) + assert decompressed == data diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_2.py b/contrib/python/lz4/py2/tests/frame/test_frame_2.py new file mode 100644 index 0000000000..80b44b87ff --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_2.py @@ -0,0 +1,107 @@ +import lz4.frame as lz4frame +import pytest +import os +import sys +from . 
helpers import ( + get_chunked, + get_frame_info_check, +) + + +test_data = [ + (b'', 1, 1), + (os.urandom(8 * 1024), 8, 1), + (os.urandom(8 * 1024), 1, 8), + (b'0' * 8 * 1024, 8, 1), + (b'0' * 8 * 1024, 8, 1), + (bytearray(b''), 1, 1), + (bytearray(os.urandom(8 * 1024)), 8, 1), +] +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b''), 1, 1), + (memoryview(os.urandom(8 * 1024)), 8, 1) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_roundtrip_chunked(data, block_size, block_linked, + content_checksum, block_checksum, + compression_level, + auto_flush, store_size): + + data, c_chunks, d_chunks = data + + c_context = lz4frame.create_compression_context() + + kwargs = {} + kwargs['compression_level'] = compression_level + kwargs['block_size'] = block_size + kwargs['block_linked'] = block_linked + kwargs['content_checksum'] = content_checksum + kwargs['block_checksum'] = block_checksum + kwargs['auto_flush'] = auto_flush + if store_size is True: + kwargs['source_size'] = len(data) + + compressed = lz4frame.compress_begin( + c_context, + **kwargs + ) + data_in = get_chunked(data, c_chunks) + try: + while True: + compressed += lz4frame.compress_chunk( + c_context, + next(data_in) + ) + except StopIteration: + pass + finally: + del data_in + + compressed += lz4frame.compress_flush(c_context) + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + + d_context = lz4frame.create_decompression_context() + compressed_in = get_chunked(compressed, d_chunks) + decompressed = b'' + bytes_read = 0 + eofs = [] + try: + while True: + d, b, e = lz4frame.decompress_chunk( + d_context, + next(compressed_in), + ) + decompressed += d + bytes_read += b + eofs.append(e) + + except StopIteration: + pass + finally: + del compressed_in + + assert bytes_read == len(compressed) + 
assert decompressed == data + assert eofs[-1] is True + assert (True in eofs[:-2]) is False diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_3.py b/contrib/python/lz4/py2/tests/frame/test_frame_3.py new file mode 100644 index 0000000000..a7835a46c9 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_3.py @@ -0,0 +1,57 @@ +import lz4.frame as lz4frame +import pytest +import os +import struct + +test_data = [ + (os.urandom(256 * 1024)), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_decompress_truncated(data): + compressed = lz4frame.compress(data) + + message = r'^LZ4F_getFrameInfo failed with code: ERROR_frameHeader_incomplete' + with pytest.raises(RuntimeError, match=message): + lz4frame.decompress(compressed[:6]) + + for i in range(16, len(compressed) - 1, 5): # 15 is the max size of the header + message = r'^Frame incomplete. LZ4F_decompress returned:' + try: + lz4frame.decompress(compressed[:i]) + except RuntimeError as r: + print(r) + with pytest.raises(RuntimeError, match=message): + lz4frame.decompress(compressed[:i]) + + +def test_content_checksum_failure(data): + compressed = lz4frame.compress(data, content_checksum=True) + message = r'^LZ4F_decompress failed with code: ERROR_contentChecksum_invalid$' + with pytest.raises(RuntimeError, match=message): + last = struct.unpack('B', compressed[-1:])[0] + lz4frame.decompress(compressed[:-1] + struct.pack('B', last ^ 0x42)) + + +def test_block_checksum_failure(data): + compressed = lz4frame.compress( + data, + content_checksum=True, + block_checksum=True, + return_bytearray=True, + ) + message = r'^LZ4F_decompress failed with code: ERROR_blockChecksum_invalid$' + if len(compressed) > 32: + with pytest.raises(RuntimeError, match=message): + compressed[22] = compressed[18] ^ 0x42 + lz4frame.decompress(compressed) diff --git 
def test_roundtrip_LZ4FrameCompressor(
        data,
        chunks,
        block_size,
        block_linked,
        reset,
        store_size,
        block_checksum,
        content_checksum):
    """Round-trip via the LZ4FrameCompressor context manager.

    Optionally resets the compressor and compresses a second time to
    verify that reset() returns it to a reusable state, then checks the
    frame header and a full decompression of the result.
    """
    with lz4frame.LZ4FrameCompressor(
        block_size=block_size,
        block_linked=block_linked,
        content_checksum=content_checksum,
        block_checksum=block_checksum,
    ) as compressor:
        def run_compression():
            # Only pass source_size when the test asks for it to be stored.
            begin_kwargs = {'source_size': len(data)} if store_size is True else {}
            frame = compressor.begin(**begin_kwargs)
            for piece in get_chunked(data, chunks):
                frame += compressor.compress(piece)
            return frame + compressor.flush()

        compressed = run_compression()

        if reset is True:
            compressor.reset()
            compressed = run_compression()

        get_frame_info_check(
            compressed,
            len(data),
            store_size,
            block_size,
            block_linked,
            content_checksum,
            block_checksum,
        )

        decompressed, bytes_read = lz4frame.decompress(
            compressed, return_bytes_read=True)
        assert data == decompressed
        assert bytes_read == len(compressed)
def test_frame_decompress_mem_usage(data):
    """Repeated decompress calls must not leak memory.

    Snapshots heap usage with tracemalloc every 100 iterations and
    asserts the largest per-line growth stays under MEM_INCREASE_LIMIT.
    """
    tracemalloc = pytest.importorskip('tracemalloc')

    tracemalloc.start()

    compressed = lz4.frame.compress(data)
    baseline = None

    for iteration in range(1000):
        decompressed = lz4.frame.decompress(compressed)  # noqa: F841

        if iteration % 100 != 0:
            continue
        gc.collect()
        current = tracemalloc.take_snapshot()
        if baseline is not None:
            top_stat = current.compare_to(baseline, 'lineno')[0]
            assert top_stat.size_diff < MEM_INCREASE_LIMIT
        baseline = current
def test_lz4frame_open_write_read_text():
    """Text-mode ('wt'/'rt') round trip through lz4frame.open."""
    payload = u'This is a test string'
    with lz4frame.open('testfile', mode='wt') as fp:
        fp.write(payload)
    with lz4frame.open('testfile', mode='rt') as fp:
        restored = fp.read()
    assert restored == payload
def test_roundtrip_multiframe_2(data):
    """Four frames built with an explicit context, decompressed frame by frame.

    Each lz4frame.decompress call consumes one frame from the front of
    the buffer, so calling it nframes times yields all payloads.
    """
    nframes = 4

    ctx = lz4frame.create_compression_context()
    pieces = []
    for _ in range(nframes):
        pieces.append(lz4frame.compress_begin(ctx))
        pieces.append(lz4frame.compress_chunk(ctx, data))
        pieces.append(lz4frame.compress_flush(ctx))
    compressed = b''.join(pieces)

    decompressed = b''.join(
        lz4frame.decompress(compressed) for _ in range(nframes)
    )

    assert len(decompressed) == nframes * len(data)
    assert data * nframes == decompressed
def test_roundtrip_multiframe_4(data):
    """Round-trip four frames through LZ4FrameCompressor/LZ4FrameDecompressor.

    After every frame, checks eof, needs_input, and that unused_data
    shrinks by exactly one frame's worth of bytes until it is exhausted.
    """
    nframes = 4

    with lz4frame.LZ4FrameCompressor() as compressor:
        frames = []
        for _ in range(nframes):
            frames.append(compressor.begin())
            frames.append(compressor.compress(data))
            frames.append(compressor.flush())
        compressed = b''.join(frames)

    decompressed = b''
    with lz4frame.LZ4FrameDecompressor() as decompressor:
        pending = compressed
        for i in range(nframes):
            decompressed += decompressor.decompress(pending)
            assert decompressor.eof is True
            assert decompressor.needs_input is True
            if i == nframes - 1:
                assert decompressor.unused_data is None
            else:
                remaining = len(compressed) * (nframes - i - 1) / nframes
                assert len(decompressor.unused_data) == remaining
                # Feed the leftover bytes back in for the next frame.
                pending = decompressor.unused_data

    assert len(decompressed) == nframes * len(data)
    assert data * nframes == decompressed
def test_issue_172_3():
    """Partial read, then an over-long read, in binary mode (issue 172).

    Issue 172 was a Windows-only failure caused by incorrect Py_ssize_t
    handling; this exercises both a short read and a read larger than
    the file's contents.
    """
    input_data = 9 * os.urandom(1024)
    with lz4.frame.open('testfile_small', 'wb') as fp:
        bytes_written = fp.write(input_data)  # noqa: F841

    with lz4.frame.open('testfile_small', 'rb') as fp:
        chunk = fp.read(10)
    assert len(chunk) == 10

    # Ask for more bytes than the file holds; read() must stop at EOF.
    with lz4.frame.open('testfile_small', 'rb') as fp:
        chunk = fp.read(16 * 1024 - 1)
    assert len(chunk) == 9 * 1024
    assert chunk == input_data
frame/test_frame_7.py + frame/test_frame_8.py + frame/test_frame_9.py +) + +NO_LINT() + +END() diff --git a/contrib/python/lz4/py2/ya.make b/contrib/python/lz4/py2/ya.make new file mode 100644 index 0000000000..b2beb920c7 --- /dev/null +++ b/contrib/python/lz4/py2/ya.make @@ -0,0 +1,55 @@ +# Generated by devtools/yamaker (pypi). + +PY2_LIBRARY() + +VERSION(2.2.1) + +LICENSE(BSD-3-Clause) + +PEERDIR( + contrib/libs/lz4 + contrib/python/future + contrib/python/py3c +) + +ADDINCL( + contrib/libs/lz4 + contrib/python/py3c +) + +NO_COMPILER_WARNINGS() + +NO_LINT() + +SRCS( + lz4/_version.c + lz4/block/_block.c + lz4/frame/_frame.c +) + +PY_REGISTER( + lz4._version + lz4.block._block + lz4.frame._frame +) + +PY_SRCS( + TOP_LEVEL + lz4/__init__.py + lz4/block/__init__.py + lz4/frame/__init__.py + lz4/frame/_compression.py + lz4/version.py +) + +RESOURCE_FILES( + PREFIX contrib/python/lz4/py2/ + .dist-info/METADATA + .dist-info/top_level.txt +) + +END() + +RECURSE_FOR_TESTS( + tests +) diff --git a/contrib/python/lz4/py3/.dist-info/METADATA b/contrib/python/lz4/py3/.dist-info/METADATA new file mode 100644 index 0000000000..bd01d4f829 --- /dev/null +++ b/contrib/python/lz4/py3/.dist-info/METADATA @@ -0,0 +1,99 @@ +Metadata-Version: 2.1 +Name: lz4 +Version: 4.3.2 +Summary: LZ4 Bindings for Python +Home-page: https://github.com/python-lz4/python-lz4 +Author: Jonathan Underwood +Author-email: jonathan.underwood@gmail.com +Classifier: Development Status :: 5 - Production/Stable +Classifier: License :: OSI Approved :: BSD License +Classifier: Intended Audience :: Developers +Classifier: Programming Language :: C +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Requires-Python: >=3.7 +License-File: LICENSE +Provides-Extra: docs 
+Requires-Dist: sphinx (>=1.6.0) ; extra == 'docs' +Requires-Dist: sphinx-bootstrap-theme ; extra == 'docs' +Provides-Extra: flake8 +Requires-Dist: flake8 ; extra == 'flake8' +Provides-Extra: tests +Requires-Dist: pytest (!=3.3.0) ; extra == 'tests' +Requires-Dist: psutil ; extra == 'tests' +Requires-Dist: pytest-cov ; extra == 'tests' + +========== +python-lz4 +========== + +Status +====== + +.. image:: https://github.com/python-lz4/python-lz4/actions/workflows/build_dist.yml/badge.svg + :target: https://github.com/python-lz4/python-lz4/actions/workflows/build_dist.yml + :alt: Build Status + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +.. image:: https://codecov.io/gh/python-lz4/python-lz4/branch/codecov/graph/badge.svg + :target: https://codecov.io/gh/python-lz4/python-lz4 + :alt: CodeCov + + +Introduction +============ +This package provides python bindings for the `LZ4 compression library +<https://lz4.github.io/lz4/>`_. + +The production ready bindings provided in this package cover the `frame format +<https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md>`_, and the +`block format <https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md>`_ +specifications. The frame format bindings are the recommended ones to use, as +this guarantees interoperability with other implementations and language +bindings. + +Experimental bindings for the the `streaming format +<https://github.com/lz4/lz4/blob/master/examples/streaming_api_basics.md>`_ +specification are also included, but further work on those is required. + +The API provided by the frame format bindings follows that of the LZMA, zlib, +gzip and bzip2 compression libraries which are provided with the Python standard +library. As such, these LZ4 bindings should provide a drop-in alternative to the +compression libraries shipped with Python. 
The package provides context managers +and file handler support. + +The bindings drop the GIL when calling in to the underlying LZ4 library, and is +thread safe. An extensive test suite is included. + +Documentation +============= + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +Full documentation is included with the project. The documentation is +generated using Sphinx. Documentation is also hosted on readthedocs. + +:master: http://python-lz4.readthedocs.io/en/stable/ +:development: http://python-lz4.readthedocs.io/en/latest/ + +Homepage +======== + +The `project homepage <https://www.github.com/python-lz4/python-lz4>`_ is hosted +on Github. Please report any issues you find using the `issue tracker +<https://github.com/python-lz4/python-lz4/issues>`_. + +Licensing +========= +Code specific to this project is covered by the `BSD 3-Clause License +<http://opensource.org/licenses/BSD-3-Clause>`_ + diff --git a/contrib/python/lz4/py3/.dist-info/top_level.txt b/contrib/python/lz4/py3/.dist-info/top_level.txt new file mode 100644 index 0000000000..4ef6877a79 --- /dev/null +++ b/contrib/python/lz4/py3/.dist-info/top_level.txt @@ -0,0 +1 @@ +lz4 diff --git a/contrib/python/lz4/py3/LICENSE b/contrib/python/lz4/py3/LICENSE new file mode 100644 index 0000000000..518770111c --- /dev/null +++ b/contrib/python/lz4/py3/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2012-2013, Steeve Morin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of Steeve Morin nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/python/lz4/py3/README.rst b/contrib/python/lz4/py3/README.rst new file mode 100644 index 0000000000..a75bc4d4e6 --- /dev/null +++ b/contrib/python/lz4/py3/README.rst @@ -0,0 +1,70 @@ +========== +python-lz4 +========== + +Status +====== + +.. image:: https://github.com/python-lz4/python-lz4/actions/workflows/build_dist.yml/badge.svg + :target: https://github.com/python-lz4/python-lz4/actions/workflows/build_dist.yml + :alt: Build Status + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +.. 
image:: https://codecov.io/gh/python-lz4/python-lz4/branch/codecov/graph/badge.svg + :target: https://codecov.io/gh/python-lz4/python-lz4 + :alt: CodeCov + + +Introduction +============ +This package provides python bindings for the `LZ4 compression library +<https://lz4.github.io/lz4/>`_. + +The production ready bindings provided in this package cover the `frame format +<https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md>`_, and the +`block format <https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md>`_ +specifications. The frame format bindings are the recommended ones to use, as +this guarantees interoperability with other implementations and language +bindings. + +Experimental bindings for the the `streaming format +<https://github.com/lz4/lz4/blob/master/examples/streaming_api_basics.md>`_ +specification are also included, but further work on those is required. + +The API provided by the frame format bindings follows that of the LZMA, zlib, +gzip and bzip2 compression libraries which are provided with the Python standard +library. As such, these LZ4 bindings should provide a drop-in alternative to the +compression libraries shipped with Python. The package provides context managers +and file handler support. + +The bindings drop the GIL when calling in to the underlying LZ4 library, and is +thread safe. An extensive test suite is included. + +Documentation +============= + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +Full documentation is included with the project. The documentation is +generated using Sphinx. Documentation is also hosted on readthedocs. + +:master: http://python-lz4.readthedocs.io/en/stable/ +:development: http://python-lz4.readthedocs.io/en/latest/ + +Homepage +======== + +The `project homepage <https://www.github.com/python-lz4/python-lz4>`_ is hosted +on Github. 
Please report any issues you find using the `issue tracker +<https://github.com/python-lz4/python-lz4/issues>`_. + +Licensing +========= +Code specific to this project is covered by the `BSD 3-Clause License +<http://opensource.org/licenses/BSD-3-Clause>`_ + diff --git a/contrib/python/lz4/py3/lz4/__init__.py b/contrib/python/lz4/py3/lz4/__init__.py new file mode 100644 index 0000000000..6f2f2430b8 --- /dev/null +++ b/contrib/python/lz4/py3/lz4/__init__.py @@ -0,0 +1,19 @@ +# Although the canonical way to get the package version is using pkg_resources +# as below, this turns out to be very slow on systems with lots of packages. +# So, until that is remedied, we'll import the version from a local file +# created by setuptools_scm. + +# from pkg_resources import get_distribution, DistributionNotFound +# try: +# __version__ = get_distribution(__name__).version +# except DistributionNotFound: +# # package is not installed +# pass + +from .version import version as __version__ +from ._version import ( # noqa: F401 + library_version_number, + library_version_string, +) + +VERSION = __version__ diff --git a/contrib/python/lz4/py3/lz4/_version.c b/contrib/python/lz4/py3/lz4/_version.c new file mode 100644 index 0000000000..c611f0b361 --- /dev/null +++ b/contrib/python/lz4/py3/lz4/_version.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Jonathan Underwood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of Steeve Morin nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <Python.h> + +#include <stdlib.h> +#include <lz4.h> +#include <lz4hc.h> + +static PyObject * +library_version_number (PyObject * Py_UNUSED (self), PyObject * Py_UNUSED (args)) +{ + return Py_BuildValue ("i", LZ4_versionNumber ()); +} + +static PyObject * +library_version_string (PyObject * Py_UNUSED (self), PyObject * Py_UNUSED (args)) +{ + return Py_BuildValue ("s", LZ4_versionString ()); +} + +PyDoc_STRVAR +( + library_version_number__doc, + "library_version_number()\n\n" \ + "Returns the version number of the LZ4 library.\n" \ + "\n" \ + "Args:\n" \ + " None\n" \ + "\n" \ + "Returns:\n" \ + " int: version number eg. 10705" + ); + +PyDoc_STRVAR +( + library_version_string__doc, + "library_version_string()\n\n" \ + "Returns the version number of the LZ4 library as a string\n" \ + "containing the semantic version.\n" \ + "\n" \ + "Args:\n" \ + " None\n" \ + "\n" \ + "Returns:\n" \ + " str: version number eg. 
\"1.7.5\"" + ); + +static PyMethodDef module_methods[] = { + { + "library_version_number", + (PyCFunction) library_version_number, + METH_VARARGS, + library_version_number__doc + }, + { + "library_version_string", + (PyCFunction) library_version_string, + METH_VARARGS, + library_version_string__doc + }, + { + /* Sentinel */ + NULL, + NULL, + 0, + NULL + } +}; + +static struct PyModuleDef moduledef = + { + PyModuleDef_HEAD_INIT, + "_version", + NULL, + -1, + module_methods + }; + +PyMODINIT_FUNC +PyInit__version(void) +{ + PyObject *module = PyModule_Create (&moduledef); + + if (module == NULL) + return NULL; + + return module; +} diff --git a/contrib/python/lz4/py3/lz4/block/__init__.py b/contrib/python/lz4/py3/lz4/block/__init__.py new file mode 100644 index 0000000000..6662bab4de --- /dev/null +++ b/contrib/python/lz4/py3/lz4/block/__init__.py @@ -0,0 +1 @@ +from ._block import compress, decompress, LZ4BlockError # noqa: F401 diff --git a/contrib/python/lz4/py3/lz4/block/_block.c b/contrib/python/lz4/py3/lz4/block/_block.c new file mode 100644 index 0000000000..3e904a0344 --- /dev/null +++ b/contrib/python/lz4/py3/lz4/block/_block.c @@ -0,0 +1,522 @@ +/* + * Copyright (c) 2012-2018, Steeve Morin, Jonathan Underwood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Steeve Morin nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. 
/* Store x into c[0..3] in little-endian byte order. */
static inline void
store_le32 (char *c, uint32_t x)
{
  c[0] = x & 0xff;
  c[1] = (x >> 8) & 0xff;
  c[2] = (x >> 16) & 0xff;
  c[3] = (x >> 24) & 0xff;
}

/* Load a 32-bit little-endian value from c[0..3]. */
static inline uint32_t
load_le32 (const char *c)
{
  const uint8_t *d = (const uint8_t *) c;
  /* Widen to uint32_t before shifting: uint8_t promotes to (signed) int,
     so the original `d[3] << 24` shifted into the sign bit — undefined
     behavior whenever the top byte is >= 0x80. */
  return (uint32_t) d[0]
    | ((uint32_t) d[1] << 8)
    | ((uint32_t) d[2] << 16)
    | ((uint32_t) d[3] << 24);
}
(uint32_t); + +typedef enum +{ + DEFAULT, + FAST, + HIGH_COMPRESSION +} compression_type; + +static PyObject * LZ4BlockError; + +static inline int +lz4_compress_generic (int comp, char* source, char* dest, int source_size, int dest_size, + char* dict, int dict_size, int acceleration, int compression) +{ + if (comp != HIGH_COMPRESSION) + { + LZ4_stream_t lz4_state; + LZ4_resetStream (&lz4_state); + if (dict) + { + LZ4_loadDict (&lz4_state, dict, dict_size); + } + if (comp != FAST) + { + acceleration = 1; + } + return LZ4_compress_fast_continue (&lz4_state, source, dest, source_size, dest_size, acceleration); + } + else + { + LZ4_streamHC_t lz4_state; + LZ4_resetStreamHC (&lz4_state, compression); + if (dict) + { + LZ4_loadDictHC (&lz4_state, dict, dict_size); + } + return LZ4_compress_HC_continue (&lz4_state, source, dest, source_size, dest_size); + } +} + +#ifdef inline +#undef inline +#endif + +static PyObject * +compress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs) +{ + const char *mode = "default"; + size_t dest_size, total_size; + int acceleration = 1; + int compression = 9; + int store_size = 1; + PyObject *py_dest; + char *dest, *dest_start; + compression_type comp; + int output_size; + Py_buffer source; + int source_size; + int return_bytearray = 0; + Py_buffer dict = {0}; + static char *argnames[] = { + "source", + "mode", + "store_size", + "acceleration", + "compression", + "return_bytearray", + "dict", + NULL + }; + + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "y*|spiipz*", argnames, + &source, + &mode, &store_size, &acceleration, &compression, + &return_bytearray, &dict)) + { + return NULL; + } + + if (source.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Input too large for LZ4 API"); + return NULL; + } + + if (dict.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Dictionary too large for LZ4 
API"); + return NULL; + } + + source_size = (int) source.len; + + if (!strncmp (mode, "default", sizeof ("default"))) + { + comp = DEFAULT; + } + else if (!strncmp (mode, "fast", sizeof ("fast"))) + { + comp = FAST; + } + else if (!strncmp (mode, "high_compression", sizeof ("high_compression"))) + { + comp = HIGH_COMPRESSION; + } + else + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format (PyExc_ValueError, + "Invalid mode argument: %s. Must be one of: standard, fast, high_compression", + mode); + return NULL; + } + + dest_size = LZ4_compressBound (source_size); + + if (store_size) + { + total_size = dest_size + hdr_size; + } + else + { + total_size = dest_size; + } + + dest = PyMem_Malloc (total_size * sizeof * dest); + if (dest == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + + if (store_size) + { + store_le32 (dest, source_size); + dest_start = dest + hdr_size; + } + else + { + dest_start = dest; + } + + output_size = lz4_compress_generic (comp, source.buf, dest_start, source_size, + (int) dest_size, dict.buf, (int) dict.len, + acceleration, compression); + + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + + if (output_size <= 0) + { + PyErr_SetString (LZ4BlockError, "Compression failed"); + PyMem_Free (dest); + return NULL; + } + + if (store_size) + { + output_size += (int) hdr_size; + } + + if (return_bytearray) + { + py_dest = PyByteArray_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + else + { + py_dest = PyBytes_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + + PyMem_Free (dest); + + if (py_dest == NULL) + { + return PyErr_NoMemory (); + } + + return py_dest; +} + +static PyObject * +decompress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs) +{ + Py_buffer source; + const char * source_start; + size_t source_size; + PyObject *py_dest; + char *dest; + int output_size; + size_t dest_size; + int uncompressed_size = -1; + int return_bytearray = 
0; + Py_buffer dict = {0}; + static char *argnames[] = { + "source", + "uncompressed_size", + "return_bytearray", + "dict", + NULL + }; + + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "y*|ipz*", argnames, + &source, &uncompressed_size, + &return_bytearray, &dict)) + { + return NULL; + } + + if (source.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Input too large for LZ4 API"); + return NULL; + } + + if (dict.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Dictionary too large for LZ4 API"); + return NULL; + } + + source_start = (const char *) source.buf; + source_size = (int) source.len; + + if (uncompressed_size >= 0) + { + dest_size = uncompressed_size; + } + else + { + if (source_size < hdr_size) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_SetString (PyExc_ValueError, "Input source data size too small"); + return NULL; + } + dest_size = load_le32 (source_start); + source_start += hdr_size; + source_size -= hdr_size; + } + + if (dest_size > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format (PyExc_ValueError, "Invalid size: 0x%zu", + dest_size); + return NULL; + } + + dest = PyMem_Malloc (dest_size * sizeof * dest); + if (dest == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + + output_size = + LZ4_decompress_safe_usingDict (source_start, dest, source_size, (int) dest_size, + dict.buf, (int) dict.len); + + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + + if (output_size < 0) + { + PyErr_Format (LZ4BlockError, + "Decompression failed: corrupt input or insufficient space in destination buffer. 
Error code: %u", + -output_size); + PyMem_Free (dest); + return NULL; + } + else if (((size_t)output_size != dest_size) && (uncompressed_size < 0)) + { + PyErr_Format (LZ4BlockError, + "Decompressor wrote %u bytes, but %zu bytes expected from header", + output_size, dest_size); + PyMem_Free (dest); + return NULL; + } + + if (return_bytearray) + { + py_dest = PyByteArray_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + else + { + py_dest = PyBytes_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + + PyMem_Free (dest); + + if (py_dest == NULL) + { + return PyErr_NoMemory (); + } + + return py_dest; +} + +PyDoc_STRVAR(compress__doc, + "compress(source, mode='default', acceleration=1, compression=0, return_bytearray=False)\n\n" \ + "Compress source, returning the compressed data as a string.\n" \ + "Raises an exception if any error occurs.\n" \ + "\n" \ + "Args:\n" \ + " source (str, bytes or buffer-compatible object): Data to compress\n" \ + "\n" \ + "Keyword Args:\n" \ + " mode (str): If ``'default'`` or unspecified use the default LZ4\n" \ + " compression mode. Set to ``'fast'`` to use the fast compression\n" \ + " LZ4 mode at the expense of compression. Set to\n" \ + " ``'high_compression'`` to use the LZ4 high-compression mode at\n" \ + " the exepense of speed.\n" \ + " acceleration (int): When mode is set to ``'fast'`` this argument\n" \ + " specifies the acceleration. The larger the acceleration, the\n" \ + " faster the but the lower the compression. The default\n" \ + " compression corresponds to a value of ``1``.\n" \ + " compression (int): When mode is set to ``high_compression`` this\n" \ + " argument specifies the compression. Valid values are between\n" \ + " ``1`` and ``12``. 
Values between ``4-9`` are recommended, and\n" \ + " ``9`` is the default.\n" + " store_size (bool): If ``True`` (the default) then the size of the\n" \ + " uncompressed data is stored at the start of the compressed\n" \ + " block.\n" \ + " return_bytearray (bool): If ``False`` (the default) then the function\n" \ + " will return a bytes object. If ``True``, then the function will\n" \ + " return a bytearray object.\n\n" \ + " dict (str, bytes or buffer-compatible object): If specified, perform\n" \ + " compression using this initial dictionary.\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data.\n"); + +PyDoc_STRVAR(decompress__doc, + "decompress(source, uncompressed_size=-1, return_bytearray=False)\n\n" \ + "Decompress source, returning the uncompressed data as a string.\n" \ + "Raises an exception if any error occurs.\n" \ + "\n" \ + "Args:\n" \ + " source (str, bytes or buffer-compatible object): Data to decompress.\n" \ + "\n" \ + "Keyword Args:\n" \ + " uncompressed_size (int): If not specified or negative, the uncompressed\n" \ + " data size is read from the start of the source block. If specified,\n" \ + " it is assumed that the full source data is compressed data. If this\n" \ + " argument is specified, it is considered to be a maximum possible size\n" \ + " for the buffer used to hold the uncompressed data, and so less data\n" \ + " may be returned. If `uncompressed_size` is too small, `LZ4BlockError`\n" \ + " will be raised. By catching `LZ4BlockError` it is possible to increase\n" \ + " `uncompressed_size` and try again.\n" \ + " return_bytearray (bool): If ``False`` (the default) then the function\n" \ + " will return a bytes object. 
If ``True``, then the function will\n" \ + " return a bytearray object.\n\n" \ + " dict (str, bytes or buffer-compatible object): If specified, perform\n" \ + " decompression using this initial dictionary.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Decompressed data.\n" \ + "\n" \ + "Raises:\n" \ + " LZ4BlockError: raised if the call to the LZ4 library fails. This can be\n" \ + " caused by `uncompressed_size` being too small, or invalid data.\n"); + +PyDoc_STRVAR(lz4block__doc, + "A Python wrapper for the LZ4 block protocol" + ); + +static PyMethodDef module_methods[] = { + { + "compress", + (PyCFunction) compress, + METH_VARARGS | METH_KEYWORDS, + compress__doc + }, + { + "decompress", + (PyCFunction) decompress, + METH_VARARGS | METH_KEYWORDS, + decompress__doc + }, + { + /* Sentinel */ + NULL, + NULL, + 0, + NULL + } +}; + +static struct PyModuleDef moduledef = +{ + PyModuleDef_HEAD_INIT, + "_block", + lz4block__doc, + -1, + module_methods +}; + +PyMODINIT_FUNC +PyInit__block(void) +{ + PyObject *module = PyModule_Create (&moduledef); + + if (module == NULL) + return NULL; + + PyModule_AddIntConstant (module, "HC_LEVEL_MIN", LZ4HC_CLEVEL_MIN); + PyModule_AddIntConstant (module, "HC_LEVEL_DEFAULT", LZ4HC_CLEVEL_DEFAULT); + PyModule_AddIntConstant (module, "HC_LEVEL_OPT_MIN", LZ4HC_CLEVEL_OPT_MIN); + PyModule_AddIntConstant (module, "HC_LEVEL_MAX", LZ4HC_CLEVEL_MAX); + + LZ4BlockError = PyErr_NewExceptionWithDoc("_block.LZ4BlockError", "Call to LZ4 library failed.", NULL, NULL); + if (LZ4BlockError == NULL) + { + return NULL; + } + Py_INCREF(LZ4BlockError); + PyModule_AddObject(module, "LZ4BlockError", LZ4BlockError); + + return module; +} diff --git a/contrib/python/lz4/py3/lz4/frame/__init__.py b/contrib/python/lz4/py3/lz4/frame/__init__.py new file mode 100644 index 0000000000..00f3e64adf --- /dev/null +++ b/contrib/python/lz4/py3/lz4/frame/__init__.py @@ -0,0 +1,894 @@ +import lz4 +import io +import os +import builtins +import sys +from ._frame 
import ( # noqa: F401 + compress, + decompress, + create_compression_context, + compress_begin, + compress_chunk, + compress_flush, + create_decompression_context, + reset_decompression_context, + decompress_chunk, + get_frame_info, + BLOCKSIZE_DEFAULT as _BLOCKSIZE_DEFAULT, + BLOCKSIZE_MAX64KB as _BLOCKSIZE_MAX64KB, + BLOCKSIZE_MAX256KB as _BLOCKSIZE_MAX256KB, + BLOCKSIZE_MAX1MB as _BLOCKSIZE_MAX1MB, + BLOCKSIZE_MAX4MB as _BLOCKSIZE_MAX4MB, + __doc__ as _doc +) + +__doc__ = _doc + +try: + import _compression # Python 3.6 and later +except ImportError: + from . import _compression + + +BLOCKSIZE_DEFAULT = _BLOCKSIZE_DEFAULT +"""Specifier for the default block size. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_DEFAULT`` will instruct the LZ4 +library to use the default maximum blocksize. This is currently equivalent to +`lz4.frame.BLOCKSIZE_MAX64KB` + +""" + +BLOCKSIZE_MAX64KB = _BLOCKSIZE_MAX64KB +"""Specifier for a maximum block size of 64 kB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX64KB`` will instruct the LZ4 +library to create blocks containing a maximum of 64 kB of uncompressed data. + +""" + +BLOCKSIZE_MAX256KB = _BLOCKSIZE_MAX256KB +"""Specifier for a maximum block size of 256 kB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX256KB`` will instruct the LZ4 +library to create blocks containing a maximum of 256 kB of uncompressed data. + +""" + +BLOCKSIZE_MAX1MB = _BLOCKSIZE_MAX1MB +"""Specifier for a maximum block size of 1 MB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX1MB`` will instruct the LZ4 +library to create blocks containing a maximum of 1 MB of uncompressed data. + +""" + +BLOCKSIZE_MAX4MB = _BLOCKSIZE_MAX4MB +"""Specifier for a maximum block size of 4 MB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX4MB`` will instruct the LZ4 +library to create blocks containing a maximum of 4 MB of uncompressed data. + +""" + +COMPRESSIONLEVEL_MIN = 0 +"""Specifier for the minimum compression level. 
+ +Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MIN`` will +instruct the LZ4 library to use a compression level of 0 + +""" + +COMPRESSIONLEVEL_MINHC = 3 +"""Specifier for the minimum compression level for high compression mode. + +Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MINHC`` will +instruct the LZ4 library to use a compression level of 3, the minimum for the +high compression mode. + +""" + +COMPRESSIONLEVEL_MAX = 16 +"""Specifier for the maximum compression level. + +Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MAX`` will +instruct the LZ4 library to use a compression level of 16, the highest +compression level available. + +""" + + +class LZ4FrameCompressor(object): + """Create a LZ4 frame compressor object. + + This object can be used to compress data incrementally. + + Args: + block_size (int): Specifies the maximum blocksize to use. + Options: + + - `lz4.frame.BLOCKSIZE_DEFAULT`: the lz4 library default + - `lz4.frame.BLOCKSIZE_MAX64KB`: 64 kB + - `lz4.frame.BLOCKSIZE_MAX256KB`: 256 kB + - `lz4.frame.BLOCKSIZE_MAX1MB`: 1 MB + - `lz4.frame.BLOCKSIZE_MAX4MB`: 4 MB + + If unspecified, will default to `lz4.frame.BLOCKSIZE_DEFAULT` which + is equal to `lz4.frame.BLOCKSIZE_MAX64KB`. + block_linked (bool): Specifies whether to use block-linked + compression. If ``True``, the compression ratio is improved, + especially for small block sizes. If ``False`` the blocks are + compressed independently. The default is ``True``. + compression_level (int): Specifies the level of compression used. + Values between 0-16 are valid, with 0 (default) being the + lowest compression (0-2 are the same value), and 16 the highest. + Values above 16 will be treated as 16. + Values between 4-9 are recommended. 0 is the default. 
+ The following module constants are provided as a convenience: + + - `lz4.frame.COMPRESSIONLEVEL_MIN`: Minimum compression (0) + - `lz4.frame.COMPRESSIONLEVEL_MINHC`: Minimum high-compression (3) + - `lz4.frame.COMPRESSIONLEVEL_MAX`: Maximum compression (16) + + content_checksum (bool): Specifies whether to enable checksumming of + the payload content. If ``True``, a checksum of the uncompressed + data is stored at the end of the compressed frame which is checked + during decompression. The default is ``False``. + block_checksum (bool): Specifies whether to enable checksumming of + the content of each block. If ``True`` a checksum of the + uncompressed data in each block in the frame is stored at the end + of each block. If present, these checksums will be used to + validate the data during decompression. The default is ``False``, + meaning block checksums are not calculated and stored. This + functionality is only supported if the underlying LZ4 library has + version >= 1.8.0. Attempting to set this value to ``True`` with a + version of LZ4 < 1.8.0 will cause a ``RuntimeError`` to be raised. + auto_flush (bool): When ``False``, the LZ4 library may buffer data + until a block is full. When ``True`` no buffering occurs, and + partially full blocks may be returned. The default is ``False``. + return_bytearray (bool): When ``False`` a ``bytes`` object is returned + from the calls to methods of this class. When ``True`` a + ``bytearray`` object will be returned. The default is ``False``. 
+ + """ + + def __init__(self, + block_size=BLOCKSIZE_DEFAULT, + block_linked=True, + compression_level=COMPRESSIONLEVEL_MIN, + content_checksum=False, + block_checksum=False, + auto_flush=False, + return_bytearray=False): + self.block_size = block_size + self.block_linked = block_linked + self.compression_level = compression_level + self.content_checksum = content_checksum + if block_checksum and lz4.library_version_number() < 10800: + raise RuntimeError( + 'Attempt to set block_checksum to True with LZ4 library' + 'version < 10800' + ) + self.block_checksum = block_checksum + self.auto_flush = auto_flush + self.return_bytearray = return_bytearray + self._context = None + self._started = False + + def __enter__(self): + # All necessary initialization is done in __init__ + return self + + def __exit__(self, exception_type, exception, traceback): + self.block_size = None + self.block_linked = None + self.compression_level = None + self.content_checksum = None + self.block_checksum = None + self.auto_flush = None + self.return_bytearray = None + self._context = None + self._started = False + + def begin(self, source_size=0): + """Begin a compression frame. + + The returned data contains frame header information. The data returned + from subsequent calls to ``compress()`` should be concatenated with + this header. + + Keyword Args: + source_size (int): Optionally specify the total size of the + uncompressed data. If specified, will be stored in the + compressed frame header as an 8-byte field for later use + during decompression. Default is 0 (no size stored). 
+ + Returns: + bytes or bytearray: frame header data + + """ + + if self._started is False: + self._context = create_compression_context() + result = compress_begin( + self._context, + block_size=self.block_size, + block_linked=self.block_linked, + compression_level=self.compression_level, + content_checksum=self.content_checksum, + block_checksum=self.block_checksum, + auto_flush=self.auto_flush, + return_bytearray=self.return_bytearray, + source_size=source_size, + ) + self._started = True + return result + else: + raise RuntimeError( + "LZ4FrameCompressor.begin() called after already initialized" + ) + + def compress(self, data): # noqa: F811 + """Compresses data and returns it. + + This compresses ``data`` (a ``bytes`` object), returning a bytes or + bytearray object containing compressed data the input. + + If ``auto_flush`` has been set to ``False``, some of ``data`` may be + buffered internally, for use in later calls to + `LZ4FrameCompressor.compress()` and `LZ4FrameCompressor.flush()`. + + The returned data should be concatenated with the output of any + previous calls to `compress()` and a single call to + `compress_begin()`. + + Args: + data (str, bytes or buffer-compatible object): data to compress + + Returns: + bytes or bytearray: compressed data + + """ + if self._context is None: + raise RuntimeError('compress called after flush()') + + if self._started is False: + raise RuntimeError('compress called before compress_begin()') + + result = compress_chunk( + self._context, data, + return_bytearray=self.return_bytearray + ) + + return result + + def flush(self): + """Finish the compression process. + + This returns a ``bytes`` or ``bytearray`` object containing any data + stored in the compressor's internal buffers and a frame footer. + + The LZ4FrameCompressor instance may be re-used after this method has + been called to create a new frame of compressed data. + + Returns: + bytes or bytearray: compressed data and frame footer. 
+ + """ + result = compress_flush( + self._context, + end_frame=True, + return_bytearray=self.return_bytearray + ) + self._context = None + self._started = False + return result + + def reset(self): + """Reset the `LZ4FrameCompressor` instance. + + This allows the `LZ4FrameCompression` instance to be re-used after an + error. + + """ + self._context = None + self._started = False + + def has_context(self): + """Return whether the compression context exists. + + Returns: + bool: ``True`` if the compression context exists, ``False`` + otherwise. + """ + return self._context is not None + + def started(self): + """Return whether the compression frame has been started. + + Returns: + bool: ``True`` if the compression frame has been started, ``False`` + otherwise. + """ + return self._started + + +class LZ4FrameDecompressor(object): + """Create a LZ4 frame decompressor object. + + This can be used to decompress data incrementally. + + For a more convenient way of decompressing an entire compressed frame at + once, see `lz4.frame.decompress()`. + + Args: + return_bytearray (bool): When ``False`` a bytes object is returned from + the calls to methods of this class. When ``True`` a bytearray + object will be returned. The default is ``False``. + + Attributes: + eof (bool): ``True`` if the end-of-stream marker has been reached. + ``False`` otherwise. + unused_data (bytes): Data found after the end of the compressed stream. + Before the end of the frame is reached, this will be ``b''``. + needs_input (bool): ``False`` if the ``decompress()`` method can + provide more decompressed data before requiring new uncompressed + input. ``True`` otherwise. 
+ + """ + + def __init__(self, return_bytearray=False): + self._context = create_decompression_context() + self.eof = False + self.needs_input = True + self.unused_data = None + self._unconsumed_data = b'' + self._return_bytearray = return_bytearray + + def __enter__(self): + # All necessary initialization is done in __init__ + return self + + def __exit__(self, exception_type, exception, traceback): + self._context = None + self.eof = None + self.needs_input = None + self.unused_data = None + self._unconsumed_data = None + self._return_bytearray = None + + def reset(self): + """Reset the decompressor state. + + This is useful after an error occurs, allowing re-use of the instance. + + """ + reset_decompression_context(self._context) + self.eof = False + self.needs_input = True + self.unused_data = None + self._unconsumed_data = b'' + + def decompress(self, data, max_length=-1): # noqa: F811 + """Decompresses part or all of an LZ4 frame of compressed data. + + The returned data should be concatenated with the output of any + previous calls to `decompress()`. + + If ``max_length`` is non-negative, returns at most ``max_length`` bytes + of decompressed data. If this limit is reached and further output can + be produced, the `needs_input` attribute will be set to ``False``. In + this case, the next call to `decompress()` may provide data as + ``b''`` to obtain more of the output. In all cases, any unconsumed data + from previous calls will be prepended to the input data. + + If all of the input ``data`` was decompressed and returned (either + because this was less than ``max_length`` bytes, or because + ``max_length`` was negative), the `needs_input` attribute will be set + to ``True``. + + If an end of frame marker is encountered in the data during + decompression, decompression will stop at the end of the frame, and any + data after the end of frame is available from the `unused_data` + attribute. 
In this case, the `LZ4FrameDecompressor` instance is reset + and can be used for further decompression. + + Args: + data (str, bytes or buffer-compatible object): compressed data to + decompress + + Keyword Args: + max_length (int): If this is non-negative, this method returns at + most ``max_length`` bytes of decompressed data. + + Returns: + bytes: Uncompressed data + + """ + if not isinstance(data, (bytes, bytearray)): + data = memoryview(data).tobytes() + + if self._unconsumed_data: + data = self._unconsumed_data + data + + decompressed, bytes_read, eoframe = decompress_chunk( + self._context, + data, + max_length=max_length, + return_bytearray=self._return_bytearray, + ) + + if bytes_read < len(data): + if eoframe: + self.unused_data = data[bytes_read:] + else: + self._unconsumed_data = data[bytes_read:] + self.needs_input = False + else: + self._unconsumed_data = b'' + self.needs_input = True + self.unused_data = None + + self.eof = eoframe + + return decompressed + + +_MODE_CLOSED = 0 +_MODE_READ = 1 +# Value 2 no longer used +_MODE_WRITE = 3 + + +class LZ4FrameFile(_compression.BaseStream): + """A file object providing transparent LZ4F (de)compression. + + An LZ4FFile can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + Note that LZ4FFile provides a *binary* file interface - data read is + returned as bytes, and data to be written must be given as bytes. + + When opening a file for writing, the settings used by the compressor can be + specified. The underlying compressor object is + `lz4.frame.LZ4FrameCompressor`. See the docstrings for that class for + details on compression options. + + Args: + filename(str, bytes, PathLike, file object): can be either an actual + file name (given as a str, bytes, or + PathLike object), in which case the named file is opened, or it + can be an existing file object to read from or write to. 
+ + Keyword Args: + mode(str): mode can be ``'r'`` for reading (default), ``'w'`` for + (over)writing, ``'x'`` for creating exclusively, or ``'a'`` + for appending. These can equivalently be given as ``'rb'``, + ``'wb'``, ``'xb'`` and ``'ab'`` respectively. + return_bytearray (bool): When ``False`` a bytes object is returned from + the calls to methods of this class. When ``True`` a ``bytearray`` + object will be returned. The default is ``False``. + source_size (int): Optionally specify the total size of the + uncompressed data. If specified, will be stored in the compressed + frame header as an 8-byte field for later use during decompression. + Default is ``0`` (no size stored). Only used for writing + compressed files. + block_size (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_linked (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + compression_level (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + content_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + auto_flush (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. 
+ + """ + + def __init__(self, filename=None, mode='r', + block_size=BLOCKSIZE_DEFAULT, + block_linked=True, + compression_level=COMPRESSIONLEVEL_MIN, + content_checksum=False, + block_checksum=False, + auto_flush=False, + return_bytearray=False, + source_size=0): + + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + if mode in ('r', 'rb'): + mode_code = _MODE_READ + elif mode in ('w', 'wb', 'a', 'ab', 'x', 'xb'): + mode_code = _MODE_WRITE + self._compressor = LZ4FrameCompressor( + block_size=block_size, + block_linked=block_linked, + compression_level=compression_level, + content_checksum=content_checksum, + block_checksum=block_checksum, + auto_flush=auto_flush, + return_bytearray=return_bytearray, + ) + self._pos = 0 + else: + raise ValueError('Invalid mode: {!r}'.format(mode)) + + if sys.version_info > (3, 6): + path_test = isinstance(filename, (str, bytes, os.PathLike)) + else: + path_test = isinstance(filename, (str, bytes)) + + if path_test is True: + if 'b' not in mode: + mode += 'b' + self._fp = builtins.open(filename, mode) + self._closefp = True + self._mode = mode_code + elif hasattr(filename, 'read') or hasattr(filename, 'write'): + self._fp = filename + self._mode = mode_code + else: + raise TypeError( + 'filename must be a str, bytes, file or PathLike object' + ) + + if self._mode == _MODE_READ: + raw = _compression.DecompressReader(self._fp, LZ4FrameDecompressor) + self._buffer = io.BufferedReader(raw) + + if self._mode == _MODE_WRITE: + self._source_size = source_size + self._fp.write(self._compressor.begin(source_size=source_size)) + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. 
+ """ + if self._mode == _MODE_CLOSED: + return + try: + if self._mode == _MODE_READ: + self._buffer.close() + self._buffer = None + elif self._mode == _MODE_WRITE: + self.flush() + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + @property + def closed(self): + """Returns ``True`` if this file is closed. + + Returns: + bool: ``True`` if the file is closed, ``False`` otherwise. + + """ + return self._mode == _MODE_CLOSED + + def fileno(self): + """Return the file descriptor for the underlying file. + + Returns: + file object: file descriptor for file. + + """ + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking. + + Returns: + bool: ``True`` if the file supports seeking, ``False`` otherwise. + + """ + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading. + + Returns: + bool: ``True`` if the file was opened for reading, ``False`` + otherwise. + + """ + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing. + + Returns: + bool: ``True`` if the file was opened for writing, ``False`` + otherwise. + + """ + self._check_not_closed() + return self._mode == _MODE_WRITE + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. The exact + number of bytes returned is unspecified. 
+ + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + # Relies on the undocumented fact that BufferedReader.peek() always + # returns at least one byte (except at EOF) + return self._buffer.peek(size) + + def readall(self): + chunks = bytearray() + + while True: + data = self.read(io.DEFAULT_BUFFER_SIZE) + chunks += data + if not data: + break + + return bytes(chunks) + + def read(self, size=-1): + """Read up to ``size`` uncompressed bytes from the file. + + If ``size`` is negative or omitted, read until ``EOF`` is reached. + Returns ``b''`` if the file is already at ``EOF``. + + Args: + size(int): If non-negative, specifies the maximum number of + uncompressed bytes to return. + + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + + if size < 0 and sys.version_info >= (3, 10): + return self.readall() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to ``size`` uncompressed bytes. + + This method tries to avoid making multiple reads from the underlying + stream. + + This method reads up to a buffer's worth of data if ``size`` is + negative. + + Returns ``b''`` if the file is at EOF. + + Args: + size(int): If non-negative, specifies the maximum number of + uncompressed bytes to return. + + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + if size < 0: + size = io.DEFAULT_BUFFER_SIZE + return self._buffer.read1(size) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which case the + line may be incomplete). Returns b'' if already at EOF. + + Args: + size(int): If non-negative, specifies the maximum number of + uncompressed bytes to return. + + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + return self._buffer.readline(size) + + def write(self, data): + """Write a bytes object to the file. 
+ + Returns the number of uncompressed bytes written, which is + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until close() + is called. + + Args: + data(bytes): uncompressed data to compress and write to the file + + Returns: + int: the number of uncompressed bytes written to the file + + """ + if isinstance(data, (bytes, bytearray)): + length = len(data) + else: + # accept any data that supports the buffer protocol + data = memoryview(data) + length = data.nbytes + + self._check_can_write() + + if not self._compressor.started(): + header = self._compressor.begin(source_size=self._source_size) + self._fp.write(header) + + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += length + return length + + def flush(self): + """Flush the file, keeping it open. + + May be called more than once without error. The file may continue + to be used normally after flushing. + """ + if self.writable() and self._compressor.has_context(): + self._fp.write(self._compressor.flush()) + self._fp.flush() + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by ``offset``, relative to the position + indicated by ``whence``. Possible values for ``whence`` are: + + - ``io.SEEK_SET`` or 0: start of stream (default): offset must not be + negative + - ``io.SEEK_CUR`` or 1: current stream position + - ``io.SEEK_END`` or 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the parameters, this + operation may be extremely slow. + + Args: + offset(int): new position in the file + whence(int): position with which ``offset`` is measured. Allowed + values are 0, 1, 2. The default is 0 (start of stream). 
+ + Returns: + int: new file position + + """ + self._check_can_seek() + return self._buffer.seek(offset, whence) + + def tell(self): + """Return the current file position. + + Args: + None + + Returns: + int: file position + + """ + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + return self._pos + + +def open(filename, mode="rb", + encoding=None, + errors=None, + newline=None, + block_size=BLOCKSIZE_DEFAULT, + block_linked=True, + compression_level=COMPRESSIONLEVEL_MIN, + content_checksum=False, + block_checksum=False, + auto_flush=False, + return_bytearray=False, + source_size=0): + """Open an LZ4Frame-compressed file in binary or text mode. + + ``filename`` can be either an actual file name (given as a str, bytes, or + PathLike object), in which case the named file is opened, or it can be an + existing file object to read from or write to. + + The ``mode`` argument can be ``'r'``, ``'rb'`` (default), ``'w'``, + ``'wb'``, ``'x'``, ``'xb'``, ``'a'``, or ``'ab'`` for binary mode, or + ``'rt'``, ``'wt'``, ``'xt'``, or ``'at'`` for text mode. + + For binary mode, this function is equivalent to the `LZ4FrameFile` + constructor: `LZ4FrameFile(filename, mode, ...)`. + + For text mode, an `LZ4FrameFile` object is created, and wrapped in an + ``io.TextIOWrapper`` instance with the specified encoding, error handling + behavior, and line ending(s). + + Args: + filename (str, bytes, os.PathLike): file name or file object to open + + Keyword Args: + mode (str): mode for opening the file + encoding (str): the name of the encoding that will be used for + encoding/deconging the stream. It defaults to + ``locale.getpreferredencoding(False)``. See ``io.TextIOWrapper`` + for further details. + errors (str): specifies how encoding and decoding errors are to be + handled. See ``io.TextIOWrapper`` for further details. + newline (str): controls how line endings are handled. See + ``io.TextIOWrapper`` for further details. 
+ return_bytearray (bool): When ``False`` a bytes object is returned + from the calls to methods of this class. When ``True`` a bytearray + object will be returned. The default is ``False``. + source_size (int): Optionally specify the total size of the + uncompressed data. If specified, will be stored in the compressed + frame header as an 8-byte field for later use during decompression. + Default is 0 (no size stored). Only used for writing compressed + files. + block_size (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_linked (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + compression_level (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + content_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + auto_flush (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + + """ + if 't' in mode: + if 'b' in mode: + raise ValueError('Invalid mode: %r' % (mode,)) + else: + if encoding is not None: + raise ValueError( + "Argument 'encoding' not supported in binary mode" + ) + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + _mode = mode.replace('t', '') + + binary_file = LZ4FrameFile( + filename, + mode=_mode, + block_size=block_size, + block_linked=block_linked, + compression_level=compression_level, + content_checksum=content_checksum, + block_checksum=block_checksum, + auto_flush=auto_flush, + return_bytearray=return_bytearray, + source_size=source_size, + ) + + if 't' in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file diff --git a/contrib/python/lz4/py3/lz4/frame/_frame.c b/contrib/python/lz4/py3/lz4/frame/_frame.c new file mode 100644 index 0000000000..34606653b0 --- /dev/null +++ 
b/contrib/python/lz4/py3/lz4/frame/_frame.c @@ -0,0 +1,1681 @@ +/* + * Copyright (c) 2015, 2016 Jerry Ryle and Jonathan G. Underwood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +#if defined(_WIN32) && defined(_MSC_VER) +#define inline __inline +#elif defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX) +#define inline +#endif + +#include <Python.h> + +#include <stdlib.h> +#include <lz4.h> /* Needed for LZ4_VERSION_NUMBER only. 
*/ +#include <lz4frame.h> + +static const char * compression_context_capsule_name = "_frame.LZ4F_cctx"; +static const char * decompression_context_capsule_name = "_frame.LZ4F_dctx"; + +struct compression_context +{ + LZ4F_cctx * context; + LZ4F_preferences_t preferences; +}; + +/***************************** +* create_compression_context * +******************************/ +static void +destroy_compression_context (PyObject * py_context) +{ +#ifndef PyCapsule_Type + struct compression_context *context = + PyCapsule_GetPointer (py_context, compression_context_capsule_name); +#else + /* Compatibility with 2.6 via capsulethunk. */ + struct compression_context *context = py_context; +#endif + Py_BEGIN_ALLOW_THREADS + LZ4F_freeCompressionContext (context->context); + Py_END_ALLOW_THREADS + + PyMem_Free (context); +} + +static PyObject * +create_compression_context (PyObject * Py_UNUSED (self)) +{ + struct compression_context * context; + LZ4F_errorCode_t result; + + context = + (struct compression_context *) + PyMem_Malloc (sizeof (struct compression_context)); + + if (!context) + { + return PyErr_NoMemory (); + } + + Py_BEGIN_ALLOW_THREADS + + result = + LZ4F_createCompressionContext (&context->context, + LZ4F_VERSION); + Py_END_ALLOW_THREADS + + if (LZ4F_isError (result)) + { + LZ4F_freeCompressionContext (context->context); + PyMem_Free (context); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createCompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + return PyCapsule_New (context, compression_context_capsule_name, + destroy_compression_context); +} + +/************ + * compress * + ************/ +static PyObject * +compress (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + Py_buffer source; + Py_ssize_t source_size; + int store_size = 1; + int return_bytearray = 0; + int content_checksum = 0; + int block_checksum = 0; + int block_linked = 1; + LZ4F_preferences_t preferences; + size_t destination_size; + 
size_t compressed_size; + PyObject *py_destination; + char *destination; + + static char *kwlist[] = { "data", + "compression_level", + "block_size", + "content_checksum", + "block_checksum", + "block_linked", + "store_size", + "return_bytearray", + NULL + }; + + + memset (&preferences, 0, sizeof preferences); + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "y*|iippppp", kwlist, + &source, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &store_size, + &return_bytearray)) + { + return NULL; + } + + if (content_checksum) + { + preferences.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + } + else + { + preferences.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + } + + if (block_linked) + { + preferences.frameInfo.blockMode = LZ4F_blockLinked; + } + else + { + preferences.frameInfo.blockMode = LZ4F_blockIndependent; + } + + if (LZ4_versionNumber() >= 10800) + { + if (block_checksum) + { + preferences.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; + } + else + { + preferences.frameInfo.blockChecksumFlag = LZ4F_noBlockChecksum; + } + } + else if (block_checksum) + { + PyErr_SetString (PyExc_RuntimeError, + "block_checksum specified but not supported by LZ4 library version"); + return NULL; + } + + source_size = source.len; + + preferences.autoFlush = 0; + if (store_size) + { + preferences.frameInfo.contentSize = source_size; + } + else + { + preferences.frameInfo.contentSize = 0; + } + + Py_BEGIN_ALLOW_THREADS + destination_size = + LZ4F_compressFrameBound (source_size, &preferences); + Py_END_ALLOW_THREADS + + if (destination_size > PY_SSIZE_T_MAX) + { + PyBuffer_Release(&source); + PyErr_Format (PyExc_ValueError, + "Input data could require %zu bytes, which is larger than the maximum supported size of %zd bytes", + destination_size, PY_SSIZE_T_MAX); + return NULL; + } + + destination = PyMem_Malloc (destination_size * sizeof * destination); + if 
(destination == NULL) + { + PyBuffer_Release(&source); + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + compressed_size = + LZ4F_compressFrame (destination, destination_size, source.buf, source_size, + &preferences); + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + + if (LZ4F_isError (compressed_size)) + { + PyMem_Free (destination); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressFrame failed with code: %s", + LZ4F_getErrorName (compressed_size)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) compressed_size); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) compressed_size); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * compress_begin * + ******************/ +static PyObject * +compress_begin (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + PyObject *py_context = NULL; + Py_ssize_t source_size = (Py_ssize_t) 0; + int return_bytearray = 0; + int content_checksum = 0; + int block_checksum = 0; + int block_linked = 1; + LZ4F_preferences_t preferences; + PyObject *py_destination; + char * destination; + /* The destination buffer needs to be large enough for a header, which is 15 + * bytes. Unfortunately, the lz4 library doesn't provide a #define for this. + * We over-allocate to allow for larger headers in the future. 
*/ + const size_t header_size = 32; + struct compression_context *context; + size_t result; + static char *kwlist[] = { "context", + "source_size", + "compression_level", + "block_size", + "content_checksum", + "block_checksum", + "block_linked", + "auto_flush", + "return_bytearray", + NULL + }; + + memset (&preferences, 0, sizeof preferences); + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiippppp", kwlist, + &py_context, + &source_size, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &preferences.autoFlush, + &return_bytearray + )) + { + return NULL; + } + + if (content_checksum) + { + preferences.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + } + else + { + preferences.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + } + + if (block_linked) + { + preferences.frameInfo.blockMode = LZ4F_blockLinked; + } + else + { + preferences.frameInfo.blockMode = LZ4F_blockIndependent; + } + + if (LZ4_versionNumber() >= 10800) + { + if (block_checksum) + { + preferences.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; + } + else + { + preferences.frameInfo.blockChecksumFlag = LZ4F_noBlockChecksum; + } + } + else if (block_checksum) + { + PyErr_SetString (PyExc_RuntimeError, + "block_checksum specified but not supported by LZ4 library version"); + return NULL; + } + + if (block_linked) + { + preferences.frameInfo.blockMode = LZ4F_blockLinked; + } + else + { + preferences.frameInfo.blockMode = LZ4F_blockIndependent; + } + + + preferences.frameInfo.contentSize = source_size; + + context = + (struct compression_context *) PyCapsule_GetPointer (py_context, compression_context_capsule_name); + + if (!context || !context->context) + { + PyErr_SetString (PyExc_ValueError, "No valid compression context supplied"); + return NULL; + } + + context->preferences = preferences; + + destination = PyMem_Malloc (header_size * sizeof * destination); + if (destination == 
NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + result = LZ4F_compressBegin (context->context, + destination, + header_size, + &context->preferences); + Py_END_ALLOW_THREADS + + if (LZ4F_isError (result)) + { + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressBegin failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) result); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) result); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * compress_chunk * + ******************/ +static PyObject * +compress_chunk (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + PyObject *py_context = NULL; + Py_buffer source; + Py_ssize_t source_size; + struct compression_context *context; + size_t compressed_bound; + PyObject *py_destination; + char *destination; + LZ4F_compressOptions_t compress_options; + size_t result; + int return_bytearray = 0; + static char *kwlist[] = { "context", + "data", + "return_bytearray", + NULL + }; + + memset (&compress_options, 0, sizeof compress_options); + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Oy*|p", kwlist, + &py_context, + &source, + &return_bytearray)) + { + return NULL; + } + + source_size = source.len; + + context = + (struct compression_context *) PyCapsule_GetPointer (py_context, compression_context_capsule_name); + if (!context || !context->context) + { + PyBuffer_Release(&source); + PyErr_Format (PyExc_ValueError, "No compression context supplied"); + return NULL; + } + + /* If autoFlush is enabled, then the destination buffer only needs to be as + big as LZ4F_compressFrameBound specifies for this source size. 
However, if + autoFlush is disabled, previous calls may have resulted in buffered data, + and so we need instead to use LZ4F_compressBound to find the size required + for the destination buffer. This means that with autoFlush disabled we may + frequently allocate more memory than needed. */ + Py_BEGIN_ALLOW_THREADS + if (context->preferences.autoFlush == 1) + { + compressed_bound = + LZ4F_compressFrameBound (source_size, &context->preferences); + } + else + { + compressed_bound = + LZ4F_compressBound (source_size, &context->preferences); + } + Py_END_ALLOW_THREADS + + if (compressed_bound > PY_SSIZE_T_MAX) + { + PyBuffer_Release(&source); + PyErr_Format (PyExc_ValueError, + "input data could require %zu bytes, which is larger than the maximum supported size of %zd bytes", + compressed_bound, PY_SSIZE_T_MAX); + return NULL; + } + + destination = PyMem_Malloc (compressed_bound * sizeof * destination); + if (destination == NULL) + { + PyBuffer_Release(&source); + return PyErr_NoMemory(); + } + + compress_options.stableSrc = 0; + + Py_BEGIN_ALLOW_THREADS + result = + LZ4F_compressUpdate (context->context, destination, + compressed_bound, source.buf, source_size, + &compress_options); + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + + if (LZ4F_isError (result)) + { + PyMem_Free (destination); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressUpdate failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) result); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) result); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * compress_flush * + ******************/ +static PyObject * +compress_flush (PyObject * Py_UNUSED (self), PyObject * args, PyObject * keywds) +{ + PyObject *py_context = NULL; + 
LZ4F_compressOptions_t compress_options; + struct compression_context *context; + size_t destination_size; + int return_bytearray = 0; + int end_frame = 1; + PyObject *py_destination; + char * destination; + size_t result; + static char *kwlist[] = { "context", + "end_frame", + "return_bytearray", + NULL + }; + + memset (&compress_options, 0, sizeof compress_options); + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|pp", kwlist, + &py_context, + &end_frame, + &return_bytearray)) + { + return NULL; + } + + if (!end_frame && LZ4_versionNumber() < 10800) + { + PyErr_SetString (PyExc_RuntimeError, + "Flush without ending a frame is not supported with this version of the LZ4 library"); + return NULL; + } + + context = + (struct compression_context *) PyCapsule_GetPointer (py_context, compression_context_capsule_name); + if (!context || !context->context) + { + PyErr_SetString (PyExc_ValueError, "No compression context supplied"); + return NULL; + } + + compress_options.stableSrc = 0; + + /* Calling LZ4F_compressBound with srcSize equal to 0 returns a size + sufficient to fit (i) any remaining buffered data (when autoFlush is + disabled) and the footer size, which is either 4 or 8 bytes depending on + whether checksums are enabled. See: https://github.com/lz4/lz4/issues/280 + and https://github.com/lz4/lz4/issues/290. Prior to 1.7.5, it was necessary + to call LZ4F_compressBound with srcSize equal to 1. Since we now require a + minimum version to 1.7.5 we'll call this with srcSize equal to 0. 
*/ + Py_BEGIN_ALLOW_THREADS + destination_size = LZ4F_compressBound (0, &(context->preferences)); + Py_END_ALLOW_THREADS + + destination = PyMem_Malloc (destination_size * sizeof * destination); + if (destination == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + if (end_frame) + { + result = + LZ4F_compressEnd (context->context, destination, + destination_size, &compress_options); + } + else + { + result = + LZ4F_flush (context->context, destination, + destination_size, &compress_options); + } + Py_END_ALLOW_THREADS + + if (LZ4F_isError (result)) + { + PyMem_Free (destination); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressEnd failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) result); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) result); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * get_frame_info * + ******************/ +static PyObject * +get_frame_info (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + Py_buffer py_source; + char *source; + size_t source_size; + LZ4F_decompressionContext_t context; + LZ4F_frameInfo_t frame_info; + size_t result; + unsigned int block_size; + unsigned int block_size_id; + int block_linked; + int content_checksum; + int block_checksum; + int skippable; + + static char *kwlist[] = { "data", + NULL + }; + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "y*", kwlist, + &py_source)) + { + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + PyBuffer_Release (&py_source); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName 
(result)); + return NULL; + } + + source = (char *) py_source.buf; + source_size = (size_t) py_source.len; + + result = + LZ4F_getFrameInfo (context, &frame_info, source, &source_size); + + if (LZ4F_isError (result)) + { + LZ4F_freeDecompressionContext (context); + Py_BLOCK_THREADS + PyBuffer_Release (&py_source); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_getFrameInfo failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + result = LZ4F_freeDecompressionContext (context); + + Py_END_ALLOW_THREADS + + PyBuffer_Release (&py_source); + + if (LZ4F_isError (result)) + { + PyErr_Format (PyExc_RuntimeError, + "LZ4F_freeDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + +#define KB *(1<<10) +#define MB *(1<<20) + switch (frame_info.blockSizeID) + { + case LZ4F_default: + case LZ4F_max64KB: + block_size = 64 KB; + block_size_id = LZ4F_max64KB; + break; + case LZ4F_max256KB: + block_size = 256 KB; + block_size_id = LZ4F_max256KB; + break; + case LZ4F_max1MB: + block_size = 1 MB; + block_size_id = LZ4F_max1MB; + break; + case LZ4F_max4MB: + block_size = 4 MB; + block_size_id = LZ4F_max4MB; + break; + default: + PyErr_Format (PyExc_RuntimeError, + "Unrecognized blockSizeID in get_frame_info: %d", + frame_info.blockSizeID); + return NULL; + } +#undef KB +#undef MB + + if (frame_info.blockMode == LZ4F_blockLinked) + { + block_linked = 1; + } + else if (frame_info.blockMode == LZ4F_blockIndependent) + { + block_linked = 0; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized blockMode in get_frame_info: %d", + frame_info.blockMode); + return NULL; + } + + if (frame_info.contentChecksumFlag == LZ4F_noContentChecksum) + { + content_checksum = 0; + } + else if (frame_info.contentChecksumFlag == LZ4F_contentChecksumEnabled) + { + content_checksum = 1; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized contentChecksumFlag in get_frame_info: %d", + frame_info.contentChecksumFlag); + return 
NULL; + } + + if (LZ4_versionNumber() >= 10800) + { + if (frame_info.blockChecksumFlag == LZ4F_noBlockChecksum) + { + block_checksum = 0; + } + else if (frame_info.blockChecksumFlag == LZ4F_blockChecksumEnabled) + { + block_checksum = 1; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized blockChecksumFlag in get_frame_info: %d", + frame_info.blockChecksumFlag); + return NULL; + } + } + else + { + /* Prior to LZ4 1.8.0 the blockChecksum functionality wasn't exposed in the + frame API, and blocks weren't checksummed, so we'll always return 0 + here. */ + block_checksum = 0; + } + + if (frame_info.frameType == LZ4F_frame) + { + skippable = 0; + } + else if (frame_info.frameType == LZ4F_skippableFrame) + { + skippable = 1; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized frameType in get_frame_info: %d", + frame_info.frameType); + return NULL; + } + + return Py_BuildValue ("{s:I,s:I,s:O,s:O,s:O,s:O,s:K}", + "block_size", block_size, + "block_size_id", block_size_id, + "block_linked", block_linked ? Py_True : Py_False, + "content_checksum", content_checksum ? Py_True : Py_False, + "block_checksum", block_checksum ? Py_True : Py_False, + "skippable", skippable ? Py_True : Py_False, + "content_size", frame_info.contentSize); +} + +/******************************** + * create_decompression_context * + ********************************/ +static void +destroy_decompression_context (PyObject * py_context) +{ +#ifndef PyCapsule_Type + LZ4F_dctx * context = + PyCapsule_GetPointer (py_context, decompression_context_capsule_name); +#else + /* Compatibility with 2.6 via capsulethunk. 
*/ + LZ4F_dctx * context = py_context; +#endif + Py_BEGIN_ALLOW_THREADS + LZ4F_freeDecompressionContext (context); + Py_END_ALLOW_THREADS +} + +static PyObject * +create_decompression_context (PyObject * Py_UNUSED (self)) +{ + LZ4F_dctx * context; + LZ4F_errorCode_t result; + + Py_BEGIN_ALLOW_THREADS + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + LZ4F_freeDecompressionContext (context); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + Py_END_ALLOW_THREADS + + return PyCapsule_New (context, decompression_context_capsule_name, + destroy_decompression_context); +} + +/******************************* + * reset_decompression_context * + *******************************/ +static PyObject * +reset_decompression_context (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + LZ4F_dctx * context; + PyObject * py_context = NULL; + static char *kwlist[] = { "context", + NULL + }; + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O", kwlist, + &py_context + )) + { + return NULL; + } + + context = (LZ4F_dctx *) + PyCapsule_GetPointer (py_context, decompression_context_capsule_name); + + if (!context) + { + PyErr_SetString (PyExc_ValueError, + "No valid decompression context supplied"); + return NULL; + } + + if (LZ4_versionNumber() >= 10800) /* LZ4 >= v1.8.0 has LZ4F_resetDecompressionContext */ + { + /* No error checking possible here - this is always successful. */ + Py_BEGIN_ALLOW_THREADS + LZ4F_resetDecompressionContext (context); + Py_END_ALLOW_THREADS + } + else + { + /* No resetDecompressionContext available, so we'll destroy the context + and create a new one. 
*/ + int result; + + Py_BEGIN_ALLOW_THREADS + LZ4F_freeDecompressionContext (context); + + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + if (LZ4F_isError (result)) + { + LZ4F_freeDecompressionContext (context); + Py_BLOCK_THREADS + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + Py_END_ALLOW_THREADS + + result = PyCapsule_SetPointer(py_context, context); + if (result) + { + LZ4F_freeDecompressionContext (context); + PyErr_SetString (PyExc_RuntimeError, + "PyCapsule_SetPointer failed with code: %s"); + return NULL; + } + } + + Py_RETURN_NONE; +} + +static inline PyObject * +__decompress(LZ4F_dctx * context, char * source, size_t source_size, + Py_ssize_t max_length, int full_frame, + int return_bytearray, int return_bytes_read) +{ + size_t source_remain; + size_t source_read; + char * source_cursor; + char * source_end; + char * destination; + size_t destination_write; + char * destination_cursor; + size_t destination_written; + size_t destination_size; + PyObject * py_destination; + size_t result = 0; + LZ4F_frameInfo_t frame_info; + LZ4F_decompressOptions_t options; + int end_of_frame = 0; + int resize_factor = 1; + + memset(&options, 0, sizeof options); + + Py_BEGIN_ALLOW_THREADS + + source_cursor = source; + source_end = source + source_size; + source_remain = source_size; + + if (full_frame) + { + source_read = source_size; + + result = + LZ4F_getFrameInfo (context, &frame_info, + source_cursor, &source_read); + + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + PyErr_Format (PyExc_RuntimeError, + "LZ4F_getFrameInfo failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + /* Advance the source_cursor pointer past the header - the call to + getFrameInfo above replaces the passed source_read value with the + number of bytes read. Also reduce source_remain accordingly. 
*/ + source_cursor += source_read; + source_remain -= source_read; + + /* If the uncompressed content size is available, we'll use that to size + the destination buffer. Otherwise, guess at twice the remaining source + source as a starting point, and adjust if needed. */ + if (frame_info.contentSize > 0) + { + destination_size = frame_info.contentSize; + } + else + { + destination_size = 2 * source_remain; + } + } + else + { + if (max_length >= (Py_ssize_t) 0) + { + destination_size = (size_t) max_length; + } + else + { + /* Choose an initial destination size as twice the source size, and we'll + grow the allocation as needed. */ + destination_size = 2 * source_remain; + } + } + + Py_BLOCK_THREADS + + destination = PyMem_Malloc (destination_size * sizeof * destination); + if (destination == NULL) + { + return PyErr_NoMemory(); + } + + Py_UNBLOCK_THREADS + + /* Only set stableDst = 1 if we are sure no PyMem_Realloc will be called since + when stableDst = 1 the LZ4 library stores a pointer to the last compressed + data, which may be invalid after a PyMem_Realloc. */ + if (full_frame && max_length >= (Py_ssize_t) 0) + { + options.stableDst = 1; + } + else + { + options.stableDst = 0; + } + + source_read = source_remain; + + destination_write = destination_size; + destination_cursor = destination; + destination_written = 0; + + while (1) + { + /* Decompress from the source string and write to the destination + until there's no more source string to read, or until we've reached the + frame end. + + On calling LZ4F_decompress, source_read is set to the remaining length + of source available to read. On return, source_read is set to the + actual number of bytes read from source, which may be less than + available. NB: LZ4F_decompress does not explicitly fail on empty input. + + On calling LZ4F_decompress, destination_write is the number of bytes in + destination available for writing. 
On exit, destination_write is set to + the actual number of bytes written to destination. */ + result = LZ4F_decompress (context, + destination_cursor, + &destination_write, + source_cursor, + &source_read, + &options); + + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + PyErr_Format (PyExc_RuntimeError, + "LZ4F_decompress failed with code: %s", + LZ4F_getErrorName (result)); + PyMem_Free (destination); + return NULL; + } + + destination_written += destination_write; + source_cursor += source_read; + source_read = source_end - source_cursor; + + if (result == 0) + { + /* We've reached the end of the frame. */ + end_of_frame = 1; + break; + } + else if (source_cursor == source_end) + { + /* We've reached end of input. */ + break; + } + else if (destination_written == destination_size) + { + /* Destination buffer is full. So, stop decompressing if + max_length is set. Otherwise expand the destination + buffer. */ + if (max_length >= (Py_ssize_t) 0) + { + break; + } + else + { + /* Expand the destination buffer. We've tried various strategies + here to estimate the compression ratio so far and adjust the + buffer size accordingly. However, that grows the buffer too + slowly. The best choices found were to either double the buffer + size each time, or to grow faster by multiplying the buffer + size by 2^N, where N is the number of resizes. We take the + latter approach, though the former approach may actually be + good enough in practice. */ + char * buff; + + resize_factor *= 2; + destination_size *= resize_factor; + + Py_BLOCK_THREADS + buff = PyMem_Realloc (destination, destination_size); + if (buff == NULL) + { + PyErr_SetString (PyExc_RuntimeError, + "Failed to resize buffer"); + return NULL; + } + else + { + destination = buff; + } + Py_UNBLOCK_THREADS + } + } + /* Data still remaining to be decompressed, so increment the destination + cursor location, and reset destination_write ready for the next + iteration. 
Important to re-initialize destination_cursor here (as + opposed to simply incrementing it) so we're pointing to the realloc'd + memory location. */ + destination_cursor = destination + destination_written; + destination_write = destination_size - destination_written; + } + + Py_END_ALLOW_THREADS + + if (result > 0 && full_frame) + { + PyErr_Format (PyExc_RuntimeError, + "Frame incomplete. LZ4F_decompress returned: %zu", result); + PyMem_Free (destination); + return NULL; + } + + if (LZ4F_isError (result)) + { + PyErr_Format (PyExc_RuntimeError, + "LZ4F_freeDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + PyMem_Free (destination); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) destination_written); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) destination_written); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + if (full_frame) + { + if (return_bytes_read) + { + return Py_BuildValue ("Ni", + py_destination, + source_cursor - source); + } + else + { + return py_destination; + } + } + else + { + return Py_BuildValue ("NiO", + py_destination, + source_cursor - source, + end_of_frame ? 
Py_True : Py_False); + } +} + +/************** + * decompress * + **************/ +static PyObject * +decompress (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + LZ4F_dctx * context; + LZ4F_errorCode_t result; + Py_buffer py_source; + char * source; + size_t source_size; + PyObject * ret; + int return_bytearray = 0; + int return_bytes_read = 0; + static char *kwlist[] = { "data", + "return_bytearray", + "return_bytes_read", + NULL + }; + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "y*|pp", kwlist, + &py_source, + &return_bytearray, + &return_bytes_read + )) + { + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + if (LZ4F_isError (result)) + { + LZ4F_freeDecompressionContext (context); + Py_BLOCK_THREADS + PyBuffer_Release(&py_source); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + Py_END_ALLOW_THREADS + + /* MSVC can't do pointer arithmetic on void * pointers, so cast to char * */ + source = (char *) py_source.buf; + source_size = py_source.len; + + ret = __decompress (context, + source, + source_size, + -1, + 1, + return_bytearray, + return_bytes_read); + + PyBuffer_Release(&py_source); + + Py_BEGIN_ALLOW_THREADS + LZ4F_freeDecompressionContext (context); + Py_END_ALLOW_THREADS + + return ret; +} + +/******************** + * decompress_chunk * + ********************/ +static PyObject * +decompress_chunk (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + PyObject * py_context = NULL; + PyObject * ret; + LZ4F_dctx * context; + Py_buffer py_source; + char * source; + size_t source_size; + Py_ssize_t max_length = (Py_ssize_t) -1; + int return_bytearray = 0; + static char *kwlist[] = { "context", + "data", + "max_length", + "return_bytearray", + NULL + }; + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Oy*|np", kwlist, + &py_context, + &py_source, + 
&max_length, + &return_bytearray + )) + { + return NULL; + } + + context = (LZ4F_dctx *) + PyCapsule_GetPointer (py_context, decompression_context_capsule_name); + + if (!context) + { + PyBuffer_Release(&py_source); + PyErr_SetString (PyExc_ValueError, + "No valid decompression context supplied"); + return NULL; + } + + /* MSVC can't do pointer arithmetic on void * pointers, so cast to char * */ + source = (char *) py_source.buf; + source_size = py_source.len; + + ret = __decompress (context, + source, + source_size, + max_length, + 0, + return_bytearray, + 0); + + PyBuffer_Release(&py_source); + + return ret; +} + +PyDoc_STRVAR( + create_compression_context__doc, + "create_compression_context()\n" \ + "\n" \ + "Creates a compression context object.\n" \ + "\n" \ + "The compression object is required for compression operations.\n" \ + "\n" \ + "Returns:\n" \ + " cCtx: A compression context\n" + ); + +#define COMPRESS_KWARGS_DOCSTRING \ + " block_size (int): Sepcifies the maximum blocksize to use.\n" \ + " Options:\n\n" \ + " - `lz4.frame.BLOCKSIZE_DEFAULT`: the lz4 library default\n" \ + " - `lz4.frame.BLOCKSIZE_MAX64KB`: 64 kB\n" \ + " - `lz4.frame.BLOCKSIZE_MAX256KB`: 256 kB\n" \ + " - `lz4.frame.BLOCKSIZE_MAX1MB`: 1 MB\n" \ + " - `lz4.frame.BLOCKSIZE_MAX4MB`: 4 MB\n\n" \ + " If unspecified, will default to `lz4.frame.BLOCKSIZE_DEFAULT`\n" \ + " which is currently equal to `lz4.frame.BLOCKSIZE_MAX64KB`.\n" \ + " block_linked (bool): Specifies whether to use block-linked\n" \ + " compression. If ``True``, the compression ratio is improved,\n" \ + " particularly for small block sizes. Default is ``True``.\n" \ + " compression_level (int): Specifies the level of compression used.\n" \ + " Values between 0-16 are valid, with 0 (default) being the\n" \ + " lowest compression (0-2 are the same value), and 16 the highest.\n" \ + " Values below 0 will enable \"fast acceleration\", proportional\n" \ + " to the value. 
Values above 16 will be treated as 16.\n" \ + " The following module constants are provided as a convenience:\n\n" \ + " - `lz4.frame.COMPRESSIONLEVEL_MIN`: Minimum compression (0, the\n" \ + " default)\n" \ + " - `lz4.frame.COMPRESSIONLEVEL_MINHC`: Minimum high-compression\n" \ + " mode (3)\n" \ + " - `lz4.frame.COMPRESSIONLEVEL_MAX`: Maximum compression (16)\n\n" \ + " content_checksum (bool): Specifies whether to enable checksumming\n" \ + " of the uncompressed content. If True, a checksum is stored at the\n" \ + " end of the frame, and checked during decompression. Default is\n" \ + " ``False``.\n" \ + " block_checksum (bool): Specifies whether to enable checksumming of\n" \ + " the uncompressed content of each block. If `True` a checksum of\n" \ + " the uncompressed data in each block in the frame is stored at\n\n" \ + " the end of each block. If present, these checksums will be used\n\n" \ + " to validate the data during decompression. The default is\n" \ + " ``False`` meaning block checksums are not calculated and stored.\n" \ + " This functionality is only supported if the underlying LZ4\n" \ + " library has version >= 1.8.0. Attempting to set this value\n" \ + " to ``True`` with a version of LZ4 < 1.8.0 will cause a\n" \ + " ``RuntimeError`` to be raised.\n" \ + " return_bytearray (bool): If ``True`` a ``bytearray`` object will be\n" \ + " returned. If ``False``, a string of bytes is returned. 
The default\n" \ + " is ``False``.\n" \ + +PyDoc_STRVAR( + compress__doc, + "compress(data, compression_level=0, block_size=0, content_checksum=0,\n" \ + "block_linked=True, store_size=True, return_bytearray=False)\n" \ + "\n" \ + "Compresses ``data`` returning the compressed data as a complete frame.\n" \ + "\n" \ + "The returned data includes a header and endmark and so is suitable\n" \ + "for writing to a file.\n" \ + "\n" \ + "Args:\n" \ + " data (str, bytes or buffer-compatible object): data to compress\n" \ + "\n" \ + "Keyword Args:\n" \ + COMPRESS_KWARGS_DOCSTRING \ + " store_size (bool): If ``True`` then the frame will include an 8-byte\n" \ + " header field that is the uncompressed size of data included\n" \ + " within the frame. Default is ``True``.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data\n" + ); +PyDoc_STRVAR +( + compress_begin__doc, + "compress_begin(context, source_size=0, compression_level=0, block_size=0,\n" \ + "content_checksum=0, content_size=1, block_linked=0, frame_type=0,\n" \ + "auto_flush=1)\n" \ + "\n" \ + "Creates a frame header from a compression context.\n\n" \ + "Args:\n" \ + " context (cCtx): A compression context.\n\n" \ + "Keyword Args:\n" \ + COMPRESS_KWARGS_DOCSTRING \ + " auto_flush (bool): Enable or disable autoFlush. When autoFlush is disabled\n" \ + " the LZ4 library may buffer data internally until a block is full.\n" \ + " Default is ``False`` (autoFlush disabled).\n\n" \ + " source_size (int): This optionally specifies the uncompressed size\n" \ + " of the data to be compressed. If specified, the size will be stored\n" \ + " in the frame header for use during decompression. Default is ``True``\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be returned.\n" \ + " If ``False``, a string of bytes is returned. 
Default is ``False``.\n\n" \ + "Returns:\n" \ + " bytes or bytearray: Frame header.\n" + ); + +#undef COMPRESS_KWARGS_DOCSTRING + +PyDoc_STRVAR +( + compress_chunk__doc, + "compress_chunk(context, data)\n" \ + "\n" \ + "Compresses blocks of data and returns the compressed data.\n" \ + "\n" \ + "The returned data should be concatenated with the data returned from\n" \ + "`lz4.frame.compress_begin` and any subsequent calls to\n" \ + "`lz4.frame.compress_chunk`.\n" \ + "\n" \ + "Args:\n" \ + " context (cCtx): compression context\n" \ + " data (str, bytes or buffer-compatible object): data to compress\n" \ + "\n" \ + "Keyword Args:\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be\n" \ + " returned. If ``False``, a string of bytes is returned. The\n" \ + " default is False.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data.\n\n" \ + "Notes:\n" \ + " If auto flush is disabled (``auto_flush=False`` when calling\n" \ + " `lz4.frame.compress_begin`) this function may buffer and retain\n" \ + " some or all of the compressed data for future calls to\n" \ + " `lz4.frame.compress`.\n" + ); + +PyDoc_STRVAR +( + compress_flush__doc, + "compress_flush(context, end_frame=True, return_bytearray=False)\n" \ + "\n" \ + "Flushes any buffered data held in the compression context.\n" \ + "\n" \ + "This flushes any data buffed in the compression context, returning it as\n" \ + "compressed data. The returned data should be appended to the output of\n" \ + "previous calls to ``lz4.frame.compress_chunk``.\n" \ + "\n" \ + "The ``end_frame`` argument specifies whether or not the frame should be\n" \ + "ended. If this is ``True`` and end of frame marker will be appended to\n" \ + "the returned data. 
In this case, if ``content_checksum`` was ``True``\n" \ + "when calling `lz4.frame.compress_begin`, then a checksum of the uncompressed\n" \ + "data will also be included in the returned data.\n" \ + "\n" \ + "If the ``end_frame`` argument is ``True``, the compression context will be\n" \ + "reset and can be re-used.\n" \ + "\n" \ + "Args:\n" \ + " context (cCtx): Compression context\n" \ + "\n" \ + "Keyword Args:\n" \ + " end_frame (bool): If ``True`` the frame will be ended. Default is\n" \ + " ``True``.\n" \ + " return_bytearray (bool): If ``True`` a ``bytearray`` object will\n" \ + " be returned. If ``False``, a ``bytes`` object is returned.\n" \ + " The default is ``False``.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: compressed data.\n" \ + "\n" \ + "Notes:\n" \ + " If ``end_frame`` is ``False`` but the underlying LZ4 library does not" \ + " support flushing without ending the frame, a ``RuntimeError`` will be\n" \ + " raised.\n" + ); + +PyDoc_STRVAR +( + get_frame_info__doc, + "get_frame_info(frame)\n\n" \ + "Given a frame of compressed data, returns information about the frame.\n" \ + "\n" \ + "Args:\n" \ + " frame (str, bytes or buffer-compatible object): LZ4 compressed frame\n" \ + "\n" \ + "Returns:\n" \ + " dict: Dictionary with keys:\n" \ + "\n" \ + " - ``block_size`` (int): the maximum size (in bytes) of each block\n" \ + " - ``block_size_id`` (int): identifier for maximum block size\n" \ + " - ``content_checksum`` (bool): specifies whether the frame\n" \ + " contains a checksum of the uncompressed content\n" \ + " - ``content_size`` (int): uncompressed size in bytes of\n" \ + " frame content\n" \ + " - ``block_linked`` (bool): specifies whether the frame contains\n" \ + " blocks which are independently compressed (``False``) or linked\n" \ + " linked (``True``)\n" \ + " - ``block_checksum`` (bool): specifies whether each block contains a\n" \ + " checksum of its contents\n" \ + " - ``skippable`` (bool): whether the block is skippable 
(``True``) or\n" \ + " not (``False``)\n" + ); + +PyDoc_STRVAR +( + create_decompression_context__doc, + "create_decompression_context()\n" \ + "\n" \ + "Creates a decompression context object.\n" \ + "\n" \ + "A decompression context is needed for decompression operations.\n" \ + "\n" \ + "Returns:\n" \ + " dCtx: A decompression context\n" + ); + +PyDoc_STRVAR +( + reset_decompression_context__doc, + "reset_decompression_context(context)\n" \ + "\n" \ + "Resets a decompression context object.\n" \ + "\n" \ + "This is useful for recovering from an error or for stopping an unfinished\n" \ + "decompression and starting a new one with the same context\n" \ + "\n" \ + "Args:\n" \ + " context (dCtx): A decompression context\n" + ); + +PyDoc_STRVAR +( + decompress__doc, + "decompress(data, return_bytearray=False, return_bytes_read=False)\n" \ + "\n" \ + "Decompresses a frame of data and returns it as a string of bytes.\n" \ + "\n" \ + "Args:\n" \ + " data (str, bytes or buffer-compatible object): data to decompress.\n" \ + " This should contain a complete LZ4 frame of compressed data.\n" \ + "\n" \ + "Keyword Args:\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be\n" \ + " returned. If ``False``, a string of bytes is returned. The\n" \ + " default is ``False``.\n" \ + " return_bytes_read (bool): If ``True`` then the number of bytes read\n" \ + " from ``data`` will also be returned. 
Default is ``False``\n" \ + "\n" \ + "Returns:\n" \ + " bytes/bytearray or tuple: Uncompressed data and optionally the number" \ + " of bytes read\n" \ + "\n" \ + " If the ``return_bytes_read`` argument is ``True`` this function\n" \ + " returns a tuple consisting of:\n" \ + "\n" \ + " - bytes or bytearray: Uncompressed data\n" \ + " - int: Number of bytes consumed from ``data``\n" + ); + +PyDoc_STRVAR +( + decompress_chunk__doc, + "decompress_chunk(context, data, max_length=-1)\n" \ + "\n" \ + "Decompresses part of a frame of compressed data.\n" \ + "\n" \ + "The returned uncompressed data should be concatenated with the data\n" \ + "returned from previous calls to `lz4.frame.decompress_chunk`\n" \ + "\n" \ + "Args:\n" \ + " context (dCtx): decompression context\n" \ + " data (str, bytes or buffer-compatible object): part of a LZ4\n" \ + " frame of compressed data\n" \ + "\n" \ + "Keyword Args:\n" \ + " max_length (int): if non-negative this specifies the maximum number\n" \ + " of bytes of uncompressed data to return. Default is ``-1``.\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be\n" \ + " returned.If ``False``, a string of bytes is returned. 
The\n" \ + " default is ``False``.\n" \ + "\n" \ + "Returns:\n" \ + " tuple: uncompressed data, bytes read, end of frame indicator\n" \ + "\n" \ + " This function returns a tuple consisting of:\n" \ + "\n" \ + " - The uncompressed data as a ``bytes`` or ``bytearray`` object\n" \ + " - The number of bytes consumed from input ``data`` as an ``int``\n" \ + " - The end of frame indicator as a ``bool``.\n" \ + "\n" + "The end of frame indicator is ``True`` if the end of the compressed\n" \ + "frame has been reached, or ``False`` otherwise\n" + ); + +static PyMethodDef module_methods[] = +{ + { + "create_compression_context", (PyCFunction) create_compression_context, + METH_NOARGS, create_compression_context__doc + }, + { + "compress", (PyCFunction) compress, + METH_VARARGS | METH_KEYWORDS, compress__doc + }, + { + "compress_begin", (PyCFunction) compress_begin, + METH_VARARGS | METH_KEYWORDS, compress_begin__doc + }, + { + "compress_chunk", (PyCFunction) compress_chunk, + METH_VARARGS | METH_KEYWORDS, compress_chunk__doc + }, + { + "compress_flush", (PyCFunction) compress_flush, + METH_VARARGS | METH_KEYWORDS, compress_flush__doc + }, + { + "get_frame_info", (PyCFunction) get_frame_info, + METH_VARARGS | METH_KEYWORDS, get_frame_info__doc + }, + { + "create_decompression_context", (PyCFunction) create_decompression_context, + METH_NOARGS, create_decompression_context__doc + }, + { + "reset_decompression_context", (PyCFunction) reset_decompression_context, + METH_VARARGS | METH_KEYWORDS, reset_decompression_context__doc + }, + { + "decompress", (PyCFunction) decompress, + METH_VARARGS | METH_KEYWORDS, decompress__doc + }, + { + "decompress_chunk", (PyCFunction) decompress_chunk, + METH_VARARGS | METH_KEYWORDS, decompress_chunk__doc + }, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +PyDoc_STRVAR(lz4frame__doc, + "A Python wrapper for the LZ4 frame protocol" + ); + +static struct PyModuleDef moduledef = +{ + PyModuleDef_HEAD_INIT, + "_frame", + lz4frame__doc, + -1, + 
module_methods +}; + +PyMODINIT_FUNC +PyInit__frame(void) +{ + PyObject *module = PyModule_Create (&moduledef); + + if (module == NULL) + return NULL; + + PyModule_AddIntConstant (module, "BLOCKSIZE_DEFAULT", LZ4F_default); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX64KB", LZ4F_max64KB); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX256KB", LZ4F_max256KB); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX1MB", LZ4F_max1MB); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX4MB", LZ4F_max4MB); + + return module; +} diff --git a/contrib/python/lz4/py3/lz4/stream/__init__.py b/contrib/python/lz4/py3/lz4/stream/__init__.py new file mode 100644 index 0000000000..4b0074648d --- /dev/null +++ b/contrib/python/lz4/py3/lz4/stream/__init__.py @@ -0,0 +1,204 @@ +from ._stream import _create_context, _compress, _decompress, _get_block +from ._stream import LZ4StreamError, _compress_bound, _input_bound, LZ4_MAX_INPUT_SIZE # noqa: F401 + + +__doc__ = """\ +A Python wrapper for the LZ4 stream protocol. + +""" + + +class LZ4StreamDecompressor: + """ LZ4 stream decompression context. + + """ + def __init__(self, strategy, buffer_size, return_bytearray=False, store_comp_size=4, dictionary=""): + """ Instantiates and initializes a LZ4 stream decompression context. + + Args: + strategy (str): Buffer management strategy. Can be: ``double_buffer``. + buffer_size (int): Size of one buffer of the double-buffer used + internally for stream decompression in the case of ``double_buffer`` + strategy. + + Keyword Args: + return_bytearray (bool): If ``False`` (the default) then the function + will return a ``bytes`` object. If ``True``, then the function will + return a ``bytearray`` object. + store_comp_size (int): Specify the size in bytes of the following + compressed block. Can be: ``0`` (meaning out-of-band block size), + ``1``, ``2`` or ``4`` (default: ``4``). 
+ dictionary (str, bytes or buffer-compatible object): If specified, + perform decompression using this initial dictionary. + + Raises: + Exceptions occuring during the context initialization. + + OverflowError: raised if the ``dictionary`` parameter is too large + for the LZ4 context. + ValueError: raised if some parameters are invalid. + MemoryError: raised if some internal resources cannot be allocated. + RuntimeError: raised if some internal resources cannot be initialized. + + """ + return_bytearray = 1 if return_bytearray else 0 + + self._context = _create_context(strategy, "decompress", buffer_size, + return_bytearray=return_bytearray, + store_comp_size=store_comp_size, + dictionary=dictionary) + + def __enter__(self): + """ Enter the LZ4 stream context. + + """ + return self + + def __exit__(self, exc_type, exc, exc_tb): + """ Exit the LZ4 stream context. + + """ + pass + + def decompress(self, chunk): + """ Decompress streamed compressed data. + + Decompress the given ``chunk``, using the given LZ4 stream context, + Raises an exception if any error occurs. + + Args: + chunk (str, bytes or buffer-compatible object): Data to decompress + + Returns: + bytes or bytearray: Decompressed data. + + Raises: + Exceptions occuring during decompression. + + ValueError: raised if the source is inconsistent with a finite LZ4 + stream block chain. + MemoryError: raised if the work output buffer cannot be allocated. + OverflowError: raised if the source is too large for being decompressed + in the given context. + LZ4StreamError: raised if the call to the LZ4 library fails. This can be + caused by ``decompressed_size`` being too small, or invalid data. + + """ + return _decompress(self._context, chunk) + + def get_block(self, stream): + """ Return the first LZ4 compressed block from ``stream``. + + Args: + stream (str, bytes or buffer-compatible object): LZ4 compressed stream. + + Returns: + bytes or bytearray: LZ4 compressed data block. 
+ + Raises: + Exceptions occuring while getting the first block from ``stream``. + + BufferError: raised if the function cannot return a complete LZ4 + compressed block from the stream (i.e. the stream does not hold + a complete block). + MemoryError: raised if the output buffer cannot be allocated. + OverflowError: raised if the source is too large for being handled by + the given context. + LZ4StreamError: raised if used while in an out-of-band block size record + configuration. + + """ + return _get_block(self._context, stream) + + +class LZ4StreamCompressor: + """ LZ4 stream compressing context. + + """ + def __init__(self, strategy, buffer_size, mode="default", acceleration=True, compression_level=9, + return_bytearray=False, store_comp_size=4, dictionary=""): + """ Instantiates and initializes a LZ4 stream compression context. + + Args: + strategy (str): Buffer management strategy. Can be: ``double_buffer``. + buffer_size (int): Base size of the buffer(s) used internally for stream + compression/decompression. In the ``double_buffer`` strategy case, + this is the size of each buffer of the double-buffer. + + Keyword Args: + mode (str): If ``default`` or unspecified use the default LZ4 + compression mode. Set to ``fast`` to use the fast compression + LZ4 mode at the expense of compression. Set to + ``high_compression`` to use the LZ4 high-compression mode at + the expense of speed. + acceleration (int): When mode is set to ``fast`` this argument + specifies the acceleration. The larger the acceleration, the + faster the but the lower the compression. The default + compression corresponds to a value of ``1``. + compression_level (int): When mode is set to ``high_compression`` this + argument specifies the compression. Valid values are between + ``1`` and ``12``. Values between ``4-9`` are recommended, and + ``9`` is the default. Only relevant if ``mode`` is + ``high_compression``. 
+ return_bytearray (bool): If ``False`` (the default) then the function + will return a bytes object. If ``True``, then the function will + return a bytearray object. + store_comp_size (int): Specify the size in bytes of the following + compressed block. Can be: ``0`` (meaning out-of-band block size), + ``1``, ``2`` or ``4`` (default: ``4``). + dictionary (str, bytes or buffer-compatible object): If specified, + perform compression using this initial dictionary. + + Raises: + Exceptions occuring during the context initialization. + + OverflowError: raised if the ``dictionary`` parameter is too large + for the LZ4 context. + ValueError: raised if some parameters are invalid. + MemoryError: raised if some internal resources cannot be allocated. + RuntimeError: raised if some internal resources cannot be initialized. + + """ + return_bytearray = 1 if return_bytearray else 0 + + self._context = _create_context(strategy, "compress", buffer_size, + mode=mode, + acceleration=acceleration, + compression_level=compression_level, + return_bytearray=return_bytearray, + store_comp_size=store_comp_size, + dictionary=dictionary) + + def __enter__(self): + """ Enter the LZ4 stream context. + + """ + return self + + def __exit__(self, exc_type, exc, exc_tb): + """ Exit the LZ4 stream context. + + """ + pass + + def compress(self, chunk): + """ Stream compress given ``chunk`` of data. + + Compress the given ``chunk``, using the given LZ4 stream context, + returning the compressed data as a ``bytearray`` or as a ``bytes`` object. + + Args: + chunk (str, bytes or buffer-compatible object): Data to compress + + Returns: + bytes or bytearray: Compressed data. + + Raises: + Exceptions occuring during compression. + + OverflowError: raised if the source is too large for being compressed in + the given context. + LZ4StreamError: raised if the call to the LZ4 library fails. 
+ + """ + return _compress(self._context, chunk) diff --git a/contrib/python/lz4/py3/lz4/stream/_stream.c b/contrib/python/lz4/py3/lz4/stream/_stream.c new file mode 100644 index 0000000000..522fdedacd --- /dev/null +++ b/contrib/python/lz4/py3/lz4/stream/_stream.c @@ -0,0 +1,1653 @@ +/* + * Copyright (c) 2019, Samuel Martin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#if defined(_WIN32) && defined(_MSC_VER) +#define inline __inline +#elif defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX) +#define inline +#endif + +#include <Python.h> + +#include <stdlib.h> +#include <math.h> +#include <lz4.h> +#include <lz4hc.h> +#include <stddef.h> +#include <stdio.h> + +#if defined(_WIN32) && defined(_MSC_VER) && _MSC_VER < 1600 +/* MSVC 2008 and earlier lacks stdint.h */ +typedef signed __int8 int8_t; +typedef signed __int16 int16_t; +typedef signed __int32 int32_t; +typedef signed __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; + +#if !defined(UINT8_MAX) +#define UINT8_MAX 0xff +#endif +#if !defined(UINT16_MAX) +#define UINT16_MAX 0xffff +#endif +#if !defined(UINT32_MAX) +#define UINT32_MAX 0xffffffff +#endif +#if !defined(INT32_MAX) +#define INT32_MAX 0x7fffffff +#endif +#if !defined(CHAR_BIT) +#define CHAR_BIT 8 +#endif + +#else +/* Not MSVC, or MSVC 2010 or higher */ +#include <stdint.h> +#endif /* _WIN32 && _MSC_VER && _MSC_VER < 1600 */ + +#define LZ4_VERSION_NUMBER_1_9_0 10900 + +static const char * stream_context_capsule_name = "_stream.LZ4S_ctx"; + +typedef enum { + DOUBLE_BUFFER, + RING_BUFFER, + + BUFFER_STRATEGY_COUNT /* must be the last entry */ +} buffer_strategy_e; + +typedef enum { + COMPRESS, + DECOMPRESS, +} direction_e; + +typedef enum { + DEFAULT, + FAST, + HIGH_COMPRESSION, +} compression_type_e; + + +/* Forward declarations */ +static PyObject * LZ4StreamError; + +#define DOUBLE_BUFFER_PAGE_COUNT (2) + +#define DOUBLE_BUFFER_INDEX_MIN (0) +#define DOUBLE_BUFFER_INDEX_INVALID (-1) + +#define _GET_MAX_UINT(byte_depth, type) (type)( ( 1ULL << ( CHAR_BIT * (byte_depth) ) ) - 1 ) +#define _GET_MAX_UINT32(byte_depth) _GET_MAX_UINT((byte_depth), uint32_t) + +typedef struct { + char * buf; + unsigned int len; +} buffer_t; + +/* forward declaration */ +typedef struct stream_context_t stream_context_t; + 
+typedef struct { + /** + * Release buffer strategy's resources. + * + * \param[inout] context Stream context. + */ + void (*release_resources) (stream_context_t *context); + + /** + * Reserve buffer strategy's resources. + * + * \param[inout] context Stream context. + * \param[in] buffer_size Base buffer size to allocate and initialize. + * + * \return 0 on success, non-0 otherwise + */ + int (*reserve_resources) (stream_context_t * context, unsigned int buffer_size); + + /** + * Return a pointer on the work buffer. + * + * \param[inout] context Stream context. + * + * \return A pointer on the work buffer. + */ + char * (*get_work_buffer) (const stream_context_t * context); + + /** + * Return the length of (available space in) the work buffer. + * + * \param[inout] context Stream context. + * + * \return The length of the work buffer. + */ + unsigned int (*get_work_buffer_size) (const stream_context_t * context); + + /** + * Return the length of the output buffer. + * + * \param[inout] context Stream context. + * + * \return The length the output buffer. + */ + unsigned int (*get_dest_buffer_size) (const stream_context_t * context); + + /** + * Update the stream context at the end of the LZ4 operation (a block compression or + * decompression). + * + * \param[inout] context Stream context. 
+ * + * \return 0 on success, non-0 otherwise + */ + int (*update_context_after_process) (stream_context_t * context); +} strategy_ops_t; + +struct stream_context_t { + /* Buffer strategy resources */ + struct { + strategy_ops_t * ops; + union { + /* Double-buffer */ + struct { + char * buf; + unsigned int page_size; + char * pages[DOUBLE_BUFFER_PAGE_COUNT]; + int index; + } double_buffer; + + /* Ring-buffer (not implemented) */ + struct { + char * buf; + unsigned int size; + } ring_buffer; + } data; + } strategy; + + buffer_t output; + + /* LZ4 state */ + union { + union { + LZ4_stream_t * fast; + LZ4_streamHC_t * hc; + } compress; + LZ4_streamDecode_t * decompress; + void * context; + } lz4_state; + + /* LZ4 configuration */ + struct { + int acceleration; + int compression_level; + int store_comp_size; + int return_bytearray; + direction_e direction; + compression_type_e comp; + } config; +}; + + +#ifndef PyCapsule_Type +#define _PyCapsule_get_context(py_ctx) \ + ((stream_context_t *) PyCapsule_GetPointer((py_ctx), stream_context_capsule_name)) +#else +/* Compatibility with 2.6 via capsulethunk. 
*/ +#define _PyCapsule_get_context(py_ctx) \ + ((stream_context_t *) (py_ctx)) +#endif + + +static inline void +store_le8 (char * c, uint8_t x) +{ + c[0] = x & 0xff; +} + + +/************************* + * block content helpers * + *************************/ +static inline uint8_t +load_le8 (const char * c) +{ + const uint8_t * d = (const uint8_t *) c; + return d[0]; +} + +static inline void +store_le16 (char * c, uint16_t x) +{ + c[0] = x & 0xff; + c[1] = (x >> 8) & 0xff; +} + +static inline uint16_t +load_le16 (const char * c) +{ + const uint8_t * d = (const uint8_t *) c; + return (d[0] | (d[1] << 8)); +} + +static inline void +store_le32 (char * c, uint32_t x) +{ + c[0] = x & 0xff; + c[1] = (x >> 8) & 0xff; + c[2] = (x >> 16) & 0xff; + c[3] = (x >> 24) & 0xff; +} + +static inline uint32_t +load_le32 (const char * c) +{ + const uint8_t * d = (const uint8_t *) c; + return (d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24)); +} + +static inline int +load_block_length (int block_length_size, const char *buf) +{ + int block_length = -1; + switch (block_length_size) + { + case 1: + block_length = load_le8 (buf); + break; + + case 2: + block_length = load_le16 (buf); + break; + + case 4: + block_length = load_le32 (buf); + break; + + case 0: + /* fallthrough */ + default: + break; + } + + return block_length; +} + +static inline int +store_block_length (int block_length, int block_length_size, char * buf) +{ + int status = 1; + + switch (block_length_size) + { + case 0: /* do nothing */ + break; + + case 1: + { + if (block_length > UINT8_MAX) + { + status = 0; + break; + } + store_le8 (buf, (uint8_t) (block_length & UINT8_MAX)); + } + break; + + case 2: + { + if (block_length > UINT16_MAX) + { + status = 0; + break; + } + store_le16 (buf, (uint16_t) (block_length & UINT16_MAX)); + } + break; + + case 4: + { + if (block_length > INT32_MAX) + { + status = 0; + break; + } + store_le32 (buf, (uint32_t) (block_length & UINT32_MAX)); + } + break; + + default: /* unsupported 
cases */ + status = 0; + break; + } + + if (status != 1) + { + PyErr_SetString (LZ4StreamError, "Compressed stream size too large"); + } + + return status; +} + + +/*************** + * LZ4 helpers * + ***************/ +static inline uint32_t +get_compress_bound(uint32_t input_size) +{ + /* result of LZ4_compressBound is null or positive */ + return (uint32_t)LZ4_compressBound(input_size); +} + + +static inline uint32_t +get_input_bound(uint32_t compress_max_size) +{ + uint64_t isize = 0; + uint64_t csize = (uint64_t) compress_max_size; + + /* Reversing the LZ4_COMPRESSBOUND macro gives: + * isize = ((csize - 16) * 255) / 256 + * = (((csize - 16) * 256) - (csize - 16)) / 256 + * = ((csize * 256) - (16 * 256) - (csize - 16)) / 256 + * = ((csize << 8) - (16 << 8) - csize + 16) >> 8 + * = ((csize << 8) - csize + 16 - (16 << 8)) >> 8 + * = ((csize << 8) - csize - 4080) >> 8 + * + * Notes: + * - Using 64-bit long integer for intermediate computation to avoid any + * truncation when shifting left large csize values. + * - Due to the round integer approximation, running the following + * calculation can give a non-null result: + * result = n - _LZ4_inputBound( _LZ4_compressBound( n ) ) + * but in all cases, this difference is between 0 and 1. + * Thus, the valid maximal input size returned by this funtcion is + * incremented by 1 to avoid any buffer overflow in case of decompression + * in a dynamically allocated buffer. + * - For small compressed length (shorter than 16 bytes), make sure a + * non-null size is returned. 
+ */ + if (csize < 16) + { + csize = 17; + } + + if (csize <= get_compress_bound (LZ4_MAX_INPUT_SIZE)) + { + isize = ((csize << 8) - csize - 4080) >> 8; + + if (isize > (uint32_t)LZ4_MAX_INPUT_SIZE) + { + isize = 0; + } + else + { + isize += 1; + } + } + + return (uint32_t)(isize & UINT32_MAX); +} + + +/************************************** + * LZ4 version-compatibility wrappers * + **************************************/ +#if defined (__GNUC__) +/* Runtime detection of the support of new functions in the LZ4 API + * (old functions remain available but are deprecated and will trigger + * compilation warnings. + * + * Declare weak symbols on the required functions provided by recent versions + * of the library. + * This way, these symbols will always be available - NULL if the (old version + * of the) library does not define them. + */ + +/* Function introduced in LZ4 >= 1.9.0 */ +__attribute__ ((weak)) void +LZ4_resetStreamHC_fast (LZ4_streamHC_t* streamHCPtr, int compressionLevel); + +/* Function introduced in LZ4 >= 1.8.2 */ +__attribute__ ((weak)) void +LZ4_resetStream_fast (LZ4_stream_t* streamPtr); + +#else +/* Assuming the bundled LZ4 library sources are always used, so meet the + * LZ4 minimal version requirements. 
+ */ +#endif + +static inline void reset_stream (LZ4_stream_t* streamPtr) +{ + if (LZ4_versionNumber () >= LZ4_VERSION_NUMBER_1_9_0) + { + if (LZ4_resetStream_fast) + { + LZ4_resetStream_fast (streamPtr); + } + else + { + PyErr_SetString (PyExc_RuntimeError, + "Inconsistent LZ4 library version/available APIs"); + } + } + else + { + LZ4_resetStream (streamPtr); + } +} + + +static inline void reset_stream_hc (LZ4_streamHC_t* streamHCPtr, int compressionLevel) +{ + if (LZ4_versionNumber () >= LZ4_VERSION_NUMBER_1_9_0) + { + if (LZ4_resetStreamHC_fast) + { + LZ4_resetStreamHC_fast (streamHCPtr, compressionLevel); + } + else + { + PyErr_SetString (PyExc_RuntimeError, + "Inconsistent LZ4 library version/available APIs"); + } + } + else + { + LZ4_resetStreamHC (streamHCPtr, compressionLevel); + } +} + + +/************************* + * Double-buffer helpers * + *************************/ +static int +double_buffer_update_index (stream_context_t * context) +{ +#if DOUBLE_BUFFER_PAGE_COUNT != 2 +#error "DOUBLE_BUFFER_PAGE_COUNT must be 2." 
+#endif /* DOUBLE_BUFFER_PAGE_COUNT != 2 */ + context->strategy.data.double_buffer.index = (context->strategy.data.double_buffer.index + 1) & 0x1; /* modulo 2 */ + + return 0; +} + +static char * +double_buffer_get_compression_page (const stream_context_t * context) +{ + return context->strategy.data.double_buffer.pages[context->strategy.data.double_buffer.index]; +} + +static void +double_buffer_release_resources (stream_context_t * context) +{ + unsigned int i; + for (i = DOUBLE_BUFFER_INDEX_MIN; + i < (DOUBLE_BUFFER_INDEX_MIN + DOUBLE_BUFFER_PAGE_COUNT); + ++i) + { + context->strategy.data.double_buffer.pages[i] = NULL; + } + + if (context->strategy.data.double_buffer.buf != NULL) + { + PyMem_Free (context->strategy.data.double_buffer.buf); + } + context->strategy.data.double_buffer.buf = NULL; + context->strategy.data.double_buffer.index = DOUBLE_BUFFER_INDEX_INVALID; + context->strategy.data.double_buffer.page_size = 0; +} + +static int +double_buffer_reserve_resources (stream_context_t * context, unsigned int buffer_size) +{ + int status = 0; + unsigned int i; + + context->strategy.data.double_buffer.page_size = buffer_size; + context->strategy.data.double_buffer.buf = PyMem_Malloc (buffer_size * DOUBLE_BUFFER_PAGE_COUNT); + + if (context->strategy.data.double_buffer.buf == NULL) + { + PyErr_Format (PyExc_MemoryError, + "Could not allocate double-buffer"); + status = -1; + goto exit_now; + } + + for (i = DOUBLE_BUFFER_INDEX_MIN; + i < (DOUBLE_BUFFER_INDEX_MIN + DOUBLE_BUFFER_PAGE_COUNT); + ++i) + { + context->strategy.data.double_buffer.pages[i] = context->strategy.data.double_buffer.buf + + (i * buffer_size); + } + + context->strategy.data.double_buffer.index = DOUBLE_BUFFER_INDEX_MIN; + +exit_now: + return status; +} + +static unsigned int +double_buffer_get_work_buffer_size (const stream_context_t * context) +{ + return context->strategy.data.double_buffer.page_size; +} + +static unsigned int +double_buffer_get_dest_buffer_size (const stream_context_t * 
context) +{ + unsigned int len; + + if (context->config.direction == COMPRESS) + { + len = context->output.len; + } + else + { + len = context->strategy.data.double_buffer.page_size; + } + + return len; +} + + +/**************************************** + * Ring-buffer helpers: Not implemented * + ****************************************/ +static void +ring_buffer_release_resources (stream_context_t * context) +{ + (void) context; /* unused */ + + /* Not implemented (yet) */ + PyErr_Format (PyExc_NotImplementedError, + "Buffer strategy not implemented: ring_buffer"); + + return; +} + +static int +ring_buffer_reserve_resources (stream_context_t * context, unsigned int buffer_size) +{ + (void) context; /* unused */ + (void) buffer_size; /* unused */ + + /* Not implemented (yet) */ + PyErr_Format (PyExc_NotImplementedError, + "Buffer strategy not implemented: ring_buffer"); + return -1; +} + +static unsigned int +ring_buffer_get_dest_buffer_size (const stream_context_t * context) +{ + (void) context; /* unused */ + + /* Not implemented (yet) */ + PyErr_Format (PyExc_NotImplementedError, + "Buffer strategy not implemented: ring_buffer"); + + return 0; +} + +static unsigned int +ring_buffer_get_work_buffer_size (const stream_context_t * context) +{ + (void) context; /* unused */ + + /* Not implemented (yet) */ + PyErr_Format (PyExc_NotImplementedError, + "Buffer strategy not implemented: ring_buffer"); + + return 0; +} + +static char * +ring_buffer_get_buffer_position (const stream_context_t * context) +{ + (void) context; /* unused */ + + /* Not implemented (yet) */ + PyErr_Format (PyExc_NotImplementedError, + "Buffer strategy not implemented: ring_buffer"); + + return NULL; +} + +static int +ring_buffer_update_context (stream_context_t * context) +{ + (void) context; /* unused */ + + /* Not implemented (yet) */ + PyErr_Format (PyExc_NotImplementedError, + "Buffer strategy not implemented: ring_buffer"); + + return -1; +} + + +/********************** + * strategy 
operators * + **********************/ +static strategy_ops_t strategy_ops[BUFFER_STRATEGY_COUNT] = { + /* [DOUBLE_BUFFER] = */ + { + /* .release_resources */ double_buffer_release_resources, + /* .reserve_resources */ double_buffer_reserve_resources, + /* .get_work_buffer */ double_buffer_get_compression_page, + /* .get_work_buffer_size */ double_buffer_get_work_buffer_size, + /* .get_dest_buffer_size */ double_buffer_get_dest_buffer_size, + /* .update_context_after_process */ double_buffer_update_index, + }, + /* [RING_BUFFER] = */ + { + /* .release_resources */ ring_buffer_release_resources, + /* .reserve_resources */ ring_buffer_reserve_resources, + /* .get_work_buffer */ ring_buffer_get_buffer_position, + /* .get_work_buffer_size */ ring_buffer_get_work_buffer_size, + /* .get_dest_buffer_size */ ring_buffer_get_dest_buffer_size, + /* .update_context_after_process */ ring_buffer_update_context, + }, +}; + + +/******************* + * generic helpers * + *******************/ +static void +destroy_context (stream_context_t * context) +{ + if (context == NULL) + { + return; + } + + /* Release lz4 state */ + Py_BEGIN_ALLOW_THREADS + if (context->lz4_state.context != NULL) + { + if (context->config.direction == COMPRESS) + { + if (context->config.comp == HIGH_COMPRESSION) + { + LZ4_freeStreamHC (context->lz4_state.compress.hc); + } + else + { + LZ4_freeStream (context->lz4_state.compress.fast); + } + } + else /* context->config.direction == DECOMPRESS */ + { + LZ4_freeStreamDecode (context->lz4_state.decompress); + } + } + Py_END_ALLOW_THREADS + context->lz4_state.context = NULL; + + /* Release strategy resources */ + if (context->strategy.ops != NULL) + { + context->strategy.ops->release_resources (context); + } + context->strategy.ops = NULL; + + /* Release output buffer */ + if (context->output.buf != NULL) + { + PyMem_Free (context->output.buf); + } + context->output.buf = NULL; + context->output.len = 0; + + /* Release python memory */ + PyMem_Free (context); +} 
+ +static void +destroy_py_context (PyObject * py_context) +{ + if (py_context == NULL) + { + return; + } + + destroy_context (_PyCapsule_get_context (py_context)); +} + + +/************** + * Python API * + **************/ +static PyObject * +_create_context (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwds) +{ + stream_context_t * context = NULL; + + const char * direction = ""; + const char * strategy_name = ""; + unsigned int buffer_size; + + buffer_strategy_e strategy = BUFFER_STRATEGY_COUNT; + const char * mode = "default"; + int acceleration = 1; + int compression_level = 9; + int store_comp_size = 4; + int return_bytearray = 0; + Py_buffer dict = { NULL, NULL, }; + + int status = 0; + unsigned int total_size = 0; + uint32_t store_max_size; + + static char * argnames[] = { + "strategy", + "direction", + "buffer_size", + "mode", + "acceleration", + "compression_level", + "return_bytearray", + "store_comp_size", + "dictionary", + NULL + }; + + if (!PyArg_ParseTupleAndKeywords (args, kwds, "ssI|sIIpIz*", argnames, + &strategy_name, &direction, &buffer_size, + &mode, &acceleration, &compression_level, &return_bytearray, + &store_comp_size, &dict)) + { + goto abort_now; + } + + /* Sanity checks on arguments */ + if (dict.len > INT_MAX) + { + PyErr_Format (PyExc_OverflowError, + "Dictionary too large for LZ4 API"); + goto abort_now; + } + + /* Input max length limited to 0x7E000000 (2 113 929 216 bytes < 2GiB). + * https://github.com/lz4/lz4/blob/dev/lib/lz4.h#L161 + * + * So, restrict the block length bitwise to 32 (signed 32 bit integer). 
*/ + if ((store_comp_size != 0) && (store_comp_size != 1) && + (store_comp_size != 2) && (store_comp_size != 4)) + { + PyErr_Format (PyExc_ValueError, + "Invalid store_comp_size, valid values: 0, 1, 2 or 4"); + goto abort_now; + } + + context = (stream_context_t *) PyMem_Malloc (sizeof (stream_context_t)); + if (context == NULL) + { + PyErr_NoMemory (); + goto abort_now; + } + + memset (context, 0x00, sizeof (stream_context_t)); + + /* Set buffer strategy */ + if (!strncmp (strategy_name, "double_buffer", sizeof ("double_buffer"))) + { + strategy = DOUBLE_BUFFER; + } + else if (!strncmp (strategy_name, "ring_buffer", sizeof ("ring_buffer"))) + { + strategy = RING_BUFFER; + } + else + { + PyErr_Format (PyExc_ValueError, + "Invalid strategy argument: %s. Must be one of: double_buffer, ring_buffer", + strategy_name); + goto abort_now; + } + + /* Set direction */ + if (!strncmp (direction, "compress", sizeof ("compress"))) + { + context->config.direction = COMPRESS; + } + else if (!strncmp (direction, "decompress", sizeof ("decompress"))) + { + context->config.direction = DECOMPRESS; + } + else + { + PyErr_Format (PyExc_ValueError, + "Invalid direction argument: %s. Must be one of: compress, decompress", + direction); + goto abort_now; + } + + /* Set compression mode */ + if (!strncmp (mode, "default", sizeof ("default"))) + { + context->config.comp = DEFAULT; + } + else if (!strncmp (mode, "fast", sizeof ("fast"))) + { + context->config.comp = FAST; + } + else if (!strncmp (mode, "high_compression", sizeof ("high_compression"))) + { + context->config.comp = HIGH_COMPRESSION; + } + else + { + PyErr_Format (PyExc_ValueError, + "Invalid mode argument: %s. Must be one of: default, fast, high_compression", + mode); + goto abort_now; + } + + /* Initialize the output buffer + * + * In out-of-band block size case, use a best-effort strategy for scaling + * buffers. 
+ */ + if (store_comp_size == 0) + { + store_max_size = _GET_MAX_UINT32(4); + } + else + { + store_max_size = _GET_MAX_UINT32(store_comp_size); + } + + if (context->config.direction == COMPRESS) + { + context->output.len = get_compress_bound (buffer_size); + total_size = context->output.len + store_comp_size; + + if (context->output.len == 0) + { + PyErr_Format (PyExc_ValueError, + "Invalid buffer_size argument: %u. Cannot define output buffer size. " + "Must be lesser or equal to %u", + buffer_size, LZ4_MAX_INPUT_SIZE); + goto abort_now; + } + + /* Assert the output buffer size and the store_comp_size values are consistent */ + if (context->output.len > store_max_size) + { + /* The maximal/"worst case" compressed data length cannot fit in the + * store_comp_size bytes. */ + PyErr_Format (LZ4StreamError, + "Inconsistent buffer_size/store_comp_size values. " + "Maximal compressed length (%u) cannot fit in a %u byte-long integer", + buffer_size, store_comp_size); + goto abort_now; + } + } + else /* context->config.direction == DECOMPRESS */ + { + if (store_max_size > LZ4_MAX_INPUT_SIZE) + { + store_max_size = LZ4_MAX_INPUT_SIZE; + } + + context->output.len = buffer_size; + total_size = context->output.len; + + /* Here we cannot assert the maximal theorical decompressed chunk length + * will fit in one page of the double_buffer, i.e.: + * assert( !(double_buffer.page_size < _LZ4_inputBound(store_max_size)) ) + * + * Doing such a check would require aligning the page_size on the maximal + * value of the store_comp_size prefix, i.e.: + * page_size = 256B if store_comp_size == 1 + * page_size = 64KB if store_comp_size == 2 + * page_size = 4GB if store_comp_size == 4 + * + * This constraint is too strict, so is not implemented. + * + * On the other hand, the compression logic tells the page size cannot be + * larger than the maximal value fitting in store_comp_size bytes. 
+ * So here, the check could be checking the page_size is smaller or equal + * to the maximal decompressed chunk length, i.e.: + * assert( !(double_buffer.page_size > _LZ4_inputBound(store_max_size)) ) + * + * But this check is not really relevant and could bring other limitations. + * + * So, on the decompression case, no check regarding the page_size and the + * store_comp_size values can reliably be done during the LZ4 context + * initialization, they will be deferred in the decompression process. + */ + } + + /* Set all remaining settings in the context */ + context->config.store_comp_size = store_comp_size; + context->config.acceleration = acceleration; + context->config.compression_level = compression_level; + context->config.return_bytearray = !!return_bytearray; + + /* Set internal resources related to the buffer strategy */ + context->strategy.ops = &strategy_ops[strategy]; + + status = context->strategy.ops->reserve_resources (context, buffer_size); + if (status != 0) + { + /* Python exception already set in the strategy's resource creation helper */ + goto abort_now; + } + + /* Set output buffer */ + context->output.buf = PyMem_Malloc (total_size * sizeof (* (context->output.buf))); + if (context->output.buf == NULL) + { + PyErr_Format (PyExc_MemoryError, + "Could not allocate output buffer"); + goto abort_now; + } + + /* Initialize lz4 state */ + if (context->config.direction == COMPRESS) + { + if (context->config.comp == HIGH_COMPRESSION) + { + context->lz4_state.compress.hc = LZ4_createStreamHC (); + if (context->lz4_state.compress.hc == NULL) + { + PyErr_Format (PyExc_MemoryError, + "Could not create LZ4 state"); + goto abort_now; + } + + reset_stream_hc (context->lz4_state.compress.hc, context->config.compression_level); + + if (dict.len > 0) + { + LZ4_loadDictHC (context->lz4_state.compress.hc, dict.buf, dict.len); + } + } + else + { + context->lz4_state.compress.fast = LZ4_createStream (); + if (context->lz4_state.compress.fast == NULL) + { + 
PyErr_Format (PyExc_MemoryError, + "Could not create LZ4 state"); + goto abort_now; + } + + reset_stream (context->lz4_state.compress.fast); + + if (dict.len > 0) + { + LZ4_loadDict (context->lz4_state.compress.fast, dict.buf, dict.len); + } + } + } + else /* context->config.direction == DECOMPRESS */ + { + context->lz4_state.decompress = LZ4_createStreamDecode (); + if (context->lz4_state.decompress == NULL) + { + PyErr_Format (PyExc_MemoryError, + "Could not create LZ4 state"); + goto abort_now; + } + + if (!LZ4_setStreamDecode (context->lz4_state.decompress, dict.buf, dict.len)) + { + PyErr_Format (PyExc_RuntimeError, + "Could not initialize LZ4 state"); + LZ4_freeStreamDecode (context->lz4_state.decompress); + goto abort_now; + } + } + + PyBuffer_Release (&dict); + + return PyCapsule_New (context, stream_context_capsule_name, destroy_py_context); + +abort_now: + if (dict.buf != NULL) + { + PyBuffer_Release (&dict); + } + destroy_context (context); + + return NULL; +} + +static PyObject * +_compress_bound (PyObject * Py_UNUSED (self), PyObject * args) +{ + PyObject * py_dest = NULL; + uint32_t input_size; + + + /* Positional arguments: input_size + * Keyword arguments : none + */ + if (!PyArg_ParseTuple (args, "OI", &input_size)) + { + goto exit_now; + } + + py_dest = PyLong_FromUnsignedLong (get_compress_bound (input_size)); + + if (py_dest == NULL) + { + PyErr_NoMemory (); + } + +exit_now: + return py_dest; +} + +static PyObject * +_input_bound (PyObject * Py_UNUSED (self), PyObject * args) +{ + PyObject * py_dest = NULL; + uint32_t compress_max_size; + + /* Positional arguments: compress_max_size + * Keyword arguments : none + */ + if (!PyArg_ParseTuple (args, "I", &compress_max_size)) + { + goto exit_now; + } + + py_dest = PyLong_FromUnsignedLong (get_input_bound (compress_max_size)); + + if (py_dest == NULL) + { + PyErr_NoMemory (); + } + +exit_now: + return py_dest; +} + +static inline int +_compress_generic (stream_context_t * lz4_ctxt, char * source, int 
source_size, + char * dest, int dest_size) +{ + int comp_len; + + if (lz4_ctxt->config.comp == HIGH_COMPRESSION) + { + LZ4_streamHC_t * lz4_state = lz4_ctxt->lz4_state.compress.hc; + + comp_len = LZ4_compress_HC_continue (lz4_state, source, dest, source_size, dest_size); + } + else + { + LZ4_stream_t * lz4_state = lz4_ctxt->lz4_state.compress.fast; + int acceleration = (lz4_ctxt->config.comp != FAST) + ? 1 /* defaults */ + : lz4_ctxt->config.acceleration; + + comp_len = LZ4_compress_fast_continue (lz4_state, source, dest, source_size, dest_size, + acceleration); + } + + return comp_len; +} + +#ifdef inline +#undef inline +#endif + +static PyObject * +_compress (PyObject * Py_UNUSED (self), PyObject * args) +{ + stream_context_t * context = NULL; + PyObject * py_context = NULL; + PyObject * py_dest = NULL; + int output_size; + Py_buffer source = { NULL, NULL, }; + + /* Positional arguments: capsule_context, source + * Keyword arguments : none + */ + if (!PyArg_ParseTuple (args, "Oy*", &py_context, &source)) + { + goto exit_now; + } + + context = _PyCapsule_get_context (py_context); + if ((context == NULL) || (context->lz4_state.context == NULL)) + { + PyErr_SetString (PyExc_ValueError, "No valid LZ4 stream context supplied"); + goto exit_now; + } + + if (source.len > context->strategy.ops->get_work_buffer_size (context)) + { + PyErr_SetString (PyExc_OverflowError, + "Input too large for LZ4 API"); + goto exit_now; + } + + memcpy (context->strategy.ops->get_work_buffer (context), source.buf, source.len); + + Py_BEGIN_ALLOW_THREADS + + output_size = _compress_generic (context, + context->strategy.ops->get_work_buffer (context), + source.len, + context->output.buf + context->config.store_comp_size, + context->output.len); + + Py_END_ALLOW_THREADS + + if (output_size <= 0) + { + /* No error code set in output_size! 
*/ + PyErr_SetString (LZ4StreamError, + "Compression failed"); + goto exit_now; + } + + if (!store_block_length (output_size, context->config.store_comp_size, context->output.buf)) + { + PyErr_SetString (LZ4StreamError, + "Compressed stream size too large"); + goto exit_now; + } + + output_size += context->config.store_comp_size; + + if (context->config.return_bytearray) + { + py_dest = PyByteArray_FromStringAndSize (context->output.buf, (Py_ssize_t) output_size); + } + else + { + py_dest = PyBytes_FromStringAndSize (context->output.buf, (Py_ssize_t) output_size); + } + + if (py_dest == NULL) + { + PyErr_NoMemory (); + goto exit_now; + } + + if (context->strategy.ops->update_context_after_process (context) != 0) + { + PyErr_Format (PyExc_RuntimeError, "Internal error"); + goto exit_now; + } + +exit_now: + if (source.buf != NULL) + { + PyBuffer_Release (&source); + } + + return py_dest; +} + +static PyObject * +_get_block (PyObject * Py_UNUSED (self), PyObject * args) +{ + stream_context_t * context = NULL; + PyObject * py_context = NULL; + PyObject * py_dest = NULL; + Py_buffer source = { NULL, NULL, }; + buffer_t block = { NULL, 0, }; + + /* Positional arguments: capsule_context, source + * Keyword arguments : none + */ + + if (!PyArg_ParseTuple (args, "Oy*", &py_context, &source)) + { + goto exit_now; + } + + context = _PyCapsule_get_context (py_context); + if ((context == NULL) || (context->lz4_state.context == NULL)) + { + PyErr_SetString (PyExc_ValueError, "No valid LZ4 stream context supplied"); + goto exit_now; + } + + if (source.len > INT_MAX) + { + PyErr_Format (PyExc_OverflowError, + "Input too large for LZ4 API"); + goto exit_now; + } + + if (context->config.store_comp_size == 0) + { + PyErr_Format (LZ4StreamError, + "LZ4 context is configured for storing block size out-of-band"); + goto exit_now; + } + + if (source.len < context->config.store_comp_size) + { + PyErr_Format (LZ4StreamError, + "Invalid source, too small for holding any block"); + goto 
exit_now; + } + + block.buf = (char *) source.buf + context->config.store_comp_size; + block.len = load_block_length (context->config.store_comp_size, source.buf); + + if ((source.len - context->config.store_comp_size) < block.len) + { + PyErr_Format (LZ4StreamError, + "Requested input size (%d) larger than source size (%ld)", + block.len, (source.len - context->config.store_comp_size)); + goto exit_now; + } + + if (context->config.return_bytearray) + { + py_dest = PyByteArray_FromStringAndSize (block.buf, (Py_ssize_t) block.len); + } + else + { + py_dest = PyBytes_FromStringAndSize (block.buf, (Py_ssize_t) block.len); + } + + if (py_dest == NULL) + { + PyErr_NoMemory (); + } + +exit_now: + if (source.buf != NULL) + { + PyBuffer_Release (&source); + } + + return py_dest; +} + +static PyObject * +_decompress (PyObject * Py_UNUSED (self), PyObject * args) +{ + stream_context_t * context = NULL; + PyObject * py_context = NULL; + PyObject * py_dest = NULL; + int output_size = 0; + uint32_t source_size_max = 0; + Py_buffer source = { NULL, NULL, }; + + /* Positional arguments: capsule_context, source + * Keyword arguments : none + */ + if (!PyArg_ParseTuple (args, "Oy*", &py_context, &source)) + { + goto exit_now; + } + + context = _PyCapsule_get_context (py_context); + if ((context == NULL) || (context->lz4_state.context == NULL)) + { + PyErr_SetString (PyExc_ValueError, "No valid LZ4 stream context supplied"); + goto exit_now; + } + + /* In out-of-band block size case, use a best-effort strategy for scaling + * buffers. 
+ */ + if (context->config.store_comp_size == 0) + { + source_size_max = _GET_MAX_UINT32(4); + } + else + { + source_size_max = _GET_MAX_UINT32(context->config.store_comp_size); + } + + if (source.len > source_size_max) + { + PyErr_Format (PyExc_OverflowError, + "Source length (%ld) too large for LZ4 store_comp_size (%d) value", + source.len, context->config.store_comp_size); + goto exit_now; + } + + if ((get_input_bound (source.len) == 0) || + (get_input_bound (source.len) > context->strategy.ops->get_dest_buffer_size (context))) + { + PyErr_Format (LZ4StreamError, + "Maximal decompressed data (%d) cannot fit in LZ4 internal buffer (%u)", + get_input_bound (source.len), + context->strategy.ops->get_dest_buffer_size (context)); + goto exit_now; + } + + Py_BEGIN_ALLOW_THREADS + + output_size = LZ4_decompress_safe_continue (context->lz4_state.decompress, + (const char *) source.buf, + context->strategy.ops->get_work_buffer (context), + source.len, + context->strategy.ops->get_dest_buffer_size (context)); + + Py_END_ALLOW_THREADS + + if (output_size < 0) + { + /* In case of LZ4 decompression error, output_size holds the error code */ + PyErr_Format (LZ4StreamError, + "Decompression failed. 
error: %d", + -output_size); + goto exit_now; + } + + if ((unsigned int) output_size > context->output.len) + { + output_size = -1; + PyErr_Format (PyExc_OverflowError, + "Decompressed stream too large for LZ4 API"); + goto exit_now; + } + + memcpy (context->output.buf, + context->strategy.ops->get_work_buffer (context), + output_size); + + if ( context->strategy.ops->update_context_after_process (context) != 0) + { + PyErr_Format (PyExc_RuntimeError, "Internal error"); + goto exit_now; + } + + if (context->config.return_bytearray) + { + py_dest = PyByteArray_FromStringAndSize (context->output.buf, (Py_ssize_t) output_size); + } + else + { + py_dest = PyBytes_FromStringAndSize (context->output.buf, (Py_ssize_t) output_size); + } + + if (py_dest == NULL) + { + PyErr_NoMemory (); + } + +exit_now: + if (source.buf != NULL) + { + PyBuffer_Release (&source); + } + + return py_dest; +} + + +PyDoc_STRVAR (_compress_bound__doc, + "_compress_bound(input_size)\n" \ + "\n" \ + "Provides the maximum size that LZ4 compression may output in a \"worst case\"\n" \ + "scenario (input data not compressible).\n" \ + "This function is primarily useful for memory allocation purposes (destination\n" \ + "buffer size).\n" \ + "\n" \ + "Args:\n" \ + " input_size (int): Input data size.\n" \ + "\n" \ + "Returns:\n" \ + " int: Maximal (worst case) size of the compressed data;\n" \ + " or 0 if the input size is greater than 2 113 929 216.\n"); + +PyDoc_STRVAR (_input_bound__doc, + "_input_bound(compress_max_size)\n" \ + "\n" \ + "Provides the maximum size that LZ4 decompression may output in a \"worst case\"\n" \ + "scenario (compressed data with null compression ratio).\n" \ + "This function is primarily useful for memory allocation purposes (destination\n" \ + "buffer size).\n" \ + "\n" \ + "Args:\n" \ + " compress_max_size (int): Compressed data size.\n" \ + "\n" \ + "Returns:\n" \ + " int: Maximal (worst case) size of the input data;\n" \ + " or 0 if the compressed maximal size is lower 
than the LZ4 compression\n" \ + " minimal overhead, or if the computed input size is greater than\n" \ + " 2 113 929 216.\n"); + +PyDoc_STRVAR (_compress__doc, + "_compress(context, source)\n" \ + "\n" \ + "Compress source, using the given LZ4 stream context, returning the compressed\n" \ + "data as a bytearray or as a bytes object.\n" \ + "Raises an exception if any error occurs.\n" \ + "\n" \ + "Args:\n" \ + " context (ctx): LZ4 stream context.\n" \ + " source (str, bytes or buffer-compatible object): Data to compress.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data.\n" \ + "\n" \ + "Raises:\n" \ + " OverflowError: raised if the source is too large for being compressed in\n" \ + " the given context.\n" \ + " RuntimeError: raised if some internal resources cannot be updated.\n" \ + " LZ4StreamError: raised if the call to the LZ4 library fails.\n"); + +PyDoc_STRVAR (_get_block__doc, + "_get_block(context, source)\n" \ + "\n" \ + "Return the first LZ4 compressed block from ``'source'``. 
\n" \ + "\n" \ + "Args:\n" \ + " context (ctx): LZ4 stream context.\n" \ + " source (str, bytes or buffer-compatible object): LZ4 compressed stream.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: LZ4 compressed data block.\n" \ + "\n" \ + "Raises:\n" \ + " MemoryError: raised if the output buffer cannot be allocated.\n" \ + " OverflowError: raised if the source is too large for being handled by \n" \ + " the given context.\n"); + +PyDoc_STRVAR (_decompress__doc, + "_decompress(context, source)\n" \ + "\n" \ + "Decompress source, using the given LZ4 stream context, returning the\n" \ + "uncompressed data as a bytearray or as a bytes object.\n" \ + "Raises an exception if any error occurs.\n" \ + "\n" \ + "Args:\n" \ + " context (obj): LZ4 stream context.\n" \ + " source (str, bytes or buffer-compatible object): Data to uncompress.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Uncompressed data.\n" \ + "\n" \ + "Raises:\n" \ + " ValueError: raised if the source is inconsistent with a finite LZ4\n" \ + " stream block chain.\n" \ + " MemoryError: raised if the work output buffer cannot be allocated.\n" \ + " OverflowError: raised if the source is too large for being decompressed\n" \ + " in the given context.\n" \ + " RuntimeError: raised if some internal resources cannot be updated.\n" \ + " LZ4StreamError: raised if the call to the LZ4 library fails.\n"); + +PyDoc_STRVAR (_create_context__doc, + "_create_context(strategy, direction, buffer_size,\n" \ + " mode='default', acceleration=1, compression_level=9,\n" \ + " return_bytearray=0, store_comp_size=4, dict=None)\n" \ + "\n" \ + "Instantiates and initializes a LZ4 stream context.\n" \ + "Raises an exception if any error occurs.\n" \ + "\n" \ + "Args:\n" \ + " strategy (str): Can be ``'double_buffer'``.\n" \ + " Only ``'double_buffer'`` is currently implemented.\n" \ + " direction (str): Can be ``'compress'`` or ``'decompress'``.\n" \ + " buffer_size (int): Base size of the buffer(s) used internally 
for stream\n" \ + " compression/decompression.\n" \ + " For the ``'double_buffer'`` strategy, this is the size of each buffer\n" \ + " of the double-buffer.\n" \ + "\n" \ + "Keyword Args:\n" \ + " mode (str): If ``'default'`` or unspecified use the default LZ4\n" \ + " compression mode. Set to ``'fast'`` to use the fast compression\n" \ + " LZ4 mode at the expense of compression. Set to\n" \ + " ``'high_compression'`` to use the LZ4 high-compression mode at\n" \ + " the expense of speed.\n" \ + " acceleration (int): When mode is set to ``'fast'`` this argument\n" \ + " specifies the acceleration. The larger the acceleration, the\n" \ + " faster the but the lower the compression. The default\n" \ + " compression corresponds to a value of ``1``.\n" \ + " Only relevant if ``'direction'`` is ``'compress'``.\n" \ + " compression_level (int): When mode is set to ``high_compression`` this\n" \ + " argument specifies the compression. Valid values are between\n" \ + " ``1`` and ``12``. Values between ``4-9`` are recommended, and\n" \ + " ``9`` is the default.\n" \ + " Only relevant if ``'direction'`` is ``'compress'`` and ``'mode'`` .\n" \ + " is ``'high_compression'``.\n" \ + " return_bytearray (bool): If ``False`` (the default) then the function\n" \ + " will return a bytes object. If ``True``, then the function will\n" \ + " return a bytearray object.\n" \ + " store_comp_size (int): Specify the size in bytes of the following\n" \ + " compressed block. 
Can be: ``1``, ``2`` or ``4`` (default: ``4``).\n" \ + " dict (str, bytes or buffer-compatible object): If specified, perform\n" \ + " compression using this initial dictionary.\n" \ + "\n" \ + "Returns:\n" \ + " lz4_ctx: A LZ4 stream context.\n" \ + "\n" \ + "Raises:\n" \ + " OverflowError: raised if the ``dict`` parameter is too large for the\n" \ + " LZ4 context.\n" \ + " ValueError: raised if some parameters are invalid.\n" \ + " MemoryError: raised if some internal resources cannot be allocated.\n" \ + " RuntimeError: raised if some internal resources cannot be initialized.\n"); + +PyDoc_STRVAR (lz4stream__doc, + "A Python wrapper for the LZ4 stream protocol" + ); + +static PyMethodDef module_methods[] = { + { + "_create_context", (PyCFunction) _create_context, + METH_VARARGS | METH_KEYWORDS, + _create_context__doc + }, + + { + "_compress", + (PyCFunction) _compress, + METH_VARARGS, + _compress__doc + }, + { + "_decompress", + (PyCFunction) _decompress, + METH_VARARGS, + _decompress__doc + }, + { + "_get_block", + (PyCFunction) _get_block, + METH_VARARGS, + _get_block__doc + }, + { + "_compress_bound", + (PyCFunction) _compress_bound, + METH_VARARGS, + _compress_bound__doc + }, + { + "_input_bound", + (PyCFunction) _input_bound, + METH_VARARGS, + _input_bound__doc + }, + { + /* Sentinel */ + NULL, + NULL, + 0, + NULL + } +}; + +static PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + /* m_name */ "_stream", + /* m_doc */ lz4stream__doc, + /* m_size */ -1, + /* m_methods */ module_methods, +}; + + +PyMODINIT_FUNC +PyInit__stream(void) +{ + PyObject * module = PyModule_Create (&moduledef); + + if (module == NULL) + { + return NULL; + } + + PyModule_AddIntConstant (module, "HC_LEVEL_MIN", LZ4HC_CLEVEL_MIN); + PyModule_AddIntConstant (module, "HC_LEVEL_DEFAULT", LZ4HC_CLEVEL_DEFAULT); + PyModule_AddIntConstant (module, "HC_LEVEL_OPT_MIN", LZ4HC_CLEVEL_OPT_MIN); + PyModule_AddIntConstant (module, "HC_LEVEL_MAX", LZ4HC_CLEVEL_MAX); + PyModule_AddIntConstant 
(module, "LZ4_MAX_INPUT_SIZE", LZ4_MAX_INPUT_SIZE); + + LZ4StreamError = PyErr_NewExceptionWithDoc ("_stream.LZ4StreamError", + "Call to LZ4 library failed.", + NULL, NULL); + if (LZ4StreamError == NULL) + { + return NULL; + } + Py_INCREF (LZ4StreamError); + PyModule_AddObject (module, "LZ4StreamError", LZ4StreamError); + + return module; +} diff --git a/contrib/python/lz4/py3/lz4/version.py b/contrib/python/lz4/py3/lz4/version.py new file mode 100644 index 0000000000..1e7ce2f6f5 --- /dev/null +++ b/contrib/python/lz4/py3/lz4/version.py @@ -0,0 +1,4 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +__version__ = version = '4.3.2' +__version_tuple__ = version_tuple = (4, 3, 2) diff --git a/contrib/python/lz4/py3/tests/block/conftest.py b/contrib/python/lz4/py3/tests/block/conftest.py new file mode 100644 index 0000000000..089ce0f83c --- /dev/null +++ b/contrib/python/lz4/py3/tests/block/conftest.py @@ -0,0 +1,111 @@ +import pytest +import os +import sys + + +test_data = [ + (b''), + (os.urandom(8 * 1024)), + (b'0' * 8 * 1024), + (bytearray(b'')), + (bytearray(os.urandom(8 * 1024))), + #(bytearray(open(os.path.join(os.path.dirname(__file__), 'numpy_byte_array.bin'), 'rb').read())) +] + +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b'')), + (memoryview(os.urandom(8 * 1024))) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'store_size': True + } + ), + ( + { + 'store_size': False + } + ), + ] +) +def store_size(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'return_bytearray': True + } + ), + ( + { + 'return_bytearray': False + } + ), + ] +) +def return_bytearray(request): + return request.param + + +@pytest.fixture +def c_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture +def 
d_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture( + params=[ + ('fast', None) + ] + [ + ('fast', {'acceleration': s}) for s in range(10) + ] + [ + ('high_compression', None) + ] + [ + ('high_compression', {'compression': s}) for s in range(17) + ] + [ + (None, None) + ] +) +def mode(request): + return request.param + + +dictionary = [ + None, + (0, 0), + (100, 200), + (0, 8 * 1024), + os.urandom(8 * 1024) +] + + +@pytest.fixture( + params=dictionary, + ids=[ + 'dictionary' + str(i) for i in range(len(dictionary)) + ] +) +def dictionary(request): + return request.param diff --git a/contrib/python/lz4/py3/tests/block/numpy_byte_array.bin b/contrib/python/lz4/py3/tests/block/numpy_byte_array.bin Binary files differnew file mode 100644 index 0000000000..49537e2d90 --- /dev/null +++ b/contrib/python/lz4/py3/tests/block/numpy_byte_array.bin diff --git a/contrib/python/lz4/py3/tests/block/test_block_0.py b/contrib/python/lz4/py3/tests/block/test_block_0.py new file mode 100644 index 0000000000..8fc0f48887 --- /dev/null +++ b/contrib/python/lz4/py3/tests/block/test_block_0.py @@ -0,0 +1,92 @@ +import lz4.block +from multiprocessing.pool import ThreadPool +import sys +from functools import partial +if sys.version_info <= (3, 2): + import struct + + +def get_stored_size(buff): + if sys.version_info > (2, 7): + if isinstance(buff, memoryview): + b = buff.tobytes() + else: + b = bytes(buff) + else: + b = bytes(buff) + + if len(b) < 4: + return None + + if sys.version_info > (3, 2): + return int.from_bytes(b[:4], 'little') + else: + # This would not work on a memoryview object, hence buff.tobytes call + # above + return struct.unpack('<I', b[:4])[0] + + +def roundtrip(x, c_kwargs, d_kwargs, dictionary): + if dictionary: + if isinstance(dictionary, tuple): + d = x[dictionary[0]:dictionary[1]] + else: + d = dictionary + c_kwargs['dict'] = d + d_kwargs['dict'] = d + + c = lz4.block.compress(x, **c_kwargs) + + if c_kwargs['store_size']: + assert 
get_stored_size(c) == len(x) + else: + d_kwargs['uncompressed_size'] = len(x) + + return lz4.block.decompress(c, **d_kwargs) + + +def setup_kwargs(mode, store_size, c_return_bytearray=None, d_return_bytearray=None): + c_kwargs = {} + + if mode[0] is not None: + c_kwargs['mode'] = mode[0] + if mode[1] is not None: + c_kwargs.update(mode[1]) + + c_kwargs.update(store_size) + + if c_return_bytearray: + c_kwargs.update(c_return_bytearray) + + d_kwargs = {} + + if d_return_bytearray: + d_kwargs.update(d_return_bytearray) + + return (c_kwargs, d_kwargs) + + +# Test single threaded usage with all valid variations of input +def test_1(data, mode, store_size, c_return_bytearray, d_return_bytearray, dictionary): + (c_kwargs, d_kwargs) = setup_kwargs( + mode, store_size, c_return_bytearray, d_return_bytearray) + + d = roundtrip(data, c_kwargs, d_kwargs, dictionary) + + assert d == data + if d_return_bytearray['return_bytearray']: + assert isinstance(d, bytearray) + + +# Test multi threaded usage with all valid variations of input +def test_2(data, mode, store_size, dictionary): + (c_kwargs, d_kwargs) = setup_kwargs(mode, store_size) + + data_in = [data for i in range(32)] + + pool = ThreadPool(2) + rt = partial(roundtrip, c_kwargs=c_kwargs, + d_kwargs=d_kwargs, dictionary=dictionary) + data_out = pool.map(rt, data_in) + pool.close() + assert data_in == data_out diff --git a/contrib/python/lz4/py3/tests/block/test_block_1.py b/contrib/python/lz4/py3/tests/block/test_block_1.py new file mode 100644 index 0000000000..4392bb332c --- /dev/null +++ b/contrib/python/lz4/py3/tests/block/test_block_1.py @@ -0,0 +1,149 @@ +import lz4.block +import pytest +import sys +import os + + +def test_decompress_ui32_overflow(): + data = lz4.block.compress(b'A' * 64) + with pytest.raises(OverflowError): + lz4.block.decompress(data[4:], uncompressed_size=((1 << 32) + 64)) + + +def test_decompress_without_leak(): + # Verify that hand-crafted packet does not leak uninitialized(?) memory. 
+ data = lz4.block.compress(b'A' * 64) + message = r'^Decompressor wrote 64 bytes, but 79 bytes expected from header$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(b'\x4f' + data[1:]) + + +def test_decompress_with_small_buffer(): + data = lz4.block.compress(b'A' * 64, store_size=False) + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(data[4:], uncompressed_size=64) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(data, uncompressed_size=60) + + +def test_decompress_truncated(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + compressed = lz4.block.compress(input_data) + # for i in range(len(compressed)): + # try: + # lz4.block.decompress(compressed[:i]) + # except: + # print(i, sys.exc_info()[0], sys.exc_info()[1]) + with pytest.raises(ValueError, match='Input source data size too small'): + lz4.block.decompress(compressed[:0]) + for n in [0, 1]: + with pytest.raises(ValueError, match='Input source data size too small'): + lz4.block.decompress(compressed[:n]) + for n in [24, 25, -2, 27, 67, 85]: + with pytest.raises(lz4.block.LZ4BlockError): + lz4.block.decompress(compressed[:n]) + + +def test_decompress_with_trailer(): + data = b'A' * 64 + comp = lz4.block.compress(data) + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. 
Error code: \d+$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + b'A') + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + comp) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + comp[4:]) + + +def test_unicode(): + if sys.version_info < (3,): + return # skip + DATA = b'x' + with pytest.raises(TypeError): + lz4.block.compress(DATA.decode('latin1')) + lz4.block.decompress(lz4.block.compress(DATA).decode('latin1')) + +# These next two are probably redundant given test_1 above but we'll keep them +# for now + + +def test_return_bytearray(): + if sys.version_info < (3,): + return # skip + data = os.urandom(128 * 1024) # Read 128kb + compressed = lz4.block.compress(data) + b = lz4.block.compress(data, return_bytearray=True) + assert isinstance(b, bytearray) + assert bytes(b) == compressed + b = lz4.block.decompress(compressed, return_bytearray=True) + assert isinstance(b, bytearray) + assert bytes(b) == data + + +def test_memoryview(): + if sys.version_info < (2, 7): + return # skip + data = os.urandom(128 * 1024) # Read 128kb + compressed = lz4.block.compress(data) + assert lz4.block.compress(memoryview(data)) == compressed + assert lz4.block.decompress(memoryview(compressed)) == data + + +def test_with_dict_none(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + for mode in ['default', 'high_compression']: + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode, dict=None)) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict=None) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode, dict=b'')) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict=b'') == input_data + assert lz4.block.decompress(lz4.block.compress( + 
input_data, mode=mode, dict='')) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict='') == input_data + + +def test_with_dict(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + dict1 = input_data[10:30] + dict2 = input_data[20:40] + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$' + for mode in ['default', 'high_compression']: + compressed = lz4.block.compress(input_data, mode=mode, dict=dict1) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(compressed) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(compressed, dict=dict1[:2]) + assert lz4.block.decompress(compressed, dict=dict2) != input_data + assert lz4.block.decompress(compressed, dict=dict1) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data), dict=dict1) == input_data + + +def test_known_decompress_1(): + input = b'\x00\x00\x00\x00\x00' + output = b'' + assert lz4.block.decompress(input) == output + + +def test_known_decompress_2(): + input = b'\x01\x00\x00\x00\x10 ' + output = b' ' + assert lz4.block.decompress(input) == output + + +def test_known_decompress_3(): + input = b'h\x00\x00\x00\xff\x0bLorem ipsum dolor sit amet\x1a\x006P amet' + output = b'Lorem ipsum dolor sit amet' * 4 + assert lz4.block.decompress(input) == output + + +def test_known_decompress_4(): + input = b'\xb0\xb3\x00\x00\xff\x1fExcepteur sint occaecat cupidatat non proident.\x00' + (b'\xff' * 180) + b'\x1ePident' + output = b'Excepteur sint occaecat cupidatat non proident' * 1000 + assert lz4.block.decompress(input) == output diff --git a/contrib/python/lz4/py3/tests/block/test_block_2.py b/contrib/python/lz4/py3/tests/block/test_block_2.py new file mode 100644 index 0000000000..a2aea4dab6 --- /dev/null +++ 
b/contrib/python/lz4/py3/tests/block/test_block_2.py @@ -0,0 +1,62 @@ +import pytest +import sys +import lz4.block +import psutil +import os + +# This test requires allocating a big lump of memory. In order to +# avoid a massive memory allocation during byte compilation, we have +# to declare a variable for the size of the buffer we're going to +# create outside the scope of the function below. See: +# https://bugs.python.org/issue21074 +_4GB = 0x100000000 # 4GB + +# This test will be killed on Travis due to the 3GB memory limit +# there. Unfortunately psutil reports the host memory, not the memory +# available to the container, and so can't be used to detect available +# memory, so instead, as an ugly hack for detecting we're on Travis we +# check for the TRAVIS environment variable being set. This is quite +# fragile. + + +@pytest.mark.skipif( + os.environ.get('TRAVIS') is not None, + reason='Skipping test on Travis due to insufficient memory' +) +@pytest.mark.skipif( + sys.maxsize < 0xffffffff, + reason='Py_ssize_t too small for this test' +) +@pytest.mark.skipif( + psutil.virtual_memory().available < _4GB, + reason='Insufficient system memory for this test' +) +def test_huge(): + try: + huge = b'\0' * _4GB + except MemoryError: + pytest.skip('Insufficient system memory for this test') + + with pytest.raises( + OverflowError, match='Input too large for LZ4 API' + ): + lz4.block.compress(huge) + + with pytest.raises( + OverflowError, match='Dictionary too large for LZ4 API' + ): + lz4.block.compress(b'', dict=huge) + + with pytest.raises( + OverflowError, match='Input too large for LZ4 API' + ): + lz4.block.decompress(huge) + + with pytest.raises( + OverflowError, match='Dictionary too large for LZ4 API' + ): + lz4.block.decompress(b'', dict=huge) + + +def test_dummy(): + pass diff --git a/contrib/python/lz4/py3/tests/block/test_block_3.py b/contrib/python/lz4/py3/tests/block/test_block_3.py new file mode 100644 index 0000000000..3fcb175b3b --- /dev/null +++ 
b/contrib/python/lz4/py3/tests/block/test_block_3.py @@ -0,0 +1,40 @@ +import gc +import lz4.block +import pytest + + +test_data = [ + (b'a' * 1024 * 1024), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_block_decompress_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + + tracemalloc.start() + + compressed = lz4.block.compress(data) + prev_snapshot = None + + for i in range(1000): + decompressed = lz4.block.decompress(compressed) # noqa: F841 + + if i % 100 == 0: + gc.collect() + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < (1024 * 8) + + prev_snapshot = snapshot diff --git a/contrib/python/lz4/py3/tests/frame/__init__.py b/contrib/python/lz4/py3/tests/frame/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/__init__.py diff --git a/contrib/python/lz4/py3/tests/frame/conftest.py b/contrib/python/lz4/py3/tests/frame/conftest.py new file mode 100644 index 0000000000..5ab52c0ada --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/conftest.py @@ -0,0 +1,95 @@ +import pytest +import lz4.frame as lz4frame +import lz4 + + +@pytest.fixture( + params=[ + # (lz4frame.BLOCKSIZE_DEFAULT), + (lz4frame.BLOCKSIZE_MAX64KB), + (lz4frame.BLOCKSIZE_MAX256KB), + (lz4frame.BLOCKSIZE_MAX1MB), + (lz4frame.BLOCKSIZE_MAX4MB), + ] +) +def block_size(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False), + ] +) +def block_linked(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False), + ] +) +def content_checksum(request): + return request.param + + +if lz4.library_version_number() >= 10800: + p = [True, False] +else: + p = [False, ] + + +@pytest.fixture( + params=[ + (pp) for pp in p + ] +) +def block_checksum(request): + 
return request.param + + +compression_levels = [ + (lz4frame.COMPRESSIONLEVEL_MIN), + (lz4frame.COMPRESSIONLEVEL_MINHC), + (lz4frame.COMPRESSIONLEVEL_MAX), +] + + +@pytest.fixture( + params=compression_levels +) +def compression_level(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False) + ] +) +def auto_flush(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False) + ] +) +def store_size(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False), + ] +) +def return_bytearray(request): + return request.param diff --git a/contrib/python/lz4/py3/tests/frame/helpers.py b/contrib/python/lz4/py3/tests/frame/helpers.py new file mode 100644 index 0000000000..e6cb0c9ef0 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/helpers.py @@ -0,0 +1,44 @@ +import lz4.frame as lz4frame + + +def get_frame_info_check(compressed_data, + source_size, + store_size, + block_size, + block_linked, + content_checksum, + block_checksum): + + frame_info = lz4frame.get_frame_info(compressed_data) + + assert frame_info["content_checksum"] == content_checksum + assert frame_info["block_checksum"] == block_checksum + + assert frame_info["skippable"] is False + + if store_size is True: + assert frame_info["content_size"] == source_size + else: + assert frame_info["content_size"] == 0 + + if source_size > frame_info['block_size']: + # More than a single block + assert frame_info["block_linked"] == block_linked + + if block_size == lz4frame.BLOCKSIZE_DEFAULT: + assert frame_info["block_size_id"] == lz4frame.BLOCKSIZE_MAX64KB + else: + assert frame_info["block_size_id"] == block_size + + +def get_chunked(data, nchunks): + size = len(data) + # stride = int(math.ceil(float(size)/nchunks)) # no // on py 2.6 + stride = size // nchunks + start = 0 + end = start + stride + while end < size: + yield data[start:end] + start += stride + end += stride + yield data[start:] diff --git 
a/contrib/python/lz4/py3/tests/frame/test_frame_0.py b/contrib/python/lz4/py3/tests/frame/test_frame_0.py new file mode 100644 index 0000000000..f03431d412 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_0.py @@ -0,0 +1,172 @@ +import lz4.frame as lz4frame +import lz4 +import re + + +def test_library_version_number(): + v = lz4.library_version_number() + assert isinstance(v, int) + assert v > 10000 + + +def test_library_version_string(): + v = lz4.library_version_string() + assert isinstance(v, str) + assert v.count('.') == 2 + r = re.compile(r'^[0-9]*\.[0-9]*\.[0-9]*$') + assert r.match(v) is not None + + +def test_create_compression_context(): + context = lz4frame.create_compression_context() + assert context is not None + + +def test_create_decompression_context(): + context = lz4frame.create_decompression_context() + assert context is not None + + +def test_reset_decompression_context_1(): + if lz4.library_version_number() >= 10800: + context = lz4frame.create_decompression_context() + r = lz4frame.reset_decompression_context(context) + assert r is None + else: + pass + + +def test_reset_decompression_context_2(): + if lz4.library_version_number() >= 10800: + c = lz4frame.compress(b'1234', return_bytearray=False) + context = lz4frame.create_decompression_context() + try: + # Simulate an error by passing junk to decompress + d = lz4frame.decompress_chunk(context, c[4:]) + except RuntimeError: + pass + r = lz4frame.reset_decompression_context(context) + assert r is None + # And confirm we can use the context after reset + d, bytes_read, eof = lz4frame.decompress_chunk(context, c) + assert d == b'1234' + assert bytes_read == len(c) + assert eof is True + else: + pass + + +def test_compress_return_type_1(): + r = lz4frame.compress(b'', return_bytearray=False) + assert isinstance(r, bytes) + + +def test_compress_return_type_2(): + r = lz4frame.compress(b'', return_bytearray=True) + assert isinstance(r, bytearray) + + +def 
test_decompress_return_type_1(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=False, + return_bytes_read=False + ) + assert isinstance(r, bytes) + + +def test_decompress_return_type_2(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=True, + return_bytes_read=False + ) + assert isinstance(r, bytearray) + + +def test_decompress_return_type_3(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=False, + return_bytes_read=True + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytes) + assert isinstance(r[1], int) + + +def test_decompress_return_type_4(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=True, + return_bytes_read=True + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytearray) + assert isinstance(r[1], int) + + +def test_decompress_chunk_return_type_1(): + c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r, b, e = lz4frame.decompress_chunk( + d, + c, + return_bytearray=False, + ) + assert isinstance(r, bytes) + assert isinstance(b, int) + assert isinstance(e, bool) + + +def test_decompress_chunk_return_type_2(): + c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r, b, e = lz4frame.decompress_chunk( + d, + c, + return_bytearray=True, + ) + assert isinstance(r, bytearray) + assert isinstance(b, int) + assert isinstance(e, bool) + + +def test_decompress_chunk_return_type_3(): + c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r = lz4frame.decompress_chunk( + d, + c, + return_bytearray=False, + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytes) + assert isinstance(r[1], int) + assert isinstance(r[2], bool) + + +def test_decompress_chunk_return_type_4(): + 
c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r = lz4frame.decompress_chunk( + d, + c, + return_bytearray=True, + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytearray) + assert isinstance(r[1], int) + assert isinstance(r[2], bool) + + +def test_block_size_constants(): + assert lz4frame.BLOCKSIZE_DEFAULT == 0 + assert lz4frame.BLOCKSIZE_MAX64KB == 4 + assert lz4frame.BLOCKSIZE_MAX256KB == 5 + assert lz4frame.BLOCKSIZE_MAX1MB == 6 + assert lz4frame.BLOCKSIZE_MAX4MB == 7 diff --git a/contrib/python/lz4/py3/tests/frame/test_frame_1.py b/contrib/python/lz4/py3/tests/frame/test_frame_1.py new file mode 100644 index 0000000000..35110c44f1 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_1.py @@ -0,0 +1,111 @@ +import lz4.frame as lz4frame +import os +import sys +import pytest +from .helpers import get_frame_info_check + + +test_data = [ + (b''), + (os.urandom(8 * 1024)), + (b'0' * 8 * 1024), + (bytearray(b'')), + (bytearray(os.urandom(8 * 1024))), + (os.urandom(128 * 1024)), + (os.urandom(256 * 1024)), + (os.urandom(512 * 1024)), +] +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b'')), + (memoryview(os.urandom(8 * 1024))) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_roundtrip_1( + data, + block_size, + block_linked, + content_checksum, + block_checksum, + compression_level, + store_size): + + compressed = lz4frame.compress( + data, + store_size=store_size, + compression_level=compression_level, + block_size=block_size, + block_linked=block_linked, + content_checksum=content_checksum, + block_checksum=block_checksum, + ) + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + decompressed, bytes_read = lz4frame.decompress( + compressed, return_bytes_read=True) + assert 
bytes_read == len(compressed) + assert decompressed == data + + +def test_roundtrip_2(data, + block_size, + block_linked, + content_checksum, + block_checksum, + compression_level, + auto_flush, + store_size): + + c_context = lz4frame.create_compression_context() + + kwargs = {} + kwargs['compression_level'] = compression_level + kwargs['block_size'] = block_size + kwargs['block_linked'] = block_linked + kwargs['content_checksum'] = content_checksum + kwargs['block_checksum'] = block_checksum + kwargs['auto_flush'] = auto_flush + if store_size is True: + kwargs['source_size'] = len(data) + + compressed = lz4frame.compress_begin( + c_context, + **kwargs + ) + compressed += lz4frame.compress_chunk( + c_context, + data + ) + compressed += lz4frame.compress_flush(c_context) + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + decompressed, bytes_read = lz4frame.decompress( + compressed, return_bytes_read=True) + assert bytes_read == len(compressed) + assert decompressed == data diff --git a/contrib/python/lz4/py3/tests/frame/test_frame_2.py b/contrib/python/lz4/py3/tests/frame/test_frame_2.py new file mode 100644 index 0000000000..80b44b87ff --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_2.py @@ -0,0 +1,107 @@ +import lz4.frame as lz4frame +import pytest +import os +import sys +from . 
helpers import ( + get_chunked, + get_frame_info_check, +) + + +test_data = [ + (b'', 1, 1), + (os.urandom(8 * 1024), 8, 1), + (os.urandom(8 * 1024), 1, 8), + (b'0' * 8 * 1024, 8, 1), + (b'0' * 8 * 1024, 8, 1), + (bytearray(b''), 1, 1), + (bytearray(os.urandom(8 * 1024)), 8, 1), +] +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b''), 1, 1), + (memoryview(os.urandom(8 * 1024)), 8, 1) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_roundtrip_chunked(data, block_size, block_linked, + content_checksum, block_checksum, + compression_level, + auto_flush, store_size): + + data, c_chunks, d_chunks = data + + c_context = lz4frame.create_compression_context() + + kwargs = {} + kwargs['compression_level'] = compression_level + kwargs['block_size'] = block_size + kwargs['block_linked'] = block_linked + kwargs['content_checksum'] = content_checksum + kwargs['block_checksum'] = block_checksum + kwargs['auto_flush'] = auto_flush + if store_size is True: + kwargs['source_size'] = len(data) + + compressed = lz4frame.compress_begin( + c_context, + **kwargs + ) + data_in = get_chunked(data, c_chunks) + try: + while True: + compressed += lz4frame.compress_chunk( + c_context, + next(data_in) + ) + except StopIteration: + pass + finally: + del data_in + + compressed += lz4frame.compress_flush(c_context) + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + + d_context = lz4frame.create_decompression_context() + compressed_in = get_chunked(compressed, d_chunks) + decompressed = b'' + bytes_read = 0 + eofs = [] + try: + while True: + d, b, e = lz4frame.decompress_chunk( + d_context, + next(compressed_in), + ) + decompressed += d + bytes_read += b + eofs.append(e) + + except StopIteration: + pass + finally: + del compressed_in + + assert bytes_read == len(compressed) + 
assert decompressed == data + assert eofs[-1] is True + assert (True in eofs[:-2]) is False diff --git a/contrib/python/lz4/py3/tests/frame/test_frame_3.py b/contrib/python/lz4/py3/tests/frame/test_frame_3.py new file mode 100644 index 0000000000..c0b0028e13 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_3.py @@ -0,0 +1,57 @@ +import lz4.frame as lz4frame +import pytest +import os +import struct + +test_data = [ + (os.urandom(256 * 1024)), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_decompress_truncated(data): + compressed = lz4frame.compress(data) + + message = r'^LZ4F_getFrameInfo failed with code: ERROR_frameHeader_incomplete' + with pytest.raises(RuntimeError, match=message): + lz4frame.decompress(compressed[:6]) + + for i in range(16, len(compressed) - 1, 5): # 15 is the max size of the header + message = r'^Frame incomplete. LZ4F_decompress returned:' + try: + lz4frame.decompress(compressed[:i]) + except RuntimeError as r: + print(r) + with pytest.raises(RuntimeError, match=message): + lz4frame.decompress(compressed[:i]) + + +def test_content_checksum_failure(data): + compressed = lz4frame.compress(data, content_checksum=True) + message = r'^LZ4F_decompress failed with code: ERROR_contentChecksum_invalid$' + with pytest.raises(RuntimeError, match=message): + last = struct.unpack('B', compressed[-1:])[0] + lz4frame.decompress(compressed[:-1] + struct.pack('B', last ^ 0x42)) + + +def test_block_checksum_failure(data): + compressed = lz4frame.compress( + data, + content_checksum=True, + block_checksum=True, + return_bytearray=True, + ) + message = r'^LZ4F_decompress failed with code: ERROR_blockChecksum_invalid$' + if len(compressed) > 32: + with pytest.raises(RuntimeError, match=message): + compressed[22] = compressed[22] ^ 0x42 + lz4frame.decompress(compressed) diff --git 
a/contrib/python/lz4/py3/tests/frame/test_frame_4.py b/contrib/python/lz4/py3/tests/frame/test_frame_4.py new file mode 100644 index 0000000000..7fa1654701 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_4.py @@ -0,0 +1,148 @@ +import lz4.frame as lz4frame +import os +import pytest +from . helpers import ( + get_frame_info_check, + get_chunked, +) + +test_data = [ + b'', + (128 * (32 * os.urandom(32))), + (256 * (32 * os.urandom(32))), + (512 * (32 * os.urandom(32))), + (1024 * (32 * os.urandom(32))), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False) + ] +) +def reset(request): + return request.param + + +@pytest.fixture( + params=[ + (1), + (8) + ] +) +def chunks(request): + return request.param + + +def test_roundtrip_LZ4FrameCompressor( + data, + chunks, + block_size, + block_linked, + reset, + store_size, + block_checksum, + content_checksum): + + with lz4frame.LZ4FrameCompressor( + block_size=block_size, + block_linked=block_linked, + content_checksum=content_checksum, + block_checksum=block_checksum, + ) as compressor: + def do_compress(): + if store_size is True: + compressed = compressor.begin(source_size=len(data)) + else: + compressed = compressor.begin() + + for chunk in get_chunked(data, chunks): + compressed += compressor.compress(chunk) + + compressed += compressor.flush() + return compressed + + compressed = do_compress() + + if reset is True: + compressor.reset() + compressed = do_compress() + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + + decompressed, bytes_read = lz4frame.decompress( + compressed, return_bytes_read=True) + assert data == decompressed + assert bytes_read == len(compressed) + + +def test_roundtrip_LZ4FrameCompressor_LZ4FrameDecompressor( + data, + chunks, + block_size, 
+ block_linked, + reset, + store_size, + block_checksum, + content_checksum): + + with lz4frame.LZ4FrameCompressor( + block_size=block_size, + block_linked=block_linked, + content_checksum=content_checksum, + block_checksum=block_checksum, + ) as compressor: + def do_compress(): + if store_size is True: + compressed = compressor.begin(source_size=len(data)) + else: + compressed = compressor.begin() + + for chunk in get_chunked(data, chunks): + compressed += compressor.compress(chunk) + + compressed += compressor.flush() + return compressed + + compressed = do_compress() + + if reset is True: + compressor.reset() + compressed = do_compress() + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + + with lz4frame.LZ4FrameDecompressor() as decompressor: + decompressed = b'' + for chunk in get_chunked(compressed, chunks): + b = decompressor.decompress(chunk) + decompressed += b + + assert data == decompressed diff --git a/contrib/python/lz4/py3/tests/frame/test_frame_5.py b/contrib/python/lz4/py3/tests/frame/test_frame_5.py new file mode 100644 index 0000000000..05daf283f9 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_5.py @@ -0,0 +1,99 @@ +import lz4.frame +import pytest +import gc + +MEM_INCREASE_LIMIT = (1024 * 25) + +test_data = [ + (b'a' * 1024 * 1024), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_frame_decompress_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + + tracemalloc.start() + + compressed = lz4.frame.compress(data) + prev_snapshot = None + + for i in range(1000): + decompressed = lz4.frame.decompress(compressed) # noqa: F841 + + if i % 100 == 0: + gc.collect() + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < 
MEM_INCREASE_LIMIT + + prev_snapshot = snapshot + + +def test_frame_decompress_chunk_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + tracemalloc.start() + + compressed = lz4.frame.compress(data) + + prev_snapshot = None + + for i in range(1000): + context = lz4.frame.create_decompression_context() + decompressed = lz4.frame.decompress_chunk( # noqa: F841 + context, compressed + ) + + if i % 100 == 0: + gc.collect() + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < MEM_INCREASE_LIMIT + + prev_snapshot = snapshot + + +def test_frame_open_decompress_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + tracemalloc.start() + + with lz4.frame.open('test.lz4', 'w') as f: + f.write(data) + + prev_snapshot = None + + for i in range(1000): + with lz4.frame.open('test.lz4', 'r') as f: + decompressed = f.read() # noqa: F841 + + if i % 100 == 0: + gc.collect() + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < MEM_INCREASE_LIMIT + + prev_snapshot = snapshot + + +# TODO: add many more memory usage tests along the lines of this one +# for other funcs + +def test_dummy_always_pass(): + # If pytest finds all tests are skipped, then it exits with code 5 rather + # than 0, which tox sees as an error. Here we add a dummy test that always passes. 
+ assert True diff --git a/contrib/python/lz4/py3/tests/frame/test_frame_6.py b/contrib/python/lz4/py3/tests/frame/test_frame_6.py new file mode 100644 index 0000000000..c20a4f3131 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_6.py @@ -0,0 +1,119 @@ +import os +import pytest +import lz4.frame as lz4frame + +test_data = [ + b'', + (128 * (32 * os.urandom(32))), + (5 * 128 * os.urandom(1024)), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +compression_levels = [ + (lz4frame.COMPRESSIONLEVEL_MIN), + # (lz4frame.COMPRESSIONLEVEL_MINHC), + # (lz4frame.COMPRESSIONLEVEL_MAX), +] + + +@pytest.fixture( + params=compression_levels +) +def compression_level(request): + return request.param + + +def test_lz4frame_open_write(data): + with lz4frame.open('testfile', mode='wb') as fp: + fp.write(data) + + +def test_lz4frame_open_write_read_defaults(data): + with lz4frame.open('testfile', mode='wb') as fp: + fp.write(data) + with lz4frame.open('testfile', mode='r') as fp: + data_out = fp.read() + assert data_out == data + + +def test_lz4frame_open_write_read_text(): + data = u'This is a test string' + with lz4frame.open('testfile', mode='wt') as fp: + fp.write(data) + with lz4frame.open('testfile', mode='rt') as fp: + data_out = fp.read() + assert data_out == data + + +def test_lz4frame_open_write_read_text_iter(): + data = u'This is a test string' + with lz4frame.open('testfile', mode='wt') as fp: + fp.write(data) + data_out = '' + with lz4frame.open('testfile', mode='rt') as fp: + for line in fp: + data_out += line + assert data_out == data + + +def test_lz4frame_open_write_read( + data, + compression_level, + block_linked, + block_checksum, + block_size, + content_checksum, + auto_flush, + store_size, + return_bytearray): + + kwargs = {} + + if store_size is True: + kwargs['source_size'] = len(data) + + kwargs['compression_level'] = compression_level 
+ kwargs['block_size'] = block_size + kwargs['block_linked'] = block_linked + kwargs['content_checksum'] = content_checksum + kwargs['block_checksum'] = block_checksum + kwargs['auto_flush'] = auto_flush + kwargs['return_bytearray'] = return_bytearray + kwargs['mode'] = 'wb' + + with lz4frame.open('testfile', **kwargs) as fp: + fp.write(data) + + with lz4frame.open('testfile', mode='r') as fp: + data_out = fp.read() + + assert data_out == data + + +def test_lz4frame_flush(): + data_1 = b"This is a..." + data_2 = b" test string!" + + with lz4frame.open("testfile", mode="w") as fp_write: + fp_write.write(data_1) + fp_write.flush() + + fp_write.write(data_2) + + with lz4frame.open("testfile", mode="r") as fp_read: + assert fp_read.read() == data_1 + + fp_write.flush() + + with lz4frame.open("testfile", mode="r") as fp_read: + assert fp_read.read() == data_1 + data_2 diff --git a/contrib/python/lz4/py3/tests/frame/test_frame_7.py b/contrib/python/lz4/py3/tests/frame/test_frame_7.py new file mode 100644 index 0000000000..583f3fbb05 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_7.py @@ -0,0 +1,102 @@ +import lz4.frame as lz4frame +import pytest +import os + +test_data = [ + (os.urandom(32) * 256), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_roundtrip_multiframe_1(data): + nframes = 4 + + compressed = b'' + for _ in range(nframes): + compressed += lz4frame.compress(data) + + decompressed = b'' + for _ in range(nframes): + decompressed += lz4frame.decompress(compressed) + + assert len(decompressed) == nframes * len(data) + assert data * nframes == decompressed + + +def test_roundtrip_multiframe_2(data): + nframes = 4 + + compressed = b'' + ctx = lz4frame.create_compression_context() + for _ in range(nframes): + compressed += lz4frame.compress_begin(ctx) + compressed += lz4frame.compress_chunk(ctx, data) + compressed += 
lz4frame.compress_flush(ctx) + + decompressed = b'' + for _ in range(nframes): + decompressed += lz4frame.decompress(compressed) + + assert len(decompressed) == nframes * len(data) + assert data * nframes == decompressed + + +def test_roundtrip_multiframe_3(data): + nframes = 4 + + compressed = b'' + ctx = lz4frame.create_compression_context() + for _ in range(nframes): + compressed += lz4frame.compress_begin(ctx) + compressed += lz4frame.compress_chunk(ctx, data) + compressed += lz4frame.compress_flush(ctx) + + decompressed = b'' + ctx = lz4frame.create_decompression_context() + for _ in range(nframes): + d, bytes_read, eof = lz4frame.decompress_chunk(ctx, compressed) + decompressed += d + assert eof is True + assert bytes_read == len(compressed) // nframes + + assert len(decompressed) == nframes * len(data) + assert data * nframes == decompressed + + +def test_roundtrip_multiframe_4(data): + nframes = 4 + + compressed = b'' + with lz4frame.LZ4FrameCompressor() as compressor: + for _ in range(nframes): + compressed += compressor.begin() + compressed += compressor.compress(data) + compressed += compressor.flush() + + decompressed = b'' + with lz4frame.LZ4FrameDecompressor() as decompressor: + for i in range(nframes): + if i == 0: + d = compressed + else: + d = decompressor.unused_data + decompressed += decompressor.decompress(d) + assert decompressor.eof is True + assert decompressor.needs_input is True + if i == nframes - 1: + assert decompressor.unused_data is None + else: + assert len(decompressor.unused_data) == len( + compressed) * (nframes - i - 1) / nframes + + assert len(decompressed) == nframes * len(data) + assert data * nframes == decompressed diff --git a/contrib/python/lz4/py3/tests/frame/test_frame_8.py b/contrib/python/lz4/py3/tests/frame/test_frame_8.py new file mode 100644 index 0000000000..159534aefe --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_8.py @@ -0,0 +1,12 @@ +import lz4.frame as lz4frame + + +def 
test_lz4frame_open_write_read_text_iter(): + data = u'This is a test string' + with lz4frame.open('testfile', mode='wt') as fp: + fp.write(data) + data_out = '' + with lz4frame.open('testfile', mode='rt') as fp: + for line in fp: + data_out += line + assert data_out == data diff --git a/contrib/python/lz4/py3/tests/frame/test_frame_9.py b/contrib/python/lz4/py3/tests/frame/test_frame_9.py new file mode 100644 index 0000000000..5143393417 --- /dev/null +++ b/contrib/python/lz4/py3/tests/frame/test_frame_9.py @@ -0,0 +1,71 @@ +import array +import os +import io +import pickle +import sys +import lz4.frame +import pytest + + +def test_issue_172_1(): + """Test reproducer for issue 172 + + Issue 172 is a reported failure occurring on Windows 10 only. This bug was + due to incorrect handling of Py_ssize_t types when doing comparisons and + using them as a size when allocating memory. + + """ + input_data = 8 * os.urandom(1024) + with lz4.frame.open('testfile_small', 'wb') as fp: + bytes_written = fp.write(input_data) # noqa: F841 + + with lz4.frame.open('testfile_small', 'rb') as fp: + data = fp.read(10) + assert len(data) == 10 + + +def test_issue_172_2(): + input_data = 9 * os.urandom(1024) + with lz4.frame.open('testfile_small', 'w') as fp: + bytes_written = fp.write(input_data) # noqa: F841 + + with lz4.frame.open('testfile_small', 'r') as fp: + data = fp.read(10) + assert len(data) == 10 + + +def test_issue_172_3(): + input_data = 9 * os.urandom(1024) + with lz4.frame.open('testfile_small', 'wb') as fp: + bytes_written = fp.write(input_data) # noqa: F841 + + with lz4.frame.open('testfile_small', 'rb') as fp: + data = fp.read(10) + assert len(data) == 10 + + with lz4.frame.open('testfile_small', 'rb') as fp: + data = fp.read(16 * 1024 - 1) + assert len(data) == 9 * 1024 + assert data == input_data + + +def test_issue_227_1(): + q = array.array('Q', [1, 2, 3, 4, 5]) + LENGTH = len(q) * q.itemsize + + with lz4.frame.open(io.BytesIO(), 'w') as f: + assert f.write(q) == 
LENGTH + assert f.tell() == LENGTH + + +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason="PickleBuffer only availiable in Python 3.8 or greater" +) +def test_issue_227_2(): + q = array.array('Q', [1, 2, 3, 4, 5]) + + c = lz4.frame.compress(q) + d = lz4.frame.LZ4FrameDecompressor().decompress(pickle.PickleBuffer(c)) + + assert memoryview(q).tobytes() == d diff --git a/contrib/python/lz4/py3/tests/stream/conftest.py b/contrib/python/lz4/py3/tests/stream/conftest.py new file mode 100644 index 0000000000..b31ab14317 --- /dev/null +++ b/contrib/python/lz4/py3/tests/stream/conftest.py @@ -0,0 +1,155 @@ +import pytest +import os +import sys + +test_data = [ + (b''), + (os.urandom(8 * 1024)), + # (b'0' * 8 * 1024), + # (bytearray(b'')), + # (bytearray(os.urandom(8 * 1024))), + #(bytearray(open(os.path.join(os.path.dirname(__file__), 'numpy_byte_array.bin'), 'rb').read())) +] + +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b'')), + (memoryview(os.urandom(8 * 1024))) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +@pytest.fixture( + params=[ + ("double_buffer"), + # ("ring_buffer"), # not implemented + ] +) +def strategy(request): + return request.param + + +test_buffer_size = sorted( + [1, + # 4, + # 8, + # 64, + # 256, + 941, + # 1 * 1024, + # 4 * 1024, + # 8 * 1024, + # 16 * 1024, + # 32 * 1024, + 64 * 1024, + # 128 * 1024 + ] +) + + +@pytest.fixture( + params=test_buffer_size, + ids=[ + 'buffer_size' + str(i) for i in range(len(test_buffer_size)) + ] +) +def buffer_size(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'store_comp_size': 1 + } + ), + ( + { + 'store_comp_size': 2 + } + ), + # ( + # { + # 'store_comp_size': 4 + # } + # ), + ] +) +def store_comp_size(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'return_bytearray': True + } + ), + ( + { + 'return_bytearray': False + } 
+ ), + ] +) +def return_bytearray(request): + return request.param + + +@pytest.fixture +def c_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture +def d_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture( + params=[ + ('default', None) + ] + [ + ('fast', None) + ] + [ + ('fast', {'acceleration': 2 * s}) for s in range(5) + ] + [ + ('high_compression', None) + ] + [ + ('high_compression', {'compression_level': 2 * s}) for s in range(9) + ] + [ + (None, None) + ] +) +def mode(request): + return request.param + + +dictionary = [ + None, + (0, 0), + (100, 200), + (0, 8 * 1024), + os.urandom(8 * 1024) +] + + +@pytest.fixture( + params=dictionary, + ids=[ + 'dictionary' + str(i) for i in range(len(dictionary)) + ] +) +def dictionary(request): + return request.param diff --git a/contrib/python/lz4/py3/tests/stream/numpy_byte_array.bin b/contrib/python/lz4/py3/tests/stream/numpy_byte_array.bin Binary files differnew file mode 100644 index 0000000000..49537e2d90 --- /dev/null +++ b/contrib/python/lz4/py3/tests/stream/numpy_byte_array.bin diff --git a/contrib/python/lz4/py3/tests/stream/test_stream_0.py b/contrib/python/lz4/py3/tests/stream/test_stream_0.py new file mode 100644 index 0000000000..03b19f3f42 --- /dev/null +++ b/contrib/python/lz4/py3/tests/stream/test_stream_0.py @@ -0,0 +1,116 @@ +import lz4.stream +import sys +import pytest +if sys.version_info <= (3, 2): + import struct + + +def get_stored_size(buff, block_length_size): + if sys.version_info > (2, 7): + if isinstance(buff, memoryview): + b = buff.tobytes() + else: + b = bytes(buff) + else: + b = bytes(buff) + + if len(b) < block_length_size: + return None + + if sys.version_info > (3, 2): + return int.from_bytes(b[:block_length_size], 'little') + else: + # This would not work on a memoryview object, hence buff.tobytes call + # above + fmt = {1: 'B', 2: 'H', 4: 'I', } + return struct.unpack('<' + fmt[block_length_size], b[:block_length_size])[0] 
+ + +def roundtrip(x, c_kwargs, d_kwargs, dictionary): + if dictionary: + if isinstance(dictionary, tuple): + dict_ = x[dictionary[0]:dictionary[1]] + else: + dict_ = dictionary + c_kwargs['dictionary'] = dict_ + d_kwargs['dictionary'] = dict_ + + c = bytes() + with lz4.stream.LZ4StreamCompressor(**c_kwargs) as proc: + for start in range(0, len(x), c_kwargs['buffer_size']): + chunk = x[start:start + c_kwargs['buffer_size']] + assert len(chunk) <= c_kwargs['buffer_size'] + block = proc.compress(chunk) + if c_kwargs.get('return_bytearray'): + assert isinstance(block, bytearray) + if start == 0: + c = block + else: + c += block + assert get_stored_size(block, c_kwargs['store_comp_size']) == \ + (len(block) - c_kwargs['store_comp_size']) + + d = bytes() + with lz4.stream.LZ4StreamDecompressor(**d_kwargs) as proc: + start = 0 + while start < len(c): + block = proc.get_block(c[start:]) + chunk = proc.decompress(block) + if d_kwargs.get('return_bytearray'): + assert isinstance(chunk, bytearray) + if start == 0: + d = chunk + else: + d += chunk + start += d_kwargs['store_comp_size'] + len(block) + + return d + + +def setup_kwargs(strategy, mode, buffer_size, store_comp_size, + c_return_bytearray=None, d_return_bytearray=None): + c_kwargs = {} + + if mode[0] is not None: + c_kwargs['mode'] = mode[0] + if mode[1] is not None: + c_kwargs.update(mode[1]) + + c_kwargs['strategy'] = strategy + c_kwargs['buffer_size'] = buffer_size + c_kwargs.update(store_comp_size) + + if c_return_bytearray: + c_kwargs.update(c_return_bytearray) + + d_kwargs = {} + + if d_return_bytearray: + d_kwargs.update(d_return_bytearray) + + d_kwargs['strategy'] = strategy + d_kwargs['buffer_size'] = buffer_size + d_kwargs.update(store_comp_size) + + return (c_kwargs, d_kwargs) + + +# Test single threaded usage with all valid variations of input +def test_1(data, strategy, mode, buffer_size, store_comp_size, + c_return_bytearray, d_return_bytearray, dictionary): + if buffer_size >= (1 << (8 * 
store_comp_size['store_comp_size'])): + pytest.skip("Invalid case: buffer_size too large for the block length area") + + (c_kwargs, d_kwargs) = setup_kwargs( + strategy, mode, buffer_size, store_comp_size, c_return_bytearray, d_return_bytearray) + + d = roundtrip(data, c_kwargs, d_kwargs, dictionary) + + assert d == data + + +# Test multi threaded: +# Not relevant in the lz4.stream case (the process is highly sequential, +# and re-use/share the same context from one input chunk to the next one). +def test_2(data, strategy, mode, buffer_size, store_comp_size, dictionary): # noqa + pass diff --git a/contrib/python/lz4/py3/tests/stream/test_stream_1.py b/contrib/python/lz4/py3/tests/stream/test_stream_1.py new file mode 100644 index 0000000000..6b49267e26 --- /dev/null +++ b/contrib/python/lz4/py3/tests/stream/test_stream_1.py @@ -0,0 +1,555 @@ +import lz4.stream +import pytest +import sys +import os + + +if sys.version_info < (3, ): + from struct import pack, unpack + + def _get_format(length, byteorder, signed): + _order = {'l': '<', 'b': '>'} + _fmt = {1: 'b', 2: 'h', 4: 'i', 8: 'q'} + _sign = {True: lambda x: x.lower(), False: lambda x: x.upper()} + return _sign[signed](_order[byteorder[0].lower()] + _fmt[length]) + + def int_to_bytes(value, length=4, byteorder='little', signed=False): + return bytearray(pack(_get_format(length, byteorder, signed), value)) + + def int_from_bytes(bytes, byteorder='little', signed=False): + return unpack(_get_format(len(bytes), byteorder, signed), bytes)[0] + +else: + def int_to_bytes(value, length=4, byteorder='little', signed=False): + return value.to_bytes(length, byteorder, signed=signed) + + def int_from_bytes(bytes, byteorder='little', signed=False): + return int.from_bytes(bytes, byteorder, signed=signed) + + +# This test requires allocating a big lump of memory. 
In order to +# avoid a massive memory allocation during byte compilation, we have +# to declare a variable for the size of the buffer we're going to +# create outside the scope of the function below. See: +# https://bugs.python.org/issue21074 +_4GB = 0x100000000 # 4GB + + +def compress(x, c_kwargs, return_block_offset=False, check_block_type=False): + o = [0, ] + if c_kwargs.get('return_bytearray', False): + c = bytearray() + else: + c = bytes() + with lz4.stream.LZ4StreamCompressor(**c_kwargs) as proc: + for start in range(0, len(x), c_kwargs['buffer_size']): + chunk = x[start:start + c_kwargs['buffer_size']] + block = proc.compress(chunk) + c += block + if return_block_offset: + o.append(len(c)) + if check_block_type: + assert isinstance(block, c.__class__) + if return_block_offset: + return c, o + else: + return c + + +def decompress(x, d_kwargs, check_chunk_type=False): + if d_kwargs.get('return_bytearray', False): + d = bytearray() + else: + d = bytes() + with lz4.stream.LZ4StreamDecompressor(**d_kwargs) as proc: + start = 0 + while start < len(x): + block = proc.get_block(x[start:]) + chunk = proc.decompress(block) + d += chunk + start += d_kwargs['store_comp_size'] + len(block) + if check_chunk_type: + assert isinstance(chunk, d.__class__) + return d + + +def test_invalid_config_c_1(): + c_kwargs = {} + c_kwargs['strategy'] = "ring_buffer" + c_kwargs['buffer_size'] = 1024 + + with pytest.raises(NotImplementedError): + lz4.stream.LZ4StreamCompressor(**c_kwargs) + + +def test_invalid_config_d_1(): + d_kwargs = {} + d_kwargs['strategy'] = "ring_buffer" + d_kwargs['buffer_size'] = 1024 + + with pytest.raises(NotImplementedError): + lz4.stream.LZ4StreamDecompressor(**d_kwargs) + + +def test_invalid_config_c_2(): + c_kwargs = {} + c_kwargs['strategy'] = "foo" + c_kwargs['buffer_size'] = 1024 + + with pytest.raises(ValueError): + lz4.stream.LZ4StreamCompressor(**c_kwargs) + + +def test_invalid_config_d_2(): + d_kwargs = {} + d_kwargs['strategy'] = "foo" + 
d_kwargs['buffer_size'] = 1024 + + with pytest.raises(ValueError): + lz4.stream.LZ4StreamDecompressor(**d_kwargs) + + +def test_invalid_config_c_3(store_comp_size): + c_kwargs = {} + c_kwargs['strategy'] = "double_buffer" + c_kwargs['buffer_size'] = 1024 + c_kwargs['store_comp_size'] = store_comp_size['store_comp_size'] + 5 + + with pytest.raises(ValueError): + lz4.stream.LZ4StreamCompressor(**c_kwargs) + + +def test_invalid_config_d_3(store_comp_size): + d_kwargs = {} + d_kwargs['strategy'] = "double_buffer" + d_kwargs['buffer_size'] = 1024 + d_kwargs['store_comp_size'] = store_comp_size['store_comp_size'] + 5 + + with pytest.raises(ValueError): + lz4.stream.LZ4StreamDecompressor(**d_kwargs) + + +def test_invalid_config_c_4(store_comp_size): + c_kwargs = {} + c_kwargs['strategy'] = "double_buffer" + c_kwargs['buffer_size'] = 1 << (8 * store_comp_size['store_comp_size']) + c_kwargs.update(store_comp_size) + + if store_comp_size['store_comp_size'] >= 4: + # No need for skiping this test case, since arguments check is + # expecting to raise an error. + + # Make sure the page size is larger than what the input bound will be, + # but still fit in 4 bytes + c_kwargs['buffer_size'] -= 1 + + if c_kwargs['buffer_size'] > lz4.stream.LZ4_MAX_INPUT_SIZE: + message = r"^Invalid buffer_size argument: \d+. Cannot define output buffer size. Must be lesser or equal to 2113929216$" # noqa + err_class = ValueError + else: + message = r"^Inconsistent buffer_size/store_comp_size values. 
Maximal compressed length \(\d+\) cannot fit in a \d+ byte-long integer$" # noqa + err_class = lz4.stream.LZ4StreamError + + with pytest.raises(err_class, match=message): + lz4.stream.LZ4StreamCompressor(**c_kwargs) + + +def test_invalid_config_d_4(store_comp_size): + d_kwargs = {} + d_kwargs['strategy'] = "double_buffer" + d_kwargs['buffer_size'] = 1 << (8 * store_comp_size['store_comp_size']) + d_kwargs.update(store_comp_size) + + if store_comp_size['store_comp_size'] >= 4: + + if sys.maxsize < 0xffffffff: + pytest.skip('Py_ssize_t too small for this test') + + # Make sure the page size is larger than what the input bound will be, + # but still fit in 4 bytes + d_kwargs['buffer_size'] -= 1 + + # No failure expected during instanciation/initialization + lz4.stream.LZ4StreamDecompressor(**d_kwargs) + + +def test_invalid_config_c_5(): + c_kwargs = {} + c_kwargs['strategy'] = "double_buffer" + c_kwargs['buffer_size'] = lz4.stream.LZ4_MAX_INPUT_SIZE + + if sys.maxsize < 0xffffffff: + pytest.skip('Py_ssize_t too small for this test') + + # No failure expected + lz4.stream.LZ4StreamCompressor(**c_kwargs) + + c_kwargs['buffer_size'] = lz4.stream.LZ4_MAX_INPUT_SIZE + 1 + with pytest.raises(ValueError): + lz4.stream.LZ4StreamCompressor(**c_kwargs) + + # Make sure the page size is larger than what the input bound will be, + # but still fit in 4 bytes + c_kwargs['buffer_size'] = _4GB - 1 # 4GB - 1 (to fit in 4 bytes) + with pytest.raises(ValueError): + lz4.stream.LZ4StreamCompressor(**c_kwargs) + + +def test_invalid_config_d_5(): + d_kwargs = {} + d_kwargs['strategy'] = "double_buffer" + + # No failure expected during instanciation/initialization + d_kwargs['buffer_size'] = lz4.stream.LZ4_MAX_INPUT_SIZE + + if sys.maxsize < 0xffffffff: + pytest.skip('Py_ssize_t too small for this test') + + lz4.stream.LZ4StreamDecompressor(**d_kwargs) + + # No failure expected during instanciation/initialization + d_kwargs['buffer_size'] = lz4.stream.LZ4_MAX_INPUT_SIZE + 1 + + if sys.maxsize 
< 0xffffffff: + pytest.skip('Py_ssize_t too small for this test') + + lz4.stream.LZ4StreamDecompressor(**d_kwargs) + + # No failure expected during instanciation/initialization + d_kwargs['buffer_size'] = _4GB - 1 # 4GB - 1 (to fit in 4 bytes) + + if sys.maxsize < 0xffffffff: + pytest.skip('Py_ssize_t too small for this test') + + lz4.stream.LZ4StreamDecompressor(**d_kwargs) + + +def test_decompress_corrupted_input_1(): + c_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + d_kwargs = {} + d_kwargs.update(c_kwargs) + + data = compress(b'A' * 512, c_kwargs) + decompress(data, d_kwargs) + + message = r"^Requested input size \(\d+\) larger than source size \(\d+\)$" + + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(data[4:], d_kwargs) + + +def test_decompress_corrupted_input_2(): + c_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + d_kwargs = {} + d_kwargs.update(c_kwargs) + + data = compress(b'A' * 512, c_kwargs) + decompress(data, d_kwargs) + + message = r"^Decompression failed. error: \d+$" + + # Block size corruption in the first block + + # Block size longer than actual: + data = int_to_bytes(int_from_bytes(data[:4], 'little') + 1, 4, 'little') + data[4:] + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(data, d_kwargs) + + # Block size shorter than actual: + data = int_to_bytes(int_from_bytes(data[:4], 'little') - 2, 4, 'little') + data[4:] + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(data, d_kwargs) + + +def test_decompress_corrupted_input_3(): + c_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + d_kwargs = {} + d_kwargs.update(c_kwargs) + + data = compress(b'A' * 512, c_kwargs) + decompress(data, d_kwargs) + + message = r"^Decompression failed. 
error: \d+$" + + # Block size corruption in a block in the middle of the stream + offset = 4 + int_from_bytes(data[:4], 'little') + + # Block size longer than actual: + block_len = int_from_bytes(data[offset:offset + 4], 'little') + 1 + data = data[:offset] + int_to_bytes(block_len, 4, 'little') + data[offset + 4:] + + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(data, d_kwargs) + + # Block size shorter than actual: + block_len = int_from_bytes(data[offset:offset + 4], 'little') - 2 + data = data[:offset] + int_to_bytes(block_len, 4, 'little') + data[offset + 4:] + + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(data, d_kwargs) + + +def test_decompress_corrupted_input_4(): + c_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + d_kwargs = {} + d_kwargs.update(c_kwargs) + + data = compress(b'A' * 256, c_kwargs) + decompress(data, d_kwargs) + + # Block size corruption in the last block of the stream + offset = 4 + int_from_bytes(data[:4], 'little') + + # Block size longer than actual: + block_len = int_from_bytes(data[offset:offset + 4], 'little') + 1 + data = data[:offset] + int_to_bytes(block_len, 4, 'little') + data[offset + 4:] + + message = r"^Requested input size \(\d+\) larger than source size \(\d+\)$" + + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(data, d_kwargs) + + # Block size shorter than actual: + block_len = int_from_bytes(data[offset:offset + 4], 'little') - 2 + data = data[:offset] + int_to_bytes(block_len, 4, 'little') + data[offset + 4:] + + message = r"^Decompression failed. 
error: \d+$" + + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(data, d_kwargs) + + +def test_decompress_truncated(): + c_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + d_kwargs = {} + d_kwargs.update(c_kwargs) + + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + compressed, block_offsets = compress(input_data, c_kwargs, return_block_offset=True) + + last_block_offset = 0 + for n in range(len(compressed)): + if n in block_offsets: + # end of input matches end of block, so decompression must succeed + last_block_offset = n + decompress(compressed[:n], d_kwargs) + + else: + # end of input does not match end of block, so decompression failure is expected + if n - last_block_offset < c_kwargs['store_comp_size']: + message = "^Invalid source, too small for holding any block$" + else: + message = r"^Requested input size \(\d+\) larger than source size \(\d+\)$" + + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(compressed[:n], d_kwargs) + + +# This next test is probably redundant given test_decompress_truncated above +# since the trailing bytes will be considered as the truncated last block, but +# we will keep them for now + + +def test_decompress_with_trailer(): + c_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + d_kwargs = {} + d_kwargs.update(c_kwargs) + + data = b'A' * 64 + comp = compress(data, c_kwargs) + + message = "^Invalid source, too small for holding any block$" + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(comp + b'A', d_kwargs) + + message = r"^Requested input size \(\d+\) larger than source size \(\d+\)$" + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(comp + b'A' * 10, d_kwargs) + + for n in range(1, 10): + if n < d_kwargs['store_comp_size']: + message = "^Invalid source, too small 
for holding any block$" + else: + message = r"^Decompression failed. error: \d+$" + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(comp + b'\x00' * n, d_kwargs) + + +def test_unicode(): + if sys.version_info < (3,): + return # skip + + c_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + d_kwargs = {} + d_kwargs.update(c_kwargs) + + DATA = b'x' + with pytest.raises(TypeError): + compress(DATA.decode('latin1'), c_kwargs) + decompress(compress(DATA, c_kwargs).decode('latin1'), d_kwargs) + + +# These next two are probably redundant given test_1 above but we'll keep them +# for now + + +def test_return_bytearray(): + if sys.version_info < (3,): + return # skip + + c_kwargs_r = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + c_kwargs = {'return_bytearray': True} + c_kwargs.update(c_kwargs_r) + + d_kwargs = {} + d_kwargs.update(c_kwargs) + + data = os.urandom(128 * 1024) # Read 128kb + compressed = compress(data, c_kwargs_r, check_block_type=True) + b = compress(data, c_kwargs, check_block_type=True) + assert isinstance(b, bytearray) + assert bytes(b) == compressed + b = decompress(compressed, d_kwargs, check_chunk_type=True) + assert isinstance(b, bytearray) + assert bytes(b) == data + + +def test_memoryview(): + if sys.version_info < (2, 7): + return # skip + + c_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + d_kwargs = {} + d_kwargs.update(c_kwargs) + + data = os.urandom(128 * 1024) # Read 128kb + compressed = compress(data, c_kwargs) + assert compress(memoryview(data), c_kwargs) == compressed + assert decompress(memoryview(compressed), d_kwargs) == data + + +def test_with_dict_none(): + kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + for mode in ['default', 
'high_compression']: + c_kwargs = {'mode': mode, 'dictionary': None} + c_kwargs.update(kwargs) + d_kwargs = {} + d_kwargs.update(kwargs) + assert decompress(compress(input_data, c_kwargs), d_kwargs) == input_data + + c_kwargs = {'mode': mode} + c_kwargs.update(kwargs) + d_kwargs = {'dictionary': None} + d_kwargs.update(kwargs) + assert decompress(compress(input_data, c_kwargs), d_kwargs) == input_data + + c_kwargs = {'mode': mode, 'dictionary': b''} + c_kwargs.update(kwargs) + d_kwargs = {} + d_kwargs.update(kwargs) + assert decompress(compress(input_data, c_kwargs), d_kwargs) == input_data + + c_kwargs = {'mode': mode} + c_kwargs.update(kwargs) + d_kwargs = {'dictionary': b''} + d_kwargs.update(kwargs) + assert decompress(compress(input_data, c_kwargs), d_kwargs) == input_data + + c_kwargs = {'mode': mode, 'dictionary': ''} + c_kwargs.update(kwargs) + d_kwargs = {} + d_kwargs.update(kwargs) + assert decompress(compress(input_data, c_kwargs), d_kwargs) == input_data + + c_kwargs = {'mode': mode} + c_kwargs.update(kwargs) + d_kwargs = {'dictionary': ''} + d_kwargs.update(kwargs) + assert decompress(compress(input_data, c_kwargs), d_kwargs) == input_data + + +def test_with_dict(): + kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + dict1 = input_data[10:30] + dict2 = input_data[20:40] + message = r"^Decompression failed. 
error: \d+$" + + for mode in ['default', 'high_compression']: + c_kwargs = {'mode': mode, 'dictionary': dict1} + c_kwargs.update(kwargs) + compressed = compress(input_data, c_kwargs) + + d_kwargs = {} + d_kwargs.update(kwargs) + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(compressed, d_kwargs) + + d_kwargs = {'dictionary': dict1[:2]} + d_kwargs.update(kwargs) + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(compressed, d_kwargs) + + d_kwargs = {'dictionary': dict2} + d_kwargs.update(kwargs) + assert decompress(compressed, d_kwargs) != input_data + + d_kwargs = {'dictionary': dict1} + d_kwargs.update(kwargs) + assert decompress(compressed, d_kwargs) == input_data + + c_kwargs = {} + c_kwargs.update(kwargs) + d_kwargs = {'dictionary': dict1} + d_kwargs.update(kwargs) + assert decompress(compress(input_data, c_kwargs), d_kwargs) == input_data + + +def test_known_decompress_1(): + d_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + output = b'' + + input = b'\x00\x00\x00\x00' + message = "^Decompression failed. 
error: 1$" + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + decompress(input, d_kwargs) + + input = b'\x01\x00\x00\x00\x00' + assert decompress(input, d_kwargs) == output + + +def test_known_decompress_2(): + d_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + input = b'\x02\x00\x00\x00\x10 ' + output = b' ' + assert decompress(input, d_kwargs) == output + + +def test_known_decompress_3(): + d_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + # uncompressed data size smaller than buffer_size + input = b'%\x00\x00\x00\xff\x0bLorem ipsum dolor sit amet\x1a\x006P amet' + output = b'Lorem ipsum dolor sit amet' * 4 + assert decompress(input, d_kwargs) == output + + +def test_known_decompress_4(): + d_kwargs = {'strategy': "double_buffer", 'buffer_size': 128, 'store_comp_size': 4} + + input = b'%\x00\x00\x00\xff\x0bLorem ipsum dolor sit amet\x1a\x00NPit am\n\x00\x00\x00\x0fh\x00hP sit \x05\x00\x00\x00@amet' + output = b'Lorem ipsum dolor sit amet' * 10 + assert decompress(input, d_kwargs) == output diff --git a/contrib/python/lz4/py3/tests/stream/test_stream_2.py b/contrib/python/lz4/py3/tests/stream/test_stream_2.py new file mode 100644 index 0000000000..5578f832c4 --- /dev/null +++ b/contrib/python/lz4/py3/tests/stream/test_stream_2.py @@ -0,0 +1,152 @@ +import pytest +import sys +import lz4.stream +import psutil +import os + + +# This test requires allocating a big lump of memory. In order to +# avoid a massive memory allocation during byte compilation, we have +# to declare a variable for the size of the buffer we're going to +# create outside the scope of the function below. See: +# https://bugs.python.org/issue21074 + +_4GB = 0xffffffff # actually 4GB - 1B, the maximum size on 4 bytes. + +# This test will be killed on Travis due to the 3GB memory limit +# there. 
Unfortunately psutil reports the host memory, not the memory +# available to the container, and so can't be used to detect available +# memory, so instead, as an ugly hack for detecting we're on Travis we +# check for the TRAVIS environment variable being set. This is quite +# fragile. + +if os.environ.get('TRAVIS') is not None or sys.maxsize < _4GB or \ + psutil.virtual_memory().available < _4GB: + huge = None +else: + try: + huge = b'\0' * _4GB + except (MemoryError, OverflowError): + huge = None + + +@pytest.mark.skipif( + os.environ.get('TRAVIS') is not None, + reason='Skipping test on Travis due to insufficient memory' +) +@pytest.mark.skipif( + sys.maxsize < _4GB, + reason='Py_ssize_t too small for this test' +) +@pytest.mark.skipif( + psutil.virtual_memory().available < _4GB or huge is None, + reason='Insufficient system memory for this test' +) +def test_huge_1(): + data = b'' + kwargs = { + 'strategy': "double_buffer", + 'buffer_size': lz4.stream.LZ4_MAX_INPUT_SIZE, + 'store_comp_size': 4, + 'dictionary': huge, + } + + if psutil.virtual_memory().available < 3 * kwargs['buffer_size']: + # The internal LZ4 context will request at least 3 times buffer_size + # as memory (2 buffer_size for the double-buffer, and 1.x buffer_size + # for the output buffer) + pytest.skip('Insufficient system memory for this test') + + # Triggering overflow error + message = r'^Dictionary too large for LZ4 API$' + + with pytest.raises(OverflowError, match=message): + with lz4.stream.LZ4StreamCompressor(**kwargs) as proc: + proc.compress(data) + + with pytest.raises(OverflowError, match=message): + with lz4.stream.LZ4StreamDecompressor(**kwargs) as proc: + proc.decompress(data) + + +@pytest.mark.skipif( + os.environ.get('TRAVIS') is not None, + reason='Skipping test on Travis due to insufficient memory' +) +@pytest.mark.skipif( + sys.maxsize < 0xffffffff, + reason='Py_ssize_t too small for this test' +) +@pytest.mark.skipif( + psutil.virtual_memory().available < _4GB or huge is 
None, + reason='Insufficient system memory for this test' +) +def test_huge_2(): + data = huge + kwargs = { + 'strategy': "double_buffer", + 'buffer_size': lz4.stream.LZ4_MAX_INPUT_SIZE, + 'store_comp_size': 4, + 'dictionary': b'', + } + + if psutil.virtual_memory().available < 3 * kwargs['buffer_size']: + # The internal LZ4 context will request at least 3 times buffer_size + # as memory (2 buffer_size for the double-buffer, and 1.x buffer_size + # for the output buffer) + pytest.skip('Insufficient system memory for this test') + + # Raising overflow error + message = r'^Input too large for LZ4 API$' + + with pytest.raises(OverflowError, match=message): + with lz4.stream.LZ4StreamCompressor(**kwargs) as proc: + proc.compress(data) + + # On decompression, too large input will raise LZ4StreamError + with pytest.raises(lz4.stream.LZ4StreamError): + with lz4.stream.LZ4StreamDecompressor(**kwargs) as proc: + proc.decompress(data) + + +@pytest.mark.skipif( + os.environ.get('TRAVIS') is not None, + reason='Skipping test on Travis due to insufficient memory' +) +@pytest.mark.skipif( + sys.maxsize < 0xffffffff, + reason='Py_ssize_t too small for this test' +) +@pytest.mark.skipif( + psutil.virtual_memory().available < _4GB or huge is None, + reason='Insufficient system memory for this test' +) +def test_huge_3(): + data = huge + kwargs = { + 'strategy': "double_buffer", + 'buffer_size': lz4.stream.LZ4_MAX_INPUT_SIZE, + 'store_comp_size': 4, + 'dictionary': huge, + } + + if psutil.virtual_memory().available < 3 * kwargs['buffer_size']: + # The internal LZ4 context will request at least 3 times buffer_size + # as memory (2 buffer_size for the double-buffer, and 1.x buffer_size + # for the output buffer) + pytest.skip('Insufficient system memory for this test') + + # Raising overflow error (during initialization because of the dictionary parameter) + message = r'^Dictionary too large for LZ4 API$' + + with pytest.raises(OverflowError, match=message): + with 
lz4.stream.LZ4StreamCompressor(**kwargs) as proc: + proc.compress(data) + + with pytest.raises(OverflowError, match=message): + with lz4.stream.LZ4StreamDecompressor(**kwargs) as proc: + proc.decompress(data) + + +def test_dummy(): + pass diff --git a/contrib/python/lz4/py3/tests/stream/test_stream_3.py b/contrib/python/lz4/py3/tests/stream/test_stream_3.py new file mode 100644 index 0000000000..2b52d6b549 --- /dev/null +++ b/contrib/python/lz4/py3/tests/stream/test_stream_3.py @@ -0,0 +1,123 @@ +import lz4.stream +import pytest +import sys + + +_1KB = 1024 +_1MB = _1KB * 1024 +_1GB = _1MB * 1024 + + +def compress(x, c_kwargs): + c = [] + with lz4.stream.LZ4StreamCompressor(**c_kwargs) as proc: + for start in range(0, len(x), c_kwargs['buffer_size']): + chunk = x[start:start + c_kwargs['buffer_size']] + block = proc.compress(chunk) + c.append(block) + if c_kwargs.get('return_bytearray', False): + return bytearray().join(c) + else: + return bytes().join(c) + + +def decompress(x, d_kwargs): + d = [] + with lz4.stream.LZ4StreamDecompressor(**d_kwargs) as proc: + start = 0 + while start < len(x): + block = proc.get_block(x[start:]) + chunk = proc.decompress(block) + d.append(chunk) + start += d_kwargs['store_comp_size'] + len(block) + if d_kwargs.get('return_bytearray', False): + return bytearray().join(d) + else: + return bytes().join(d) + + +test_buffer_size = sorted( + [256, + 1 * _1KB, + 64 * _1KB, + 1 * _1MB, + 1 * _1GB, + lz4.stream.LZ4_MAX_INPUT_SIZE] +) + + +@pytest.fixture( + params=test_buffer_size, + ids=[ + 'buffer_size' + str(i) for i in range(len(test_buffer_size)) + ] +) +def buffer_size(request): + return request.param + + +test_data = [ + (b'a' * _1MB), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_block_decompress_mem_usage(data, buffer_size): + kwargs = { + 'strategy': "double_buffer", + 'buffer_size': buffer_size, + 
'store_comp_size': 4, + } + + if sys.maxsize < 0xffffffff: + pytest.skip('Py_ssize_t too small for this test') + + tracemalloc = pytest.importorskip('tracemalloc') + + # Trace memory usage on compression + tracemalloc.start() + prev_snapshot = None + + for i in range(1000): + compressed = compress(data, kwargs) + + if i % 100 == 0: + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + # Filter on lz4.stream module'a allocations + stats = [x for x in snapshot.compare_to(prev_snapshot, 'lineno') + if lz4.stream.__file__ in x.traceback._frames[0][0]] + assert sum(map(lambda x: x.size_diff, stats)) < (1024 * 4) + + prev_snapshot = snapshot + + tracemalloc.stop() + + tracemalloc.start() + prev_snapshot = None + + for i in range(1000): + decompressed = decompress(compressed, kwargs) # noqa: F841 + + if i % 100 == 0: + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + # Filter on lz4.stream module'a allocations + stats = [x for x in snapshot.compare_to(prev_snapshot, 'lineno') + if lz4.stream.__file__ in x.traceback._frames[0][0]] + assert sum(map(lambda x: x.size_diff, stats)) < (1024 * 4) + + prev_snapshot = snapshot + + tracemalloc.stop() diff --git a/contrib/python/lz4/py3/tests/stream/test_stream_4.py b/contrib/python/lz4/py3/tests/stream/test_stream_4.py new file mode 100644 index 0000000000..3d139a02ef --- /dev/null +++ b/contrib/python/lz4/py3/tests/stream/test_stream_4.py @@ -0,0 +1,139 @@ +import lz4.stream +import pytest +import sys + + +if sys.version_info < (3, ): + from struct import pack, unpack + + def _get_format(length, byteorder, signed): + _order = {'l': '<', 'b': '>'} + _fmt = {1: 'b', 2: 'h', 4: 'i', 8: 'q'} + _sign = {True: lambda x: x.lower(), False: lambda x: x.upper()} + return _sign[signed](_order[byteorder[0].lower()] + _fmt[length]) + + def int_to_bytes(value, length=4, byteorder='little', signed=False): + return bytearray(pack(_get_format(length, byteorder, signed), value)) + + def int_from_bytes(bytes, 
byteorder='little', signed=False): + return unpack(_get_format(len(bytes), byteorder, signed), bytes)[0] + +else: + def int_to_bytes(value, length=4, byteorder='little', signed=False): + return value.to_bytes(length, byteorder, signed=signed) + + def int_from_bytes(bytes, byteorder='little', signed=False): + return int.from_bytes(bytes, byteorder, signed=signed) + +# Out-of-band block size record tests + + +def test_round_trip(): + data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + kwargs = {'strategy': "double_buffer", 'buffer_size': 256, 'store_comp_size': 4} + + oob_kwargs = {} + oob_kwargs.update(kwargs) + oob_kwargs['store_comp_size'] = 0 + + ib_cstream = bytearray() + oob_cstream = bytearray() + oob_sizes = [] + + with lz4.stream.LZ4StreamCompressor(**kwargs) as ib_proc, \ + lz4.stream.LZ4StreamCompressor(**oob_kwargs) as oob_proc: + for start in range(0, len(data), kwargs['buffer_size']): + chunk = data[start:start + kwargs['buffer_size']] + ib_block = ib_proc.compress(chunk) + oob_block = oob_proc.compress(chunk) + + assert (len(ib_block) == (len(oob_block) + kwargs['store_comp_size'])), \ + "Blocks size mismatch: " \ + "{}/{}".format(len(ib_block), len(oob_block) + kwargs['store_comp_size']) + + assert (int_from_bytes(ib_block[:kwargs['store_comp_size']]) == len(oob_block)), \ + "Blocks size record mismatch: got {}, expected {}".format( + int_from_bytes(ib_block[:kwargs['store_comp_size']]), + len(oob_block)) + + assert (ib_block[kwargs['store_comp_size']:] == oob_block), "Blocks data mismatch" + + ib_cstream += ib_block + oob_cstream += oob_block + oob_sizes.append(len(oob_block)) + + ib_dstream = bytearray() + oob_dstream = bytearray() + + with lz4.stream.LZ4StreamDecompressor(**kwargs) as ib_proc, \ + lz4.stream.LZ4StreamDecompressor(**oob_kwargs) as oob_proc: + ib_offset = 0 + oob_index = 0 + oob_offset = 0 + while ib_offset < len(ib_cstream) and oob_index < len(oob_sizes): 
+ ib_block = ib_proc.get_block(ib_cstream[ib_offset:]) + oob_block = oob_cstream[oob_offset:oob_offset + oob_sizes[oob_index]] + + assert (len(ib_block) == len(oob_block)), \ + "Blocks size mismatch: {}/{}".format(len(ib_block), len(oob_block)) + + assert (ib_block == oob_block), "Blocks data mismatch" + + ib_chunk = ib_proc.decompress(ib_block) + oob_chunk = oob_proc.decompress(oob_block) + + assert (len(ib_chunk) == len(oob_chunk)), \ + "Chunks size mismatch: {}/{}".format(len(ib_chunk), len(oob_chunk)) + + assert (ib_chunk == oob_chunk), "Chunks data mismatch" + + ib_dstream += ib_chunk + oob_dstream += oob_chunk + + ib_offset += kwargs['store_comp_size'] + len(ib_block) + oob_offset += oob_sizes[oob_index] + oob_index += 1 + + assert (len(ib_dstream) == len(oob_dstream)), "Decompressed streams length mismatch" + + assert (len(data) == len(ib_dstream)), "Decompressed streams length mismatch" + + assert (len(data) == len(oob_dstream)), "Decompressed streams length mismatch" + + assert (ib_dstream == oob_dstream), "Decompressed streams mismatch" + + assert (data == ib_dstream), "Decompressed streams mismatch" + + assert (data == oob_dstream), "Decompressed streams mismatch" + + +def test_invalid_usage(): + data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + kwargs = {'strategy': "double_buffer", 'buffer_size': 256, 'store_comp_size': 0} + + cstream = bytearray() + oob_sizes = [] + + with lz4.stream.LZ4StreamCompressor(**kwargs) as proc: + for start in range(0, len(data), kwargs['buffer_size']): + chunk = data[start:start + kwargs['buffer_size']] + block = proc.compress(chunk) + cstream += block + oob_sizes.append(len(block)) + + message = r"^LZ4 context is configured for storing block size out-of-band$" + + with pytest.raises(lz4.stream.LZ4StreamError, match=message): + dstream = bytearray() + + with lz4.stream.LZ4StreamDecompressor(**kwargs) as proc: + offset = 0 + index = 0 + while 
offset < len(cstream): + block = proc.get_block(cstream[offset:]) + chunk = proc.decompress(block) + + dstream += chunk + + offset += kwargs['store_comp_size'] + len(block) + index += 1 diff --git a/contrib/python/lz4/py3/tests/ya.make b/contrib/python/lz4/py3/tests/ya.make new file mode 100644 index 0000000000..38f4c85b8d --- /dev/null +++ b/contrib/python/lz4/py3/tests/ya.make @@ -0,0 +1,42 @@ +PY3TEST() + +PEERDIR( + contrib/python/lz4 + contrib/python/psutil +) + +FORK_SUBTESTS() +SIZE(MEDIUM) + +TEST_SRCS( + block/conftest.py + #block/test_block_0.py + block/test_block_1.py + block/test_block_2.py + block/test_block_3.py + frame/__init__.py + frame/conftest.py + frame/helpers.py + frame/test_frame_0.py + frame/test_frame_1.py + frame/test_frame_2.py + frame/test_frame_3.py + frame/test_frame_4.py + frame/test_frame_5.py + frame/test_frame_6.py + frame/test_frame_7.py + frame/test_frame_8.py + frame/test_frame_9.py + stream/conftest.py + stream/test_stream_0.py + #stream/test_stream_1.py + stream/test_stream_2.py + stream/test_stream_3.py + stream/test_stream_4.py +) + +NO_LINT() + +REQUIREMENTS(ram:18) + +END() diff --git a/contrib/python/lz4/py3/ya.make b/contrib/python/lz4/py3/ya.make new file mode 100644 index 0000000000..c703ac87ce --- /dev/null +++ b/contrib/python/lz4/py3/ya.make @@ -0,0 +1,54 @@ +# Generated by devtools/yamaker (pypi). 
+ +PY3_LIBRARY() + +VERSION(4.3.2) + +LICENSE(BSD-3-Clause) + +PEERDIR( + contrib/libs/lz4 +) + +ADDINCL( + contrib/libs/lz4 +) + +NO_COMPILER_WARNINGS() + +NO_LINT() + +SRCS( + lz4/_version.c + lz4/block/_block.c + lz4/frame/_frame.c + lz4/stream/_stream.c +) + +PY_REGISTER( + lz4._version + lz4.block._block + lz4.frame._frame + lz4.stream._stream +) + +PY_SRCS( + TOP_LEVEL + lz4/__init__.py + lz4/block/__init__.py + lz4/frame/__init__.py + lz4/stream/__init__.py + lz4/version.py +) + +RESOURCE_FILES( + PREFIX contrib/python/lz4/py3/ + .dist-info/METADATA + .dist-info/top_level.txt +) + +END() + +RECURSE_FOR_TESTS( + tests +) diff --git a/contrib/python/lz4/ya.make b/contrib/python/lz4/ya.make new file mode 100644 index 0000000000..977908d228 --- /dev/null +++ b/contrib/python/lz4/ya.make @@ -0,0 +1,18 @@ +PY23_LIBRARY() + +LICENSE(Service-Py23-Proxy) + +IF (PYTHON2) + PEERDIR(contrib/python/lz4/py2) +ELSE() + PEERDIR(contrib/python/lz4/py3) +ENDIF() + +NO_LINT() + +END() + +RECURSE( + py2 + py3 +) |