diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 09:58:56 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 10:20:20 +0300 |
commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/python/lz4/py2 | |
parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
download | ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/python/lz4/py2')
33 files changed, 5300 insertions, 0 deletions
diff --git a/contrib/python/lz4/py2/.dist-info/METADATA b/contrib/python/lz4/py2/.dist-info/METADATA new file mode 100644 index 0000000000..a896017502 --- /dev/null +++ b/contrib/python/lz4/py2/.dist-info/METADATA @@ -0,0 +1,104 @@ +Metadata-Version: 2.1 +Name: lz4 +Version: 2.2.1 +Summary: LZ4 Bindings for Python +Home-page: https://github.com/python-lz4/python-lz4 +Author: Jonathan Underwood +Author-email: jonathan.underwood@gmail.com +License: UNKNOWN +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: License :: OSI Approved :: BSD License +Classifier: Intended Audience :: Developers +Classifier: Programming Language :: C +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* +Provides-Extra: tests +Provides-Extra: flake8 +Provides-Extra: docs +Provides-Extra: docs +Requires-Dist: sphinx (>=1.6.0); extra == 'docs' +Requires-Dist: sphinx-bootstrap-theme; extra == 'docs' +Provides-Extra: flake8 +Requires-Dist: flake8; extra == 'flake8' +Provides-Extra: tests +Requires-Dist: pytest (!=3.3.0); extra == 'tests' +Requires-Dist: psutil; extra == 'tests' +Requires-Dist: pytest-cov; extra == 'tests' + +========== +python-lz4 +========== + +Status +====== + +.. image:: https://travis-ci.org/python-lz4/python-lz4.svg?branch=master + :target: https://travis-ci.org/python-lz4/python-lz4 + :alt: Build Status + +.. image:: https://ci.appveyor.com/api/projects/status/r2qvw9mlfo63lklo/branch/master?svg=true + :target: https://ci.appveyor.com/project/jonathanunderwood/python-lz4 + :alt: Build Status Windows + +.. 
image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +.. image:: https://codecov.io/gh/python-lz4/python-lz4/branch/codecov/graph/badge.svg + :target: https://codecov.io/gh/python-lz4/python-lz4 + :alt: CodeCov + + +Introduction +============ +This package provides python bindings for the `LZ4 compression library +<https://lz4.github.io/lz4/>`_. + +The bindings provided in this package cover the `frame format +<https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md>`_ and the `block format +<https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md>`_ specifications. The frame +format bindings are the recommended ones to use, as this guarantees +interoperability with other implementations and language bindings. + +The API provided by the frame format bindings follows that of the LZMA, zlib, +gzip and bzip2 compression libraries which are provided with the Python standard +library. As such, these LZ4 bindings should provide a drop-in alternative to the +compression libraries shipped with Python. The package provides context managers +and file handler support. + +The bindings drop the GIL when calling in to the underlying LZ4 library, and is +thread safe. An extensive test suite is included. + +Documenation +============ + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +Full documentation is included with the project. The documentation is +generated using Sphinx. Documentation is also hosted on readthedocs. + +:master: http://python-lz4.readthedocs.io/en/stable/ +:development: http://python-lz4.readthedocs.io/en/latest/ + +Homepage +======== + +The `project homepage <https://www.github.com/python-lz4/python-lz4>`_ is hosted +on Github. Please report any issues you find using the `issue tracker +<https://github.com/python-lz4/python-lz4/issues>`_. 
+ +Licensing +========= +Code specific to this project is covered by the `BSD 3-Clause License +<http://opensource.org/licenses/BSD-3-Clause>`_ + + + diff --git a/contrib/python/lz4/py2/.dist-info/top_level.txt b/contrib/python/lz4/py2/.dist-info/top_level.txt new file mode 100644 index 0000000000..4ef6877a79 --- /dev/null +++ b/contrib/python/lz4/py2/.dist-info/top_level.txt @@ -0,0 +1 @@ +lz4 diff --git a/contrib/python/lz4/py2/LICENSE b/contrib/python/lz4/py2/LICENSE new file mode 100644 index 0000000000..518770111c --- /dev/null +++ b/contrib/python/lz4/py2/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2012-2013, Steeve Morin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of Steeve Morin nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/python/lz4/py2/README.rst b/contrib/python/lz4/py2/README.rst new file mode 100644 index 0000000000..18fb95f14c --- /dev/null +++ b/contrib/python/lz4/py2/README.rst @@ -0,0 +1,69 @@ +========== +python-lz4 +========== + +Status +====== + +.. image:: https://travis-ci.org/python-lz4/python-lz4.svg?branch=master + :target: https://travis-ci.org/python-lz4/python-lz4 + :alt: Build Status + +.. image:: https://ci.appveyor.com/api/projects/status/r2qvw9mlfo63lklo/branch/master?svg=true + :target: https://ci.appveyor.com/project/jonathanunderwood/python-lz4 + :alt: Build Status Windows + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +.. image:: https://codecov.io/gh/python-lz4/python-lz4/branch/codecov/graph/badge.svg + :target: https://codecov.io/gh/python-lz4/python-lz4 + :alt: CodeCov + + +Introduction +============ +This package provides python bindings for the `LZ4 compression library +<https://lz4.github.io/lz4/>`_. + +The bindings provided in this package cover the `frame format +<https://github.com/lz4/lz4/blob/master/doc/lz4_Frame_format.md>`_ and the `block format +<https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md>`_ specifications. The frame +format bindings are the recommended ones to use, as this guarantees +interoperability with other implementations and language bindings. 
+ +The API provided by the frame format bindings follows that of the LZMA, zlib, +gzip and bzip2 compression libraries which are provided with the Python standard +library. As such, these LZ4 bindings should provide a drop-in alternative to the +compression libraries shipped with Python. The package provides context managers +and file handler support. + +The bindings drop the GIL when calling in to the underlying LZ4 library, and is +thread safe. An extensive test suite is included. + +Documenation +============ + +.. image:: https://readthedocs.org/projects/python-lz4/badge/?version=stable + :target: https://readthedocs.org/projects/python-lz4/ + :alt: Documentation + +Full documentation is included with the project. The documentation is +generated using Sphinx. Documentation is also hosted on readthedocs. + +:master: http://python-lz4.readthedocs.io/en/stable/ +:development: http://python-lz4.readthedocs.io/en/latest/ + +Homepage +======== + +The `project homepage <https://www.github.com/python-lz4/python-lz4>`_ is hosted +on Github. Please report any issues you find using the `issue tracker +<https://github.com/python-lz4/python-lz4/issues>`_. + +Licensing +========= +Code specific to this project is covered by the `BSD 3-Clause License +<http://opensource.org/licenses/BSD-3-Clause>`_ + diff --git a/contrib/python/lz4/py2/lz4/__init__.py b/contrib/python/lz4/py2/lz4/__init__.py new file mode 100644 index 0000000000..af0fa05b3f --- /dev/null +++ b/contrib/python/lz4/py2/lz4/__init__.py @@ -0,0 +1,20 @@ +# Although the canonical way to get the package version is using pkg_resources +# as below, this turns out to be very slow on systems with lots of packages. +# So, until that is remedied, we'll import the version from a local file +# created by setuptools_scm. 
+ +# from pkg_resources import get_distribution, DistributionNotFound +# try: +# __version__ = get_distribution(__name__).version +# except DistributionNotFound: +# # package is not installed +# pass + +from .version import version as __version__ +VERSION = __version__ + + +from ._version import ( # noqa: F401 + library_version_number, + library_version_string, +) diff --git a/contrib/python/lz4/py2/lz4/_version.c b/contrib/python/lz4/py2/lz4/_version.c new file mode 100644 index 0000000000..d477add649 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/_version.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2016 Jonathan Underwood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Steeve Morin nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#if defined(_WIN32) && defined(_MSC_VER) +#define inline __inline +#elif defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX) +#define inline +#endif + +#include <py3c.h> +#include <py3c/capsulethunk.h> + +#include <stdlib.h> +#include <lz4.h> +#include <lz4hc.h> + +#ifndef Py_UNUSED /* This is already defined for Python 3.4 onwards */ +#ifdef __GNUC__ +#define Py_UNUSED(name) _unused_ ## name __attribute__((unused)) +#else +#define Py_UNUSED(name) _unused_ ## name +#endif +#endif + +static PyObject * +library_version_number (PyObject * Py_UNUSED (self), PyObject * Py_UNUSED (args)) +{ + return Py_BuildValue ("i", LZ4_versionNumber ()); +} + +static PyObject * +library_version_string (PyObject * Py_UNUSED (self), PyObject * Py_UNUSED (args)) +{ + return Py_BuildValue ("s", LZ4_versionString ()); +} + +PyDoc_STRVAR +( + library_version_number__doc, + "library_version_number()\n\n" \ + "Returns the version number of the LZ4 library.\n" \ + "\n" \ + "Args:\n" \ + " None\n" \ + "\n" \ + "Returns:\n" \ + " int: version number eg. 10705" + ); + +PyDoc_STRVAR +( + library_version_string__doc, + "library_version_string()\n\n" \ + "Returns the version number of the LZ4 library as a string\n" \ + "containing the semantic version.\n" \ + "\n" \ + "Args:\n" \ + " None\n" \ + "\n" \ + "Returns:\n" \ + " str: version number eg. 
\"1.7.5\"" + ); + +static PyMethodDef module_methods[] = { + { + "library_version_number", + (PyCFunction) library_version_number, + METH_VARARGS, + library_version_number__doc + }, + { + "library_version_string", + (PyCFunction) library_version_string, + METH_VARARGS, + library_version_string__doc + }, + { + /* Sentinel */ + NULL, + NULL, + 0, + NULL + } +}; + +static struct PyModuleDef moduledef = + { + PyModuleDef_HEAD_INIT, + "_version", + NULL, + -1, + module_methods + }; + +MODULE_INIT_FUNC (_version) +{ + PyObject *module = PyModule_Create (&moduledef); + + if (module == NULL) + return NULL; + + return module; +} diff --git a/contrib/python/lz4/py2/lz4/block/__init__.py b/contrib/python/lz4/py2/lz4/block/__init__.py new file mode 100644 index 0000000000..6662bab4de --- /dev/null +++ b/contrib/python/lz4/py2/lz4/block/__init__.py @@ -0,0 +1 @@ +from ._block import compress, decompress, LZ4BlockError # noqa: F401 diff --git a/contrib/python/lz4/py2/lz4/block/_block.c b/contrib/python/lz4/py2/lz4/block/_block.c new file mode 100644 index 0000000000..d55b1d92d4 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/block/_block.c @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2012-2018, Steeve Morin, Jonathan Underwood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of Steeve Morin nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#if defined(_WIN32) && defined(_MSC_VER) +#define inline __inline +#elif defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX) +#define inline +#endif + +#include <py3c.h> +#include <py3c/capsulethunk.h> + +#include <stdlib.h> +#include <math.h> +#include <lz4.h> +#include <lz4hc.h> + +#ifndef Py_UNUSED /* This is already defined for Python 3.4 onwards */ +#ifdef __GNUC__ +#define Py_UNUSED(name) _unused_ ## name __attribute__((unused)) +#else +#define Py_UNUSED(name) _unused_ ## name +#endif +#endif + +#if defined(_WIN32) && defined(_MSC_VER) +#if _MSC_VER >= 1600 +#include <stdint.h> +#else /* _MSC_VER >= 1600 */ +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +#endif /* _MSC_VER >= 1600 */ +#endif + +static inline void +store_le32 (char *c, uint32_t x) +{ + c[0] = x & 0xff; + c[1] = (x >> 8) & 0xff; + c[2] = (x >> 16) & 0xff; + c[3] = (x >> 24) & 0xff; +} + +static inline uint32_t +load_le32 (const char *c) +{ + const uint8_t *d = (const uint8_t *) c; + return d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); +} + +static const 
size_t hdr_size = sizeof (uint32_t); + +typedef enum +{ + DEFAULT, + FAST, + HIGH_COMPRESSION +} compression_type; + +static PyObject * LZ4BlockError; + +static inline int +lz4_compress_generic (int comp, char* source, char* dest, int source_size, int dest_size, + char* dict, int dict_size, int acceleration, int compression) +{ + if (comp != HIGH_COMPRESSION) + { + LZ4_stream_t lz4_state; + LZ4_resetStream (&lz4_state); + if (dict) + { + LZ4_loadDict (&lz4_state, dict, dict_size); + } + if (comp != FAST) + { + acceleration = 1; + } + return LZ4_compress_fast_continue (&lz4_state, source, dest, source_size, dest_size, acceleration); + } + else + { + LZ4_streamHC_t lz4_state; + LZ4_resetStreamHC (&lz4_state, compression); + if (dict) + { + LZ4_loadDictHC (&lz4_state, dict, dict_size); + } + return LZ4_compress_HC_continue (&lz4_state, source, dest, source_size, dest_size); + } +} + +#ifdef inline +#undef inline +#endif + +static PyObject * +compress (PyObject * Py_UNUSED (self), PyObject * args, PyObject * kwargs) +{ + const char *mode = "default"; + size_t dest_size, total_size; + int acceleration = 1; + int compression = 9; + int store_size = 1; + PyObject *py_dest; + char *dest, *dest_start; + compression_type comp; + int output_size; + Py_buffer source; + int source_size; + int return_bytearray = 0; + Py_buffer dict = {0}; + static char *argnames[] = { + "source", + "mode", + "store_size", + "acceleration", + "compression", + "return_bytearray", + "dict", + NULL + }; + + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "y*|spiipz*", argnames, + &source, + &mode, &store_size, &acceleration, &compression, + &return_bytearray, &dict)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "s*|siiiiz*", argnames, + &source, + &mode, &store_size, &acceleration, &compression, + &return_bytearray, &dict)) + { + return NULL; + } +#endif + + if (source.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + 
PyErr_Format(PyExc_OverflowError, + "Input too large for LZ4 API"); + return NULL; + } + + if (dict.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Dictionary too large for LZ4 API"); + return NULL; + } + + source_size = (int) source.len; + + if (!strncmp (mode, "default", sizeof ("default"))) + { + comp = DEFAULT; + } + else if (!strncmp (mode, "fast", sizeof ("fast"))) + { + comp = FAST; + } + else if (!strncmp (mode, "high_compression", sizeof ("high_compression"))) + { + comp = HIGH_COMPRESSION; + } + else + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format (PyExc_ValueError, + "Invalid mode argument: %s. Must be one of: standard, fast, high_compression", + mode); + return NULL; + } + + dest_size = LZ4_compressBound (source_size); + + if (store_size) + { + total_size = dest_size + hdr_size; + } + else + { + total_size = dest_size; + } + + dest = PyMem_Malloc (total_size * sizeof * dest); + if (dest == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + + if (store_size) + { + store_le32 (dest, source_size); + dest_start = dest + hdr_size; + } + else + { + dest_start = dest; + } + + output_size = lz4_compress_generic (comp, source.buf, dest_start, source_size, + (int) dest_size, dict.buf, (int) dict.len, + acceleration, compression); + + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + + if (output_size <= 0) + { + PyErr_SetString (LZ4BlockError, "Compression failed"); + PyMem_Free (dest); + return NULL; + } + + if (store_size) + { + output_size += (int) hdr_size; + } + + if (return_bytearray) + { + py_dest = PyByteArray_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + else + { + py_dest = PyBytes_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + + PyMem_Free (dest); + + if (py_dest == NULL) + { + return PyErr_NoMemory (); + } + + return py_dest; +} + +static PyObject * +decompress (PyObject * Py_UNUSED 
(self), PyObject * args, PyObject * kwargs) +{ + Py_buffer source; + const char * source_start; + size_t source_size; + PyObject *py_dest; + char *dest; + int output_size; + size_t dest_size; + int uncompressed_size = -1; + int return_bytearray = 0; + Py_buffer dict = {0}; + static char *argnames[] = { + "source", + "uncompressed_size", + "return_bytearray", + "dict", + NULL + }; + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "y*|ipz*", argnames, + &source, &uncompressed_size, + &return_bytearray, &dict)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, kwargs, "s*|iiz*", argnames, + &source, &uncompressed_size, + &return_bytearray, &dict)) + { + return NULL; + } +#endif + + if (source.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Input too large for LZ4 API"); + return NULL; + } + + if (dict.len > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format(PyExc_OverflowError, + "Dictionary too large for LZ4 API"); + return NULL; + } + + source_start = (const char *) source.buf; + source_size = (int) source.len; + + if (uncompressed_size >= 0) + { + dest_size = uncompressed_size; + } + else + { + if (source_size < hdr_size) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_SetString (PyExc_ValueError, "Input source data size too small"); + return NULL; + } + dest_size = load_le32 (source_start); + source_start += hdr_size; + source_size -= hdr_size; + } + + if (dest_size > INT_MAX) + { + PyBuffer_Release(&source); + PyBuffer_Release(&dict); + PyErr_Format (PyExc_ValueError, "Invalid size: 0x%zu", + dest_size); + return NULL; + } + + dest = PyMem_Malloc (dest_size * sizeof * dest); + if (dest == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + + output_size = + LZ4_decompress_safe_usingDict (source_start, dest, source_size, (int) dest_size, + dict.buf, (int) dict.len); + + Py_END_ALLOW_THREADS + + 
PyBuffer_Release(&source); + PyBuffer_Release(&dict); + + if (output_size < 0) + { + PyErr_Format (LZ4BlockError, + "Decompression failed: corrupt input or insufficient space in destination buffer. Error code: %u", + -output_size); + PyMem_Free (dest); + return NULL; + } + else if (((size_t)output_size != dest_size) && (uncompressed_size < 0)) + { + PyErr_Format (LZ4BlockError, + "Decompressor wrote %u bytes, but %zu bytes expected from header", + output_size, dest_size); + PyMem_Free (dest); + return NULL; + } + + if (return_bytearray) + { + py_dest = PyByteArray_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + else + { + py_dest = PyBytes_FromStringAndSize (dest, (Py_ssize_t) output_size); + } + + PyMem_Free (dest); + + if (py_dest == NULL) + { + return PyErr_NoMemory (); + } + + return py_dest; +} + +PyDoc_STRVAR(compress__doc, + "compress(source, mode='default', acceleration=1, compression=0, return_bytearray=False)\n\n" \ + "Compress source, returning the compressed data as a string.\n" \ + "Raises an exception if any error occurs.\n" \ + "\n" \ + "Args:\n" \ + " source (str, bytes or buffer-compatible object): Data to compress\n" \ + "\n" \ + "Keyword Args:\n" \ + " mode (str): If ``'default'`` or unspecified use the default LZ4\n" \ + " compression mode. Set to ``'fast'`` to use the fast compression\n" \ + " LZ4 mode at the expense of compression. Set to\n" \ + " ``'high_compression'`` to use the LZ4 high-compression mode at\n" \ + " the exepense of speed.\n" \ + " acceleration (int): When mode is set to ``'fast'`` this argument\n" \ + " specifies the acceleration. The larger the acceleration, the\n" \ + " faster the but the lower the compression. The default\n" \ + " compression corresponds to a value of ``1``.\n" \ + " compression (int): When mode is set to ``high_compression`` this\n" \ + " argument specifies the compression. Valid values are between\n" \ + " ``1`` and ``12``. 
Values between ``4-9`` are recommended, and\n" \ + " ``9`` is the default.\n" + " store_size (bool): If ``True`` (the default) then the size of the\n" \ + " uncompressed data is stored at the start of the compressed\n" \ + " block.\n" \ + " return_bytearray (bool): If ``False`` (the default) then the function\n" \ + " will return a bytes object. If ``True``, then the function will\n" \ + " return a bytearray object.\n\n" \ + " dict (str, bytes or buffer-compatible object): If specified, perform\n" \ + " compression using this initial dictionary.\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data.\n"); + +PyDoc_STRVAR(decompress__doc, + "decompress(source, uncompressed_size=-1, return_bytearray=False)\n\n" \ + "Decompress source, returning the uncompressed data as a string.\n" \ + "Raises an exception if any error occurs.\n" \ + "\n" \ + "Args:\n" \ + " source (str, bytes or buffer-compatible object): Data to decompress.\n" \ + "\n" \ + "Keyword Args:\n" \ + " uncompressed_size (int): If not specified or negative, the uncompressed\n" \ + " data size is read from the start of the source block. If specified,\n" \ + " it is assumed that the full source data is compressed data. If this\n" \ + " argument is specified, it is considered to be a maximum possible size\n" \ + " for the buffer used to hold the uncompressed data, and so less data\n" \ + " may be returned. If `uncompressed_size` is too small, `LZ4BlockError`\n" \ + " will be raised. By catching `LZ4BlockError` it is possible to increase\n" \ + " `uncompressed_size` and try again.\n" \ + " return_bytearray (bool): If ``False`` (the default) then the function\n" \ + " will return a bytes object. 
If ``True``, then the function will\n" \ + " return a bytearray object.\n\n" \ + " dict (str, bytes or buffer-compatible object): If specified, perform\n" \ + " decompression using this initial dictionary.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Decompressed data.\n" \ + "\n" \ + "Raises:\n" \ + " LZ4BlockError: raised if the call to the LZ4 library fails. This can be\n" \ + " caused by `uncompressed_size` being too small, or invalid data.\n"); + +PyDoc_STRVAR(lz4block__doc, + "A Python wrapper for the LZ4 block protocol" + ); + +static PyMethodDef module_methods[] = { + { + "compress", + (PyCFunction) compress, + METH_VARARGS | METH_KEYWORDS, + compress__doc + }, + { + "decompress", + (PyCFunction) decompress, + METH_VARARGS | METH_KEYWORDS, + decompress__doc + }, + { + /* Sentinel */ + NULL, + NULL, + 0, + NULL + } +}; + +static struct PyModuleDef moduledef = +{ + PyModuleDef_HEAD_INIT, + "_block", + lz4block__doc, + -1, + module_methods +}; + +MODULE_INIT_FUNC (_block) +{ + PyObject *module = PyModule_Create (&moduledef); + + if (module == NULL) + return NULL; + + PyModule_AddIntConstant (module, "HC_LEVEL_MIN", LZ4HC_CLEVEL_MIN); + PyModule_AddIntConstant (module, "HC_LEVEL_DEFAULT", LZ4HC_CLEVEL_DEFAULT); + PyModule_AddIntConstant (module, "HC_LEVEL_OPT_MIN", LZ4HC_CLEVEL_OPT_MIN); + PyModule_AddIntConstant (module, "HC_LEVEL_MAX", LZ4HC_CLEVEL_MAX); + + LZ4BlockError = PyErr_NewExceptionWithDoc("_block.LZ4BlockError", "Call to LZ4 library failed.", NULL, NULL); + if (LZ4BlockError == NULL) + { + return NULL; + } + Py_INCREF(LZ4BlockError); + PyModule_AddObject(module, "LZ4BlockError", LZ4BlockError); + + return module; +} diff --git a/contrib/python/lz4/py2/lz4/frame/__init__.py b/contrib/python/lz4/py2/lz4/frame/__init__.py new file mode 100644 index 0000000000..5fa03ce673 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/frame/__init__.py @@ -0,0 +1,837 @@ +import lz4 +import io +import os +import builtins +import sys +from ._frame import ( # 
noqa: F401 + compress, + decompress, + create_compression_context, + compress_begin, + compress_chunk, + compress_flush, + create_decompression_context, + reset_decompression_context, + decompress_chunk, + get_frame_info, + BLOCKSIZE_DEFAULT as _BLOCKSIZE_DEFAULT, + BLOCKSIZE_MAX64KB as _BLOCKSIZE_MAX64KB, + BLOCKSIZE_MAX256KB as _BLOCKSIZE_MAX256KB, + BLOCKSIZE_MAX1MB as _BLOCKSIZE_MAX1MB, + BLOCKSIZE_MAX4MB as _BLOCKSIZE_MAX4MB, + __doc__ as _doc +) + +__doc__ = _doc + +try: + import _compression # Python 3.6 and later +except ImportError: + from . import _compression + + +BLOCKSIZE_DEFAULT = _BLOCKSIZE_DEFAULT +"""Specifier for the default block size. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_DEFAULT`` will instruct the LZ4 +library to use the default maximum blocksize. This is currently equivalent to +`lz4.frame.BLOCKSIZE_MAX64KB` + +""" + +BLOCKSIZE_MAX64KB = _BLOCKSIZE_MAX64KB +"""Specifier for a maximum block size of 64 kB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX64KB`` will instruct the LZ4 +library to create blocks containing a maximum of 64 kB of uncompressed data. + +""" + +BLOCKSIZE_MAX256KB = _BLOCKSIZE_MAX256KB +"""Specifier for a maximum block size of 256 kB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX256KB`` will instruct the LZ4 +library to create blocks containing a maximum of 256 kB of uncompressed data. + +""" + +BLOCKSIZE_MAX1MB = _BLOCKSIZE_MAX1MB +"""Specifier for a maximum block size of 1 MB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX1MB`` will instruct the LZ4 +library to create blocks containing a maximum of 1 MB of uncompressed data. + +""" + +BLOCKSIZE_MAX4MB = _BLOCKSIZE_MAX4MB +"""Specifier for a maximum block size of 4 MB. + +Specifying ``block_size=lz4.frame.BLOCKSIZE_MAX4MB`` will instruct the LZ4 +library to create blocks containing a maximum of 4 MB of uncompressed data. + +""" + +COMPRESSIONLEVEL_MIN = 0 +"""Specifier for the minimum compression level. 
+ +Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MIN`` will +instruct the LZ4 library to use a compression level of 0 + +""" + +COMPRESSIONLEVEL_MINHC = 3 +"""Specifier for the minimum compression level for high compression mode. + +Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MINHC`` will +instruct the LZ4 library to use a compression level of 3, the minimum for the +high compression mode. + +""" + +COMPRESSIONLEVEL_MAX = 16 +"""Specifier for the maximum compression level. + +Specifying ``compression_level=lz4.frame.COMPRESSIONLEVEL_MAX`` will +instruct the LZ4 library to use a compression level of 16, the highest +compression level available. + +""" + + +class LZ4FrameCompressor(object): + """Create a LZ4 frame compressor object. + + This object can be used to compress data incrementally. + + Args: + block_size (int): Specifies the maximum blocksize to use. + Options: + + - `lz4.frame.BLOCKSIZE_DEFAULT`: the lz4 library default + - `lz4.frame.BLOCKSIZE_MAX64KB`: 64 kB + - `lz4.frame.BLOCKSIZE_MAX256KB`: 256 kB + - `lz4.frame.BLOCKSIZE_MAX1MB`: 1 MB + - `lz4.frame.BLOCKSIZE_MAX4MB`: 4 MB + + If unspecified, will default to `lz4.frame.BLOCKSIZE_DEFAULT` which + is equal to `lz4.frame.BLOCKSIZE_MAX64KB`. + block_linked (bool): Specifies whether to use block-linked + compression. If ``True``, the compression ratio is improved, + especially for small block sizes. If ``False`` the blocks are + compressed independently. The default is ``True``. + compression_level (int): Specifies the level of compression used. + Values between 0-16 are valid, with 0 (default) being the + lowest compression (0-2 are the same value), and 16 the highest. + Values above 16 will be treated as 16. + Values between 4-9 are recommended. 0 is the default. 
+ The following module constants are provided as a convenience: + + - `lz4.frame.COMPRESSIONLEVEL_MIN`: Minimum compression (0) + - `lz4.frame.COMPRESSIONLEVEL_MINHC`: Minimum high-compression (3) + - `lz4.frame.COMPRESSIONLEVEL_MAX`: Maximum compression (16) + + content_checksum (bool): Specifies whether to enable checksumming of + the payload content. If ``True``, a checksum of the uncompressed + data is stored at the end of the compressed frame which is checked + during decompression. The default is ``False``. + block_checksum (bool): Specifies whether to enable checksumming of + the content of each block. If ``True`` a checksum of the + uncompressed data in each block in the frame is stored at the end + of each block. If present, these checksums will be used to + validate the data during decompression. The default is ``False``, + meaning block checksums are not calculated and stored. This + functionality is only supported if the underlying LZ4 library has + version >= 1.8.0. Attempting to set this value to ``True`` with a + version of LZ4 < 1.8.0 will cause a ``RuntimeError`` to be raised. + auto_flush (bool): When ``False``, the LZ4 library may buffer data + until a block is full. When ``True`` no buffering occurs, and + partially full blocks may be returned. The default is ``False``. + return_bytearray (bool): When ``False`` a ``bytes`` object is returned + from the calls to methods of this class. When ``True`` a + ``bytearray`` object will be returned. The default is ``False``. 
+ + """ + + def __init__(self, + block_size=BLOCKSIZE_DEFAULT, + block_linked=True, + compression_level=COMPRESSIONLEVEL_MIN, + content_checksum=False, + block_checksum=False, + auto_flush=False, + return_bytearray=False): + self.block_size = block_size + self.block_linked = block_linked + self.compression_level = compression_level + self.content_checksum = content_checksum + if block_checksum and lz4.library_version_number() < 10800: + raise RuntimeError( + 'Attempt to set block_checksum to True with LZ4 library' + 'version < 10800' + ) + self.block_checksum = block_checksum + self.auto_flush = auto_flush + self.return_bytearray = return_bytearray + self._context = None + self._started = False + + def __enter__(self): + # All necessary initialization is done in __init__ + return self + + def __exit__(self, exception_type, exception, traceback): + self.block_size = None + self.block_linked = None + self.compression_level = None + self.content_checksum = None + self.block_checksum = None + self.auto_flush = None + self.return_bytearray = None + self._context = None + self._started = False + + def begin(self, source_size=0): + """Begin a compression frame. + + The returned data contains frame header information. The data returned + from subsequent calls to ``compress()`` should be concatenated with + this header. + + Keyword Args: + source_size (int): Optionally specify the total size of the + uncompressed data. If specified, will be stored in the + compressed frame header as an 8-byte field for later use + during decompression. Default is 0 (no size stored). 
+ + Returns: + bytes or bytearray: frame header data + + """ + + if self._started is False: + self._context = create_compression_context() + result = compress_begin( + self._context, + block_size=self.block_size, + block_linked=self.block_linked, + compression_level=self.compression_level, + content_checksum=self.content_checksum, + block_checksum=self.block_checksum, + auto_flush=self.auto_flush, + return_bytearray=self.return_bytearray, + source_size=source_size + ) + self._started = True + return result + else: + raise RuntimeError( + 'LZ4FrameCompressor.begin() called after already initialized' + ) + + def compress(self, data): # noqa: F811 + """Compresses data and returns it. + + This compresses ``data`` (a ``bytes`` object), returning a bytes or + bytearray object containing compressed data the input. + + If ``auto_flush`` has been set to ``False``, some of ``data`` may be + buffered internally, for use in later calls to + `LZ4FrameCompressor.compress()` and `LZ4FrameCompressor.flush()`. + + The returned data should be concatenated with the output of any + previous calls to `compress()` and a single call to + `compress_begin()`. + + Args: + data (str, bytes or buffer-compatible object): data to compress + + Returns: + bytes or bytearray: compressed data + + """ + if self._context is None: + raise RuntimeError('compress called after flush()') + + if self._started is False: + raise RuntimeError('compress called before compress_begin()') + + result = compress_chunk( + self._context, data, + return_bytearray=self.return_bytearray + ) + + return result + + def flush(self): + """Finish the compression process. + + This returns a ``bytes`` or ``bytearray`` object containing any data + stored in the compressor's internal buffers and a frame footer. + + The LZ4FrameCompressor instance may be re-used after this method has + been called to create a new frame of compressed data. + + Returns: + bytes or bytearray: compressed data and frame footer. 
+ + """ + result = compress_flush( + self._context, + end_frame=True, + return_bytearray=self.return_bytearray + ) + self._context = None + self._started = False + return result + + def reset(self): + """Reset the `LZ4FrameCompressor` instance. + + This allows the `LZ4FrameCompression` instance to be re-used after an + error. + + """ + self._context = None + self._started = False + + +class LZ4FrameDecompressor(object): + """Create a LZ4 frame decompressor object. + + This can be used to decompress data incrementally. + + For a more convenient way of decompressing an entire compressed frame at + once, see `lz4.frame.decompress()`. + + Args: + return_bytearray (bool): When ``False`` a bytes object is returned from + the calls to methods of this class. When ``True`` a bytearray + object will be returned. The default is ``False``. + + Attributes: + eof (bool): ``True`` if the end-of-stream marker has been reached. + ``False`` otherwise. + unused_data (bytes): Data found after the end of the compressed stream. + Before the end of the frame is reached, this will be ``b''``. + needs_input (bool): ``False`` if the ``decompress()`` method can + provide more decompressed data before requiring new uncompressed + input. ``True`` otherwise. + + """ + + def __init__(self, return_bytearray=False): + self._context = create_decompression_context() + self.eof = False + self.needs_input = True + self.unused_data = None + self._unconsumed_data = b'' + self._return_bytearray = return_bytearray + + def __enter__(self): + # All necessary initialization is done in __init__ + return self + + def __exit__(self, exception_type, exception, traceback): + self._context = None + self.eof = None + self.needs_input = None + self.unused_data = None + self._unconsumed_data = None + self._return_bytearray = None + + def reset(self): + """Reset the decompressor state. + + This is useful after an error occurs, allowing re-use of the instance. 
+ + """ + reset_decompression_context(self._context) + self.eof = False + self.needs_input = True + self.unused_data = None + self._unconsumed_data = b'' + + def decompress(self, data, max_length=-1): # noqa: F811 + """Decompresses part or all of an LZ4 frame of compressed data. + + The returned data should be concatenated with the output of any + previous calls to `decompress()`. + + If ``max_length`` is non-negative, returns at most ``max_length`` bytes + of decompressed data. If this limit is reached and further output can + be produced, the `needs_input` attribute will be set to ``False``. In + this case, the next call to `decompress()` may provide data as + ``b''`` to obtain more of the output. In all cases, any unconsumed data + from previous calls will be prepended to the input data. + + If all of the input ``data`` was decompressed and returned (either + because this was less than ``max_length`` bytes, or because + ``max_length`` was negative), the `needs_input` attribute will be set + to ``True``. + + If an end of frame marker is encountered in the data during + decompression, decompression will stop at the end of the frame, and any + data after the end of frame is available from the `unused_data` + attribute. In this case, the `LZ4FrameDecompressor` instance is reset + and can be used for further decompression. + + Args: + data (str, bytes or buffer-compatible object): compressed data to + decompress + + Keyword Args: + max_length (int): If this is non-negative, this method returns at + most ``max_length`` bytes of decompressed data. 
+ + Returns: + bytes: Uncompressed data + + """ + + if self._unconsumed_data: + data = self._unconsumed_data + data + + decompressed, bytes_read, eoframe = decompress_chunk( + self._context, + data, + max_length=max_length, + return_bytearray=self._return_bytearray, + ) + + if bytes_read < len(data): + if eoframe: + self.unused_data = data[bytes_read:] + else: + self._unconsumed_data = data[bytes_read:] + self.needs_input = False + else: + self._unconsumed_data = b'' + self.needs_input = True + self.unused_data = None + + self.eof = eoframe + + return decompressed + + +_MODE_CLOSED = 0 +_MODE_READ = 1 +# Value 2 no longer used +_MODE_WRITE = 3 + + +class LZ4FrameFile(_compression.BaseStream): + """A file object providing transparent LZ4F (de)compression. + + An LZ4FFile can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + Note that LZ4FFile provides a *binary* file interface - data read is + returned as bytes, and data to be written must be given as bytes. + + When opening a file for writing, the settings used by the compressor can be + specified. The underlying compressor object is + `lz4.frame.LZ4FrameCompressor`. See the docstrings for that class for + details on compression options. + + Args: + filename(str, bytes, PathLike, file object): can be either an actual + file name (given as a str, bytes, or + PathLike object), in which case the named file is opened, or it + can be an existing file object to read from or write to. + + Keyword Args: + mode(str): mode can be ``'r'`` for reading (default), ``'w'`` for + (over)writing, ``'x'`` for creating exclusively, or ``'a'`` + for appending. These can equivalently be given as ``'rb'``, + ``'wb'``, ``'xb'`` and ``'ab'`` respectively. + return_bytearray (bool): When ``False`` a bytes object is returned from + the calls to methods of this class. When ``True`` a ``bytearray`` + object will be returned. The default is ``False``. 
+ source_size (int): Optionally specify the total size of the + uncompressed data. If specified, will be stored in the compressed + frame header as an 8-byte field for later use during decompression. + Default is ``0`` (no size stored). Only used for writing + compressed files. + block_size (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_linked (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + compression_level (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + content_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + auto_flush (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + + """ + + def __init__(self, filename=None, mode='r', + block_size=BLOCKSIZE_DEFAULT, + block_linked=True, + compression_level=COMPRESSIONLEVEL_MIN, + content_checksum=False, + block_checksum=False, + auto_flush=False, + return_bytearray=False, + source_size=0): + + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + if mode in ('r', 'rb'): + mode_code = _MODE_READ + elif mode in ('w', 'wb', 'a', 'ab', 'x', 'xb'): + mode_code = _MODE_WRITE + self._compressor = LZ4FrameCompressor( + block_size=block_size, + block_linked=block_linked, + compression_level=compression_level, + content_checksum=content_checksum, + block_checksum=block_checksum, + auto_flush=auto_flush, + return_bytearray=return_bytearray, + ) + self._pos = 0 + else: + raise ValueError('Invalid mode: {!r}'.format(mode)) + + if sys.version_info > (3, 6): + path_test = isinstance(filename, (str, bytes, os.PathLike)) + else: + path_test = isinstance(filename, (str, bytes)) + + if path_test is True: + if 'b' not in mode: + mode += 'b' + self._fp = builtins.open(filename, mode) + self._closefp = True + self._mode = mode_code + elif hasattr(filename, 'read') or hasattr(filename, 'write'): + self._fp = filename + self._mode = 
mode_code + else: + raise TypeError( + 'filename must be a str, bytes, file or PathLike object' + ) + + if self._mode == _MODE_READ: + raw = _compression.DecompressReader(self._fp, LZ4FrameDecompressor) + self._buffer = io.BufferedReader(raw) + + if self._mode == _MODE_WRITE: + self._fp.write( + self._compressor.begin(source_size=source_size) + ) + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. + """ + if self._mode == _MODE_CLOSED: + return + try: + if self._mode == _MODE_READ: + self._buffer.close() + self._buffer = None + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + @property + def closed(self): + """Returns ``True`` if this file is closed. + + Returns: + bool: ``True`` if the file is closed, ``False`` otherwise. + + """ + return self._mode == _MODE_CLOSED + + def fileno(self): + """Return the file descriptor for the underlying file. + + Returns: + file object: file descriptor for file. + + """ + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking. + + Returns: + bool: ``True`` if the file supports seeking, ``False`` otherwise. + + """ + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading. + + Returns: + bool: ``True`` if the file was opened for reading, ``False`` + otherwise. + + """ + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing. + + Returns: + bool: ``True`` if the file was opened for writing, ``False`` + otherwise. 
+ + """ + self._check_not_closed() + return self._mode == _MODE_WRITE + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. The exact + number of bytes returned is unspecified. + + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + # Relies on the undocumented fact that BufferedReader.peek() always + # returns at least one byte (except at EOF) + return self._buffer.peek(size) + + def read(self, size=-1): + """Read up to ``size`` uncompressed bytes from the file. + + If ``size`` is negative or omitted, read until ``EOF`` is reached. + Returns ``b''`` if the file is already at ``EOF``. + + Args: + size(int): If non-negative, specifies the maximum number of + uncompressed bytes to return. + + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to ``size`` uncompressed bytes. + + This method tries to avoid making multiple reads from the underlying + stream. + + This method reads up to a buffer's worth of data if ``size`` is + negative. + + Returns ``b''`` if the file is at EOF. + + Args: + size(int): If non-negative, specifies the maximum number of + uncompressed bytes to return. + + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + if size < 0: + size = io.DEFAULT_BUFFER_SIZE + return self._buffer.read1(size) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which case the + line may be incomplete). Returns b'' if already at EOF. + + Args: + size(int): If non-negative, specifies the maximum number of + uncompressed bytes to return. 
+ + Returns: + bytes: uncompressed data + + """ + self._check_can_read() + return self._buffer.readline(size) + + def write(self, data): + """Write a bytes object to the file. + + Returns the number of uncompressed bytes written, which is always + ``len(data)``. Note that due to buffering, the file on disk may not + reflect the data written until close() is called. + + Args: + data(bytes): uncompressed data to compress and write to the file + + Returns: + int: the number of uncompressed bytes written to the file + + """ + self._check_can_write() + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += len(data) + return len(data) + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by ``offset``, relative to the position + indicated by ``whence``. Possible values for ``whence`` are: + + - ``io.SEEK_SET`` or 0: start of stream (default): offset must not be + negative + - ``io.SEEK_CUR`` or 1: current stream position + - ``io.SEEK_END`` or 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the parameters, this + operation may be extremely slow. + + Args: + offset(int): new position in the file + whence(int): position with which ``offset`` is measured. Allowed + values are 0, 1, 2. The default is 0 (start of stream). + + Returns: + int: new file position + + """ + self._check_can_seek() + return self._buffer.seek(offset, whence) + + def tell(self): + """Return the current file position. 
+ + Args: + None + + Returns: + int: file position + + """ + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + return self._pos + + +def open(filename, mode="rb", + encoding=None, + errors=None, + newline=None, + block_size=BLOCKSIZE_DEFAULT, + block_linked=True, + compression_level=COMPRESSIONLEVEL_MIN, + content_checksum=False, + block_checksum=False, + auto_flush=False, + return_bytearray=False, + source_size=0): + """Open an LZ4Frame-compressed file in binary or text mode. + + ``filename`` can be either an actual file name (given as a str, bytes, or + PathLike object), in which case the named file is opened, or it can be an + existing file object to read from or write to. + + The ``mode`` argument can be ``'r'``, ``'rb'`` (default), ``'w'``, + ``'wb'``, ``'x'``, ``'xb'``, ``'a'``, or ``'ab'`` for binary mode, or + ``'rt'``, ``'wt'``, ``'xt'``, or ``'at'`` for text mode. + + For binary mode, this function is equivalent to the `LZ4FrameFile` + constructor: `LZ4FrameFile(filename, mode, ...)`. + + For text mode, an `LZ4FrameFile` object is created, and wrapped in an + ``io.TextIOWrapper`` instance with the specified encoding, error handling + behavior, and line ending(s). + + Args: + filename (str, bytes, os.PathLike): file name or file object to open + + Keyword Args: + mode (str): mode for opening the file + encoding (str): the name of the encoding that will be used for + encoding/deconging the stream. It defaults to + ``locale.getpreferredencoding(False)``. See ``io.TextIOWrapper`` + for further details. + errors (str): specifies how encoding and decoding errors are to be + handled. See ``io.TextIOWrapper`` for further details. + newline (str): controls how line endings are handled. See + ``io.TextIOWrapper`` for further details. + return_bytearray (bool): When ``False`` a bytes object is returned + from the calls to methods of this class. When ``True`` a bytearray + object will be returned. The default is ``False``. 
+ source_size (int): Optionally specify the total size of the + uncompressed data. If specified, will be stored in the compressed + frame header as an 8-byte field for later use during decompression. + Default is 0 (no size stored). Only used for writing compressed + files. + block_size (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_linked (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + compression_level (int): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + content_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + block_checksum (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + auto_flush (bool): Compressor setting. See + `lz4.frame.LZ4FrameCompressor`. + + """ + if 't' in mode: + if 'b' in mode: + raise ValueError('Invalid mode: %r' % (mode,)) + else: + if encoding is not None: + raise ValueError( + "Argument 'encoding' not supported in binary mode" + ) + if errors is not None: + raise ValueError("Argument 'errors' not supported in binary mode") + if newline is not None: + raise ValueError("Argument 'newline' not supported in binary mode") + + _mode = mode.replace('t', '') + + binary_file = LZ4FrameFile( + filename, + mode=_mode, + block_size=block_size, + block_linked=block_linked, + compression_level=compression_level, + content_checksum=content_checksum, + block_checksum=block_checksum, + auto_flush=auto_flush, + return_bytearray=return_bytearray, + ) + + if 't' in mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file diff --git a/contrib/python/lz4/py2/lz4/frame/_compression.py b/contrib/python/lz4/py2/lz4/frame/_compression.py new file mode 100644 index 0000000000..3c68904b98 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/frame/_compression.py @@ -0,0 +1,170 @@ +# Local python-lz4 copy of this file taken from the CPython standard library +# for earlier Python versions that don't ship with this file. 
This file has +# been modified to work on Python < 3.0. + +"""Internal classes used by the gzip, lzma and bz2 modules""" + +import sys +import io +# Ensure super has Python 3 semantics even on Python 2 +from builtins import super + + +BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size + + +class BaseStream(io.BufferedIOBase): + """Mode-checking helper functions.""" + + def _check_not_closed(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def _check_can_read(self): + if not self.readable(): + raise io.UnsupportedOperation("File not open for reading") + + def _check_can_write(self): + if not self.writable(): + raise io.UnsupportedOperation("File not open for writing") + + def _check_can_seek(self): + if not self.readable(): + raise io.UnsupportedOperation("Seeking is only supported " + "on files open for reading") + if not self.seekable(): + raise io.UnsupportedOperation("The underlying file object " + "does not support seeking") + + +class DecompressReader(io.RawIOBase): + """Adapts the decompressor API to a RawIOBase reader API""" + + def readable(self): + return True + + def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args): + self._fp = fp + self._eof = False + self._pos = 0 # Current offset in decompressed stream + + # Set to size of decompressed stream once it is known, for SEEK_END + self._size = -1 + + # Save the decompressor factory and arguments. + # If the file contains multiple compressed streams, each + # stream will need a separate decompressor object. A new decompressor + # object is also needed when implementing a backwards seek(). 
+ self._decomp_factory = decomp_factory + self._decomp_args = decomp_args + self._decompressor = self._decomp_factory(**self._decomp_args) + + # Exception class to catch from decompressor signifying invalid + # trailing data to ignore + self._trailing_error = trailing_error + + def close(self): + self._decompressor = None + return super().close() + + def seekable(self): + return self._fp.seekable() + + def readinto(self, b): + with memoryview(b) as view, view.cast("B") as byte_view: + data = self.read(len(byte_view)) + byte_view[:len(data)] = data + return len(data) + + def read(self, size=-1): + if size < 0: + return self.readall() + + if not size or self._eof: + return b"" + data = None # Default if EOF is encountered + # Depending on the input data, our call to the decompressor may not + # return any data. In this case, try again after reading another block. + while True: + if self._decompressor.eof: + rawblock = ( + self._decompressor.unused_data or self._fp.read(BUFFER_SIZE) + ) + if not rawblock: + break + # Continue to next stream. + self._decompressor = self._decomp_factory( + **self._decomp_args) + try: + data = self._decompressor.decompress(rawblock, size) + except self._trailing_error: + # Trailing data isn't a valid compressed stream; ignore it. + break + else: + if self._decompressor.needs_input: + rawblock = self._fp.read(BUFFER_SIZE) + if not rawblock: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + else: + rawblock = b"" + data = self._decompressor.decompress(rawblock, size) + if data: + break + if not data: + self._eof = True + self._size = self._pos + return b"" + self._pos += len(data) + return data + + # Rewind the file to the beginning of the data stream. + def _rewind(self): + self._fp.seek(0) + self._eof = False + self._pos = 0 + self._decompressor = self._decomp_factory(**self._decomp_args) + + def seek(self, offset, whence=io.SEEK_SET): + # Recalculate offset as an absolute file position. 
+ if whence == io.SEEK_SET: + pass + elif whence == io.SEEK_CUR: + offset = self._pos + offset + elif whence == io.SEEK_END: + # Seeking relative to EOF - we need to know the file's size. + if self._size < 0: + while self.read(io.DEFAULT_BUFFER_SIZE): + pass + offset = self._size + offset + else: + raise ValueError("Invalid value for whence: {}".format(whence)) + + # Make it so that offset is the number of bytes to skip forward. + if offset < self._pos: + self._rewind() + else: + offset -= self._pos + + # Read and discard data until we reach the desired position. + while offset > 0: + data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset)) + if not data: + break + offset -= len(data) + + return self._pos + + def tell(self): + """Return the current file position.""" + return self._pos + + +if sys.version_info < (3, 3): + # memoryview.cast is added in 3.3 + def readinto(self, b): + data = self.read(len(b)) + b[:len(data)] = data + return len(data) + + DecompressReader.readinto = readinto diff --git a/contrib/python/lz4/py2/lz4/frame/_frame.c b/contrib/python/lz4/py2/lz4/frame/_frame.c new file mode 100644 index 0000000000..99ebe02d02 --- /dev/null +++ b/contrib/python/lz4/py2/lz4/frame/_frame.c @@ -0,0 +1,1761 @@ +/* + * Copyright (c) 2015, 2016 Jerry Ryle and Jonathan G. Underwood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +#if defined(_WIN32) && defined(_MSC_VER) +#define inline __inline +#elif defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX) +#define inline +#endif + +#include <py3c.h> +#include <py3c/capsulethunk.h> + +#include <stdlib.h> +#include <lz4.h> /* Needed for LZ4_VERSION_NUMBER only. 
*/ +#include <lz4frame.h> + +#ifndef Py_UNUSED /* This is already defined for Python 3.4 onwards */ +#ifdef __GNUC__ +#define Py_UNUSED(name) _unused_ ## name __attribute__((unused)) +#else +#define Py_UNUSED(name) _unused_ ## name +#endif +#endif + +static const char * compression_context_capsule_name = "_frame.LZ4F_cctx"; +static const char * decompression_context_capsule_name = "_frame.LZ4F_dctx"; + +struct compression_context +{ + LZ4F_cctx * context; + LZ4F_preferences_t preferences; +}; + +/***************************** +* create_compression_context * +******************************/ +static void +destroy_compression_context (PyObject * py_context) +{ +#ifndef PyCapsule_Type + struct compression_context *context = + PyCapsule_GetPointer (py_context, compression_context_capsule_name); +#else + /* Compatibility with 2.6 via capsulethunk. */ + struct compression_context *context = py_context; +#endif + Py_BEGIN_ALLOW_THREADS + LZ4F_freeCompressionContext (context->context); + Py_END_ALLOW_THREADS + + PyMem_Free (context); +} + +static PyObject * +create_compression_context (PyObject * Py_UNUSED (self)) +{ + struct compression_context * context; + LZ4F_errorCode_t result; + + context = + (struct compression_context *) + PyMem_Malloc (sizeof (struct compression_context)); + + if (!context) + { + return PyErr_NoMemory (); + } + + Py_BEGIN_ALLOW_THREADS + + result = + LZ4F_createCompressionContext (&context->context, + LZ4F_VERSION); + Py_END_ALLOW_THREADS + + if (LZ4F_isError (result)) + { + LZ4F_freeCompressionContext (context->context); + PyMem_Free (context); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createCompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + return PyCapsule_New (context, compression_context_capsule_name, + destroy_compression_context); +} + +/************ + * compress * + ************/ +static PyObject * +compress (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + Py_buffer 
source; + Py_ssize_t source_size; + int store_size = 1; + int return_bytearray = 0; + int content_checksum = 0; + int block_checksum = 0; + int block_linked = 1; + LZ4F_preferences_t preferences; + size_t destination_size; + size_t compressed_size; + PyObject *py_destination; + char *destination; + + static char *kwlist[] = { "data", + "compression_level", + "block_size", + "content_checksum", + "block_checksum", + "block_linked", + "store_size", + "return_bytearray", + NULL + }; + + + memset (&preferences, 0, sizeof preferences); + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "y*|iippppp", kwlist, + &source, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &store_size, + &return_bytearray)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "s*|iiiiiii", kwlist, + &source, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &store_size, + &return_bytearray)) + { + return NULL; + } +#endif + + if (content_checksum) + { + preferences.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + } + else + { + preferences.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + } + + if (block_linked) + { + preferences.frameInfo.blockMode = LZ4F_blockLinked; + } + else + { + preferences.frameInfo.blockMode = LZ4F_blockIndependent; + } + + if (LZ4_versionNumber() >= 10800) + { + if (block_checksum) + { + preferences.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; + } + else + { + preferences.frameInfo.blockChecksumFlag = LZ4F_noBlockChecksum; + } + } + else if (block_checksum) + { + PyErr_SetString (PyExc_RuntimeError, + "block_checksum specified but not supported by LZ4 library version"); + return NULL; + } + + source_size = source.len; + + preferences.autoFlush = 0; + if (store_size) + { + preferences.frameInfo.contentSize = source_size; + } + else + { + 
preferences.frameInfo.contentSize = 0; + } + + Py_BEGIN_ALLOW_THREADS + destination_size = + LZ4F_compressFrameBound (source_size, &preferences); + Py_END_ALLOW_THREADS + + if (destination_size > PY_SSIZE_T_MAX) + { + PyBuffer_Release(&source); + PyErr_Format (PyExc_ValueError, + "Input data could require %zu bytes, which is larger than the maximum supported size of %zd bytes", + destination_size, PY_SSIZE_T_MAX); + return NULL; + } + + destination = PyMem_Malloc (destination_size * sizeof * destination); + if (destination == NULL) + { + PyBuffer_Release(&source); + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + compressed_size = + LZ4F_compressFrame (destination, destination_size, source.buf, source_size, + &preferences); + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + + if (LZ4F_isError (compressed_size)) + { + PyMem_Free (destination); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressFrame failed with code: %s", + LZ4F_getErrorName (compressed_size)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) compressed_size); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) compressed_size); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * compress_begin * + ******************/ +static PyObject * +compress_begin (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + PyObject *py_context = NULL; + Py_ssize_t source_size = (Py_ssize_t) 0; + int return_bytearray = 0; + int content_checksum = 0; + int block_checksum = 0; + int block_linked = 1; + LZ4F_preferences_t preferences; + PyObject *py_destination; + char * destination; + /* The destination buffer needs to be large enough for a header, which is 15 + * bytes. Unfortunately, the lz4 library doesn't provide a #define for this. 
+ * We over-allocate to allow for larger headers in the future. */ + const size_t header_size = 32; + struct compression_context *context; + size_t result; + static char *kwlist[] = { "context", + "source_size", + "compression_level", + "block_size", + "content_checksum", + "block_checksum", + "block_linked", + "auto_flush", + "return_bytearray", + NULL + }; + + memset (&preferences, 0, sizeof preferences); + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiippppp", kwlist, + &py_context, + &source_size, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &preferences.autoFlush, + &return_bytearray + )) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiiiiiii", kwlist, + &py_context, + &source_size, + &preferences.compressionLevel, + &preferences.frameInfo.blockSizeID, + &content_checksum, + &block_checksum, + &block_linked, + &preferences.autoFlush, + &return_bytearray + )) + { + return NULL; + } +#endif + if (content_checksum) + { + preferences.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + } + else + { + preferences.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + } + + if (block_linked) + { + preferences.frameInfo.blockMode = LZ4F_blockLinked; + } + else + { + preferences.frameInfo.blockMode = LZ4F_blockIndependent; + } + + if (LZ4_versionNumber() >= 10800) + { + if (block_checksum) + { + preferences.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; + } + else + { + preferences.frameInfo.blockChecksumFlag = LZ4F_noBlockChecksum; + } + } + else if (block_checksum) + { + PyErr_SetString (PyExc_RuntimeError, + "block_checksum specified but not supported by LZ4 library version"); + return NULL; + } + + if (block_linked) + { + preferences.frameInfo.blockMode = LZ4F_blockLinked; + } + else + { + preferences.frameInfo.blockMode = LZ4F_blockIndependent; + } + + + preferences.frameInfo.contentSize = 
source_size; + + context = + (struct compression_context *) PyCapsule_GetPointer (py_context, compression_context_capsule_name); + + if (!context || !context->context) + { + PyErr_SetString (PyExc_ValueError, "No valid compression context supplied"); + return NULL; + } + + context->preferences = preferences; + + destination = PyMem_Malloc (header_size * sizeof * destination); + if (destination == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + result = LZ4F_compressBegin (context->context, + destination, + header_size, + &context->preferences); + Py_END_ALLOW_THREADS + + if (LZ4F_isError (result)) + { + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressBegin failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) result); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) result); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * compress_chunk * + ******************/ +static PyObject * +compress_chunk (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + PyObject *py_context = NULL; + Py_buffer source; + Py_ssize_t source_size; + struct compression_context *context; + size_t compressed_bound; + PyObject *py_destination; + char *destination; + LZ4F_compressOptions_t compress_options; + size_t result; + int return_bytearray = 0; + static char *kwlist[] = { "context", + "data", + "return_bytearray", + NULL + }; + + memset (&compress_options, 0, sizeof compress_options); + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Oy*|p", kwlist, + &py_context, + &source, + &return_bytearray)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Os*|i", kwlist, + &py_context, + &source, + &return_bytearray)) + { + return NULL; + } 
+#endif + + source_size = source.len; + + context = + (struct compression_context *) PyCapsule_GetPointer (py_context, compression_context_capsule_name); + if (!context || !context->context) + { + PyBuffer_Release(&source); + PyErr_Format (PyExc_ValueError, "No compression context supplied"); + return NULL; + } + + /* If autoFlush is enabled, then the destination buffer only needs to be as + big as LZ4F_compressFrameBound specifies for this source size. However, if + autoFlush is disabled, previous calls may have resulted in buffered data, + and so we need instead to use LZ4F_compressBound to find the size required + for the destination buffer. This means that with autoFlush disabled we may + frequently allocate more memory than needed. */ + Py_BEGIN_ALLOW_THREADS + if (context->preferences.autoFlush == 1) + { + compressed_bound = + LZ4F_compressFrameBound (source_size, &context->preferences); + } + else + { + compressed_bound = + LZ4F_compressBound (source_size, &context->preferences); + } + Py_END_ALLOW_THREADS + + if (compressed_bound > PY_SSIZE_T_MAX) + { + PyBuffer_Release(&source); + PyErr_Format (PyExc_ValueError, + "input data could require %zu bytes, which is larger than the maximum supported size of %zd bytes", + compressed_bound, PY_SSIZE_T_MAX); + return NULL; + } + + destination = PyMem_Malloc (compressed_bound * sizeof * destination); + if (destination == NULL) + { + PyBuffer_Release(&source); + return PyErr_NoMemory(); + } + + compress_options.stableSrc = 0; + + Py_BEGIN_ALLOW_THREADS + result = + LZ4F_compressUpdate (context->context, destination, + compressed_bound, source.buf, source_size, + &compress_options); + Py_END_ALLOW_THREADS + + PyBuffer_Release(&source); + + if (LZ4F_isError (result)) + { + PyMem_Free (destination); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressUpdate failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, 
(Py_ssize_t) result); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) result); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * compress_flush * + ******************/ +static PyObject * +compress_flush (PyObject * Py_UNUSED (self), PyObject * args, PyObject * keywds) +{ + PyObject *py_context = NULL; + LZ4F_compressOptions_t compress_options; + struct compression_context *context; + size_t destination_size; + int return_bytearray = 0; + int end_frame = 1; + PyObject *py_destination; + char * destination; + size_t result; + static char *kwlist[] = { "context", + "end_frame", + "return_bytearray", + NULL + }; + + memset (&compress_options, 0, sizeof compress_options); + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|pp", kwlist, + &py_context, + &end_frame, + &return_bytearray)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|ii", kwlist, + &py_context, + &end_frame, + &return_bytearray)) + { + return NULL; + } +#endif + if (!end_frame && LZ4_versionNumber() < 10800) + { + PyErr_SetString (PyExc_RuntimeError, + "Flush without ending a frame is not supported with this version of the LZ4 library"); + return NULL; + } + + context = + (struct compression_context *) PyCapsule_GetPointer (py_context, compression_context_capsule_name); + if (!context || !context->context) + { + PyErr_SetString (PyExc_ValueError, "No compression context supplied"); + return NULL; + } + + compress_options.stableSrc = 0; + + /* Calling LZ4F_compressBound with srcSize equal to 0 returns a size + sufficient to fit (i) any remaining buffered data (when autoFlush is + disabled) and the footer size, which is either 4 or 8 bytes depending on + whether checksums are enabled. See: https://github.com/lz4/lz4/issues/280 + and https://github.com/lz4/lz4/issues/290. 
Prior to 1.7.5, it was necessary + to call LZ4F_compressBound with srcSize equal to 1. Since we now require a + minimum version to 1.7.5 we'll call this with srcSize equal to 0. */ + Py_BEGIN_ALLOW_THREADS + destination_size = LZ4F_compressBound (0, &(context->preferences)); + Py_END_ALLOW_THREADS + + destination = PyMem_Malloc (destination_size * sizeof * destination); + if (destination == NULL) + { + return PyErr_NoMemory(); + } + + Py_BEGIN_ALLOW_THREADS + if (end_frame) + { + result = + LZ4F_compressEnd (context->context, destination, + destination_size, &compress_options); + } + else + { + result = + LZ4F_flush (context->context, destination, + destination_size, &compress_options); + } + Py_END_ALLOW_THREADS + + if (LZ4F_isError (result)) + { + PyMem_Free (destination); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_compressEnd failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) result); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) result); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + return py_destination; +} + +/****************** + * get_frame_info * + ******************/ +static PyObject * +get_frame_info (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + Py_buffer py_source; + char *source; + size_t source_size; + LZ4F_decompressionContext_t context; + LZ4F_frameInfo_t frame_info; + size_t result; + unsigned int block_size; + unsigned int block_size_id; + int block_linked; + int content_checksum; + int block_checksum; + int skippable; + + static char *kwlist[] = { "data", + NULL + }; + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "y*", kwlist, + &py_source)) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "s*", kwlist, + &py_source)) + { + return NULL; + } 
+#endif + + Py_BEGIN_ALLOW_THREADS + + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + PyBuffer_Release (&py_source); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + source = (char *) py_source.buf; + source_size = (size_t) py_source.len; + + result = + LZ4F_getFrameInfo (context, &frame_info, source, &source_size); + + if (LZ4F_isError (result)) + { + LZ4F_freeDecompressionContext (context); + Py_BLOCK_THREADS + PyBuffer_Release (&py_source); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_getFrameInfo failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + result = LZ4F_freeDecompressionContext (context); + + Py_END_ALLOW_THREADS + + PyBuffer_Release (&py_source); + + if (LZ4F_isError (result)) + { + PyErr_Format (PyExc_RuntimeError, + "LZ4F_freeDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + +#define KB *(1<<10) +#define MB *(1<<20) + switch (frame_info.blockSizeID) + { + case LZ4F_default: + case LZ4F_max64KB: + block_size = 64 KB; + block_size_id = LZ4F_max64KB; + break; + case LZ4F_max256KB: + block_size = 256 KB; + block_size_id = LZ4F_max256KB; + break; + case LZ4F_max1MB: + block_size = 1 MB; + block_size_id = LZ4F_max1MB; + break; + case LZ4F_max4MB: + block_size = 4 MB; + block_size_id = LZ4F_max4MB; + break; + default: + PyErr_Format (PyExc_RuntimeError, + "Unrecognized blockSizeID in get_frame_info: %d", + frame_info.blockSizeID); + return NULL; + } +#undef KB +#undef MB + + if (frame_info.blockMode == LZ4F_blockLinked) + { + block_linked = 1; + } + else if (frame_info.blockMode == LZ4F_blockIndependent) + { + block_linked = 0; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized blockMode in get_frame_info: %d", + frame_info.blockMode); + return NULL; + } + + if (frame_info.contentChecksumFlag == 
LZ4F_noContentChecksum) + { + content_checksum = 0; + } + else if (frame_info.contentChecksumFlag == LZ4F_contentChecksumEnabled) + { + content_checksum = 1; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized contentChecksumFlag in get_frame_info: %d", + frame_info.contentChecksumFlag); + return NULL; + } + + if (LZ4_versionNumber() >= 10800) + { + if (frame_info.blockChecksumFlag == LZ4F_noBlockChecksum) + { + block_checksum = 0; + } + else if (frame_info.blockChecksumFlag == LZ4F_blockChecksumEnabled) + { + block_checksum = 1; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized blockChecksumFlag in get_frame_info: %d", + frame_info.blockChecksumFlag); + return NULL; + } + } + else + { + /* Prior to LZ4 1.8.0 the blockChecksum functionality wasn't exposed in the + frame API, and blocks weren't checksummed, so we'll always return 0 + here. */ + block_checksum = 0; + } + + if (frame_info.frameType == LZ4F_frame) + { + skippable = 0; + } + else if (frame_info.frameType == LZ4F_skippableFrame) + { + skippable = 1; + } + else + { + PyErr_Format (PyExc_RuntimeError, + "Unrecognized frameType in get_frame_info: %d", + frame_info.frameType); + return NULL; + } + + return Py_BuildValue ("{s:I,s:I,s:O,s:O,s:O,s:O,s:K}", + "block_size", block_size, + "block_size_id", block_size_id, + "block_linked", block_linked ? Py_True : Py_False, + "content_checksum", content_checksum ? Py_True : Py_False, + "block_checksum", block_checksum ? Py_True : Py_False, + "skippable", skippable ? Py_True : Py_False, + "content_size", frame_info.contentSize); +} + +/******************************** + * create_decompression_context * + ********************************/ +static void +destroy_decompression_context (PyObject * py_context) +{ +#ifndef PyCapsule_Type + LZ4F_dctx * context = + PyCapsule_GetPointer (py_context, decompression_context_capsule_name); +#else + /* Compatibility with 2.6 via capsulethunk. 
*/ + LZ4F_dctx * context = py_context; +#endif + Py_BEGIN_ALLOW_THREADS + LZ4F_freeDecompressionContext (context); + Py_END_ALLOW_THREADS +} + +static PyObject * +create_decompression_context (PyObject * Py_UNUSED (self)) +{ + LZ4F_dctx * context; + LZ4F_errorCode_t result; + + Py_BEGIN_ALLOW_THREADS + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + LZ4F_freeDecompressionContext (context); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + Py_END_ALLOW_THREADS + + return PyCapsule_New (context, decompression_context_capsule_name, + destroy_decompression_context); +} + +/******************************* + * reset_decompression_context * + *******************************/ +static PyObject * +reset_decompression_context (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + LZ4F_dctx * context; + PyObject * py_context = NULL; + static char *kwlist[] = { "context", + NULL + }; + + if (!PyArg_ParseTupleAndKeywords (args, keywds, "O", kwlist, + &py_context + )) + { + return NULL; + } + + context = (LZ4F_dctx *) + PyCapsule_GetPointer (py_context, decompression_context_capsule_name); + + if (!context) + { + PyErr_SetString (PyExc_ValueError, + "No valid decompression context supplied"); + return NULL; + } + + if (LZ4_versionNumber() >= 10800) /* LZ4 >= v1.8.0 has LZ4F_resetDecompressionContext */ + { + /* No error checking possible here - this is always successful. */ + Py_BEGIN_ALLOW_THREADS + LZ4F_resetDecompressionContext (context); + Py_END_ALLOW_THREADS + } + else + { + /* No resetDecompressionContext available, so we'll destroy the context + and create a new one. 
*/ + int result; + + Py_BEGIN_ALLOW_THREADS + LZ4F_freeDecompressionContext (context); + + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + if (LZ4F_isError (result)) + { + LZ4F_freeDecompressionContext (context); + Py_BLOCK_THREADS + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + Py_END_ALLOW_THREADS + + result = PyCapsule_SetPointer(py_context, context); + if (result) + { + LZ4F_freeDecompressionContext (context); + PyErr_SetString (PyExc_RuntimeError, + "PyCapsule_SetPointer failed with code: %s"); + return NULL; + } + } + + Py_RETURN_NONE; +} + +static inline PyObject * +__decompress(LZ4F_dctx * context, char * source, size_t source_size, + Py_ssize_t max_length, int full_frame, + int return_bytearray, int return_bytes_read) +{ + size_t source_remain; + size_t source_read; + char * source_cursor; + char * source_end; + char * destination; + size_t destination_write; + char * destination_cursor; + size_t destination_written; + size_t destination_size; + PyObject * py_destination; + size_t result = 0; + LZ4F_frameInfo_t frame_info; + LZ4F_decompressOptions_t options; + int end_of_frame = 0; + + memset(&options, 0, sizeof options); + + Py_BEGIN_ALLOW_THREADS + + source_cursor = source; + source_end = source + source_size; + source_remain = source_size; + + if (full_frame) + { + source_read = source_size; + + result = + LZ4F_getFrameInfo (context, &frame_info, + source_cursor, &source_read); + + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + PyErr_Format (PyExc_RuntimeError, + "LZ4F_getFrameInfo failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + /* Advance the source_cursor pointer past the header - the call to + getFrameInfo above replaces the passed source_read value with the + number of bytes read. Also reduce source_remain accordingly. 
*/ + source_cursor += source_read; + source_remain -= source_read; + + /* If the uncompressed content size is available, we'll use that to size + the destination buffer. Otherwise, guess at twice the remaining source + source as a starting point, and adjust if needed. */ + if (frame_info.contentSize > 0) + { + destination_size = frame_info.contentSize; + } + else + { + destination_size = 2 * source_remain; + } + } + else + { + if (max_length >= (Py_ssize_t) 0) + { + destination_size = (size_t) max_length; + } + else + { + /* Choose an initial destination size as twice the source size, and we'll + grow the allocation as needed. */ + destination_size = 2 * source_remain; + } + } + + Py_BLOCK_THREADS + + destination = PyMem_Malloc (destination_size * sizeof * destination); + if (destination == NULL) + { + return PyErr_NoMemory(); + } + + Py_UNBLOCK_THREADS + + /* Only set stableDst = 1 if we are sure no PyMem_Realloc will be called since + when stableDst = 1 the LZ4 library stores a pointer to the last compressed + data, which may be invalid after a PyMem_Realloc. */ + if (full_frame && max_length >= (Py_ssize_t) 0) + { + options.stableDst = 1; + } + else + { + options.stableDst = 0; + } + + source_read = source_remain; + + destination_write = destination_size; + destination_cursor = destination; + destination_written = 0; + + while (1) + { + /* Decompress from the source string and write to the destination + until there's no more source string to read, or until we've reached the + frame end. + + On calling LZ4F_decompress, source_read is set to the remaining length + of source available to read. On return, source_read is set to the + actual number of bytes read from source, which may be less than + available. NB: LZ4F_decompress does not explicitly fail on empty input. + + On calling LZ4F_decompress, destination_write is the number of bytes in + destination available for writing. 
On exit, destination_write is set to + the actual number of bytes written to destination. */ + result = LZ4F_decompress (context, + destination_cursor, + &destination_write, + source_cursor, + &source_read, + &options); + + if (LZ4F_isError (result)) + { + Py_BLOCK_THREADS + PyErr_Format (PyExc_RuntimeError, + "LZ4F_decompress failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + + destination_written += destination_write; + source_cursor += source_read; + source_read = source_end - source_cursor; + + if (result == 0) + { + /* We've reached the end of the frame. */ + end_of_frame = 1; + break; + } + else if (source_cursor == source_end) + { + /* We've reached end of input. */ + break; + } + else if (destination_written == destination_size) + { + /* Destination buffer is full. So, stop decompressing if + max_length is set. Otherwise expand the destination + buffer. */ + if (max_length >= (Py_ssize_t) 0) + { + break; + } + else + { + /* Expand destination buffer. result is an indication of number of + source bytes remaining, so we'll use this to estimate the new + size of the destination buffer. */ + char * buff; + destination_size += 3 * result; + + Py_BLOCK_THREADS + buff = PyMem_Realloc (destination, destination_size); + if (buff == NULL) + { + PyErr_SetString (PyExc_RuntimeError, + "Failed to resize buffer"); + return NULL; + } + else + { + destination = buff; + } + Py_UNBLOCK_THREADS + } + } + /* Data still remaining to be decompressed, so increment the destination + cursor location, and reset destination_write ready for the next + iteration. Important to re-initialize destination_cursor here (as + opposed to simply incrementing it) so we're pointing to the realloc'd + memory location. */ + destination_cursor = destination + destination_written; + destination_write = destination_size - destination_written; + } + + Py_END_ALLOW_THREADS + + if (result > 0 && full_frame) + { + PyErr_Format (PyExc_RuntimeError, + "Frame incomplete. 
LZ4F_decompress returned: %zu", result); + PyMem_Free (destination); + return NULL; + } + + if (LZ4F_isError (result)) + { + PyErr_Format (PyExc_RuntimeError, + "LZ4F_freeDecompressionContext failed with code: %s", + LZ4F_getErrorName (result)); + PyMem_Free (destination); + return NULL; + } + + if (return_bytearray) + { + py_destination = PyByteArray_FromStringAndSize (destination, (Py_ssize_t) destination_written); + } + else + { + py_destination = PyBytes_FromStringAndSize (destination, (Py_ssize_t) destination_written); + } + + PyMem_Free (destination); + + if (py_destination == NULL) + { + return PyErr_NoMemory (); + } + + if (full_frame) + { + if (return_bytes_read) + { + return Py_BuildValue ("Ni", + py_destination, + source_cursor - source); + } + else + { + return py_destination; + } + } + else + { + return Py_BuildValue ("NiO", + py_destination, + source_cursor - source, + end_of_frame ? Py_True : Py_False); + } +} + +/************** + * decompress * + **************/ +static PyObject * +decompress (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + LZ4F_dctx * context; + LZ4F_errorCode_t result; + Py_buffer py_source; + char * source; + size_t source_size; + PyObject * ret; + int return_bytearray = 0; + int return_bytes_read = 0; + static char *kwlist[] = { "data", + "return_bytearray", + "return_bytes_read", + NULL + }; + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "y*|pp", kwlist, + &py_source, + &return_bytearray, + &return_bytes_read + )) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "s*|ii", kwlist, + &py_source, + &return_bytearray, + &return_bytes_read + )) + { + return NULL; + } +#endif + + Py_BEGIN_ALLOW_THREADS + result = LZ4F_createDecompressionContext (&context, LZ4F_VERSION); + if (LZ4F_isError (result)) + { + LZ4F_freeDecompressionContext (context); + Py_BLOCK_THREADS + PyBuffer_Release(&py_source); + PyErr_Format (PyExc_RuntimeError, + "LZ4F_createDecompressionContext 
failed with code: %s", + LZ4F_getErrorName (result)); + return NULL; + } + Py_END_ALLOW_THREADS + + /* MSVC can't do pointer arithmetic on void * pointers, so cast to char * */ + source = (char *) py_source.buf; + source_size = py_source.len; + + ret = __decompress (context, + source, + source_size, + -1, + 1, + return_bytearray, + return_bytes_read); + + PyBuffer_Release(&py_source); + + Py_BEGIN_ALLOW_THREADS + LZ4F_freeDecompressionContext (context); + Py_END_ALLOW_THREADS + + return ret; +} + +/******************** + * decompress_chunk * + ********************/ +static PyObject * +decompress_chunk (PyObject * Py_UNUSED (self), PyObject * args, + PyObject * keywds) +{ + PyObject * py_context = NULL; + PyObject * ret; + LZ4F_dctx * context; + Py_buffer py_source; + char * source; + size_t source_size; + Py_ssize_t max_length = (Py_ssize_t) -1; + int return_bytearray = 0; + static char *kwlist[] = { "context", + "data", + "max_length", + "return_bytearray", + NULL + }; + +#if IS_PY3 + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Oy*|np", kwlist, + &py_context, + &py_source, + &max_length, + &return_bytearray + )) + { + return NULL; + } +#else + if (!PyArg_ParseTupleAndKeywords (args, keywds, "Os*|ni", kwlist, + &py_context, + &py_source, + &max_length, + &return_bytearray + )) + { + return NULL; + } +#endif + + context = (LZ4F_dctx *) + PyCapsule_GetPointer (py_context, decompression_context_capsule_name); + + if (!context) + { + PyBuffer_Release(&py_source); + PyErr_SetString (PyExc_ValueError, + "No valid decompression context supplied"); + return NULL; + } + + /* MSVC can't do pointer arithmetic on void * pointers, so cast to char * */ + source = (char *) py_source.buf; + source_size = py_source.len; + + ret = __decompress (context, + source, + source_size, + max_length, + 0, + return_bytearray, + 0); + + PyBuffer_Release(&py_source); + + return ret; +} + +PyDoc_STRVAR( + create_compression_context__doc, + "create_compression_context()\n" \ + "\n" \ + 
"Creates a compression context object.\n" \ + "\n" \ + "The compression object is required for compression operations.\n" \ + "\n" \ + "Returns:\n" \ + " cCtx: A compression context\n" + ); + +#define COMPRESS_KWARGS_DOCSTRING \ + " block_size (int): Sepcifies the maximum blocksize to use.\n" \ + " Options:\n\n" \ + " - `lz4.frame.BLOCKSIZE_DEFAULT`: the lz4 library default\n" \ + " - `lz4.frame.BLOCKSIZE_MAX64KB`: 64 kB\n" \ + " - `lz4.frame.BLOCKSIZE_MAX256KB`: 256 kB\n" \ + " - `lz4.frame.BLOCKSIZE_MAX1MB`: 1 MB\n" \ + " - `lz4.frame.BLOCKSIZE_MAX4MB`: 4 MB\n\n" \ + " If unspecified, will default to `lz4.frame.BLOCKSIZE_DEFAULT`\n" \ + " which is currently equal to `lz4.frame.BLOCKSIZE_MAX64KB`.\n" \ + " block_linked (bool): Specifies whether to use block-linked\n" \ + " compression. If ``True``, the compression ratio is improved,\n" \ + " particularly for small block sizes. Default is ``True``.\n" \ + " compression_level (int): Specifies the level of compression used.\n" \ + " Values between 0-16 are valid, with 0 (default) being the\n" \ + " lowest compression (0-2 are the same value), and 16 the highest.\n" \ + " Values below 0 will enable \"fast acceleration\", proportional\n" \ + " to the value. Values above 16 will be treated as 16.\n" \ + " The following module constants are provided as a convenience:\n\n" \ + " - `lz4.frame.COMPRESSIONLEVEL_MIN`: Minimum compression (0, the\n" \ + " default)\n" \ + " - `lz4.frame.COMPRESSIONLEVEL_MINHC`: Minimum high-compression\n" \ + " mode (3)\n" \ + " - `lz4.frame.COMPRESSIONLEVEL_MAX`: Maximum compression (16)\n\n" \ + " content_checksum (bool): Specifies whether to enable checksumming\n" \ + " of the uncompressed content. If True, a checksum is stored at the\n" \ + " end of the frame, and checked during decompression. Default is\n" \ + " ``False``.\n" \ + " block_checksum (bool): Specifies whether to enable checksumming of\n" \ + " the uncompressed content of each block. 
If `True` a checksum of\n" \ + " the uncompressed data in each block in the frame is stored at\n\n" \ + " the end of each block. If present, these checksums will be used\n\n" \ + " to validate the data during decompression. The default is\n" \ + " ``False`` meaning block checksums are not calculated and stored.\n" \ + " This functionality is only supported if the underlying LZ4\n" \ + " library has version >= 1.8.0. Attempting to set this value\n" \ + " to ``True`` with a version of LZ4 < 1.8.0 will cause a\n" \ + " ``RuntimeError`` to be raised.\n" \ + " return_bytearray (bool): If ``True`` a ``bytearray`` object will be\n" \ + " returned. If ``False``, a string of bytes is returned. The default\n" \ + " is ``False``.\n" \ + +PyDoc_STRVAR( + compress__doc, + "compress(data, compression_level=0, block_size=0, content_checksum=0,\n" \ + "block_linked=True, store_size=True, return_bytearray=False)\n" \ + "\n" \ + "Compresses ``data`` returning the compressed data as a complete frame.\n" \ + "\n" \ + "The returned data includes a header and endmark and so is suitable\n" \ + "for writing to a file.\n" \ + "\n" \ + "Args:\n" \ + " data (str, bytes or buffer-compatible object): data to compress\n" \ + "\n" \ + "Keyword Args:\n" \ + COMPRESS_KWARGS_DOCSTRING \ + " store_size (bool): If ``True`` then the frame will include an 8-byte\n" \ + " header field that is the uncompressed size of data included\n" \ + " within the frame. Default is ``True``.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data\n" + ); +PyDoc_STRVAR +( + compress_begin__doc, + "compress_begin(context, source_size=0, compression_level=0, block_size=0,\n" \ + "content_checksum=0, content_size=1, block_mode=0, frame_type=0,\n" \ + "auto_flush=1)\n" \ + "\n" \ + "Creates a frame header from a compression context.\n\n" \ + "Args:\n" \ + " context (cCtx): A compression context.\n\n" \ + "Keyword Args:\n" \ + COMPRESS_KWARGS_DOCSTRING \ + " auto_flush (bool): Enable or disable autoFlush. 
When autoFlush is disabled\n" \ + " the LZ4 library may buffer data internally until a block is full.\n" \ + " Default is ``False`` (autoFlush disabled).\n\n" \ + " source_size (int): This optionally specifies the uncompressed size\n" \ + " of the data to be compressed. If specified, the size will be stored\n" \ + " in the frame header for use during decompression. Default is ``True``\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be returned.\n" \ + " If ``False``, a string of bytes is returned. Default is ``False``.\n\n" \ + "Returns:\n" \ + " bytes or bytearray: Frame header.\n" + ); + +#undef COMPRESS_KWARGS_DOCSTRING + +PyDoc_STRVAR +( + compress_chunk__doc, + "compress_chunk(context, data)\n" \ + "\n" \ + "Compresses blocks of data and returns the compressed data.\n" \ + "\n" \ + "The returned data should be concatenated with the data returned from\n" \ + "`lz4.frame.compress_begin` and any subsequent calls to\n" \ + "`lz4.frame.compress_chunk`.\n" \ + "\n" \ + "Args:\n" \ + " context (cCtx): compression context\n" \ + " data (str, bytes or buffer-compatible object): data to compress\n" \ + "\n" \ + "Keyword Args:\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be\n" \ + " returned. If ``False``, a string of bytes is returned. The\n" \ + " default is False.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: Compressed data.\n\n" \ + "Notes:\n" \ + " If auto flush is disabled (``auto_flush=False`` when calling\n" \ + " `lz4.frame.compress_begin`) this function may buffer and retain\n" \ + " some or all of the compressed data for future calls to\n" \ + " `lz4.frame.compress`.\n" + ); + +PyDoc_STRVAR +( + compress_flush__doc, + "compress_flush(context, end_frame=True, return_bytearray=False)\n" \ + "\n" \ + "Flushes any buffered data held in the compression context.\n" \ + "\n" \ + "This flushes any data buffed in the compression context, returning it as\n" \ + "compressed data. 
The returned data should be appended to the output of\n" \ + "previous calls to ``lz4.frame.compress_chunk``.\n" \ + "\n" \ + "The ``end_frame`` argument specifies whether or not the frame should be\n" \ + "ended. If this is ``True`` an end of frame marker will be appended to\n" \ + "the returned data. In this case, if ``content_checksum`` was ``True``\n" \ + "when calling `lz4.frame.compress_begin`, then a checksum of the uncompressed\n" \ + "data will also be included in the returned data.\n" \ + "\n" \ + "If the ``end_frame`` argument is ``True``, the compression context will be\n" \ + "reset and can be re-used.\n" \ + "\n" \ + "Args:\n" \ + " context (cCtx): Compression context\n" \ + "\n" \ + "Keyword Args:\n" \ + " end_frame (bool): If ``True`` the frame will be ended. Default is\n" \ + " ``True``.\n" \ + " return_bytearray (bool): If ``True`` a ``bytearray`` object will\n" \ + " be returned. If ``False``, a ``bytes`` object is returned.\n" \ + " The default is ``False``.\n" \ + "\n" \ + "Returns:\n" \ + " bytes or bytearray: compressed data.\n" \ + "\n" \ + "Notes:\n" \ + " If ``end_frame`` is ``False`` but the underlying LZ4 library does not" \ + " support flushing without ending the frame, a ``RuntimeError`` will be\n" \ + " raised.\n" + ); + +PyDoc_STRVAR +( + get_frame_info__doc, + "get_frame_info(frame)\n\n" \ + "Given a frame of compressed data, returns information about the frame.\n" \ + "\n" \ + "Args:\n" \ + " frame (str, bytes or buffer-compatible object): LZ4 compressed frame\n" \ + "\n" \ + "Returns:\n" \ + " dict: Dictionary with keys:\n" \ + "\n" \ + " - ``block_size`` (int): the maximum size (in bytes) of each block\n" \ + " - ``block_size_id`` (int): identifier for maximum block size\n" \ + " - ``content_checksum`` (bool): specifies whether the frame\n" \ + " contains a checksum of the uncompressed content\n" \ + " - ``content_size`` (int): uncompressed size in bytes of\n" \ + " frame content\n" \ + " - ``block_linked`` (bool): specifies 
whether the frame contains\n" \ + " blocks which are independently compressed (``False``) or linked\n" \ + " (``True``)\n" \ + " - ``block_checksum`` (bool): specifies whether each block contains a\n" \ + " checksum of its contents\n" \ + " - ``skippable`` (bool): whether the block is skippable (``True``) or\n" \ + " not (``False``)\n" + ); + +PyDoc_STRVAR +( + create_decompression_context__doc, + "create_decompression_context()\n" \ + "\n" \ + "Creates a decompression context object.\n" \ + "\n" \ + "A decompression context is needed for decompression operations.\n" \ + "\n" \ + "Returns:\n" \ + " dCtx: A decompression context\n" + ); + +PyDoc_STRVAR +( + reset_decompression_context__doc, + "reset_decompression_context(context)\n" \ + "\n" \ + "Resets a decompression context object.\n" \ + "\n" \ + "This is useful for recovering from an error or for stopping an unfinished\n" \ + "decompression and starting a new one with the same context\n" \ + "\n" \ + "Args:\n" \ + " context (dCtx): A decompression context\n" + ); + +PyDoc_STRVAR +( + decompress__doc, + "decompress(data, return_bytearray=False, return_bytes_read=False)\n" \ + "\n" \ + "Decompresses a frame of data and returns it as a string of bytes.\n" \ + "\n" \ + "Args:\n" \ + " data (str, bytes or buffer-compatible object): data to decompress.\n" \ + " This should contain a complete LZ4 frame of compressed data.\n" \ + "\n" \ + "Keyword Args:\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be\n" \ + " returned. If ``False``, a string of bytes is returned. The\n" \ + " default is ``False``.\n" \ + " return_bytes_read (bool): If ``True`` then the number of bytes read\n" \ + " from ``data`` will also be returned. 
Default is ``False``\n" \ + "\n" \ + "Returns:\n" \ + " bytes/bytearray or tuple: Uncompressed data and optionally the number" \ + " of bytes read\n" \ + "\n" \ + " If the ``return_bytes_read`` argument is ``True`` this function\n" \ + " returns a tuple consisting of:\n" \ + "\n" \ + " - bytes or bytearray: Uncompressed data\n" \ + " - int: Number of bytes consumed from ``data``\n" + ); + +PyDoc_STRVAR +( + decompress_chunk__doc, + "decompress_chunk(context, data, max_length=-1)\n" \ + "\n" \ + "Decompresses part of a frame of compressed data.\n" \ + "\n" \ + "The returned uncompressed data should be concatenated with the data\n" \ + "returned from previous calls to `lz4.frame.decompress_chunk`\n" \ + "\n" \ + "Args:\n" \ + " context (dCtx): decompression context\n" \ + " data (str, bytes or buffer-compatible object): part of a LZ4\n" \ + " frame of compressed data\n" \ + "\n" \ + "Keyword Args:\n" \ + " max_length (int): if non-negative this specifies the maximum number\n" \ + " of bytes of uncompressed data to return. Default is ``-1``.\n" \ + " return_bytearray (bool): If ``True`` a bytearray object will be\n" \ + " returned. If ``False``, a string of bytes is returned. 
The\n" \ + " default is ``False``.\n" \ + "\n" \ + "Returns:\n" \ + " tuple: uncompressed data, bytes read, end of frame indicator\n" \ + "\n" \ + " This function returns a tuple consisting of:\n" \ + "\n" \ + " - The uncompressed data as a ``bytes`` or ``bytearray`` object\n" \ + " - The number of bytes consumed from input ``data`` as an ``int``\n" \ + " - The end of frame indicator as a ``bool``.\n" \ + "\n" + "The end of frame indicator is ``True`` if the end of the compressed\n" \ + "frame has been reached, or ``False`` otherwise\n" + ); + +static PyMethodDef module_methods[] = +{ + { + "create_compression_context", (PyCFunction) create_compression_context, + METH_NOARGS, create_compression_context__doc + }, + { + "compress", (PyCFunction) compress, + METH_VARARGS | METH_KEYWORDS, compress__doc + }, + { + "compress_begin", (PyCFunction) compress_begin, + METH_VARARGS | METH_KEYWORDS, compress_begin__doc + }, + { + "compress_chunk", (PyCFunction) compress_chunk, + METH_VARARGS | METH_KEYWORDS, compress_chunk__doc + }, + { + "compress_flush", (PyCFunction) compress_flush, + METH_VARARGS | METH_KEYWORDS, compress_flush__doc + }, + { + "get_frame_info", (PyCFunction) get_frame_info, + METH_VARARGS | METH_KEYWORDS, get_frame_info__doc + }, + { + "create_decompression_context", (PyCFunction) create_decompression_context, + METH_NOARGS, create_decompression_context__doc + }, + { + "reset_decompression_context", (PyCFunction) reset_decompression_context, + METH_VARARGS | METH_KEYWORDS, reset_decompression_context__doc + }, + { + "decompress", (PyCFunction) decompress, + METH_VARARGS | METH_KEYWORDS, decompress__doc + }, + { + "decompress_chunk", (PyCFunction) decompress_chunk, + METH_VARARGS | METH_KEYWORDS, decompress_chunk__doc + }, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +PyDoc_STRVAR(lz4frame__doc, + "A Python wrapper for the LZ4 frame protocol" + ); + +static struct PyModuleDef moduledef = +{ + PyModuleDef_HEAD_INIT, + "_frame", + lz4frame__doc, + -1, + 
module_methods +}; + +MODULE_INIT_FUNC (_frame) +{ + PyObject *module = PyModule_Create (&moduledef); + + if (module == NULL) + return NULL; + + PyModule_AddIntConstant (module, "BLOCKSIZE_DEFAULT", LZ4F_default); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX64KB", LZ4F_max64KB); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX256KB", LZ4F_max256KB); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX1MB", LZ4F_max1MB); + PyModule_AddIntConstant (module, "BLOCKSIZE_MAX4MB", LZ4F_max4MB); + + return module; +} diff --git a/contrib/python/lz4/py2/lz4/version.py b/contrib/python/lz4/py2/lz4/version.py new file mode 100644 index 0000000000..895ced4fdf --- /dev/null +++ b/contrib/python/lz4/py2/lz4/version.py @@ -0,0 +1,4 @@ +# coding: utf-8 +# file generated by setuptools_scm +# don't change, don't track in version control +version = '2.2.1' diff --git a/contrib/python/lz4/py2/tests/block/conftest.py b/contrib/python/lz4/py2/tests/block/conftest.py new file mode 100644 index 0000000000..089ce0f83c --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/conftest.py @@ -0,0 +1,111 @@ +import pytest +import os +import sys + + +test_data = [ + (b''), + (os.urandom(8 * 1024)), + (b'0' * 8 * 1024), + (bytearray(b'')), + (bytearray(os.urandom(8 * 1024))), + #(bytearray(open(os.path.join(os.path.dirname(__file__), 'numpy_byte_array.bin'), 'rb').read())) +] + +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b'')), + (memoryview(os.urandom(8 * 1024))) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'store_size': True + } + ), + ( + { + 'store_size': False + } + ), + ] +) +def store_size(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'return_bytearray': True + } + ), + ( + { + 'return_bytearray': False + } + ), + ] +) +def return_bytearray(request): + return request.param + + 
+@pytest.fixture +def c_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture +def d_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture( + params=[ + ('fast', None) + ] + [ + ('fast', {'acceleration': s}) for s in range(10) + ] + [ + ('high_compression', None) + ] + [ + ('high_compression', {'compression': s}) for s in range(17) + ] + [ + (None, None) + ] +) +def mode(request): + return request.param + + +dictionary = [ + None, + (0, 0), + (100, 200), + (0, 8 * 1024), + os.urandom(8 * 1024) +] + + +@pytest.fixture( + params=dictionary, + ids=[ + 'dictionary' + str(i) for i in range(len(dictionary)) + ] +) +def dictionary(request): + return request.param diff --git a/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin Binary files differnew file mode 100644 index 0000000000..49537e2d90 --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin diff --git a/contrib/python/lz4/py2/tests/block/test_block_0.py b/contrib/python/lz4/py2/tests/block/test_block_0.py new file mode 100644 index 0000000000..cca3e65b61 --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/test_block_0.py @@ -0,0 +1,92 @@ +import lz4.block +from multiprocessing.pool import ThreadPool +import sys +from functools import partial +if sys.version_info <= (3, 2): + import struct + + +def get_stored_size(buff): + if sys.version_info > (2, 7): + if isinstance(buff, memoryview): + b = buff.tobytes() + else: + b = bytes(buff) + else: + b = bytes(buff) + + if len(b) < 4: + return None + + if sys.version_info > (3, 2): + return int.from_bytes(b[:4], 'little') + else: + # This would not work on a memoryview object, hence buff.tobytes call + # above + return struct.unpack('<I', b[:4])[0] + + +def roundtrip(x, c_kwargs, d_kwargs, dictionary): + if dictionary: + if isinstance(dictionary, tuple): + d = x[dictionary[0]:dictionary[1]] + else: + d = dictionary + 
c_kwargs['dict'] = d + d_kwargs['dict'] = d + + c = lz4.block.compress(x, **c_kwargs) + + if c_kwargs['store_size']: + assert get_stored_size(c) == len(x) + else: + d_kwargs['uncompressed_size'] = len(x) + + return lz4.block.decompress(c, **d_kwargs) + + +def setup_kwargs(mode, store_size, c_return_bytearray=None, d_return_bytearray=None): + c_kwargs = {} + + if mode[0] is not None: + c_kwargs['mode'] = mode[0] + if mode[1] is not None: + c_kwargs.update(mode[1]) + + c_kwargs.update(store_size) + + if(c_return_bytearray): + c_kwargs.update(c_return_bytearray) + + d_kwargs = {} + + if(d_return_bytearray): + d_kwargs.update(d_return_bytearray) + + return (c_kwargs, d_kwargs) + + +# Test single threaded usage with all valid variations of input +def test_1(data, mode, store_size, c_return_bytearray, d_return_bytearray, dictionary): + (c_kwargs, d_kwargs) = setup_kwargs( + mode, store_size, c_return_bytearray, d_return_bytearray) + + d = roundtrip(data, c_kwargs, d_kwargs, dictionary) + + assert d == data + if d_return_bytearray['return_bytearray']: + assert isinstance(d, bytearray) + + +# Test multi threaded usage with all valid variations of input +def test_2(data, mode, store_size, dictionary): + (c_kwargs, d_kwargs) = setup_kwargs(mode, store_size) + + data_in = [data for i in range(32)] + + pool = ThreadPool(8) + rt = partial(roundtrip, c_kwargs=c_kwargs, + d_kwargs=d_kwargs, dictionary=dictionary) + data_out = pool.map(rt, data_in) + pool.close() + assert data_in == data_out diff --git a/contrib/python/lz4/py2/tests/block/test_block_1.py b/contrib/python/lz4/py2/tests/block/test_block_1.py new file mode 100644 index 0000000000..4392bb332c --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/test_block_1.py @@ -0,0 +1,149 @@ +import lz4.block +import pytest +import sys +import os + + +def test_decompress_ui32_overflow(): + data = lz4.block.compress(b'A' * 64) + with pytest.raises(OverflowError): + lz4.block.decompress(data[4:], uncompressed_size=((1 << 32) + 64)) 
+ + +def test_decompress_without_leak(): + # Verify that hand-crafted packet does not leak uninitialized(?) memory. + data = lz4.block.compress(b'A' * 64) + message = r'^Decompressor wrote 64 bytes, but 79 bytes expected from header$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(b'\x4f' + data[1:]) + + +def test_decompress_with_small_buffer(): + data = lz4.block.compress(b'A' * 64, store_size=False) + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(data[4:], uncompressed_size=64) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(data, uncompressed_size=60) + + +def test_decompress_truncated(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + compressed = lz4.block.compress(input_data) + # for i in range(len(compressed)): + # try: + # lz4.block.decompress(compressed[:i]) + # except: + # print(i, sys.exc_info()[0], sys.exc_info()[1]) + with pytest.raises(ValueError, match='Input source data size too small'): + lz4.block.decompress(compressed[:0]) + for n in [0, 1]: + with pytest.raises(ValueError, match='Input source data size too small'): + lz4.block.decompress(compressed[:n]) + for n in [24, 25, -2, 27, 67, 85]: + with pytest.raises(lz4.block.LZ4BlockError): + lz4.block.decompress(compressed[:n]) + + +def test_decompress_with_trailer(): + data = b'A' * 64 + comp = lz4.block.compress(data) + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. 
Error code: \d+$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + b'A') + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + comp) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + comp[4:]) + + +def test_unicode(): + if sys.version_info < (3,): + return # skip + DATA = b'x' + with pytest.raises(TypeError): + lz4.block.compress(DATA.decode('latin1')) + lz4.block.decompress(lz4.block.compress(DATA).decode('latin1')) + +# These next two are probably redundant given test_1 above but we'll keep them +# for now + + +def test_return_bytearray(): + if sys.version_info < (3,): + return # skip + data = os.urandom(128 * 1024) # Read 128kb + compressed = lz4.block.compress(data) + b = lz4.block.compress(data, return_bytearray=True) + assert isinstance(b, bytearray) + assert bytes(b) == compressed + b = lz4.block.decompress(compressed, return_bytearray=True) + assert isinstance(b, bytearray) + assert bytes(b) == data + + +def test_memoryview(): + if sys.version_info < (2, 7): + return # skip + data = os.urandom(128 * 1024) # Read 128kb + compressed = lz4.block.compress(data) + assert lz4.block.compress(memoryview(data)) == compressed + assert lz4.block.decompress(memoryview(compressed)) == data + + +def test_with_dict_none(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + for mode in ['default', 'high_compression']: + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode, dict=None)) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict=None) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode, dict=b'')) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict=b'') == input_data + assert lz4.block.decompress(lz4.block.compress( + 
input_data, mode=mode, dict='')) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict='') == input_data + + +def test_with_dict(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + dict1 = input_data[10:30] + dict2 = input_data[20:40] + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$' + for mode in ['default', 'high_compression']: + compressed = lz4.block.compress(input_data, mode=mode, dict=dict1) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(compressed) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(compressed, dict=dict1[:2]) + assert lz4.block.decompress(compressed, dict=dict2) != input_data + assert lz4.block.decompress(compressed, dict=dict1) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data), dict=dict1) == input_data + + +def test_known_decompress_1(): + input = b'\x00\x00\x00\x00\x00' + output = b'' + assert lz4.block.decompress(input) == output + + +def test_known_decompress_2(): + input = b'\x01\x00\x00\x00\x10 ' + output = b' ' + assert lz4.block.decompress(input) == output + + +def test_known_decompress_3(): + input = b'h\x00\x00\x00\xff\x0bLorem ipsum dolor sit amet\x1a\x006P amet' + output = b'Lorem ipsum dolor sit amet' * 4 + assert lz4.block.decompress(input) == output + + +def test_known_decompress_4(): + input = b'\xb0\xb3\x00\x00\xff\x1fExcepteur sint occaecat cupidatat non proident.\x00' + (b'\xff' * 180) + b'\x1ePident' + output = b'Excepteur sint occaecat cupidatat non proident' * 1000 + assert lz4.block.decompress(input) == output diff --git a/contrib/python/lz4/py2/tests/block/test_block_2.py b/contrib/python/lz4/py2/tests/block/test_block_2.py new file mode 100644 index 0000000000..87ceefb728 --- /dev/null +++ 
b/contrib/python/lz4/py2/tests/block/test_block_2.py @@ -0,0 +1,62 @@ +import pytest +import sys +import lz4.block +import psutil +import os + +# This test requires allocating a big lump of memory. In order to +# avoid a massive memory allocation during byte compilation, we have +# to declare a variable for the size of the buffer we're going to +# create outside the scope of the function below. See: +# https://bugs.python.org/issue21074 +_4GB = 0x100000000 # 4GB + +# This test will be killed on Travis due to the 3GB memory limit +# there. Unfortunately psutil reports the host memory, not the memory +# available to the container, and so can't be used to detect available +# memory, so instead, as an ugly hack for detecting we're on Travis we +# check for the TRAVIS environment variable being set. This is quite +# fragile. + + +@pytest.mark.skipif( + os.environ.get('TRAVIS') is not None, + reason='Skipping test on Travis due to insufficient memory' +) +@pytest.mark.skipif( + sys.maxsize < 0xffffffff, + reason='Py_ssize_t too small for this test' +) +@pytest.mark.skipif( + psutil.virtual_memory().total < _4GB, + reason='Insufficient system memory for this test' +) +def test_huge(): + try: + huge = b'\0' * _4GB + except MemoryError: + pytest.skip('Insufficient system memory for this test') + + with pytest.raises( + OverflowError, match='Input too large for LZ4 API' + ): + lz4.block.compress(huge) + + with pytest.raises( + OverflowError, match='Dictionary too large for LZ4 API' + ): + lz4.block.compress(b'', dict=huge) + + with pytest.raises( + OverflowError, match='Input too large for LZ4 API' + ): + lz4.block.decompress(huge) + + with pytest.raises( + OverflowError, match='Dictionary too large for LZ4 API' + ): + lz4.block.decompress(b'', dict=huge) + + +def test_dummy(): + pass diff --git a/contrib/python/lz4/py2/tests/block/test_block_3.py b/contrib/python/lz4/py2/tests/block/test_block_3.py new file mode 100644 index 0000000000..0c3fb0821d --- /dev/null +++ 
b/contrib/python/lz4/py2/tests/block/test_block_3.py @@ -0,0 +1,38 @@ +import lz4.block +import pytest + + +test_data = [ + (b'a' * 1024 * 1024), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_block_decompress_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + + tracemalloc.start() + + compressed = lz4.block.compress(data) + prev_snapshot = None + + for i in range(1000): + decompressed = lz4.block.decompress(compressed) # noqa: F841 + + if i % 100 == 0: + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < (1024 * 4) + + prev_snapshot = snapshot diff --git a/contrib/python/lz4/py2/tests/frame/__init__.py b/contrib/python/lz4/py2/tests/frame/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/__init__.py diff --git a/contrib/python/lz4/py2/tests/frame/conftest.py b/contrib/python/lz4/py2/tests/frame/conftest.py new file mode 100644 index 0000000000..5ab52c0ada --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/conftest.py @@ -0,0 +1,95 @@ +import pytest +import lz4.frame as lz4frame +import lz4 + + +@pytest.fixture( + params=[ + # (lz4frame.BLOCKSIZE_DEFAULT), + (lz4frame.BLOCKSIZE_MAX64KB), + (lz4frame.BLOCKSIZE_MAX256KB), + (lz4frame.BLOCKSIZE_MAX1MB), + (lz4frame.BLOCKSIZE_MAX4MB), + ] +) +def block_size(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False), + ] +) +def block_linked(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False), + ] +) +def content_checksum(request): + return request.param + + +if lz4.library_version_number() >= 10800: + p = [True, False] +else: + p = [False, ] + + +@pytest.fixture( + params=[ + (pp) for pp in p + ] +) +def block_checksum(request): + return request.param + + 
+compression_levels = [ + (lz4frame.COMPRESSIONLEVEL_MIN), + (lz4frame.COMPRESSIONLEVEL_MINHC), + (lz4frame.COMPRESSIONLEVEL_MAX), +] + + +@pytest.fixture( + params=compression_levels +) +def compression_level(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False) + ] +) +def auto_flush(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False) + ] +) +def store_size(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False), + ] +) +def return_bytearray(request): + return request.param diff --git a/contrib/python/lz4/py2/tests/frame/helpers.py b/contrib/python/lz4/py2/tests/frame/helpers.py new file mode 100644 index 0000000000..e6cb0c9ef0 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/helpers.py @@ -0,0 +1,44 @@ +import lz4.frame as lz4frame + + +def get_frame_info_check(compressed_data, + source_size, + store_size, + block_size, + block_linked, + content_checksum, + block_checksum): + + frame_info = lz4frame.get_frame_info(compressed_data) + + assert frame_info["content_checksum"] == content_checksum + assert frame_info["block_checksum"] == block_checksum + + assert frame_info["skippable"] is False + + if store_size is True: + assert frame_info["content_size"] == source_size + else: + assert frame_info["content_size"] == 0 + + if source_size > frame_info['block_size']: + # More than a single block + assert frame_info["block_linked"] == block_linked + + if block_size == lz4frame.BLOCKSIZE_DEFAULT: + assert frame_info["block_size_id"] == lz4frame.BLOCKSIZE_MAX64KB + else: + assert frame_info["block_size_id"] == block_size + + +def get_chunked(data, nchunks): + size = len(data) + # stride = int(math.ceil(float(size)/nchunks)) # no // on py 2.6 + stride = size // nchunks + start = 0 + end = start + stride + while end < size: + yield data[start:end] + start += stride + end += stride + yield data[start:] diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_0.py 
b/contrib/python/lz4/py2/tests/frame/test_frame_0.py new file mode 100644 index 0000000000..f03431d412 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_0.py @@ -0,0 +1,172 @@ +import lz4.frame as lz4frame +import lz4 +import re + + +def test_library_version_number(): + v = lz4.library_version_number() + assert isinstance(v, int) + assert v > 10000 + + +def test_library_version_string(): + v = lz4.library_version_string() + assert isinstance(v, str) + assert v.count('.') == 2 + r = re.compile(r'^[0-9]*\.[0-9]*\.[0-9]*$') + assert r.match(v) is not None + + +def test_create_compression_context(): + context = lz4frame.create_compression_context() + assert context is not None + + +def test_create_decompression_context(): + context = lz4frame.create_decompression_context() + assert context is not None + + +def test_reset_decompression_context_1(): + if lz4.library_version_number() >= 10800: + context = lz4frame.create_decompression_context() + r = lz4frame.reset_decompression_context(context) + assert r is None + else: + pass + + +def test_reset_decompression_context_2(): + if lz4.library_version_number() >= 10800: + c = lz4frame.compress(b'1234', return_bytearray=False) + context = lz4frame.create_decompression_context() + try: + # Simulate an error by passing junk to decompress + d = lz4frame.decompress_chunk(context, c[4:]) + except RuntimeError: + pass + r = lz4frame.reset_decompression_context(context) + assert r is None + # And confirm we can use the context after reset + d, bytes_read, eof = lz4frame.decompress_chunk(context, c) + assert d == b'1234' + assert bytes_read == len(c) + assert eof is True + else: + pass + + +def test_compress_return_type_1(): + r = lz4frame.compress(b'', return_bytearray=False) + assert isinstance(r, bytes) + + +def test_compress_return_type_2(): + r = lz4frame.compress(b'', return_bytearray=True) + assert isinstance(r, bytearray) + + +def test_decompress_return_type_1(): + c = lz4frame.compress(b'', 
return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=False, + return_bytes_read=False + ) + assert isinstance(r, bytes) + + +def test_decompress_return_type_2(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=True, + return_bytes_read=False + ) + assert isinstance(r, bytearray) + + +def test_decompress_return_type_3(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=False, + return_bytes_read=True + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytes) + assert isinstance(r[1], int) + + +def test_decompress_return_type_4(): + c = lz4frame.compress(b'', return_bytearray=False) + r = lz4frame.decompress( + c, + return_bytearray=True, + return_bytes_read=True + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytearray) + assert isinstance(r[1], int) + + +def test_decompress_chunk_return_type_1(): + c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r, b, e = lz4frame.decompress_chunk( + d, + c, + return_bytearray=False, + ) + assert isinstance(r, bytes) + assert isinstance(b, int) + assert isinstance(e, bool) + + +def test_decompress_chunk_return_type_2(): + c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r, b, e = lz4frame.decompress_chunk( + d, + c, + return_bytearray=True, + ) + assert isinstance(r, bytearray) + assert isinstance(b, int) + assert isinstance(e, bool) + + +def test_decompress_chunk_return_type_3(): + c = lz4frame.compress(b'', return_bytearray=False) + d = lz4frame.create_decompression_context() + r = lz4frame.decompress_chunk( + d, + c, + return_bytearray=False, + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytes) + assert isinstance(r[1], int) + assert isinstance(r[2], bool) + + +def test_decompress_chunk_return_type_4(): + c = lz4frame.compress(b'', return_bytearray=False) + d = 
lz4frame.create_decompression_context() + r = lz4frame.decompress_chunk( + d, + c, + return_bytearray=True, + ) + assert isinstance(r, tuple) + assert isinstance(r[0], bytearray) + assert isinstance(r[1], int) + assert isinstance(r[2], bool) + + +def test_block_size_constants(): + assert lz4frame.BLOCKSIZE_DEFAULT == 0 + assert lz4frame.BLOCKSIZE_MAX64KB == 4 + assert lz4frame.BLOCKSIZE_MAX256KB == 5 + assert lz4frame.BLOCKSIZE_MAX1MB == 6 + assert lz4frame.BLOCKSIZE_MAX4MB == 7 diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_1.py b/contrib/python/lz4/py2/tests/frame/test_frame_1.py new file mode 100644 index 0000000000..35110c44f1 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_1.py @@ -0,0 +1,111 @@ +import lz4.frame as lz4frame +import os +import sys +import pytest +from .helpers import get_frame_info_check + + +test_data = [ + (b''), + (os.urandom(8 * 1024)), + (b'0' * 8 * 1024), + (bytearray(b'')), + (bytearray(os.urandom(8 * 1024))), + (os.urandom(128 * 1024)), + (os.urandom(256 * 1024)), + (os.urandom(512 * 1024)), +] +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b'')), + (memoryview(os.urandom(8 * 1024))) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_roundtrip_1( + data, + block_size, + block_linked, + content_checksum, + block_checksum, + compression_level, + store_size): + + compressed = lz4frame.compress( + data, + store_size=store_size, + compression_level=compression_level, + block_size=block_size, + block_linked=block_linked, + content_checksum=content_checksum, + block_checksum=block_checksum, + ) + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + decompressed, bytes_read = lz4frame.decompress( + compressed, return_bytes_read=True) + assert bytes_read == len(compressed) + assert decompressed == data 
+ + +def test_roundtrip_2(data, + block_size, + block_linked, + content_checksum, + block_checksum, + compression_level, + auto_flush, + store_size): + + c_context = lz4frame.create_compression_context() + + kwargs = {} + kwargs['compression_level'] = compression_level + kwargs['block_size'] = block_size + kwargs['block_linked'] = block_linked + kwargs['content_checksum'] = content_checksum + kwargs['block_checksum'] = block_checksum + kwargs['auto_flush'] = auto_flush + if store_size is True: + kwargs['source_size'] = len(data) + + compressed = lz4frame.compress_begin( + c_context, + **kwargs + ) + compressed += lz4frame.compress_chunk( + c_context, + data + ) + compressed += lz4frame.compress_flush(c_context) + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + decompressed, bytes_read = lz4frame.decompress( + compressed, return_bytes_read=True) + assert bytes_read == len(compressed) + assert decompressed == data diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_2.py b/contrib/python/lz4/py2/tests/frame/test_frame_2.py new file mode 100644 index 0000000000..80b44b87ff --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_2.py @@ -0,0 +1,107 @@ +import lz4.frame as lz4frame +import pytest +import os +import sys +from . 
helpers import ( + get_chunked, + get_frame_info_check, +) + + +test_data = [ + (b'', 1, 1), + (os.urandom(8 * 1024), 8, 1), + (os.urandom(8 * 1024), 1, 8), + (b'0' * 8 * 1024, 8, 1), + (b'0' * 8 * 1024, 8, 1), + (bytearray(b''), 1, 1), + (bytearray(os.urandom(8 * 1024)), 8, 1), +] +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b''), 1, 1), + (memoryview(os.urandom(8 * 1024)), 8, 1) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_roundtrip_chunked(data, block_size, block_linked, + content_checksum, block_checksum, + compression_level, + auto_flush, store_size): + + data, c_chunks, d_chunks = data + + c_context = lz4frame.create_compression_context() + + kwargs = {} + kwargs['compression_level'] = compression_level + kwargs['block_size'] = block_size + kwargs['block_linked'] = block_linked + kwargs['content_checksum'] = content_checksum + kwargs['block_checksum'] = block_checksum + kwargs['auto_flush'] = auto_flush + if store_size is True: + kwargs['source_size'] = len(data) + + compressed = lz4frame.compress_begin( + c_context, + **kwargs + ) + data_in = get_chunked(data, c_chunks) + try: + while True: + compressed += lz4frame.compress_chunk( + c_context, + next(data_in) + ) + except StopIteration: + pass + finally: + del data_in + + compressed += lz4frame.compress_flush(c_context) + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + + d_context = lz4frame.create_decompression_context() + compressed_in = get_chunked(compressed, d_chunks) + decompressed = b'' + bytes_read = 0 + eofs = [] + try: + while True: + d, b, e = lz4frame.decompress_chunk( + d_context, + next(compressed_in), + ) + decompressed += d + bytes_read += b + eofs.append(e) + + except StopIteration: + pass + finally: + del compressed_in + + assert bytes_read == len(compressed) + 
assert decompressed == data + assert eofs[-1] is True + assert (True in eofs[:-2]) is False diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_3.py b/contrib/python/lz4/py2/tests/frame/test_frame_3.py new file mode 100644 index 0000000000..a7835a46c9 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_3.py @@ -0,0 +1,57 @@ +import lz4.frame as lz4frame +import pytest +import os +import struct + +test_data = [ + (os.urandom(256 * 1024)), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_decompress_truncated(data): + compressed = lz4frame.compress(data) + + message = r'^LZ4F_getFrameInfo failed with code: ERROR_frameHeader_incomplete' + with pytest.raises(RuntimeError, match=message): + lz4frame.decompress(compressed[:6]) + + for i in range(16, len(compressed) - 1, 5): # 15 is the max size of the header + message = r'^Frame incomplete. LZ4F_decompress returned:' + try: + lz4frame.decompress(compressed[:i]) + except RuntimeError as r: + print(r) + with pytest.raises(RuntimeError, match=message): + lz4frame.decompress(compressed[:i]) + + +def test_content_checksum_failure(data): + compressed = lz4frame.compress(data, content_checksum=True) + message = r'^LZ4F_decompress failed with code: ERROR_contentChecksum_invalid$' + with pytest.raises(RuntimeError, match=message): + last = struct.unpack('B', compressed[-1:])[0] + lz4frame.decompress(compressed[:-1] + struct.pack('B', last ^ 0x42)) + + +def test_block_checksum_failure(data): + compressed = lz4frame.compress( + data, + content_checksum=True, + block_checksum=True, + return_bytearray=True, + ) + message = r'^LZ4F_decompress failed with code: ERROR_blockChecksum_invalid$' + if len(compressed) > 32: + with pytest.raises(RuntimeError, match=message): + compressed[22] = compressed[18] ^ 0x42 + lz4frame.decompress(compressed) diff --git 
a/contrib/python/lz4/py2/tests/frame/test_frame_4.py b/contrib/python/lz4/py2/tests/frame/test_frame_4.py new file mode 100644 index 0000000000..7fa1654701 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_4.py @@ -0,0 +1,148 @@ +import lz4.frame as lz4frame +import os +import pytest +from . helpers import ( + get_frame_info_check, + get_chunked, +) + +test_data = [ + b'', + (128 * (32 * os.urandom(32))), + (256 * (32 * os.urandom(32))), + (512 * (32 * os.urandom(32))), + (1024 * (32 * os.urandom(32))), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +@pytest.fixture( + params=[ + (True), + (False) + ] +) +def reset(request): + return request.param + + +@pytest.fixture( + params=[ + (1), + (8) + ] +) +def chunks(request): + return request.param + + +def test_roundtrip_LZ4FrameCompressor( + data, + chunks, + block_size, + block_linked, + reset, + store_size, + block_checksum, + content_checksum): + + with lz4frame.LZ4FrameCompressor( + block_size=block_size, + block_linked=block_linked, + content_checksum=content_checksum, + block_checksum=block_checksum, + ) as compressor: + def do_compress(): + if store_size is True: + compressed = compressor.begin(source_size=len(data)) + else: + compressed = compressor.begin() + + for chunk in get_chunked(data, chunks): + compressed += compressor.compress(chunk) + + compressed += compressor.flush() + return compressed + + compressed = do_compress() + + if reset is True: + compressor.reset() + compressed = do_compress() + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + + decompressed, bytes_read = lz4frame.decompress( + compressed, return_bytes_read=True) + assert data == decompressed + assert bytes_read == len(compressed) + + +def test_roundtrip_LZ4FrameCompressor_LZ4FrameDecompressor( + data, + chunks, + block_size, 
+ block_linked, + reset, + store_size, + block_checksum, + content_checksum): + + with lz4frame.LZ4FrameCompressor( + block_size=block_size, + block_linked=block_linked, + content_checksum=content_checksum, + block_checksum=block_checksum, + ) as compressor: + def do_compress(): + if store_size is True: + compressed = compressor.begin(source_size=len(data)) + else: + compressed = compressor.begin() + + for chunk in get_chunked(data, chunks): + compressed += compressor.compress(chunk) + + compressed += compressor.flush() + return compressed + + compressed = do_compress() + + if reset is True: + compressor.reset() + compressed = do_compress() + + get_frame_info_check( + compressed, + len(data), + store_size, + block_size, + block_linked, + content_checksum, + block_checksum, + ) + + with lz4frame.LZ4FrameDecompressor() as decompressor: + decompressed = b'' + for chunk in get_chunked(compressed, chunks): + b = decompressor.decompress(chunk) + decompressed += b + + assert data == decompressed diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_5.py b/contrib/python/lz4/py2/tests/frame/test_frame_5.py new file mode 100644 index 0000000000..05daf283f9 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_5.py @@ -0,0 +1,99 @@ +import lz4.frame +import pytest +import gc + +MEM_INCREASE_LIMIT = (1024 * 25) + +test_data = [ + (b'a' * 1024 * 1024), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_frame_decompress_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + + tracemalloc.start() + + compressed = lz4.frame.compress(data) + prev_snapshot = None + + for i in range(1000): + decompressed = lz4.frame.decompress(compressed) # noqa: F841 + + if i % 100 == 0: + gc.collect() + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < 
MEM_INCREASE_LIMIT + + prev_snapshot = snapshot + + +def test_frame_decompress_chunk_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + tracemalloc.start() + + compressed = lz4.frame.compress(data) + + prev_snapshot = None + + for i in range(1000): + context = lz4.frame.create_decompression_context() + decompressed = lz4.frame.decompress_chunk( # noqa: F841 + context, compressed + ) + + if i % 100 == 0: + gc.collect() + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < MEM_INCREASE_LIMIT + + prev_snapshot = snapshot + + +def test_frame_open_decompress_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + tracemalloc.start() + + with lz4.frame.open('test.lz4', 'w') as f: + f.write(data) + + prev_snapshot = None + + for i in range(1000): + with lz4.frame.open('test.lz4', 'r') as f: + decompressed = f.read() # noqa: F841 + + if i % 100 == 0: + gc.collect() + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < MEM_INCREASE_LIMIT + + prev_snapshot = snapshot + + +# TODO: add many more memory usage tests along the lines of this one +# for other funcs + +def test_dummy_always_pass(): + # If pytest finds all tests are skipped, then it exits with code 5 rather + # than 0, which tox sees as an error. Here we add a dummy test that always passes. 
+ assert True diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_6.py b/contrib/python/lz4/py2/tests/frame/test_frame_6.py new file mode 100644 index 0000000000..335d09e441 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_6.py @@ -0,0 +1,100 @@ +import os +import pytest +import lz4.frame as lz4frame + +test_data = [ + b'', + (128 * (32 * os.urandom(32))), + (5 * 128 * os.urandom(1024)), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +compression_levels = [ + (lz4frame.COMPRESSIONLEVEL_MIN), + # (lz4frame.COMPRESSIONLEVEL_MINHC), + # (lz4frame.COMPRESSIONLEVEL_MAX), +] + + +@pytest.fixture( + params=compression_levels +) +def compression_level(request): + return request.param + + +def test_lz4frame_open_write(data): + with lz4frame.open('testfile', mode='wb') as fp: + fp.write(data) + + +def test_lz4frame_open_write_read_defaults(data): + with lz4frame.open('testfile', mode='wb') as fp: + fp.write(data) + with lz4frame.open('testfile', mode='r') as fp: + data_out = fp.read() + assert data_out == data + + +def test_lz4frame_open_write_read_text(): + data = u'This is a test string' + with lz4frame.open('testfile', mode='wt') as fp: + fp.write(data) + with lz4frame.open('testfile', mode='rt') as fp: + data_out = fp.read() + assert data_out == data + + +def test_lz4frame_open_write_read_text_iter(): + data = u'This is a test string' + with lz4frame.open('testfile', mode='wt') as fp: + fp.write(data) + data_out = '' + with lz4frame.open('testfile', mode='rt') as fp: + for line in fp: + data_out += line + assert data_out == data + + +def test_lz4frame_open_write_read( + data, + compression_level, + block_linked, + block_checksum, + block_size, + content_checksum, + auto_flush, + store_size, + return_bytearray): + + kwargs = {} + + if store_size is True: + kwargs['source_size'] = len(data) + + kwargs['compression_level'] = compression_level 
+ kwargs['block_size'] = block_size + kwargs['block_linked'] = block_linked + kwargs['content_checksum'] = content_checksum + kwargs['block_checksum'] = block_checksum + kwargs['auto_flush'] = auto_flush + kwargs['return_bytearray'] = return_bytearray + kwargs['mode'] = 'wb' + + with lz4frame.open('testfile', **kwargs) as fp: + fp.write(data) + + with lz4frame.open('testfile', mode='r') as fp: + data_out = fp.read() + + assert data_out == data diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_7.py b/contrib/python/lz4/py2/tests/frame/test_frame_7.py new file mode 100644 index 0000000000..583f3fbb05 --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_7.py @@ -0,0 +1,102 @@ +import lz4.frame as lz4frame +import pytest +import os + +test_data = [ + (os.urandom(32) * 256), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_roundtrip_multiframe_1(data): + nframes = 4 + + compressed = b'' + for _ in range(nframes): + compressed += lz4frame.compress(data) + + decompressed = b'' + for _ in range(nframes): + decompressed += lz4frame.decompress(compressed) + + assert len(decompressed) == nframes * len(data) + assert data * nframes == decompressed + + +def test_roundtrip_multiframe_2(data): + nframes = 4 + + compressed = b'' + ctx = lz4frame.create_compression_context() + for _ in range(nframes): + compressed += lz4frame.compress_begin(ctx) + compressed += lz4frame.compress_chunk(ctx, data) + compressed += lz4frame.compress_flush(ctx) + + decompressed = b'' + for _ in range(nframes): + decompressed += lz4frame.decompress(compressed) + + assert len(decompressed) == nframes * len(data) + assert data * nframes == decompressed + + +def test_roundtrip_multiframe_3(data): + nframes = 4 + + compressed = b'' + ctx = lz4frame.create_compression_context() + for _ in range(nframes): + compressed += lz4frame.compress_begin(ctx) + compressed += 
lz4frame.compress_chunk(ctx, data) + compressed += lz4frame.compress_flush(ctx) + + decompressed = b'' + ctx = lz4frame.create_decompression_context() + for _ in range(nframes): + d, bytes_read, eof = lz4frame.decompress_chunk(ctx, compressed) + decompressed += d + assert eof is True + assert bytes_read == len(compressed) // nframes + + assert len(decompressed) == nframes * len(data) + assert data * nframes == decompressed + + +def test_roundtrip_multiframe_4(data): + nframes = 4 + + compressed = b'' + with lz4frame.LZ4FrameCompressor() as compressor: + for _ in range(nframes): + compressed += compressor.begin() + compressed += compressor.compress(data) + compressed += compressor.flush() + + decompressed = b'' + with lz4frame.LZ4FrameDecompressor() as decompressor: + for i in range(nframes): + if i == 0: + d = compressed + else: + d = decompressor.unused_data + decompressed += decompressor.decompress(d) + assert decompressor.eof is True + assert decompressor.needs_input is True + if i == nframes - 1: + assert decompressor.unused_data is None + else: + assert len(decompressor.unused_data) == len( + compressed) * (nframes - i - 1) / nframes + + assert len(decompressed) == nframes * len(data) + assert data * nframes == decompressed diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_8.py b/contrib/python/lz4/py2/tests/frame/test_frame_8.py new file mode 100644 index 0000000000..159534aefe --- /dev/null +++ b/contrib/python/lz4/py2/tests/frame/test_frame_8.py @@ -0,0 +1,12 @@ +import lz4.frame as lz4frame + + +def test_lz4frame_open_write_read_text_iter(): + data = u'This is a test string' + with lz4frame.open('testfile', mode='wt') as fp: + fp.write(data) + data_out = '' + with lz4frame.open('testfile', mode='rt') as fp: + for line in fp: + data_out += line + assert data_out == data diff --git a/contrib/python/lz4/py2/tests/frame/test_frame_9.py b/contrib/python/lz4/py2/tests/frame/test_frame_9.py new file mode 100644 index 0000000000..27d61607da --- /dev/null 
+++ b/contrib/python/lz4/py2/tests/frame/test_frame_9.py @@ -0,0 +1,44 @@ +import os +import lz4.frame + + +def test_issue_172_1(): + """Test reproducer for issue 172 + + Issue 172 is a reported failure occurring on Windows 10 only. This bug was + due to incorrect handling of Py_ssize_t types when doing comparisons and + using them as a size when allocating memory. + + """ + input_data = 8 * os.urandom(1024) + with lz4.frame.open('testfile_small', 'wb') as fp: + bytes_written = fp.write(input_data) # noqa: F841 + + with lz4.frame.open('testfile_small', 'rb') as fp: + data = fp.read(10) + assert len(data) == 10 + + +def test_issue_172_2(): + input_data = 9 * os.urandom(1024) + with lz4.frame.open('testfile_small', 'w') as fp: + bytes_written = fp.write(input_data) # noqa: F841 + + with lz4.frame.open('testfile_small', 'r') as fp: + data = fp.read(10) + assert len(data) == 10 + + +def test_issue_172_3(): + input_data = 9 * os.urandom(1024) + with lz4.frame.open('testfile_small', 'wb') as fp: + bytes_written = fp.write(input_data) # noqa: F841 + + with lz4.frame.open('testfile_small', 'rb') as fp: + data = fp.read(10) + assert len(data) == 10 + + with lz4.frame.open('testfile_small', 'rb') as fp: + data = fp.read(16 * 1024 - 1) + assert len(data) == 9 * 1024 + assert data == input_data diff --git a/contrib/python/lz4/py2/tests/ya.make b/contrib/python/lz4/py2/tests/ya.make new file mode 100644 index 0000000000..870dcdedb4 --- /dev/null +++ b/contrib/python/lz4/py2/tests/ya.make @@ -0,0 +1,34 @@ +PY2TEST() + +PEERDIR( + contrib/python/lz4 + contrib/python/psutil +) + +FORK_SUBTESTS() +SIZE(MEDIUM) + +TEST_SRCS( + block/conftest.py + #block/test_block_0.py + block/test_block_1.py + block/test_block_2.py + block/test_block_3.py + frame/__init__.py + frame/conftest.py + frame/helpers.py + frame/test_frame_0.py + frame/test_frame_1.py + frame/test_frame_2.py + frame/test_frame_3.py + frame/test_frame_4.py + frame/test_frame_5.py + frame/test_frame_6.py + 
frame/test_frame_7.py + frame/test_frame_8.py + frame/test_frame_9.py +) + +NO_LINT() + +END() diff --git a/contrib/python/lz4/py2/ya.make b/contrib/python/lz4/py2/ya.make new file mode 100644 index 0000000000..b2beb920c7 --- /dev/null +++ b/contrib/python/lz4/py2/ya.make @@ -0,0 +1,55 @@ +# Generated by devtools/yamaker (pypi). + +PY2_LIBRARY() + +VERSION(2.2.1) + +LICENSE(BSD-3-Clause) + +PEERDIR( + contrib/libs/lz4 + contrib/python/future + contrib/python/py3c +) + +ADDINCL( + contrib/libs/lz4 + contrib/python/py3c +) + +NO_COMPILER_WARNINGS() + +NO_LINT() + +SRCS( + lz4/_version.c + lz4/block/_block.c + lz4/frame/_frame.c +) + +PY_REGISTER( + lz4._version + lz4.block._block + lz4.frame._frame +) + +PY_SRCS( + TOP_LEVEL + lz4/__init__.py + lz4/block/__init__.py + lz4/frame/__init__.py + lz4/frame/_compression.py + lz4/version.py +) + +RESOURCE_FILES( + PREFIX contrib/python/lz4/py2/ + .dist-info/METADATA + .dist-info/top_level.txt +) + +END() + +RECURSE_FOR_TESTS( + tests +) |