diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 09:58:56 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 10:20:20 +0300 |
commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/python/lz4/py2/tests/block | |
parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
download | ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/python/lz4/py2/tests/block')
-rw-r--r-- | contrib/python/lz4/py2/tests/block/conftest.py | 111 | ||||
-rw-r--r-- | contrib/python/lz4/py2/tests/block/numpy_byte_array.bin | bin | 0 -> 8552 bytes | |||
-rw-r--r-- | contrib/python/lz4/py2/tests/block/test_block_0.py | 92 | ||||
-rw-r--r-- | contrib/python/lz4/py2/tests/block/test_block_1.py | 149 | ||||
-rw-r--r-- | contrib/python/lz4/py2/tests/block/test_block_2.py | 62 | ||||
-rw-r--r-- | contrib/python/lz4/py2/tests/block/test_block_3.py | 38 |
6 files changed, 452 insertions, 0 deletions
diff --git a/contrib/python/lz4/py2/tests/block/conftest.py b/contrib/python/lz4/py2/tests/block/conftest.py new file mode 100644 index 0000000000..089ce0f83c --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/conftest.py @@ -0,0 +1,111 @@ +import pytest +import os +import sys + + +test_data = [ + (b''), + (os.urandom(8 * 1024)), + (b'0' * 8 * 1024), + (bytearray(b'')), + (bytearray(os.urandom(8 * 1024))), + #(bytearray(open(os.path.join(os.path.dirname(__file__), 'numpy_byte_array.bin'), 'rb').read())) +] + +if sys.version_info > (2, 7): + test_data += [ + (memoryview(b'')), + (memoryview(os.urandom(8 * 1024))) + ] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'store_size': True + } + ), + ( + { + 'store_size': False + } + ), + ] +) +def store_size(request): + return request.param + + +@pytest.fixture( + params=[ + ( + { + 'return_bytearray': True + } + ), + ( + { + 'return_bytearray': False + } + ), + ] +) +def return_bytearray(request): + return request.param + + +@pytest.fixture +def c_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture +def d_return_bytearray(return_bytearray): + return return_bytearray + + +@pytest.fixture( + params=[ + ('fast', None) + ] + [ + ('fast', {'acceleration': s}) for s in range(10) + ] + [ + ('high_compression', None) + ] + [ + ('high_compression', {'compression': s}) for s in range(17) + ] + [ + (None, None) + ] +) +def mode(request): + return request.param + + +dictionary = [ + None, + (0, 0), + (100, 200), + (0, 8 * 1024), + os.urandom(8 * 1024) +] + + +@pytest.fixture( + params=dictionary, + ids=[ + 'dictionary' + str(i) for i in range(len(dictionary)) + ] +) +def dictionary(request): + return request.param diff --git a/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin Binary files differnew file mode 100644 index 0000000000..49537e2d90 --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin diff --git a/contrib/python/lz4/py2/tests/block/test_block_0.py b/contrib/python/lz4/py2/tests/block/test_block_0.py new file mode 100644 index 0000000000..cca3e65b61 --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/test_block_0.py @@ -0,0 +1,92 @@ +import lz4.block +from multiprocessing.pool import ThreadPool +import sys +from functools import partial +if sys.version_info <= (3, 2): + import struct + + +def get_stored_size(buff): + if sys.version_info > (2, 7): + if isinstance(buff, memoryview): + b = buff.tobytes() + else: + b = bytes(buff) + else: + b = bytes(buff) + + if len(b) < 4: + return None + + if sys.version_info > (3, 2): + return int.from_bytes(b[:4], 'little') + else: + # This would not work on a memoryview object, hence buff.tobytes call + # above + return struct.unpack('<I', b[:4])[0] + + +def roundtrip(x, c_kwargs, d_kwargs, dictionary): + if dictionary: + if isinstance(dictionary, tuple): + d = x[dictionary[0]:dictionary[1]] + else: + d = dictionary + c_kwargs['dict'] = d + d_kwargs['dict'] = d + + c = lz4.block.compress(x, **c_kwargs) + + if c_kwargs['store_size']: + assert get_stored_size(c) == len(x) + else: + d_kwargs['uncompressed_size'] = len(x) + + return lz4.block.decompress(c, **d_kwargs) + + +def setup_kwargs(mode, store_size, c_return_bytearray=None, d_return_bytearray=None): + c_kwargs = {} + + if mode[0] is not None: + c_kwargs['mode'] = mode[0] + if mode[1] is not None: + c_kwargs.update(mode[1]) + + c_kwargs.update(store_size) + + if(c_return_bytearray): + c_kwargs.update(c_return_bytearray) + + d_kwargs = {} + + if(d_return_bytearray): + d_kwargs.update(d_return_bytearray) + + return (c_kwargs, d_kwargs) + + +# Test single threaded usage with all valid variations of input +def test_1(data, mode, store_size, c_return_bytearray, d_return_bytearray, dictionary): + (c_kwargs, d_kwargs) = setup_kwargs( + mode, store_size, c_return_bytearray, d_return_bytearray) + + d = roundtrip(data, c_kwargs, d_kwargs, dictionary) + + assert d == data + if d_return_bytearray['return_bytearray']: + assert isinstance(d, bytearray) + + +# Test multi threaded usage with all valid variations of input +def test_2(data, mode, store_size, dictionary): + (c_kwargs, d_kwargs) = setup_kwargs(mode, store_size) + + data_in = [data for i in range(32)] + + pool = ThreadPool(8) + rt = partial(roundtrip, c_kwargs=c_kwargs, + d_kwargs=d_kwargs, dictionary=dictionary) + data_out = pool.map(rt, data_in) + pool.close() + assert data_in == data_out diff --git a/contrib/python/lz4/py2/tests/block/test_block_1.py b/contrib/python/lz4/py2/tests/block/test_block_1.py new file mode 100644 index 0000000000..4392bb332c --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/test_block_1.py @@ -0,0 +1,149 @@ +import lz4.block +import pytest +import sys +import os + + +def test_decompress_ui32_overflow(): + data = lz4.block.compress(b'A' * 64) + with pytest.raises(OverflowError): + lz4.block.decompress(data[4:], uncompressed_size=((1 << 32) + 64)) + + +def test_decompress_without_leak(): + # Verify that hand-crafted packet does not leak uninitialized(?) memory. + data = lz4.block.compress(b'A' * 64) + message = r'^Decompressor wrote 64 bytes, but 79 bytes expected from header$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(b'\x4f' + data[1:]) + + +def test_decompress_with_small_buffer(): + data = lz4.block.compress(b'A' * 64, store_size=False) + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(data[4:], uncompressed_size=64) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(data, uncompressed_size=60) + + +def test_decompress_truncated(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + compressed = lz4.block.compress(input_data) + # for i in range(len(compressed)): + # try: + # lz4.block.decompress(compressed[:i]) + # except: + # print(i, sys.exc_info()[0], sys.exc_info()[1]) + with pytest.raises(ValueError, match='Input source data size too small'): + lz4.block.decompress(compressed[:0]) + for n in [0, 1]: + with pytest.raises(ValueError, match='Input source data size too small'): + lz4.block.decompress(compressed[:n]) + for n in [24, 25, -2, 27, 67, 85]: + with pytest.raises(lz4.block.LZ4BlockError): + lz4.block.decompress(compressed[:n]) + + +def test_decompress_with_trailer(): + data = b'A' * 64 + comp = lz4.block.compress(data) + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$' + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + b'A') + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + comp) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(comp + comp[4:]) + + +def test_unicode(): + if sys.version_info < (3,): + return # skip + DATA = b'x' + with pytest.raises(TypeError): + lz4.block.compress(DATA.decode('latin1')) + lz4.block.decompress(lz4.block.compress(DATA).decode('latin1')) + +# These next two are probably redundant given test_1 above but we'll keep them +# for now + + +def test_return_bytearray(): + if sys.version_info < (3,): + return # skip + data = os.urandom(128 * 1024) # Read 128kb + compressed = lz4.block.compress(data) + b = lz4.block.compress(data, return_bytearray=True) + assert isinstance(b, bytearray) + assert bytes(b) == compressed + b = lz4.block.decompress(compressed, return_bytearray=True) + assert isinstance(b, bytearray) + assert bytes(b) == data + + +def test_memoryview(): + if sys.version_info < (2, 7): + return # skip + data = os.urandom(128 * 1024) # Read 128kb + compressed = lz4.block.compress(data) + assert lz4.block.compress(memoryview(data)) == compressed + assert lz4.block.decompress(memoryview(compressed)) == data + + +def test_with_dict_none(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + for mode in ['default', 'high_compression']: + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode, dict=None)) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict=None) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode, dict=b'')) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict=b'') == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode, dict='')) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data, mode=mode), dict='') == input_data + + +def test_with_dict(): + input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24 + dict1 = input_data[10:30] + dict2 = input_data[20:40] + message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$' + for mode in ['default', 'high_compression']: + compressed = lz4.block.compress(input_data, mode=mode, dict=dict1) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(compressed) + with pytest.raises(lz4.block.LZ4BlockError, match=message): + lz4.block.decompress(compressed, dict=dict1[:2]) + assert lz4.block.decompress(compressed, dict=dict2) != input_data + assert lz4.block.decompress(compressed, dict=dict1) == input_data + assert lz4.block.decompress(lz4.block.compress( + input_data), dict=dict1) == input_data + + +def test_known_decompress_1(): + input = b'\x00\x00\x00\x00\x00' + output = b'' + assert lz4.block.decompress(input) == output + + +def test_known_decompress_2(): + input = b'\x01\x00\x00\x00\x10 ' + output = b' ' + assert lz4.block.decompress(input) == output + + +def test_known_decompress_3(): + input = b'h\x00\x00\x00\xff\x0bLorem ipsum dolor sit amet\x1a\x006P amet' + output = b'Lorem ipsum dolor sit amet' * 4 + assert lz4.block.decompress(input) == output + + +def test_known_decompress_4(): + input = b'\xb0\xb3\x00\x00\xff\x1fExcepteur sint occaecat cupidatat non proident.\x00' + (b'\xff' * 180) + b'\x1ePident' + output = b'Excepteur sint occaecat cupidatat non proident' * 1000 + assert lz4.block.decompress(input) == output diff --git a/contrib/python/lz4/py2/tests/block/test_block_2.py b/contrib/python/lz4/py2/tests/block/test_block_2.py new file mode 100644 index 0000000000..87ceefb728 --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/test_block_2.py @@ -0,0 +1,62 @@ +import pytest +import sys +import lz4.block +import psutil +import os + +# This test requires allocating a big lump of memory. In order to +# avoid a massive memory allocation during byte compilation, we have +# to declare a variable for the size of the buffer we're going to +# create outside the scope of the function below. See: +# https://bugs.python.org/issue21074 +_4GB = 0x100000000 # 4GB + +# This test will be killed on Travis due to the 3GB memory limit +# there. Unfortunately psutil reports the host memory, not the memory +# available to the container, and so can't be used to detect available +# memory, so instead, as an ugly hack for detecting we're on Travis we +# check for the TRAVIS environment variable being set. This is quite +# fragile. + + +@pytest.mark.skipif( + os.environ.get('TRAVIS') is not None, + reason='Skipping test on Travis due to insufficient memory' +) +@pytest.mark.skipif( + sys.maxsize < 0xffffffff, + reason='Py_ssize_t too small for this test' +) +@pytest.mark.skipif( + psutil.virtual_memory().total < _4GB, + reason='Insufficient system memory for this test' +) +def test_huge(): + try: + huge = b'\0' * _4GB + except MemoryError: + pytest.skip('Insufficient system memory for this test') + + with pytest.raises( + OverflowError, match='Input too large for LZ4 API' + ): + lz4.block.compress(huge) + + with pytest.raises( + OverflowError, match='Dictionary too large for LZ4 API' + ): + lz4.block.compress(b'', dict=huge) + + with pytest.raises( + OverflowError, match='Input too large for LZ4 API' + ): + lz4.block.decompress(huge) + + with pytest.raises( + OverflowError, match='Dictionary too large for LZ4 API' + ): + lz4.block.decompress(b'', dict=huge) + + +def test_dummy(): + pass diff --git a/contrib/python/lz4/py2/tests/block/test_block_3.py b/contrib/python/lz4/py2/tests/block/test_block_3.py new file mode 100644 index 0000000000..0c3fb0821d --- /dev/null +++ b/contrib/python/lz4/py2/tests/block/test_block_3.py @@ -0,0 +1,38 @@ +import lz4.block +import pytest + + +test_data = [ + (b'a' * 1024 * 1024), +] + + +@pytest.fixture( + params=test_data, + ids=[ + 'data' + str(i) for i in range(len(test_data)) + ] +) +def data(request): + return request.param + + +def test_block_decompress_mem_usage(data): + tracemalloc = pytest.importorskip('tracemalloc') + + tracemalloc.start() + + compressed = lz4.block.compress(data) + prev_snapshot = None + + for i in range(1000): + decompressed = lz4.block.decompress(compressed) # noqa: F841 + + if i % 100 == 0: + snapshot = tracemalloc.take_snapshot() + + if prev_snapshot: + stats = snapshot.compare_to(prev_snapshot, 'lineno') + assert stats[0].size_diff < (1024 * 4) + + prev_snapshot = snapshot |