aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/lz4/py2/tests/block
diff options
context:
space:
mode:
author vitalyisaev <vitalyisaev@ydb.tech> 2023-11-14 09:58:56 +0300
committer vitalyisaev <vitalyisaev@ydb.tech> 2023-11-14 10:20:20 +0300
commit c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
tree cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/python/lz4/py2/tests/block
parent d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
download ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/python/lz4/py2/tests/block')
-rw-r--r-- contrib/python/lz4/py2/tests/block/conftest.py 111
-rw-r--r-- contrib/python/lz4/py2/tests/block/numpy_byte_array.bin bin 0 -> 8552 bytes
-rw-r--r-- contrib/python/lz4/py2/tests/block/test_block_0.py 92
-rw-r--r-- contrib/python/lz4/py2/tests/block/test_block_1.py 149
-rw-r--r-- contrib/python/lz4/py2/tests/block/test_block_2.py 62
-rw-r--r-- contrib/python/lz4/py2/tests/block/test_block_3.py 38
6 files changed, 452 insertions, 0 deletions
diff --git a/contrib/python/lz4/py2/tests/block/conftest.py b/contrib/python/lz4/py2/tests/block/conftest.py
new file mode 100644
index 0000000000..089ce0f83c
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/conftest.py
@@ -0,0 +1,111 @@
+import pytest
+import os
+import sys
+
+
+# Payloads used to parametrize the ``data`` fixture: empty, random and
+# highly repetitive inputs, supplied both as bytes and as bytearray.
+test_data = [
+    b'',
+    os.urandom(8 * 1024),
+    b'0' * 8 * 1024,
+    bytearray(b''),
+    bytearray(os.urandom(8 * 1024)),
+    # Disabled payload, kept for reference:
+    #(bytearray(open(os.path.join(os.path.dirname(__file__), 'numpy_byte_array.bin'), 'rb').read()))
+]
+
+# The empty and random payloads again, this time wrapped in memoryview.
+if sys.version_info > (2, 7):
+    test_data.extend([
+        memoryview(b''),
+        memoryview(os.urandom(8 * 1024)),
+    ])
+
+
+@pytest.fixture(
+    params=test_data,
+    ids=['data%d' % n for n in range(len(test_data))],
+)
+def data(request):
+    """Provide one entry of ``test_data`` per parametrized run.
+
+    Test ids are ``data0``, ``data1``, ... in list order.
+    """
+    return request.param
+
+
+@pytest.fixture(
+    params=[{'store_size': flag} for flag in (True, False)],
+)
+def store_size(request):
+    """Provide a ``{'store_size': bool}`` kwargs dict, first True then False."""
+    return request.param
+
+
+@pytest.fixture(
+    params=[{'return_bytearray': flag} for flag in (True, False)],
+)
+def return_bytearray(request):
+    """Provide a ``{'return_bytearray': bool}`` kwargs dict, first True then False."""
+    return request.param
+
+
+@pytest.fixture
+def c_return_bytearray(return_bytearray):
+ """Pass the ``return_bytearray`` kwargs dict through under a second name.
+
+ NOTE(review): the ``c_`` prefix presumably stands for "compress" (test_1
+ forwards this value as the compression-side option) - confirm.
+ """
+ return return_bytearray
+
+
+@pytest.fixture
+def d_return_bytearray(return_bytearray):
+ """Pass the ``return_bytearray`` kwargs dict through under a second name.
+
+ NOTE(review): the ``d_`` prefix presumably stands for "decompress" (test_1
+ forwards this value as the decompression-side option) - confirm.
+ """
+ return return_bytearray
+
+
+@pytest.fixture(
+    params=(
+        [('fast', None)]
+        + [('fast', {'acceleration': level}) for level in range(10)]
+        + [('high_compression', None)]
+        + [('high_compression', {'compression': level}) for level in range(17)]
+        + [(None, None)]
+    )
+)
+def mode(request):
+    """Provide a ``(mode_name, extra_kwargs)`` pair.
+
+    ``mode_name`` is 'fast', 'high_compression' or None; ``extra_kwargs`` is
+    None or a dict holding one 'acceleration' (0-9) or 'compression' (0-16)
+    setting.
+    """
+    return request.param
+
+
+# Parameter values for the ``dictionary`` fixture below. The list has its own
+# name so that defining the fixture function does not rebind it.
+#   None          - falsy, so no dictionary is used
+#   (start, stop) - consumers slice the input with these bounds
+#   bytes         - used as the dictionary as-is
+dictionary_params = [
+    None,
+    (0, 0),
+    (100, 200),
+    (0, 8 * 1024),
+    os.urandom(8 * 1024),
+]
+
+
+@pytest.fixture(
+    params=dictionary_params,
+    ids=['dictionary%d' % n for n in range(len(dictionary_params))],
+)
+def dictionary(request):
+    """Provide one dictionary specification per parametrized run.
+
+    Test ids are ``dictionary0``, ``dictionary1``, ... in list order.
+    """
+    return request.param
diff --git a/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin
new file mode 100644
index 0000000000..49537e2d90
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin
Binary files differ
diff --git a/contrib/python/lz4/py2/tests/block/test_block_0.py b/contrib/python/lz4/py2/tests/block/test_block_0.py
new file mode 100644
index 0000000000..cca3e65b61
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/test_block_0.py
@@ -0,0 +1,92 @@
+import lz4.block
+from multiprocessing.pool import ThreadPool
+import sys
+from functools import partial
+if sys.version_info <= (3, 2):
+ import struct
+
+
+def get_stored_size(buff):
+    """Return the little-endian unsigned 32-bit integer at the start of ``buff``.
+
+    ``buff`` may be bytes, bytearray or memoryview. Returns None when fewer
+    than four bytes are available.
+    """
+    # bytes() would not give the raw content of a memoryview on every
+    # supported interpreter, hence the explicit tobytes() call.
+    is_view = sys.version_info > (2, 7) and isinstance(buff, memoryview)
+    raw = buff.tobytes() if is_view else bytes(buff)
+
+    if len(raw) < 4:
+        return None
+
+    header = raw[:4]
+    if sys.version_info > (3, 2):
+        return int.from_bytes(header, 'little')
+    return struct.unpack('<I', header)[0]
+
+
+def roundtrip(x, c_kwargs, d_kwargs, dictionary):
+    """Compress ``x`` with lz4.block, decompress it again and return the result.
+
+    Args:
+        x: data to compress.
+        c_kwargs: keyword arguments for ``lz4.block.compress``.
+        d_kwargs: keyword arguments for ``lz4.block.decompress``.
+        dictionary: falsy for no dictionary, a ``(start, stop)`` tuple to use
+            ``x[start:stop]`` as the dictionary, or any other value to use
+            directly as the dictionary.
+
+    Neither kwargs dict is modified.
+    """
+    # Work on private copies: callers may share one pair of dicts between
+    # several concurrent calls (test_2 does so via functools.partial).
+    c_kwargs = dict(c_kwargs)
+    d_kwargs = dict(d_kwargs)
+
+    if dictionary:
+        if isinstance(dictionary, tuple):
+            d = x[dictionary[0]:dictionary[1]]
+        else:
+            d = dictionary
+        c_kwargs['dict'] = d
+        d_kwargs['dict'] = d
+
+    c = lz4.block.compress(x, **c_kwargs)
+
+    # lz4.block.compress stores the size unless told otherwise, so a missing
+    # 'store_size' key is treated as True.
+    if c_kwargs.get('store_size', True):
+        assert get_stored_size(c) == len(x)
+    else:
+        # Without a stored size the decompressor must be told the length.
+        d_kwargs['uncompressed_size'] = len(x)
+
+    return lz4.block.decompress(c, **d_kwargs)
+
+
+def setup_kwargs(mode, store_size, c_return_bytearray=None, d_return_bytearray=None):
+    """Build the keyword-argument dicts for compression and decompression.
+
+    Args:
+        mode: ``(mode_name, extra_kwargs)``; either element may be None.
+        store_size: dict merged into the compression kwargs.
+        c_return_bytearray: optional dict merged into the compression kwargs.
+        d_return_bytearray: optional dict used as the decompression kwargs.
+
+    Returns:
+        ``(c_kwargs, d_kwargs)`` as two new dicts.
+    """
+    mode_name, mode_options = mode[0], mode[1]
+
+    compress_opts = {}
+    if mode_name is not None:
+        compress_opts['mode'] = mode_name
+    if mode_options is not None:
+        compress_opts.update(mode_options)
+    # Later updates win, so the order below is significant.
+    compress_opts.update(store_size)
+    if c_return_bytearray:
+        compress_opts.update(c_return_bytearray)
+
+    decompress_opts = {}
+    if d_return_bytearray:
+        decompress_opts.update(d_return_bytearray)
+
+    return (compress_opts, decompress_opts)
+
+
+# Test single threaded usage with all valid variations of input
+def test_1(data, mode, store_size, c_return_bytearray, d_return_bytearray, dictionary):
+    """Round-trip ``data`` in the calling thread and check the result.
+
+    The output must equal the input and, when the decompression options
+    request it, be a bytearray.
+    """
+    compress_opts, decompress_opts = setup_kwargs(
+        mode, store_size, c_return_bytearray, d_return_bytearray)
+
+    result = roundtrip(data, compress_opts, decompress_opts, dictionary)
+
+    assert result == data
+    if d_return_bytearray['return_bytearray']:
+        assert isinstance(result, bytearray)
+
+
+# Test multi threaded usage with all valid variations of input
+def test_2(data, mode, store_size, dictionary):
+    """Round-trip 32 references to ``data`` across a pool of 8 threads.
+
+    Every worker call receives the same pair of kwargs dicts.
+    """
+    (c_kwargs, d_kwargs) = setup_kwargs(mode, store_size)
+
+    data_in = [data] * 32
+
+    rt = partial(roundtrip, c_kwargs=c_kwargs,
+                 d_kwargs=d_kwargs, dictionary=dictionary)
+    pool = ThreadPool(8)
+    try:
+        data_out = pool.map(rt, data_in)
+    finally:
+        # Shut the workers down even when a round trip fails, and wait for
+        # them to exit so no threads outlive the test.
+        pool.close()
+        pool.join()
+    assert data_in == data_out
diff --git a/contrib/python/lz4/py2/tests/block/test_block_1.py b/contrib/python/lz4/py2/tests/block/test_block_1.py
new file mode 100644
index 0000000000..4392bb332c
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/test_block_1.py
@@ -0,0 +1,149 @@
+import lz4.block
+import pytest
+import sys
+import os
+
+
+def test_decompress_ui32_overflow():
+    """An ``uncompressed_size`` of 2**32 + 64 must raise OverflowError."""
+    payload = lz4.block.compress(b'A' * 64)[4:]
+    oversized = (1 << 32) + 64
+    with pytest.raises(OverflowError):
+        lz4.block.decompress(payload, uncompressed_size=oversized)
+
+
+def test_decompress_without_leak():
+    """A header claiming more bytes than are produced must be rejected.
+
+    The first byte of a valid block is replaced with 0x4f (79) while the
+    payload still decompresses to 64 bytes, so that hand-crafted packet
+    cannot leak uninitialized(?) memory.
+    """
+    genuine = lz4.block.compress(b'A' * 64)
+    forged = b'\x4f' + genuine[1:]
+    expected = r'^Decompressor wrote 64 bytes, but 79 bytes expected from header$'
+    with pytest.raises(lz4.block.LZ4BlockError, match=expected):
+        lz4.block.decompress(forged)
+
+
+def test_decompress_with_small_buffer():
+    """Decompression must fail for a truncated block or an undersized buffer."""
+    packed = lz4.block.compress(b'A' * 64, store_size=False)
+    expected = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$'
+    attempts = [
+        (packed[4:], 64),  # first four bytes of the block missing
+        (packed, 60),      # destination smaller than the 64-byte payload
+    ]
+    for source, size in attempts:
+        with pytest.raises(lz4.block.LZ4BlockError, match=expected):
+            lz4.block.decompress(source, uncompressed_size=size)
+
+
+def test_decompress_truncated():
+    """Truncated compressed input must be rejected, never silently accepted.
+
+    Prefixes of 0 or 1 byte raise ValueError; the longer truncations listed
+    below raise LZ4BlockError.
+    """
+    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24
+    compressed = lz4.block.compress(input_data)
+    for n in [0, 1]:
+        with pytest.raises(ValueError, match='Input source data size too small'):
+            lz4.block.decompress(compressed[:n])
+    # -2 drops only the last two bytes of the block.
+    for n in [24, 25, -2, 27, 67, 85]:
+        with pytest.raises(lz4.block.LZ4BlockError):
+            lz4.block.decompress(compressed[:n])
+
+
+def test_decompress_with_trailer():
+    """Extra bytes appended to a valid block must make decompression fail."""
+    packed = lz4.block.compress(b'A' * 64)
+    expected = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$'
+    # One stray byte, a whole second block, and a second block without its
+    # first four bytes.
+    for trailer in (b'A', packed, packed[4:]):
+        with pytest.raises(lz4.block.LZ4BlockError, match=expected):
+            lz4.block.decompress(packed + trailer)
+
+
+@pytest.mark.skipif(
+    sys.version_info < (3,),
+    reason='Requires Python 3'
+)
+def test_unicode():
+    """Text (str) input must be rejected by both compress and decompress."""
+    DATA = b'x'
+    # One raises-block per call: a second statement placed after a raising
+    # one inside the same block would never be executed.
+    with pytest.raises(TypeError):
+        lz4.block.compress(DATA.decode('latin1'))
+    with pytest.raises(TypeError):
+        lz4.block.decompress(lz4.block.compress(DATA).decode('latin1'))
+
+# These next two are probably redundant given test_1 in test_block_0.py, but
+# we'll keep them for now
+
+
+@pytest.mark.skipif(
+    sys.version_info < (3,),
+    reason='Requires Python 3'
+)
+def test_return_bytearray():
+    """``return_bytearray=True`` must yield a bytearray with unchanged content.
+
+    Checked for both compress and decompress against the bytes results.
+    """
+    data = os.urandom(128 * 1024)  # 128 KiB of random input
+    compressed = lz4.block.compress(data)
+    b = lz4.block.compress(data, return_bytearray=True)
+    assert isinstance(b, bytearray)
+    assert bytes(b) == compressed
+    b = lz4.block.decompress(compressed, return_bytearray=True)
+    assert isinstance(b, bytearray)
+    assert bytes(b) == data
+
+
+@pytest.mark.skipif(
+    sys.version_info < (2, 7),
+    reason='Requires Python 2.7 or later'
+)
+def test_memoryview():
+    """memoryview input must give the same results as the underlying bytes."""
+    data = os.urandom(128 * 1024)  # 128 KiB of random input
+    compressed = lz4.block.compress(data)
+    assert lz4.block.compress(memoryview(data)) == compressed
+    assert lz4.block.decompress(memoryview(compressed)) == data
+
+
+def test_with_dict_none():
+    """None and empty dictionaries must not affect the round trip.
+
+    Each empty value is passed once on the compress side and once on the
+    decompress side, for both compression modes.
+    """
+    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24
+    for mode in ['default', 'high_compression']:
+        for empty in (None, b'', ''):
+            packed_with_dict = lz4.block.compress(
+                input_data, mode=mode, dict=empty)
+            assert lz4.block.decompress(packed_with_dict) == input_data
+
+            packed_plain = lz4.block.compress(input_data, mode=mode)
+            assert lz4.block.decompress(packed_plain, dict=empty) == input_data
+
+
+def test_with_dict():
+ # Checks how decompression reacts to a missing, truncated, different and
+ # matching dictionary for data that was compressed with dict1.
+ input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24
+ dict1 = input_data[10:30]
+ dict2 = input_data[20:40]
+ message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$'
+ for mode in ['default', 'high_compression']:
+ compressed = lz4.block.compress(input_data, mode=mode, dict=dict1)
+ # No dictionary at all: decompression is expected to fail.
+ with pytest.raises(lz4.block.LZ4BlockError, match=message):
+ lz4.block.decompress(compressed)
+ # Only the first two bytes of the dictionary: also expected to fail.
+ with pytest.raises(lz4.block.LZ4BlockError, match=message):
+ lz4.block.decompress(compressed, dict=dict1[:2])
+ # A different dictionary of the same length yields wrong output.
+ assert lz4.block.decompress(compressed, dict=dict2) != input_data
+ assert lz4.block.decompress(compressed, dict=dict1) == input_data
+ # Data compressed without a dictionary still decompresses when one is
+ # supplied. NOTE(review): indentation was lost in this listing, so whether
+ # this assertion sits inside the loop cannot be told from here - confirm.
+ assert lz4.block.decompress(lz4.block.compress(
+ input_data), dict=dict1) == input_data
+
+
+def test_known_decompress_1():
+    """Four zero bytes followed by a single zero byte decompress to empty bytes."""
+    packed = b'\x00\x00\x00\x00\x00'
+    expected = b''
+    assert lz4.block.decompress(packed) == expected
+
+
+def test_known_decompress_2():
+    """A hand-written block must decompress to a single space character."""
+    packed = b'\x01\x00\x00\x00\x10 '
+    expected = b' '
+    assert lz4.block.decompress(packed) == expected
+
+
+def test_known_decompress_3():
+    """A hand-written block must expand to four copies of a 26-byte phrase."""
+    packed = b'h\x00\x00\x00\xff\x0bLorem ipsum dolor sit amet\x1a\x006P amet'
+    expected = b'Lorem ipsum dolor sit amet' * 4
+    assert lz4.block.decompress(packed) == expected
+
+
+def test_known_decompress_4():
+    """A hand-written block must expand to 1000 copies of a 46-byte phrase."""
+    packed = b'\xb0\xb3\x00\x00\xff\x1fExcepteur sint occaecat cupidatat non proident.\x00' + (b'\xff' * 180) + b'\x1ePident'
+    expected = b'Excepteur sint occaecat cupidatat non proident' * 1000
+    assert lz4.block.decompress(packed) == expected
diff --git a/contrib/python/lz4/py2/tests/block/test_block_2.py b/contrib/python/lz4/py2/tests/block/test_block_2.py
new file mode 100644
index 0000000000..87ceefb728
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/test_block_2.py
@@ -0,0 +1,62 @@
+import pytest
+import sys
+import lz4.block
+import psutil
+import os
+
+# This test requires allocating a big lump of memory. In order to
+# avoid a massive memory allocation during byte compilation, we have
+# to declare a variable for the size of the buffer we're going to
+# create outside the scope of the function below. See:
+# https://bugs.python.org/issue21074
+_4GB = 0x100000000 # 4GB
+
+# This test will be killed on Travis due to the 3GB memory limit
+# there. Unfortunately psutil reports the host memory, not the memory
+# available to the container, and so can't be used to detect available
+# memory, so instead, as an ugly hack for detecting we're on Travis we
+# check for the TRAVIS environment variable being set. This is quite
+# fragile.
+
+
+@pytest.mark.skipif(
+    os.environ.get('TRAVIS') is not None,
+    reason='Skipping test on Travis due to insufficient memory'
+)
+@pytest.mark.skipif(
+    sys.maxsize < 0xffffffff,
+    reason='Py_ssize_t too small for this test'
+)
+@pytest.mark.skipif(
+    psutil.virtual_memory().total < _4GB,
+    reason='Insufficient system memory for this test'
+)
+def test_huge():
+    """A 4GB input or dictionary must raise OverflowError.
+
+    Covers compress and decompress, each with the oversized buffer passed as
+    the source and as the dictionary.
+    """
+    try:
+        huge = b'\0' * _4GB
+    except MemoryError:
+        pytest.skip('Insufficient system memory for this test')
+
+    input_message = 'Input too large for LZ4 API'
+    dict_message = 'Dictionary too large for LZ4 API'
+    attempts = [
+        (lz4.block.compress, huge, {}, input_message),
+        (lz4.block.compress, b'', {'dict': huge}, dict_message),
+        (lz4.block.decompress, huge, {}, input_message),
+        (lz4.block.decompress, b'', {'dict': huge}, dict_message),
+    ]
+    for func, source, extra, message in attempts:
+        with pytest.raises(OverflowError, match=message):
+            func(source, **extra)
+
+
+def test_dummy():
+ # Intentionally empty. NOTE(review): presumably here so this module still
+ # has one runnable test when test_huge is skipped - confirm.
+ pass
diff --git a/contrib/python/lz4/py2/tests/block/test_block_3.py b/contrib/python/lz4/py2/tests/block/test_block_3.py
new file mode 100644
index 0000000000..0c3fb0821d
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/test_block_3.py
@@ -0,0 +1,38 @@
+import lz4.block
+import pytest
+
+
+# Single payload: one byte value repeated 1024 * 1024 times.
+test_data = [b'a' * 1024 * 1024]
+
+
+@pytest.fixture(
+    params=test_data,
+    ids=['data%d' % n for n in range(len(test_data))],
+)
+def data(request):
+    """Provide one entry of ``test_data`` per parametrized run."""
+    return request.param
+
+
+def test_block_decompress_mem_usage(data):
+ # Decompresses the same block 1000 times under tracemalloc and asserts that
+ # the first reported difference between snapshots stays below 4 KiB.
+ # The test is skipped when tracemalloc cannot be imported.
+ tracemalloc = pytest.importorskip('tracemalloc')
+
+ tracemalloc.start()
+
+ compressed = lz4.block.compress(data)
+ prev_snapshot = None
+
+ for i in range(1000):
+ decompressed = lz4.block.decompress(compressed) # noqa: F841
+
+ # A snapshot is taken on every 100th iteration (i = 0, 100, ..., 900).
+ if i % 100 == 0:
+ snapshot = tracemalloc.take_snapshot()
+
+ # NOTE(review): indentation was lost in this listing; the comparison and
+ # the prev_snapshot update presumably belong to the "every 100th
+ # iteration" branch above - confirm.
+ if prev_snapshot:
+ stats = snapshot.compare_to(prev_snapshot, 'lineno')
+ # stats[0] is presumably the largest per-line difference - confirm
+ # against the tracemalloc documentation.
+ assert stats[0].size_diff < (1024 * 4)
+
+ prev_snapshot = snapshot