YQ Connector: move tests from yql to ydb (OSS)

Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
author: vitalyisaev <vitalyisaev@ydb.tech> 2023-11-14 09:58:56 +0300
committer: vitalyisaev <vitalyisaev@ydb.tech> 2023-11-14 10:20:20 +0300
commit: c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
tree: cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/python/lz4/py2/tests/block
parent: d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
download: ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz
6 files changed, 452 insertions, 0 deletions
diff --git a/contrib/python/lz4/py2/tests/block/conftest.py b/contrib/python/lz4/py2/tests/block/conftest.py
new file mode 100644
index 0000000000..089ce0f83c
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/conftest.py
@@ -0,0 +1,111 @@
+import pytest
+import os
+import sys
+
+
+test_data = [
+    (b''),
+    (os.urandom(8 * 1024)),
+    (b'0' * 8 * 1024),
+    (bytearray(b'')),
+    (bytearray(os.urandom(8 * 1024))),
+    #(bytearray(open(os.path.join(os.path.dirname(__file__), 'numpy_byte_array.bin'), 'rb').read()))
+]
+
+if sys.version_info > (2, 7):
+    test_data += [
+        (memoryview(b'')),
+        (memoryview(os.urandom(8 * 1024)))
+    ]
+
+
+@pytest.fixture(
+    params=test_data,
+    ids=[
+        'data' + str(i) for i in range(len(test_data))
+    ]
+)
+def data(request):
+    return request.param
+
+
+@pytest.fixture(
+    params=[
+        (
+            {
+                'store_size': True
+            }
+        ),
+        (
+            {
+                'store_size': False
+            }
+        ),
+    ]
+)
+def store_size(request):
+    return request.param
+
+
+@pytest.fixture(
+    params=[
+        (
+            {
+                'return_bytearray': True
+            }
+        ),
+        (
+            {
+                'return_bytearray': False
+            }
+        ),
+    ]
+)
+def return_bytearray(request):
+    return request.param
+
+
+@pytest.fixture
+def c_return_bytearray(return_bytearray):
+    return return_bytearray
+
+
+@pytest.fixture
+def d_return_bytearray(return_bytearray):
+    return return_bytearray
+
+
+@pytest.fixture(
+    params=[
+        ('fast', None)
+    ] + [
+        ('fast', {'acceleration': s}) for s in range(10)
+    ] + [
+        ('high_compression', None)
+    ] + [
+        ('high_compression', {'compression': s}) for s in range(17)
+    ] + [
+        (None, None)
+    ]
+)
+def mode(request):
+    return request.param
+
+
+dictionary = [
+    None,
+    (0, 0),
+    (100, 200),
+    (0, 8 * 1024),
+    os.urandom(8 * 1024)
+]
+
+
+@pytest.fixture(
+    params=dictionary,
+    ids=[
+        'dictionary' + str(i) for i in range(len(dictionary))
+    ]
+)
+def dictionary(request):
+    return request.param
diff --git a/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin
new file mode 100644
index 0000000000..49537e2d90
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/numpy_byte_array.bin
diff --git a/contrib/python/lz4/py2/tests/block/test_block_0.py b/contrib/python/lz4/py2/tests/block/test_block_0.py
new file mode 100644
index 0000000000..cca3e65b61
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/test_block_0.py
@@ -0,0 +1,92 @@
+import lz4.block
+from multiprocessing.pool import ThreadPool
+import sys
+from functools import partial
+if sys.version_info <= (3, 2):
+    import struct
+
+
+def get_stored_size(buff):
+    """Return the little-endian uint32 stored in the first 4 bytes of buff.
+
+    Returns None when buff holds fewer than 4 bytes.
+    """
+    # struct.unpack below would not work on a memoryview, hence tobytes().
+    if sys.version_info > (2, 7) and isinstance(buff, memoryview):
+        raw = buff.tobytes()
+    else:
+        raw = bytes(buff)
+
+    if len(raw) < 4:
+        return None
+
+    head = raw[:4]
+    if sys.version_info > (3, 2):
+        return int.from_bytes(head, 'little')
+    return struct.unpack('<I', head)[0]
+
+
+def roundtrip(x, c_kwargs, d_kwargs, dictionary):
+    """Compress x, decompress the result and return the decompressed data."""
+    # Copy the kwargs: test_2 shares one pair between all pool threads.
+    c_kwargs, d_kwargs = dict(c_kwargs), dict(d_kwargs)
+    if dictionary:
+        if isinstance(dictionary, tuple):  # (start, stop) slice of x
+            d = x[dictionary[0]:dictionary[1]]
+        else:
+            d = dictionary
+        c_kwargs['dict'] = d
+        d_kwargs['dict'] = d
+    c = lz4.block.compress(x, **c_kwargs)
+    if c_kwargs['store_size']:
+        assert get_stored_size(c) == len(x)
+    else:
+        d_kwargs['uncompressed_size'] = len(x)
+    return lz4.block.decompress(c, **d_kwargs)
+
+
+def setup_kwargs(mode, store_size, c_return_bytearray=None, d_return_bytearray=None):
+    """Build the (compress kwargs, decompress kwargs) pair for one test case."""
+    mode_name, mode_options = mode[0], mode[1]
+
+    compress_args = {}
+    if mode_name is not None:
+        compress_args['mode'] = mode_name
+    if mode_options is not None:
+        compress_args.update(mode_options)
+    compress_args.update(store_size)
+    if c_return_bytearray:
+        compress_args.update(c_return_bytearray)
+
+    # Decompress kwargs start empty; only d_return_bytearray feeds them.
+    decompress_args = {}
+    if d_return_bytearray:
+        decompress_args.update(d_return_bytearray)
+
+    return compress_args, decompress_args
+
+
+# Test single threaded usage with all valid variations of input
+def test_1(data, mode, store_size, c_return_bytearray, d_return_bytearray, dictionary):
+    (c_kwargs, d_kwargs) = setup_kwargs(
+        mode, store_size, c_return_bytearray, d_return_bytearray)
+
+    d = roundtrip(data, c_kwargs, d_kwargs, dictionary)
+
+    assert d == data
+    if d_return_bytearray['return_bytearray']:
+        assert isinstance(d, bytearray)
+
+
+# Test multi threaded usage with all valid variations of input
+def test_2(data, mode, store_size, dictionary):
+    (c_kwargs, d_kwargs) = setup_kwargs(mode, store_size)
+    rt = partial(roundtrip, c_kwargs=c_kwargs,
+                 d_kwargs=d_kwargs, dictionary=dictionary)
+    pool = ThreadPool(8)
+    try:
+        data_out = pool.map(rt, [data] * 32)
+    finally:
+        pool.close()
+        pool.join()  # always reap the workers, even if a roundtrip raised
+    assert data_out == [data] * 32
diff --git a/contrib/python/lz4/py2/tests/block/test_block_1.py b/contrib/python/lz4/py2/tests/block/test_block_1.py
new file mode 100644
index 0000000000..4392bb332c
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/test_block_1.py
@@ -0,0 +1,149 @@
+import lz4.block
+import pytest
+import sys
+import os
+
+
+def test_decompress_ui32_overflow():
+    data = lz4.block.compress(b'A' * 64)
+    with pytest.raises(OverflowError):
+        lz4.block.decompress(data[4:], uncompressed_size=((1 << 32) + 64))
+
+
+def test_decompress_without_leak():
+    # Verify that hand-crafted packet does not leak uninitialized(?) memory.
+    data = lz4.block.compress(b'A' * 64)
+    message = r'^Decompressor wrote 64 bytes, but 79 bytes expected from header$'
+    with pytest.raises(lz4.block.LZ4BlockError, match=message):
+        lz4.block.decompress(b'\x4f' + data[1:])
+
+
+def test_decompress_with_small_buffer():
+    data = lz4.block.compress(b'A' * 64, store_size=False)
+    message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$'
+    with pytest.raises(lz4.block.LZ4BlockError, match=message):
+        lz4.block.decompress(data[4:], uncompressed_size=64)
+    with pytest.raises(lz4.block.LZ4BlockError, match=message):
+        lz4.block.decompress(data, uncompressed_size=60)
+
+
+def test_decompress_truncated():
+    """Truncated compressed input must make decompress raise."""
+    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24
+    compressed = lz4.block.compress(input_data)
+
+    # Empty and one-byte inputs are rejected with ValueError.
+    for n in [0, 1]:
+        with pytest.raises(ValueError, match='Input source data size too small'):
+            lz4.block.decompress(compressed[:n])
+
+    # Other truncations are expected to fail with LZ4BlockError instead.
+    # NOTE: -2 is a negative slice bound, i.e. only the last two bytes are
+    # dropped.
+    for n in [24, 25, -2, 27, 67, 85]:
+        with pytest.raises(lz4.block.LZ4BlockError):
+            lz4.block.decompress(compressed[:n])
+
+
+def test_decompress_with_trailer():
+    data = b'A' * 64
+    comp = lz4.block.compress(data)
+    message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$'
+    with pytest.raises(lz4.block.LZ4BlockError, match=message):
+        lz4.block.decompress(comp + b'A')
+    with pytest.raises(lz4.block.LZ4BlockError, match=message):
+        lz4.block.decompress(comp + comp)
+    with pytest.raises(lz4.block.LZ4BlockError, match=message):
+        lz4.block.decompress(comp + comp[4:])
+
+
+def test_unicode():
+    if sys.version_info < (3,):
+        return  # skip
+    with pytest.raises(TypeError):  # compress must reject text input
+        lz4.block.compress(b'x'.decode('latin1'))
+    with pytest.raises(TypeError):  # checked separately so it really runs
+        lz4.block.decompress(lz4.block.compress(b'x').decode('latin1'))
+
+# These next two are probably redundant given test_1 above but we'll keep them
+# for now
+
+
+def test_return_bytearray():
+    if sys.version_info < (3,):
+        return  # skip
+    data = os.urandom(128 * 1024)  # Read 128kb
+    compressed = lz4.block.compress(data)
+    b = lz4.block.compress(data, return_bytearray=True)
+    assert isinstance(b, bytearray)
+    assert bytes(b) == compressed
+    b = lz4.block.decompress(compressed, return_bytearray=True)
+    assert isinstance(b, bytearray)
+    assert bytes(b) == data
+
+
+def test_memoryview():
+    if sys.version_info < (2, 7):
+        return  # skip
+    data = os.urandom(128 * 1024)  # Read 128kb
+    compressed = lz4.block.compress(data)
+    assert lz4.block.compress(memoryview(data)) == compressed
+    assert lz4.block.decompress(memoryview(compressed)) == data
+
+
+def test_with_dict_none():
+    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24
+    for mode in ['default', 'high_compression']:
+        assert lz4.block.decompress(lz4.block.compress(
+            input_data, mode=mode, dict=None)) == input_data
+        assert lz4.block.decompress(lz4.block.compress(
+            input_data, mode=mode), dict=None) == input_data
+        assert lz4.block.decompress(lz4.block.compress(
+            input_data, mode=mode, dict=b'')) == input_data
+        assert lz4.block.decompress(lz4.block.compress(
+            input_data, mode=mode), dict=b'') == input_data
+        assert lz4.block.decompress(lz4.block.compress(
+            input_data, mode=mode, dict='')) == input_data
+        assert lz4.block.decompress(lz4.block.compress(
+            input_data, mode=mode), dict='') == input_data
+
+
+def test_with_dict():
+    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 24
+    dict1 = input_data[10:30]
+    dict2 = input_data[20:40]
+    message = r'^Decompression failed: corrupt input or insufficient space in destination buffer. Error code: \d+$'
+    for mode in ['default', 'high_compression']:
+        compressed = lz4.block.compress(input_data, mode=mode, dict=dict1)
+        with pytest.raises(lz4.block.LZ4BlockError, match=message):
+            lz4.block.decompress(compressed)
+        with pytest.raises(lz4.block.LZ4BlockError, match=message):
+            lz4.block.decompress(compressed, dict=dict1[:2])
+        assert lz4.block.decompress(compressed, dict=dict2) != input_data
+        assert lz4.block.decompress(compressed, dict=dict1) == input_data
+    assert lz4.block.decompress(lz4.block.compress(
+        input_data), dict=dict1) == input_data
+
+
+def test_known_decompress_1():
+    input = b'\x00\x00\x00\x00\x00'
+    output = b''
+    assert lz4.block.decompress(input) == output
+
+
+def test_known_decompress_2():
+    input = b'\x01\x00\x00\x00\x10 '
+    output = b' '
+    assert lz4.block.decompress(input) == output
+
+
+def test_known_decompress_3():
+    input = b'h\x00\x00\x00\xff\x0bLorem ipsum dolor sit amet\x1a\x006P amet'
+    output = b'Lorem ipsum dolor sit amet' * 4
+    assert lz4.block.decompress(input) == output
+
+
+def test_known_decompress_4():
+    input = b'\xb0\xb3\x00\x00\xff\x1fExcepteur sint occaecat cupidatat non proident.\x00' + (b'\xff' * 180) + b'\x1ePident'
+    output = b'Excepteur sint occaecat cupidatat non proident' * 1000
+    assert lz4.block.decompress(input) == output
diff --git a/contrib/python/lz4/py2/tests/block/test_block_2.py b/contrib/python/lz4/py2/tests/block/test_block_2.py
new file mode 100644
index 0000000000..87ceefb728
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/test_block_2.py
@@ -0,0 +1,62 @@
+import pytest
+import sys
+import lz4.block
+import psutil
+import os
+
+# This test requires allocating a big lump of memory. In order to
+# avoid a massive memory allocation during byte compilation, we have
+# to declare a variable for the size of the buffer we're going to
+# create outside the scope of the function below. See:
+# https://bugs.python.org/issue21074
+_4GB = 0x100000000  # 4GB
+
+# This test will be killed on Travis due to the 3GB memory limit
+# there. Unfortunately psutil reports the host memory, not the memory
+# available to the container, and so can't be used to detect available
+# memory, so instead, as an ugly hack for detecting we're on Travis we
+# check for the TRAVIS environment variable being set. This is quite
+# fragile.
+
+
+@pytest.mark.skipif(
+    os.environ.get('TRAVIS') is not None,
+    reason='Skipping test on Travis due to insufficient memory'
+)
+@pytest.mark.skipif(
+    sys.maxsize < 0xffffffff,
+    reason='Py_ssize_t too small for this test'
+)
+@pytest.mark.skipif(
+    psutil.virtual_memory().total < _4GB,
+    reason='Insufficient system memory for this test'
+)
+def test_huge():
+    try:
+        huge = b'\0' * _4GB
+    except MemoryError:
+        pytest.skip('Insufficient system memory for this test')
+
+    with pytest.raises(
+            OverflowError, match='Input too large for LZ4 API'
+    ):
+        lz4.block.compress(huge)
+
+    with pytest.raises(
+            OverflowError, match='Dictionary too large for LZ4 API'
+    ):
+        lz4.block.compress(b'', dict=huge)
+
+    with pytest.raises(
+            OverflowError, match='Input too large for LZ4 API'
+    ):
+        lz4.block.decompress(huge)
+
+    with pytest.raises(
+            OverflowError, match='Dictionary too large for LZ4 API'
+    ):
+        lz4.block.decompress(b'', dict=huge)
+
+
+def test_dummy():
+    pass
diff --git a/contrib/python/lz4/py2/tests/block/test_block_3.py b/contrib/python/lz4/py2/tests/block/test_block_3.py
new file mode 100644
index 0000000000..0c3fb0821d
--- /dev/null
+++ b/contrib/python/lz4/py2/tests/block/test_block_3.py
@@ -0,0 +1,38 @@
+import lz4.block
+import pytest
+
+
+test_data = [
+    (b'a' * 1024 * 1024),
+]
+
+
+@pytest.fixture(
+    params=test_data,
+    ids=[
+        'data' + str(i) for i in range(len(test_data))
+    ]
+)
+def data(request):
+    return request.param
+
+
+def test_block_decompress_mem_usage(data):
+    # Repeated decompression must not keep growing traced memory.
+    tracemalloc = pytest.importorskip('tracemalloc')
+    was_tracing = tracemalloc.is_tracing()
+    tracemalloc.start()
+    try:
+        compressed = lz4.block.compress(data)
+        prev_snapshot = None
+        for i in range(1000):
+            decompressed = lz4.block.decompress(compressed)  # noqa: F841
+            if i % 100 == 0:
+                snapshot = tracemalloc.take_snapshot()
+                if prev_snapshot:
+                    stats = snapshot.compare_to(prev_snapshot, 'lineno')
+                    assert stats[0].size_diff < (1024 * 4)
+                prev_snapshot = snapshot
+    finally:
+        if not was_tracing:
+            tracemalloc.stop()  # do not leave tracing on for later tests
author	vitalyisaev <vitalyisaev@ydb.tech>	2023-11-14 09:58:56 +0300
committer	vitalyisaev <vitalyisaev@ydb.tech>	2023-11-14 10:20:20 +0300
commit	c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
tree	cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/python/lz4/py2/tests/block
parent	d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
download	ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz