diff options
author | vvvv <vvvv@ydb.tech> | 2023-08-30 20:49:53 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2023-08-30 21:17:44 +0300 |
commit | f154e22342f327342effe873b0a00ad80c975e76 (patch) | |
tree | fff231496c10fbfcff025ed953b512bf2a82d7c0 /library/python/cyson/ut/test_reader_writer.py | |
parent | 4ebafdd49d8b0706c5af76ef7c2d0b3b498d0310 (diff) | |
download | ydb-f154e22342f327342effe873b0a00ad80c975e76.tar.gz |
Moved udf_test and refactored test_framework
Локально упавший тест выполняется
%%
vvvv@mr-nvme-testing-08:~/repo/arcadia/statbox/nile/tests/yql/py2/part_2$ arc checkout move_udf_test_and_refactor_tf
Switched to branch 'move_udf_test_and_refactor_tf'
vvvv@mr-nvme-testing-08:~/repo/arcadia/statbox/nile/tests/yql/py2/part_2$ ya make -tA -F '*test_unchanged_table*'
Warn[-WPluginErr]: in $B/statbox/nile/tests/yql/py2/part_2/libpy2-part_2.so: Requirement cpu is redefined 2 -> 4
Warn[-WPluginErr]: in $B/statbox/nile/tests/yql/py2/part_2/libpy2-part_2.so: Requirement ram is redefined 16 -> 9
Number of suites skipped by name: 2, by filter *test_unchanged_table*
Total 1 suite:
1 - GOOD
Total 4 tests:
4 - GOOD
Ok
%%
судя по ошибке он flaky
Diffstat (limited to 'library/python/cyson/ut/test_reader_writer.py')
-rw-r--r-- | library/python/cyson/ut/test_reader_writer.py | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/library/python/cyson/ut/test_reader_writer.py b/library/python/cyson/ut/test_reader_writer.py new file mode 100644 index 0000000000..6428ea0b56 --- /dev/null +++ b/library/python/cyson/ut/test_reader_writer.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- + +from __future__ import print_function, absolute_import, division + +import io +import math +import pytest +import six +import sys + +from functools import partial + +from cyson import ( + dumps, loads, YsonInt64, YsonUInt64, UInt, Writer, OutputStream, + UnicodeReader, +) + + +if six.PY2: + NativeUInt = long # noqa: F821 +elif six.PY3: + NativeUInt = UInt + unicode = str + long = int +else: + raise RuntimeError('Unsupported Python version') + + +def canonize(value, as_unicode=False): + _canonize = partial(canonize, as_unicode=as_unicode) + + if isinstance(value, (list, tuple)): + return [_canonize(_) for _ in value] + elif isinstance(value, dict): + return {_canonize(k): _canonize(value[k]) for k in value} + elif isinstance(value, unicode) and not as_unicode: + return value.encode('utf8') + elif isinstance(value, bytes) and as_unicode: + return value.decode('utf8') + + return value + + +def switch_string_type(string): + if isinstance(string, bytes): + return string.decode('utf8') + elif isinstance(string, unicode): + return string.encode('utf8') + + raise TypeError('expected unicode or bytes, got {!r}'.format(string)) + + +def coerce(obj, to, via=None): + if via is None: + via = to + + if isinstance(obj, to): + return obj + + return via(obj) + + +SKIP_PY3 = pytest.mark.skipif(six.PY3, reason='Makes no sense for Python3') + + +if six.PY3 and sys.platform == 'win32': + NUMPY_CASES = [] +else: + import numpy as np + + NUMPY_CASES = [ + # numpy int + np.int8(2 ** 7 - 1), np.int16(2 ** 15 - 1), + np.int32(2 ** 31 - 1), np.int64(2 ** 63 - 1), + # numpy uint + np.uint8(2 ** 8 - 1), np.uint16(2 ** 16 - 1), + np.uint32(2 ** 32 - 1), np.uint64(2 ** 64 - 1), + # numpy float + np.float16(100.0), np.float32(100.0), np.float64(100.0), + ] + + +CASES = [ + # NoneType + None, + # boolean + True, False, + # int + 0, 1, -1, int(2 ** 63 - 1), int(-2 ** 63), + # float + 0.0, 100.0, -100.0, float('inf'), float('-inf'), + # bytes + b'', b'hello', u'Привет'.encode('utf8'), + # unicode + u'', u'hello', u'Привет', + # list + [], [0], [1, 'hello'], [17, 'q'] * 100, [b'bytes'], + # tuple + (), (0,), (1, 'hello'), (17, 'q') * 100, (b'bytes',), + # dict + {}, {'a': 'b'}, {'a': 17}, {'a': [1, 2, 3]}, {b'a': 1, u'b': b'a'} +] + NUMPY_CASES + + +@pytest.mark.parametrize('format', ['binary', 'text', 'pretty']) +@pytest.mark.parametrize('value', CASES) +def test_roundtrip(value, format): + encoded = dumps(value, format) + decoded = loads(encoded) + assert encoded == dumps(value, switch_string_type(format)) + assert decoded == canonize(value) + + +# NOTE: roundtrip test doesn't work for NaN (NaN != NaN) +@pytest.mark.parametrize('format', ['binary', 'text', 'pretty']) +def test_nan(format): + encoded = dumps(float('nan'), format) + decoded = loads(encoded) + assert encoded == dumps(float('nan'), switch_string_type(format)) + assert math.isnan(decoded) + + +@SKIP_PY3 +@pytest.mark.parametrize('format', ['binary', 'text', 'pretty']) +@pytest.mark.parametrize( + 'value', [long(0), long(1), long(2 ** 63), long(2 ** 64 - 1)] +) +def test_long_roundtrip(value, format): + encoded = dumps(value, format) + decoded = loads(encoded) + assert encoded == dumps(value, switch_string_type(format)) + assert decoded == value + + +@pytest.mark.parametrize( + 'value', [NativeUInt(0), NativeUInt(111), NativeUInt(2 ** 63), NativeUInt(2 ** 64 - 1)] +) +@pytest.mark.parametrize('format', ['binary', 'text', 'pretty']) +def test_readwrite_uint64(value, format): + dumped_uint64 = dumps(coerce(value, YsonUInt64), format=format) + loaded_uint64 = loads(dumped_uint64) + + assert type(value) is NativeUInt + assert type(loaded_uint64) is NativeUInt + assert dumps(value, format=format) == dumped_uint64 + + +@pytest.mark.parametrize('value', [int(-2 ** 63), -111, 0, 111, int(2 ** 63 - 1)]) +@pytest.mark.parametrize('format', ['binary', 'text', 'pretty']) +def test_readwrite_int64(value, format): + dumped_int64 = dumps(YsonInt64(value), format=format) + loaded_int64 = loads(dumped_int64) + + assert type(value) is int + assert type(loaded_int64) is int + assert dumps(value, format=format) == dumped_int64 + + +@SKIP_PY3 +def test_long_overflow(): + with pytest.raises(OverflowError): + dumps(long(-1)) + + with pytest.raises(OverflowError): + dumps(long(2**64)) + + +@pytest.mark.parametrize('value', [2 ** 63, -2 ** 63 - 1]) +def test_int64_overflow(value): + with pytest.raises(OverflowError): + int64_value = YsonInt64(value) + dumps(int64_value) + + if six.PY3: + with pytest.raises(OverflowError): + dumps(value) + + +@pytest.mark.parametrize('value', [2 ** 64, 2 ** 100]) +def test_uint64_overflow(value): + with pytest.raises(OverflowError): + uint64_value = YsonUInt64(value) + dumps(uint64_value) + + +@pytest.mark.parametrize('format', ['binary', 'text', 'pretty']) +def test_force_write_sequence(format): + class Sequence(object): + def __init__(self, seq): + self._seq = seq + + def __getitem__(self, index): + return self._seq[index] + + def __len__(self): + return len(self._seq) + + sequence = [1, 1.1, None, b'xyz'] + + sink = io.BytesIO() + writer = Writer(OutputStream.from_file(sink), format=format) + + writer.begin_stream() + writer.list(Sequence(sequence)) + writer.end_stream() + + assert sink.getvalue() == dumps(sequence, format) + + +@pytest.mark.parametrize('format', ['binary', 'text', 'pretty']) +def test_force_write_mapping(format): + class Mapping(object): + def __init__(self, mapping): + self._mapping = mapping + + def __getitem__(self, key): + return self._mapping[key] + + def keys(self): + return self._mapping.keys() + + mapping = {b'a': 1, b'b': 1.1, b'c': None, b'd': b'some'} + + sink = io.BytesIO() + writer = Writer(OutputStream.from_file(sink), format=format) + + writer.begin_stream() + writer.map(Mapping(mapping)) + writer.end_stream() + + assert sink.getvalue() == dumps(mapping, format) + + +@pytest.mark.parametrize('format', ['binary', 'text', 'pretty']) +@pytest.mark.parametrize('value', CASES) +def test_unicode_reader(value, format): + expected = canonize(value, as_unicode=True) + got = loads(dumps(value, format), UnicodeReader) + assert expected == got + + +def test_unicode_reader_raises_unicode_decode_error(): + not_decodable = b'\x80\x81' + with pytest.raises(UnicodeDecodeError): + loads(dumps(not_decodable, format='binary'), UnicodeReader) + + +def test_unicode_reader_decodes_object_with_attributes(): + data = b'{"a" = "b"; "c" = <"foo" = "bar">"d"}' + expected = {u"a": u"b", u"c": u"d"} + assert loads(data, UnicodeReader) == expected |