path: root/library/python/cyson/ut/test_reader_writer.py



# -*- coding: utf-8 -*-

from __future__ import print_function, absolute_import, division

import io
import math
import pytest
import six
import sys

from functools import partial

from cyson import (
    dumps, loads, YsonInt64, YsonUInt64, UInt, Writer, OutputStream,
    UnicodeReader,
)


# Python 2/3 compatibility names used by the helpers and tests below.
if not (six.PY2 or six.PY3):
    raise RuntimeError('Unsupported Python version')

if six.PY2:
    # On Python 2 the native unsigned type is the builtin `long`.
    NativeUInt = long  # noqa: F821
else:
    # On Python 3 cyson's `UInt` plays that role; `unicode` and `long` are
    # aliased so the rest of the module can use the Python 2 names.
    NativeUInt = UInt
    unicode = str
    long = int


def canonize(value, as_unicode=False):
    """Recursively normalize the strings inside `value` to one string type.

    Lists and tuples are both returned as lists; dict keys and values are
    converted recursively.  When `as_unicode` is false, unicode strings are
    encoded to UTF-8 bytes and bytes are left alone; when it is true, bytes
    are decoded from UTF-8 and unicode strings are left alone.  Any other
    value is returned unchanged.
    """
    def convert(item):
        # Propagate the same string-type choice to nested items.
        return canonize(item, as_unicode=as_unicode)

    if isinstance(value, (list, tuple)):
        return [convert(item) for item in value]

    if isinstance(value, dict):
        return {convert(key): convert(value[key]) for key in value}

    if as_unicode:
        if isinstance(value, bytes):
            return value.decode('utf8')
    elif isinstance(value, unicode):
        return value.encode('utf8')

    return value


def switch_string_type(string):
    """Return `string` converted to the other string type through UTF-8.

    Bytes are decoded to unicode and unicode is encoded to bytes.

    Raises:
        TypeError: if `string` is neither bytes nor unicode.
    """
    if not isinstance(string, (bytes, unicode)):
        raise TypeError('expected unicode or bytes, got {!r}'.format(string))

    # Bytes take precedence, matching the order of the type checks.
    transcode = string.decode if isinstance(string, bytes) else string.encode
    return transcode('utf8')


def coerce(obj, to, via=None):
    """Return `obj` itself if it is an instance of `to`, else convert it.

    The conversion calls `via(obj)`; when `via` is None, `to` is used as the
    converter.
    """
    if isinstance(obj, to):
        return obj

    converter = to if via is None else via
    return converter(obj)


# Marker that skips a test when running under Python 3.
SKIP_PY3 = pytest.mark.skipif(
    six.PY3,
    reason='Makes no sense for Python3',
)


# numpy scalars are only added to the test cases when the interpreter is not
# Python 3 on Windows; in that configuration numpy is not imported at all.
if not (six.PY3 and sys.platform == 'win32'):
    import numpy as np

    NUMPY_CASES = [
        # numpy int (largest value of each width)
        np.int8(2 ** 7 - 1),
        np.int16(2 ** 15 - 1),
        np.int32(2 ** 31 - 1),
        np.int64(2 ** 63 - 1),
        # numpy uint (largest value of each width)
        np.uint8(2 ** 8 - 1),
        np.uint16(2 ** 16 - 1),
        np.uint32(2 ** 32 - 1),
        np.uint64(2 ** 64 - 1),
        # numpy float
        np.float16(100.0),
        np.float32(100.0),
        np.float64(100.0),
    ]
else:
    NUMPY_CASES = []


# Values shared by the parametrized round-trip and unicode-reader tests.
CASES = [
    # NoneType
    None,
    # boolean
    True,
    False,
    # int
    0,
    1,
    -1,
    int(2 ** 63 - 1),
    int(-2 ** 63),
    # float
    0.0,
    100.0,
    -100.0,
    float('inf'),
    float('-inf'),
    # bytes
    b'',
    b'hello',
    u'Привет'.encode('utf8'),
    # unicode
    u'',
    u'hello',
    u'Привет',
    # list
    [],
    [0],
    [1, 'hello'],
    [17, 'q'] * 100,
    [b'bytes'],
    # tuple
    (),
    (0,),
    (1, 'hello'),
    (17, 'q') * 100,
    (b'bytes',),
    # dict
    {},
    {'a': 'b'},
    {'a': 17},
    {'a': [1, 2, 3]},
    {b'a': 1, u'b': b'a'},
] + NUMPY_CASES


@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
@pytest.mark.parametrize('value', CASES)
def test_roundtrip(value, format):
    """Dump each case, load it back and compare with its canonized form.

    The dump must also be identical when the format name is passed in the
    other string type (bytes instead of unicode or vice versa).
    """
    serialized = dumps(value, format)
    restored = loads(serialized)

    flipped_format = switch_string_type(format)
    assert serialized == dumps(value, flipped_format)

    assert restored == canonize(value)


# NOTE: roundtrip test doesn't work for NaN (NaN != NaN)
@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_nan(format):
    """Round-trip NaN, checking the result with `math.isnan`.

    NaN never compares equal to itself, so it cannot be covered by the
    equality-based round-trip test.
    """
    nan = float('nan')

    serialized = dumps(nan, format)
    restored = loads(serialized)

    flipped_format = switch_string_type(format)
    assert serialized == dumps(nan, flipped_format)

    assert math.isnan(restored)


@SKIP_PY3
@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
@pytest.mark.parametrize(
    'value', [long(0), long(1), long(2 ** 63), long(2 ** 64 - 1)]
)
def test_long_roundtrip(value, format):
    """Python 2 only: `long` values from 0 to 2**64 - 1 must round-trip.

    The dump must not depend on the string type of the format name.
    """
    serialized = dumps(value, format)
    restored = loads(serialized)

    flipped_format = switch_string_type(format)
    assert serialized == dumps(value, flipped_format)

    assert restored == value


@pytest.mark.parametrize(
    'value', [NativeUInt(0), NativeUInt(111), NativeUInt(2 ** 63), NativeUInt(2 ** 64 - 1)]
)
@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_readwrite_uint64(value, format):
    """A native unsigned value must dump exactly like its YsonUInt64 form.

    Loading the dump must give back an object whose exact type is NativeUInt.
    """
    explicit_uint64 = coerce(value, YsonUInt64)
    dumped = dumps(explicit_uint64, format=format)
    loaded = loads(dumped)

    # Sanity check on the parametrized input itself.
    assert type(value) is NativeUInt
    assert type(loaded) is NativeUInt
    assert dumps(value, format=format) == dumped


@pytest.mark.parametrize('value', [int(-2 ** 63), -111, 0, 111, int(2 ** 63 - 1)])
@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_readwrite_int64(value, format):
    """A plain int must dump exactly like the same value wrapped in YsonInt64.

    Loading the dump must give back an object whose exact type is int.
    """
    explicit_int64 = YsonInt64(value)
    dumped = dumps(explicit_int64, format=format)
    loaded = loads(dumped)

    # Sanity check on the parametrized input itself.
    assert type(value) is int
    assert type(loaded) is int
    assert dumps(value, format=format) == dumped


@SKIP_PY3
def test_long_overflow():
    """Python 2 only: dumping a `long` of -1 or 2**64 raises OverflowError."""
    below_range = long(-1)
    with pytest.raises(OverflowError):
        dumps(below_range)

    above_range = long(2**64)
    with pytest.raises(OverflowError):
        dumps(above_range)


@pytest.mark.parametrize('value', [2 ** 63, -2 ** 63 - 1])
def test_int64_overflow(value):
    """Values just outside the signed 64-bit range raise OverflowError.

    The error may come from building the YsonInt64 or from dumping it; both
    calls sit inside the same `raises` block.  On Python 3 the plain int is
    additionally expected to overflow when dumped directly.
    """
    with pytest.raises(OverflowError):
        dumps(YsonInt64(value))

    if not six.PY3:
        return

    with pytest.raises(OverflowError):
        dumps(value)


@pytest.mark.parametrize('value', [2 ** 64, 2 ** 100])
def test_uint64_overflow(value):
    """Values above the unsigned 64-bit range raise OverflowError.

    The error may come from building the YsonUInt64 or from dumping it; both
    calls sit inside the same `raises` block.
    """
    with pytest.raises(OverflowError):
        dumps(YsonUInt64(value))


@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_force_write_sequence(format):
    """`Writer.list` must serialize a list look-alike like a real list.

    The look-alike exposes only `__getitem__` and `__len__`; the bytes
    written to the stream must equal `dumps` of the underlying list.
    """
    class Sequence(object):
        """Wrapper exposing only indexing and length of the wrapped list."""

        def __init__(self, items):
            self._items = items

        def __getitem__(self, position):
            return self._items[position]

        def __len__(self):
            return len(self._items)

    payload = [1, 1.1, None, b'xyz']

    buffer = io.BytesIO()
    writer = Writer(OutputStream.from_file(buffer), format=format)

    writer.begin_stream()
    writer.list(Sequence(payload))
    writer.end_stream()

    written = buffer.getvalue()
    assert written == dumps(payload, format)


@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_force_write_mapping(format):
    """`Writer.map` must serialize a dict look-alike like a real dict.

    The look-alike exposes only `__getitem__` and `keys`; the bytes written
    to the stream must equal `dumps` of the underlying dict.
    """
    class Mapping(object):
        """Wrapper exposing only key lookup and `keys()` of the wrapped dict."""

        def __init__(self, items):
            self._items = items

        def __getitem__(self, name):
            return self._items[name]

        def keys(self):
            return self._items.keys()

    payload = {b'a': 1, b'b': 1.1, b'c': None, b'd': b'some'}

    buffer = io.BytesIO()
    writer = Writer(OutputStream.from_file(buffer), format=format)

    writer.begin_stream()
    writer.map(Mapping(payload))
    writer.end_stream()

    written = buffer.getvalue()
    assert written == dumps(payload, format)


@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
@pytest.mark.parametrize('value', CASES)
def test_unicode_reader(value, format):
    """Loading with UnicodeReader must give the unicode-canonized value."""
    expected = canonize(value, as_unicode=True)

    serialized = dumps(value, format)
    got = loads(serialized, UnicodeReader)

    assert expected == got


def test_unicode_reader_raises_unicode_decode_error():
    """UnicodeReader must raise UnicodeDecodeError on undecodable bytes.

    Both the dump and the load happen inside the `raises` block.
    """
    invalid_utf8 = b'\x80\x81'

    with pytest.raises(UnicodeDecodeError):
        loads(dumps(invalid_utf8, format='binary'), UnicodeReader)


def test_unicode_reader_decodes_object_with_attributes():
    """UnicodeReader must load a map whose value carries attributes.

    The expected result holds only the plain unicode keys and values; the
    `<"foo" = "bar">` attributes attached to "d" do not appear in it.
    """
    yson_text = b'{"a" = "b"; "c" = <"foo" = "bar">"d"}'
    loaded = loads(yson_text, UnicodeReader)

    assert loaded == {u"a": u"b", u"c": u"d"}
# -*- coding: utf-8 -*-

from __future__ import print_function, absolute_import, division

import io
import math
import pytest
import six
import sys

from functools import partial

from cyson import (
    dumps, loads, YsonInt64, YsonUInt64, UInt, Writer, OutputStream,
    UnicodeReader,
)


# Python 2/3 compatibility names used by the helpers and tests below.
if not (six.PY2 or six.PY3):
    raise RuntimeError('Unsupported Python version')

if six.PY2:
    # On Python 2 the native unsigned type is the builtin `long`.
    NativeUInt = long  # noqa: F821
else:
    # On Python 3 cyson's `UInt` plays that role; `unicode` and `long` are
    # aliased so the rest of the module can use the Python 2 names.
    NativeUInt = UInt
    unicode = str
    long = int


def canonize(value, as_unicode=False):
    """Recursively normalize the strings inside `value` to one string type.

    Lists and tuples are both returned as lists; dict keys and values are
    converted recursively.  When `as_unicode` is false, unicode strings are
    encoded to UTF-8 bytes and bytes are left alone; when it is true, bytes
    are decoded from UTF-8 and unicode strings are left alone.  Any other
    value is returned unchanged.
    """
    def convert(item):
        # Propagate the same string-type choice to nested items.
        return canonize(item, as_unicode=as_unicode)

    if isinstance(value, (list, tuple)):
        return [convert(item) for item in value]

    if isinstance(value, dict):
        return {convert(key): convert(value[key]) for key in value}

    if as_unicode:
        if isinstance(value, bytes):
            return value.decode('utf8')
    elif isinstance(value, unicode):
        return value.encode('utf8')

    return value


def switch_string_type(string):
    """Return `string` converted to the other string type through UTF-8.

    Bytes are decoded to unicode and unicode is encoded to bytes.

    Raises:
        TypeError: if `string` is neither bytes nor unicode.
    """
    if not isinstance(string, (bytes, unicode)):
        raise TypeError('expected unicode or bytes, got {!r}'.format(string))

    # Bytes take precedence, matching the order of the type checks.
    transcode = string.decode if isinstance(string, bytes) else string.encode
    return transcode('utf8')


def coerce(obj, to, via=None):
    """Return `obj` itself if it is an instance of `to`, else convert it.

    The conversion calls `via(obj)`; when `via` is None, `to` is used as the
    converter.
    """
    if isinstance(obj, to):
        return obj

    converter = to if via is None else via
    return converter(obj)


# Marker that skips a test when running under Python 3.
SKIP_PY3 = pytest.mark.skipif(
    six.PY3,
    reason='Makes no sense for Python3',
)


# numpy scalars are only added to the test cases when the interpreter is not
# Python 3 on Windows; in that configuration numpy is not imported at all.
if not (six.PY3 and sys.platform == 'win32'):
    import numpy as np

    NUMPY_CASES = [
        # numpy int (largest value of each width)
        np.int8(2 ** 7 - 1),
        np.int16(2 ** 15 - 1),
        np.int32(2 ** 31 - 1),
        np.int64(2 ** 63 - 1),
        # numpy uint (largest value of each width)
        np.uint8(2 ** 8 - 1),
        np.uint16(2 ** 16 - 1),
        np.uint32(2 ** 32 - 1),
        np.uint64(2 ** 64 - 1),
        # numpy float
        np.float16(100.0),
        np.float32(100.0),
        np.float64(100.0),
    ]
else:
    NUMPY_CASES = []


# Values shared by the parametrized round-trip and unicode-reader tests.
CASES = [
    # NoneType
    None,
    # boolean
    True,
    False,
    # int
    0,
    1,
    -1,
    int(2 ** 63 - 1),
    int(-2 ** 63),
    # float
    0.0,
    100.0,
    -100.0,
    float('inf'),
    float('-inf'),
    # bytes
    b'',
    b'hello',
    u'Привет'.encode('utf8'),
    # unicode
    u'',
    u'hello',
    u'Привет',
    # list
    [],
    [0],
    [1, 'hello'],
    [17, 'q'] * 100,
    [b'bytes'],
    # tuple
    (),
    (0,),
    (1, 'hello'),
    (17, 'q') * 100,
    (b'bytes',),
    # dict
    {},
    {'a': 'b'},
    {'a': 17},
    {'a': [1, 2, 3]},
    {b'a': 1, u'b': b'a'},
] + NUMPY_CASES


@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
@pytest.mark.parametrize('value', CASES)
def test_roundtrip(value, format):
    """Dump each case, load it back and compare with its canonized form.

    The dump must also be identical when the format name is passed in the
    other string type (bytes instead of unicode or vice versa).
    """
    serialized = dumps(value, format)
    restored = loads(serialized)

    flipped_format = switch_string_type(format)
    assert serialized == dumps(value, flipped_format)

    assert restored == canonize(value)


# NOTE: roundtrip test doesn't work for NaN (NaN != NaN)
@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_nan(format):
    """Round-trip NaN, checking the result with `math.isnan`.

    NaN never compares equal to itself, so it cannot be covered by the
    equality-based round-trip test.
    """
    nan = float('nan')

    serialized = dumps(nan, format)
    restored = loads(serialized)

    flipped_format = switch_string_type(format)
    assert serialized == dumps(nan, flipped_format)

    assert math.isnan(restored)


@SKIP_PY3
@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
@pytest.mark.parametrize(
    'value', [long(0), long(1), long(2 ** 63), long(2 ** 64 - 1)]
)
def test_long_roundtrip(value, format):
    """Python 2 only: `long` values from 0 to 2**64 - 1 must round-trip.

    The dump must not depend on the string type of the format name.
    """
    serialized = dumps(value, format)
    restored = loads(serialized)

    flipped_format = switch_string_type(format)
    assert serialized == dumps(value, flipped_format)

    assert restored == value


@pytest.mark.parametrize(
    'value', [NativeUInt(0), NativeUInt(111), NativeUInt(2 ** 63), NativeUInt(2 ** 64 - 1)]
)
@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_readwrite_uint64(value, format):
    """A native unsigned value must dump exactly like its YsonUInt64 form.

    Loading the dump must give back an object whose exact type is NativeUInt.
    """
    explicit_uint64 = coerce(value, YsonUInt64)
    dumped = dumps(explicit_uint64, format=format)
    loaded = loads(dumped)

    # Sanity check on the parametrized input itself.
    assert type(value) is NativeUInt
    assert type(loaded) is NativeUInt
    assert dumps(value, format=format) == dumped


@pytest.mark.parametrize('value', [int(-2 ** 63), -111, 0, 111, int(2 ** 63 - 1)])
@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_readwrite_int64(value, format):
    """A plain int must dump exactly like the same value wrapped in YsonInt64.

    Loading the dump must give back an object whose exact type is int.
    """
    explicit_int64 = YsonInt64(value)
    dumped = dumps(explicit_int64, format=format)
    loaded = loads(dumped)

    # Sanity check on the parametrized input itself.
    assert type(value) is int
    assert type(loaded) is int
    assert dumps(value, format=format) == dumped


@SKIP_PY3
def test_long_overflow():
    """Python 2 only: dumping a `long` of -1 or 2**64 raises OverflowError."""
    below_range = long(-1)
    with pytest.raises(OverflowError):
        dumps(below_range)

    above_range = long(2**64)
    with pytest.raises(OverflowError):
        dumps(above_range)


@pytest.mark.parametrize('value', [2 ** 63, -2 ** 63 - 1])
def test_int64_overflow(value):
    """Values just outside the signed 64-bit range raise OverflowError.

    The error may come from building the YsonInt64 or from dumping it; both
    calls sit inside the same `raises` block.  On Python 3 the plain int is
    additionally expected to overflow when dumped directly.
    """
    with pytest.raises(OverflowError):
        dumps(YsonInt64(value))

    if not six.PY3:
        return

    with pytest.raises(OverflowError):
        dumps(value)


@pytest.mark.parametrize('value', [2 ** 64, 2 ** 100])
def test_uint64_overflow(value):
    """Values above the unsigned 64-bit range raise OverflowError.

    The error may come from building the YsonUInt64 or from dumping it; both
    calls sit inside the same `raises` block.
    """
    with pytest.raises(OverflowError):
        dumps(YsonUInt64(value))


@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_force_write_sequence(format):
    """`Writer.list` must serialize a list look-alike like a real list.

    The look-alike exposes only `__getitem__` and `__len__`; the bytes
    written to the stream must equal `dumps` of the underlying list.
    """
    class Sequence(object):
        """Wrapper exposing only indexing and length of the wrapped list."""

        def __init__(self, items):
            self._items = items

        def __getitem__(self, position):
            return self._items[position]

        def __len__(self):
            return len(self._items)

    payload = [1, 1.1, None, b'xyz']

    buffer = io.BytesIO()
    writer = Writer(OutputStream.from_file(buffer), format=format)

    writer.begin_stream()
    writer.list(Sequence(payload))
    writer.end_stream()

    written = buffer.getvalue()
    assert written == dumps(payload, format)


@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
def test_force_write_mapping(format):
    """`Writer.map` must serialize a dict look-alike like a real dict.

    The look-alike exposes only `__getitem__` and `keys`; the bytes written
    to the stream must equal `dumps` of the underlying dict.
    """
    class Mapping(object):
        """Wrapper exposing only key lookup and `keys()` of the wrapped dict."""

        def __init__(self, items):
            self._items = items

        def __getitem__(self, name):
            return self._items[name]

        def keys(self):
            return self._items.keys()

    payload = {b'a': 1, b'b': 1.1, b'c': None, b'd': b'some'}

    buffer = io.BytesIO()
    writer = Writer(OutputStream.from_file(buffer), format=format)

    writer.begin_stream()
    writer.map(Mapping(payload))
    writer.end_stream()

    written = buffer.getvalue()
    assert written == dumps(payload, format)


@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
@pytest.mark.parametrize('value', CASES)
def test_unicode_reader(value, format):
    """Loading with UnicodeReader must give the unicode-canonized value."""
    expected = canonize(value, as_unicode=True)

    serialized = dumps(value, format)
    got = loads(serialized, UnicodeReader)

    assert expected == got


def test_unicode_reader_raises_unicode_decode_error():
    """UnicodeReader must raise UnicodeDecodeError on undecodable bytes.

    Both the dump and the load happen inside the `raises` block.
    """
    invalid_utf8 = b'\x80\x81'

    with pytest.raises(UnicodeDecodeError):
        loads(dumps(invalid_utf8, format='binary'), UnicodeReader)


def test_unicode_reader_decodes_object_with_attributes():
    """UnicodeReader must load a map whose value carries attributes.

    The expected result holds only the plain unicode keys and values; the
    `<"foo" = "bar">` attributes attached to "d" do not appear in it.
    """
    yson_text = b'{"a" = "b"; "c" = <"foo" = "bar">"d"}'
    loaded = loads(yson_text, UnicodeReader)

    assert loaded == {u"a": u"b", u"c": u"d"}