diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/python/strings/ut/test_strings.py | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/python/strings/ut/test_strings.py')
-rw-r--r-- | library/python/strings/ut/test_strings.py | 205 |
1 files changed, 205 insertions, 0 deletions
# coding=utf-8
"""Unit tests for ``library.python.strings`` conversion helpers.

Covers ``to_basestring``, ``to_unicode``, ``to_str``, ``stringize_deep`` and
``unicodize_deep`` under both Python 2 and Python 3 (via ``six``).
"""

import pytest
import six

import library.python.strings


class Convertible(object):
    """Fixture whose ``__unicode__`` and ``__str__`` both succeed.

    ``__str__`` returns UTF-8 encoded bytes, which is only legal on Python 2;
    tests using these fixtures are therefore guarded by ``six.PY2``.
    """

    text = u'текст'
    text_utf8 = text.encode('utf-8')

    def __unicode__(self):
        return self.text

    def __str__(self):
        return self.text_utf8


class ConvertibleToUnicodeOnly(Convertible):
    """Fixture whose ``__str__`` fails: non-ASCII text cannot be ASCII-encoded."""

    def __str__(self):
        return self.text.encode('ascii')


class ConvertibleToStrOnly(Convertible):
    """Fixture whose ``__unicode__`` fails: UTF-8 bytes cannot be ASCII-decoded."""

    def __unicode__(self):
        return self.text_utf8.decode('ascii')


class NonConvertible(ConvertibleToUnicodeOnly, ConvertibleToStrOnly):
    """Fixture combining both failing conversions."""

    pass


def test_to_basestring():
    """``to_basestring`` keeps str/unicode as is and converts objects on Python 2."""
    assert library.python.strings.to_basestring('str') == 'str'
    assert library.python.strings.to_basestring(u'юникод') == u'юникод'
    if six.PY2:  # __str__ should return str not bytes in Python3
        assert library.python.strings.to_basestring(Convertible()) == Convertible.text
        assert library.python.strings.to_basestring(ConvertibleToUnicodeOnly()) == Convertible.text
        assert library.python.strings.to_basestring(ConvertibleToStrOnly()) == Convertible.text_utf8
        assert library.python.strings.to_basestring(NonConvertible())


def test_to_unicode():
    """``to_unicode`` decodes bytes (UTF-8 by default, or the given encoding)."""
    assert library.python.strings.to_unicode(u'юникод') == u'юникод'
    assert library.python.strings.to_unicode('str') == u'str'
    assert library.python.strings.to_unicode(u'строка'.encode('utf-8')) == u'строка'
    assert library.python.strings.to_unicode(u'строка'.encode('cp1251'), 'cp1251') == u'строка'
    if six.PY2:  # __str__ should return str not bytes in Python3
        assert library.python.strings.to_unicode(Convertible()) == Convertible.text
        assert library.python.strings.to_unicode(ConvertibleToUnicodeOnly()) == Convertible.text
        with pytest.raises(UnicodeDecodeError):
            library.python.strings.to_unicode(ConvertibleToStrOnly())
        with pytest.raises(UnicodeDecodeError):
            library.python.strings.to_unicode(NonConvertible())


def test_to_unicode_errors_replace():
    """Decoding with a mismatched encoding does not raise and yields a non-empty result."""
    assert library.python.strings.to_unicode(u'abcабв'.encode('utf-8'), 'ascii')
    assert library.python.strings.to_unicode(u'абв'.encode('utf-8'), 'ascii')


def test_to_str():
    """``to_str`` encodes text to bytes (UTF-8 by default, or the given encoding)."""
    # The expected value is parenthesized on purpose: without the parentheses
    # the conditional expression swallows the comparison and the assert is
    # always true on Python 3.
    assert library.python.strings.to_str('str') == ('str' if six.PY2 else b'str')
    assert library.python.strings.to_str(u'unicode') == ('unicode' if six.PY2 else b'unicode')
    assert library.python.strings.to_str(u'юникод') == u'юникод'.encode('utf-8')
    assert library.python.strings.to_str(u'юникод', 'cp1251') == u'юникод'.encode('cp1251')
    if six.PY2:
        assert library.python.strings.to_str(Convertible()) == Convertible.text_utf8
        with pytest.raises(UnicodeEncodeError):
            library.python.strings.to_str(ConvertibleToUnicodeOnly())
        assert library.python.strings.to_str(ConvertibleToStrOnly()) == Convertible.text_utf8
        with pytest.raises(UnicodeEncodeError):
            library.python.strings.to_str(NonConvertible())


def test_to_str_errors_replace():
    """Encoding non-ASCII text as ASCII does not raise and yields a non-empty result."""
    assert library.python.strings.to_str(u'abcабв', 'ascii')
    assert library.python.strings.to_str(u'абв', 'ascii')


def test_to_str_transcode():
    """With ``from_enc`` set, ``to_str`` transcodes bytes from ``from_enc`` to ``to_enc``."""
    # Parenthesized expected values: see the note in test_to_str.
    assert library.python.strings.to_str('str', from_enc='ascii') == ('str' if six.PY2 else b'str')
    assert library.python.strings.to_str('str', from_enc='utf-8') == ('str' if six.PY2 else b'str')

    assert library.python.strings.to_str(u'юникод'.encode('utf-8'), from_enc='utf-8') == u'юникод'.encode('utf-8')
    assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='utf-8', from_enc='utf-8') == u'юникод'.encode('utf-8')
    assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='cp1251', from_enc='utf-8') == u'юникод'.encode('cp1251')

    assert library.python.strings.to_str(u'юникод'.encode('cp1251'), from_enc='cp1251') == u'юникод'.encode('utf-8')
    assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='cp1251', from_enc='cp1251') == u'юникод'.encode('cp1251')
    assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='utf-8', from_enc='cp1251') == u'юникод'.encode('utf-8')

    assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), from_enc='koi8-r') == u'юникод'.encode('utf-8')
    assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), to_enc='koi8-r', from_enc='koi8-r') == u'юникод'.encode('koi8-r')
    assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), to_enc='cp1251', from_enc='koi8-r') == u'юникод'.encode('cp1251')


def test_to_str_transcode_wrong():
    """Transcoding with a wrong ``from_enc`` does not raise and yields a non-empty result."""
    assert library.python.strings.to_str(u'юникод'.encode('utf-8'), from_enc='cp1251')
    assert library.python.strings.to_str(u'юникод'.encode('cp1251'), from_enc='utf-8')


def test_to_str_transcode_disabled():
    """Without ``from_enc``, bytes input is returned unchanged whatever ``to_enc`` is."""
    # No transcoding enabled, set from_enc to enable
    assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='utf-8') == u'юникод'.encode('utf-8')
    assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='cp1251') == u'юникод'.encode('utf-8')
    assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='utf-8') == u'юникод'.encode('cp1251')
    assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='cp1251') == u'юникод'.encode('cp1251')
    assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='koi8-r') == u'юникод'.encode('cp1251')
    assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), to_enc='cp1251') == u'юникод'.encode('koi8-r')


def test_stringize_deep():
    """``stringize_deep`` converts keys, values and nested list/tuple items to bytes."""
    assert library.python.strings.stringize_deep({
        'key 1': 'value 1',
        u'ключ 2': u'значение 2',
        'list': [u'ключ 2', 'key 1', (u'к', 2)]
    }) == {
        'key 1' if six.PY2 else b'key 1': 'value 1' if six.PY2 else b'value 1',
        u'ключ 2'.encode('utf-8'): u'значение 2'.encode('utf-8'),
        'list' if six.PY2 else b'list': [u'ключ 2'.encode('utf-8'), 'key 1' if six.PY2 else b'key 1', (u'к'.encode('utf-8'), 2)]
    }


def test_stringize_deep_doesnt_transcode():
    """``stringize_deep`` leaves already-encoded bytes untouched, whatever their encoding."""
    assert library.python.strings.stringize_deep({
        u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'),
        u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'),
    }) == {
        u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'),
        u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'),
    }


def test_stringize_deep_nested():
    """``stringize_deep`` recurses into nested dictionaries."""
    assert library.python.strings.stringize_deep({
        'key 1': 'value 1',
        u'ключ 2': {
            'subkey 1': 'value 1',
            u'подключ 2': u'value 2',
        },
    }) == {
        'key 1' if six.PY2 else b'key 1': 'value 1' if six.PY2 else b'value 1',
        u'ключ 2'.encode('utf-8'): {
            'subkey 1' if six.PY2 else b'subkey 1': 'value 1' if six.PY2 else b'value 1',
            u'подключ 2'.encode('utf-8'): u'value 2'.encode('utf-8'),
        },
    }


def test_stringize_deep_plain():
    """``stringize_deep`` also accepts a plain string rather than a container."""
    # Parenthesized expected value: see the note in test_to_str.
    assert library.python.strings.stringize_deep('str') == ('str' if six.PY2 else b'str')
    assert library.python.strings.stringize_deep(u'юникод') == u'юникод'.encode('utf-8')
    assert library.python.strings.stringize_deep(u'юникод'.encode('utf-8')) == u'юникод'.encode('utf-8')


def test_stringize_deep_nonstr():
    """Non-string objects raise ``TypeError`` when strict and pass through when relaxed."""
    with pytest.raises(TypeError):
        library.python.strings.stringize_deep(Convertible(), relaxed=False)
    x = Convertible()
    assert x == library.python.strings.stringize_deep(x)


def test_unicodize_deep():
    """``unicodize_deep`` converts both keys and values of a dict to unicode."""
    assert library.python.strings.unicodize_deep({
        'key 1': 'value 1',
        u'ключ 2': u'значение 2',
        u'ключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'),
    }) == {
        u'key 1': u'value 1',
        u'ключ 2': u'значение 2',
        u'ключ 3': u'значение 3',
    }


def test_unicodize_deep_nested():
    """``unicodize_deep`` recurses into nested dictionaries."""
    assert library.python.strings.unicodize_deep({
        'key 1': 'value 1',
        u'ключ 2': {
            'subkey 1': 'value 1',
            u'подключ 2': u'значение 2',
            u'подключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'),
        },
    }) == {
        u'key 1': u'value 1',
        u'ключ 2': {
            u'subkey 1': u'value 1',
            u'подключ 2': u'значение 2',
            u'подключ 3': u'значение 3',
        },
    }


def test_unicodize_deep_plain():
    """``unicodize_deep`` also accepts a plain string rather than a container."""
    assert library.python.strings.unicodize_deep('str') == u'str'
    assert library.python.strings.unicodize_deep(u'юникод') == u'юникод'
    assert library.python.strings.unicodize_deep(u'юникод'.encode('utf-8')) == u'юникод'


def test_unicodize_deep_nonstr():
    """Non-string objects raise ``TypeError`` when strict and pass through when relaxed."""
    with pytest.raises(TypeError):
        library.python.strings.unicodize_deep(Convertible(), relaxed=False)
    x = Convertible()
    # Exercise unicodize_deep here; the original called stringize_deep by
    # copy-paste, leaving the relaxed path of unicodize_deep untested.
    assert x == library.python.strings.unicodize_deep(x)