diff options
author | Nikita Slyusarev <nslus@yandex-team.com> | 2022-02-10 16:46:53 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:53 +0300 |
commit | 469afdc4e2587bf62ecdd096b75a0baa444c4012 (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /library/python/strings | |
parent | cd77cecfc03a3eaf87816af28a33067c4f0cdb59 (diff) | |
download | ydb-469afdc4e2587bf62ecdd096b75a0baa444c4012.tar.gz |
Restoring authorship annotation for Nikita Slyusarev <nslus@yandex-team.com>. Commit 2 of 2.
Diffstat (limited to 'library/python/strings')
-rw-r--r-- | library/python/strings/strings.py | 100 | ||||
-rw-r--r-- | library/python/strings/ut/test_strings.py | 258 | ||||
-rw-r--r-- | library/python/strings/ut/ya.make | 2 | ||||
-rw-r--r-- | library/python/strings/ya.make | 6 |
4 files changed, 183 insertions, 183 deletions
diff --git a/library/python/strings/strings.py b/library/python/strings/strings.py index cb903be611..5bfddfe78a 100644 --- a/library/python/strings/strings.py +++ b/library/python/strings/strings.py @@ -1,17 +1,17 @@ -import locale -import logging +import locale +import logging import six -import sys +import sys import codecs - + import library.python.func - + logger = logging.getLogger(__name__) - + DEFAULT_ENCODING = 'utf-8' -ENCODING_ERRORS_POLICY = 'replace' - +ENCODING_ERRORS_POLICY = 'replace' + def left_strip(el, prefix): """ @@ -20,29 +20,29 @@ def left_strip(el, prefix): if el.startswith(prefix): return el[len(prefix):] return el - - -# Explicit to-text conversion + + +# Explicit to-text conversion # Chooses between str/unicode, i.e. six.binary_type/six.text_type -def to_basestring(value): +def to_basestring(value): if isinstance(value, (six.binary_type, six.text_type)): - return value - try: + return value + try: if six.PY2: return unicode(value) else: return str(value) - except UnicodeDecodeError: - try: - return str(value) - except UnicodeEncodeError: - return repr(value) -to_text = to_basestring - - + except UnicodeDecodeError: + try: + return str(value) + except UnicodeEncodeError: + return repr(value) +to_text = to_basestring + + def to_unicode(value, from_enc=DEFAULT_ENCODING): if isinstance(value, six.text_type): - return value + return value if isinstance(value, six.binary_type): if six.PY2: return unicode(value, from_enc, ENCODING_ERRORS_POLICY) @@ -51,23 +51,23 @@ def to_unicode(value, from_enc=DEFAULT_ENCODING): return six.text_type(value) -# Optional from_enc enables transcoding +# Optional from_enc enables transcoding def to_str(value, to_enc=DEFAULT_ENCODING, from_enc=None): if isinstance(value, six.binary_type): - if from_enc is None or to_enc == from_enc: - # Unknown input encoding or input and output encoding are the same - return value - value = to_unicode(value, from_enc=from_enc) + if from_enc is None or to_enc == from_enc: + # Unknown input encoding or input and output encoding are the same + return value + value = to_unicode(value, from_enc=from_enc) if isinstance(value, six.text_type): - return value.encode(to_enc, ENCODING_ERRORS_POLICY) + return value.encode(to_enc, ENCODING_ERRORS_POLICY) return six.binary_type(value) - - + + def _convert_deep(x, enc, convert, relaxed=True): - if x is None: - return None + if x is None: + return None if isinstance(x, (six.text_type, six.binary_type)): - return convert(x, enc) + return convert(x, enc) if isinstance(x, dict): return {convert(k, enc): _convert_deep(v, enc, convert, relaxed) for k, v in six.iteritems(x)} if isinstance(x, list): @@ -82,15 +82,15 @@ def _convert_deep(x, enc, convert, relaxed=True): def unicodize_deep(x, enc=DEFAULT_ENCODING, relaxed=True): return _convert_deep(x, enc, to_unicode, relaxed) - - + + def stringize_deep(x, enc=DEFAULT_ENCODING, relaxed=True): return _convert_deep(x, enc, to_str, relaxed) - - + + @library.python.func.memoize() -def locale_encoding(): - try: +def locale_encoding(): + try: loc = locale.getdefaultlocale()[1] if loc: codecs.lookup(loc) @@ -98,17 +98,17 @@ def locale_encoding(): except LookupError as e: logger.debug('Cannot get system locale: %s', e) return None - except ValueError as e: + except ValueError as e: logger.warn('Cannot get system locale: %s', e) - return None - - -def fs_encoding(): - return sys.getfilesystemencoding() - - -def guess_default_encoding(): - enc = locale_encoding() + return None + + +def fs_encoding(): + return sys.getfilesystemencoding() + + +def guess_default_encoding(): + enc = locale_encoding() return enc if enc else DEFAULT_ENCODING diff --git a/library/python/strings/ut/test_strings.py b/library/python/strings/ut/test_strings.py index 0ec4fb5d79..dd0c694ee1 100644 --- a/library/python/strings/ut/test_strings.py +++ b/library/python/strings/ut/test_strings.py @@ -1,37 +1,37 @@ -# coding=utf-8 - -import pytest +# coding=utf-8 + +import pytest import six - + import library.python.strings - - -class Convertible(object): - text = u'текст' - text_utf8 = text.encode('utf-8') - - def __unicode__(self): - return self.text - - def __str__(self): - return self.text_utf8 - - -class ConvertibleToUnicodeOnly(Convertible): - def __str__(self): - return self.text.encode('ascii') - - -class ConvertibleToStrOnly(Convertible): - def __unicode__(self): - return self.text_utf8.decode('ascii') - - -class NonConvertible(ConvertibleToUnicodeOnly, ConvertibleToStrOnly): - pass - - -def test_to_basestring(): + + +class Convertible(object): + text = u'текст' + text_utf8 = text.encode('utf-8') + + def __unicode__(self): + return self.text + + def __str__(self): + return self.text_utf8 + + +class ConvertibleToUnicodeOnly(Convertible): + def __str__(self): + return self.text.encode('ascii') + + +class ConvertibleToStrOnly(Convertible): + def __unicode__(self): + return self.text_utf8.decode('ascii') + + +class NonConvertible(ConvertibleToUnicodeOnly, ConvertibleToStrOnly): + pass + + +def test_to_basestring(): assert library.python.strings.to_basestring('str') == 'str' assert library.python.strings.to_basestring(u'юникод') == u'юникод' if six.PY2: # __str__ should return str not bytes in Python3 @@ -39,9 +39,9 @@ def test_to_basestring(): assert library.python.strings.to_basestring(ConvertibleToUnicodeOnly()) == Convertible.text assert library.python.strings.to_basestring(ConvertibleToStrOnly()) == Convertible.text_utf8 assert library.python.strings.to_basestring(NonConvertible()) - - -def test_to_unicode(): + + +def test_to_unicode(): assert library.python.strings.to_unicode(u'юникод') == u'юникод' assert library.python.strings.to_unicode('str') == u'str' assert library.python.strings.to_unicode(u'строка'.encode('utf-8')) == u'строка' @@ -53,14 +53,14 @@ def test_to_unicode(): library.python.strings.to_unicode(ConvertibleToStrOnly()) with pytest.raises(UnicodeDecodeError): library.python.strings.to_unicode(NonConvertible()) - - -def test_to_unicode_errors_replace(): + + +def test_to_unicode_errors_replace(): assert library.python.strings.to_unicode(u'abcабв'.encode('utf-8'), 'ascii') assert library.python.strings.to_unicode(u'абв'.encode('utf-8'), 'ascii') - - -def test_to_str(): + + +def test_to_str(): assert library.python.strings.to_str('str') == 'str' if six.PY2 else b'str' assert library.python.strings.to_str(u'unicode') == 'unicode' if six.PY2 else b'unicode' assert library.python.strings.to_str(u'юникод') == u'юникод'.encode('utf-8') @@ -72,134 +72,134 @@ def test_to_str(): assert library.python.strings.to_str(ConvertibleToStrOnly()) == Convertible.text_utf8 with pytest.raises(UnicodeEncodeError): library.python.strings.to_str(NonConvertible()) - - -def test_to_str_errors_replace(): + + +def test_to_str_errors_replace(): assert library.python.strings.to_str(u'abcабв', 'ascii') assert library.python.strings.to_str(u'абв', 'ascii') - - -def test_to_str_transcode(): + + +def test_to_str_transcode(): assert library.python.strings.to_str('str', from_enc='ascii') == 'str' if six.PY2 else b'str' assert library.python.strings.to_str('str', from_enc='utf-8') == 'str' if six.PY2 else b'str' - + assert library.python.strings.to_str(u'юникод'.encode('utf-8'), from_enc='utf-8') == u'юникод'.encode('utf-8') assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='utf-8', from_enc='utf-8') == u'юникод'.encode('utf-8') assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='cp1251', from_enc='utf-8') == u'юникод'.encode('cp1251') - + assert library.python.strings.to_str(u'юникод'.encode('cp1251'), from_enc='cp1251') == u'юникод'.encode('utf-8') assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='cp1251', from_enc='cp1251') == u'юникод'.encode('cp1251') assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='utf-8', from_enc='cp1251') == u'юникод'.encode('utf-8') - + assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), from_enc='koi8-r') == u'юникод'.encode('utf-8') assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), to_enc='koi8-r', from_enc='koi8-r') == u'юникод'.encode('koi8-r') assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), to_enc='cp1251', from_enc='koi8-r') == u'юникод'.encode('cp1251') - - -def test_to_str_transcode_wrong(): + + +def test_to_str_transcode_wrong(): assert library.python.strings.to_str(u'юникод'.encode('utf-8'), from_enc='cp1251') assert library.python.strings.to_str(u'юникод'.encode('cp1251'), from_enc='utf-8') - - -def test_to_str_transcode_disabled(): - # No transcoding enabled, set from_enc to enable + + +def test_to_str_transcode_disabled(): + # No transcoding enabled, set from_enc to enable assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='utf-8') == u'юникод'.encode('utf-8') assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='cp1251') == u'юникод'.encode('utf-8') assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='utf-8') == u'юникод'.encode('cp1251') assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='cp1251') == u'юникод'.encode('cp1251') assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='koi8-r') == u'юникод'.encode('cp1251') assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), to_enc='cp1251') == u'юникод'.encode('koi8-r') - - -def test_stringize_deep(): + + +def test_stringize_deep(): assert library.python.strings.stringize_deep({ - 'key 1': 'value 1', - u'ключ 2': u'значение 2', + 'key 1': 'value 1', + u'ключ 2': u'значение 2', 'list': [u'ключ 2', 'key 1', (u'к', 2)] - }) == { + }) == { 'key 1' if six.PY2 else b'key 1': 'value 1' if six.PY2 else b'value 1', - u'ключ 2'.encode('utf-8'): u'значение 2'.encode('utf-8'), + u'ключ 2'.encode('utf-8'): u'значение 2'.encode('utf-8'), 'list' if six.PY2 else b'list': [u'ключ 2'.encode('utf-8'), 'key 1' if six.PY2 else b'key 1', (u'к'.encode('utf-8'), 2)] - } - - -def test_stringize_deep_doesnt_transcode(): + } + + +def test_stringize_deep_doesnt_transcode(): assert library.python.strings.stringize_deep({ - u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'), - u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'), - }) == { - u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'), - u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'), - } - - -def test_stringize_deep_nested(): + u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'), + u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'), + }) == { + u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'), + u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'), + } + + +def test_stringize_deep_nested(): assert library.python.strings.stringize_deep({ - 'key 1': 'value 1', - u'ключ 2': { - 'subkey 1': 'value 1', - u'подключ 2': u'value 2', - }, - }) == { + 'key 1': 'value 1', + u'ключ 2': { + 'subkey 1': 'value 1', + u'подключ 2': u'value 2', + }, + }) == { 'key 1' if six.PY2 else b'key 1': 'value 1' if six.PY2 else b'value 1', - u'ключ 2'.encode('utf-8'): { + u'ключ 2'.encode('utf-8'): { 'subkey 1' if six.PY2 else b'subkey 1': 'value 1' if six.PY2 else b'value 1', - u'подключ 2'.encode('utf-8'): u'value 2'.encode('utf-8'), - }, - } - - -def test_stringize_deep_plain(): + u'подключ 2'.encode('utf-8'): u'value 2'.encode('utf-8'), + }, + } + + +def test_stringize_deep_plain(): assert library.python.strings.stringize_deep('str') == 'str' if six.PY2 else b'str' assert library.python.strings.stringize_deep(u'юникод') == u'юникод'.encode('utf-8') assert library.python.strings.stringize_deep(u'юникод'.encode('utf-8')) == u'юникод'.encode('utf-8') - - -def test_stringize_deep_nonstr(): - with pytest.raises(TypeError): + + +def test_stringize_deep_nonstr(): + with pytest.raises(TypeError): library.python.strings.stringize_deep(Convertible(), relaxed=False) x = Convertible() assert x == library.python.strings.stringize_deep(x) - - -def test_unicodize_deep(): + + +def test_unicodize_deep(): assert library.python.strings.unicodize_deep({ - 'key 1': 'value 1', - u'ключ 2': u'значение 2', - u'ключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'), - }) == { - u'key 1': u'value 1', - u'ключ 2': u'значение 2', - u'ключ 3': u'значение 3', - } - - -def test_unicodize_deep_nested(): + 'key 1': 'value 1', + u'ключ 2': u'значение 2', + u'ключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'), + }) == { + u'key 1': u'value 1', + u'ключ 2': u'значение 2', + u'ключ 3': u'значение 3', + } + + +def test_unicodize_deep_nested(): assert library.python.strings.unicodize_deep({ - 'key 1': 'value 1', - u'ключ 2': { - 'subkey 1': 'value 1', - u'подключ 2': u'значение 2', - u'подключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'), - }, - }) == { - u'key 1': u'value 1', - u'ключ 2': { - u'subkey 1': u'value 1', - u'подключ 2': u'значение 2', - u'подключ 3': u'значение 3', - }, - } - - -def test_unicodize_deep_plain(): + 'key 1': 'value 1', + u'ключ 2': { + 'subkey 1': 'value 1', + u'подключ 2': u'значение 2', + u'подключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'), + }, + }) == { + u'key 1': u'value 1', + u'ключ 2': { + u'subkey 1': u'value 1', + u'подключ 2': u'значение 2', + u'подключ 3': u'значение 3', + }, + } + + +def test_unicodize_deep_plain(): assert library.python.strings.unicodize_deep('str') == u'str' assert library.python.strings.unicodize_deep(u'юникод') == u'юникод' assert library.python.strings.unicodize_deep(u'юникод'.encode('utf-8')) == u'юникод' - - -def test_unicodize_deep_nonstr(): - with pytest.raises(TypeError): + + +def test_unicodize_deep_nonstr(): + with pytest.raises(TypeError): library.python.strings.unicodize_deep(Convertible(), relaxed=False) x = Convertible() assert x == library.python.strings.stringize_deep(x) diff --git a/library/python/strings/ut/ya.make b/library/python/strings/ut/ya.make index adbeb9a054..dfacb226c7 100644 --- a/library/python/strings/ut/ya.make +++ b/library/python/strings/ut/ya.make @@ -2,7 +2,7 @@ OWNER(g:yatool) PY23_TEST() -TEST_SRCS(test_strings.py) +TEST_SRCS(test_strings.py) PEERDIR( library/python/strings diff --git a/library/python/strings/ya.make b/library/python/strings/ya.make index eee89ae7dc..7e0b033717 100644 --- a/library/python/strings/ya.make +++ b/library/python/strings/ya.make @@ -8,9 +8,9 @@ PY_SRCS( strings.py ) -PEERDIR( - library/python/func +PEERDIR( + library/python/func contrib/python/six -) +) END() |