about | summary | refs | log | tree | commit | diff | stats
path: root/library/python/strings
diff options
context:
space:
mode:
author: Nikita Slyusarev <nslus@yandex-team.com> 2022-02-10 16:46:52 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:52 +0300
commit: cd77cecfc03a3eaf87816af28a33067c4f0cdb59 (patch)
tree: 1308e0bae862d52e0020d881fe758080437fe389 /library/python/strings
parent: cdae02d225fb5b3afbb28990e79a7ac6c9125327 (diff)
download: ydb-cd77cecfc03a3eaf87816af28a33067c4f0cdb59.tar.gz
Restoring authorship annotation for Nikita Slyusarev <nslus@yandex-team.com>. Commit 1 of 2.
Diffstat (limited to 'library/python/strings')
-rw-r--r-- library/python/strings/strings.py 100
-rw-r--r-- library/python/strings/ut/test_strings.py 258
-rw-r--r-- library/python/strings/ut/ya.make 2
-rw-r--r-- library/python/strings/ya.make 6
4 files changed, 183 insertions, 183 deletions
diff --git a/library/python/strings/strings.py b/library/python/strings/strings.py
index 5bfddfe78a..cb903be611 100644
--- a/library/python/strings/strings.py
+++ b/library/python/strings/strings.py
@@ -1,17 +1,17 @@
-import locale
-import logging
+import locale
+import logging
import six
-import sys
+import sys
import codecs
-
+
import library.python.func
-
+
logger = logging.getLogger(__name__)
-
+
DEFAULT_ENCODING = 'utf-8'
-ENCODING_ERRORS_POLICY = 'replace'
-
+ENCODING_ERRORS_POLICY = 'replace'
+
def left_strip(el, prefix):
"""
@@ -20,29 +20,29 @@ def left_strip(el, prefix):
if el.startswith(prefix):
return el[len(prefix):]
return el
-
-
-# Explicit to-text conversion
+
+
+# Explicit to-text conversion
# Chooses between str/unicode, i.e. six.binary_type/six.text_type
-def to_basestring(value):
+def to_basestring(value):
if isinstance(value, (six.binary_type, six.text_type)):
- return value
- try:
+ return value
+ try:
if six.PY2:
return unicode(value)
else:
return str(value)
- except UnicodeDecodeError:
- try:
- return str(value)
- except UnicodeEncodeError:
- return repr(value)
-to_text = to_basestring
-
-
+ except UnicodeDecodeError:
+ try:
+ return str(value)
+ except UnicodeEncodeError:
+ return repr(value)
+to_text = to_basestring
+
+
def to_unicode(value, from_enc=DEFAULT_ENCODING):
if isinstance(value, six.text_type):
- return value
+ return value
if isinstance(value, six.binary_type):
if six.PY2:
return unicode(value, from_enc, ENCODING_ERRORS_POLICY)
@@ -51,23 +51,23 @@ def to_unicode(value, from_enc=DEFAULT_ENCODING):
return six.text_type(value)
-# Optional from_enc enables transcoding
+# Optional from_enc enables transcoding
def to_str(value, to_enc=DEFAULT_ENCODING, from_enc=None):
if isinstance(value, six.binary_type):
- if from_enc is None or to_enc == from_enc:
- # Unknown input encoding or input and output encoding are the same
- return value
- value = to_unicode(value, from_enc=from_enc)
+ if from_enc is None or to_enc == from_enc:
+ # Unknown input encoding or input and output encoding are the same
+ return value
+ value = to_unicode(value, from_enc=from_enc)
if isinstance(value, six.text_type):
- return value.encode(to_enc, ENCODING_ERRORS_POLICY)
+ return value.encode(to_enc, ENCODING_ERRORS_POLICY)
return six.binary_type(value)
-
-
+
+
def _convert_deep(x, enc, convert, relaxed=True):
- if x is None:
- return None
+ if x is None:
+ return None
if isinstance(x, (six.text_type, six.binary_type)):
- return convert(x, enc)
+ return convert(x, enc)
if isinstance(x, dict):
return {convert(k, enc): _convert_deep(v, enc, convert, relaxed) for k, v in six.iteritems(x)}
if isinstance(x, list):
@@ -82,15 +82,15 @@ def _convert_deep(x, enc, convert, relaxed=True):
def unicodize_deep(x, enc=DEFAULT_ENCODING, relaxed=True):
return _convert_deep(x, enc, to_unicode, relaxed)
-
-
+
+
def stringize_deep(x, enc=DEFAULT_ENCODING, relaxed=True):
return _convert_deep(x, enc, to_str, relaxed)
-
-
+
+
@library.python.func.memoize()
-def locale_encoding():
- try:
+def locale_encoding():
+ try:
loc = locale.getdefaultlocale()[1]
if loc:
codecs.lookup(loc)
@@ -98,17 +98,17 @@ def locale_encoding():
except LookupError as e:
logger.debug('Cannot get system locale: %s', e)
return None
- except ValueError as e:
+ except ValueError as e:
logger.warn('Cannot get system locale: %s', e)
- return None
-
-
-def fs_encoding():
- return sys.getfilesystemencoding()
-
-
-def guess_default_encoding():
- enc = locale_encoding()
+ return None
+
+
+def fs_encoding():
+ return sys.getfilesystemencoding()
+
+
+def guess_default_encoding():
+ enc = locale_encoding()
return enc if enc else DEFAULT_ENCODING
diff --git a/library/python/strings/ut/test_strings.py b/library/python/strings/ut/test_strings.py
index dd0c694ee1..0ec4fb5d79 100644
--- a/library/python/strings/ut/test_strings.py
+++ b/library/python/strings/ut/test_strings.py
@@ -1,37 +1,37 @@
-# coding=utf-8
-
-import pytest
+# coding=utf-8
+
+import pytest
import six
-
+
import library.python.strings
-
-
-class Convertible(object):
- text = u'текст'
- text_utf8 = text.encode('utf-8')
-
- def __unicode__(self):
- return self.text
-
- def __str__(self):
- return self.text_utf8
-
-
-class ConvertibleToUnicodeOnly(Convertible):
- def __str__(self):
- return self.text.encode('ascii')
-
-
-class ConvertibleToStrOnly(Convertible):
- def __unicode__(self):
- return self.text_utf8.decode('ascii')
-
-
-class NonConvertible(ConvertibleToUnicodeOnly, ConvertibleToStrOnly):
- pass
-
-
-def test_to_basestring():
+
+
+class Convertible(object):
+ text = u'текст'
+ text_utf8 = text.encode('utf-8')
+
+ def __unicode__(self):
+ return self.text
+
+ def __str__(self):
+ return self.text_utf8
+
+
+class ConvertibleToUnicodeOnly(Convertible):
+ def __str__(self):
+ return self.text.encode('ascii')
+
+
+class ConvertibleToStrOnly(Convertible):
+ def __unicode__(self):
+ return self.text_utf8.decode('ascii')
+
+
+class NonConvertible(ConvertibleToUnicodeOnly, ConvertibleToStrOnly):
+ pass
+
+
+def test_to_basestring():
assert library.python.strings.to_basestring('str') == 'str'
assert library.python.strings.to_basestring(u'юникод') == u'юникод'
if six.PY2: # __str__ should return str not bytes in Python3
@@ -39,9 +39,9 @@ def test_to_basestring():
assert library.python.strings.to_basestring(ConvertibleToUnicodeOnly()) == Convertible.text
assert library.python.strings.to_basestring(ConvertibleToStrOnly()) == Convertible.text_utf8
assert library.python.strings.to_basestring(NonConvertible())
-
-
-def test_to_unicode():
+
+
+def test_to_unicode():
assert library.python.strings.to_unicode(u'юникод') == u'юникод'
assert library.python.strings.to_unicode('str') == u'str'
assert library.python.strings.to_unicode(u'строка'.encode('utf-8')) == u'строка'
@@ -53,14 +53,14 @@ def test_to_unicode():
library.python.strings.to_unicode(ConvertibleToStrOnly())
with pytest.raises(UnicodeDecodeError):
library.python.strings.to_unicode(NonConvertible())
-
-
-def test_to_unicode_errors_replace():
+
+
+def test_to_unicode_errors_replace():
assert library.python.strings.to_unicode(u'abcабв'.encode('utf-8'), 'ascii')
assert library.python.strings.to_unicode(u'абв'.encode('utf-8'), 'ascii')
-
-
-def test_to_str():
+
+
+def test_to_str():
assert library.python.strings.to_str('str') == 'str' if six.PY2 else b'str'
assert library.python.strings.to_str(u'unicode') == 'unicode' if six.PY2 else b'unicode'
assert library.python.strings.to_str(u'юникод') == u'юникод'.encode('utf-8')
@@ -72,134 +72,134 @@ def test_to_str():
assert library.python.strings.to_str(ConvertibleToStrOnly()) == Convertible.text_utf8
with pytest.raises(UnicodeEncodeError):
library.python.strings.to_str(NonConvertible())
-
-
-def test_to_str_errors_replace():
+
+
+def test_to_str_errors_replace():
assert library.python.strings.to_str(u'abcабв', 'ascii')
assert library.python.strings.to_str(u'абв', 'ascii')
-
-
-def test_to_str_transcode():
+
+
+def test_to_str_transcode():
assert library.python.strings.to_str('str', from_enc='ascii') == 'str' if six.PY2 else b'str'
assert library.python.strings.to_str('str', from_enc='utf-8') == 'str' if six.PY2 else b'str'
-
+
assert library.python.strings.to_str(u'юникод'.encode('utf-8'), from_enc='utf-8') == u'юникод'.encode('utf-8')
assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='utf-8', from_enc='utf-8') == u'юникод'.encode('utf-8')
assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='cp1251', from_enc='utf-8') == u'юникод'.encode('cp1251')
-
+
assert library.python.strings.to_str(u'юникод'.encode('cp1251'), from_enc='cp1251') == u'юникод'.encode('utf-8')
assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='cp1251', from_enc='cp1251') == u'юникод'.encode('cp1251')
assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='utf-8', from_enc='cp1251') == u'юникод'.encode('utf-8')
-
+
assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), from_enc='koi8-r') == u'юникод'.encode('utf-8')
assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), to_enc='koi8-r', from_enc='koi8-r') == u'юникод'.encode('koi8-r')
assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), to_enc='cp1251', from_enc='koi8-r') == u'юникод'.encode('cp1251')
-
-
-def test_to_str_transcode_wrong():
+
+
+def test_to_str_transcode_wrong():
assert library.python.strings.to_str(u'юникод'.encode('utf-8'), from_enc='cp1251')
assert library.python.strings.to_str(u'юникод'.encode('cp1251'), from_enc='utf-8')
-
-
-def test_to_str_transcode_disabled():
- # No transcoding enabled, set from_enc to enable
+
+
+def test_to_str_transcode_disabled():
+ # No transcoding enabled, set from_enc to enable
assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='utf-8') == u'юникод'.encode('utf-8')
assert library.python.strings.to_str(u'юникод'.encode('utf-8'), to_enc='cp1251') == u'юникод'.encode('utf-8')
assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='utf-8') == u'юникод'.encode('cp1251')
assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='cp1251') == u'юникод'.encode('cp1251')
assert library.python.strings.to_str(u'юникод'.encode('cp1251'), to_enc='koi8-r') == u'юникод'.encode('cp1251')
assert library.python.strings.to_str(u'юникод'.encode('koi8-r'), to_enc='cp1251') == u'юникод'.encode('koi8-r')
-
-
-def test_stringize_deep():
+
+
+def test_stringize_deep():
assert library.python.strings.stringize_deep({
- 'key 1': 'value 1',
- u'ключ 2': u'значение 2',
+ 'key 1': 'value 1',
+ u'ключ 2': u'значение 2',
'list': [u'ключ 2', 'key 1', (u'к', 2)]
- }) == {
+ }) == {
'key 1' if six.PY2 else b'key 1': 'value 1' if six.PY2 else b'value 1',
- u'ключ 2'.encode('utf-8'): u'значение 2'.encode('utf-8'),
+ u'ключ 2'.encode('utf-8'): u'значение 2'.encode('utf-8'),
'list' if six.PY2 else b'list': [u'ключ 2'.encode('utf-8'), 'key 1' if six.PY2 else b'key 1', (u'к'.encode('utf-8'), 2)]
- }
-
-
-def test_stringize_deep_doesnt_transcode():
+ }
+
+
+def test_stringize_deep_doesnt_transcode():
assert library.python.strings.stringize_deep({
- u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'),
- u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'),
- }) == {
- u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'),
- u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'),
- }
-
-
-def test_stringize_deep_nested():
+ u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'),
+ u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'),
+ }) == {
+ u'ключ 1'.encode('utf-8'): u'значение 1'.encode('utf-8'),
+ u'ключ 2'.encode('cp1251'): u'значение 2'.encode('cp1251'),
+ }
+
+
+def test_stringize_deep_nested():
assert library.python.strings.stringize_deep({
- 'key 1': 'value 1',
- u'ключ 2': {
- 'subkey 1': 'value 1',
- u'подключ 2': u'value 2',
- },
- }) == {
+ 'key 1': 'value 1',
+ u'ключ 2': {
+ 'subkey 1': 'value 1',
+ u'подключ 2': u'value 2',
+ },
+ }) == {
'key 1' if six.PY2 else b'key 1': 'value 1' if six.PY2 else b'value 1',
- u'ключ 2'.encode('utf-8'): {
+ u'ключ 2'.encode('utf-8'): {
'subkey 1' if six.PY2 else b'subkey 1': 'value 1' if six.PY2 else b'value 1',
- u'подключ 2'.encode('utf-8'): u'value 2'.encode('utf-8'),
- },
- }
-
-
-def test_stringize_deep_plain():
+ u'подключ 2'.encode('utf-8'): u'value 2'.encode('utf-8'),
+ },
+ }
+
+
+def test_stringize_deep_plain():
assert library.python.strings.stringize_deep('str') == 'str' if six.PY2 else b'str'
assert library.python.strings.stringize_deep(u'юникод') == u'юникод'.encode('utf-8')
assert library.python.strings.stringize_deep(u'юникод'.encode('utf-8')) == u'юникод'.encode('utf-8')
-
-
-def test_stringize_deep_nonstr():
- with pytest.raises(TypeError):
+
+
+def test_stringize_deep_nonstr():
+ with pytest.raises(TypeError):
library.python.strings.stringize_deep(Convertible(), relaxed=False)
x = Convertible()
assert x == library.python.strings.stringize_deep(x)
-
-
-def test_unicodize_deep():
+
+
+def test_unicodize_deep():
assert library.python.strings.unicodize_deep({
- 'key 1': 'value 1',
- u'ключ 2': u'значение 2',
- u'ключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'),
- }) == {
- u'key 1': u'value 1',
- u'ключ 2': u'значение 2',
- u'ключ 3': u'значение 3',
- }
-
-
-def test_unicodize_deep_nested():
+ 'key 1': 'value 1',
+ u'ключ 2': u'значение 2',
+ u'ключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'),
+ }) == {
+ u'key 1': u'value 1',
+ u'ключ 2': u'значение 2',
+ u'ключ 3': u'значение 3',
+ }
+
+
+def test_unicodize_deep_nested():
assert library.python.strings.unicodize_deep({
- 'key 1': 'value 1',
- u'ключ 2': {
- 'subkey 1': 'value 1',
- u'подключ 2': u'значение 2',
- u'подключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'),
- },
- }) == {
- u'key 1': u'value 1',
- u'ключ 2': {
- u'subkey 1': u'value 1',
- u'подключ 2': u'значение 2',
- u'подключ 3': u'значение 3',
- },
- }
-
-
-def test_unicodize_deep_plain():
+ 'key 1': 'value 1',
+ u'ключ 2': {
+ 'subkey 1': 'value 1',
+ u'подключ 2': u'значение 2',
+ u'подключ 3'.encode('utf-8'): u'значение 3'.encode('utf-8'),
+ },
+ }) == {
+ u'key 1': u'value 1',
+ u'ключ 2': {
+ u'subkey 1': u'value 1',
+ u'подключ 2': u'значение 2',
+ u'подключ 3': u'значение 3',
+ },
+ }
+
+
+def test_unicodize_deep_plain():
assert library.python.strings.unicodize_deep('str') == u'str'
assert library.python.strings.unicodize_deep(u'юникод') == u'юникод'
assert library.python.strings.unicodize_deep(u'юникод'.encode('utf-8')) == u'юникод'
-
-
-def test_unicodize_deep_nonstr():
- with pytest.raises(TypeError):
+
+
+def test_unicodize_deep_nonstr():
+ with pytest.raises(TypeError):
library.python.strings.unicodize_deep(Convertible(), relaxed=False)
x = Convertible()
assert x == library.python.strings.stringize_deep(x)
diff --git a/library/python/strings/ut/ya.make b/library/python/strings/ut/ya.make
index dfacb226c7..adbeb9a054 100644
--- a/library/python/strings/ut/ya.make
+++ b/library/python/strings/ut/ya.make
@@ -2,7 +2,7 @@ OWNER(g:yatool)
PY23_TEST()
-TEST_SRCS(test_strings.py)
+TEST_SRCS(test_strings.py)
PEERDIR(
library/python/strings
diff --git a/library/python/strings/ya.make b/library/python/strings/ya.make
index 7e0b033717..eee89ae7dc 100644
--- a/library/python/strings/ya.make
+++ b/library/python/strings/ya.make
@@ -8,9 +8,9 @@ PY_SRCS(
strings.py
)
-PEERDIR(
- library/python/func
+PEERDIR(
+ library/python/func
contrib/python/six
-)
+)
END()