1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
import locale
import logging
import six
import sys
import codecs
import library.python.func
# Module-level logger, named after this module
logger = logging.getLogger(__name__)
# Encoding used by the helpers in this module when the caller does not pass one
DEFAULT_ENCODING = 'utf-8'
# Codec error handler passed to the decode()/encode() calls in to_unicode and to_str
ENCODING_ERRORS_POLICY = 'replace'
def left_strip(el, prefix):
    """
    Return el with a single leading occurrence of prefix removed.

    When el does not start with prefix it is returned unchanged.
    """
    return el[len(prefix):] if el.startswith(prefix) else el
# Explicit to-text conversion
# Chooses between str/unicode, i.e. six.binary_type/six.text_type
def to_basestring(value):
    """
    Return value as a string object.

    Instances of six.binary_type or six.text_type are handed back untouched.
    Any other object is converted with unicode() on Python 2 and str() on
    Python 3. If that conversion raises UnicodeDecodeError, str() is tried;
    if str() raises UnicodeEncodeError, repr(value) is returned.
    """
    string_types = (six.binary_type, six.text_type)
    if isinstance(value, string_types):
        return value

    # The conditional expression keeps the `unicode` name from being
    # evaluated on Python 3, where it does not exist.
    text_factory = unicode if six.PY2 else str  # noqa
    try:
        return text_factory(value)
    except UnicodeDecodeError:
        pass

    try:
        return str(value)
    except UnicodeEncodeError:
        return repr(value)


to_text = to_basestring
def to_unicode(value, from_enc=DEFAULT_ENCODING):
    """
    Coerce value to six.text_type.

    Text is returned as is. Binary strings are decoded from from_enc, with
    undecodable input handled according to ENCODING_ERRORS_POLICY. Any other
    object is passed to six.text_type().
    """
    if isinstance(value, six.text_type):
        return value
    if not isinstance(value, six.binary_type):
        return six.text_type(value)
    if six.PY2:
        return unicode(value, from_enc, ENCODING_ERRORS_POLICY)  # noqa
    return value.decode(from_enc, errors=ENCODING_ERRORS_POLICY)
# Optional from_enc enables transcoding
def to_str(value, to_enc=DEFAULT_ENCODING, from_enc=None):
    """
    Coerce value to six.binary_type.

    - Binary input is returned unchanged when from_enc is None or equals
      to_enc; otherwise it is decoded from from_enc and re-encoded to to_enc.
    - Text input is encoded to to_enc using ENCODING_ERRORS_POLICY.
    - Any other object is converted to its string form: str(value) on
      Python 2, the to_enc-encoded six.text_type(value) on Python 3.
      Bytes-like objects (bytearray, memoryview, objects defining __bytes__)
      still yield their byte content on Python 3.
    """
    if isinstance(value, six.binary_type):
        if from_enc is None or to_enc == from_enc:
            # Unknown input encoding or input and output encoding are the same
            return value
        value = to_unicode(value, from_enc=from_enc)
    if isinstance(value, six.text_type):
        return value.encode(to_enc, ENCODING_ERRORS_POLICY)
    if six.PY2 or isinstance(value, (bytearray, memoryview)) or hasattr(value, '__bytes__'):
        # Python 2: plain str(value). Python 3: these objects carry real byte
        # content, so bytes(value) is the right conversion for them.
        return six.binary_type(value)
    # On Python 3 bytes(obj) is not a string conversion: bytes(5) produces
    # five NUL bytes and bytes(None) raises TypeError. Go through the text
    # representation instead, which matches the Python 2 result.
    return six.text_type(value).encode(to_enc, ENCODING_ERRORS_POLICY)
def _convert_deep(x, enc, convert, relaxed=True):
if x is None:
return None
if isinstance(x, (six.text_type, six.binary_type)):
return convert(x, enc)
if isinstance(x, dict):
return {convert(k, enc): _convert_deep(v, enc, convert, relaxed) for k, v in six.iteritems(x)}
if isinstance(x, list):
return [_convert_deep(e, enc, convert, relaxed) for e in x]
if isinstance(x, tuple):
return tuple([_convert_deep(e, enc, convert, relaxed) for e in x])
if relaxed:
return x
raise TypeError('unsupported type')
# Result as from six.ensure_text
def unicodize_deep(x, enc=DEFAULT_ENCODING, relaxed=True):
    """
    Convert every string inside x (see _convert_deep) with to_unicode,
    using enc as the source encoding of binary strings.
    """
    return _convert_deep(x, enc, convert=to_unicode, relaxed=relaxed)
# Result as from six.ensure_str
def ensure_str_deep(x, enc=DEFAULT_ENCODING, relaxed=True):
    """
    Convert every string inside x (see _convert_deep) with six.ensure_str,
    passing enc as its encoding argument.
    """
    return _convert_deep(x, enc, convert=six.ensure_str, relaxed=relaxed)
# Result as from six.ensure_binary
def stringize_deep(x, enc=DEFAULT_ENCODING, relaxed=True):
    """
    Convert every string inside x (see _convert_deep) with to_str,
    using enc as the target encoding.
    """
    return _convert_deep(x, enc, convert=to_str, relaxed=relaxed)
@library.python.func.memoize()
def locale_encoding():
    """
    Return the encoding reported by locale.getdefaultlocale().

    A non-empty encoding name is validated with codecs.lookup() before it is
    returned. None is returned (and the problem logged) when the name is
    unknown to codecs (LookupError) or when a ValueError is raised while the
    locale is being queried.

    NOTE(review): the function is wrapped in library.python.func.memoize();
    presumably the result is computed once and cached - confirm against that
    helper.
    """
    try:
        loc = locale.getdefaultlocale()[1]
        if loc:
            # Raises LookupError for an encoding name Python has no codec for
            codecs.lookup(loc)
        return loc
    except LookupError as e:
        logger.debug('Cannot get system locale: %s', e)
        return None
    except ValueError as e:
        # Logger.warn is a deprecated alias; Logger.warning is the supported spelling
        logger.warning('Cannot get system locale: %s', e)
        return None
def fs_encoding():
    """
    Return the file system encoding name as reported by
    sys.getfilesystemencoding().
    """
    encoding = sys.getfilesystemencoding()
    return encoding
def guess_default_encoding():
    """
    Return the locale encoding when locale_encoding() yields a non-empty
    value, otherwise DEFAULT_ENCODING.
    """
    return locale_encoding() or DEFAULT_ENCODING
@library.python.func.memoize()
def get_stream_encoding(stream):
    """
    Return the encoding name to use for stream.

    The stream's own `encoding` attribute is returned when it is set and
    names a codec known to codecs.lookup(). In every other case - attribute
    missing, empty/None, or unknown codec name - DEFAULT_ENCODING is returned.

    NOTE(review): the function is wrapped in library.python.func.memoize();
    presumably results are cached per stream argument - confirm against that
    helper.
    """
    # Streams such as io.BytesIO have no `encoding` attribute at all; treat
    # that the same as an unset encoding instead of raising AttributeError.
    encoding = getattr(stream, 'encoding', None)
    if encoding:
        try:
            codecs.lookup(encoding)
            return encoding
        except LookupError:
            # Unknown codec name: fall through to the default
            pass
    return DEFAULT_ENCODING
def encode(value, encoding=DEFAULT_ENCODING):
    """
    Return value encoded to encoding.

    Binary input is first decoded from the same encoding, with undecodable
    bytes ignored, and then encoded again; other input is encoded directly
    via its encode() method.
    """
    is_binary = isinstance(value, six.binary_type)
    text = value.decode(encoding, errors='ignore') if is_binary else value
    return text.encode(encoding)
|