1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
import locale
import logging
import six
import sys
import codecs
import library.python.func
# Module-level logger, named after this module
logger = logging.getLogger(__name__)
# Encoding used by the helpers below whenever the caller does not pass one
DEFAULT_ENCODING = 'utf-8'
# Codec error handler handed to the decode/encode calls in to_unicode and to_str
ENCODING_ERRORS_POLICY = 'replace'
def left_strip(el, prefix):
    """
    Return el with a single leading occurrence of prefix removed.

    When el does not start with prefix it is returned unchanged.
    """
    if not el.startswith(prefix):
        return el
    cut = len(prefix)
    return el[cut:]
# Explicit to-text conversion
# Chooses between str/unicode, i.e. six.binary_type/six.text_type
def to_basestring(value):
    """
    Convert value to a string of either flavour (six.binary_type or six.text_type).

    Byte strings and text strings are returned as is. Any other object is
    passed to the text constructor (unicode on Python 2, str on Python 3);
    if that raises UnicodeDecodeError, str(value) is tried, and if that in
    turn raises UnicodeEncodeError, repr(value) is returned.
    """
    already_string = isinstance(value, (six.binary_type, six.text_type))
    if already_string:
        return value
    # ``unicode`` is only looked up when running under Python 2
    text_ctor = unicode if six.PY2 else str
    try:
        return text_ctor(value)
    except UnicodeDecodeError:
        try:
            return str(value)
        except UnicodeEncodeError:
            return repr(value)


# Alias of to_basestring
to_text = to_basestring
def to_unicode(value, from_enc=DEFAULT_ENCODING):
    """
    Convert value to a text string (six.text_type).

    :param value: byte string, text string or any other object
    :param from_enc: encoding used to decode byte-string input
    :return: text strings unchanged; byte strings decoded from from_enc with
        the ENCODING_ERRORS_POLICY error handler; anything else passed
        through six.text_type()
    """
    if isinstance(value, six.binary_type):
        if not six.PY2:
            return value.decode(from_enc, errors=ENCODING_ERRORS_POLICY)
        return unicode(value, from_enc, ENCODING_ERRORS_POLICY)
    return value if isinstance(value, six.text_type) else six.text_type(value)
# Optional from_enc enables transcoding
def to_str(value, to_enc=DEFAULT_ENCODING, from_enc=None):
    """
    Convert value to a byte string (six.binary_type).

    :param value: byte string, text string or any other object
    :param to_enc: encoding of the resulting byte string
    :param from_enc: encoding of a byte-string input; when it is given and
        differs from to_enc the input is transcoded, otherwise byte strings
        are returned untouched
    :return: six.binary_type
    """
    if isinstance(value, six.binary_type):
        if from_enc is None or to_enc == from_enc:
            # Unknown input encoding or input and output encoding are the same
            return value
        value = to_unicode(value, from_enc=from_enc)
    if isinstance(value, six.text_type):
        return value.encode(to_enc, ENCODING_ERRORS_POLICY)
    if six.PY2 or isinstance(value, (bytearray, memoryview)) or hasattr(value, '__bytes__'):
        # Python 2: str() yields the textual form of any object.
        # Python 3: these are the inputs bytes() converts to their own content.
        return six.binary_type(value)
    # Python 3: bytes(5) is five NUL bytes and bytes(None) raises TypeError,
    # so encode the textual form instead, mirroring the Python 2 result.
    return six.text_type(value).encode(to_enc, ENCODING_ERRORS_POLICY)
def _convert_deep(x, enc, convert, relaxed=True):
    """
    Apply convert(string, enc) to the strings found inside a nested structure.

    :param x: None, a string, or a dict / list / tuple nesting of such values
    :param enc: encoding forwarded to convert
    :param convert: callable taking (value, enc)
    :param relaxed: when true, values of any other type are returned as is;
        otherwise they raise TypeError
    :return: a new structure of the same nesting (tuples come back as plain
        tuples); dict keys are passed directly to convert, dict values and
        sequence items are processed recursively
    """
    if x is None:
        return None

    def descend(item):
        return _convert_deep(item, enc, convert, relaxed)

    if isinstance(x, (six.text_type, six.binary_type)):
        return convert(x, enc)
    if isinstance(x, dict):
        return {convert(key, enc): descend(val) for key, val in six.iteritems(x)}
    if isinstance(x, (list, tuple)):
        items = [descend(item) for item in x]
        return items if isinstance(x, list) else tuple(items)
    if not relaxed:
        raise TypeError('unsupported type')
    return x
def unicodize_deep(x, enc=DEFAULT_ENCODING, relaxed=True):
    """
    Run to_unicode over the strings of a nested dict / list / tuple structure.

    enc is handed to to_unicode as the source encoding; relaxed is forwarded
    to _convert_deep.
    """
    return _convert_deep(x, enc, convert=to_unicode, relaxed=relaxed)
def stringize_deep(x, enc=DEFAULT_ENCODING, relaxed=True):
    """
    Run to_str over the strings of a nested dict / list / tuple structure.

    enc is handed to to_str as the target encoding; relaxed is forwarded
    to _convert_deep.
    """
    return _convert_deep(x, enc, convert=to_str, relaxed=relaxed)
@library.python.func.memoize()
def locale_encoding():
    """
    Return the encoding name of the default locale.

    :return: the encoding reported by locale.getdefaultlocale() (possibly
        empty or None when the locale does not define one), or None when the
        name is unknown to the codecs registry or the locale cannot be parsed
    """
    # NOTE(review): locale.getdefaultlocale() is deprecated in recent Python 3
    # releases - confirm which interpreter versions this module must support.
    try:
        loc = locale.getdefaultlocale()[1]
        if loc:
            # Raises LookupError when no codec is registered under this name
            codecs.lookup(loc)
        return loc
    except LookupError as e:
        logger.debug('Cannot get system locale: %s', e)
        return None
    except ValueError as e:
        # Logger.warn is a deprecated alias; Logger.warning is the supported name
        logger.warning('Cannot get system locale: %s', e)
        return None
def fs_encoding():
    """Return the encoding name reported by sys.getfilesystemencoding()."""
    encoding_name = sys.getfilesystemencoding()
    return encoding_name
def guess_default_encoding():
    """
    Return the locale encoding, falling back to DEFAULT_ENCODING when
    locale_encoding() yields nothing usable (None or an empty name).
    """
    return locale_encoding() or DEFAULT_ENCODING
@library.python.func.memoize()
def get_stream_encoding(stream):
    """
    Return the encoding to use for stream.

    :param stream: file-like object, optionally exposing an ``encoding`` attribute
    :return: the stream's own encoding when it is set and known to the codecs
        registry, DEFAULT_ENCODING otherwise
    """
    # Not every file-like object carries an ``encoding`` attribute
    # (io.BytesIO, for example); treat a missing one like an unset one.
    stream_enc = getattr(stream, 'encoding', None)
    if stream_enc:
        try:
            codecs.lookup(stream_enc)
            return stream_enc
        except LookupError:
            # Unknown codec name: fall through to the default
            pass
    return DEFAULT_ENCODING
def encode(value, encoding=DEFAULT_ENCODING):
    """
    Encode value into a byte string using encoding.

    Byte-string input is first decoded with the same encoding, silently
    dropping undecodable bytes (errors='ignore'), and then encoded back;
    other input is encoded directly via its ``encode`` method.
    """
    is_binary = isinstance(value, six.binary_type)
    text = value.decode(encoding, errors='ignore') if is_binary else value
    return text.encode(encoding)
|