1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)
but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details."""
import codecs
import encodings
class ExtendCodec(codecs.Codec):
def __init__(self, name, base_encoding, mapping):
self.name = name
self.base_encoding = base_encoding
self.mapping = mapping
self.reverse = {v: k for k, v in mapping.items()}
self.max_len = max(len(v) for v in mapping.values())
self.info = codecs.CodecInfo(
name=self.name, encode=self.encode, decode=self.decode
)
codecs.register_error(name, self.error)
def _map(self, mapper, output_type, exc_type, input, errors):
base_error_handler = codecs.lookup_error(errors)
length = len(input)
out = output_type()
while input:
# first try to use self.error as the error handler
try:
part = mapper(input, self.base_encoding, errors=self.name)
out += part
break # All converted
except exc_type as e:
# else convert the correct part, handle error as requested and continue
out += mapper(input[: e.start], self.base_encoding, self.name)
replacement, pos = base_error_handler(e)
out += replacement
input = input[pos:]
return out, length
def encode(self, input, errors="strict"):
return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)
def decode(self, input, errors="strict"):
return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)
def error(self, e):
if isinstance(e, UnicodeDecodeError):
for end in range(e.start + 1, e.end + 1):
s = e.object[e.start : end]
if s in self.mapping:
return self.mapping[s], end
elif isinstance(e, UnicodeEncodeError):
for end in range(e.start + 1, e.start + self.max_len + 1):
s = e.object[e.start : end]
if s in self.reverse:
return self.reverse[s], end
e.encoding = self.name
raise e
_extended_encodings = {
"x_mac_japanese_ttx": (
"shift_jis",
{
b"\xFC": chr(0x007C),
b"\x7E": chr(0x007E),
b"\x80": chr(0x005C),
b"\xA0": chr(0x00A0),
b"\xFD": chr(0x00A9),
b"\xFE": chr(0x2122),
b"\xFF": chr(0x2026),
},
),
"x_mac_trad_chinese_ttx": (
"big5",
{
b"\x80": chr(0x005C),
b"\xA0": chr(0x00A0),
b"\xFD": chr(0x00A9),
b"\xFE": chr(0x2122),
b"\xFF": chr(0x2026),
},
),
"x_mac_korean_ttx": (
"euc_kr",
{
b"\x80": chr(0x00A0),
b"\x81": chr(0x20A9),
b"\x82": chr(0x2014),
b"\x83": chr(0x00A9),
b"\xFE": chr(0x2122),
b"\xFF": chr(0x2026),
},
),
"x_mac_simp_chinese_ttx": (
"gb2312",
{
b"\x80": chr(0x00FC),
b"\xA0": chr(0x00A0),
b"\xFD": chr(0x00A9),
b"\xFE": chr(0x2122),
b"\xFF": chr(0x2026),
},
),
}
_cache = {}
def search_function(name):
name = encodings.normalize_encoding(name) # Rather undocumented...
if name in _extended_encodings:
if name not in _cache:
base_encoding, mapping = _extended_encodings[name]
assert name[-4:] == "_ttx"
# Python 2 didn't have any of the encodings that we are implementing
# in this file. Python 3 added aliases for the East Asian ones, mapping
# them "temporarily" to the same base encoding as us, with a comment
# suggesting that full implementation will appear some time later.
# As such, try the Python version of the x_mac_... first, if that is found,
# use *that* as our base encoding. This would make our encoding upgrade
# to the full encoding when and if Python finally implements that.
# http://bugs.python.org/issue24041
base_encodings = [name[:-4], base_encoding]
for base_encoding in base_encodings:
try:
codecs.lookup(base_encoding)
except LookupError:
continue
_cache[name] = ExtendCodec(name, base_encoding, mapping)
break
return _cache[name].info
return None
codecs.register(search_function)
|