contrib/python/fonttools/fontTools/encodings/codecs.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135

"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)
but missing from Python.  See https://github.com/fonttools/fonttools/issues/236 for details."""

import codecs
import encodings


class ExtendCodec(codecs.Codec):
    """Codec that layers a small byte <-> character mapping over a base codec.

    All the real work is delegated to ``base_encoding``.  Input that the base
    codec rejects is looked up in ``mapping`` (when decoding) or in its
    inverse (when encoding) by an error handler that is registered under the
    codec's own name.  Anything that is in neither is passed to the error
    handler the caller asked for (``"strict"``, ``"replace"``, ...).
    """

    def __init__(self, name, base_encoding, mapping):
        """Build the codec and register its error handler.

        Args:
            name: Name of the extended encoding; also used as the name of the
                error handler registered with ``codecs.register_error``.
            base_encoding: Name of the codec that handles everything not
                covered by ``mapping``.
            mapping: Dict from ``bytes`` to the ``str`` they decode to.
        """
        self.name = name
        self.base_encoding = base_encoding
        self.mapping = mapping
        self.reverse = {v: k for k, v in mapping.items()}
        # Longest decoded value: bounds how many characters error() has to
        # try when encoding.  default=0 keeps an empty mapping usable.
        self.max_len = max((len(v) for v in mapping.values()), default=0)
        self.info = codecs.CodecInfo(
            name=self.name, encode=self.encode, decode=self.decode
        )
        codecs.register_error(name, self.error)

    def _map(self, mapper, output_type, exc_type, input, errors):
        """Shared driver behind :meth:`encode` and :meth:`decode`.

        Args:
            mapper: ``codecs.encode`` or ``codecs.decode``.
            output_type: ``bytes`` when encoding, ``str`` when decoding.
            exc_type: The ``UnicodeError`` subclass ``mapper`` raises.
            input: The data to convert.
            errors: Name of the error handler requested by the caller.

        Returns:
            ``(converted, len(input))``.

        Raises:
            LookupError: If ``errors`` is not a registered error handler.
            UnicodeError: Whatever the requested error handler raises.
        """
        base_error_handler = codecs.lookup_error(errors)
        length = len(input)
        out = output_type()
        while input:
            # First try with self.error as the error handler; it resolves
            # everything that is present in the mapping.
            try:
                part = mapper(input, self.base_encoding, errors=self.name)
            except exc_type as e:
                # Convert the part before the failure, then let the handler
                # requested by the caller deal with the failure itself.
                out += mapper(input[: e.start], self.base_encoding, self.name)
                replacement, pos = base_error_handler(e)
                if output_type is bytes and isinstance(replacement, str):
                    # Encode handlers such as "replace" or "ignore" return
                    # str; it has to be encoded before joining the output.
                    replacement = mapper(replacement, self.base_encoding, self.name)
                if pos < 0:
                    # Handlers may return a position relative to the end.
                    pos += len(input)
                out += replacement
                input = input[pos:]
            else:
                out += part
                break  # All converted
        return out, length

    def encode(self, input, errors="strict"):
        """Encode ``input`` (str); return ``(bytes, number of characters consumed)``."""
        return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)

    def decode(self, input, errors="strict"):
        """Decode ``input`` (bytes); return ``(str, number of bytes consumed)``."""
        return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)

    def error(self, e):
        """Error handler that resolves failures of the base codec via the mapping.

        For a decode error, progressively longer slices of the offending
        bytes are looked up in ``self.mapping``; for an encode error, slices
        of up to ``self.max_len`` characters starting at ``e.start`` are
        looked up in ``self.reverse``.  The shortest match wins.

        Returns:
            ``(replacement, position to resume at)`` for the first match.

        Raises:
            The exception ``e`` itself, with ``e.encoding`` set to this
            codec's name, when nothing matches.
        """
        if isinstance(e, UnicodeDecodeError):
            for end in range(e.start + 1, e.end + 1):
                s = e.object[e.start : end]
                if s in self.mapping:
                    return self.mapping[s], end
        elif isinstance(e, UnicodeEncodeError):
            for end in range(e.start + 1, e.start + self.max_len + 1):
                s = e.object[e.start : end]
                if s in self.reverse:
                    return self.reverse[s], end
        e.encoding = self.name
        raise e


_extended_encodings = {
    "x_mac_japanese_ttx": (
        "shift_jis",
        {
            b"\xFC": chr(0x007C),
            b"\x7E": chr(0x007E),
            b"\x80": chr(0x005C),
            b"\xA0": chr(0x00A0),
            b"\xFD": chr(0x00A9),
            b"\xFE": chr(0x2122),
            b"\xFF": chr(0x2026),
        },
    ),
    "x_mac_trad_chinese_ttx": (
        "big5",
        {
            b"\x80": chr(0x005C),
            b"\xA0": chr(0x00A0),
            b"\xFD": chr(0x00A9),
            b"\xFE": chr(0x2122),
            b"\xFF": chr(0x2026),
        },
    ),
    "x_mac_korean_ttx": (
        "euc_kr",
        {
            b"\x80": chr(0x00A0),
            b"\x81": chr(0x20A9),
            b"\x82": chr(0x2014),
            b"\x83": chr(0x00A9),
            b"\xFE": chr(0x2122),
            b"\xFF": chr(0x2026),
        },
    ),
    "x_mac_simp_chinese_ttx": (
        "gb2312",
        {
            b"\x80": chr(0x00FC),
            b"\xA0": chr(0x00A0),
            b"\xFD": chr(0x00A9),
            b"\xFE": chr(0x2122),
            b"\xFF": chr(0x2026),
        },
    ),
}

_cache = {}


def search_function(name):
    """Codec search function for the ``*_ttx`` extended encodings.

    Args:
        name: Encoding name as handed over by the codec registry.

    Returns:
        The ``codecs.CodecInfo`` of the matching extended codec, or ``None``
        when ``name`` is not one of the extended encodings or when none of
        its candidate base encodings is available in this interpreter.
    """
    name = encodings.normalize_encoding(name)  # Rather undocumented...
    if name not in _extended_encodings:
        return None

    codec = _cache.get(name)
    if codec is None:
        base_encoding, mapping = _extended_encodings[name]
        assert name[-4:] == "_ttx"
        # Python 2 didn't have any of the encodings that we are implementing
        # in this file.  Python 3 added aliases for the East Asian ones,
        # mapping them "temporarily" to the same base encoding as us, with a
        # comment suggesting that a full implementation will appear some time
        # later.  So try Python's own x_mac_... codec first and, if it is
        # found, use *that* as our base encoding.  This makes our encoding
        # upgrade to the full encoding when and if Python finally implements
        # it.  http://bugs.python.org/issue24041
        for candidate in (name[:-4], base_encoding):
            try:
                codecs.lookup(candidate)
            except LookupError:
                continue
            codec = _cache[name] = ExtendCodec(name, candidate, mapping)
            break
        else:
            # No usable base codec: report "not found" rather than failing
            # with a KeyError on the cache.
            return None
    return codec.info


# Install the search function at import time so that codecs.lookup() can
# resolve the *_ttx encoding names.
codecs.register(search_function)