aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/cython/Cython/Plex/Transitions.py
blob: 3833817946cbfba8825c696acbe6dd721aa7f7a3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#
# Plex - Transition Maps
#
# This version represents state sets directly as dicts for speed.
#

from __future__ import absolute_import

try:
    from sys import maxsize as maxint
except ImportError:
    from sys import maxint


class TransitionMap(object):
    """
    A TransitionMap maps an input event to a set of states.
    An input event is one of: a range of character codes,
    the empty string (representing an epsilon move), or one
    of the special symbols BOL, EOL, EOF.

    For characters, this implementation compactly represents
    the map by means of a list:

      [code_0, states_0, code_1, states_1, code_2, states_2,
        ..., code_n-1, states_n-1, code_n]

    where |code_i| is a character code, and |states_i| is a
    set of states corresponding to characters with codes |c|
    in the range |code_i| <= |c| <= |code_i+1|.

    The following invariants hold:
      n >= 1
      code_0 == -maxint
      code_n == maxint
      code_i < code_i+1 for i in 0..n-1
      states_0 == states_n-1

    Mappings for the special events '', BOL, EOL, EOF are
    kept separately in a dictionary.
    """

    map = None      # The list of codes and states
    special = None  # Mapping for special events

    def __init__(self, map=None, special=None):
        if not map:
            map = [-maxint, {}, maxint]
        if not special:
            special = {}
        self.map = map
        self.special = special
        #self.check() ###

    def add(self, event, new_state,
            TupleType=tuple):
        """
        Add transition to |new_state| on |event|.
        """
        if type(event) is TupleType:
            code0, code1 = event
            i = self.split(code0)
            j = self.split(code1)
            map = self.map
            while i < j:
                map[i + 1][new_state] = 1
                i += 2
        else:
            self.get_special(event)[new_state] = 1

    def add_set(self, event, new_set,
                TupleType=tuple):
        """
        Add transitions to the states in |new_set| on |event|.
        """
        if type(event) is TupleType:
            code0, code1 = event
            i = self.split(code0)
            j = self.split(code1)
            map = self.map
            while i < j:
                map[i + 1].update(new_set)
                i += 2
        else:
            self.get_special(event).update(new_set)

    def get_epsilon(self,
                    none=None):
        """
        Return the mapping for epsilon, or None.
        """
        return self.special.get('', none)

    def iteritems(self,
                  len=len):
        """
        Return the mapping as an iterable of ((code1, code2), state_set) and
        (special_event, state_set) pairs.
        """
        result = []
        map = self.map
        else_set = map[1]
        i = 0
        n = len(map) - 1
        code0 = map[0]
        while i < n:
            set = map[i + 1]
            code1 = map[i + 2]
            if set or else_set:
                result.append(((code0, code1), set))
            code0 = code1
            i += 2
        for event, set in self.special.items():
            if set:
                result.append((event, set))
        return iter(result)

    items = iteritems

    # ------------------- Private methods --------------------

    def split(self, code,
              len=len, maxint=maxint):
        """
        Search the list for the position of the split point for |code|,
        inserting a new split point if necessary. Returns index |i| such
        that |code| == |map[i]|.
        """
        # We use a funky variation on binary search.
        map = self.map
        hi = len(map) - 1
        # Special case: code == map[-1]
        if code == maxint:
            return hi
        # General case
        lo = 0
        # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2
        while hi - lo >= 4:
            # Find midpoint truncated to even index
            mid = ((lo + hi) // 2) & ~1
            if code < map[mid]:
                hi = mid
            else:
                lo = mid
        # map[lo] <= code < map[hi] and hi - lo == 2
        if map[lo] == code:
            return lo
        else:
            map[hi:hi] = [code, map[hi - 1].copy()]
            #self.check() ###
            return hi

    def get_special(self, event):
        """
        Get state set for special event, adding a new entry if necessary.
        """
        special = self.special
        set = special.get(event, None)
        if not set:
            set = {}
            special[event] = set
        return set

    # --------------------- Conversion methods -----------------------

    def __str__(self):
        map_strs = []
        map = self.map
        n = len(map)
        i = 0
        while i < n:
            code = map[i]
            if code == -maxint:
                code_str = "-inf"
            elif code == maxint:
                code_str = "inf"
            else:
                code_str = str(code)
            map_strs.append(code_str)
            i += 1
            if i < n:
                map_strs.append(state_set_str(map[i]))
            i += 1
        special_strs = {}
        for event, set in self.special.items():
            special_strs[event] = state_set_str(set)
        return "[%s]+%s" % (
            ','.join(map_strs),
            special_strs
        )

    # --------------------- Debugging methods -----------------------

    def check(self):
        """Check data structure integrity."""
        if not self.map[-3] < self.map[-1]:
            print(self)
            assert 0

    def dump(self, file):
        map = self.map
        i = 0
        n = len(map) - 1
        while i < n:
            self.dump_range(map[i], map[i + 2], map[i + 1], file)
            i += 2
        for event, set in self.special.items():
            if set:
                if not event:
                    event = 'empty'
                self.dump_trans(event, set, file)

    def dump_range(self, code0, code1, set, file):
        if set:
            if code0 == -maxint:
                if code1 == maxint:
                    k = "any"
                else:
                    k = "< %s" % self.dump_char(code1)
            elif code1 == maxint:
                k = "> %s" % self.dump_char(code0 - 1)
            elif code0 == code1 - 1:
                k = self.dump_char(code0)
            else:
                k = "%s..%s" % (self.dump_char(code0),
                                self.dump_char(code1 - 1))
            self.dump_trans(k, set, file)

    def dump_char(self, code):
        if 0 <= code <= 255:
            return repr(chr(code))
        else:
            return "chr(%d)" % code

    def dump_trans(self, key, set, file):
        file.write("      %s --> %s\n" % (key, self.dump_set(set)))

    def dump_set(self, set):
        return state_set_str(set)


#
#   State set manipulation functions
#

#def merge_state_sets(set1, set2):
#        for state in set2.keys():
#            set1[state] = 1

def state_set_str(set):
    """
    Return a bracketed, comma-separated list of state labels for |set|.

    Each state is shown as "S" followed by its |number| attribute, in
    the iteration order of |set|.
    """
    labels = []
    for state in set:
        labels.append("S%d" % state.number)
    return "[" + ','.join(labels) + "]"