author    alexv-smirnov <alex@ydb.tech>  2023-03-15 19:59:12 +0300
committer alexv-smirnov <alex@ydb.tech>  2023-03-15 19:59:12 +0300
commit    056bb284ccf8dd6793ec3a54ffa36c4fb2b9ad11 (patch)
tree      4740980126f32e3af7937ba0ca5f83e59baa4ab0 /contrib/tools/cython/Cython/Plex
parent    269126dcced1cc8b53eb4398b4a33e5142f10290 (diff)
download  ydb-056bb284ccf8dd6793ec3a54ffa36c4fb2b9ad11.tar.gz

add library/cpp/actors, ymake build to ydb oss export
Diffstat (limited to 'contrib/tools/cython/Cython/Plex')
-rw-r--r--  contrib/tools/cython/Cython/Plex/Actions.pxd      25
-rw-r--r--  contrib/tools/cython/Cython/Plex/Actions.py      110
-rw-r--r--  contrib/tools/cython/Cython/Plex/DFA.py          164
-rw-r--r--  contrib/tools/cython/Cython/Plex/Errors.py        54
-rw-r--r--  contrib/tools/cython/Cython/Plex/Lexicons.py     200
-rw-r--r--  contrib/tools/cython/Cython/Plex/Machines.py     255
-rw-r--r--  contrib/tools/cython/Cython/Plex/Regexps.py      576
-rw-r--r--  contrib/tools/cython/Cython/Plex/Scanners.pxd     50
-rw-r--r--  contrib/tools/cython/Cython/Plex/Scanners.py     338
-rw-r--r--  contrib/tools/cython/Cython/Plex/Timing.py        23
-rw-r--r--  contrib/tools/cython/Cython/Plex/Traditional.py  158
-rw-r--r--  contrib/tools/cython/Cython/Plex/Transitions.py  251
-rw-r--r--  contrib/tools/cython/Cython/Plex/__init__.py      39
13 files changed, 2243 insertions, 0 deletions
diff --git a/contrib/tools/cython/Cython/Plex/Actions.pxd b/contrib/tools/cython/Cython/Plex/Actions.pxd
new file mode 100644
index 0000000000..34660a2d9b
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Actions.pxd
@@ -0,0 +1,25 @@
+
+cdef class Action:
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Return(Action):
+ cdef object value
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Call(Action):
+ cdef object function
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Begin(Action):
+ cdef object state_name
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Ignore(Action):
+ cdef perform(self, token_stream, text)
+
+cdef class Text(Action):
+ cdef perform(self, token_stream, text)
diff --git a/contrib/tools/cython/Cython/Plex/Actions.py b/contrib/tools/cython/Cython/Plex/Actions.py
new file mode 100644
index 0000000000..c88176e716
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Actions.py
@@ -0,0 +1,110 @@
+# cython: auto_pickle=False
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Actions for use in token specifications
+#
+#=======================================================================
+
+class Action(object):
+ def perform(self, token_stream, text):
+ pass # abstract
+
+ def same_as(self, other):
+ return self is other
+
+
+class Return(Action):
+ """
+ Internal Plex action which causes |value| to
+ be returned as the value of the associated token
+ """
+
+ def __init__(self, value):
+ self.value = value
+
+ def perform(self, token_stream, text):
+ return self.value
+
+ def same_as(self, other):
+ return isinstance(other, Return) and self.value == other.value
+
+ def __repr__(self):
+ return "Return(%s)" % repr(self.value)
+
+
+class Call(Action):
+ """
+ Internal Plex action which causes a function to be called.
+ """
+
+ def __init__(self, function):
+ self.function = function
+
+ def perform(self, token_stream, text):
+ return self.function(token_stream, text)
+
+ def __repr__(self):
+ return "Call(%s)" % self.function.__name__
+
+ def same_as(self, other):
+ return isinstance(other, Call) and self.function is other.function
+
+
+class Begin(Action):
+ """
+ Begin(state_name) is a Plex action which causes the Scanner to
+ enter the state |state_name|. See the docstring of Plex.Lexicon
+ for more information.
+ """
+
+ def __init__(self, state_name):
+ self.state_name = state_name
+
+ def perform(self, token_stream, text):
+ token_stream.begin(self.state_name)
+
+ def __repr__(self):
+ return "Begin(%s)" % self.state_name
+
+ def same_as(self, other):
+ return isinstance(other, Begin) and self.state_name == other.state_name
+
+
+class Ignore(Action):
+ """
+ IGNORE is a Plex action which causes its associated token
+ to be ignored. See the docstring of Plex.Lexicon for more
+ information.
+ """
+
+ def perform(self, token_stream, text):
+ return None
+
+ def __repr__(self):
+ return "IGNORE"
+
+
+IGNORE = Ignore()
+#IGNORE.__doc__ = Ignore.__doc__
+
+
+class Text(Action):
+ """
+ TEXT is a Plex action which causes the text of a token to
+ be returned as the value of the token. See the docstring of
+ Plex.Lexicon for more information.
+ """
+
+ def perform(self, token_stream, text):
+ return text
+
+ def __repr__(self):
+ return "TEXT"
+
+
+TEXT = Text()
+#TEXT.__doc__ = Text.__doc__
+
+
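
The three action kinds above are normally reached through a Lexicon token
table rather than instantiated directly: a plain value is wrapped in Return,
a callable in Call, and the IGNORE/TEXT singletons are used as-is. A minimal
sketch of each (token names are illustrative; Lexicon, Scanner and the
pattern constructors appear later in this diff):

    from io import StringIO
    from Cython.Plex import Lexicon, Scanner, Str, Any, Rep1, TEXT, IGNORE

    lexicon = Lexicon([
        (Rep1(Any("0123456789")), TEXT),      # Text: token value is the matched text
        (Str("let"), 'LET'),                  # plain value: wrapped in Return('LET')
        (Str("#"), lambda s, text: None),     # callable: wrapped in Call; None produces no token
        (Rep1(Any(" \t\n")), IGNORE),         # Ignore: matched text is skipped
    ])

    scanner = Scanner(lexicon, StringIO(u"let 42"), name='example')
    print(scanner.read())   # ('LET', 'let')
    print(scanner.read())   # ('42', '42')
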
diff --git a/contrib/tools/cython/Cython/Plex/DFA.py b/contrib/tools/cython/Cython/Plex/DFA.py
new file mode 100644
index 0000000000..76324621fc
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/DFA.py
@@ -0,0 +1,164 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Converting NFA to DFA
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+from . import Machines
+from .Machines import LOWEST_PRIORITY
+from .Transitions import TransitionMap
+
+
+def nfa_to_dfa(old_machine, debug=None):
+ """
+ Given a nondeterministic Machine, return a new equivalent
+ Machine which is deterministic.
+ """
+ # We build a new machine whose states correspond to sets of states
+ # in the old machine. Initially we add a new state corresponding to
+ # the epsilon-closure of each initial old state. Then we give transitions
+ # to each new state which are the union of all transitions out of any
+ # of the corresponding old states. The new state reached on a given
+ # character is the one corresponding to the set of states reachable
+ # on that character from any of the old states. As new combinations of
+ # old states are created, new states are added as needed until closure
+ # is reached.
+ new_machine = Machines.FastMachine()
+ state_map = StateMap(new_machine)
+ # Seed the process using the initial states of the old machine.
+ # Make the corresponding new states into initial states of the new
+ # machine with the same names.
+ for (key, old_state) in old_machine.initial_states.items():
+ new_state = state_map.old_to_new(epsilon_closure(old_state))
+ new_machine.make_initial_state(key, new_state)
+ # Tricky bit here: we add things to the end of this list while we're
+ # iterating over it. The iteration stops when closure is achieved.
+ for new_state in new_machine.states:
+ transitions = TransitionMap()
+ for old_state in state_map.new_to_old(new_state):
+ for event, old_target_states in old_state.transitions.items():
+ if event and old_target_states:
+ transitions.add_set(event, set_epsilon_closure(old_target_states))
+ for event, old_states in transitions.items():
+ new_machine.add_transitions(new_state, event, state_map.old_to_new(old_states))
+ if debug:
+ debug.write("\n===== State Mapping =====\n")
+ state_map.dump(debug)
+ return new_machine
+
+
+def set_epsilon_closure(state_set):
+ """
+ Given a set of states, return the union of the epsilon
+ closures of its member states.
+ """
+ result = {}
+ for state1 in state_set:
+ for state2 in epsilon_closure(state1):
+ result[state2] = 1
+ return result
+
+
+def epsilon_closure(state):
+ """
+ Return the set of states reachable from the given state
+ by epsilon moves.
+ """
+ # Cache the result
+ result = state.epsilon_closure
+ if result is None:
+ result = {}
+ state.epsilon_closure = result
+ add_to_epsilon_closure(result, state)
+ return result
+
+
+def add_to_epsilon_closure(state_set, state):
+ """
+ Recursively add to |state_set| states reachable from the given state
+ by epsilon moves.
+ """
+ if not state_set.get(state, 0):
+ state_set[state] = 1
+ state_set_2 = state.transitions.get_epsilon()
+ if state_set_2:
+ for state2 in state_set_2:
+ add_to_epsilon_closure(state_set, state2)
+
+
+class StateMap(object):
+ """
+ Helper class used by nfa_to_dfa() to map back and forth between
+ sets of states from the old machine and states of the new machine.
+ """
+ new_machine = None # Machine
+ old_to_new_dict = None # {(old_state,...) : new_state}
+ new_to_old_dict = None # {id(new_state) : old_state_set}
+
+ def __init__(self, new_machine):
+ self.new_machine = new_machine
+ self.old_to_new_dict = {}
+ self.new_to_old_dict = {}
+
+ def old_to_new(self, old_state_set):
+ """
+ Return the state of the new machine corresponding to the
+ set of old machine states represented by |state_set|. A new
+ state will be created if necessary. If any of the old states
+ are accepting states, the new state will be an accepting state
+ with the highest priority action from the old states.
+ """
+ key = self.make_key(old_state_set)
+ new_state = self.old_to_new_dict.get(key, None)
+ if not new_state:
+ action = self.highest_priority_action(old_state_set)
+ new_state = self.new_machine.new_state(action)
+ self.old_to_new_dict[key] = new_state
+ self.new_to_old_dict[id(new_state)] = old_state_set
+ #for old_state in old_state_set.keys():
+ #new_state.merge_actions(old_state)
+ return new_state
+
+ def highest_priority_action(self, state_set):
+ best_action = None
+ best_priority = LOWEST_PRIORITY
+ for state in state_set:
+ priority = state.action_priority
+ if priority > best_priority:
+ best_action = state.action
+ best_priority = priority
+ return best_action
+
+ # def old_to_new_set(self, old_state_set):
+ # """
+ # Return the new state corresponding to a set of old states as
+ # a singleton set.
+ # """
+ # return {self.old_to_new(old_state_set):1}
+
+ def new_to_old(self, new_state):
+ """Given a new state, return a set of corresponding old states."""
+ return self.new_to_old_dict[id(new_state)]
+
+ def make_key(self, state_set):
+ """
+ Convert a set of states into a uniquified
+ sorted tuple suitable for use as a dictionary key.
+ """
+ lst = list(state_set)
+ lst.sort()
+ return tuple(lst)
+
+ def dump(self, file):
+ from .Transitions import state_set_str
+
+ for new_state in self.new_machine.states:
+ old_state_set = self.new_to_old_dict[id(new_state)]
+ file.write(" State %s <-- %s\n" % (
+ new_state['number'], state_set_str(old_state_set)))
+
+
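
nfa_to_dfa() is normally driven by Lexicon, but it can also be applied to a
hand-built machine. A sketch of the subset construction for the two tokens
"a" and "ab" (state layout and token values are illustrative):

    import sys
    from Cython.Plex import Actions, DFA, Machines

    nfa = Machines.Machine()
    start = nfa.new_initial_state('')
    s1 = nfa.new_state()
    s2 = nfa.new_state()
    start.add_transition((ord('a'), ord('a') + 1), s1)
    s1.set_action(Actions.Return('A'), priority=1)    # accept "a"
    s1.add_transition((ord('b'), ord('b') + 1), s2)
    s2.set_action(Actions.Return('AB'), priority=2)   # accept "ab"

    # Each state of the resulting FastMachine corresponds to an
    # epsilon-closed set of NFA states; passing debug dumps the mapping.
    dfa = DFA.nfa_to_dfa(nfa, debug=sys.stdout)
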
diff --git a/contrib/tools/cython/Cython/Plex/Errors.py b/contrib/tools/cython/Cython/Plex/Errors.py
new file mode 100644
index 0000000000..f460100d77
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Errors.py
@@ -0,0 +1,54 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Exception classes
+#
+#=======================================================================
+
+
+class PlexError(Exception):
+ message = ""
+
+
+class PlexTypeError(PlexError, TypeError):
+ pass
+
+
+class PlexValueError(PlexError, ValueError):
+ pass
+
+
+class InvalidRegex(PlexError):
+ pass
+
+
+class InvalidToken(PlexError):
+ def __init__(self, token_number, message):
+ PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
+
+
+class InvalidScanner(PlexError):
+ pass
+
+
+class AmbiguousAction(PlexError):
+ message = "Two tokens with different actions can match the same string"
+
+ def __init__(self):
+ pass
+
+
+class UnrecognizedInput(PlexError):
+ scanner = None
+ position = None
+ state_name = None
+
+ def __init__(self, scanner, state_name):
+ self.scanner = scanner
+ self.position = scanner.get_position()
+ self.state_name = state_name
+
+ def __str__(self):
+ return ("'%s', line %d, char %d: Token not recognised in state %r" % (
+ self.position + (self.state_name,)))
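
UnrecognizedInput is the only exception here that carries scanner state; it
is raised during read() when no pattern of the current state matches. A
self-contained sketch (the lexicon is illustrative):

    from io import StringIO
    from Cython.Plex import Any, Errors, IGNORE, Lexicon, Rep1, Scanner, TEXT

    lexicon = Lexicon([
        (Rep1(Any("abcdefghijklmnopqrstuvwxyz")), TEXT),
        (Rep1(Any(" \t\n")), IGNORE),
    ])
    scanner = Scanner(lexicon, StringIO(u"hi ?"), name='bad.txt')
    try:
        while scanner.read()[0] is not None:
            pass
    except Errors.UnrecognizedInput as e:
        print(e)   # 'bad.txt', line 1, char 3: Token not recognised in state ''
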
diff --git a/contrib/tools/cython/Cython/Plex/Lexicons.py b/contrib/tools/cython/Cython/Plex/Lexicons.py
new file mode 100644
index 0000000000..787f5854b8
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Lexicons.py
@@ -0,0 +1,200 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Lexical Analyser Specification
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import types
+
+from . import Actions
+from . import DFA
+from . import Errors
+from . import Machines
+from . import Regexps
+
+# debug_flags for Lexicon constructor
+DUMP_NFA = 1
+DUMP_DFA = 2
+
+
+class State(object):
+ """
+ This class is used as part of a Plex.Lexicon specification to
+ introduce a user-defined state.
+
+ Constructor:
+
+ State(name, token_specifications)
+ """
+
+ name = None
+ tokens = None
+
+ def __init__(self, name, tokens):
+ self.name = name
+ self.tokens = tokens
+
+
+class Lexicon(object):
+ """
+ Lexicon(specification) builds a lexical analyser from the given
+ |specification|. The specification consists of a list of
+ specification items. Each specification item may be either:
+
+ 1) A token definition, which is a tuple:
+
+ (pattern, action)
+
+ The |pattern| is a regular expression built using the
+ constructors defined in the Plex module.
+
+ The |action| is the action to be performed when this pattern
+ is recognised (see below).
+
+ 2) A state definition:
+
+ State(name, tokens)
+
+ where |name| is a character string naming the state,
+ and |tokens| is a list of token definitions as
+ above. The meaning and usage of states is described
+ below.
+
+ Actions
+ -------
+
+ The |action| in a token specification may be one of three things:
+
+ 1) A function, which is called as follows:
+
+ function(scanner, text)
+
+ where |scanner| is the relevant Scanner instance, and |text|
+ is the matched text. If the function returns anything
+ other than None, that value is returned as the value of the
+ token. If it returns None, scanning continues as if the IGNORE
+ action were specified (see below).
+
+ 2) One of the following special actions:
+
+ IGNORE means that the recognised characters will be treated as
+ white space and ignored. Scanning continues, and nothing is
+ returned, until the next non-ignored token is recognised.
+
+ TEXT causes the scanned text itself to be returned as the
+ value of the token.
+
+ 3) Any other value, which is returned as the value of the token.
+
+ States
+ ------
+
+ At any given time, the scanner is in one of a number of states.
+ Associated with each state is a set of possible tokens. When scanning,
+ only tokens associated with the current state are recognised.
+
+ There is a default state, whose name is the empty string. Token
+ definitions which are not inside any State definition belong to
+ the default state.
+
+ The initial state of the scanner is the default state. The state can
+ be changed in one of two ways:
+
+ 1) Using Begin(state_name) as the action of a token.
+
+ 2) Calling the begin(state_name) method of the Scanner.
+
+ To change back to the default state, use '' as the state name.
+ """
+
+ machine = None # Machine
+ tables = None # StateTableMachine
+
+ def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
+ if not isinstance(specifications, list):
+ raise Errors.InvalidScanner("Scanner definition is not a list")
+ if timings:
+ from .Timing import time
+
+ total_time = 0.0
+ time1 = time()
+ nfa = Machines.Machine()
+ default_initial_state = nfa.new_initial_state('')
+ token_number = 1
+ for spec in specifications:
+ if isinstance(spec, State):
+ user_initial_state = nfa.new_initial_state(spec.name)
+ for token in spec.tokens:
+ self.add_token_to_machine(
+ nfa, user_initial_state, token, token_number)
+ token_number += 1
+ elif isinstance(spec, tuple):
+ self.add_token_to_machine(
+ nfa, default_initial_state, spec, token_number)
+ token_number += 1
+ else:
+ raise Errors.InvalidToken(
+ token_number,
+ "Expected a token definition (tuple) or State instance")
+ if timings:
+ time2 = time()
+ total_time = total_time + (time2 - time1)
+ time3 = time()
+ if debug and (debug_flags & 1):
+ debug.write("\n============= NFA ===========\n")
+ nfa.dump(debug)
+ dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
+ if timings:
+ time4 = time()
+ total_time = total_time + (time4 - time3)
+ if debug and (debug_flags & 2):
+ debug.write("\n============= DFA ===========\n")
+ dfa.dump(debug)
+ if timings:
+ timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
+ timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
+ timings.write("TOTAL : %5.2f\n" % total_time)
+ self.machine = dfa
+
+ def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
+ try:
+ (re, action_spec) = self.parse_token_definition(token_spec)
+ # Disabled this -- matching empty strings can be useful
+ #if re.nullable:
+ # raise Errors.InvalidToken(
+ # token_number, "Pattern can match 0 input symbols")
+ if isinstance(action_spec, Actions.Action):
+ action = action_spec
+ else:
+ try:
+ action_spec.__call__
+ except AttributeError:
+ action = Actions.Return(action_spec)
+ else:
+ action = Actions.Call(action_spec)
+ final_state = machine.new_state()
+ re.build_machine(machine, initial_state, final_state,
+ match_bol=1, nocase=0)
+ final_state.set_action(action, priority=-token_number)
+ except Errors.PlexError as e:
+ raise e.__class__("Token number %d: %s" % (token_number, e))
+
+ def parse_token_definition(self, token_spec):
+ if not isinstance(token_spec, tuple):
+ raise Errors.InvalidToken("Token definition is not a tuple")
+ if len(token_spec) != 2:
+ raise Errors.InvalidToken("Wrong number of items in token definition")
+ pattern, action = token_spec
+ if not isinstance(pattern, Regexps.RE):
+ raise Errors.InvalidToken("Pattern is not an RE instance")
+ return (pattern, action)
+
+ def get_initial_state(self, name):
+ return self.machine.get_initial_state(name)
+
+
+
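
A compact sketch tying the docstring together -- token tuples, the action
kinds, and a user-defined state entered via Begin (identifiers and token
values are illustrative, not part of Plex):

    from io import StringIO
    from Cython.Plex import (Lexicon, Scanner, State, Str, Any, AnyBut,
                             Rep1, Begin, TEXT, IGNORE)

    letters = "abcdefghijklmnopqrstuvwxyz"
    lexicon = Lexicon([
        (Rep1(Any(letters)), TEXT),        # default state: bare words
        (Str('"'), Begin('string')),       # enter the 'string' state
        (Rep1(Any(" \t\n")), IGNORE),
        State('string', [
            (Str('"'), Begin('')),         # back to the default state
            (Rep1(AnyBut('"')), 'STR'),    # constant token value
        ]),
    ])

    scanner = Scanner(lexicon, StringIO(u'hi "there"'))
    while True:
        value, text = scanner.read()
        if value is None:
            break
        print(value, repr(text))
    # prints: hi 'hi'  then  STR 'there'
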
diff --git a/contrib/tools/cython/Cython/Plex/Machines.py b/contrib/tools/cython/Cython/Plex/Machines.py
new file mode 100644
index 0000000000..398850976b
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Machines.py
@@ -0,0 +1,255 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Classes for building NFAs and DFAs
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import sys
+
+from .Transitions import TransitionMap
+
+try:
+ from sys import maxsize as maxint
+except ImportError:
+ from sys import maxint
+
+try:
+ unichr
+except NameError:
+ unichr = chr
+
+LOWEST_PRIORITY = -maxint
+
+
+class Machine(object):
+ """A collection of Nodes representing an NFA or DFA."""
+ states = None # [Node]
+ next_state_number = 1
+ initial_states = None # {(name, bol): Node}
+
+ def __init__(self):
+ self.states = []
+ self.initial_states = {}
+
+ def __del__(self):
+ #print "Destroying", self ###
+ for state in self.states:
+ state.destroy()
+
+ def new_state(self):
+ """Add a new state to the machine and return it."""
+ s = Node()
+ n = self.next_state_number
+ self.next_state_number = n + 1
+ s.number = n
+ self.states.append(s)
+ return s
+
+ def new_initial_state(self, name):
+ state = self.new_state()
+ self.make_initial_state(name, state)
+ return state
+
+ def make_initial_state(self, name, state):
+ self.initial_states[name] = state
+
+ def get_initial_state(self, name):
+ return self.initial_states[name]
+
+ def dump(self, file):
+ file.write("Plex.Machine:\n")
+ if self.initial_states is not None:
+ file.write(" Initial states:\n")
+ for (name, state) in sorted(self.initial_states.items()):
+ file.write(" '%s': %d\n" % (name, state.number))
+ for s in self.states:
+ s.dump(file)
+
+
+class Node(object):
+ """A state of an NFA or DFA."""
+ transitions = None # TransitionMap
+ action = None # Action
+ action_priority = None # integer
+ number = 0 # for debug output
+ epsilon_closure = None # used by nfa_to_dfa()
+
+ def __init__(self):
+ # Preinitialise the list of empty transitions, because
+ # the nfa-to-dfa algorithm needs it
+ #self.transitions = {'':[]}
+ self.transitions = TransitionMap()
+ self.action_priority = LOWEST_PRIORITY
+
+ def destroy(self):
+ #print "Destroying", self ###
+ self.transitions = None
+ self.action = None
+ self.epsilon_closure = None
+
+ def add_transition(self, event, new_state):
+ self.transitions.add(event, new_state)
+
+ def link_to(self, state):
+ """Add an epsilon-move from this state to another state."""
+ self.add_transition('', state)
+
+ def set_action(self, action, priority):
+ """Make this an accepting state with the given action. If
+ there is already an action, choose the action with highest
+ priority."""
+ if priority > self.action_priority:
+ self.action = action
+ self.action_priority = priority
+
+ def get_action(self):
+ return self.action
+
+ def get_action_priority(self):
+ return self.action_priority
+
+ def is_accepting(self):
+ return self.action is not None
+
+ def __str__(self):
+ return "State %d" % self.number
+
+ def dump(self, file):
+ # Header
+ file.write(" State %d:\n" % self.number)
+ # Transitions
+ # self.dump_transitions(file)
+ self.transitions.dump(file)
+ # Action
+ action = self.action
+ priority = self.action_priority
+ if action is not None:
+ file.write(" %s [priority %d]\n" % (action, priority))
+
+ def __lt__(self, other):
+ return self.number < other.number
+
+
+class FastMachine(object):
+ """
+ FastMachine is a deterministic machine represented in a way that
+ allows fast scanning.
+ """
+ initial_states = None # {state_name:state}
+ states = None # [state] where state = {event:state, 'else':state, 'action':Action}
+ next_number = 1 # for debugging
+
+ new_state_template = {
+ '': None, 'bol': None, 'eol': None, 'eof': None, 'else': None
+ }
+
+ def __init__(self):
+ self.initial_states = {}
+ self.states = []
+
+ def __del__(self):
+ for state in self.states:
+ state.clear()
+
+ def new_state(self, action=None):
+ number = self.next_number
+ self.next_number = number + 1
+ result = self.new_state_template.copy()
+ result['number'] = number
+ result['action'] = action
+ self.states.append(result)
+ return result
+
+ def make_initial_state(self, name, state):
+ self.initial_states[name] = state
+
+ def add_transitions(self, state, event, new_state, maxint=maxint):
+ if type(event) is tuple:
+ code0, code1 = event
+ if code0 == -maxint:
+ state['else'] = new_state
+ elif code1 != maxint:
+ while code0 < code1:
+ state[unichr(code0)] = new_state
+ code0 += 1
+ else:
+ state[event] = new_state
+
+ def get_initial_state(self, name):
+ return self.initial_states[name]
+
+ def dump(self, file):
+ file.write("Plex.FastMachine:\n")
+ file.write(" Initial states:\n")
+ for name, state in sorted(self.initial_states.items()):
+ file.write(" %s: %s\n" % (repr(name), state['number']))
+ for state in self.states:
+ self.dump_state(state, file)
+
+ def dump_state(self, state, file):
+ # Header
+ file.write(" State %d:\n" % state['number'])
+ # Transitions
+ self.dump_transitions(state, file)
+ # Action
+ action = state['action']
+ if action is not None:
+ file.write(" %s\n" % action)
+
+ def dump_transitions(self, state, file):
+ chars_leading_to_state = {}
+ special_to_state = {}
+ for (c, s) in state.items():
+ if len(c) == 1:
+ chars = chars_leading_to_state.get(id(s), None)
+ if chars is None:
+ chars = []
+ chars_leading_to_state[id(s)] = chars
+ chars.append(c)
+ elif len(c) <= 4:
+ special_to_state[c] = s
+ ranges_to_state = {}
+ for state in self.states:
+ char_list = chars_leading_to_state.get(id(state), None)
+ if char_list:
+ ranges = self.chars_to_ranges(char_list)
+ ranges_to_state[ranges] = state
+ ranges_list = list(ranges_to_state.keys())  # dict views have no sort() on Python 3
+ ranges_list.sort()
+ for ranges in ranges_list:
+ key = self.ranges_to_string(ranges)
+ state = ranges_to_state[ranges]
+ file.write(" %s --> State %d\n" % (key, state['number']))
+ for key in ('bol', 'eol', 'eof', 'else'):
+ state = special_to_state.get(key, None)
+ if state:
+ file.write(" %s --> State %d\n" % (key, state['number']))
+
+ def chars_to_ranges(self, char_list):
+ char_list.sort()
+ i = 0
+ n = len(char_list)
+ result = []
+ while i < n:
+ c1 = ord(char_list[i])
+ c2 = c1
+ i += 1
+ while i < n and ord(char_list[i]) == c2 + 1:
+ i += 1
+ c2 += 1
+ result.append((chr(c1), chr(c2)))
+ return tuple(result)
+
+ def ranges_to_string(self, range_list):
+ return ','.join(map(self.range_to_string, range_list))
+
+ def range_to_string(self, range_tuple):
+ (c1, c2) = range_tuple
+ if c1 == c2:
+ return repr(c1)
+ else:
+ return "%s..%s" % (repr(c1), repr(c2))
diff --git a/contrib/tools/cython/Cython/Plex/Regexps.py b/contrib/tools/cython/Cython/Plex/Regexps.py
new file mode 100644
index 0000000000..41816c939a
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Regexps.py
@@ -0,0 +1,576 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Regular Expressions
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import types
+try:
+ from sys import maxsize as maxint
+except ImportError:
+ from sys import maxint
+
+from . import Errors
+
+#
+# Constants
+#
+
+BOL = 'bol'
+EOL = 'eol'
+EOF = 'eof'
+
+nl_code = ord('\n')
+
+
+#
+# Helper functions
+#
+
+def chars_to_ranges(s):
+ """
+ Return a list of character codes consisting of pairs
+ [code1a, code1b, code2a, code2b,...] which cover all
+ the characters in |s|.
+ """
+ char_list = list(s)
+ char_list.sort()
+ i = 0
+ n = len(char_list)
+ result = []
+ while i < n:
+ code1 = ord(char_list[i])
+ code2 = code1 + 1
+ i += 1
+ while i < n and code2 >= ord(char_list[i]):
+ code2 += 1
+ i += 1
+ result.append(code1)
+ result.append(code2)
+ return result
+
+
+def uppercase_range(code1, code2):
+ """
+ If the range of characters from code1 to code2-1 includes any
+ lower case letters, return the corresponding upper case range.
+ """
+ code3 = max(code1, ord('a'))
+ code4 = min(code2, ord('z') + 1)
+ if code3 < code4:
+ d = ord('A') - ord('a')
+ return (code3 + d, code4 + d)
+ else:
+ return None
+
+
+def lowercase_range(code1, code2):
+ """
+ If the range of characters from code1 to code2-1 includes any
+ upper case letters, return the corresponding lower case range.
+ """
+ code3 = max(code1, ord('A'))
+ code4 = min(code2, ord('Z') + 1)
+ if code3 < code4:
+ d = ord('a') - ord('A')
+ return (code3 + d, code4 + d)
+ else:
+ return None
+
+
+def CodeRanges(code_list):
+ """
+ Given a list of codes as returned by chars_to_ranges, return
+ an RE which will match a character in any of the ranges.
+ """
+ re_list = [CodeRange(code_list[i], code_list[i + 1]) for i in range(0, len(code_list), 2)]
+ return Alt(*re_list)
+
+
+def CodeRange(code1, code2):
+ """
+ CodeRange(code1, code2) is an RE which matches any character
+ with a code |c| in the range |code1| <= |c| < |code2|.
+ """
+ if code1 <= nl_code < code2:
+ return Alt(RawCodeRange(code1, nl_code),
+ RawNewline,
+ RawCodeRange(nl_code + 1, code2))
+ else:
+ return RawCodeRange(code1, code2)
+
+
+#
+# Abstract classes
+#
+
+class RE(object):
+ """RE is the base class for regular expression constructors.
+ The following operators are defined on REs:
+
+ re1 + re2 is an RE which matches |re1| followed by |re2|
+ re1 | re2 is an RE which matches either |re1| or |re2|
+ """
+
+ nullable = 1 # True if this RE can match 0 input symbols
+ match_nl = 1 # True if this RE can match a string ending with '\n'
+ str = None # Set to a string to override the class's __str__ result
+
+ def build_machine(self, machine, initial_state, final_state,
+ match_bol, nocase):
+ """
+ This method should add states to |machine| to implement this
+ RE, starting at |initial_state| and ending at |final_state|.
+ If |match_bol| is true, the RE must be able to match at the
+ beginning of a line. If nocase is true, upper and lower case
+ letters should be treated as equivalent.
+ """
+ raise NotImplementedError("%s.build_machine not implemented" %
+ self.__class__.__name__)
+
+ def build_opt(self, m, initial_state, c):
+ """
+ Given a state |initial_state| of machine |m|, return a new state
+ reachable from it on character |c| or epsilon.
+ """
+ s = m.new_state()
+ initial_state.link_to(s)
+ initial_state.add_transition(c, s)
+ return s
+
+ def __add__(self, other):
+ return Seq(self, other)
+
+ def __or__(self, other):
+ return Alt(self, other)
+
+ def __str__(self):
+ if self.str:
+ return self.str
+ else:
+ return self.calc_str()
+
+ def check_re(self, num, value):
+ if not isinstance(value, RE):
+ self.wrong_type(num, value, "Plex.RE instance")
+
+ def check_string(self, num, value):
+ if type(value) != type(''):
+ self.wrong_type(num, value, "string")
+
+ def check_char(self, num, value):
+ self.check_string(num, value)
+ if len(value) != 1:
+ raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s. "
+ "Expected a string of length 1, got: %s" % (
+ num, self.__class__.__name__, repr(value)))
+
+ def wrong_type(self, num, value, expected):
+ if hasattr(types, 'InstanceType') and type(value) == types.InstanceType:  # old-style instances, Python 2 only
+ got = "%s.%s instance" % (
+ value.__class__.__module__, value.__class__.__name__)
+ else:
+ got = type(value).__name__
+ raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
+ "(expected %s, got %s" % (
+ num, self.__class__.__name__, expected, got))
+
+#
+# Primitive RE constructors
+# -------------------------
+#
+# These are the basic REs from which all others are built.
+#
+
+## class Char(RE):
+## """
+## Char(c) is an RE which matches the character |c|.
+## """
+
+## nullable = 0
+
+## def __init__(self, char):
+## self.char = char
+## self.match_nl = char == '\n'
+
+## def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+## c = self.char
+## if match_bol and c != BOL:
+## s1 = self.build_opt(m, initial_state, BOL)
+## else:
+## s1 = initial_state
+## if c == '\n' or c == EOF:
+## s1 = self.build_opt(m, s1, EOL)
+## if len(c) == 1:
+## code = ord(self.char)
+## s1.add_transition((code, code+1), final_state)
+## if nocase and is_letter_code(code):
+## code2 = other_case_code(code)
+## s1.add_transition((code2, code2+1), final_state)
+## else:
+## s1.add_transition(c, final_state)
+
+## def calc_str(self):
+## return "Char(%s)" % repr(self.char)
+
+
+def Char(c):
+ """
+ Char(c) is an RE which matches the character |c|.
+ """
+ if len(c) == 1:
+ result = CodeRange(ord(c), ord(c) + 1)
+ else:
+ result = SpecialSymbol(c)
+ result.str = "Char(%s)" % repr(c)
+ return result
+
+
+class RawCodeRange(RE):
+ """
+ RawCodeRange(code1, code2) is a low-level RE which matches any character
+ with a code |c| in the range |code1| <= |c| < |code2|, where the range
+ does not include newline. For internal use only.
+ """
+ nullable = 0
+ match_nl = 0
+ range = None # (code, code)
+ uppercase_range = None # (code, code) or None
+ lowercase_range = None # (code, code) or None
+
+ def __init__(self, code1, code2):
+ self.range = (code1, code2)
+ self.uppercase_range = uppercase_range(code1, code2)
+ self.lowercase_range = lowercase_range(code1, code2)
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ if match_bol:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ initial_state.add_transition(self.range, final_state)
+ if nocase:
+ if self.uppercase_range:
+ initial_state.add_transition(self.uppercase_range, final_state)
+ if self.lowercase_range:
+ initial_state.add_transition(self.lowercase_range, final_state)
+
+ def calc_str(self):
+ return "CodeRange(%d,%d)" % (self.code1, self.code2)
+
+
+class _RawNewline(RE):
+ """
+ RawNewline is a low-level RE which matches a newline character.
+ For internal use only.
+ """
+ nullable = 0
+ match_nl = 1
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ if match_bol:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ s = self.build_opt(m, initial_state, EOL)
+ s.add_transition((nl_code, nl_code + 1), final_state)
+
+
+RawNewline = _RawNewline()
+
+
+class SpecialSymbol(RE):
+ """
+ SpecialSymbol(sym) is an RE which matches the special input
+ symbol |sym|, which is one of BOL, EOL or EOF.
+ """
+ nullable = 0
+ match_nl = 0
+ sym = None
+
+ def __init__(self, sym):
+ self.sym = sym
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ # Sequences 'bol bol' and 'bol eof' are impossible, so only need
+ # to allow for bol if sym is eol
+ if match_bol and self.sym == EOL:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ initial_state.add_transition(self.sym, final_state)
+
+
+class Seq(RE):
+ """Seq(re1, re2, re3...) is an RE which matches |re1| followed by
+ |re2| followed by |re3|..."""
+
+ def __init__(self, *re_list):
+ nullable = 1
+ for i, re in enumerate(re_list):
+ self.check_re(i, re)
+ nullable = nullable and re.nullable
+ self.re_list = re_list
+ self.nullable = nullable
+ i = len(re_list)
+ match_nl = 0
+ while i:
+ i -= 1
+ re = re_list[i]
+ if re.match_nl:
+ match_nl = 1
+ break
+ if not re.nullable:
+ break
+ self.match_nl = match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ re_list = self.re_list
+ if len(re_list) == 0:
+ initial_state.link_to(final_state)
+ else:
+ s1 = initial_state
+ n = len(re_list)
+ for i, re in enumerate(re_list):
+ if i < n - 1:
+ s2 = m.new_state()
+ else:
+ s2 = final_state
+ re.build_machine(m, s1, s2, match_bol, nocase)
+ s1 = s2
+ match_bol = re.match_nl or (match_bol and re.nullable)
+
+ def calc_str(self):
+ return "Seq(%s)" % ','.join(map(str, self.re_list))
+
+
+class Alt(RE):
+ """Alt(re1, re2, re3...) is an RE which matches either |re1| or
+ |re2| or |re3|..."""
+
+ def __init__(self, *re_list):
+ self.re_list = re_list
+ nullable = 0
+ match_nl = 0
+ nullable_res = []
+ non_nullable_res = []
+ i = 1
+ for re in re_list:
+ self.check_re(i, re)
+ if re.nullable:
+ nullable_res.append(re)
+ nullable = 1
+ else:
+ non_nullable_res.append(re)
+ if re.match_nl:
+ match_nl = 1
+ i += 1
+ self.nullable_res = nullable_res
+ self.non_nullable_res = non_nullable_res
+ self.nullable = nullable
+ self.match_nl = match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ for re in self.nullable_res:
+ re.build_machine(m, initial_state, final_state, match_bol, nocase)
+ if self.non_nullable_res:
+ if match_bol:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ for re in self.non_nullable_res:
+ re.build_machine(m, initial_state, final_state, 0, nocase)
+
+ def calc_str(self):
+ return "Alt(%s)" % ','.join(map(str, self.re_list))
+
+
+class Rep1(RE):
+ """Rep1(re) is an RE which matches one or more repetitions of |re|."""
+
+ def __init__(self, re):
+ self.check_re(1, re)
+ self.re = re
+ self.nullable = re.nullable
+ self.match_nl = re.match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ s1 = m.new_state()
+ s2 = m.new_state()
+ initial_state.link_to(s1)
+ self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase)
+ s2.link_to(s1)
+ s2.link_to(final_state)
+
+ def calc_str(self):
+ return "Rep1(%s)" % self.re
+
+
+class SwitchCase(RE):
+ """
+ SwitchCase(re, nocase) is an RE which matches the same strings as |re|,
+ but treating upper and lower case letters according to |nocase|. If
+ |nocase| is true, case is ignored, otherwise it is not.
+ """
+ re = None
+ nocase = None
+
+ def __init__(self, re, nocase):
+ self.re = re
+ self.nocase = nocase
+ self.nullable = re.nullable
+ self.match_nl = re.match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ self.re.build_machine(m, initial_state, final_state, match_bol,
+ self.nocase)
+
+ def calc_str(self):
+ if self.nocase:
+ name = "NoCase"
+ else:
+ name = "Case"
+ return "%s(%s)" % (name, self.re)
+
+#
+# Composite RE constructors
+# -------------------------
+#
+# These REs are defined in terms of the primitive REs.
+#
+
+Empty = Seq()
+Empty.__doc__ = \
+ """
+ Empty is an RE which matches the empty string.
+ """
+Empty.str = "Empty"
+
+
+def Str1(s):
+ """
+ Str1(s) is an RE which matches the literal string |s|.
+ """
+ result = Seq(*tuple(map(Char, s)))
+ result.str = "Str(%s)" % repr(s)
+ return result
+
+
+def Str(*strs):
+ """
+ Str(s) is an RE which matches the literal string |s|.
+ Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
+ """
+ if len(strs) == 1:
+ return Str1(strs[0])
+ else:
+ result = Alt(*tuple(map(Str1, strs)))
+ result.str = "Str(%s)" % ','.join(map(repr, strs))
+ return result
+
+
+def Any(s):
+ """
+ Any(s) is an RE which matches any character in the string |s|.
+ """
+ #result = apply(Alt, tuple(map(Char, s)))
+ result = CodeRanges(chars_to_ranges(s))
+ result.str = "Any(%s)" % repr(s)
+ return result
+
+
+def AnyBut(s):
+ """
+ AnyBut(s) is an RE which matches any character (including
+ newline) which is not in the string |s|.
+ """
+ ranges = chars_to_ranges(s)
+ ranges.insert(0, -maxint)
+ ranges.append(maxint)
+ result = CodeRanges(ranges)
+ result.str = "AnyBut(%s)" % repr(s)
+ return result
+
+
+AnyChar = AnyBut("")
+AnyChar.__doc__ = \
+ """
+ AnyChar is an RE which matches any single character (including a newline).
+ """
+AnyChar.str = "AnyChar"
+
+
+def Range(s1, s2=None):
+ """
+ Range(c1, c2) is an RE which matches any single character in the range
+ |c1| to |c2| inclusive.
+ Range(s) where |s| is a string of even length is an RE which matches
+ any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
+ """
+ if s2:
+ result = CodeRange(ord(s1), ord(s2) + 1)
+ result.str = "Range(%s,%s)" % (s1, s2)
+ else:
+ ranges = []
+ for i in range(0, len(s1), 2):
+ ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
+ result = Alt(*ranges)
+ result.str = "Range(%s)" % repr(s1)
+ return result
+
+
+def Opt(re):
+ """
+ Opt(re) is an RE which matches either |re| or the empty string.
+ """
+ result = Alt(re, Empty)
+ result.str = "Opt(%s)" % re
+ return result
+
+
+def Rep(re):
+ """
+ Rep(re) is an RE which matches zero or more repetitions of |re|.
+ """
+ result = Opt(Rep1(re))
+ result.str = "Rep(%s)" % re
+ return result
+
+
+def NoCase(re):
+ """
+ NoCase(re) is an RE which matches the same strings as |re|, but treating
+ upper and lower case letters as equivalent.
+ """
+ return SwitchCase(re, nocase=1)
+
+
+def Case(re):
+ """
+ Case(re) is an RE which matches the same strings as |re|, but treating
+ upper and lower case letters as distinct, i.e. it cancels the effect
+ of any enclosing NoCase().
+ """
+ return SwitchCase(re, nocase=0)
+
+#
+# RE Constants
+#
+
+Bol = Char(BOL)
+Bol.__doc__ = \
+ """
+ Bol is an RE which matches the beginning of a line.
+ """
+Bol.str = "Bol"
+
+Eol = Char(EOL)
+Eol.__doc__ = \
+ """
+ Eol is an RE which matches the end of a line.
+ """
+Eol.str = "Eol"
+
+Eof = Char(EOF)
+Eof.__doc__ = \
+ """
+ Eof is an RE which matches the end of the file.
+ """
+Eof.str = "Eof"
+
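
The constructors compose into plain RE trees, with __add__ and __or__ on
the RE base class mapping to Seq and Alt. A short sketch (pattern names
are illustrative):

    from Cython.Plex import Str, Any, Rep, Rep1, Opt, NoCase

    digit = Any("0123456789")
    number = Opt(Any("+-")) + Rep1(digit) + Opt(Str(".") + Rep(digit))
    keyword = NoCase(Str("if") | Str("else"))   # '|' builds an Alt
    print(number)   # calc_str() renders the Seq/Alt/Rep structure
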
diff --git a/contrib/tools/cython/Cython/Plex/Scanners.pxd b/contrib/tools/cython/Cython/Plex/Scanners.pxd
new file mode 100644
index 0000000000..6e75f55e61
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Scanners.pxd
@@ -0,0 +1,50 @@
+from __future__ import absolute_import
+
+import cython
+
+from Cython.Plex.Actions cimport Action
+
+cdef class Scanner:
+
+ cdef public lexicon
+ cdef public stream
+ cdef public name
+ cdef public unicode buffer
+ cdef public Py_ssize_t buf_start_pos
+ cdef public Py_ssize_t next_pos
+ cdef public Py_ssize_t cur_pos
+ cdef public Py_ssize_t cur_line
+ cdef public Py_ssize_t cur_line_start
+ cdef public Py_ssize_t start_pos
+ cdef public Py_ssize_t start_line
+ cdef public Py_ssize_t start_col
+ cdef public text
+ cdef public initial_state # int?
+ cdef public state_name
+ cdef public list queue
+ cdef public bint trace
+ cdef public cur_char
+ cdef public long input_state
+
+ cdef public level
+
+ @cython.final
+ @cython.locals(input_state=long)
+ cdef next_char(self)
+ @cython.locals(action=Action)
+ cpdef tuple read(self)
+ @cython.final
+ cdef tuple scan_a_token(self)
+ ##cdef tuple position(self) # used frequently by Parsing.py
+
+ @cython.final
+ @cython.locals(cur_pos=Py_ssize_t, cur_line=Py_ssize_t, cur_line_start=Py_ssize_t,
+ input_state=long, next_pos=Py_ssize_t, state=dict,
+ buf_start_pos=Py_ssize_t, buf_len=Py_ssize_t, buf_index=Py_ssize_t,
+ trace=bint, discard=Py_ssize_t, data=unicode, buffer=unicode)
+ cdef run_machine_inlined(self)
+
+ @cython.final
+ cdef begin(self, state)
+ @cython.final
+ cdef produce(self, value, text = *)
diff --git a/contrib/tools/cython/Cython/Plex/Scanners.py b/contrib/tools/cython/Cython/Plex/Scanners.py
new file mode 100644
index 0000000000..88f7e2da3b
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Scanners.py
@@ -0,0 +1,338 @@
+# cython: auto_pickle=False
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+#
+# Scanning an input stream
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import cython
+
+cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
+
+from . import Errors
+from .Regexps import BOL, EOL, EOF
+
+NOT_FOUND = object()
+
+
+class Scanner(object):
+ """
+ A Scanner is used to read tokens from a stream of characters
+ using the token set specified by a Plex.Lexicon.
+
+ Constructor:
+
+ Scanner(lexicon, stream, name = '')
+
+ See the docstring of the __init__ method for details.
+
+ Methods:
+
+ See the docstrings of the individual methods for more
+ information.
+
+ read() --> (value, text)
+ Reads the next lexical token from the stream.
+
+ position() --> (name, line, col)
+ Returns the position of the last token read using the
+ read() method.
+
+ begin(state_name)
+ Causes scanner to change state.
+
+ produce(value [, text])
+ Causes return of a token value to the caller of the
+ Scanner.
+
+ """
+
+ # lexicon = None # Lexicon
+ # stream = None # file-like object
+ # name = ''
+ # buffer = ''
+ # buf_start_pos = 0 # position in input of start of buffer
+ # next_pos = 0 # position in input of next char to read
+ # cur_pos = 0 # position in input of current char
+ # cur_line = 1 # line number of current char
+ # cur_line_start = 0 # position in input of start of current line
+ # start_pos = 0 # position in input of start of token
+ # start_line = 0 # line number of start of token
+ # start_col = 0 # position in line of start of token
+ # text = None # text of last token read
+ # initial_state = None # Node
+ # state_name = '' # Name of initial state
+ # queue = None # list of tokens to be returned
+ # trace = 0
+
+ def __init__(self, lexicon, stream, name='', initial_pos=None):
+ """
+ Scanner(lexicon, stream, name = '')
+
+ |lexicon| is a Plex.Lexicon instance specifying the lexical tokens
+ to be recognised.
+
+ |stream| can be a file object or anything which implements a
+ compatible read() method.
+
+ |name| is optional, and may be the name of the file being
+ scanned or any other identifying string.
+ """
+ self.trace = 0
+
+ self.buffer = u''
+ self.buf_start_pos = 0
+ self.next_pos = 0
+ self.cur_pos = 0
+ self.cur_line = 1
+ self.start_pos = 0
+ self.start_line = 0
+ self.start_col = 0
+ self.text = None
+ self.state_name = None
+
+ self.lexicon = lexicon
+ self.stream = stream
+ self.name = name
+ self.queue = []
+ self.initial_state = None
+ self.begin('')
+ self.next_pos = 0
+ self.cur_pos = 0
+ self.cur_line_start = 0
+ self.cur_char = BOL
+ self.input_state = 1
+ if initial_pos is not None:
+ self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
+
+ def read(self):
+ """
+ Read the next lexical token from the stream and return a
+ tuple (value, text), where |value| is the value associated with
+ the token as specified by the Lexicon, and |text| is the actual
+ string read from the stream. Returns (None, '') on end of file.
+ """
+ queue = self.queue
+ while not queue:
+ self.text, action = self.scan_a_token()
+ if action is None:
+ self.produce(None)
+ self.eof()
+ else:
+ value = action.perform(self, self.text)
+ if value is not None:
+ self.produce(value)
+ result = queue[0]
+ del queue[0]
+ return result
+
+ def scan_a_token(self):
+ """
+ Read the next input sequence recognised by the machine
+ and return (text, action). Returns ('', None) on end of
+ file.
+ """
+ self.start_pos = self.cur_pos
+ self.start_line = self.cur_line
+ self.start_col = self.cur_pos - self.cur_line_start
+ action = self.run_machine_inlined()
+ if action is not None:
+ if self.trace:
+ print("Scanner: read: Performing %s %d:%d" % (
+ action, self.start_pos, self.cur_pos))
+ text = self.buffer[
+ self.start_pos - self.buf_start_pos:
+ self.cur_pos - self.buf_start_pos]
+ return (text, action)
+ else:
+ if self.cur_pos == self.start_pos:
+ if self.cur_char is EOL:
+ self.next_char()
+ if self.cur_char is None or self.cur_char is EOF:
+ return (u'', None)
+ raise Errors.UnrecognizedInput(self, self.state_name)
+
+ def run_machine_inlined(self):
+ """
+ Inlined version of run_machine for speed.
+ """
+ state = self.initial_state
+ cur_pos = self.cur_pos
+ cur_line = self.cur_line
+ cur_line_start = self.cur_line_start
+ cur_char = self.cur_char
+ input_state = self.input_state
+ next_pos = self.next_pos
+ buffer = self.buffer
+ buf_start_pos = self.buf_start_pos
+ buf_len = len(buffer)
+ b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
+ None, 0, 0, 0, u'', 0, 0
+ trace = self.trace
+ while 1:
+ if trace: #TRACE#
+ print("State %d, %d/%d:%s -->" % ( #TRACE#
+ state['number'], input_state, cur_pos, repr(cur_char))) #TRACE#
+ # Begin inlined self.save_for_backup()
+ #action = state.action #@slow
+ action = state['action'] #@fast
+ if action is not None:
+ b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
+ action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos
+ # End inlined self.save_for_backup()
+ c = cur_char
+ #new_state = state.new_state(c) #@slow
+ new_state = state.get(c, NOT_FOUND) #@fast
+ if new_state is NOT_FOUND: #@fast
+ new_state = c and state.get('else') #@fast
+ if new_state:
+ if trace: #TRACE#
+ print("State %d" % new_state['number']) #TRACE#
+ state = new_state
+ # Begin inlined: self.next_char()
+ if input_state == 1:
+ cur_pos = next_pos
+ # Begin inlined: c = self.read_char()
+ buf_index = next_pos - buf_start_pos
+ if buf_index < buf_len:
+ c = buffer[buf_index]
+ next_pos += 1
+ else:
+ discard = self.start_pos - buf_start_pos
+ data = self.stream.read(0x1000)
+ buffer = self.buffer[discard:] + data
+ self.buffer = buffer
+ buf_start_pos += discard
+ self.buf_start_pos = buf_start_pos
+ buf_len = len(buffer)
+ buf_index -= discard
+ if data:
+ c = buffer[buf_index]
+ next_pos += 1
+ else:
+ c = u''
+ # End inlined: c = self.read_char()
+ if c == u'\n':
+ cur_char = EOL
+ input_state = 2
+ elif not c:
+ cur_char = EOL
+ input_state = 4
+ else:
+ cur_char = c
+ elif input_state == 2:
+ cur_char = u'\n'
+ input_state = 3
+ elif input_state == 3:
+ cur_line += 1
+ cur_line_start = cur_pos = next_pos
+ cur_char = BOL
+ input_state = 1
+ elif input_state == 4:
+ cur_char = EOF
+ input_state = 5
+ else: # input_state = 5
+ cur_char = u''
+ # End inlined self.next_char()
+ else: # not new_state
+ if trace: #TRACE#
+ print("blocked") #TRACE#
+ # Begin inlined: action = self.back_up()
+ if b_action is not None:
+ (action, cur_pos, cur_line, cur_line_start,
+ cur_char, input_state, next_pos) = \
+ (b_action, b_cur_pos, b_cur_line, b_cur_line_start,
+ b_cur_char, b_input_state, b_next_pos)
+ else:
+ action = None
+ break # while 1
+ # End inlined: action = self.back_up()
+ self.cur_pos = cur_pos
+ self.cur_line = cur_line
+ self.cur_line_start = cur_line_start
+ self.cur_char = cur_char
+ self.input_state = input_state
+ self.next_pos = next_pos
+ if trace: #TRACE#
+ if action is not None: #TRACE#
+ print("Doing %s" % action) #TRACE#
+ return action
+
+ def next_char(self):
+ input_state = self.input_state
+ if self.trace:
+ print("Scanner: next: %s [%d] %d" % (" " * 20, input_state, self.cur_pos))
+ if input_state == 1:
+ self.cur_pos = self.next_pos
+ c = self.read_char()
+ if c == u'\n':
+ self.cur_char = EOL
+ self.input_state = 2
+ elif not c:
+ self.cur_char = EOL
+ self.input_state = 4
+ else:
+ self.cur_char = c
+ elif input_state == 2:
+ self.cur_char = u'\n'
+ self.input_state = 3
+ elif input_state == 3:
+ self.cur_line += 1
+ self.cur_line_start = self.cur_pos = self.next_pos
+ self.cur_char = BOL
+ self.input_state = 1
+ elif input_state == 4:
+ self.cur_char = EOF
+ self.input_state = 5
+ else: # input_state = 5
+ self.cur_char = u''
+ if self.trace:
+ print("--> [%d] %d %r" % (input_state, self.cur_pos, self.cur_char))
+
+ def position(self):
+ """
+ Return a tuple (name, line, col) representing the location of
+ the last token read using the read() method. |name| is the
+ name that was provided to the Scanner constructor; |line|
+ is the line number in the stream (1-based); |col| is the
+ position within the line of the first character of the token
+ (0-based).
+ """
+ return (self.name, self.start_line, self.start_col)
+
+ def get_position(self):
+ """Python accessible wrapper around position(), only for error reporting.
+ """
+ return self.position()
+
+ def begin(self, state_name):
+ """Set the current state of the scanner to the named state."""
+ self.initial_state = (
+ self.lexicon.get_initial_state(state_name))
+ self.state_name = state_name
+
+ def produce(self, value, text=None):
+ """
+ Called from an action procedure, causes |value| to be returned
+ as the token value from read(). If |text| is supplied, it is
+ returned in place of the scanned text.
+
+ produce() can be called more than once during a single call to an action
+ procedure, in which case the tokens are queued up and returned one
+ at a time by subsequent calls to read(), until the queue is empty,
+ whereupon scanning resumes.
+ """
+ if text is None:
+ text = self.text
+ self.queue.append((value, text))
+
+ def eof(self):
+ """
+ Override this method if you want something to be done at
+ end of file.
+ """
diff --git a/contrib/tools/cython/Cython/Plex/Timing.py b/contrib/tools/cython/Cython/Plex/Timing.py
new file mode 100644
index 0000000000..5c3692693b
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Timing.py
@@ -0,0 +1,23 @@
+#
+# Get time in platform-dependent way
+#
+
+from __future__ import absolute_import
+
+import os
+from sys import platform, exit, stderr
+
+if platform == 'mac':
+ import MacOS
+ def time():
+ return MacOS.GetTicks() / 60.0
+ timekind = "real"
+elif hasattr(os, 'times'):
+ def time():
+ t = os.times()
+ return t[0] + t[1]
+ timekind = "cpu"
+else:
+ stderr.write(
+ "Don't know how to get time on platform %s\n" % repr(platform))
+ exit(1)
diff --git a/contrib/tools/cython/Cython/Plex/Traditional.py b/contrib/tools/cython/Cython/Plex/Traditional.py
new file mode 100644
index 0000000000..ec7252daed
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Traditional.py
@@ -0,0 +1,158 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Traditional Regular Expression Syntax
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
+from .Errors import PlexError
+
+
+class RegexpSyntaxError(PlexError):
+ pass
+
+
+def re(s):
+ """
+ Convert traditional string representation of regular expression |s|
+ into Plex representation.
+ """
+ return REParser(s).parse_re()
+
+
+class REParser(object):
+ def __init__(self, s):
+ self.s = s
+ self.i = -1
+ self.end = 0
+ self.next()
+
+ def parse_re(self):
+ re = self.parse_alt()
+ if not self.end:
+ self.error("Unexpected %s" % repr(self.c))
+ return re
+
+ def parse_alt(self):
+ """Parse a set of alternative regexps."""
+ re = self.parse_seq()
+ if self.c == '|':
+ re_list = [re]
+ while self.c == '|':
+ self.next()
+ re_list.append(self.parse_seq())
+ re = Alt(*re_list)
+ return re
+
+ def parse_seq(self):
+ """Parse a sequence of regexps."""
+ re_list = []
+ while not self.end and not self.c in "|)":
+ re_list.append(self.parse_mod())
+ return Seq(*re_list)
+
+ def parse_mod(self):
+ """Parse a primitive regexp followed by *, +, ? modifiers."""
+ re = self.parse_prim()
+ while not self.end and self.c in "*+?":
+ if self.c == '*':
+ re = Rep(re)
+ elif self.c == '+':
+ re = Rep1(re)
+ else: # self.c == '?'
+ re = Opt(re)
+ self.next()
+ return re
+
+ def parse_prim(self):
+ """Parse a primitive regexp."""
+ c = self.get()
+ if c == '.':
+ re = AnyBut("\n")
+ elif c == '^':
+ re = Bol
+ elif c == '$':
+ re = Eol
+ elif c == '(':
+ re = self.parse_alt()
+ self.expect(')')
+ elif c == '[':
+ re = self.parse_charset()
+ self.expect(']')
+ else:
+ if c == '\\':
+ c = self.get()
+ re = Char(c)
+ return re
+
+ def parse_charset(self):
+ """Parse a charset. Does not include the surrounding []."""
+ char_list = []
+ invert = 0
+ if self.c == '^':
+ invert = 1
+ self.next()
+ if self.c == ']':
+ char_list.append(']')
+ self.next()
+ while not self.end and self.c != ']':
+ c1 = self.get()
+ if self.c == '-' and self.lookahead(1) != ']':
+ self.next()
+ c2 = self.get()
+ for a in range(ord(c1), ord(c2) + 1):
+ char_list.append(chr(a))
+ else:
+ char_list.append(c1)
+ chars = ''.join(char_list)
+ if invert:
+ return AnyBut(chars)
+ else:
+ return Any(chars)
+
+ def next(self):
+ """Advance to the next char."""
+ s = self.s
+ i = self.i = self.i + 1
+ if i < len(s):
+ self.c = s[i]
+ else:
+ self.c = ''
+ self.end = 1
+
+ def get(self):
+ if self.end:
+ self.error("Premature end of string")
+ c = self.c
+ self.next()
+ return c
+
+ def lookahead(self, n):
+ """Look ahead n chars."""
+ j = self.i + n
+ if j < len(self.s):
+ return self.s[j]
+ else:
+ return ''
+
+ def expect(self, c):
+ """
+ Expect to find character |c| at current position.
+ Raises an exception otherwise.
+ """
+ if self.c == c:
+ self.next()
+ else:
+ self.error("Missing %s" % repr(c))
+
+ def error(self, mess):
+ """Raise exception to signal syntax error in regexp."""
+ raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
+ repr(self.s), self.i, mess))
+
+
+
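
re() gives a traditional shorthand for the constructor-based patterns in
Regexps.py. A sketch (the alias avoids shadowing the stdlib re module; the
lexicon is illustrative):

    from io import StringIO
    from Cython.Plex import Lexicon, Scanner, TEXT, IGNORE
    from Cython.Plex.Traditional import re as plex_re

    ident = plex_re("[A-Za-z_][A-Za-z0-9_]*")   # parsed into Seq/Rep/Any nodes
    lexicon = Lexicon([(ident, TEXT), (plex_re("[ \t\n]+"), IGNORE)])
    scanner = Scanner(lexicon, StringIO(u"foo bar"))
    print(scanner.read())   # ('foo', 'foo')
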
diff --git a/contrib/tools/cython/Cython/Plex/Transitions.py b/contrib/tools/cython/Cython/Plex/Transitions.py
new file mode 100644
index 0000000000..3833817946
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/Transitions.py
@@ -0,0 +1,251 @@
+#
+# Plex - Transition Maps
+#
+# This version represents state sets directly as dicts for speed.
+#
+
+from __future__ import absolute_import
+
+try:
+ from sys import maxsize as maxint
+except ImportError:
+ from sys import maxint
+
+
+class TransitionMap(object):
+ """
+ A TransitionMap maps an input event to a set of states.
+ An input event is one of: a range of character codes,
+ the empty string (representing an epsilon move), or one
+ of the special symbols BOL, EOL, EOF.
+
+ For characters, this implementation compactly represents
+ the map by means of a list:
+
+ [code_0, states_0, code_1, states_1, code_2, states_2,
+ ..., code_n-1, states_n-1, code_n]
+
+ where |code_i| is a character code, and |states_i| is a
+ set of states corresponding to characters with codes |c|
+ in the range |code_i| <= |c| <= |code_i+1|.
+
+ The following invariants hold:
+ n >= 1
+ code_0 == -maxint
+ code_n == maxint
+ code_i < code_i+1 for i in 0..n-1
+ states_0 == states_n-1
+
+ Mappings for the special events '', BOL, EOL, EOF are
+ kept separately in a dictionary.
+ """
+
+ map = None # The list of codes and states
+ special = None # Mapping for special events
+
+ def __init__(self, map=None, special=None):
+ if not map:
+ map = [-maxint, {}, maxint]
+ if not special:
+ special = {}
+ self.map = map
+ self.special = special
+ #self.check() ###
+
+ def add(self, event, new_state,
+ TupleType=tuple):
+ """
+ Add transition to |new_state| on |event|.
+ """
+ if type(event) is TupleType:
+ code0, code1 = event
+ i = self.split(code0)
+ j = self.split(code1)
+ map = self.map
+ while i < j:
+ map[i + 1][new_state] = 1
+ i += 2
+ else:
+ self.get_special(event)[new_state] = 1
+
+ def add_set(self, event, new_set,
+ TupleType=tuple):
+ """
+ Add transitions to the states in |new_set| on |event|.
+ """
+ if type(event) is TupleType:
+ code0, code1 = event
+ i = self.split(code0)
+ j = self.split(code1)
+ map = self.map
+ while i < j:
+ map[i + 1].update(new_set)
+ i += 2
+ else:
+ self.get_special(event).update(new_set)
+
+ def get_epsilon(self,
+ none=None):
+ """
+ Return the mapping for epsilon, or None.
+ """
+ return self.special.get('', none)
+
+ def iteritems(self,
+ len=len):
+ """
+ Return the mapping as an iterable of ((code1, code2), state_set) and
+ (special_event, state_set) pairs.
+ """
+ result = []
+ map = self.map
+ else_set = map[1]
+ i = 0
+ n = len(map) - 1
+ code0 = map[0]
+ while i < n:
+ set = map[i + 1]
+ code1 = map[i + 2]
+ if set or else_set:
+ result.append(((code0, code1), set))
+ code0 = code1
+ i += 2
+ for event, set in self.special.items():
+ if set:
+ result.append((event, set))
+ return iter(result)
+
+ items = iteritems
+
+ # ------------------- Private methods --------------------
+
+ def split(self, code,
+ len=len, maxint=maxint):
+ """
+ Search the list for the position of the split point for |code|,
+ inserting a new split point if necessary. Returns index |i| such
+ that |code| == |map[i]|.
+ """
+ # We use a funky variation on binary search.
+ map = self.map
+ hi = len(map) - 1
+ # Special case: code == map[-1]
+ if code == maxint:
+ return hi
+ # General case
+ lo = 0
+ # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2
+ while hi - lo >= 4:
+ # Find midpoint truncated to even index
+ mid = ((lo + hi) // 2) & ~1
+ if code < map[mid]:
+ hi = mid
+ else:
+ lo = mid
+ # map[lo] <= code < map[hi] and hi - lo == 2
+ if map[lo] == code:
+ return lo
+ else:
+ map[hi:hi] = [code, map[hi - 1].copy()]
+ #self.check() ###
+ return hi
+
+ def get_special(self, event):
+ """
+ Get state set for special event, adding a new entry if necessary.
+ """
+ special = self.special
+ set = special.get(event, None)
+ if not set:
+ set = {}
+ special[event] = set
+ return set
+
+ # --------------------- Conversion methods -----------------------
+
+ def __str__(self):
+ map_strs = []
+ map = self.map
+ n = len(map)
+ i = 0
+ while i < n:
+ code = map[i]
+ if code == -maxint:
+ code_str = "-inf"
+ elif code == maxint:
+ code_str = "inf"
+ else:
+ code_str = str(code)
+ map_strs.append(code_str)
+ i += 1
+ if i < n:
+ map_strs.append(state_set_str(map[i]))
+ i += 1
+ special_strs = {}
+ for event, set in self.special.items():
+ special_strs[event] = state_set_str(set)
+ return "[%s]+%s" % (
+ ','.join(map_strs),
+ special_strs
+ )
+
+ # --------------------- Debugging methods -----------------------
+
+ def check(self):
+ """Check data structure integrity."""
+ if not self.map[-3] < self.map[-1]:
+ print(self)
+ assert 0
+
+ def dump(self, file):
+ map = self.map
+ i = 0
+ n = len(map) - 1
+ while i < n:
+ self.dump_range(map[i], map[i + 2], map[i + 1], file)
+ i += 2
+ for event, set in self.special.items():
+ if set:
+ if not event:
+ event = 'empty'
+ self.dump_trans(event, set, file)
+
+ def dump_range(self, code0, code1, set, file):
+ if set:
+ if code0 == -maxint:
+ if code1 == maxint:
+ k = "any"
+ else:
+ k = "< %s" % self.dump_char(code1)
+ elif code1 == maxint:
+ k = "> %s" % self.dump_char(code0 - 1)
+ elif code0 == code1 - 1:
+ k = self.dump_char(code0)
+ else:
+ k = "%s..%s" % (self.dump_char(code0),
+ self.dump_char(code1 - 1))
+ self.dump_trans(k, set, file)
+
+ def dump_char(self, code):
+ if 0 <= code <= 255:
+ return repr(chr(code))
+ else:
+ return "chr(%d)" % code
+
+ def dump_trans(self, key, set, file):
+ file.write(" %s --> %s\n" % (key, self.dump_set(set)))
+
+ def dump_set(self, set):
+ return state_set_str(set)
+
+
+#
+# State set manipulation functions
+#
+
+#def merge_state_sets(set1, set2):
+# for state in set2.keys():
+# set1[state] = 1
+
+def state_set_str(set):
+ return "[%s]" % ','.join(["S%d" % state.number for state in set])
diff --git a/contrib/tools/cython/Cython/Plex/__init__.py b/contrib/tools/cython/Cython/Plex/__init__.py
new file mode 100644
index 0000000000..81a066f782
--- /dev/null
+++ b/contrib/tools/cython/Cython/Plex/__init__.py
@@ -0,0 +1,39 @@
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+#=======================================================================
+
+"""
+The Plex module provides lexical analysers with similar capabilities
+to GNU Flex. The following classes and functions are exported;
+see the attached docstrings for more information.
+
+ Scanner For scanning a character stream under the
+ direction of a Lexicon.
+
+ Lexicon For constructing a lexical definition
+ to be used by a Scanner.
+
+ Str, Any, AnyBut, AnyChar, Seq, Alt, Opt, Rep, Rep1,
+ Bol, Eol, Eof, Empty
+
+ Regular expression constructors, for building pattern
+ definitions for a Lexicon.
+
+ State For defining scanner states when creating a
+ Lexicon.
+
+ TEXT, IGNORE, Begin
+
+ Actions for associating with patterns when
+ creating a Lexicon.
+"""
+
+from __future__ import absolute_import
+
+from .Actions import TEXT, IGNORE, Begin
+from .Lexicons import Lexicon, State
+from .Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range
+from .Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase
+from .Scanners import Scanner