aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/cython/Cython/Plex
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:17 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:17 +0300
commitd3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
treedd4bd3ca0f36b817e96812825ffaf10d645803f2 /contrib/tools/cython/Cython/Plex
parent72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
downloadydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/tools/cython/Cython/Plex')
-rw-r--r--contrib/tools/cython/Cython/Plex/Actions.pxd50
-rw-r--r--contrib/tools/cython/Cython/Plex/Actions.py94
-rw-r--r--contrib/tools/cython/Cython/Plex/DFA.py64
-rw-r--r--contrib/tools/cython/Cython/Plex/Errors.py56
-rw-r--r--contrib/tools/cython/Cython/Plex/Lexicons.py124
-rw-r--r--contrib/tools/cython/Cython/Plex/Machines.py104
-rw-r--r--contrib/tools/cython/Cython/Plex/Regexps.py1084
-rw-r--r--contrib/tools/cython/Cython/Plex/Scanners.pxd78
-rw-r--r--contrib/tools/cython/Cython/Plex/Scanners.py108
-rw-r--r--contrib/tools/cython/Cython/Plex/Timing.py46
-rw-r--r--contrib/tools/cython/Cython/Plex/Traditional.py72
-rw-r--r--contrib/tools/cython/Cython/Plex/Transitions.py86
-rw-r--r--contrib/tools/cython/Cython/Plex/__init__.py78
13 files changed, 1022 insertions, 1022 deletions
diff --git a/contrib/tools/cython/Cython/Plex/Actions.pxd b/contrib/tools/cython/Cython/Plex/Actions.pxd
index 595e932181..34660a2d9b 100644
--- a/contrib/tools/cython/Cython/Plex/Actions.pxd
+++ b/contrib/tools/cython/Cython/Plex/Actions.pxd
@@ -1,25 +1,25 @@
-
-cdef class Action:
- cdef perform(self, token_stream, text)
- cpdef same_as(self, other)
-
-cdef class Return(Action):
- cdef object value
- cdef perform(self, token_stream, text)
- cpdef same_as(self, other)
-
-cdef class Call(Action):
- cdef object function
- cdef perform(self, token_stream, text)
- cpdef same_as(self, other)
-
-cdef class Begin(Action):
- cdef object state_name
- cdef perform(self, token_stream, text)
- cpdef same_as(self, other)
-
-cdef class Ignore(Action):
- cdef perform(self, token_stream, text)
-
-cdef class Text(Action):
- cdef perform(self, token_stream, text)
+
+cdef class Action:
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Return(Action):
+ cdef object value
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Call(Action):
+ cdef object function
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Begin(Action):
+ cdef object state_name
+ cdef perform(self, token_stream, text)
+ cpdef same_as(self, other)
+
+cdef class Ignore(Action):
+ cdef perform(self, token_stream, text)
+
+cdef class Text(Action):
+ cdef perform(self, token_stream, text)
diff --git a/contrib/tools/cython/Cython/Plex/Actions.py b/contrib/tools/cython/Cython/Plex/Actions.py
index f372352443..c88176e716 100644
--- a/contrib/tools/cython/Cython/Plex/Actions.py
+++ b/contrib/tools/cython/Cython/Plex/Actions.py
@@ -1,110 +1,110 @@
# cython: auto_pickle=False
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Actions for use in token specifications
-#
-#=======================================================================
-
-class Action(object):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Actions for use in token specifications
+#
+#=======================================================================
+
+class Action(object):
def perform(self, token_stream, text):
pass # abstract
-
+
def same_as(self, other):
return self is other
-
-
-class Return(Action):
+
+
+class Return(Action):
"""
Internal Plex action which causes |value| to
be returned as the value of the associated token
"""
-
+
def __init__(self, value):
self.value = value
-
+
def perform(self, token_stream, text):
return self.value
-
+
def same_as(self, other):
return isinstance(other, Return) and self.value == other.value
-
+
def __repr__(self):
return "Return(%s)" % repr(self.value)
-
-
-class Call(Action):
+
+
+class Call(Action):
"""
Internal Plex action which causes a function to be called.
"""
-
+
def __init__(self, function):
self.function = function
-
+
def perform(self, token_stream, text):
return self.function(token_stream, text)
-
+
def __repr__(self):
return "Call(%s)" % self.function.__name__
-
+
def same_as(self, other):
return isinstance(other, Call) and self.function is other.function
-
-
-class Begin(Action):
+
+
+class Begin(Action):
"""
Begin(state_name) is a Plex action which causes the Scanner to
enter the state |state_name|. See the docstring of Plex.Lexicon
for more information.
"""
-
+
def __init__(self, state_name):
self.state_name = state_name
-
+
def perform(self, token_stream, text):
token_stream.begin(self.state_name)
-
+
def __repr__(self):
return "Begin(%s)" % self.state_name
-
+
def same_as(self, other):
return isinstance(other, Begin) and self.state_name == other.state_name
-
-
-class Ignore(Action):
+
+
+class Ignore(Action):
"""
IGNORE is a Plex action which causes its associated token
to be ignored. See the docstring of Plex.Lexicon for more
information.
"""
-
+
def perform(self, token_stream, text):
return None
-
+
def __repr__(self):
return "IGNORE"
-IGNORE = Ignore()
-#IGNORE.__doc__ = Ignore.__doc__
-
+IGNORE = Ignore()
+#IGNORE.__doc__ = Ignore.__doc__
+
-class Text(Action):
+class Text(Action):
"""
TEXT is a Plex action which causes the text of a token to
be returned as the value of the token. See the docstring of
Plex.Lexicon for more information.
"""
-
+
def perform(self, token_stream, text):
return text
-
+
def __repr__(self):
return "TEXT"
-
-TEXT = Text()
-#TEXT.__doc__ = Text.__doc__
-
-
+
+TEXT = Text()
+#TEXT.__doc__ = Text.__doc__
+
+
diff --git a/contrib/tools/cython/Cython/Plex/DFA.py b/contrib/tools/cython/Cython/Plex/DFA.py
index 478eddc2ce..76324621fc 100644
--- a/contrib/tools/cython/Cython/Plex/DFA.py
+++ b/contrib/tools/cython/Cython/Plex/DFA.py
@@ -1,18 +1,18 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Converting NFA to DFA
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-from . import Machines
-from .Machines import LOWEST_PRIORITY
-from .Transitions import TransitionMap
-
-
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Converting NFA to DFA
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+from . import Machines
+from .Machines import LOWEST_PRIORITY
+from .Transitions import TransitionMap
+
+
def nfa_to_dfa(old_machine, debug=None):
"""
Given a nondeterministic Machine, return a new equivalent
@@ -49,9 +49,9 @@ def nfa_to_dfa(old_machine, debug=None):
debug.write("\n===== State Mapping =====\n")
state_map.dump(debug)
return new_machine
-
-def set_epsilon_closure(state_set):
+
+def set_epsilon_closure(state_set):
"""
Given a set of states, return the union of the epsilon
closures of its member states.
@@ -61,9 +61,9 @@ def set_epsilon_closure(state_set):
for state2 in epsilon_closure(state1):
result[state2] = 1
return result
-
-def epsilon_closure(state):
+
+def epsilon_closure(state):
"""
Return the set of states reachable from the given state
by epsilon moves.
@@ -75,9 +75,9 @@ def epsilon_closure(state):
state.epsilon_closure = result
add_to_epsilon_closure(result, state)
return result
-
-def add_to_epsilon_closure(state_set, state):
+
+def add_to_epsilon_closure(state_set, state):
"""
Recursively add to |state_set| states reachable from the given state
by epsilon moves.
@@ -88,22 +88,22 @@ def add_to_epsilon_closure(state_set, state):
if state_set_2:
for state2 in state_set_2:
add_to_epsilon_closure(state_set, state2)
-
-class StateMap(object):
- """
+
+class StateMap(object):
+ """
Helper class used by nfa_to_dfa() to map back and forth between
sets of states from the old machine and states of the new machine.
- """
+ """
new_machine = None # Machine
old_to_new_dict = None # {(old_state,...) : new_state}
new_to_old_dict = None # {id(new_state) : old_state_set}
-
+
def __init__(self, new_machine):
self.new_machine = new_machine
self.old_to_new_dict = {}
self.new_to_old_dict = {}
-
+
def old_to_new(self, old_state_set):
"""
Return the state of the new machine corresponding to the
@@ -122,7 +122,7 @@ class StateMap(object):
#for old_state in old_state_set.keys():
#new_state.merge_actions(old_state)
return new_state
-
+
def highest_priority_action(self, state_set):
best_action = None
best_priority = LOWEST_PRIORITY
@@ -132,18 +132,18 @@ class StateMap(object):
best_action = state.action
best_priority = priority
return best_action
-
+
# def old_to_new_set(self, old_state_set):
# """
# Return the new state corresponding to a set of old states as
# a singleton set.
# """
# return {self.old_to_new(old_state_set):1}
-
+
def new_to_old(self, new_state):
"""Given a new state, return a set of corresponding old states."""
return self.new_to_old_dict[id(new_state)]
-
+
def make_key(self, state_set):
"""
Convert a set of states into a uniquified
@@ -152,7 +152,7 @@ class StateMap(object):
lst = list(state_set)
lst.sort()
return tuple(lst)
-
+
def dump(self, file):
from .Transitions import state_set_str
diff --git a/contrib/tools/cython/Cython/Plex/Errors.py b/contrib/tools/cython/Cython/Plex/Errors.py
index b375bb528b..f460100d77 100644
--- a/contrib/tools/cython/Cython/Plex/Errors.py
+++ b/contrib/tools/cython/Cython/Plex/Errors.py
@@ -1,54 +1,54 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Exception classes
-#
-#=======================================================================
-
-
-class PlexError(Exception):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Exception classes
+#
+#=======================================================================
+
+
+class PlexError(Exception):
message = ""
-
-class PlexTypeError(PlexError, TypeError):
+
+class PlexTypeError(PlexError, TypeError):
pass
-
-class PlexValueError(PlexError, ValueError):
+
+class PlexValueError(PlexError, ValueError):
pass
-
-class InvalidRegex(PlexError):
+
+class InvalidRegex(PlexError):
pass
-
-class InvalidToken(PlexError):
+
+class InvalidToken(PlexError):
def __init__(self, token_number, message):
PlexError.__init__(self, "Token number %d: %s" % (token_number, message))
-
-
-class InvalidScanner(PlexError):
+
+
+class InvalidScanner(PlexError):
pass
-
-class AmbiguousAction(PlexError):
+
+class AmbiguousAction(PlexError):
message = "Two tokens with different actions can match the same string"
-
+
def __init__(self):
pass
-
-class UnrecognizedInput(PlexError):
+
+class UnrecognizedInput(PlexError):
scanner = None
position = None
state_name = None
-
+
def __init__(self, scanner, state_name):
self.scanner = scanner
self.position = scanner.get_position()
self.state_name = state_name
-
+
def __str__(self):
return ("'%s', line %d, char %d: Token not recognised in state %r" % (
self.position + (self.state_name,)))
diff --git a/contrib/tools/cython/Cython/Plex/Lexicons.py b/contrib/tools/cython/Cython/Plex/Lexicons.py
index e163caef41..787f5854b8 100644
--- a/contrib/tools/cython/Cython/Plex/Lexicons.py
+++ b/contrib/tools/cython/Cython/Plex/Lexicons.py
@@ -1,125 +1,125 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Lexical Analyser Specification
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-import types
-
-from . import Actions
-from . import DFA
-from . import Errors
-from . import Machines
-from . import Regexps
-
-# debug_flags for Lexicon constructor
-DUMP_NFA = 1
-DUMP_DFA = 2
-
-
-class State(object):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Lexical Analyser Specification
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import types
+
+from . import Actions
+from . import DFA
+from . import Errors
+from . import Machines
+from . import Regexps
+
+# debug_flags for Lexicon constructor
+DUMP_NFA = 1
+DUMP_DFA = 2
+
+
+class State(object):
"""
This class is used as part of a Plex.Lexicon specification to
introduce a user-defined state.
-
+
Constructor:
-
+
State(name, token_specifications)
"""
-
+
name = None
tokens = None
-
+
def __init__(self, name, tokens):
self.name = name
self.tokens = tokens
-
-class Lexicon(object):
+
+class Lexicon(object):
"""
Lexicon(specification) builds a lexical analyser from the given
|specification|. The specification consists of a list of
specification items. Each specification item may be either:
-
+
1) A token definition, which is a tuple:
-
+
(pattern, action)
-
+
The |pattern| is a regular axpression built using the
constructors defined in the Plex module.
-
+
The |action| is the action to be performed when this pattern
is recognised (see below).
-
+
2) A state definition:
-
+
State(name, tokens)
-
+
where |name| is a character string naming the state,
and |tokens| is a list of token definitions as
above. The meaning and usage of states is described
below.
-
+
Actions
-------
-
+
The |action| in a token specication may be one of three things:
-
+
1) A function, which is called as follows:
-
+
function(scanner, text)
-
+
where |scanner| is the relevant Scanner instance, and |text|
is the matched text. If the function returns anything
other than None, that value is returned as the value of the
token. If it returns None, scanning continues as if the IGNORE
action were specified (see below).
-
+
2) One of the following special actions:
-
+
IGNORE means that the recognised characters will be treated as
white space and ignored. Scanning will continue until
the next non-ignored token is recognised before returning.
-
+
TEXT causes the scanned text itself to be returned as the
value of the token.
-
+
3) Any other value, which is returned as the value of the token.
-
+
States
------
-
+
At any given time, the scanner is in one of a number of states.
Associated with each state is a set of possible tokens. When scanning,
only tokens associated with the current state are recognised.
-
+
There is a default state, whose name is the empty string. Token
definitions which are not inside any State definition belong to
the default state.
-
+
The initial state of the scanner is the default state. The state can
be changed in one of two ways:
-
+
1) Using Begin(state_name) as the action of a token.
-
+
2) Calling the begin(state_name) method of the Scanner.
-
+
To change back to the default state, use '' as the state name.
"""
-
+
machine = None # Machine
tables = None # StateTableMachine
-
+
def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
if not isinstance(specifications, list):
raise Errors.InvalidScanner("Scanner definition is not a list")
if timings:
from .Timing import time
-
+
total_time = 0.0
time1 = time()
nfa = Machines.Machine()
@@ -161,7 +161,7 @@ class Lexicon(object):
self.machine = dfa
def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
- try:
+ try:
(re, action_spec) = self.parse_token_definition(token_spec)
# Disabled this -- matching empty strings can be useful
#if re.nullable:
@@ -182,7 +182,7 @@ class Lexicon(object):
final_state.set_action(action, priority=-token_number)
except Errors.PlexError as e:
raise e.__class__("Token number %d: %s" % (token_number, e))
-
+
def parse_token_definition(self, token_spec):
if not isinstance(token_spec, tuple):
raise Errors.InvalidToken("Token definition is not a tuple")
@@ -192,9 +192,9 @@ class Lexicon(object):
if not isinstance(pattern, Regexps.RE):
raise Errors.InvalidToken("Pattern is not an RE instance")
return (pattern, action)
-
+
def get_initial_state(self, name):
return self.machine.get_initial_state(name)
-
-
-
+
+
+
diff --git a/contrib/tools/cython/Cython/Plex/Machines.py b/contrib/tools/cython/Cython/Plex/Machines.py
index 6ddcbd5fe9..398850976b 100644
--- a/contrib/tools/cython/Cython/Plex/Machines.py
+++ b/contrib/tools/cython/Cython/Plex/Machines.py
@@ -1,45 +1,45 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Classes for building NFAs and DFAs
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-import sys
-
-from .Transitions import TransitionMap
-
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Classes for building NFAs and DFAs
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import sys
+
+from .Transitions import TransitionMap
+
try:
from sys import maxsize as maxint
except ImportError:
from sys import maxint
-
+
try:
unichr
except NameError:
unichr = chr
-
+
LOWEST_PRIORITY = -maxint
-class Machine(object):
+class Machine(object):
"""A collection of Nodes representing an NFA or DFA."""
states = None # [Node]
next_state_number = 1
initial_states = None # {(name, bol): Node}
-
+
def __init__(self):
self.states = []
self.initial_states = {}
-
+
def __del__(self):
#print "Destroying", self ###
for state in self.states:
state.destroy()
-
+
def new_state(self):
"""Add a new state to the machine and return it."""
s = Node()
@@ -48,18 +48,18 @@ class Machine(object):
s.number = n
self.states.append(s)
return s
-
+
def new_initial_state(self, name):
state = self.new_state()
self.make_initial_state(name, state)
return state
-
+
def make_initial_state(self, name, state):
self.initial_states[name] = state
-
+
def get_initial_state(self, name):
return self.initial_states[name]
-
+
def dump(self, file):
file.write("Plex.Machine:\n")
if self.initial_states is not None:
@@ -68,36 +68,36 @@ class Machine(object):
file.write(" '%s': %d\n" % (name, state.number))
for s in self.states:
s.dump(file)
-
-class Node(object):
+
+class Node(object):
"""A state of an NFA or DFA."""
transitions = None # TransitionMap
action = None # Action
action_priority = None # integer
number = 0 # for debug output
epsilon_closure = None # used by nfa_to_dfa()
-
+
def __init__(self):
# Preinitialise the list of empty transitions, because
# the nfa-to-dfa algorithm needs it
#self.transitions = {'':[]}
self.transitions = TransitionMap()
self.action_priority = LOWEST_PRIORITY
-
+
def destroy(self):
#print "Destroying", self ###
self.transitions = None
self.action = None
self.epsilon_closure = None
-
+
def add_transition(self, event, new_state):
self.transitions.add(event, new_state)
-
+
def link_to(self, state):
"""Add an epsilon-move from this state to another state."""
self.add_transition('', state)
-
+
def set_action(self, action, priority):
"""Make this an accepting state with the given action. If
there is already an action, choose the action with highest
@@ -105,19 +105,19 @@ class Node(object):
if priority > self.action_priority:
self.action = action
self.action_priority = priority
-
+
def get_action(self):
return self.action
-
+
def get_action_priority(self):
return self.action_priority
-
+
def is_accepting(self):
return self.action is not None
-
+
def __str__(self):
return "State %d" % self.number
-
+
def dump(self, file):
# Header
file.write(" State %d:\n" % self.number)
@@ -129,12 +129,12 @@ class Node(object):
priority = self.action_priority
if action is not None:
file.write(" %s [priority %d]\n" % (action, priority))
-
+
def __lt__(self, other):
return self.number < other.number
-
-class FastMachine(object):
+
+class FastMachine(object):
"""
FastMachine is a deterministic machine represented in a way that
allows fast scanning.
@@ -142,19 +142,19 @@ class FastMachine(object):
initial_states = None # {state_name:state}
states = None # [state] where state = {event:state, 'else':state, 'action':Action}
next_number = 1 # for debugging
-
+
new_state_template = {
'': None, 'bol': None, 'eol': None, 'eof': None, 'else': None
}
-
+
def __init__(self):
self.initial_states = {}
self.states = []
-
+
def __del__(self):
for state in self.states:
state.clear()
-
+
def new_state(self, action=None):
number = self.next_number
self.next_number = number + 1
@@ -163,10 +163,10 @@ class FastMachine(object):
result['action'] = action
self.states.append(result)
return result
-
+
def make_initial_state(self, name, state):
self.initial_states[name] = state
-
+
def add_transitions(self, state, event, new_state, maxint=maxint):
if type(event) is tuple:
code0, code1 = event
@@ -178,10 +178,10 @@ class FastMachine(object):
code0 += 1
else:
state[event] = new_state
-
+
def get_initial_state(self, name):
return self.initial_states[name]
-
+
def dump(self, file):
file.write("Plex.FastMachine:\n")
file.write(" Initial states:\n")
@@ -189,7 +189,7 @@ class FastMachine(object):
file.write(" %s: %s\n" % (repr(name), state['number']))
for state in self.states:
self.dump_state(state, file)
-
+
def dump_state(self, state, file):
# Header
file.write(" State %d:\n" % state['number'])
@@ -199,7 +199,7 @@ class FastMachine(object):
action = state['action']
if action is not None:
file.write(" %s\n" % action)
-
+
def dump_transitions(self, state, file):
chars_leading_to_state = {}
special_to_state = {}
@@ -228,7 +228,7 @@ class FastMachine(object):
state = special_to_state.get(key, None)
if state:
file.write(" %s --> State %d\n" % (key, state['number']))
-
+
def chars_to_ranges(self, char_list):
char_list.sort()
i = 0
@@ -243,10 +243,10 @@ class FastMachine(object):
c2 += 1
result.append((chr(c1), chr(c2)))
return tuple(result)
-
+
def ranges_to_string(self, range_list):
return ','.join(map(self.range_to_string, range_list))
-
+
def range_to_string(self, range_tuple):
(c1, c2) = range_tuple
if c1 == c2:
diff --git a/contrib/tools/cython/Cython/Plex/Regexps.py b/contrib/tools/cython/Cython/Plex/Regexps.py
index 43e5fa3de9..41816c939a 100644
--- a/contrib/tools/cython/Cython/Plex/Regexps.py
+++ b/contrib/tools/cython/Cython/Plex/Regexps.py
@@ -1,576 +1,576 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Regular Expressions
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-import types
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Regular Expressions
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import types
try:
from sys import maxsize as maxint
except ImportError:
from sys import maxint
-
-from . import Errors
-
-#
-# Constants
-#
-
-BOL = 'bol'
-EOL = 'eol'
-EOF = 'eof'
-
-nl_code = ord('\n')
-
-
-#
-# Helper functions
-#
-
-def chars_to_ranges(s):
- """
- Return a list of character codes consisting of pairs
- [code1a, code1b, code2a, code2b,...] which cover all
- the characters in |s|.
- """
- char_list = list(s)
- char_list.sort()
- i = 0
- n = len(char_list)
- result = []
- while i < n:
- code1 = ord(char_list[i])
- code2 = code1 + 1
+
+from . import Errors
+
+#
+# Constants
+#
+
+BOL = 'bol'
+EOL = 'eol'
+EOF = 'eof'
+
+nl_code = ord('\n')
+
+
+#
+# Helper functions
+#
+
+def chars_to_ranges(s):
+ """
+ Return a list of character codes consisting of pairs
+ [code1a, code1b, code2a, code2b,...] which cover all
+ the characters in |s|.
+ """
+ char_list = list(s)
+ char_list.sort()
+ i = 0
+ n = len(char_list)
+ result = []
+ while i < n:
+ code1 = ord(char_list[i])
+ code2 = code1 + 1
i += 1
- while i < n and code2 >= ord(char_list[i]):
+ while i < n and code2 >= ord(char_list[i]):
code2 += 1
i += 1
- result.append(code1)
- result.append(code2)
- return result
-
-
-def uppercase_range(code1, code2):
- """
- If the range of characters from code1 to code2-1 includes any
- lower case letters, return the corresponding upper case range.
- """
- code3 = max(code1, ord('a'))
- code4 = min(code2, ord('z') + 1)
- if code3 < code4:
- d = ord('A') - ord('a')
- return (code3 + d, code4 + d)
- else:
- return None
-
-
-def lowercase_range(code1, code2):
- """
- If the range of characters from code1 to code2-1 includes any
- upper case letters, return the corresponding lower case range.
- """
- code3 = max(code1, ord('A'))
- code4 = min(code2, ord('Z') + 1)
- if code3 < code4:
- d = ord('a') - ord('A')
- return (code3 + d, code4 + d)
- else:
- return None
-
-
-def CodeRanges(code_list):
- """
- Given a list of codes as returned by chars_to_ranges, return
- an RE which will match a character in any of the ranges.
- """
+ result.append(code1)
+ result.append(code2)
+ return result
+
+
+def uppercase_range(code1, code2):
+ """
+ If the range of characters from code1 to code2-1 includes any
+ lower case letters, return the corresponding upper case range.
+ """
+ code3 = max(code1, ord('a'))
+ code4 = min(code2, ord('z') + 1)
+ if code3 < code4:
+ d = ord('A') - ord('a')
+ return (code3 + d, code4 + d)
+ else:
+ return None
+
+
+def lowercase_range(code1, code2):
+ """
+ If the range of characters from code1 to code2-1 includes any
+ upper case letters, return the corresponding lower case range.
+ """
+ code3 = max(code1, ord('A'))
+ code4 = min(code2, ord('Z') + 1)
+ if code3 < code4:
+ d = ord('a') - ord('A')
+ return (code3 + d, code4 + d)
+ else:
+ return None
+
+
+def CodeRanges(code_list):
+ """
+ Given a list of codes as returned by chars_to_ranges, return
+ an RE which will match a character in any of the ranges.
+ """
re_list = [CodeRange(code_list[i], code_list[i + 1]) for i in range(0, len(code_list), 2)]
- return Alt(*re_list)
-
-
-def CodeRange(code1, code2):
- """
- CodeRange(code1, code2) is an RE which matches any character
- with a code |c| in the range |code1| <= |c| < |code2|.
- """
- if code1 <= nl_code < code2:
- return Alt(RawCodeRange(code1, nl_code),
+ return Alt(*re_list)
+
+
+def CodeRange(code1, code2):
+ """
+ CodeRange(code1, code2) is an RE which matches any character
+ with a code |c| in the range |code1| <= |c| < |code2|.
+ """
+ if code1 <= nl_code < code2:
+ return Alt(RawCodeRange(code1, nl_code),
RawNewline,
RawCodeRange(nl_code + 1, code2))
- else:
- return RawCodeRange(code1, code2)
-
-
-#
-# Abstract classes
-#
-
-class RE(object):
- """RE is the base class for regular expression constructors.
- The following operators are defined on REs:
-
- re1 + re2 is an RE which matches |re1| followed by |re2|
- re1 | re2 is an RE which matches either |re1| or |re2|
- """
-
+ else:
+ return RawCodeRange(code1, code2)
+
+
+#
+# Abstract classes
+#
+
+class RE(object):
+ """RE is the base class for regular expression constructors.
+ The following operators are defined on REs:
+
+ re1 + re2 is an RE which matches |re1| followed by |re2|
+ re1 | re2 is an RE which matches either |re1| or |re2|
+ """
+
nullable = 1 # True if this RE can match 0 input symbols
match_nl = 1 # True if this RE can match a string ending with '\n'
str = None # Set to a string to override the class's __str__ result
-
- def build_machine(self, machine, initial_state, final_state,
+
+ def build_machine(self, machine, initial_state, final_state,
match_bol, nocase):
- """
- This method should add states to |machine| to implement this
- RE, starting at |initial_state| and ending at |final_state|.
- If |match_bol| is true, the RE must be able to match at the
- beginning of a line. If nocase is true, upper and lower case
- letters should be treated as equivalent.
- """
- raise NotImplementedError("%s.build_machine not implemented" %
+ """
+ This method should add states to |machine| to implement this
+ RE, starting at |initial_state| and ending at |final_state|.
+ If |match_bol| is true, the RE must be able to match at the
+ beginning of a line. If nocase is true, upper and lower case
+ letters should be treated as equivalent.
+ """
+ raise NotImplementedError("%s.build_machine not implemented" %
self.__class__.__name__)
-
- def build_opt(self, m, initial_state, c):
- """
- Given a state |s| of machine |m|, return a new state
- reachable from |s| on character |c| or epsilon.
- """
- s = m.new_state()
- initial_state.link_to(s)
- initial_state.add_transition(c, s)
- return s
-
- def __add__(self, other):
- return Seq(self, other)
-
- def __or__(self, other):
- return Alt(self, other)
-
- def __str__(self):
- if self.str:
- return self.str
- else:
- return self.calc_str()
-
- def check_re(self, num, value):
- if not isinstance(value, RE):
- self.wrong_type(num, value, "Plex.RE instance")
-
- def check_string(self, num, value):
- if type(value) != type(''):
- self.wrong_type(num, value, "string")
-
- def check_char(self, num, value):
- self.check_string(num, value)
- if len(value) != 1:
- raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
+
+ def build_opt(self, m, initial_state, c):
+ """
+ Given a state |s| of machine |m|, return a new state
+ reachable from |s| on character |c| or epsilon.
+ """
+ s = m.new_state()
+ initial_state.link_to(s)
+ initial_state.add_transition(c, s)
+ return s
+
+ def __add__(self, other):
+ return Seq(self, other)
+
+ def __or__(self, other):
+ return Alt(self, other)
+
+ def __str__(self):
+ if self.str:
+ return self.str
+ else:
+ return self.calc_str()
+
+ def check_re(self, num, value):
+ if not isinstance(value, RE):
+ self.wrong_type(num, value, "Plex.RE instance")
+
+ def check_string(self, num, value):
+ if type(value) != type(''):
+ self.wrong_type(num, value, "string")
+
+ def check_char(self, num, value):
+ self.check_string(num, value)
+ if len(value) != 1:
+ raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
"Expected a string of length 1, got: %s" % (
num, self.__class__.__name__, repr(value)))
-
- def wrong_type(self, num, value, expected):
- if type(value) == types.InstanceType:
+
+ def wrong_type(self, num, value, expected):
+ if type(value) == types.InstanceType:
got = "%s.%s instance" % (
value.__class__.__module__, value.__class__.__name__)
- else:
- got = type(value).__name__
- raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
+ else:
+ got = type(value).__name__
+ raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
"(expected %s, got %s" % (
num, self.__class__.__name__, expected, got))
-
-#
-# Primitive RE constructors
-# -------------------------
-#
-# These are the basic REs from which all others are built.
-#
-
-## class Char(RE):
-## """
-## Char(c) is an RE which matches the character |c|.
-## """
-
-## nullable = 0
-
-## def __init__(self, char):
-## self.char = char
-## self.match_nl = char == '\n'
-
-## def build_machine(self, m, initial_state, final_state, match_bol, nocase):
-## c = self.char
-## if match_bol and c != BOL:
-## s1 = self.build_opt(m, initial_state, BOL)
-## else:
-## s1 = initial_state
-## if c == '\n' or c == EOF:
-## s1 = self.build_opt(m, s1, EOL)
-## if len(c) == 1:
-## code = ord(self.char)
-## s1.add_transition((code, code+1), final_state)
-## if nocase and is_letter_code(code):
-## code2 = other_case_code(code)
-## s1.add_transition((code2, code2+1), final_state)
-## else:
-## s1.add_transition(c, final_state)
-
-## def calc_str(self):
-## return "Char(%s)" % repr(self.char)
-
-
-def Char(c):
- """
- Char(c) is an RE which matches the character |c|.
- """
- if len(c) == 1:
- result = CodeRange(ord(c), ord(c) + 1)
- else:
- result = SpecialSymbol(c)
- result.str = "Char(%s)" % repr(c)
- return result
-
-
-class RawCodeRange(RE):
- """
- RawCodeRange(code1, code2) is a low-level RE which matches any character
- with a code |c| in the range |code1| <= |c| < |code2|, where the range
- does not include newline. For internal use only.
- """
- nullable = 0
- match_nl = 0
+
+#
+# Primitive RE constructors
+# -------------------------
+#
+# These are the basic REs from which all others are built.
+#
+
+## class Char(RE):
+## """
+## Char(c) is an RE which matches the character |c|.
+## """
+
+## nullable = 0
+
+## def __init__(self, char):
+## self.char = char
+## self.match_nl = char == '\n'
+
+## def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+## c = self.char
+## if match_bol and c != BOL:
+## s1 = self.build_opt(m, initial_state, BOL)
+## else:
+## s1 = initial_state
+## if c == '\n' or c == EOF:
+## s1 = self.build_opt(m, s1, EOL)
+## if len(c) == 1:
+## code = ord(self.char)
+## s1.add_transition((code, code+1), final_state)
+## if nocase and is_letter_code(code):
+## code2 = other_case_code(code)
+## s1.add_transition((code2, code2+1), final_state)
+## else:
+## s1.add_transition(c, final_state)
+
+## def calc_str(self):
+## return "Char(%s)" % repr(self.char)
+
+
+def Char(c):
+ """
+ Char(c) is an RE which matches the character |c|.
+ """
+ if len(c) == 1:
+ result = CodeRange(ord(c), ord(c) + 1)
+ else:
+ result = SpecialSymbol(c)
+ result.str = "Char(%s)" % repr(c)
+ return result
+
+
+class RawCodeRange(RE):
+ """
+ RawCodeRange(code1, code2) is a low-level RE which matches any character
+ with a code |c| in the range |code1| <= |c| < |code2|, where the range
+ does not include newline. For internal use only.
+ """
+ nullable = 0
+ match_nl = 0
range = None # (code, code)
uppercase_range = None # (code, code) or None
lowercase_range = None # (code, code) or None
-
- def __init__(self, code1, code2):
- self.range = (code1, code2)
- self.uppercase_range = uppercase_range(code1, code2)
- self.lowercase_range = lowercase_range(code1, code2)
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- if match_bol:
- initial_state = self.build_opt(m, initial_state, BOL)
- initial_state.add_transition(self.range, final_state)
- if nocase:
- if self.uppercase_range:
- initial_state.add_transition(self.uppercase_range, final_state)
- if self.lowercase_range:
- initial_state.add_transition(self.lowercase_range, final_state)
-
- def calc_str(self):
- return "CodeRange(%d,%d)" % (self.code1, self.code2)
-
-
-class _RawNewline(RE):
- """
- RawNewline is a low-level RE which matches a newline character.
- For internal use only.
- """
- nullable = 0
- match_nl = 1
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- if match_bol:
- initial_state = self.build_opt(m, initial_state, BOL)
- s = self.build_opt(m, initial_state, EOL)
- s.add_transition((nl_code, nl_code + 1), final_state)
-
-
-RawNewline = _RawNewline()
-
-
-class SpecialSymbol(RE):
- """
- SpecialSymbol(sym) is an RE which matches the special input
- symbol |sym|, which is one of BOL, EOL or EOF.
- """
- nullable = 0
- match_nl = 0
- sym = None
-
- def __init__(self, sym):
- self.sym = sym
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- # Sequences 'bol bol' and 'bol eof' are impossible, so only need
- # to allow for bol if sym is eol
- if match_bol and self.sym == EOL:
- initial_state = self.build_opt(m, initial_state, BOL)
- initial_state.add_transition(self.sym, final_state)
-
-
-class Seq(RE):
- """Seq(re1, re2, re3...) is an RE which matches |re1| followed by
- |re2| followed by |re3|..."""
-
- def __init__(self, *re_list):
- nullable = 1
+
+ def __init__(self, code1, code2):
+ self.range = (code1, code2)
+ self.uppercase_range = uppercase_range(code1, code2)
+ self.lowercase_range = lowercase_range(code1, code2)
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ if match_bol:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ initial_state.add_transition(self.range, final_state)
+ if nocase:
+ if self.uppercase_range:
+ initial_state.add_transition(self.uppercase_range, final_state)
+ if self.lowercase_range:
+ initial_state.add_transition(self.lowercase_range, final_state)
+
+ def calc_str(self):
+ return "CodeRange(%d,%d)" % (self.code1, self.code2)
+
+
+class _RawNewline(RE):
+ """
+ RawNewline is a low-level RE which matches a newline character.
+ For internal use only.
+ """
+ nullable = 0
+ match_nl = 1
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ if match_bol:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ s = self.build_opt(m, initial_state, EOL)
+ s.add_transition((nl_code, nl_code + 1), final_state)
+
+
+RawNewline = _RawNewline()
+
+
+class SpecialSymbol(RE):
+ """
+ SpecialSymbol(sym) is an RE which matches the special input
+ symbol |sym|, which is one of BOL, EOL or EOF.
+ """
+ nullable = 0
+ match_nl = 0
+ sym = None
+
+ def __init__(self, sym):
+ self.sym = sym
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ # Sequences 'bol bol' and 'bol eof' are impossible, so only need
+ # to allow for bol if sym is eol
+ if match_bol and self.sym == EOL:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ initial_state.add_transition(self.sym, final_state)
+
+
+class Seq(RE):
+ """Seq(re1, re2, re3...) is an RE which matches |re1| followed by
+ |re2| followed by |re3|..."""
+
+ def __init__(self, *re_list):
+ nullable = 1
for i, re in enumerate(re_list):
- self.check_re(i, re)
- nullable = nullable and re.nullable
- self.re_list = re_list
- self.nullable = nullable
- i = len(re_list)
- match_nl = 0
- while i:
+ self.check_re(i, re)
+ nullable = nullable and re.nullable
+ self.re_list = re_list
+ self.nullable = nullable
+ i = len(re_list)
+ match_nl = 0
+ while i:
i -= 1
- re = re_list[i]
- if re.match_nl:
- match_nl = 1
- break
- if not re.nullable:
- break
- self.match_nl = match_nl
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- re_list = self.re_list
- if len(re_list) == 0:
- initial_state.link_to(final_state)
- else:
- s1 = initial_state
- n = len(re_list)
+ re = re_list[i]
+ if re.match_nl:
+ match_nl = 1
+ break
+ if not re.nullable:
+ break
+ self.match_nl = match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ re_list = self.re_list
+ if len(re_list) == 0:
+ initial_state.link_to(final_state)
+ else:
+ s1 = initial_state
+ n = len(re_list)
for i, re in enumerate(re_list):
- if i < n - 1:
- s2 = m.new_state()
- else:
- s2 = final_state
- re.build_machine(m, s1, s2, match_bol, nocase)
- s1 = s2
- match_bol = re.match_nl or (match_bol and re.nullable)
-
- def calc_str(self):
- return "Seq(%s)" % ','.join(map(str, self.re_list))
-
-
-class Alt(RE):
- """Alt(re1, re2, re3...) is an RE which matches either |re1| or
- |re2| or |re3|..."""
-
- def __init__(self, *re_list):
- self.re_list = re_list
- nullable = 0
- match_nl = 0
- nullable_res = []
- non_nullable_res = []
- i = 1
- for re in re_list:
- self.check_re(i, re)
- if re.nullable:
- nullable_res.append(re)
- nullable = 1
- else:
- non_nullable_res.append(re)
- if re.match_nl:
- match_nl = 1
+ if i < n - 1:
+ s2 = m.new_state()
+ else:
+ s2 = final_state
+ re.build_machine(m, s1, s2, match_bol, nocase)
+ s1 = s2
+ match_bol = re.match_nl or (match_bol and re.nullable)
+
+ def calc_str(self):
+ return "Seq(%s)" % ','.join(map(str, self.re_list))
+
+
+class Alt(RE):
+ """Alt(re1, re2, re3...) is an RE which matches either |re1| or
+ |re2| or |re3|..."""
+
+ def __init__(self, *re_list):
+ self.re_list = re_list
+ nullable = 0
+ match_nl = 0
+ nullable_res = []
+ non_nullable_res = []
+ i = 1
+ for re in re_list:
+ self.check_re(i, re)
+ if re.nullable:
+ nullable_res.append(re)
+ nullable = 1
+ else:
+ non_nullable_res.append(re)
+ if re.match_nl:
+ match_nl = 1
i += 1
- self.nullable_res = nullable_res
- self.non_nullable_res = non_nullable_res
- self.nullable = nullable
- self.match_nl = match_nl
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- for re in self.nullable_res:
- re.build_machine(m, initial_state, final_state, match_bol, nocase)
- if self.non_nullable_res:
- if match_bol:
- initial_state = self.build_opt(m, initial_state, BOL)
- for re in self.non_nullable_res:
- re.build_machine(m, initial_state, final_state, 0, nocase)
-
- def calc_str(self):
- return "Alt(%s)" % ','.join(map(str, self.re_list))
-
-
-class Rep1(RE):
- """Rep1(re) is an RE which matches one or more repetitions of |re|."""
-
- def __init__(self, re):
- self.check_re(1, re)
- self.re = re
- self.nullable = re.nullable
- self.match_nl = re.match_nl
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- s1 = m.new_state()
- s2 = m.new_state()
- initial_state.link_to(s1)
- self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase)
- s2.link_to(s1)
- s2.link_to(final_state)
-
- def calc_str(self):
- return "Rep1(%s)" % self.re
-
-
-class SwitchCase(RE):
- """
- SwitchCase(re, nocase) is an RE which matches the same strings as RE,
- but treating upper and lower case letters according to |nocase|. If
- |nocase| is true, case is ignored, otherwise it is not.
- """
- re = None
- nocase = None
-
- def __init__(self, re, nocase):
- self.re = re
- self.nocase = nocase
- self.nullable = re.nullable
- self.match_nl = re.match_nl
-
- def build_machine(self, m, initial_state, final_state, match_bol, nocase):
- self.re.build_machine(m, initial_state, final_state, match_bol,
+ self.nullable_res = nullable_res
+ self.non_nullable_res = non_nullable_res
+ self.nullable = nullable
+ self.match_nl = match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ for re in self.nullable_res:
+ re.build_machine(m, initial_state, final_state, match_bol, nocase)
+ if self.non_nullable_res:
+ if match_bol:
+ initial_state = self.build_opt(m, initial_state, BOL)
+ for re in self.non_nullable_res:
+ re.build_machine(m, initial_state, final_state, 0, nocase)
+
+ def calc_str(self):
+ return "Alt(%s)" % ','.join(map(str, self.re_list))
+
+
+class Rep1(RE):
+ """Rep1(re) is an RE which matches one or more repetitions of |re|."""
+
+ def __init__(self, re):
+ self.check_re(1, re)
+ self.re = re
+ self.nullable = re.nullable
+ self.match_nl = re.match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ s1 = m.new_state()
+ s2 = m.new_state()
+ initial_state.link_to(s1)
+ self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase)
+ s2.link_to(s1)
+ s2.link_to(final_state)
+
+ def calc_str(self):
+ return "Rep1(%s)" % self.re
+
+
+class SwitchCase(RE):
+ """
+ SwitchCase(re, nocase) is an RE which matches the same strings as RE,
+ but treating upper and lower case letters according to |nocase|. If
+ |nocase| is true, case is ignored, otherwise it is not.
+ """
+ re = None
+ nocase = None
+
+ def __init__(self, re, nocase):
+ self.re = re
+ self.nocase = nocase
+ self.nullable = re.nullable
+ self.match_nl = re.match_nl
+
+ def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+ self.re.build_machine(m, initial_state, final_state, match_bol,
self.nocase)
-
- def calc_str(self):
- if self.nocase:
- name = "NoCase"
- else:
- name = "Case"
- return "%s(%s)" % (name, self.re)
-
-#
-# Composite RE constructors
-# -------------------------
-#
-# These REs are defined in terms of the primitive REs.
-#
-
-Empty = Seq()
-Empty.__doc__ = \
- """
- Empty is an RE which matches the empty string.
- """
-Empty.str = "Empty"
-
-
-def Str1(s):
- """
- Str1(s) is an RE which matches the literal string |s|.
- """
- result = Seq(*tuple(map(Char, s)))
- result.str = "Str(%s)" % repr(s)
- return result
-
-
-def Str(*strs):
- """
- Str(s) is an RE which matches the literal string |s|.
- Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
- """
- if len(strs) == 1:
- return Str1(strs[0])
- else:
- result = Alt(*tuple(map(Str1, strs)))
- result.str = "Str(%s)" % ','.join(map(repr, strs))
- return result
-
-
-def Any(s):
- """
- Any(s) is an RE which matches any character in the string |s|.
- """
- #result = apply(Alt, tuple(map(Char, s)))
- result = CodeRanges(chars_to_ranges(s))
- result.str = "Any(%s)" % repr(s)
- return result
-
-
-def AnyBut(s):
- """
- AnyBut(s) is an RE which matches any character (including
- newline) which is not in the string |s|.
- """
- ranges = chars_to_ranges(s)
- ranges.insert(0, -maxint)
- ranges.append(maxint)
- result = CodeRanges(ranges)
- result.str = "AnyBut(%s)" % repr(s)
- return result
-
-
-AnyChar = AnyBut("")
-AnyChar.__doc__ = \
- """
- AnyChar is an RE which matches any single character (including a newline).
- """
-AnyChar.str = "AnyChar"
-
+
+ def calc_str(self):
+ if self.nocase:
+ name = "NoCase"
+ else:
+ name = "Case"
+ return "%s(%s)" % (name, self.re)
+
+#
+# Composite RE constructors
+# -------------------------
+#
+# These REs are defined in terms of the primitive REs.
+#
+
+Empty = Seq()
+Empty.__doc__ = \
+ """
+ Empty is an RE which matches the empty string.
+ """
+Empty.str = "Empty"
+
+
+def Str1(s):
+ """
+ Str1(s) is an RE which matches the literal string |s|.
+ """
+ result = Seq(*tuple(map(Char, s)))
+ result.str = "Str(%s)" % repr(s)
+ return result
+
+
+def Str(*strs):
+ """
+ Str(s) is an RE which matches the literal string |s|.
+ Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
+ """
+ if len(strs) == 1:
+ return Str1(strs[0])
+ else:
+ result = Alt(*tuple(map(Str1, strs)))
+ result.str = "Str(%s)" % ','.join(map(repr, strs))
+ return result
+
+
+def Any(s):
+ """
+ Any(s) is an RE which matches any character in the string |s|.
+ """
+ #result = apply(Alt, tuple(map(Char, s)))
+ result = CodeRanges(chars_to_ranges(s))
+ result.str = "Any(%s)" % repr(s)
+ return result
+
+
+def AnyBut(s):
+ """
+ AnyBut(s) is an RE which matches any character (including
+ newline) which is not in the string |s|.
+ """
+ ranges = chars_to_ranges(s)
+ ranges.insert(0, -maxint)
+ ranges.append(maxint)
+ result = CodeRanges(ranges)
+ result.str = "AnyBut(%s)" % repr(s)
+ return result
+
+
+AnyChar = AnyBut("")
+AnyChar.__doc__ = \
+ """
+ AnyChar is an RE which matches any single character (including a newline).
+ """
+AnyChar.str = "AnyChar"
+
def Range(s1, s2=None):
- """
- Range(c1, c2) is an RE which matches any single character in the range
- |c1| to |c2| inclusive.
- Range(s) where |s| is a string of even length is an RE which matches
- any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
- """
- if s2:
- result = CodeRange(ord(s1), ord(s2) + 1)
- result.str = "Range(%s,%s)" % (s1, s2)
- else:
- ranges = []
- for i in range(0, len(s1), 2):
+ """
+ Range(c1, c2) is an RE which matches any single character in the range
+ |c1| to |c2| inclusive.
+ Range(s) where |s| is a string of even length is an RE which matches
+ any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
+ """
+ if s2:
+ result = CodeRange(ord(s1), ord(s2) + 1)
+ result.str = "Range(%s,%s)" % (s1, s2)
+ else:
+ ranges = []
+ for i in range(0, len(s1), 2):
ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
- result = Alt(*ranges)
- result.str = "Range(%s)" % repr(s1)
- return result
-
-
-def Opt(re):
- """
- Opt(re) is an RE which matches either |re| or the empty string.
- """
- result = Alt(re, Empty)
- result.str = "Opt(%s)" % re
- return result
-
-
-def Rep(re):
- """
- Rep(re) is an RE which matches zero or more repetitions of |re|.
- """
- result = Opt(Rep1(re))
- result.str = "Rep(%s)" % re
- return result
-
-
-def NoCase(re):
- """
- NoCase(re) is an RE which matches the same strings as RE, but treating
- upper and lower case letters as equivalent.
- """
+ result = Alt(*ranges)
+ result.str = "Range(%s)" % repr(s1)
+ return result
+
+
+def Opt(re):
+ """
+ Opt(re) is an RE which matches either |re| or the empty string.
+ """
+ result = Alt(re, Empty)
+ result.str = "Opt(%s)" % re
+ return result
+
+
+def Rep(re):
+ """
+ Rep(re) is an RE which matches zero or more repetitions of |re|.
+ """
+ result = Opt(Rep1(re))
+ result.str = "Rep(%s)" % re
+ return result
+
+
+def NoCase(re):
+ """
+ NoCase(re) is an RE which matches the same strings as RE, but treating
+ upper and lower case letters as equivalent.
+ """
return SwitchCase(re, nocase=1)
-
-
-def Case(re):
- """
- Case(re) is an RE which matches the same strings as RE, but treating
- upper and lower case letters as distinct, i.e. it cancels the effect
- of any enclosing NoCase().
- """
+
+
+def Case(re):
+ """
+ Case(re) is an RE which matches the same strings as RE, but treating
+ upper and lower case letters as distinct, i.e. it cancels the effect
+ of any enclosing NoCase().
+ """
return SwitchCase(re, nocase=0)
-
-#
-# RE Constants
-#
-
-Bol = Char(BOL)
-Bol.__doc__ = \
- """
- Bol is an RE which matches the beginning of a line.
- """
-Bol.str = "Bol"
-
-Eol = Char(EOL)
-Eol.__doc__ = \
- """
- Eol is an RE which matches the end of a line.
- """
-Eol.str = "Eol"
-
-Eof = Char(EOF)
-Eof.__doc__ = \
- """
- Eof is an RE which matches the end of the file.
- """
-Eof.str = "Eof"
-
+
+#
+# RE Constants
+#
+
+Bol = Char(BOL)
+Bol.__doc__ = \
+ """
+ Bol is an RE which matches the beginning of a line.
+ """
+Bol.str = "Bol"
+
+Eol = Char(EOL)
+Eol.__doc__ = \
+ """
+ Eol is an RE which matches the end of a line.
+ """
+Eol.str = "Eol"
+
+Eof = Char(EOF)
+Eof.__doc__ = \
+ """
+ Eof is an RE which matches the end of the file.
+ """
+Eof.str = "Eof"
+
diff --git a/contrib/tools/cython/Cython/Plex/Scanners.pxd b/contrib/tools/cython/Cython/Plex/Scanners.pxd
index e2ac99b552..6e75f55e61 100644
--- a/contrib/tools/cython/Cython/Plex/Scanners.pxd
+++ b/contrib/tools/cython/Cython/Plex/Scanners.pxd
@@ -1,50 +1,50 @@
-from __future__ import absolute_import
-
-import cython
-
-from Cython.Plex.Actions cimport Action
-
-cdef class Scanner:
-
- cdef public lexicon
- cdef public stream
- cdef public name
- cdef public unicode buffer
- cdef public Py_ssize_t buf_start_pos
- cdef public Py_ssize_t next_pos
- cdef public Py_ssize_t cur_pos
- cdef public Py_ssize_t cur_line
- cdef public Py_ssize_t cur_line_start
- cdef public Py_ssize_t start_pos
- cdef public Py_ssize_t start_line
- cdef public Py_ssize_t start_col
- cdef public text
- cdef public initial_state # int?
- cdef public state_name
- cdef public list queue
- cdef public bint trace
- cdef public cur_char
- cdef public long input_state
-
- cdef public level
-
+from __future__ import absolute_import
+
+import cython
+
+from Cython.Plex.Actions cimport Action
+
+cdef class Scanner:
+
+ cdef public lexicon
+ cdef public stream
+ cdef public name
+ cdef public unicode buffer
+ cdef public Py_ssize_t buf_start_pos
+ cdef public Py_ssize_t next_pos
+ cdef public Py_ssize_t cur_pos
+ cdef public Py_ssize_t cur_line
+ cdef public Py_ssize_t cur_line_start
+ cdef public Py_ssize_t start_pos
+ cdef public Py_ssize_t start_line
+ cdef public Py_ssize_t start_col
+ cdef public text
+ cdef public initial_state # int?
+ cdef public state_name
+ cdef public list queue
+ cdef public bint trace
+ cdef public cur_char
+ cdef public long input_state
+
+ cdef public level
+
@cython.final
- @cython.locals(input_state=long)
- cdef next_char(self)
- @cython.locals(action=Action)
+ @cython.locals(input_state=long)
+ cdef next_char(self)
+ @cython.locals(action=Action)
cpdef tuple read(self)
@cython.final
- cdef tuple scan_a_token(self)
+ cdef tuple scan_a_token(self)
##cdef tuple position(self) # used frequently by Parsing.py
-
+
@cython.final
@cython.locals(cur_pos=Py_ssize_t, cur_line=Py_ssize_t, cur_line_start=Py_ssize_t,
input_state=long, next_pos=Py_ssize_t, state=dict,
buf_start_pos=Py_ssize_t, buf_len=Py_ssize_t, buf_index=Py_ssize_t,
trace=bint, discard=Py_ssize_t, data=unicode, buffer=unicode)
- cdef run_machine_inlined(self)
-
+ cdef run_machine_inlined(self)
+
@cython.final
- cdef begin(self, state)
+ cdef begin(self, state)
@cython.final
- cdef produce(self, value, text = *)
+ cdef produce(self, value, text = *)
diff --git a/contrib/tools/cython/Cython/Plex/Scanners.py b/contrib/tools/cython/Cython/Plex/Scanners.py
index ee5fea728e..88f7e2da3b 100644
--- a/contrib/tools/cython/Cython/Plex/Scanners.py
+++ b/contrib/tools/cython/Cython/Plex/Scanners.py
@@ -1,57 +1,57 @@
# cython: auto_pickle=False
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-#
-# Scanning an input stream
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-import cython
-
-cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
-
-from . import Errors
-from .Regexps import BOL, EOL, EOF
-
-NOT_FOUND = object()
-
-
-class Scanner(object):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+#
+# Scanning an input stream
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import cython
+
+cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
+
+from . import Errors
+from .Regexps import BOL, EOL, EOF
+
+NOT_FOUND = object()
+
+
+class Scanner(object):
"""
A Scanner is used to read tokens from a stream of characters
using the token set specified by a Plex.Lexicon.
-
+
Constructor:
-
+
Scanner(lexicon, stream, name = '')
-
+
See the docstring of the __init__ method for details.
-
+
Methods:
-
+
See the docstrings of the individual methods for more
information.
-
+
read() --> (value, text)
Reads the next lexical token from the stream.
-
+
position() --> (name, line, col)
Returns the position of the last token read using the
read() method.
-
+
begin(state_name)
Causes scanner to change state.
-
+
produce(value [, text])
Causes return of a token value to the caller of the
Scanner.
-
+
"""
-
+
# lexicon = None # Lexicon
# stream = None # file-like object
# name = ''
@@ -69,22 +69,22 @@ class Scanner(object):
# state_name = '' # Name of initial state
# queue = None # list of tokens to be returned
# trace = 0
-
+
def __init__(self, lexicon, stream, name='', initial_pos=None):
"""
Scanner(lexicon, stream, name = '')
-
+
|lexicon| is a Plex.Lexicon instance specifying the lexical tokens
to be recognised.
-
+
|stream| can be a file object or anything which implements a
compatible read() method.
-
+
|name| is optional, and may be the name of the file being
scanned or any other identifying string.
"""
self.trace = 0
-
+
self.buffer = u''
self.buf_start_pos = 0
self.next_pos = 0
@@ -95,7 +95,7 @@ class Scanner(object):
self.start_col = 0
self.text = None
self.state_name = None
-
+
self.lexicon = lexicon
self.stream = stream
self.name = name
@@ -109,7 +109,7 @@ class Scanner(object):
self.input_state = 1
if initial_pos is not None:
self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
-
+
def read(self):
"""
Read the next lexical token from the stream and return a
@@ -130,7 +130,7 @@ class Scanner(object):
result = queue[0]
del queue[0]
return result
-
+
def scan_a_token(self):
"""
Read the next input sequence recognised by the machine
@@ -156,7 +156,7 @@ class Scanner(object):
if self.cur_char is None or self.cur_char is EOF:
return (u'', None)
raise Errors.UnrecognizedInput(self, self.state_name)
-
+
def run_machine_inlined(self):
"""
Inlined version of run_machine for speed.
@@ -171,7 +171,7 @@ class Scanner(object):
buffer = self.buffer
buf_start_pos = self.buf_start_pos
buf_len = len(buffer)
- b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
+ b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
None, 0, 0, 0, u'', 0, 0
trace = self.trace
while 1:
@@ -267,7 +267,7 @@ class Scanner(object):
input_state = self.input_state
if self.trace:
print("Scanner: next: %s [%d] %d" % (" " * 20, input_state, self.cur_pos))
- if input_state == 1:
+ if input_state == 1:
self.cur_pos = self.next_pos
c = self.read_char()
if c == u'\n':
@@ -276,24 +276,24 @@ class Scanner(object):
elif not c:
self.cur_char = EOL
self.input_state = 4
- else:
+ else:
self.cur_char = c
- elif input_state == 2:
+ elif input_state == 2:
self.cur_char = u'\n'
self.input_state = 3
- elif input_state == 3:
+ elif input_state == 3:
self.cur_line += 1
self.cur_line_start = self.cur_pos = self.next_pos
self.cur_char = BOL
self.input_state = 1
- elif input_state == 4:
+ elif input_state == 4:
self.cur_char = EOF
self.input_state = 5
else: # input_state = 5
self.cur_char = u''
if self.trace:
print("--> [%d] %d %r" % (input_state, self.cur_pos, self.cur_char))
-
+
def position(self):
"""
Return a tuple (name, line, col) representing the location of
@@ -304,24 +304,24 @@ class Scanner(object):
(0-based).
"""
return (self.name, self.start_line, self.start_col)
-
+
def get_position(self):
"""Python accessible wrapper around position(), only for error reporting.
"""
return self.position()
-
+
def begin(self, state_name):
"""Set the current state of the scanner to the named state."""
self.initial_state = (
self.lexicon.get_initial_state(state_name))
self.state_name = state_name
-
+
def produce(self, value, text=None):
"""
Called from an action procedure, causes |value| to be returned
as the token value from read(). If |text| is supplied, it is
returned in place of the scanned text.
-
+
produce() can be called more than once during a single call to an action
procedure, in which case the tokens are queued up and returned one
at a time by subsequent calls to read(), until the queue is empty,
@@ -330,7 +330,7 @@ class Scanner(object):
if text is None:
text = self.text
self.queue.append((value, text))
-
+
def eof(self):
"""
Override this method if you want something to be done at
diff --git a/contrib/tools/cython/Cython/Plex/Timing.py b/contrib/tools/cython/Cython/Plex/Timing.py
index 48f482cf30..5c3692693b 100644
--- a/contrib/tools/cython/Cython/Plex/Timing.py
+++ b/contrib/tools/cython/Cython/Plex/Timing.py
@@ -1,23 +1,23 @@
-#
-# Get time in platform-dependent way
-#
-
-from __future__ import absolute_import
-
-import os
-from sys import platform, exit, stderr
-
-if platform == 'mac':
- import MacOS
- def time():
- return MacOS.GetTicks() / 60.0
- timekind = "real"
-elif hasattr(os, 'times'):
- def time():
- t = os.times()
- return t[0] + t[1]
- timekind = "cpu"
-else:
- stderr.write(
- "Don't know how to get time on platform %s\n" % repr(platform))
- exit(1)
+#
+# Get time in platform-dependent way
+#
+
+from __future__ import absolute_import
+
+import os
+from sys import platform, exit, stderr
+
+if platform == 'mac':
+ import MacOS
+ def time():
+ return MacOS.GetTicks() / 60.0
+ timekind = "real"
+elif hasattr(os, 'times'):
+ def time():
+ t = os.times()
+ return t[0] + t[1]
+ timekind = "cpu"
+else:
+ stderr.write(
+ "Don't know how to get time on platform %s\n" % repr(platform))
+ exit(1)
diff --git a/contrib/tools/cython/Cython/Plex/Traditional.py b/contrib/tools/cython/Cython/Plex/Traditional.py
index 5c06cc23b4..ec7252daed 100644
--- a/contrib/tools/cython/Cython/Plex/Traditional.py
+++ b/contrib/tools/cython/Cython/Plex/Traditional.py
@@ -1,42 +1,42 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Traditional Regular Expression Syntax
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
-from .Errors import PlexError
-
-
-class RegexpSyntaxError(PlexError):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Traditional Regular Expression Syntax
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
+from .Errors import PlexError
+
+
+class RegexpSyntaxError(PlexError):
pass
-
-
-def re(s):
+
+
+def re(s):
"""
Convert traditional string representation of regular expression |s|
into Plex representation.
"""
return REParser(s).parse_re()
-
-
-class REParser(object):
+
+
+class REParser(object):
def __init__(self, s):
self.s = s
self.i = -1
self.end = 0
self.next()
-
+
def parse_re(self):
re = self.parse_alt()
if not self.end:
self.error("Unexpected %s" % repr(self.c))
return re
-
+
def parse_alt(self):
"""Parse a set of alternative regexps."""
re = self.parse_seq()
@@ -47,14 +47,14 @@ class REParser(object):
re_list.append(self.parse_seq())
re = Alt(*re_list)
return re
-
+
def parse_seq(self):
"""Parse a sequence of regexps."""
re_list = []
while not self.end and not self.c in "|)":
re_list.append(self.parse_mod())
return Seq(*re_list)
-
+
def parse_mod(self):
"""Parse a primitive regexp followed by *, +, ? modifiers."""
re = self.parse_prim()
@@ -67,10 +67,10 @@ class REParser(object):
re = Opt(re)
self.next()
return re
-
+
def parse_prim(self):
"""Parse a primitive regexp."""
- c = self.get()
+ c = self.get()
if c == '.':
re = AnyBut("\n")
elif c == '^':
@@ -88,7 +88,7 @@ class REParser(object):
c = self.get()
re = Char(c)
return re
-
+
def parse_charset(self):
"""Parse a charset. Does not include the surrounding []."""
char_list = []
@@ -113,7 +113,7 @@ class REParser(object):
return AnyBut(chars)
else:
return Any(chars)
-
+
def next(self):
"""Advance to the next char."""
s = self.s
@@ -123,14 +123,14 @@ class REParser(object):
else:
self.c = ''
self.end = 1
-
+
def get(self):
if self.end:
self.error("Premature end of string")
c = self.c
self.next()
return c
-
+
def lookahead(self, n):
"""Look ahead n chars."""
j = self.i + n
@@ -138,7 +138,7 @@ class REParser(object):
return self.s[j]
else:
return ''
-
+
def expect(self, c):
"""
Expect to find character |c| at current position.
@@ -148,11 +148,11 @@ class REParser(object):
self.next()
else:
self.error("Missing %s" % repr(c))
-
+
def error(self, mess):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))
-
-
-
+
+
+
diff --git a/contrib/tools/cython/Cython/Plex/Transitions.py b/contrib/tools/cython/Cython/Plex/Transitions.py
index b7c920f467..3833817946 100644
--- a/contrib/tools/cython/Cython/Plex/Transitions.py
+++ b/contrib/tools/cython/Cython/Plex/Transitions.py
@@ -1,48 +1,48 @@
-#
+#
# Plex - Transition Maps
-#
+#
# This version represents state sets directly as dicts for speed.
-#
-
-from __future__ import absolute_import
-
+#
+
+from __future__ import absolute_import
+
try:
from sys import maxsize as maxint
except ImportError:
from sys import maxint
-
-
-class TransitionMap(object):
+
+
+class TransitionMap(object):
"""
A TransitionMap maps an input event to a set of states.
An input event is one of: a range of character codes,
the empty string (representing an epsilon move), or one
of the special symbols BOL, EOL, EOF.
-
+
For characters, this implementation compactly represents
the map by means of a list:
-
+
[code_0, states_0, code_1, states_1, code_2, states_2,
..., code_n-1, states_n-1, code_n]
-
+
where |code_i| is a character code, and |states_i| is a
set of states corresponding to characters with codes |c|
in the range |code_i| <= |c| <= |code_i+1|.
-
+
The following invariants hold:
n >= 1
code_0 == -maxint
code_n == maxint
code_i < code_i+1 for i in 0..n-1
states_0 == states_n-1
-
+
Mappings for the special events '', BOL, EOL, EOF are
kept separately in a dictionary.
"""
-
+
map = None # The list of codes and states
special = None # Mapping for special events
-
+
def __init__(self, map=None, special=None):
if not map:
map = [-maxint, {}, maxint]
@@ -51,7 +51,7 @@ class TransitionMap(object):
self.map = map
self.special = special
#self.check() ###
-
+
def add(self, event, new_state,
TupleType=tuple):
"""
@@ -67,7 +67,7 @@ class TransitionMap(object):
i += 2
else:
self.get_special(event)[new_state] = 1
-
+
def add_set(self, event, new_set,
TupleType=tuple):
"""
@@ -83,14 +83,14 @@ class TransitionMap(object):
i += 2
else:
self.get_special(event).update(new_set)
-
+
def get_epsilon(self,
none=None):
"""
Return the mapping for epsilon, or None.
"""
return self.special.get('', none)
-
+
def iteritems(self,
len=len):
"""
@@ -114,11 +114,11 @@ class TransitionMap(object):
if set:
result.append((event, set))
return iter(result)
-
+
items = iteritems
-
+
# ------------------- Private methods --------------------
-
+
def split(self, code,
len=len, maxint=maxint):
"""
@@ -149,7 +149,7 @@ class TransitionMap(object):
map[hi:hi] = [code, map[hi - 1].copy()]
#self.check() ###
return hi
-
+
def get_special(self, event):
"""
Get state set for special event, adding a new entry if necessary.
@@ -160,9 +160,9 @@ class TransitionMap(object):
set = {}
special[event] = set
return set
-
+
# --------------------- Conversion methods -----------------------
-
+
def __str__(self):
map_strs = []
map = self.map
@@ -188,15 +188,15 @@ class TransitionMap(object):
','.join(map_strs),
special_strs
)
-
+
# --------------------- Debugging methods -----------------------
-
+
def check(self):
"""Check data structure integrity."""
if not self.map[-3] < self.map[-1]:
print(self)
assert 0
-
+
def dump(self, file):
map = self.map
i = 0
@@ -229,23 +229,23 @@ class TransitionMap(object):
def dump_char(self, code):
if 0 <= code <= 255:
return repr(chr(code))
- else:
+ else:
return "chr(%d)" % code
-
+
def dump_trans(self, key, set, file):
file.write(" %s --> %s\n" % (key, self.dump_set(set)))
-
+
def dump_set(self, set):
return state_set_str(set)
-
-
-#
-# State set manipulation functions
-#
-
-#def merge_state_sets(set1, set2):
-# for state in set2.keys():
-# set1[state] = 1
-
-def state_set_str(set):
+
+
+#
+# State set manipulation functions
+#
+
+#def merge_state_sets(set1, set2):
+# for state in set2.keys():
+# set1[state] = 1
+
+def state_set_str(set):
return "[%s]" % ','.join(["S%d" % state.number for state in set])
diff --git a/contrib/tools/cython/Cython/Plex/__init__.py b/contrib/tools/cython/Cython/Plex/__init__.py
index d968a43ce9..81a066f782 100644
--- a/contrib/tools/cython/Cython/Plex/__init__.py
+++ b/contrib/tools/cython/Cython/Plex/__init__.py
@@ -1,39 +1,39 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-#=======================================================================
-
-"""
-The Plex module provides lexical analysers with similar capabilities
-to GNU Flex. The following classes and functions are exported;
-see the attached docstrings for more information.
-
- Scanner For scanning a character stream under the
- direction of a Lexicon.
-
- Lexicon For constructing a lexical definition
- to be used by a Scanner.
-
- Str, Any, AnyBut, AnyChar, Seq, Alt, Opt, Rep, Rep1,
- Bol, Eol, Eof, Empty
-
- Regular expression constructors, for building pattern
- definitions for a Lexicon.
-
- State For defining scanner states when creating a
- Lexicon.
-
- TEXT, IGNORE, Begin
-
- Actions for associating with patterns when
- creating a Lexicon.
-"""
-
-from __future__ import absolute_import
-
-from .Actions import TEXT, IGNORE, Begin
-from .Lexicons import Lexicon, State
-from .Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range
-from .Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase
-from .Scanners import Scanner
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+#=======================================================================
+
+"""
+The Plex module provides lexical analysers with similar capabilities
+to GNU Flex. The following classes and functions are exported;
+see the attached docstrings for more information.
+
+ Scanner For scanning a character stream under the
+ direction of a Lexicon.
+
+ Lexicon For constructing a lexical definition
+ to be used by a Scanner.
+
+ Str, Any, AnyBut, AnyChar, Seq, Alt, Opt, Rep, Rep1,
+ Bol, Eol, Eof, Empty
+
+ Regular expression constructors, for building pattern
+ definitions for a Lexicon.
+
+ State For defining scanner states when creating a
+ Lexicon.
+
+ TEXT, IGNORE, Begin
+
+ Actions for associating with patterns when
+ creating a Lexicon.
+"""
+
+from __future__ import absolute_import
+
+from .Actions import TEXT, IGNORE, Begin
+from .Lexicons import Lexicon, State
+from .Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range
+from .Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase
+from .Scanners import Scanner