path: root/contrib/tools/cython/Cython/Plex/Lexicons.py
author      Anton Samokhvalov <pg83@yandex.ru>  2022-02-10 16:45:15 +0300
committer   Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:45:15 +0300
commit      72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
tree        da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /contrib/tools/cython/Cython/Plex/Lexicons.py
parent      778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
download    ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/cython/Cython/Plex/Lexicons.py')
-rw-r--r--  contrib/tools/cython/Cython/Plex/Lexicons.py  124
1 file changed, 62 insertions, 62 deletions
diff --git a/contrib/tools/cython/Cython/Plex/Lexicons.py b/contrib/tools/cython/Cython/Plex/Lexicons.py
index 787f5854b8..e163caef41 100644
--- a/contrib/tools/cython/Cython/Plex/Lexicons.py
+++ b/contrib/tools/cython/Cython/Plex/Lexicons.py
@@ -1,125 +1,125 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Lexical Analyser Specification
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-import types
-
-from . import Actions
-from . import DFA
-from . import Errors
-from . import Machines
-from . import Regexps
-
-# debug_flags for Lexicon constructor
-DUMP_NFA = 1
-DUMP_DFA = 2
-
-
-class State(object):
+#=======================================================================
+#
+# Python Lexical Analyser
+#
+# Lexical Analyser Specification
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import types
+
+from . import Actions
+from . import DFA
+from . import Errors
+from . import Machines
+from . import Regexps
+
+# debug_flags for Lexicon constructor
+DUMP_NFA = 1
+DUMP_DFA = 2
+
+
+class State(object):
"""
This class is used as part of a Plex.Lexicon specification to
introduce a user-defined state.
-
+
Constructor:
-
+
State(name, token_specifications)
"""
-
+
name = None
tokens = None
-
+
def __init__(self, name, tokens):
self.name = name
self.tokens = tokens
+
-
-class Lexicon(object):
+class Lexicon(object):
"""
Lexicon(specification) builds a lexical analyser from the given
|specification|. The specification consists of a list of
specification items. Each specification item may be either:
-
+
1) A token definition, which is a tuple:
-
+
(pattern, action)
-
+
    The |pattern| is a regular expression built using the
constructors defined in the Plex module.
-
+
The |action| is the action to be performed when this pattern
is recognised (see below).
-
+
2) A state definition:
-
+
State(name, tokens)
-
+
where |name| is a character string naming the state,
and |tokens| is a list of token definitions as
above. The meaning and usage of states is described
below.
-
+
Actions
-------
-
+
    The |action| in a token specification may be one of three things:
-
+
1) A function, which is called as follows:
-
+
function(scanner, text)
-
+
where |scanner| is the relevant Scanner instance, and |text|
is the matched text. If the function returns anything
other than None, that value is returned as the value of the
token. If it returns None, scanning continues as if the IGNORE
action were specified (see below).
-
+
2) One of the following special actions:
-
+
IGNORE means that the recognised characters will be treated as
white space and ignored. Scanning will continue until
the next non-ignored token is recognised before returning.
-
+
TEXT causes the scanned text itself to be returned as the
value of the token.
-
+
3) Any other value, which is returned as the value of the token.
-
+
States
------
-
+
At any given time, the scanner is in one of a number of states.
Associated with each state is a set of possible tokens. When scanning,
only tokens associated with the current state are recognised.
-
+
There is a default state, whose name is the empty string. Token
definitions which are not inside any State definition belong to
the default state.
-
+
The initial state of the scanner is the default state. The state can
be changed in one of two ways:
-
+
1) Using Begin(state_name) as the action of a token.
-
+
2) Calling the begin(state_name) method of the Scanner.
-
+
To change back to the default state, use '' as the state name.
"""
-
+
machine = None # Machine
tables = None # StateTableMachine
-
+
def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
if not isinstance(specifications, list):
raise Errors.InvalidScanner("Scanner definition is not a list")
if timings:
from .Timing import time
-
+
total_time = 0.0
time1 = time()
nfa = Machines.Machine()
@@ -161,7 +161,7 @@ class Lexicon(object):
            self.machine = dfa

    def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
- try:
+ try:
(re, action_spec) = self.parse_token_definition(token_spec)
# Disabled this -- matching empty strings can be useful
#if re.nullable:
@@ -182,7 +182,7 @@ class Lexicon(object):
final_state.set_action(action, priority=-token_number)
except Errors.PlexError as e:
raise e.__class__("Token number %d: %s" % (token_number, e))
-
+
def parse_token_definition(self, token_spec):
if not isinstance(token_spec, tuple):
raise Errors.InvalidToken("Token definition is not a tuple")
@@ -192,9 +192,9 @@ class Lexicon(object):
if not isinstance(pattern, Regexps.RE):
raise Errors.InvalidToken("Pattern is not an RE instance")
return (pattern, action)
-
+
def get_initial_state(self, name):
return self.machine.get_initial_state(name)
-
-
-
+
+
+
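
For readers unfamiliar with Plex, the docstring above fully describes the Lexicon specification format. Below is a minimal usage sketch assuming the standard Plex names (Lexicon, State, Scanner, the regular-expression constructors, and the IGNORE/TEXT/Begin actions); the concrete token patterns, the make_int helper, the 'comment' state and the sample input are illustrative assumptions, not part of the patched file.

# Minimal sketch of a Lexicon specification, following the docstring above.
# The patterns, the 'comment' state and the demo input are made up for
# illustration; only the Plex names themselves come from the library.
from io import StringIO

from Cython.Plex.Lexicons import Lexicon, State
from Cython.Plex.Regexps import Any, AnyBut, Rep, Rep1, Str
from Cython.Plex.Actions import Begin, IGNORE, TEXT
from Cython.Plex.Scanners import Scanner

letter = Any("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")
digit = Any("0123456789")

def make_int(scanner, text):
    # Function action: any value it returns other than None becomes the token value.
    return int(text)

lexicon = Lexicon([
    (letter + Rep(letter | digit), TEXT),              # return the matched text itself
    (Rep1(digit),                  make_int),          # call a function with (scanner, text)
    (Str("="),                     "="),               # any other value is returned as the token value
    (Any(" \t\n"),                 IGNORE),            # treat as white space and skip
    (Str("#"),                     Begin("comment")),  # switch to the 'comment' state
    State("comment", [
        (Str("\n"),    Begin("")),   # newline ends the comment, back to the default state
        (AnyBut("\n"), IGNORE),      # ignore everything else inside the comment
    ]),
])

scanner = Scanner(lexicon, StringIO("x1 = 42  # answer\ny2\n"), "demo")
while True:
    value, text = scanner.read()
    if value is None:     # end of input
        break
    print(value)          # -> 'x1', '=', 42, 'y2'

Note also the priority=-token_number argument visible in add_token_to_machine in the diff: because earlier tokens get a higher (less negative) priority, the token listed first in the specification wins when two patterns match the same text.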