Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.

author: orivej <orivej@yandex-team.ru> 2022-02-10 16:44:49 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:49 +0300
commit: 718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree: 46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/tools/cython/Cython/Plex/Lexicons.py
parent: e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
download: ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz
1 files changed, 164 insertions, 164 deletions
diff --git a/contrib/tools/cython/Cython/Plex/Lexicons.py b/contrib/tools/cython/Cython/Plex/Lexicons.py
index 787f5854b8..eaacbc9c54 100644
--- a/contrib/tools/cython/Cython/Plex/Lexicons.py
+++ b/contrib/tools/cython/Cython/Plex/Lexicons.py
@@ -22,179 +22,179 @@ DUMP_DFA = 2
 
 
 class State(object):
-    """
-    This class is used as part of a Plex.Lexicon specification to
-    introduce a user-defined state.
+    """ 
+    This class is used as part of a Plex.Lexicon specification to 
+    introduce a user-defined state. 
 
-    Constructor:
+    Constructor: 
 
-       State(name, token_specifications)
-    """
+       State(name, token_specifications) 
+    """ 
 
-    name = None
-    tokens = None
-
-    def __init__(self, name, tokens):
-        self.name = name
-        self.tokens = tokens
+    name = None 
+    tokens = None 
 
+    def __init__(self, name, tokens): 
+        self.name = name 
+        self.tokens = tokens 
 
+ 
 class Lexicon(object):
-    """
-    Lexicon(specification) builds a lexical analyser from the given
-    |specification|. The specification consists of a list of
-    specification items. Each specification item may be either:
+    """ 
+    Lexicon(specification) builds a lexical analyser from the given 
+    |specification|. The specification consists of a list of 
+    specification items. Each specification item may be either: 
 
-       1) A token definition, which is a tuple:
+       1) A token definition, which is a tuple: 
 
-             (pattern, action)
-
-          The |pattern| is a regular axpression built using the
-          constructors defined in the Plex module.
-
-          The |action| is the action to be performed when this pattern
-          is recognised (see below).
-
-       2) A state definition:
-
-             State(name, tokens)
-
-          where |name| is a character string naming the state,
-          and |tokens| is a list of token definitions as
-          above. The meaning and usage of states is described
-          below.
-
-    Actions
-    -------
-
-    The |action| in a token specication may be one of three things:
-
-       1) A function, which is called as follows:
-
-             function(scanner, text)
-
-          where |scanner| is the relevant Scanner instance, and |text|
-          is the matched text. If the function returns anything
-          other than None, that value is returned as the value of the
-          token. If it returns None, scanning continues as if the IGNORE
-          action were specified (see below).
-
-        2) One of the following special actions:
-
-           IGNORE means that the recognised characters will be treated as
-                  white space and ignored. Scanning will continue until
-                  the next non-ignored token is recognised before returning.
-
-           TEXT   causes the scanned text itself to be returned as the
-                  value of the token.
-
-        3) Any other value, which is returned as the value of the token.
-
-    States
-    ------
-
-    At any given time, the scanner is in one of a number of states.
-    Associated with each state is a set of possible tokens. When scanning,
-    only tokens associated with the current state are recognised.
-
-    There is a default state, whose name is the empty string. Token
-    definitions which are not inside any State definition belong to
-    the default state.
-
-    The initial state of the scanner is the default state. The state can
-    be changed in one of two ways:
-
-       1) Using Begin(state_name) as the action of a token.
-
-       2) Calling the begin(state_name) method of the Scanner.
-
-    To change back to the default state, use '' as the state name.
-    """
-
-    machine = None  # Machine
-    tables = None   # StateTableMachine
-
-    def __init__(self, specifications, debug=None, debug_flags=7, timings=None):
-        if not isinstance(specifications, list):
-            raise Errors.InvalidScanner("Scanner definition is not a list")
-        if timings:
-            from .Timing import time
-
-            total_time = 0.0
-            time1 = time()
-        nfa = Machines.Machine()
-        default_initial_state = nfa.new_initial_state('')
-        token_number = 1
-        for spec in specifications:
-            if isinstance(spec, State):
-                user_initial_state = nfa.new_initial_state(spec.name)
-                for token in spec.tokens:
-                    self.add_token_to_machine(
-                        nfa, user_initial_state, token, token_number)
-                    token_number += 1
-            elif isinstance(spec, tuple):
-                self.add_token_to_machine(
-                    nfa, default_initial_state, spec, token_number)
-                token_number += 1
-            else:
-                raise Errors.InvalidToken(
-                    token_number,
-                    "Expected a token definition (tuple) or State instance")
-        if timings:
-            time2 = time()
-            total_time = total_time + (time2 - time1)
-            time3 = time()
-        if debug and (debug_flags & 1):
-            debug.write("\n============= NFA ===========\n")
-            nfa.dump(debug)
-        dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug)
-        if timings:
-            time4 = time()
-            total_time = total_time + (time4 - time3)
-        if debug and (debug_flags & 2):
-            debug.write("\n============= DFA ===========\n")
-            dfa.dump(debug)
-        if timings:
-            timings.write("Constructing NFA : %5.2f\n" % (time2 - time1))
-            timings.write("Converting to DFA: %5.2f\n" % (time4 - time3))
-            timings.write("TOTAL            : %5.2f\n" % total_time)
-        self.machine = dfa
-
-    def add_token_to_machine(self, machine, initial_state, token_spec, token_number):
+             (pattern, action) 
+
+          The |pattern| is a regular axpression built using the 
+          constructors defined in the Plex module. 
+
+          The |action| is the action to be performed when this pattern 
+          is recognised (see below). 
+
+       2) A state definition: 
+
+             State(name, tokens) 
+
+          where |name| is a character string naming the state, 
+          and |tokens| is a list of token definitions as 
+          above. The meaning and usage of states is described 
+          below. 
+
+    Actions 
+    ------- 
+
+    The |action| in a token specication may be one of three things: 
+
+       1) A function, which is called as follows: 
+
+             function(scanner, text) 
+
+          where |scanner| is the relevant Scanner instance, and |text| 
+          is the matched text. If the function returns anything 
+          other than None, that value is returned as the value of the 
+          token. If it returns None, scanning continues as if the IGNORE 
+          action were specified (see below). 
+
+        2) One of the following special actions: 
+
+           IGNORE means that the recognised characters will be treated as 
+                  white space and ignored. Scanning will continue until 
+                  the next non-ignored token is recognised before returning. 
+
+           TEXT   causes the scanned text itself to be returned as the 
+                  value of the token. 
+
+        3) Any other value, which is returned as the value of the token. 
+
+    States 
+    ------ 
+
+    At any given time, the scanner is in one of a number of states. 
+    Associated with each state is a set of possible tokens. When scanning, 
+    only tokens associated with the current state are recognised. 
+
+    There is a default state, whose name is the empty string. Token 
+    definitions which are not inside any State definition belong to 
+    the default state. 
+
+    The initial state of the scanner is the default state. The state can 
+    be changed in one of two ways: 
+
+       1) Using Begin(state_name) as the action of a token. 
+
+       2) Calling the begin(state_name) method of the Scanner. 
+
+    To change back to the default state, use '' as the state name. 
+    """ 
+
+    machine = None  # Machine 
+    tables = None   # StateTableMachine 
+
+    def __init__(self, specifications, debug=None, debug_flags=7, timings=None): 
+        if not isinstance(specifications, list): 
+            raise Errors.InvalidScanner("Scanner definition is not a list") 
+        if timings: 
+            from .Timing import time 
+
+            total_time = 0.0 
+            time1 = time() 
+        nfa = Machines.Machine() 
+        default_initial_state = nfa.new_initial_state('') 
+        token_number = 1 
+        for spec in specifications: 
+            if isinstance(spec, State): 
+                user_initial_state = nfa.new_initial_state(spec.name) 
+                for token in spec.tokens: 
+                    self.add_token_to_machine( 
+                        nfa, user_initial_state, token, token_number) 
+                    token_number += 1 
+            elif isinstance(spec, tuple): 
+                self.add_token_to_machine( 
+                    nfa, default_initial_state, spec, token_number) 
+                token_number += 1 
+            else: 
+                raise Errors.InvalidToken( 
+                    token_number, 
+                    "Expected a token definition (tuple) or State instance") 
+        if timings: 
+            time2 = time() 
+            total_time = total_time + (time2 - time1) 
+            time3 = time() 
+        if debug and (debug_flags & 1): 
+            debug.write("\n============= NFA ===========\n") 
+            nfa.dump(debug) 
+        dfa = DFA.nfa_to_dfa(nfa, debug=(debug_flags & 3) == 3 and debug) 
+        if timings: 
+            time4 = time() 
+            total_time = total_time + (time4 - time3) 
+        if debug and (debug_flags & 2): 
+            debug.write("\n============= DFA ===========\n") 
+            dfa.dump(debug) 
+        if timings: 
+            timings.write("Constructing NFA : %5.2f\n" % (time2 - time1)) 
+            timings.write("Converting to DFA: %5.2f\n" % (time4 - time3)) 
+            timings.write("TOTAL            : %5.2f\n" % total_time) 
+        self.machine = dfa 
+ 
+    def add_token_to_machine(self, machine, initial_state, token_spec, token_number): 
         try:
-            (re, action_spec) = self.parse_token_definition(token_spec)
-            # Disabled this -- matching empty strings can be useful
-            #if re.nullable:
-            #  raise Errors.InvalidToken(
-            #    token_number, "Pattern can match 0 input symbols")
-            if isinstance(action_spec, Actions.Action):
-                action = action_spec
-            else:
-                try:
-                    action_spec.__call__
-                except AttributeError:
-                    action = Actions.Return(action_spec)
-                else:
-                    action = Actions.Call(action_spec)
-            final_state = machine.new_state()
-            re.build_machine(machine, initial_state, final_state,
-                             match_bol=1, nocase=0)
-            final_state.set_action(action, priority=-token_number)
-        except Errors.PlexError as e:
-            raise e.__class__("Token number %d: %s" % (token_number, e))
-
-    def parse_token_definition(self, token_spec):
-        if not isinstance(token_spec, tuple):
-            raise Errors.InvalidToken("Token definition is not a tuple")
-        if len(token_spec) != 2:
-            raise Errors.InvalidToken("Wrong number of items in token definition")
-        pattern, action = token_spec
-        if not isinstance(pattern, Regexps.RE):
-            raise Errors.InvalidToken("Pattern is not an RE instance")
-        return (pattern, action)
-
-    def get_initial_state(self, name):
-        return self.machine.get_initial_state(name)
+            (re, action_spec) = self.parse_token_definition(token_spec) 
+            # Disabled this -- matching empty strings can be useful 
+            #if re.nullable: 
+            #  raise Errors.InvalidToken( 
+            #    token_number, "Pattern can match 0 input symbols") 
+            if isinstance(action_spec, Actions.Action): 
+                action = action_spec 
+            else: 
+                try: 
+                    action_spec.__call__ 
+                except AttributeError: 
+                    action = Actions.Return(action_spec) 
+                else: 
+                    action = Actions.Call(action_spec) 
+            final_state = machine.new_state() 
+            re.build_machine(machine, initial_state, final_state, 
+                             match_bol=1, nocase=0) 
+            final_state.set_action(action, priority=-token_number) 
+        except Errors.PlexError as e: 
+            raise e.__class__("Token number %d: %s" % (token_number, e)) 
+
+    def parse_token_definition(self, token_spec): 
+        if not isinstance(token_spec, tuple): 
+            raise Errors.InvalidToken("Token definition is not a tuple") 
+        if len(token_spec) != 2: 
+            raise Errors.InvalidToken("Wrong number of items in token definition") 
+        pattern, action = token_spec 
+        if not isinstance(pattern, Regexps.RE): 
+            raise Errors.InvalidToken("Pattern is not an RE instance") 
+        return (pattern, action) 
+
+    def get_initial_state(self, name): 
+        return self.machine.get_initial_state(name)
author	orivej <orivej@yandex-team.ru>	2022-02-10 16:44:49 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:44:49 +0300
commit	718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree	46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/tools/cython/Cython/Plex/Lexicons.py
parent	e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
download	ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz