Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.

author: Anton Samokhvalov <pg83@yandex.ru> 2022-02-10 16:45:17 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:17 +0300
commit: d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
tree: dd4bd3ca0f36b817e96812825ffaf10d645803f2 /contrib/tools/cython/Cython/Plex/Regexps.py
parent: 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
download: ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
1 files changed, 542 insertions, 542 deletions
diff --git a/contrib/tools/cython/Cython/Plex/Regexps.py b/contrib/tools/cython/Cython/Plex/Regexps.py
index 43e5fa3de9..41816c939a 100644
--- a/contrib/tools/cython/Cython/Plex/Regexps.py
+++ b/contrib/tools/cython/Cython/Plex/Regexps.py
@@ -1,576 +1,576 @@
-#======================================================================= 
-# 
-#     Python Lexical Analyser 
-# 
-#     Regular Expressions 
-# 
-#======================================================================= 
- 
-from __future__ import absolute_import 
- 
-import types 
+#=======================================================================
+#
+#     Python Lexical Analyser
+#
+#     Regular Expressions
+#
+#=======================================================================
+
+from __future__ import absolute_import
+
+import types
 try:
     from sys import maxsize as maxint
 except ImportError:
     from sys import maxint
- 
-from . import Errors 
- 
-# 
-#     Constants 
-# 
- 
-BOL = 'bol' 
-EOL = 'eol' 
-EOF = 'eof' 
- 
-nl_code = ord('\n') 
- 
- 
-# 
-#     Helper functions 
-# 
- 
-def chars_to_ranges(s): 
-    """ 
-    Return a list of character codes consisting of pairs 
-    [code1a, code1b, code2a, code2b,...] which cover all 
-    the characters in |s|. 
-    """ 
-    char_list = list(s) 
-    char_list.sort() 
-    i = 0 
-    n = len(char_list) 
-    result = [] 
-    while i < n: 
-        code1 = ord(char_list[i]) 
-        code2 = code1 + 1 
+
+from . import Errors
+
+#
+#     Constants
+#
+
+BOL = 'bol'
+EOL = 'eol'
+EOF = 'eof'
+
+nl_code = ord('\n')
+
+
+#
+#     Helper functions
+#
+
+def chars_to_ranges(s):
+    """
+    Return a list of character codes consisting of pairs
+    [code1a, code1b, code2a, code2b,...] which cover all
+    the characters in |s|.
+    """
+    char_list = list(s)
+    char_list.sort()
+    i = 0
+    n = len(char_list)
+    result = []
+    while i < n:
+        code1 = ord(char_list[i])
+        code2 = code1 + 1
         i += 1
-        while i < n and code2 >= ord(char_list[i]): 
+        while i < n and code2 >= ord(char_list[i]):
             code2 += 1
             i += 1
-        result.append(code1) 
-        result.append(code2) 
-    return result 
- 
-
-def uppercase_range(code1, code2): 
-    """ 
-    If the range of characters from code1 to code2-1 includes any 
-    lower case letters, return the corresponding upper case range. 
-    """ 
-    code3 = max(code1, ord('a')) 
-    code4 = min(code2, ord('z') + 1) 
-    if code3 < code4: 
-        d = ord('A') - ord('a') 
-        return (code3 + d, code4 + d) 
-    else: 
-        return None 
- 
-
-def lowercase_range(code1, code2): 
-    """ 
-    If the range of characters from code1 to code2-1 includes any 
-    upper case letters, return the corresponding lower case range. 
-    """ 
-    code3 = max(code1, ord('A')) 
-    code4 = min(code2, ord('Z') + 1) 
-    if code3 < code4: 
-        d = ord('a') - ord('A') 
-        return (code3 + d, code4 + d) 
-    else: 
-        return None 
- 
-
-def CodeRanges(code_list): 
-    """ 
-    Given a list of codes as returned by chars_to_ranges, return 
-    an RE which will match a character in any of the ranges. 
-    """ 
+        result.append(code1)
+        result.append(code2)
+    return result
+
+
+def uppercase_range(code1, code2):
+    """
+    If the range of characters from code1 to code2-1 includes any
+    lower case letters, return the corresponding upper case range.
+    """
+    code3 = max(code1, ord('a'))
+    code4 = min(code2, ord('z') + 1)
+    if code3 < code4:
+        d = ord('A') - ord('a')
+        return (code3 + d, code4 + d)
+    else:
+        return None
+
+
+def lowercase_range(code1, code2):
+    """
+    If the range of characters from code1 to code2-1 includes any
+    upper case letters, return the corresponding lower case range.
+    """
+    code3 = max(code1, ord('A'))
+    code4 = min(code2, ord('Z') + 1)
+    if code3 < code4:
+        d = ord('a') - ord('A')
+        return (code3 + d, code4 + d)
+    else:
+        return None
+
+
+def CodeRanges(code_list):
+    """
+    Given a list of codes as returned by chars_to_ranges, return
+    an RE which will match a character in any of the ranges.
+    """
     re_list = [CodeRange(code_list[i], code_list[i + 1]) for i in range(0, len(code_list), 2)]
-    return Alt(*re_list) 
- 
-
-def CodeRange(code1, code2): 
-    """ 
-    CodeRange(code1, code2) is an RE which matches any character 
-    with a code |c| in the range |code1| <= |c| < |code2|. 
-    """ 
-    if code1 <= nl_code < code2: 
-        return Alt(RawCodeRange(code1, nl_code), 
+    return Alt(*re_list)
+
+
+def CodeRange(code1, code2):
+    """
+    CodeRange(code1, code2) is an RE which matches any character
+    with a code |c| in the range |code1| <= |c| < |code2|.
+    """
+    if code1 <= nl_code < code2:
+        return Alt(RawCodeRange(code1, nl_code),
                    RawNewline,
                    RawCodeRange(nl_code + 1, code2))
-    else: 
-        return RawCodeRange(code1, code2) 
- 
-
-# 
-#     Abstract classes 
-# 
- 
-class RE(object): 
-    """RE is the base class for regular expression constructors. 
-    The following operators are defined on REs: 
- 
-         re1 + re2         is an RE which matches |re1| followed by |re2| 
-         re1 | re2         is an RE which matches either |re1| or |re2| 
-    """ 
- 
+    else:
+        return RawCodeRange(code1, code2)
+
+
+#
+#     Abstract classes
+#
+
+class RE(object):
+    """RE is the base class for regular expression constructors.
+    The following operators are defined on REs:
+
+         re1 + re2         is an RE which matches |re1| followed by |re2|
+         re1 | re2         is an RE which matches either |re1| or |re2|
+    """
+
     nullable = 1  # True if this RE can match 0 input symbols
     match_nl = 1  # True if this RE can match a string ending with '\n'
     str = None    # Set to a string to override the class's __str__ result
- 
-    def build_machine(self, machine, initial_state, final_state, 
+
+    def build_machine(self, machine, initial_state, final_state,
                       match_bol, nocase):
-        """ 
-        This method should add states to |machine| to implement this 
-        RE, starting at |initial_state| and ending at |final_state|. 
-        If |match_bol| is true, the RE must be able to match at the 
-        beginning of a line. If nocase is true, upper and lower case 
-        letters should be treated as equivalent. 
-        """ 
-        raise NotImplementedError("%s.build_machine not implemented" % 
+        """
+        This method should add states to |machine| to implement this
+        RE, starting at |initial_state| and ending at |final_state|.
+        If |match_bol| is true, the RE must be able to match at the
+        beginning of a line. If nocase is true, upper and lower case
+        letters should be treated as equivalent.
+        """
+        raise NotImplementedError("%s.build_machine not implemented" %
                                   self.__class__.__name__)
- 
-    def build_opt(self, m, initial_state, c): 
-        """ 
-        Given a state |s| of machine |m|, return a new state 
-        reachable from |s| on character |c| or epsilon. 
-        """ 
-        s = m.new_state() 
-        initial_state.link_to(s) 
-        initial_state.add_transition(c, s) 
-        return s 
- 
-    def __add__(self, other): 
-        return Seq(self, other) 
- 
-    def __or__(self, other): 
-        return Alt(self, other) 
- 
-    def __str__(self): 
-        if self.str: 
-            return self.str 
-        else: 
-            return self.calc_str() 
- 
-    def check_re(self, num, value): 
-        if not isinstance(value, RE): 
-            self.wrong_type(num, value, "Plex.RE instance") 
- 
-    def check_string(self, num, value): 
-        if type(value) != type(''): 
-            self.wrong_type(num, value, "string") 
- 
-    def check_char(self, num, value): 
-        self.check_string(num, value) 
-        if len(value) != 1: 
-            raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s." 
+
+    def build_opt(self, m, initial_state, c):
+        """
+        Given a state |s| of machine |m|, return a new state
+        reachable from |s| on character |c| or epsilon.
+        """
+        s = m.new_state()
+        initial_state.link_to(s)
+        initial_state.add_transition(c, s)
+        return s
+
+    def __add__(self, other):
+        return Seq(self, other)
+
+    def __or__(self, other):
+        return Alt(self, other)
+
+    def __str__(self):
+        if self.str:
+            return self.str
+        else:
+            return self.calc_str()
+
+    def check_re(self, num, value):
+        if not isinstance(value, RE):
+            self.wrong_type(num, value, "Plex.RE instance")
+
+    def check_string(self, num, value):
+        if type(value) != type(''):
+            self.wrong_type(num, value, "string")
+
+    def check_char(self, num, value):
+        self.check_string(num, value)
+        if len(value) != 1:
+            raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
                                         "Expected a string of length 1, got: %s" % (
                                             num, self.__class__.__name__, repr(value)))
- 
-    def wrong_type(self, num, value, expected): 
-        if type(value) == types.InstanceType: 
+
+    def wrong_type(self, num, value, expected):
+        if type(value) == types.InstanceType:
             got = "%s.%s instance" % (
                 value.__class__.__module__, value.__class__.__name__)
-        else: 
-            got = type(value).__name__ 
-        raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s " 
+        else:
+            got = type(value).__name__
+        raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
                                    "(expected %s, got %s" % (
                                        num, self.__class__.__name__, expected, got))
- 
-# 
-#     Primitive RE constructors 
-#     ------------------------- 
-# 
-#     These are the basic REs from which all others are built. 
-# 
- 
-## class Char(RE): 
-##     """ 
-##     Char(c) is an RE which matches the character |c|. 
-##     """ 
- 
-##     nullable = 0 
- 
-##     def __init__(self, char): 
-##         self.char = char 
-##         self.match_nl = char == '\n' 
- 
-##     def build_machine(self, m, initial_state, final_state, match_bol, nocase): 
-##         c = self.char 
-##         if match_bol and c != BOL: 
-##             s1 = self.build_opt(m, initial_state, BOL) 
-##         else: 
-##             s1 = initial_state 
-##         if c == '\n' or c == EOF: 
-##             s1 = self.build_opt(m, s1, EOL) 
-##         if len(c) == 1: 
-##             code = ord(self.char) 
-##             s1.add_transition((code, code+1), final_state) 
-##             if nocase and is_letter_code(code): 
-##                 code2 = other_case_code(code) 
-##                 s1.add_transition((code2, code2+1), final_state) 
-##         else: 
-##             s1.add_transition(c, final_state) 
- 
-##     def calc_str(self): 
-##         return "Char(%s)" % repr(self.char) 
- 
-
-def Char(c): 
-    """ 
-    Char(c) is an RE which matches the character |c|. 
-    """ 
-    if len(c) == 1: 
-        result = CodeRange(ord(c), ord(c) + 1) 
-    else: 
-        result = SpecialSymbol(c) 
-    result.str = "Char(%s)" % repr(c) 
-    return result 
- 
-
-class RawCodeRange(RE): 
-    """ 
-    RawCodeRange(code1, code2) is a low-level RE which matches any character 
-    with a code |c| in the range |code1| <= |c| < |code2|, where the range 
-    does not include newline. For internal use only. 
-    """ 
-    nullable = 0 
-    match_nl = 0 
+
+#
+#     Primitive RE constructors
+#     -------------------------
+#
+#     These are the basic REs from which all others are built.
+#
+
+## class Char(RE):
+##     """
+##     Char(c) is an RE which matches the character |c|.
+##     """
+
+##     nullable = 0
+
+##     def __init__(self, char):
+##         self.char = char
+##         self.match_nl = char == '\n'
+
+##     def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+##         c = self.char
+##         if match_bol and c != BOL:
+##             s1 = self.build_opt(m, initial_state, BOL)
+##         else:
+##             s1 = initial_state
+##         if c == '\n' or c == EOF:
+##             s1 = self.build_opt(m, s1, EOL)
+##         if len(c) == 1:
+##             code = ord(self.char)
+##             s1.add_transition((code, code+1), final_state)
+##             if nocase and is_letter_code(code):
+##                 code2 = other_case_code(code)
+##                 s1.add_transition((code2, code2+1), final_state)
+##         else:
+##             s1.add_transition(c, final_state)
+
+##     def calc_str(self):
+##         return "Char(%s)" % repr(self.char)
+
+
+def Char(c):
+    """
+    Char(c) is an RE which matches the character |c|.
+    """
+    if len(c) == 1:
+        result = CodeRange(ord(c), ord(c) + 1)
+    else:
+        result = SpecialSymbol(c)
+    result.str = "Char(%s)" % repr(c)
+    return result
+
+
+class RawCodeRange(RE):
+    """
+    RawCodeRange(code1, code2) is a low-level RE which matches any character
+    with a code |c| in the range |code1| <= |c| < |code2|, where the range
+    does not include newline. For internal use only.
+    """
+    nullable = 0
+    match_nl = 0
     range = None            # (code, code)
     uppercase_range = None  # (code, code) or None
     lowercase_range = None  # (code, code) or None
- 
-    def __init__(self, code1, code2): 
-        self.range = (code1, code2) 
-        self.uppercase_range = uppercase_range(code1, code2) 
-        self.lowercase_range = lowercase_range(code1, code2) 
- 
-    def build_machine(self, m, initial_state, final_state, match_bol, nocase): 
-        if match_bol: 
-            initial_state = self.build_opt(m, initial_state, BOL) 
-        initial_state.add_transition(self.range, final_state) 
-        if nocase: 
-            if self.uppercase_range: 
-                initial_state.add_transition(self.uppercase_range, final_state) 
-            if self.lowercase_range: 
-                initial_state.add_transition(self.lowercase_range, final_state) 
- 
-    def calc_str(self): 
-        return "CodeRange(%d,%d)" % (self.code1, self.code2) 
- 
-
-class _RawNewline(RE): 
-    """ 
-    RawNewline is a low-level RE which matches a newline character. 
-    For internal use only. 
-    """ 
-    nullable = 0 
-    match_nl = 1 
- 
-    def build_machine(self, m, initial_state, final_state, match_bol, nocase): 
-        if match_bol: 
-            initial_state = self.build_opt(m, initial_state, BOL) 
-        s = self.build_opt(m, initial_state, EOL) 
-        s.add_transition((nl_code, nl_code + 1), final_state) 
- 
-
-RawNewline = _RawNewline() 
- 
- 
-class SpecialSymbol(RE): 
-    """ 
-    SpecialSymbol(sym) is an RE which matches the special input 
-    symbol |sym|, which is one of BOL, EOL or EOF. 
-    """ 
-    nullable = 0 
-    match_nl = 0 
-    sym = None 
- 
-    def __init__(self, sym): 
-        self.sym = sym 
- 
-    def build_machine(self, m, initial_state, final_state, match_bol, nocase): 
-        # Sequences 'bol bol' and 'bol eof' are impossible, so only need 
-        # to allow for bol if sym is eol 
-        if match_bol and self.sym == EOL: 
-            initial_state = self.build_opt(m, initial_state, BOL) 
-        initial_state.add_transition(self.sym, final_state) 
- 
- 
-class Seq(RE): 
-    """Seq(re1, re2, re3...) is an RE which matches |re1| followed by 
-    |re2| followed by |re3|...""" 
- 
-    def __init__(self, *re_list): 
-        nullable = 1 
+
+    def __init__(self, code1, code2):
+        self.range = (code1, code2)
+        self.uppercase_range = uppercase_range(code1, code2)
+        self.lowercase_range = lowercase_range(code1, code2)
+
+    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+        if match_bol:
+            initial_state = self.build_opt(m, initial_state, BOL)
+        initial_state.add_transition(self.range, final_state)
+        if nocase:
+            if self.uppercase_range:
+                initial_state.add_transition(self.uppercase_range, final_state)
+            if self.lowercase_range:
+                initial_state.add_transition(self.lowercase_range, final_state)
+
+    def calc_str(self):
+        return "CodeRange(%d,%d)" % (self.code1, self.code2)
+
+
+class _RawNewline(RE):
+    """
+    RawNewline is a low-level RE which matches a newline character.
+    For internal use only.
+    """
+    nullable = 0
+    match_nl = 1
+
+    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+        if match_bol:
+            initial_state = self.build_opt(m, initial_state, BOL)
+        s = self.build_opt(m, initial_state, EOL)
+        s.add_transition((nl_code, nl_code + 1), final_state)
+
+
+RawNewline = _RawNewline()
+
+
+class SpecialSymbol(RE):
+    """
+    SpecialSymbol(sym) is an RE which matches the special input
+    symbol |sym|, which is one of BOL, EOL or EOF.
+    """
+    nullable = 0
+    match_nl = 0
+    sym = None
+
+    def __init__(self, sym):
+        self.sym = sym
+
+    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+        # Sequences 'bol bol' and 'bol eof' are impossible, so only need
+        # to allow for bol if sym is eol
+        if match_bol and self.sym == EOL:
+            initial_state = self.build_opt(m, initial_state, BOL)
+        initial_state.add_transition(self.sym, final_state)
+
+
+class Seq(RE):
+    """Seq(re1, re2, re3...) is an RE which matches |re1| followed by
+    |re2| followed by |re3|..."""
+
+    def __init__(self, *re_list):
+        nullable = 1
         for i, re in enumerate(re_list):
-            self.check_re(i, re) 
-            nullable = nullable and re.nullable 
-        self.re_list = re_list 
-        self.nullable = nullable 
-        i = len(re_list) 
-        match_nl = 0 
-        while i: 
+            self.check_re(i, re)
+            nullable = nullable and re.nullable
+        self.re_list = re_list
+        self.nullable = nullable
+        i = len(re_list)
+        match_nl = 0
+        while i:
             i -= 1
-            re = re_list[i] 
-            if re.match_nl: 
-                match_nl = 1 
-                break 
-            if not re.nullable: 
-                break 
-        self.match_nl = match_nl 
- 
-    def build_machine(self, m, initial_state, final_state, match_bol, nocase): 
-        re_list = self.re_list 
-        if len(re_list) == 0: 
-            initial_state.link_to(final_state) 
-        else: 
-            s1 = initial_state 
-            n = len(re_list) 
+            re = re_list[i]
+            if re.match_nl:
+                match_nl = 1
+                break
+            if not re.nullable:
+                break
+        self.match_nl = match_nl
+
+    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+        re_list = self.re_list
+        if len(re_list) == 0:
+            initial_state.link_to(final_state)
+        else:
+            s1 = initial_state
+            n = len(re_list)
             for i, re in enumerate(re_list):
-                if i < n - 1: 
-                    s2 = m.new_state() 
-                else: 
-                    s2 = final_state 
-                re.build_machine(m, s1, s2, match_bol, nocase) 
-                s1 = s2 
-                match_bol = re.match_nl or (match_bol and re.nullable) 
- 
-    def calc_str(self): 
-        return "Seq(%s)" % ','.join(map(str, self.re_list)) 
- 
- 
-class Alt(RE): 
-    """Alt(re1, re2, re3...) is an RE which matches either |re1| or 
-    |re2| or |re3|...""" 
- 
-    def __init__(self, *re_list): 
-        self.re_list = re_list 
-        nullable = 0 
-        match_nl = 0 
-        nullable_res = [] 
-        non_nullable_res = [] 
-        i = 1 
-        for re in re_list: 
-            self.check_re(i, re) 
-            if re.nullable: 
-                nullable_res.append(re) 
-                nullable = 1 
-            else: 
-                non_nullable_res.append(re) 
-            if re.match_nl: 
-                match_nl = 1 
+                if i < n - 1:
+                    s2 = m.new_state()
+                else:
+                    s2 = final_state
+                re.build_machine(m, s1, s2, match_bol, nocase)
+                s1 = s2
+                match_bol = re.match_nl or (match_bol and re.nullable)
+
+    def calc_str(self):
+        return "Seq(%s)" % ','.join(map(str, self.re_list))
+
+
+class Alt(RE):
+    """Alt(re1, re2, re3...) is an RE which matches either |re1| or
+    |re2| or |re3|..."""
+
+    def __init__(self, *re_list):
+        self.re_list = re_list
+        nullable = 0
+        match_nl = 0
+        nullable_res = []
+        non_nullable_res = []
+        i = 1
+        for re in re_list:
+            self.check_re(i, re)
+            if re.nullable:
+                nullable_res.append(re)
+                nullable = 1
+            else:
+                non_nullable_res.append(re)
+            if re.match_nl:
+                match_nl = 1
             i += 1
-        self.nullable_res = nullable_res 
-        self.non_nullable_res = non_nullable_res 
-        self.nullable = nullable 
-        self.match_nl = match_nl 
- 
-    def build_machine(self, m, initial_state, final_state, match_bol, nocase): 
-        for re in self.nullable_res: 
-            re.build_machine(m, initial_state, final_state, match_bol, nocase) 
-        if self.non_nullable_res: 
-            if match_bol: 
-                initial_state = self.build_opt(m, initial_state, BOL) 
-            for re in self.non_nullable_res: 
-                re.build_machine(m, initial_state, final_state, 0, nocase) 
- 
-    def calc_str(self): 
-        return "Alt(%s)" % ','.join(map(str, self.re_list)) 
- 
- 
-class Rep1(RE): 
-    """Rep1(re) is an RE which matches one or more repetitions of |re|.""" 
- 
-    def __init__(self, re): 
-        self.check_re(1, re) 
-        self.re = re 
-        self.nullable = re.nullable 
-        self.match_nl = re.match_nl 
- 
-    def build_machine(self, m, initial_state, final_state, match_bol, nocase): 
-        s1 = m.new_state() 
-        s2 = m.new_state() 
-        initial_state.link_to(s1) 
-        self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase) 
-        s2.link_to(s1) 
-        s2.link_to(final_state) 
- 
-    def calc_str(self): 
-        return "Rep1(%s)" % self.re 
- 
- 
-class SwitchCase(RE): 
-    """ 
-    SwitchCase(re, nocase) is an RE which matches the same strings as RE, 
-    but treating upper and lower case letters according to |nocase|. If 
-    |nocase| is true, case is ignored, otherwise it is not. 
-    """ 
-    re = None 
-    nocase = None 
- 
-    def __init__(self, re, nocase): 
-        self.re = re 
-        self.nocase = nocase 
-        self.nullable = re.nullable 
-        self.match_nl = re.match_nl 
- 
-    def build_machine(self, m, initial_state, final_state, match_bol, nocase): 
-        self.re.build_machine(m, initial_state, final_state, match_bol, 
+        self.nullable_res = nullable_res
+        self.non_nullable_res = non_nullable_res
+        self.nullable = nullable
+        self.match_nl = match_nl
+
+    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+        for re in self.nullable_res:
+            re.build_machine(m, initial_state, final_state, match_bol, nocase)
+        if self.non_nullable_res:
+            if match_bol:
+                initial_state = self.build_opt(m, initial_state, BOL)
+            for re in self.non_nullable_res:
+                re.build_machine(m, initial_state, final_state, 0, nocase)
+
+    def calc_str(self):
+        return "Alt(%s)" % ','.join(map(str, self.re_list))
+
+
+class Rep1(RE):
+    """Rep1(re) is an RE which matches one or more repetitions of |re|."""
+
+    def __init__(self, re):
+        self.check_re(1, re)
+        self.re = re
+        self.nullable = re.nullable
+        self.match_nl = re.match_nl
+
+    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+        s1 = m.new_state()
+        s2 = m.new_state()
+        initial_state.link_to(s1)
+        self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase)
+        s2.link_to(s1)
+        s2.link_to(final_state)
+
+    def calc_str(self):
+        return "Rep1(%s)" % self.re
+
+
+class SwitchCase(RE):
+    """
+    SwitchCase(re, nocase) is an RE which matches the same strings as RE,
+    but treating upper and lower case letters according to |nocase|. If
+    |nocase| is true, case is ignored, otherwise it is not.
+    """
+    re = None
+    nocase = None
+
+    def __init__(self, re, nocase):
+        self.re = re
+        self.nocase = nocase
+        self.nullable = re.nullable
+        self.match_nl = re.match_nl
+
+    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
+        self.re.build_machine(m, initial_state, final_state, match_bol,
                               self.nocase)
- 
-    def calc_str(self): 
-        if self.nocase: 
-            name = "NoCase" 
-        else: 
-            name = "Case" 
-        return "%s(%s)" % (name, self.re) 
- 
-# 
-#     Composite RE constructors 
-#     ------------------------- 
-# 
-#     These REs are defined in terms of the primitive REs. 
-# 
- 
-Empty = Seq() 
-Empty.__doc__ = \ 
-    """ 
-    Empty is an RE which matches the empty string. 
-    """ 
-Empty.str = "Empty" 
- 
-
-def Str1(s): 
-    """ 
-    Str1(s) is an RE which matches the literal string |s|. 
-    """ 
-    result = Seq(*tuple(map(Char, s))) 
-    result.str = "Str(%s)" % repr(s) 
-    return result 
- 
-
-def Str(*strs): 
-    """ 
-    Str(s) is an RE which matches the literal string |s|. 
-    Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|... 
-    """ 
-    if len(strs) == 1: 
-        return Str1(strs[0]) 
-    else: 
-        result = Alt(*tuple(map(Str1, strs))) 
-        result.str = "Str(%s)" % ','.join(map(repr, strs)) 
-        return result 
- 
-
-def Any(s): 
-    """ 
-    Any(s) is an RE which matches any character in the string |s|. 
-    """ 
-    #result = apply(Alt, tuple(map(Char, s))) 
-    result = CodeRanges(chars_to_ranges(s)) 
-    result.str = "Any(%s)" % repr(s) 
-    return result 
- 
-
-def AnyBut(s): 
-    """ 
-    AnyBut(s) is an RE which matches any character (including 
-    newline) which is not in the string |s|. 
-    """ 
-    ranges = chars_to_ranges(s) 
-    ranges.insert(0, -maxint) 
-    ranges.append(maxint) 
-    result = CodeRanges(ranges) 
-    result.str = "AnyBut(%s)" % repr(s) 
-    return result 
- 
-
-AnyChar = AnyBut("") 
-AnyChar.__doc__ = \ 
-    """ 
-    AnyChar is an RE which matches any single character (including a newline). 
-    """ 
-AnyChar.str = "AnyChar" 
- 
+
+    def calc_str(self):
+        if self.nocase:
+            name = "NoCase"
+        else:
+            name = "Case"
+        return "%s(%s)" % (name, self.re)
+
+#
+#     Composite RE constructors
+#     -------------------------
+#
+#     These REs are defined in terms of the primitive REs.
+#
+
+Empty = Seq()
+Empty.__doc__ = \
+    """
+    Empty is an RE which matches the empty string.
+    """
+Empty.str = "Empty"
+
+
+def Str1(s):
+    """
+    Str1(s) is an RE which matches the literal string |s|.
+    """
+    result = Seq(*tuple(map(Char, s)))
+    result.str = "Str(%s)" % repr(s)
+    return result
+
+
+def Str(*strs):
+    """
+    Str(s) is an RE which matches the literal string |s|.
+    Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|...
+    """
+    if len(strs) == 1:
+        return Str1(strs[0])
+    else:
+        result = Alt(*tuple(map(Str1, strs)))
+        result.str = "Str(%s)" % ','.join(map(repr, strs))
+        return result
+
+
+def Any(s):
+    """
+    Any(s) is an RE which matches any character in the string |s|.
+    """
+    #result = apply(Alt, tuple(map(Char, s)))
+    result = CodeRanges(chars_to_ranges(s))
+    result.str = "Any(%s)" % repr(s)
+    return result
+
+
+def AnyBut(s):
+    """
+    AnyBut(s) is an RE which matches any character (including
+    newline) which is not in the string |s|.
+    """
+    ranges = chars_to_ranges(s)
+    ranges.insert(0, -maxint)
+    ranges.append(maxint)
+    result = CodeRanges(ranges)
+    result.str = "AnyBut(%s)" % repr(s)
+    return result
+
+
+AnyChar = AnyBut("")
+AnyChar.__doc__ = \
+    """
+    AnyChar is an RE which matches any single character (including a newline).
+    """
+AnyChar.str = "AnyChar"
+
 
 def Range(s1, s2=None):
-    """ 
-    Range(c1, c2) is an RE which matches any single character in the range 
-    |c1| to |c2| inclusive. 
-    Range(s) where |s| is a string of even length is an RE which matches 
-    any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,... 
-    """ 
-    if s2: 
-        result = CodeRange(ord(s1), ord(s2) + 1) 
-        result.str = "Range(%s,%s)" % (s1, s2) 
-    else: 
-        ranges = [] 
-        for i in range(0, len(s1), 2): 
+    """
+    Range(c1, c2) is an RE which matches any single character in the range
+    |c1| to |c2| inclusive.
+    Range(s) where |s| is a string of even length is an RE which matches
+    any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,...
+    """
+    if s2:
+        result = CodeRange(ord(s1), ord(s2) + 1)
+        result.str = "Range(%s,%s)" % (s1, s2)
+    else:
+        ranges = []
+        for i in range(0, len(s1), 2):
             ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
-        result = Alt(*ranges) 
-        result.str = "Range(%s)" % repr(s1) 
-    return result 
- 
-
-def Opt(re): 
-    """ 
-    Opt(re) is an RE which matches either |re| or the empty string. 
-    """ 
-    result = Alt(re, Empty) 
-    result.str = "Opt(%s)" % re 
-    return result 
- 
-
-def Rep(re): 
-    """ 
-    Rep(re) is an RE which matches zero or more repetitions of |re|. 
-    """ 
-    result = Opt(Rep1(re)) 
-    result.str = "Rep(%s)" % re 
-    return result 
- 
-
-def NoCase(re): 
-    """ 
-    NoCase(re) is an RE which matches the same strings as RE, but treating 
-    upper and lower case letters as equivalent. 
-    """ 
+        result = Alt(*ranges)
+        result.str = "Range(%s)" % repr(s1)
+    return result
+
+
+def Opt(re):
+    """
+    Opt(re) is an RE which matches either |re| or the empty string.
+    """
+    result = Alt(re, Empty)
+    result.str = "Opt(%s)" % re
+    return result
+
+
+def Rep(re):
+    """
+    Rep(re) is an RE which matches zero or more repetitions of |re|.
+    """
+    result = Opt(Rep1(re))
+    result.str = "Rep(%s)" % re
+    return result
+
+
+def NoCase(re):
+    """
+    NoCase(re) is an RE which matches the same strings as RE, but treating
+    upper and lower case letters as equivalent.
+    """
     return SwitchCase(re, nocase=1)
- 
-
-def Case(re): 
-    """ 
-    Case(re) is an RE which matches the same strings as RE, but treating 
-    upper and lower case letters as distinct, i.e. it cancels the effect 
-    of any enclosing NoCase(). 
-    """ 
+
+
+def Case(re):
+    """
+    Case(re) is an RE which matches the same strings as RE, but treating
+    upper and lower case letters as distinct, i.e. it cancels the effect
+    of any enclosing NoCase().
+    """
     return SwitchCase(re, nocase=0)
- 
-# 
-#     RE Constants 
-# 
- 
-Bol = Char(BOL) 
-Bol.__doc__ = \ 
-    """ 
-    Bol is an RE which matches the beginning of a line. 
-    """ 
-Bol.str = "Bol" 
- 
-Eol = Char(EOL) 
-Eol.__doc__ = \ 
-    """ 
-    Eol is an RE which matches the end of a line. 
-    """ 
-Eol.str = "Eol" 
- 
-Eof = Char(EOF) 
-Eof.__doc__ = \ 
-    """ 
-    Eof is an RE which matches the end of the file. 
-    """ 
-Eof.str = "Eof" 
- 
+
+#
+#     RE Constants
+#
+
+Bol = Char(BOL)
+Bol.__doc__ = \
+    """
+    Bol is an RE which matches the beginning of a line.
+    """
+Bol.str = "Bol"
+
+Eol = Char(EOL)
+Eol.__doc__ = \
+    """
+    Eol is an RE which matches the end of a line.
+    """
+Eol.str = "Eol"
+
+Eof = Char(EOF)
+Eof.__doc__ = \
+    """
+    Eof is an RE which matches the end of the file.
+    """
+Eof.str = "Eof"
+
author	Anton Samokhvalov <pg83@yandex.ru>	2022-02-10 16:45:17 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:17 +0300
commit	d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
tree	dd4bd3ca0f36b817e96812825ffaf10d645803f2 /contrib/tools/cython/Cython/Plex/Regexps.py
parent	72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
download	ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz