diff options
| author | ilezhankin <[email protected]> | 2022-02-10 16:45:56 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:45:56 +0300 | 
| commit | 62a805381e41500fbc7914c37c71ab040a098f4e (patch) | |
| tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/python/Pygments/py3/pygments/regexopt.py | |
| parent | 1d125034f06575234f83f24f08677955133f140e (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'contrib/python/Pygments/py3/pygments/regexopt.py')
| -rw-r--r-- | contrib/python/Pygments/py3/pygments/regexopt.py | 176 | 
1 files changed, 88 insertions, 88 deletions
"""
    pygments.regexopt
    ~~~~~~~~~~~~~~~~~

    An algorithm that generates optimized regexes for matching long lists of
    literal strings.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
from re import escape
from os.path import commonprefix
from itertools import groupby
from operator import itemgetter

# Characters that must be backslash-escaped inside a ``[...]`` character set.
CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')
FIRST_ELEMENT = itemgetter(0)


def make_charset(letters):
    """Return a ``[...]`` character set matching any one of *letters*.

    *letters* is an iterable of strings; they are concatenated, and every
    character matched by ``CS_ESCAPE`` is prefixed with a backslash.
    """
    return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']'


def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    *strings* must already be sorted: the empty string, if present, is
    expected at index 0, and the final fallback relies on strings sharing the
    first string's initial character being adjacent at the front.

    *open_paren* is the group opener wrapped around the result (``'('``,
    ``'(?:'``, or ``''`` for no grouping); a closing ``')'`` is added only
    when an opener was given.  An empty list yields ``''`` with no group.
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # The empty string is one of the alternatives: match the remaining
        # strings inside an optional group.
        return open_paren + regex_opt_inner(strings[1:], '(?:') \
            + '?' + close_paren
    if len(first) == 1:
        # Multiple one-char strings?  Make a charset.
        oneletter = []
        rest = []
        for s in strings:
            if len(s) == 1:
                oneletter.append(s)
            else:
                rest.append(s)
        if len(oneletter) > 1:
            if rest:
                # Longer strings go first in the alternation, followed by a
                # charset for the one-character strings.
                return open_paren + regex_opt_inner(rest, '') + '|' \
                    + make_charset(oneletter) + close_paren
            return open_paren + make_charset(oneletter) + close_paren
        # A single one-char string gains nothing from a charset; fall
        # through to the generic prefix/suffix handling below.
    prefix = commonprefix(strings)
    if prefix:
        # All strings share a prefix: emit it once and recurse on the tails.
        # Stripping a common prefix keeps the list sorted.
        plen = len(prefix)
        return open_paren + escape(prefix) \
            + regex_opt_inner([s[plen:] for s in strings], '(?:') \
            + close_paren
    # Is there a common suffix?  Look for a common prefix of the reversed
    # strings.
    strings_rev = [s[::-1] for s in strings]
    suffix = commonprefix(strings_rev)
    if suffix:
        slen = len(suffix)
        # Stripping a suffix can change the relative order, so re-sort
        # before recursing.
        return open_paren \
            + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \
            + escape(suffix[::-1]) + close_paren
    # Last resort: split into the strings that start with the same character
    # as the first one and all the others, and alternate between the two.
    return open_paren + \
        '|'.join(regex_opt_inner(list(group[1]), '')
                 for group in groupby(strings, lambda s: s[0] == first[0])) \
        + close_paren


def regex_opt(strings, prefix='', suffix=''):
    """Return a regex pattern string that matches any string in the given list.

    The strings to match must be literal strings, not regexes.  They will be
    regex-escaped.  The result is an uncompiled pattern (a ``str``); the
    alternatives are wrapped in a single capturing group.

    *prefix* and *suffix* are pre- and appended to the final regex.
    """
    strings = sorted(strings)
    return prefix + regex_opt_inner(strings, '(') + suffix
