diff options
author | ilezhankin <ilezhankin@yandex-team.ru> | 2022-02-10 16:45:56 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:56 +0300 |
commit | 62a805381e41500fbc7914c37c71ab040a098f4e (patch) | |
tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/python/Pygments/py2/pygments/regexopt.py | |
parent | 1d125034f06575234f83f24f08677955133f140e (diff) | |
download | ydb-62a805381e41500fbc7914c37c71ab040a098f4e.tar.gz |
Restoring authorship annotation for <ilezhankin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/python/Pygments/py2/pygments/regexopt.py')
-rw-r--r-- | contrib/python/Pygments/py2/pygments/regexopt.py | 180 |
1 files changed, 90 insertions, 90 deletions
diff --git a/contrib/python/Pygments/py2/pygments/regexopt.py b/contrib/python/Pygments/py2/pygments/regexopt.py index 0529778606..59d77ee064 100644 --- a/contrib/python/Pygments/py2/pygments/regexopt.py +++ b/contrib/python/Pygments/py2/pygments/regexopt.py @@ -1,92 +1,92 @@ -# -*- coding: utf-8 -*- -""" - pygments.regexopt - ~~~~~~~~~~~~~~~~~ - - An algorithm that generates optimized regexes for matching long lists of - literal strings. - +# -*- coding: utf-8 -*- +""" + pygments.regexopt + ~~~~~~~~~~~~~~~~~ + + An algorithm that generates optimized regexes for matching long lists of + literal strings. + :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. - :license: BSD, see LICENSE for details. -""" - -import re -from re import escape -from os.path import commonprefix -from itertools import groupby -from operator import itemgetter - -CS_ESCAPE = re.compile(r'[\^\\\-\]]') -FIRST_ELEMENT = itemgetter(0) - - -def make_charset(letters): - return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']' - - -def regex_opt_inner(strings, open_paren): - """Return a regex that matches any string in the sorted list of strings.""" - close_paren = open_paren and ')' or '' - # print strings, repr(open_paren) - if not strings: - # print '-> nothing left' - return '' - first = strings[0] - if len(strings) == 1: - # print '-> only 1 string' - return open_paren + escape(first) + close_paren - if not first: - # print '-> first string empty' - return open_paren + regex_opt_inner(strings[1:], '(?:') \ - + '?' + close_paren - if len(first) == 1: - # multiple one-char strings? make a charset - oneletter = [] - rest = [] - for s in strings: - if len(s) == 1: - oneletter.append(s) - else: - rest.append(s) - if len(oneletter) > 1: # do we have more than one oneletter string? - if rest: - # print '-> 1-character + rest' - return open_paren + regex_opt_inner(rest, '') + '|' \ - + make_charset(oneletter) + close_paren - # print '-> only 1-character' + :license: BSD, see LICENSE for details. +""" + +import re +from re import escape +from os.path import commonprefix +from itertools import groupby +from operator import itemgetter + +CS_ESCAPE = re.compile(r'[\^\\\-\]]') +FIRST_ELEMENT = itemgetter(0) + + +def make_charset(letters): + return '[' + CS_ESCAPE.sub(lambda m: '\\' + m.group(), ''.join(letters)) + ']' + + +def regex_opt_inner(strings, open_paren): + """Return a regex that matches any string in the sorted list of strings.""" + close_paren = open_paren and ')' or '' + # print strings, repr(open_paren) + if not strings: + # print '-> nothing left' + return '' + first = strings[0] + if len(strings) == 1: + # print '-> only 1 string' + return open_paren + escape(first) + close_paren + if not first: + # print '-> first string empty' + return open_paren + regex_opt_inner(strings[1:], '(?:') \ + + '?' + close_paren + if len(first) == 1: + # multiple one-char strings? make a charset + oneletter = [] + rest = [] + for s in strings: + if len(s) == 1: + oneletter.append(s) + else: + rest.append(s) + if len(oneletter) > 1: # do we have more than one oneletter string? + if rest: + # print '-> 1-character + rest' + return open_paren + regex_opt_inner(rest, '') + '|' \ + + make_charset(oneletter) + close_paren + # print '-> only 1-character' return open_paren + make_charset(oneletter) + close_paren - prefix = commonprefix(strings) - if prefix: - plen = len(prefix) - # we have a prefix for all strings - # print '-> prefix:', prefix - return open_paren + escape(prefix) \ - + regex_opt_inner([s[plen:] for s in strings], '(?:') \ - + close_paren - # is there a suffix? - strings_rev = [s[::-1] for s in strings] - suffix = commonprefix(strings_rev) - if suffix: - slen = len(suffix) - # print '-> suffix:', suffix[::-1] - return open_paren \ - + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \ - + escape(suffix[::-1]) + close_paren - # recurse on common 1-string prefixes - # print '-> last resort' - return open_paren + \ - '|'.join(regex_opt_inner(list(group[1]), '') - for group in groupby(strings, lambda s: s[0] == first[0])) \ - + close_paren - - -def regex_opt(strings, prefix='', suffix=''): - """Return a compiled regex that matches any string in the given list. - - The strings to match must be literal strings, not regexes. They will be - regex-escaped. - - *prefix* and *suffix* are pre- and appended to the final regex. - """ - strings = sorted(strings) - return prefix + regex_opt_inner(strings, '(') + suffix + prefix = commonprefix(strings) + if prefix: + plen = len(prefix) + # we have a prefix for all strings + # print '-> prefix:', prefix + return open_paren + escape(prefix) \ + + regex_opt_inner([s[plen:] for s in strings], '(?:') \ + + close_paren + # is there a suffix? + strings_rev = [s[::-1] for s in strings] + suffix = commonprefix(strings_rev) + if suffix: + slen = len(suffix) + # print '-> suffix:', suffix[::-1] + return open_paren \ + + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:') \ + + escape(suffix[::-1]) + close_paren + # recurse on common 1-string prefixes + # print '-> last resort' + return open_paren + \ + '|'.join(regex_opt_inner(list(group[1]), '') + for group in groupby(strings, lambda s: s[0] == first[0])) \ + + close_paren + + +def regex_opt(strings, prefix='', suffix=''): + """Return a compiled regex that matches any string in the given list. + + The strings to match must be literal strings, not regexes. They will be + regex-escaped. + + *prefix* and *suffix* are pre- and appended to the final regex. + """ + strings = sorted(strings) + return prefix + regex_opt_inner(strings, '(') + suffix |