diff options
author | orivej <orivej@yandex-team.ru> | 2022-02-10 16:45:01 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:01 +0300 |
commit | 2d37894b1b037cf24231090eda8589bbb44fb6fc (patch) | |
tree | be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/tools/python3/src/Lib/string.py | |
parent | 718c552901d703c502ccbefdfc3c9028d608b947 (diff) | |
download | ydb-2d37894b1b037cf24231090eda8589bbb44fb6fc.tar.gz |
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Lib/string.py')
-rw-r--r-- | contrib/tools/python3/src/Lib/string.py | 498 |
1 files changed, 249 insertions, 249 deletions
diff --git a/contrib/tools/python3/src/Lib/string.py b/contrib/tools/python3/src/Lib/string.py index 9d26f145b3..489777b10c 100644 --- a/contrib/tools/python3/src/Lib/string.py +++ b/contrib/tools/python3/src/Lib/string.py @@ -1,71 +1,71 @@ -"""A collection of string constants. - -Public module variables: - -whitespace -- a string containing all ASCII whitespace -ascii_lowercase -- a string containing all ASCII lowercase letters -ascii_uppercase -- a string containing all ASCII uppercase letters -ascii_letters -- a string containing all ASCII letters -digits -- a string containing all ASCII decimal digits -hexdigits -- a string containing all ASCII hexadecimal digits -octdigits -- a string containing all ASCII octal digits -punctuation -- a string containing all ASCII punctuation characters -printable -- a string containing all ASCII characters considered printable - -""" - -__all__ = ["ascii_letters", "ascii_lowercase", "ascii_uppercase", "capwords", - "digits", "hexdigits", "octdigits", "printable", "punctuation", - "whitespace", "Formatter", "Template"] - -import _string - -# Some strings for ctype-style character classification -whitespace = ' \t\n\r\v\f' -ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz' -ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' -ascii_letters = ascii_lowercase + ascii_uppercase -digits = '0123456789' -hexdigits = digits + 'abcdef' + 'ABCDEF' -octdigits = '01234567' -punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" -printable = digits + ascii_letters + punctuation + whitespace - -# Functions which aren't available as string methods. - -# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". -def capwords(s, sep=None): - """capwords(s [,sep]) -> string - - Split the argument into words using split, capitalize each - word using capitalize, and join the capitalized words using - join. If the optional second argument sep is absent or None, - runs of whitespace characters are replaced by a single space - and leading and trailing whitespace are removed, otherwise - sep is used to split and join the words. - - """ - return (sep or ' ').join(x.capitalize() for x in s.split(sep)) - - -#################################################################### -import re as _re -from collections import ChainMap as _ChainMap - +"""A collection of string constants. + +Public module variables: + +whitespace -- a string containing all ASCII whitespace +ascii_lowercase -- a string containing all ASCII lowercase letters +ascii_uppercase -- a string containing all ASCII uppercase letters +ascii_letters -- a string containing all ASCII letters +digits -- a string containing all ASCII decimal digits +hexdigits -- a string containing all ASCII hexadecimal digits +octdigits -- a string containing all ASCII octal digits +punctuation -- a string containing all ASCII punctuation characters +printable -- a string containing all ASCII characters considered printable + +""" + +__all__ = ["ascii_letters", "ascii_lowercase", "ascii_uppercase", "capwords", + "digits", "hexdigits", "octdigits", "printable", "punctuation", + "whitespace", "Formatter", "Template"] + +import _string + +# Some strings for ctype-style character classification +whitespace = ' \t\n\r\v\f' +ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz' +ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +ascii_letters = ascii_lowercase + ascii_uppercase +digits = '0123456789' +hexdigits = digits + 'abcdef' + 'ABCDEF' +octdigits = '01234567' +punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +printable = digits + ascii_letters + punctuation + whitespace + +# Functions which aren't available as string methods. + +# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". +def capwords(s, sep=None): + """capwords(s [,sep]) -> string + + Split the argument into words using split, capitalize each + word using capitalize, and join the capitalized words using + join. If the optional second argument sep is absent or None, + runs of whitespace characters are replaced by a single space + and leading and trailing whitespace are removed, otherwise + sep is used to split and join the words. + + """ + return (sep or ' ').join(x.capitalize() for x in s.split(sep)) + + +#################################################################### +import re as _re +from collections import ChainMap as _ChainMap + _sentinel_dict = {} class Template: - """A string class for supporting $-substitutions.""" - - delimiter = '$' - # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but - # without the ASCII flag. We can't add re.ASCII to flags because of - # backward compatibility. So we use the ?a local flag and [a-z] pattern. - # See https://bugs.python.org/issue31672 - idpattern = r'(?a:[_a-z][_a-z0-9]*)' - braceidpattern = None - flags = _re.IGNORECASE - + """A string class for supporting $-substitutions.""" + + delimiter = '$' + # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but + # without the ASCII flag. We can't add re.ASCII to flags because of + # backward compatibility. So we use the ?a local flag and [a-z] pattern. + # See https://bugs.python.org/issue31672 + idpattern = r'(?a:[_a-z][_a-z0-9]*)' + braceidpattern = None + flags = _re.IGNORECASE + def __init_subclass__(cls): super().__init_subclass__() if 'pattern' in cls.__dict__: @@ -84,197 +84,197 @@ class Template: """ cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE) - def __init__(self, template): - self.template = template - - # Search for $$, $identifier, ${identifier}, and any bare $'s - - def _invalid(self, mo): - i = mo.start('invalid') - lines = self.template[:i].splitlines(keepends=True) - if not lines: - colno = 1 - lineno = 1 - else: - colno = i - len(''.join(lines[:-1])) - lineno = len(lines) - raise ValueError('Invalid placeholder in string: line %d, col %d' % - (lineno, colno)) - + def __init__(self, template): + self.template = template + + # Search for $$, $identifier, ${identifier}, and any bare $'s + + def _invalid(self, mo): + i = mo.start('invalid') + lines = self.template[:i].splitlines(keepends=True) + if not lines: + colno = 1 + lineno = 1 + else: + colno = i - len(''.join(lines[:-1])) + lineno = len(lines) + raise ValueError('Invalid placeholder in string: line %d, col %d' % + (lineno, colno)) + def substitute(self, mapping=_sentinel_dict, /, **kws): if mapping is _sentinel_dict: - mapping = kws - elif kws: + mapping = kws + elif kws: mapping = _ChainMap(kws, mapping) - # Helper function for .sub() - def convert(mo): - # Check the most common path first. - named = mo.group('named') or mo.group('braced') - if named is not None: - return str(mapping[named]) - if mo.group('escaped') is not None: - return self.delimiter - if mo.group('invalid') is not None: - self._invalid(mo) - raise ValueError('Unrecognized named group in pattern', - self.pattern) - return self.pattern.sub(convert, self.template) - + # Helper function for .sub() + def convert(mo): + # Check the most common path first. + named = mo.group('named') or mo.group('braced') + if named is not None: + return str(mapping[named]) + if mo.group('escaped') is not None: + return self.delimiter + if mo.group('invalid') is not None: + self._invalid(mo) + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return self.pattern.sub(convert, self.template) + def safe_substitute(self, mapping=_sentinel_dict, /, **kws): if mapping is _sentinel_dict: - mapping = kws - elif kws: + mapping = kws + elif kws: mapping = _ChainMap(kws, mapping) - # Helper function for .sub() - def convert(mo): - named = mo.group('named') or mo.group('braced') - if named is not None: - try: - return str(mapping[named]) - except KeyError: - return mo.group() - if mo.group('escaped') is not None: - return self.delimiter - if mo.group('invalid') is not None: - return mo.group() - raise ValueError('Unrecognized named group in pattern', - self.pattern) - return self.pattern.sub(convert, self.template) - + # Helper function for .sub() + def convert(mo): + named = mo.group('named') or mo.group('braced') + if named is not None: + try: + return str(mapping[named]) + except KeyError: + return mo.group() + if mo.group('escaped') is not None: + return self.delimiter + if mo.group('invalid') is not None: + return mo.group() + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return self.pattern.sub(convert, self.template) + # Initialize Template.pattern. __init_subclass__() is automatically called # only for subclasses, not for the Template class itself. Template.__init_subclass__() - - -######################################################################## -# the Formatter class -# see PEP 3101 for details and purpose of this class - -# The hard parts are reused from the C implementation. They're exposed as "_" -# prefixed methods of str. - -# The overall parser is implemented in _string.formatter_parser. -# The field name parser is implemented in _string.formatter_field_name_split - -class Formatter: + + +######################################################################## +# the Formatter class +# see PEP 3101 for details and purpose of this class + +# The hard parts are reused from the C implementation. They're exposed as "_" +# prefixed methods of str. + +# The overall parser is implemented in _string.formatter_parser. +# The field name parser is implemented in _string.formatter_field_name_split + +class Formatter: def format(self, format_string, /, *args, **kwargs): - return self.vformat(format_string, args, kwargs) - - def vformat(self, format_string, args, kwargs): - used_args = set() - result, _ = self._vformat(format_string, args, kwargs, used_args, 2) - self.check_unused_args(used_args, args, kwargs) - return result - - def _vformat(self, format_string, args, kwargs, used_args, recursion_depth, - auto_arg_index=0): - if recursion_depth < 0: - raise ValueError('Max string recursion exceeded') - result = [] - for literal_text, field_name, format_spec, conversion in \ - self.parse(format_string): - - # output the literal text - if literal_text: - result.append(literal_text) - - # if there's a field, output it - if field_name is not None: - # this is some markup, find the object and do - # the formatting - - # handle arg indexing when empty field_names are given. - if field_name == '': - if auto_arg_index is False: - raise ValueError('cannot switch from manual field ' - 'specification to automatic field ' - 'numbering') - field_name = str(auto_arg_index) - auto_arg_index += 1 - elif field_name.isdigit(): - if auto_arg_index: - raise ValueError('cannot switch from manual field ' - 'specification to automatic field ' - 'numbering') - # disable auto arg incrementing, if it gets - # used later on, then an exception will be raised - auto_arg_index = False - - # given the field_name, find the object it references - # and the argument it came from - obj, arg_used = self.get_field(field_name, args, kwargs) - used_args.add(arg_used) - - # do any conversion on the resulting object - obj = self.convert_field(obj, conversion) - - # expand the format spec, if needed - format_spec, auto_arg_index = self._vformat( - format_spec, args, kwargs, - used_args, recursion_depth-1, - auto_arg_index=auto_arg_index) - - # format the object and append to the result - result.append(self.format_field(obj, format_spec)) - - return ''.join(result), auto_arg_index - - - def get_value(self, key, args, kwargs): - if isinstance(key, int): - return args[key] - else: - return kwargs[key] - - - def check_unused_args(self, used_args, args, kwargs): - pass - - - def format_field(self, value, format_spec): - return format(value, format_spec) - - - def convert_field(self, value, conversion): - # do any conversion on the resulting object - if conversion is None: - return value - elif conversion == 's': - return str(value) - elif conversion == 'r': - return repr(value) - elif conversion == 'a': - return ascii(value) - raise ValueError("Unknown conversion specifier {0!s}".format(conversion)) - - - # returns an iterable that contains tuples of the form: - # (literal_text, field_name, format_spec, conversion) - # literal_text can be zero length - # field_name can be None, in which case there's no - # object to format and output - # if field_name is not None, it is looked up, formatted - # with format_spec and conversion and then used - def parse(self, format_string): - return _string.formatter_parser(format_string) - - - # given a field_name, find the object it references. - # field_name: the field being looked up, e.g. "0.name" - # or "lookup[3]" - # used_args: a set of which args have been used - # args, kwargs: as passed in to vformat - def get_field(self, field_name, args, kwargs): - first, rest = _string.formatter_field_name_split(field_name) - - obj = self.get_value(first, args, kwargs) - - # loop through the rest of the field_name, doing - # getattr or getitem as needed - for is_attr, i in rest: - if is_attr: - obj = getattr(obj, i) - else: - obj = obj[i] - - return obj, first + return self.vformat(format_string, args, kwargs) + + def vformat(self, format_string, args, kwargs): + used_args = set() + result, _ = self._vformat(format_string, args, kwargs, used_args, 2) + self.check_unused_args(used_args, args, kwargs) + return result + + def _vformat(self, format_string, args, kwargs, used_args, recursion_depth, + auto_arg_index=0): + if recursion_depth < 0: + raise ValueError('Max string recursion exceeded') + result = [] + for literal_text, field_name, format_spec, conversion in \ + self.parse(format_string): + + # output the literal text + if literal_text: + result.append(literal_text) + + # if there's a field, output it + if field_name is not None: + # this is some markup, find the object and do + # the formatting + + # handle arg indexing when empty field_names are given. + if field_name == '': + if auto_arg_index is False: + raise ValueError('cannot switch from manual field ' + 'specification to automatic field ' + 'numbering') + field_name = str(auto_arg_index) + auto_arg_index += 1 + elif field_name.isdigit(): + if auto_arg_index: + raise ValueError('cannot switch from manual field ' + 'specification to automatic field ' + 'numbering') + # disable auto arg incrementing, if it gets + # used later on, then an exception will be raised + auto_arg_index = False + + # given the field_name, find the object it references + # and the argument it came from + obj, arg_used = self.get_field(field_name, args, kwargs) + used_args.add(arg_used) + + # do any conversion on the resulting object + obj = self.convert_field(obj, conversion) + + # expand the format spec, if needed + format_spec, auto_arg_index = self._vformat( + format_spec, args, kwargs, + used_args, recursion_depth-1, + auto_arg_index=auto_arg_index) + + # format the object and append to the result + result.append(self.format_field(obj, format_spec)) + + return ''.join(result), auto_arg_index + + + def get_value(self, key, args, kwargs): + if isinstance(key, int): + return args[key] + else: + return kwargs[key] + + + def check_unused_args(self, used_args, args, kwargs): + pass + + + def format_field(self, value, format_spec): + return format(value, format_spec) + + + def convert_field(self, value, conversion): + # do any conversion on the resulting object + if conversion is None: + return value + elif conversion == 's': + return str(value) + elif conversion == 'r': + return repr(value) + elif conversion == 'a': + return ascii(value) + raise ValueError("Unknown conversion specifier {0!s}".format(conversion)) + + + # returns an iterable that contains tuples of the form: + # (literal_text, field_name, format_spec, conversion) + # literal_text can be zero length + # field_name can be None, in which case there's no + # object to format and output + # if field_name is not None, it is looked up, formatted + # with format_spec and conversion and then used + def parse(self, format_string): + return _string.formatter_parser(format_string) + + + # given a field_name, find the object it references. + # field_name: the field being looked up, e.g. "0.name" + # or "lookup[3]" + # used_args: a set of which args have been used + # args, kwargs: as passed in to vformat + def get_field(self, field_name, args, kwargs): + first, rest = _string.formatter_field_name_split(field_name) + + obj = self.get_value(first, args, kwargs) + + # loop through the rest of the field_name, doing + # getattr or getitem as needed + for is_attr, i in rest: + if is_attr: + obj = getattr(obj, i) + else: + obj = obj[i] + + return obj, first |