diff options
author | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:24:06 +0300 |
---|---|---|
committer | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:41:34 +0300 |
commit | e0e3e1717e3d33762ce61950504f9637a6e669ed (patch) | |
tree | bca3ff6939b10ed60c3d5c12439963a1146b9711 /contrib/tools/python3/src/Lib/string.py | |
parent | 38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff) | |
download | ydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz |
add ydb deps
Diffstat (limited to 'contrib/tools/python3/src/Lib/string.py')
-rw-r--r-- | contrib/tools/python3/src/Lib/string.py | 309 |
1 files changed, 309 insertions, 0 deletions
diff --git a/contrib/tools/python3/src/Lib/string.py b/contrib/tools/python3/src/Lib/string.py new file mode 100644 index 0000000000..2eab6d4f59 --- /dev/null +++ b/contrib/tools/python3/src/Lib/string.py @@ -0,0 +1,309 @@ +"""A collection of string constants. + +Public module variables: + +whitespace -- a string containing all ASCII whitespace +ascii_lowercase -- a string containing all ASCII lowercase letters +ascii_uppercase -- a string containing all ASCII uppercase letters +ascii_letters -- a string containing all ASCII letters +digits -- a string containing all ASCII decimal digits +hexdigits -- a string containing all ASCII hexadecimal digits +octdigits -- a string containing all ASCII octal digits +punctuation -- a string containing all ASCII punctuation characters +printable -- a string containing all ASCII characters considered printable + +""" + +__all__ = ["ascii_letters", "ascii_lowercase", "ascii_uppercase", "capwords", + "digits", "hexdigits", "octdigits", "printable", "punctuation", + "whitespace", "Formatter", "Template"] + +import _string + +# Some strings for ctype-style character classification +whitespace = ' \t\n\r\v\f' +ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz' +ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +ascii_letters = ascii_lowercase + ascii_uppercase +digits = '0123456789' +hexdigits = digits + 'abcdef' + 'ABCDEF' +octdigits = '01234567' +punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +printable = digits + ascii_letters + punctuation + whitespace + +# Functions which aren't available as string methods. + +# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". +def capwords(s, sep=None): + """capwords(s [,sep]) -> string + + Split the argument into words using split, capitalize each + word using capitalize, and join the capitalized words using + join. If the optional second argument sep is absent or None, + runs of whitespace characters are replaced by a single space + and leading and trailing whitespace are removed, otherwise + sep is used to split and join the words. + + """ + return (sep or ' ').join(map(str.capitalize, s.split(sep))) + + +#################################################################### +import re as _re +from collections import ChainMap as _ChainMap + +_sentinel_dict = {} + +class Template: + """A string class for supporting $-substitutions.""" + + delimiter = '$' + # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but + # without the ASCII flag. We can't add re.ASCII to flags because of + # backward compatibility. So we use the ?a local flag and [a-z] pattern. + # See https://bugs.python.org/issue31672 + idpattern = r'(?a:[_a-z][_a-z0-9]*)' + braceidpattern = None + flags = _re.IGNORECASE + + def __init_subclass__(cls): + super().__init_subclass__() + if 'pattern' in cls.__dict__: + pattern = cls.pattern + else: + delim = _re.escape(cls.delimiter) + id = cls.idpattern + bid = cls.braceidpattern or cls.idpattern + pattern = fr""" + {delim}(?: + (?P<escaped>{delim}) | # Escape sequence of two delimiters + (?P<named>{id}) | # delimiter and a Python identifier + {{(?P<braced>{bid})}} | # delimiter and a braced identifier + (?P<invalid>) # Other ill-formed delimiter exprs + ) + """ + cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE) + + def __init__(self, template): + self.template = template + + # Search for $$, $identifier, ${identifier}, and any bare $'s + + def _invalid(self, mo): + i = mo.start('invalid') + lines = self.template[:i].splitlines(keepends=True) + if not lines: + colno = 1 + lineno = 1 + else: + colno = i - len(''.join(lines[:-1])) + lineno = len(lines) + raise ValueError('Invalid placeholder in string: line %d, col %d' % + (lineno, colno)) + + def substitute(self, mapping=_sentinel_dict, /, **kws): + if mapping is _sentinel_dict: + mapping = kws + elif kws: + mapping = _ChainMap(kws, mapping) + # Helper function for .sub() + def convert(mo): + # Check the most common path first. + named = mo.group('named') or mo.group('braced') + if named is not None: + return str(mapping[named]) + if mo.group('escaped') is not None: + return self.delimiter + if mo.group('invalid') is not None: + self._invalid(mo) + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return self.pattern.sub(convert, self.template) + + def safe_substitute(self, mapping=_sentinel_dict, /, **kws): + if mapping is _sentinel_dict: + mapping = kws + elif kws: + mapping = _ChainMap(kws, mapping) + # Helper function for .sub() + def convert(mo): + named = mo.group('named') or mo.group('braced') + if named is not None: + try: + return str(mapping[named]) + except KeyError: + return mo.group() + if mo.group('escaped') is not None: + return self.delimiter + if mo.group('invalid') is not None: + return mo.group() + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return self.pattern.sub(convert, self.template) + + def is_valid(self): + for mo in self.pattern.finditer(self.template): + if mo.group('invalid') is not None: + return False + if (mo.group('named') is None + and mo.group('braced') is None + and mo.group('escaped') is None): + # If all the groups are None, there must be + # another group we're not expecting + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return True + + def get_identifiers(self): + ids = [] + for mo in self.pattern.finditer(self.template): + named = mo.group('named') or mo.group('braced') + if named is not None and named not in ids: + # add a named group only the first time it appears + ids.append(named) + elif (named is None + and mo.group('invalid') is None + and mo.group('escaped') is None): + # If all the groups are None, there must be + # another group we're not expecting + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return ids + +# Initialize Template.pattern. __init_subclass__() is automatically called +# only for subclasses, not for the Template class itself. +Template.__init_subclass__() + + +######################################################################## +# the Formatter class +# see PEP 3101 for details and purpose of this class + +# The hard parts are reused from the C implementation. They're exposed as "_" +# prefixed methods of str. + +# The overall parser is implemented in _string.formatter_parser. +# The field name parser is implemented in _string.formatter_field_name_split + +class Formatter: + def format(self, format_string, /, *args, **kwargs): + return self.vformat(format_string, args, kwargs) + + def vformat(self, format_string, args, kwargs): + used_args = set() + result, _ = self._vformat(format_string, args, kwargs, used_args, 2) + self.check_unused_args(used_args, args, kwargs) + return result + + def _vformat(self, format_string, args, kwargs, used_args, recursion_depth, + auto_arg_index=0): + if recursion_depth < 0: + raise ValueError('Max string recursion exceeded') + result = [] + for literal_text, field_name, format_spec, conversion in \ + self.parse(format_string): + + # output the literal text + if literal_text: + result.append(literal_text) + + # if there's a field, output it + if field_name is not None: + # this is some markup, find the object and do + # the formatting + + # handle arg indexing when empty field_names are given. + if field_name == '': + if auto_arg_index is False: + raise ValueError('cannot switch from manual field ' + 'specification to automatic field ' + 'numbering') + field_name = str(auto_arg_index) + auto_arg_index += 1 + elif field_name.isdigit(): + if auto_arg_index: + raise ValueError('cannot switch from manual field ' + 'specification to automatic field ' + 'numbering') + # disable auto arg incrementing, if it gets + # used later on, then an exception will be raised + auto_arg_index = False + + # given the field_name, find the object it references + # and the argument it came from + obj, arg_used = self.get_field(field_name, args, kwargs) + used_args.add(arg_used) + + # do any conversion on the resulting object + obj = self.convert_field(obj, conversion) + + # expand the format spec, if needed + format_spec, auto_arg_index = self._vformat( + format_spec, args, kwargs, + used_args, recursion_depth-1, + auto_arg_index=auto_arg_index) + + # format the object and append to the result + result.append(self.format_field(obj, format_spec)) + + return ''.join(result), auto_arg_index + + + def get_value(self, key, args, kwargs): + if isinstance(key, int): + return args[key] + else: + return kwargs[key] + + + def check_unused_args(self, used_args, args, kwargs): + pass + + + def format_field(self, value, format_spec): + return format(value, format_spec) + + + def convert_field(self, value, conversion): + # do any conversion on the resulting object + if conversion is None: + return value + elif conversion == 's': + return str(value) + elif conversion == 'r': + return repr(value) + elif conversion == 'a': + return ascii(value) + raise ValueError("Unknown conversion specifier {0!s}".format(conversion)) + + + # returns an iterable that contains tuples of the form: + # (literal_text, field_name, format_spec, conversion) + # literal_text can be zero length + # field_name can be None, in which case there's no + # object to format and output + # if field_name is not None, it is looked up, formatted + # with format_spec and conversion and then used + def parse(self, format_string): + return _string.formatter_parser(format_string) + + + # given a field_name, find the object it references. + # field_name: the field being looked up, e.g. "0.name" + # or "lookup[3]" + # used_args: a set of which args have been used + # args, kwargs: as passed in to vformat + def get_field(self, field_name, args, kwargs): + first, rest = _string.formatter_field_name_split(field_name) + + obj = self.get_value(first, args, kwargs) + + # loop through the rest of the field_name, doing + # getattr or getitem as needed + for is_attr, i in rest: + if is_attr: + obj = getattr(obj, i) + else: + obj = obj[i] + + return obj, first |