diff options
author | grok <grok@yandex-team.ru> | 2022-02-10 16:49:05 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:05 +0300 |
commit | 625709abc3d774450a80a3cc1e8c7ee6b6b5dcbb (patch) | |
tree | d174a92369e2e0b6ea57179baace522d40ae1b0e /contrib/python/protobuf/py3 | |
parent | 9533560926db5e88beedf9b4154e57617ea8cf1b (diff) | |
download | ydb-625709abc3d774450a80a3cc1e8c7ee6b6b5dcbb.tar.gz |
Restoring authorship annotation for <grok@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/python/protobuf/py3')
-rw-r--r-- | contrib/python/protobuf/py3/google/protobuf/text_encoding.py | 96 |
1 files changed, 48 insertions, 48 deletions
diff --git a/contrib/python/protobuf/py3/google/protobuf/text_encoding.py b/contrib/python/protobuf/py3/google/protobuf/text_encoding.py index 39898765f2..1c4635be9a 100644 --- a/contrib/python/protobuf/py3/google/protobuf/text_encoding.py +++ b/contrib/python/protobuf/py3/google/protobuf/text_encoding.py @@ -33,71 +33,71 @@ import re import six -_cescape_chr_to_symbol_map = {} -_cescape_chr_to_symbol_map[9] = r'\t' # optional escape -_cescape_chr_to_symbol_map[10] = r'\n' # optional escape -_cescape_chr_to_symbol_map[13] = r'\r' # optional escape -_cescape_chr_to_symbol_map[34] = r'\"' # necessary escape -_cescape_chr_to_symbol_map[39] = r"\'" # optional escape -_cescape_chr_to_symbol_map[92] = r'\\' # necessary escape - -# Lookup table for unicode -_cescape_unicode_to_str = [chr(i) for i in range(0, 256)] -for byte, string in _cescape_chr_to_symbol_map.items(): - _cescape_unicode_to_str[byte] = string +_cescape_chr_to_symbol_map = {} +_cescape_chr_to_symbol_map[9] = r'\t' # optional escape +_cescape_chr_to_symbol_map[10] = r'\n' # optional escape +_cescape_chr_to_symbol_map[13] = r'\r' # optional escape +_cescape_chr_to_symbol_map[34] = r'\"' # necessary escape +_cescape_chr_to_symbol_map[39] = r"\'" # optional escape +_cescape_chr_to_symbol_map[92] = r'\\' # necessary escape + +# Lookup table for unicode +_cescape_unicode_to_str = [chr(i) for i in range(0, 256)] +for byte, string in _cescape_chr_to_symbol_map.items(): + _cescape_unicode_to_str[byte] = string # Lookup table for non-utf8, with necessary escapes at (o >= 127 or o < 32) _cescape_byte_to_str = ([r'\%03o' % i for i in range(0, 32)] + [chr(i) for i in range(32, 127)] + [r'\%03o' % i for i in range(127, 256)]) -for byte, string in _cescape_chr_to_symbol_map.items(): - _cescape_byte_to_str[byte] = string -del byte, string +for byte, string in _cescape_chr_to_symbol_map.items(): + _cescape_byte_to_str[byte] = string +del byte, string def CEscape(text, as_utf8): - # type: (...) -> str - """Escape a bytes string for use in an text protocol buffer. + # type: (...) -> str + """Escape a bytes string for use in an text protocol buffer. Args: - text: A byte string to be escaped. - as_utf8: Specifies if result may contain non-ASCII characters. - In Python 3 this allows unescaped non-ASCII Unicode characters. - In Python 2 the return value will be valid UTF-8 rather than only ASCII. + text: A byte string to be escaped. + as_utf8: Specifies if result may contain non-ASCII characters. + In Python 3 this allows unescaped non-ASCII Unicode characters. + In Python 2 the return value will be valid UTF-8 rather than only ASCII. Returns: - Escaped string (str). + Escaped string (str). """ - # Python's text.encode() 'string_escape' or 'unicode_escape' codecs do not - # satisfy our needs; they encodes unprintable characters using two-digit hex - # escapes whereas our C++ unescaping function allows hex escapes to be any - # length. So, "\0011".encode('string_escape') ends up being "\\x011", which - # will be decoded in C++ as a single-character string with char code 0x11. - if six.PY3: - text_is_unicode = isinstance(text, str) - if as_utf8 and text_is_unicode: - # We're already unicode, no processing beyond control char escapes. - return text.translate(_cescape_chr_to_symbol_map) - ord_ = ord if text_is_unicode else lambda x: x # bytes iterate as ints. - else: - ord_ = ord # PY2 + # Python's text.encode() 'string_escape' or 'unicode_escape' codecs do not + # satisfy our needs; they encodes unprintable characters using two-digit hex + # escapes whereas our C++ unescaping function allows hex escapes to be any + # length. So, "\0011".encode('string_escape') ends up being "\\x011", which + # will be decoded in C++ as a single-character string with char code 0x11. + if six.PY3: + text_is_unicode = isinstance(text, str) + if as_utf8 and text_is_unicode: + # We're already unicode, no processing beyond control char escapes. + return text.translate(_cescape_chr_to_symbol_map) + ord_ = ord if text_is_unicode else lambda x: x # bytes iterate as ints. + else: + ord_ = ord # PY2 if as_utf8: - return ''.join(_cescape_unicode_to_str[ord_(c)] for c in text) - return ''.join(_cescape_byte_to_str[ord_(c)] for c in text) + return ''.join(_cescape_unicode_to_str[ord_(c)] for c in text) + return ''.join(_cescape_byte_to_str[ord_(c)] for c in text) _CUNESCAPE_HEX = re.compile(r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])') def CUnescape(text): - # type: (str) -> bytes - """Unescape a text string with C-style escape sequences to UTF-8 bytes. - - Args: - text: The data to parse in a str. - Returns: - A byte string. - """ - + # type: (str) -> bytes + """Unescape a text string with C-style escape sequences to UTF-8 bytes. + + Args: + text: The data to parse in a str. + Returns: + A byte string. + """ + def ReplaceHex(m): # Only replace the match if the number of leading back slashes is odd. i.e. # the slash itself is not escaped. @@ -109,9 +109,9 @@ def CUnescape(text): # allow single-digit hex escapes (like '\xf'). result = _CUNESCAPE_HEX.sub(ReplaceHex, text) - if six.PY2: + if six.PY2: return result.decode('string_escape') - return (result.encode('utf-8') # PY3: Make it bytes to allow decode. + return (result.encode('utf-8') # PY3: Make it bytes to allow decode. .decode('unicode_escape') # Make it bytes again to return the proper type. .encode('raw_unicode_escape')) |