| author    | Mikhail Borisov <borisov.mikhail@gmail.com> | 2022-02-10 16:45:39 +0300 |
| --------- | ------------------------------------------- | ------------------------- |
| committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:39 +0300 |
| commit    | a6a92afe03e02795227d2641b49819b687f088f8 (patch) | |
| tree      | f6984a1d27d5a7ec88a6fdd6e20cd5b7693b6ece /contrib/python/ipython/py2/IPython/utils/openpy.py | |
| parent    | c6dc8b8bd530985bc4cce0137e9a5de32f1087cb (diff) | |
| download  | ydb-a6a92afe03e02795227d2641b49819b687f088f8.tar.gz | |
Restoring authorship annotation for Mikhail Borisov <borisov.mikhail@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/python/ipython/py2/IPython/utils/openpy.py')
-rw-r--r-- | contrib/python/ipython/py2/IPython/utils/openpy.py | 496 |
1 files changed, 248 insertions, 248 deletions
```diff
diff --git a/contrib/python/ipython/py2/IPython/utils/openpy.py b/contrib/python/ipython/py2/IPython/utils/openpy.py
index 0a7cc0f00e..f55f254bc1 100644
--- a/contrib/python/ipython/py2/IPython/utils/openpy.py
+++ b/contrib/python/ipython/py2/IPython/utils/openpy.py
@@ -1,249 +1,249 @@
```

The single hunk spans the whole file: 248 of its 249 lines are deleted and immediately re-added with textually identical content (only the authorship annotation changes, per the commit message above), so the file is shown once below.

```python
"""
Tools to open .py files as Unicode, using the encoding specified within the file,
as per PEP 263.

Much of the code is taken from the tokenize module in Python 3.2.
"""
from __future__ import absolute_import

import io
from io import TextIOWrapper, BytesIO
import os.path
import re

from .py3compat import unicode_type

cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)", re.UNICODE)
cookie_comment_re = re.compile(r"^\s*#.*coding[:=]\s*([-\w.]+)", re.UNICODE)

try:
    # Available in Python 3
    from tokenize import detect_encoding
except ImportError:
    from codecs import lookup, BOM_UTF8

    # Copied from Python 3.2 tokenize
    def _get_normal_name(orig_enc):
        """Imitates get_normal_name in tokenizer.c."""
        # Only care about the first 12 characters.
        enc = orig_enc[:12].lower().replace("_", "-")
        if enc == "utf-8" or enc.startswith("utf-8-"):
            return "utf-8"
        if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
           enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
            return "iso-8859-1"
        return orig_enc

    # Copied from Python 3.2 tokenize
    def detect_encoding(readline):
        """
        The detect_encoding() function is used to detect the encoding that should
        be used to decode a Python source file. It requires one argment, readline,
        in the same way as the tokenize() generator.

        It will call readline a maximum of twice, and return the encoding used
        (as a string) and a list of any lines (left as bytes) it has read in.

        It detects the encoding from the presence of a utf-8 bom or an encoding
        cookie as specified in pep-0263. If both a bom and a cookie are present,
        but disagree, a SyntaxError will be raised. If the encoding cookie is an
        invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
        'utf-8-sig' is returned.

        If no encoding is specified, then the default of 'utf-8' will be returned.
        """
        bom_found = False
        encoding = None
        default = 'utf-8'
        def read_or_stop():
            try:
                return readline()
            except StopIteration:
                return b''

        def find_cookie(line):
            try:
                line_string = line.decode('ascii')
            except UnicodeDecodeError:
                return None

            matches = cookie_re.findall(line_string)
            if not matches:
                return None
            encoding = _get_normal_name(matches[0])
            try:
                codec = lookup(encoding)
            except LookupError:
                # This behaviour mimics the Python interpreter
                raise SyntaxError("unknown encoding: " + encoding)

            if bom_found:
                if codec.name != 'utf-8':
                    # This behaviour mimics the Python interpreter
                    raise SyntaxError('encoding problem: utf-8')
                encoding += '-sig'
            return encoding

        first = read_or_stop()
        if first.startswith(BOM_UTF8):
            bom_found = True
            first = first[3:]
            default = 'utf-8-sig'
        if not first:
            return default, []

        encoding = find_cookie(first)
        if encoding:
            return encoding, [first]

        second = read_or_stop()
        if not second:
            return default, [first]

        encoding = find_cookie(second)
        if encoding:
            return encoding, [first, second]

        return default, [first, second]

try:
    # Available in Python 3.2 and above.
    from tokenize import open
except ImportError:
    # Copied from Python 3.2 tokenize
    def open(filename):
        """Open a file in read only mode using the encoding detected by
        detect_encoding().
        """
        buffer = io.open(filename, 'rb')   # Tweaked to use io.open for Python 2
        encoding, lines = detect_encoding(buffer.readline)
        buffer.seek(0)
        text = TextIOWrapper(buffer, encoding, line_buffering=True)
        text.mode = 'r'
        return text

def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Converts a bytes string with python source code to unicode.

    Unicode strings are passed through unchanged. Byte strings are checked
    for the python source file encoding cookie to determine encoding.
    txt can be either a bytes buffer or a string containing the source
    code.
    """
    if isinstance(txt, unicode_type):
        return txt
    if isinstance(txt, bytes):
        buffer = BytesIO(txt)
    else:
        buffer = txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        encoding = "ascii"
    buffer.seek(0)
    text = TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True)
    text.mode = 'r'
    if skip_encoding_cookie:
        return u"".join(strip_encoding_cookie(text))
    else:
        return text.read()

def strip_encoding_cookie(filelike):
    """Generator to pull lines from a text-mode file, skipping the encoding
    cookie if it is found in the first two lines.
    """
    it = iter(filelike)
    try:
        first = next(it)
        if not cookie_comment_re.match(first):
            yield first
        second = next(it)
        if not cookie_comment_re.match(second):
            yield second
    except StopIteration:
        return

    for line in it:
        yield line

def read_py_file(filename, skip_encoding_cookie=True):
    """Read a Python file, using the encoding declared inside the file.

    Parameters
    ----------
    filename : str
      The path to the file to read.
    skip_encoding_cookie : bool
      If True (the default), and the encoding declaration is found in the first
      two lines, that line will be excluded from the output - compiling a
      unicode string with an encoding declaration is a SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    with open(filename) as f:   # the open function defined in this module.
        if skip_encoding_cookie:
            return "".join(strip_encoding_cookie(f))
        else:
            return f.read()

def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.

    Parameters
    ----------
    url : str
      The URL from which to fetch the file.
    errors : str
      How to handle decoding errors in the file. Options are the same as for
      bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
      If True (the default), and the encoding declaration is found in the first
      two lines, that line will be excluded from the output - compiling a
      unicode string with an encoding declaration is a SyntaxError in Python 2.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # Deferred import for faster start
    try:
        from urllib.request import urlopen # Py 3
    except ImportError:
        from urllib import urlopen
    response = urlopen(url)
    buffer = io.BytesIO(response.read())
    return source_to_unicode(buffer, errors, skip_encoding_cookie)

def _list_readline(x):
    """Given a list, returns a readline() function that returns the next element
    with each call.
    """
    x = iter(x)
    def readline():
        return next(x)
    return readline

# Code for going between .py files and cached .pyc files ----------------------

try:    # Python 3.2, see PEP 3147
    try:
        from importlib.util import source_from_cache, cache_from_source
    except ImportError :
        ## deprecated since 3.4
        from imp import source_from_cache, cache_from_source
except ImportError:
    # Python <= 3.1: .pyc files go next to .py
    def source_from_cache(path):
        basename, ext = os.path.splitext(path)
        if ext not in ('.pyc', '.pyo'):
            raise ValueError('Not a cached Python file extension', ext)
        # Should we look for .pyw files?
        return basename + '.py'

    def cache_from_source(path, debug_override=None):
        if debug_override is None:
            debug_override = __debug__
        basename, ext = os.path.splitext(path)
        return basename + '.pyc' if debug_override else '.pyo'
```