aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/ipython/py3/IPython/utils/openpy.py
diff options
context:
space:
mode:
author Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/python/ipython/py3/IPython/utils/openpy.py
download ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/python/ipython/py3/IPython/utils/openpy.py')
-rw-r--r--contrib/python/ipython/py3/IPython/utils/openpy.py103
1 files changed, 103 insertions, 0 deletions
diff --git a/contrib/python/ipython/py3/IPython/utils/openpy.py b/contrib/python/ipython/py3/IPython/utils/openpy.py
new file mode 100644
index 0000000000..c90d2b53a3
--- /dev/null
+++ b/contrib/python/ipython/py3/IPython/utils/openpy.py
@@ -0,0 +1,103 @@
+"""
+Tools to open .py files as Unicode, using the encoding specified within the file,
+as per PEP 263.
+
+Much of the code is taken from the tokenize module in Python 3.2.
+"""
+
+import io
+from io import TextIOWrapper, BytesIO
+import re
+from tokenize import open, detect_encoding
+
# Finds a PEP 263 style "coding:" / "coding=" declaration anywhere in a string;
# group 1 captures the declared encoding name.
cookie_re = re.compile(
    r"coding[:=]\s*([-\w.]+)",
    flags=re.UNICODE,
)

# Same declaration, but only when it appears inside a ``#`` comment that is
# preceded by nothing except optional whitespace (matched from line start).
cookie_comment_re = re.compile(
    r"^\s*#.*coding[:=]\s*([-\w.]+)",
    flags=re.UNICODE,
)
+
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Decode Python source code to a unicode string.

    The encoding is determined from the source itself with
    ``tokenize.detect_encoding``.  If detection raises ``SyntaxError``
    (for example because the cookie names an unknown codec), the source is
    decoded as ASCII instead.

    Parameters
    ----------
    txt : str, bytes-like object, or binary file object
        A ``str`` is returned unchanged.  ``bytes``, ``bytearray`` and
        ``memoryview`` objects are wrapped in a ``BytesIO``.  Any other
        object is used directly as a binary buffer and must provide
        ``readline`` and ``seek``; it is rewound to the start and is closed
        by the time this function returns.
    errors : str
        Decoding error handler handed to ``TextIOWrapper``; ``'replace'``
        is the default.
    skip_encoding_cookie : bool
        If True (the default), the decoded lines are passed through
        ``strip_encoding_cookie`` before being joined.

    Returns
    -------
    A unicode string containing the decoded source.
    """
    if isinstance(txt, str):
        return txt
    # Accept every in-memory bytes-like type, not only ``bytes``; BytesIO
    # copies the data, so the caller's object is never modified.
    if isinstance(txt, (bytes, bytearray, memoryview)):
        buffer = BytesIO(txt)
    else:
        buffer = txt
    try:
        encoding, _ = detect_encoding(buffer.readline)
    except SyntaxError:
        encoding = "ascii"
    # detect_encoding consumed the leading line(s); start decoding from the top.
    buffer.seek(0)
    with TextIOWrapper(buffer, encoding, errors=errors, line_buffering=True) as text:
        # Tag the wrapper as read-mode, as tokenize.open does for its wrapper.
        text.mode = 'r'
        if skip_encoding_cookie:
            return "".join(strip_encoding_cookie(text))
        return text.read()
+
def strip_encoding_cookie(filelike):
    """Yield the lines of a text-mode file, leaving out any of the first two
    lines that looks like an encoding cookie comment.

    Lines are tested against ``cookie_comment_re``; every line after the
    second is passed through untouched.
    """
    lines = iter(filelike)

    # Only the first two lines are candidates for the encoding declaration.
    for _ in range(2):
        try:
            candidate = next(lines)
        except StopIteration:
            # Fewer than two lines in the input: nothing more to emit.
            return
        if cookie_comment_re.match(candidate) is None:
            yield candidate

    yield from lines
+
def read_py_file(filename, skip_encoding_cookie=True):
    """Return the text of a Python file, decoded with the encoding the file
    itself declares.

    Parameters
    ----------
    filename : str
        The path to the file to read.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # ``open`` is tokenize.open (imported at module level), not the builtin:
    # it opens the file in text mode using the detected source encoding.
    with open(filename) as source:
        if not skip_encoding_cookie:
            return source.read()
        return "".join(strip_encoding_cookie(source))
+
def read_py_url(url, errors='replace', skip_encoding_cookie=True):
    """Read a Python file from a URL, using the encoding declared inside the file.

    The whole response body is read into memory, the connection is closed,
    and the bytes are then decoded by ``source_to_unicode``.

    Parameters
    ----------
    url : str
        The URL from which to fetch the file.
    errors : str
        How to handle decoding errors in the file. Options are the same as for
        bytes.decode(), but here 'replace' is the default.
    skip_encoding_cookie : bool
        If True (the default), and the encoding declaration is found in the first
        two lines, that line will be excluded from the output.

    Returns
    -------
    A unicode string containing the contents of the file.
    """
    # Deferred import for faster start
    from urllib.request import urlopen

    # Use the response as a context manager so it is closed even if
    # reading fails, instead of leaking the open connection.
    with urlopen(url) as response:
        payload = response.read()
    buffer = io.BytesIO(payload)
    return source_to_unicode(buffer, errors, skip_encoding_cookie)