diff options
author | nkozlovskiy <nmk@ydb.tech> | 2023-10-02 18:57:38 +0300 |
---|---|---|
committer | nkozlovskiy <nmk@ydb.tech> | 2023-10-02 19:39:06 +0300 |
commit | 6295ef4d23465c11296e898b9dc4524ad9592b5d (patch) | |
tree | fc0c852877b2c52f365a1f6ed0710955844338c2 /contrib/deprecated/python/win-unicode-console | |
parent | de63c80b75948ecc13894854514d147840ff8430 (diff) | |
download | ydb-6295ef4d23465c11296e898b9dc4524ad9592b5d.tar.gz |
oss ydb: fix dstool building and test run
Diffstat (limited to 'contrib/deprecated/python/win-unicode-console')
15 files changed, 1621 insertions, 0 deletions
diff --git a/contrib/deprecated/python/win-unicode-console/.dist-info/METADATA b/contrib/deprecated/python/win-unicode-console/.dist-info/METADATA new file mode 100644 index 0000000000..9a922bc913 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/.dist-info/METADATA @@ -0,0 +1,133 @@ +Metadata-Version: 2.1 +Name: win-unicode-console +Version: 0.5 +Summary: Enable Unicode input and display when running Python from Windows console. +Home-page: https://github.com/Drekin/win-unicode-console +Author: Drekin +Author-email: drekin@gmail.com +License: MIT +Download-URL: https://github.com/Drekin/win-unicode-console/archive/0.5.zip +Keywords: Windows,Unicode,console +Platform: UNKNOWN +Classifier: Development Status :: 4 - Beta +Classifier: Environment :: Console +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: Microsoft :: Windows +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 + + +win-unicode-console +=================== + +A Python package to enable Unicode input and display when running Python from Windows console. + +General information +------------------- + +When running Python in the standard console on Windows, there are several problems when one tries to enter or display Unicode characters. The relevant issue is http://bugs.python.org/issue1602. This package solves some of them. + +- First, when you want to display Unicode characters in Windows console, you have to select a font able to display them. Similarly, if you want to enter Unicode characters, you have to have your keyboard properly configured. This has nothing to do with Python, but is included here for completeness.
+ +- The standard stream objects (``sys.stdin``, ``sys.stdout``, ``sys.stderr``) are not capable of reading and displaying Unicode characters in Windows console. This has nothing to do with encoding, since even ``sys.stdin.buffer.raw.readline()`` returns ``b"?\n"`` when entering ``α`` and there is no encoding under which ``sys.stdout.buffer.raw.write`` displays ``α``. + + The ``streams`` module provides several alternative stream objects. ``stdin_raw``, ``stdout_raw``, and ``stderr_raw`` are raw stream objects using WinAPI functions ``ReadConsoleW`` and ``WriteConsoleW`` to interact with Windows console through UTF-16-LE encoded bytes. The ``stdin_text``, ``stdout_text``, and ``stderr_text`` are standard text IO wrappers over standard buffered IO over our raw streams, and are intended to be primary replacements to ``sys.std*`` streams. Unfortunately, other wrappers around ``std*_text`` are needed (see below), so there are more stream objects in ``streams`` module. + + The function ``streams.enable`` installs chosen stream objects instead of the original ones. By default, it chooses appropriate stream objects itself. The function ``streams.disable`` restores the original stream objects (these are stored in ``sys.__std*__`` attributes by Python). + + After replacing the stream objects, also using ``print`` with a string containing Unicode characters and displaying Unicode characters in the interactive loop works. For ``input``, see below. + +- Python interactive loop doesn't use ``sys.stdin`` to read input so fixing it doesn't help. Also the ``input`` function may or may not use ``sys.stdin`` depending on whether ``sys.stdin`` and ``sys.stdout`` have the standard filenos and whether they are interactive. See http://bugs.python.org/issue17620 for more information. + + To solve this, we install a custom readline hook. Readline hook is a function which is used to read a single line interactively by Python REPL. 
It may also be used by ``input`` function under certain conditions (see above). On Linux, this hook is usually set to GNU readline function, which provides features like autocompletion, history, … + + The module ``readline_hook`` provides our custom readline hook, which uses ``sys.stdin`` to get the input and is (de)activated by functions ``readline_hook.enable``, ``readline_hook.disable``. + + As we said, readline hook can be called from two places – from the REPL and from ``input`` function. In the first case the prompt is encoded using ``sys.stdin.encoding``, but in the second case ``sys.stdout.encoding`` is used. So Python currently makes an assumption that these two encodings are equal. + +- Python tokenizer, which is used when parsing the input from REPL, cannot handle UTF-16 or generally any encoding containing null bytes. Because UTF-16-LE is the encoding of Unicode used by Windows, we have to additionally wrap our text stream objects (``std*_text``). Thus, ``streams`` module contains also stream objects ``stdin_text_transcoded``, ``stdout_text_transcoded``, and ``stderr_text_transcoded``. They basically just hide the underlying UTF-16-LE encoded buffered IO, and set the encoding to UTF-8. These transcoding wrappers are used by default by ``streams.enable``. + +There are additional issues on Python 2. + +- Since default Python 2 strings correspond to ``bytes`` rather than ``unicode``, people are usually calling ``print`` with ``bytes`` argument. Therefore, ``sys.stdout.write`` and ``sys.stderr.write`` should support ``bytes`` argument. That is why we add ``stdout_text_str`` and ``stderr_text_str`` stream objects to ``streams`` module. They are used by default on Python 2. + +- When we enter a Unicode literal into interactive interpreter, it gets processed by the Python tokenizer, which is bytes-based.
When we enter ``u"\u03b1"`` into the interactive interpreter, the tokenizer gets essentially ``b'u"\xce\xb1"'`` plus the information that the encoding used is UTF-8. The problem is that the tokenizer uses the encoding only if ``sys.stdin`` is a file object (see https://hg.python.org/cpython/file/d356e68de236/Parser/tokenizer.c#l797). Hence, we introduce another stream object ``streams.stdin_text_fileobj`` that wraps ``stdin_text_transcoded`` and also is structurally compatible with Python file object. This object is used by default on Python 2. + +- The check for interactive streams done by ``raw_input`` unfortunately requires that both ``sys.stdin`` and ``sys.stdout`` are file objects. Besides ``stdin_text_fileobj`` for stdin we could use also ``stdout_text_str_fileobj`` for stdout. Unfortunately, that breaks ``print``. + + Using ``print`` statement or function leads to calling ``PyFile_WriteObject`` with ``sys.stdout`` as argument. Unfortunately, its generic ``write`` method is used only if it is *not* a file object. Otherwise, ``PyObject_Print`` is called, and this function is file-based, so it ends with a ``fprintf`` call, which is not something we want. In conclusion, we need stdout *not* to be a file object. + + Given the situation described, the best solution seems to be reimplementing ``raw_input`` and ``input`` builtin functions and monkeypatching ``__builtins__``. This is done by our ``raw_input`` module on Python 2. + +- Similarly to the input from ``sys.stdin`` the arguments in ``sys.argv`` are also ``bytes`` on Python 2 and the original ones may not be reconstructable. To overcome this we add ``unicode_argv`` module. The function ``unicode_argv.get_unicode_argv`` returns Unicode version of ``sys.argv`` obtained by WinAPI functions ``GetCommandLineW`` and ``CommandLineToArgvW``. The function ``unicode_argv.enable`` monkeypatches ``sys.argv`` with the Unicode arguments.
+ + +Installation +------------ + +Install the package from PyPI via ``pip install win-unicode-console`` (recommended), or download the archive and install it from the archive (e.g. ``pip install win_unicode_console-0.x.zip``), or install the package manually by placing directory ``win_unicode_console`` and module ``run.py`` from the archive to the ``site-packages`` directory of your Python installation. + + +Usage +----- + +The top-level ``win_unicode_console`` module contains a function ``enable``, which installs various fixes offered by ``win_unicode_console`` modules, and a function ``disable``, which restores the original environment. By default, custom stream objects are installed as well as a custom readline hook. On Python 2, ``raw_input`` and ``input`` functions are monkeypatched. ``sys.argv`` is not monkeypatched by default since unfortunately some Python 2 code strictly assumes ``str`` instances in ``sys.argv`` list. Use ``enable(use_unicode_argv=True)`` if you want the monkeypatching. For further customization, see the sources. The logic should be clear. + +Generic usage of the package is just calling ``win_unicode_console.enable()`` whenever the fixes should be applied and ``win_unicode_console.disable()`` to revert all the changes. Note that it should be a responsibility of a Python user on Windows to install ``win_unicode_console`` and fix his Python environment regarding Unicode interaction with console, rather than of a third-party developer enabling ``win_unicode_console`` in his application, which adds a dependency. Our package should be seen as an external patch to Python on Windows rather than a feature package for other packages not directly related to fixing Unicode issues. + +Different ways of how ``win_unicode_console`` can be used to fix a Python environment on Windows follow.
+ +- *Python patch (recommended).* Just call ``win_unicode_console.enable()`` in your ``sitecustomize`` or ``usercustomize`` module (see https://docs.python.org/3/tutorial/appendix.html#the-customization-modules for more information). This will enable ``win_unicode_console`` on every run of the Python interpreter (unless ``site`` is disabled). Doing so should not break executed scripts in any way. Otherwise, it is a bug of ``win_unicode_console`` that should be fixed. + +- *Opt-in runner.* You may easily run a script with ``win_unicode_console`` enabled by using our ``runner`` module and its helper ``run`` script. To do so, execute ``py -i -m run script.py`` instead of ``py -i script.py`` for interactive mode, and similarly ``py -m run script.py`` instead of ``py script.py`` for non-interactive mode. Of course you may provide arguments to your script: ``py -i -m run script.py arg1 arg2``. To run the bare interactive interpreter with ``win_unicode_console`` enabled, execute ``py -i -m run``. + +- *Opt-out runner.* In case you are using ``win_unicode_console`` as Python patch, but you want to run a particular script with ``win_unicode_console`` disabled, you can also use the runner. To do so, execute ``py -i -m run --init-disable script.py``. + +- *Customized runner.* To move arbitrary initialization (e.g. enabling ``win_unicode_console`` with non-default arguments) from ``sitecustomize`` to opt-in runner, move it to a separate module and use ``py -i -m run --init-module module script.py``. That will import a module ``module`` on startup instead of enabling ``win_unicode_console`` with default arguments. + + +Compatibility +------------- + +``win_unicode_console`` package was tested on Python 3.4, Python 3.5, and Python 2.7. 32-bit or 64-bit shouldn't matter. 
It also interacts well with the following packages: + +- ``colorama`` package (https://pypi.python.org/pypi/colorama) makes ANSI escape character sequences (for producing colored terminal text and cursor positioning) work under MS Windows. It does so by wrapping ``sys.stdout`` and ``sys.stderr`` streams. Since ``win_unicode_console`` replaces the streams in order to support Unicode, ``win_unicode_console.enable`` has to be called before ``colorama.init`` so everything works as expected. + + As of ``colorama`` v0.3.3, there was an early binding issue (https://github.com/tartley/colorama/issues/32), so ``win_unicode_console.enable`` has to be called even before importing ``colorama``. Note that is already the case when ``win_unicode_console`` is used as Python patch or as opt-in runner. The issue was already fixed. + +- ``pyreadline`` package (https://pypi.python.org/pypi/pyreadline/2.0) implements GNU readline features on Windows. It provides its own readline hook, which actually supports Unicode input. ``win_unicode_console.readline_hook`` detects when ``pyreadline`` is active, and in that case, by default, reuses its readline hook rather than installing its own, so GNU readline features are preserved on top of our Unicode streams. + +- ``IPython`` (https://pypi.python.org/pypi/ipython) can be also used with ``win_unicode_console``. + + As of ``IPython`` 3.2.1, there is an early binding issue (https://github.com/ipython/ipython/issues/8669), so ``win_unicode_console.enable`` has to be called even before importing ``IPython``. That is the case when ``win_unicode_console`` is used as Python patch. + + There was also an issue that IPython was not compatible with the builtin function ``raw_input`` returning unicode on Python 2 (https://github.com/ipython/ipython/issues/8670). If you hit this issue, you can make ``win_unicode_console.raw_input.raw_input`` return bytes by enabling it as ``win_unicode_console.enable(raw_input__return_unicode=False)``. 
This was fixed in IPython 4. + + +Backward incompatibility +------------------------ + +- Since version 0.4, the signature of ``streams.enable`` has been changed because there are now more options for the stream objects to be used. It now accepts a keyword argument for each ``stdin``, ``stdout``, ``stderr``, setting the corresponding stream. ``None`` means “do not set”, ``Ellipsis`` means “use the default value”. + + A function ``streams.enable_only`` was added. It works the same way as ``streams.enable``, but the default value for each parameter is ``None``. + + Functions ``streams.enable_reader``, ``streams.enable_writer``, and ``streams.enable_error_writer`` have been removed. Example: instead of ``streams.enable_reader(transcode=True)`` use ``streams.enable_only(stdin=streams.stdin_text_transcoding)``. + + There are also corresponding changes in top-level ``enable`` function. + +- Since version 0.3, the custom stream objects have the standard filenos, so calling ``input`` doesn't handle Unicode without custom readline hook. + + +Acknowledgements +---------------- + +- The code of ``streams`` module is based on the code submitted to http://bugs.python.org/issue1602. +- The idea of providing custom readline hook and the code of ``readline_hook`` module is based on https://github.com/pyreadline/pyreadline. +- The code related to ``unicode_argv.get_full_unicode_argv`` is based on http://code.activestate.com/recipes/572200/. +- The idea of using path hooks and the code related to ``unicode_argv.argv_setter_hook`` is based on https://mail.python.org/pipermail/python-list/2016-June/710183.html. 
+ + diff --git a/contrib/deprecated/python/win-unicode-console/.dist-info/top_level.txt b/contrib/deprecated/python/win-unicode-console/.dist-info/top_level.txt new file mode 100644 index 0000000000..f1cd8d49cd --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/.dist-info/top_level.txt @@ -0,0 +1,2 @@ +run +win_unicode_console diff --git a/contrib/deprecated/python/win-unicode-console/README.rst b/contrib/deprecated/python/win-unicode-console/README.rst new file mode 100644 index 0000000000..21fc11166d --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/README.rst @@ -0,0 +1,109 @@ + +win-unicode-console +=================== + +A Python package to enable Unicode input and display when running Python from Windows console. + +General information +------------------- + +When running Python in the standard console on Windows, there are several problems when one tries to enter or display Unicode characters. The relevant issue is http://bugs.python.org/issue1602. This package solves some of them. + +- First, when you want to display Unicode characters in Windows console, you have to select a font able to display them. Similarly, if you want to enter Unicode characters, you have to have your keyboard properly configured. This has nothing to do with Python, but is included here for completeness. + +- The standard stream objects (``sys.stdin``, ``sys.stdout``, ``sys.stderr``) are not capable of reading and displaying Unicode characters in Windows console. This has nothing to do with encoding, since even ``sys.stdin.buffer.raw.readline()`` returns ``b"?\n"`` when entering ``α`` and there is no encoding under which ``sys.stdout.buffer.raw.write`` displays ``α``. + + The ``streams`` module provides several alternative stream objects. ``stdin_raw``, ``stdout_raw``, and ``stderr_raw`` are raw stream objects using WinAPI functions ``ReadConsoleW`` and ``WriteConsoleW`` to interact with Windows console through UTF-16-LE encoded bytes.
The ``stdin_text``, ``stdout_text``, and ``stderr_text`` are standard text IO wrappers over standard buffered IO over our raw streams, and are intended to be primary replacements to ``sys.std*`` streams. Unfortunately, other wrappers around ``std*_text`` are needed (see below), so there are more stream objects in ``streams`` module. + + The function ``streams.enable`` installs chosen stream objects instead of the original ones. By default, it chooses appropriate stream objects itself. The function ``streams.disable`` restores the original stream objects (these are stored in ``sys.__std*__`` attributes by Python). + + After replacing the stream objects, also using ``print`` with a string containing Unicode characters and displaying Unicode characters in the interactive loop works. For ``input``, see below. + +- Python interactive loop doesn't use ``sys.stdin`` to read input so fixing it doesn't help. Also the ``input`` function may or may not use ``sys.stdin`` depending on whether ``sys.stdin`` and ``sys.stdout`` have the standard filenos and whether they are interactive. See http://bugs.python.org/issue17620 for more information. + + To solve this, we install a custom readline hook. Readline hook is a function which is used to read a single line interactively by Python REPL. It may also be used by ``input`` function under certain conditions (see above). On Linux, this hook is usually set to GNU readline function, which provides features like autocompletion, history, … + + The module ``readline_hook`` provides our custom readline hook, which uses ``sys.stdin`` to get the input and is (de)activated by functions ``readline_hook.enable``, ``readline_hook.disable``. + + As we said, readline hook can be called from two places – from the REPL and from ``input`` function. In the first case the prompt is encoded using ``sys.stdin.encoding``, but in the second case ``sys.stdout.encoding`` is used. So Python currently makes an assumption that these two encodings are equal. 
+ +- Python tokenizer, which is used when parsing the input from REPL, cannot handle UTF-16 or generally any encoding containing null bytes. Because UTF-16-LE is the encoding of Unicode used by Windows, we have to additionally wrap our text stream objects (``std*_text``). Thus, ``streams`` module contains also stream objects ``stdin_text_transcoded``, ``stdout_text_transcoded``, and ``stderr_text_transcoded``. They basically just hide the underlying UTF-16-LE encoded buffered IO, and set the encoding to UTF-8. These transcoding wrappers are used by default by ``streams.enable``. + +There are additional issues on Python 2. + +- Since default Python 2 strings correspond to ``bytes`` rather than ``unicode``, people are usually calling ``print`` with ``bytes`` argument. Therefore, ``sys.stdout.write`` and ``sys.stderr.write`` should support ``bytes`` argument. That is why we add ``stdout_text_str`` and ``stderr_text_str`` stream objects to ``streams`` module. They are used by default on Python 2. + +- When we enter a Unicode literal into interactive interpreter, it gets processed by the Python tokenizer, which is bytes-based. When we enter ``u"\u03b1"`` into the interactive interpreter, the tokenizer gets essentially ``b'u"\xce\xb1"'`` plus the information that the encoding used is UTF-8. The problem is that the tokenizer uses the encoding only if ``sys.stdin`` is a file object (see https://hg.python.org/cpython/file/d356e68de236/Parser/tokenizer.c#l797). Hence, we introduce another stream object ``streams.stdin_text_fileobj`` that wraps ``stdin_text_transcoded`` and also is structurally compatible with Python file object. This object is used by default on Python 2. + +- The check for interactive streams done by ``raw_input`` unfortunately requires that both ``sys.stdin`` and ``sys.stdout`` are file objects. Besides ``stdin_text_fileobj`` for stdin we could use also ``stdout_text_str_fileobj`` for stdout. Unfortunately, that breaks ``print``.
+ + Using ``print`` statement or function leads to calling ``PyFile_WriteObject`` with ``sys.stdout`` as argument. Unfortunately, its generic ``write`` method is used only if it is *not* a file object. Otherwise, ``PyObject_Print`` is called, and this function is file-based, so it ends with a ``fprintf`` call, which is not something we want. In conclusion, we need stdout *not* to be a file object. + + Given the situation described, the best solution seems to be reimplementing ``raw_input`` and ``input`` builtin functions and monkeypatching ``__builtins__``. This is done by our ``raw_input`` module on Python 2. + +- Similarly to the input from ``sys.stdin`` the arguments in ``sys.argv`` are also ``bytes`` on Python 2 and the original ones may not be reconstructable. To overcome this we add ``unicode_argv`` module. The function ``unicode_argv.get_unicode_argv`` returns Unicode version of ``sys.argv`` obtained by WinAPI functions ``GetCommandLineW`` and ``CommandLineToArgvW``. The function ``unicode_argv.enable`` monkeypatches ``sys.argv`` with the Unicode arguments. + + +Installation +------------ + +Install the package from PyPI via ``pip install win-unicode-console`` (recommended), or download the archive and install it from the archive (e.g. ``pip install win_unicode_console-0.x.zip``), or install the package manually by placing directory ``win_unicode_console`` and module ``run.py`` from the archive to the ``site-packages`` directory of your Python installation. + + +Usage +----- + +The top-level ``win_unicode_console`` module contains a function ``enable``, which installs various fixes offered by ``win_unicode_console`` modules, and a function ``disable``, which restores the original environment. By default, custom stream objects are installed as well as a custom readline hook. On Python 2, ``raw_input`` and ``input`` functions are monkeypatched.
``sys.argv`` is not monkeypatched by default since unfortunately some Python 2 code strictly assumes ``str`` instances in ``sys.argv`` list. Use ``enable(use_unicode_argv=True)`` if you want the monkeypatching. For further customization, see the sources. The logic should be clear. + +Generic usage of the package is just calling ``win_unicode_console.enable()`` whenever the fixes should be applied and ``win_unicode_console.disable()`` to revert all the changes. Note that it should be a responsibility of a Python user on Windows to install ``win_unicode_console`` and fix his Python environment regarding Unicode interaction with console, rather than of a third-party developer enabling ``win_unicode_console`` in his application, which adds a dependency. Our package should be seen as an external patch to Python on Windows rather than a feature package for other packages not directly related to fixing Unicode issues. + +Different ways of how ``win_unicode_console`` can be used to fix a Python environment on Windows follow. + +- *Python patch (recommended).* Just call ``win_unicode_console.enable()`` in your ``sitecustomize`` or ``usercustomize`` module (see https://docs.python.org/3/tutorial/appendix.html#the-customization-modules for more information). This will enable ``win_unicode_console`` on every run of the Python interpreter (unless ``site`` is disabled). Doing so should not break executed scripts in any way. Otherwise, it is a bug of ``win_unicode_console`` that should be fixed. + +- *Opt-in runner.* You may easily run a script with ``win_unicode_console`` enabled by using our ``runner`` module and its helper ``run`` script. To do so, execute ``py -i -m run script.py`` instead of ``py -i script.py`` for interactive mode, and similarly ``py -m run script.py`` instead of ``py script.py`` for non-interactive mode. Of course you may provide arguments to your script: ``py -i -m run script.py arg1 arg2``.
To run the bare interactive interpreter with ``win_unicode_console`` enabled, execute ``py -i -m run``. + +- *Opt-out runner.* In case you are using ``win_unicode_console`` as Python patch, but you want to run a particular script with ``win_unicode_console`` disabled, you can also use the runner. To do so, execute ``py -i -m run --init-disable script.py``. + +- *Customized runner.* To move arbitrary initialization (e.g. enabling ``win_unicode_console`` with non-default arguments) from ``sitecustomize`` to opt-in runner, move it to a separate module and use ``py -i -m run --init-module module script.py``. That will import a module ``module`` on startup instead of enabling ``win_unicode_console`` with default arguments. + + +Compatibility +------------- + +``win_unicode_console`` package was tested on Python 3.4, Python 3.5, and Python 2.7. 32-bit or 64-bit shouldn't matter. It also interacts well with the following packages: + +- ``colorama`` package (https://pypi.python.org/pypi/colorama) makes ANSI escape character sequences (for producing colored terminal text and cursor positioning) work under MS Windows. It does so by wrapping ``sys.stdout`` and ``sys.stderr`` streams. Since ``win_unicode_console`` replaces the streams in order to support Unicode, ``win_unicode_console.enable`` has to be called before ``colorama.init`` so everything works as expected. + + As of ``colorama`` v0.3.3, there was an early binding issue (https://github.com/tartley/colorama/issues/32), so ``win_unicode_console.enable`` has to be called even before importing ``colorama``. Note that is already the case when ``win_unicode_console`` is used as Python patch or as opt-in runner. The issue was already fixed. + +- ``pyreadline`` package (https://pypi.python.org/pypi/pyreadline/2.0) implements GNU readline features on Windows. It provides its own readline hook, which actually supports Unicode input. 
``win_unicode_console.readline_hook`` detects when ``pyreadline`` is active, and in that case, by default, reuses its readline hook rather than installing its own, so GNU readline features are preserved on top of our Unicode streams. + +- ``IPython`` (https://pypi.python.org/pypi/ipython) can be also used with ``win_unicode_console``. + + As of ``IPython`` 3.2.1, there is an early binding issue (https://github.com/ipython/ipython/issues/8669), so ``win_unicode_console.enable`` has to be called even before importing ``IPython``. That is the case when ``win_unicode_console`` is used as Python patch. + + There was also an issue that IPython was not compatible with the builtin function ``raw_input`` returning unicode on Python 2 (https://github.com/ipython/ipython/issues/8670). If you hit this issue, you can make ``win_unicode_console.raw_input.raw_input`` return bytes by enabling it as ``win_unicode_console.enable(raw_input__return_unicode=False)``. This was fixed in IPython 4. + + +Backward incompatibility +------------------------ + +- Since version 0.4, the signature of ``streams.enable`` has been changed because there are now more options for the stream objects to be used. It now accepts a keyword argument for each ``stdin``, ``stdout``, ``stderr``, setting the corresponding stream. ``None`` means “do not set”, ``Ellipsis`` means “use the default value”. + + A function ``streams.enable_only`` was added. It works the same way as ``streams.enable``, but the default value for each parameter is ``None``. + + Functions ``streams.enable_reader``, ``streams.enable_writer``, and ``streams.enable_error_writer`` have been removed. Example: instead of ``streams.enable_reader(transcode=True)`` use ``streams.enable_only(stdin=streams.stdin_text_transcoding)``. + + There are also corresponding changes in top-level ``enable`` function. 
+ +- Since version 0.3, the custom stream objects have the standard filenos, so calling ``input`` doesn't handle Unicode without custom readline hook. + + +Acknowledgements +---------------- + +- The code of ``streams`` module is based on the code submitted to http://bugs.python.org/issue1602. +- The idea of providing custom readline hook and the code of ``readline_hook`` module is based on https://github.com/pyreadline/pyreadline. +- The code related to ``unicode_argv.get_full_unicode_argv`` is based on http://code.activestate.com/recipes/572200/. +- The idea of using path hooks and the code related to ``unicode_argv.argv_setter_hook`` is based on https://mail.python.org/pipermail/python-list/2016-June/710183.html. diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/__init__.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/__init__.py new file mode 100644 index 0000000000..7ec03c523f --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/__init__.py @@ -0,0 +1,54 @@ + +from . import streams, console #, readline_hook +from .info import WINDOWS, PY2 + +if PY2: + from . import raw_input + +if PY2 and WINDOWS: + from . 
import unicode_argv + + +# PY3 # def enable(*, +def enable( + stdin = Ellipsis, + stdout = Ellipsis, + stderr = Ellipsis, + use_readline_hook = False, + use_pyreadline = True, + use_raw_input = True, # PY2 + raw_input__return_unicode = raw_input.RETURN_UNICODE if PY2 else None, + use_unicode_argv = False, # PY2, has some issues + use_repl = False#, + ): + + if not WINDOWS: + return + + streams.enable(stdin=stdin, stdout=stdout, stderr=stderr) + + #if use_readline_hook: + # readline_hook.enable(use_pyreadline=use_pyreadline) + + if PY2 and use_raw_input: + raw_input.enable(raw_input__return_unicode) + + if PY2 and use_unicode_argv: + unicode_argv.enable() + + if use_repl: + console.enable() + +def disable(): + if not WINDOWS: + return + + if console.running_console is not None: + console.disable() + + if PY2: + unicode_argv.disable() + raw_input.disable() + + #readline_hook.disable() + streams.disable() diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/buffer.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/buffer.py new file mode 100644 index 0000000000..4f87d5ffb6 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/buffer.py @@ -0,0 +1,54 @@ + +import ctypes +from ctypes import (byref, POINTER, pythonapi, + c_int, c_char, c_char_p, c_void_p, py_object, c_ssize_t) + +from .info import PY2 + + +c_ssize_p = POINTER(c_ssize_t) + +PyObject_GetBuffer = pythonapi.PyObject_GetBuffer +PyBuffer_Release = pythonapi.PyBuffer_Release + + +PyBUF_SIMPLE = 0 +PyBUF_WRITABLE = 1 + + +class Py_buffer(ctypes.Structure): + _fields_ = [ + ("buf", c_void_p), + ("obj", py_object), + ("len", c_ssize_t), + ("itemsize", c_ssize_t), + ("readonly", c_int), + ("ndim", c_int), + ("format", c_char_p), + ("shape", c_ssize_p), + ("strides", c_ssize_p), + ("suboffsets", c_ssize_p), + ("internal", c_void_p) + ] + + if PY2: + _fields_.insert(-1, ("smalltable", c_ssize_t * 2)) + + @classmethod + def get_from(cls, 
obj, flags=PyBUF_SIMPLE): + buf = cls() + PyObject_GetBuffer(py_object(obj), byref(buf), flags) + return buf + + def release(self): + PyBuffer_Release(byref(self)) + + +def get_buffer(obj, writable=False): + buf = Py_buffer.get_from(obj, PyBUF_WRITABLE if writable else PyBUF_SIMPLE) + try: + buffer_type = c_char * buf.len + return buffer_type.from_address(buf.buf) + finally: + buf.release() + diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/console.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/console.py new file mode 100644 index 0000000000..f5da52ca88 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/console.py @@ -0,0 +1,106 @@ + +from __future__ import print_function # PY2 + +import __main__ +import code +import sys + +from .info import PY2 + + +def print_banner(file=sys.stderr): + print("Python {} on {}".format(sys.version, sys.platform), file=file) + print('Type "help", "copyright", "credits" or "license" for more information.', file=file) + +# PY3 # class InteractiveConsole(code.InteractiveConsole): +class InteractiveConsole(code.InteractiveConsole, object): + # code.InteractiveConsole without banner + # exits on EOF + # also more robust treating of sys.ps1, sys.ps2 + # prints prompt into stderr rather than stdout + # flushes sys.stderr and sys.stdout + + def __init__(self, locals=None, filename="<stdin>"): + self.done = False + # PY3 # super().__init__(locals, filename) + super(InteractiveConsole, self).__init__(locals, filename) + + def raw_input(self, prompt=""): + sys.stderr.write(prompt) + if PY2: + return raw_input() + else: + return input() + + def runcode(self, code): + # PY3 # super().runcode(code) + super(InteractiveConsole, self).runcode(code) + sys.stderr.flush() + sys.stdout.flush() + + def interact(self): + #sys.ps1 = "~>> " + #sys.ps2 = "~.. 
" + + try: + sys.ps1 + except AttributeError: + sys.ps1 = ">>> " + + try: + sys.ps2 + except AttributeError: + sys.ps2 = "... " + + more = 0 + while not self.done: + try: + if more: + try: + prompt = sys.ps2 + except AttributeError: + prompt = "" + else: + try: + prompt = sys.ps1 + except AttributeError: + prompt = "" + + try: + line = self.raw_input(prompt) + except EOFError: + self.on_EOF() + else: + more = self.push(line) + + except KeyboardInterrupt: + self.write("\nKeyboardInterrupt\n") + self.resetbuffer() + more = 0 + + def on_EOF(self): + self.write("\n") + # PY3 # raise SystemExit from None + raise SystemExit + + +running_console = None + +def enable(): + global running_console + + if running_console is not None: + raise RuntimeError("interactive console already running") + else: + running_console = InteractiveConsole(__main__.__dict__) + running_console.interact() + +def disable(): + global running_console + + if running_console is None: + raise RuntimeError("interactive console is not running") + else: + running_console.done = True + running_console = None + diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/file_object.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/file_object.py new file mode 100644 index 0000000000..3c9b56ccb4 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/file_object.py @@ -0,0 +1,55 @@ + +from .info import check_PY2 +check_PY2() + +import ctypes +from ctypes import (byref, pythonapi, + c_int, c_char_p, c_void_p, py_object, c_ssize_t) + + +class FileObject(ctypes.Structure): + _fields_ = [ + #("_ob_next", c_void_p), + #("_ob_prev", c_void_p), + ("ob_refcnt", c_ssize_t), + ("ob_type", c_void_p), + + ("fp", c_void_p), + ("name", py_object), + ("mode", py_object), + ("close", c_void_p), + ("softspace", c_int), + ("binary", c_int), + ("buf", c_char_p), + ("bufend", c_char_p), + ("bufptr", c_char_p), + ("setbuf", c_char_p), + ("univ_newline", 
c_int), + ("newlinetypes", c_int), + ("skipnextlf", c_int), + ("encoding", py_object), + ("errors", py_object), + ("weakreflist", py_object), + ("unlocked_count", c_int), + ("readable", c_int), + ("writable", c_int), + ] + + @classmethod + def from_file(cls, f): + if not isinstance(f, file): + raise TypeError("f has to be a file") + + return cls.from_address(id(f)) + + def set_encoding(self, encoding): + if not isinstance(encoding, str): + raise TypeError("encoding has to be a str") + + pythonapi.PyFile_SetEncoding(byref(self), encoding) + + def copy_file_pointer(self, f): + if not isinstance(f, file): + raise TypeError("f has to be a file") + + self.fp = pythonapi.PyFile_AsFile(py_object(f)) diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/info.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/info.py new file mode 100644 index 0000000000..ed058484a5 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/info.py @@ -0,0 +1,17 @@ + +import sys +import platform + + +WINDOWS = platform.system().lower() == "windows" +PY2 = sys.version_info.major < 3 + +def check_Windows(): + current_platform = platform.system() + + if not WINDOWS: + raise RuntimeError("available only for Windows, not {}.".format(current_platform)) + +def check_PY2(): + if not PY2: + raise RuntimeError("needed only in Python 2") diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/raw_input.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/raw_input.py new file mode 100644 index 0000000000..35adcbb292 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/raw_input.py @@ -0,0 +1,125 @@ + +from .info import check_PY2 +check_PY2() + +import __builtin__ as builtins +import sys +from ctypes import pythonapi, c_char_p, c_void_p, py_object + +from .streams import STDIN, STDOUT +from .readline_hook import check_encodings, stdio_readline + + 
+original_raw_input = builtins.raw_input +original_input = builtins.input + +RETURN_UNICODE = True + + +PyOS_Readline = pythonapi.PyOS_Readline +PyOS_Readline.restype = c_char_p +PyOS_Readline.argtypes = [c_void_p, c_void_p, c_char_p] + +PyFile_AsFile = pythonapi.PyFile_AsFile +PyFile_AsFile.restype = c_void_p +PyFile_AsFile.argtypes = [py_object] + +STDIN_FILE_POINTER = PyFile_AsFile(sys.stdin) +STDOUT_FILE_POINTER = PyFile_AsFile(sys.stdout) + + +def stdout_encode(s): + if isinstance(s, bytes): + return s + encoding = sys.stdout.encoding + errors = sys.stdout.errors + if errors is not None: + return s.encode(encoding, errors) + else: + return s.encode(encoding) + +def stdin_encode(s): + if isinstance(s, bytes): + return s + encoding = sys.stdin.encoding + errors = sys.stdin.errors + if errors is not None: + return s.encode(encoding, errors) + else: + return s.encode(encoding) + +def stdin_decode(b): + if isinstance(b, unicode): + return b + encoding = sys.stdin.encoding + errors = sys.stdin.errors + if errors is not None: + return b.decode(encoding, errors) + else: + return b.decode(encoding) + +def readline(prompt=""): + check_encodings() + prompt_bytes = stdout_encode(prompt) + line_bytes = PyOS_Readline(STDIN_FILE_POINTER, STDOUT_FILE_POINTER, prompt_bytes) + if line_bytes is None: + raise KeyboardInterrupt + else: + return line_bytes + + +def raw_input(prompt=""): + """raw_input([prompt]) -> string + +Read a string from standard input. The trailing newline is stripped. +If the user hits EOF (Unix: Ctl-D, Windows: Ctl-Z+Return), raise EOFError. +On Unix, GNU readline is used if enabled. 
The prompt string, if given, +is printed without a trailing newline before reading.""" + + sys.stderr.flush() + + tty = STDIN.is_a_TTY() and STDOUT.is_a_TTY() + + if RETURN_UNICODE: + if tty: + line_bytes = readline(prompt) + line = stdin_decode(line_bytes) + else: + line = stdio_readline(prompt) + + else: + if tty: + line = readline(prompt) + else: + line_unicode = stdio_readline(prompt) + line = stdin_encode(line_unicode) + + if line: + return line[:-1] # strip strailing "\n" + else: + raise EOFError + +def input(prompt=""): + """input([prompt]) -> value + +Equivalent to eval(raw_input(prompt)).""" + + string = stdin_decode(raw_input(prompt)) + + caller_frame = sys._getframe(1) + globals = caller_frame.f_globals + locals = caller_frame.f_locals + + return eval(string, globals, locals) + + +def enable(return_unicode=RETURN_UNICODE): + global RETURN_UNICODE + RETURN_UNICODE = return_unicode + + builtins.raw_input = raw_input + builtins.input = input + +def disable(): + builtins.raw_input = original_raw_input + builtins.input = original_input diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/readline_hook.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/readline_hook.py new file mode 100644 index 0000000000..c7688d9681 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/readline_hook.py @@ -0,0 +1,149 @@ + +from __future__ import print_function # PY2 + +import sys +import traceback +import warnings +import ctypes.util +from ctypes import (pythonapi, cdll, cast, + c_char_p, c_void_p, c_size_t, CFUNCTYPE) + +from .info import WINDOWS + +try: + import pyreadline +except ImportError: + pyreadline = None + + +def get_libc(): + if WINDOWS: + path = "msvcrt" + else: + path = ctypes.util.find_library("c") + if path is None: + raise RuntimeError("cannot locate libc") + + return cdll[path] + +LIBC = get_libc() + +PyMem_Malloc = pythonapi.PyMem_Malloc +PyMem_Malloc.restype = c_size_t 
+PyMem_Malloc.argtypes = [c_size_t] + +strncpy = LIBC.strncpy +strncpy.restype = c_char_p +strncpy.argtypes = [c_char_p, c_char_p, c_size_t] + +HOOKFUNC = CFUNCTYPE(c_char_p, c_void_p, c_void_p, c_char_p) + +#PyOS_ReadlineFunctionPointer = c_void_p.in_dll(pythonapi, "PyOS_ReadlineFunctionPointer") + + +def new_zero_terminated_string(b): + p = PyMem_Malloc(len(b) + 1) + strncpy(cast(p, c_char_p), b, len(b) + 1) + return p + +def check_encodings(): + if sys.stdin.encoding != sys.stdout.encoding: + # raise RuntimeError("sys.stdin.encoding != sys.stdout.encoding, readline hook doesn't know, which one to use to decode prompt") + + warnings.warn("sys.stdin.encoding == {!r}, whereas sys.stdout.encoding == {!r}, readline hook consumer may assume they are the same".format(sys.stdin.encoding, sys.stdout.encoding), + RuntimeWarning, stacklevel=3) + +def stdio_readline(prompt=""): + sys.stdout.write(prompt) + sys.stdout.flush() + return sys.stdin.readline() + + +class ReadlineHookManager: + def __init__(self): + self.readline_wrapper_ref = HOOKFUNC(self.readline_wrapper) + self.address = cast(self.readline_wrapper_ref, c_void_p).value + #self.original_address = PyOS_ReadlineFunctionPointer.value + self.readline_hook = None + + def readline_wrapper(self, stdin, stdout, prompt): + try: + try: + check_encodings() + except RuntimeError: + traceback.print_exc(file=sys.stderr) + try: + prompt = prompt.decode("utf-8") + except UnicodeDecodeError: + prompt = "" + + else: + prompt = prompt.decode(sys.stdout.encoding) + + try: + line = self.readline_hook(prompt) + except KeyboardInterrupt: + return 0 + else: + return new_zero_terminated_string(line.encode(sys.stdin.encoding)) + + except: + self.restore_original() + print("Internal win_unicode_console error, disabling custom readline hook...", file=sys.stderr) + traceback.print_exc(file=sys.stderr) + return new_zero_terminated_string(b"\n") + + def install_hook(self, hook): + self.readline_hook = hook + PyOS_ReadlineFunctionPointer.value 
= self.address + + def restore_original(self): + self.readline_hook = None + PyOS_ReadlineFunctionPointer.value = self.original_address + + +class PyReadlineManager: + def __init__(self): + self.original_codepage = pyreadline.unicode_helper.pyreadline_codepage + + def set_codepage(self, codepage): + pyreadline.unicode_helper.pyreadline_codepage = codepage + + def restore_original(self): + self.set_codepage(self.original_codepage) + +def pyreadline_is_active(): + if not pyreadline: + return False + + ref = pyreadline.console.console.readline_ref + if ref is None: + return False + + return cast(ref, c_void_p).value == PyOS_ReadlineFunctionPointer.value + + +manager = ReadlineHookManager() + +if pyreadline: + pyreadline_manager = PyReadlineManager() + + +# PY3 # def enable(*, use_pyreadline=True): +def enable(use_pyreadline=True): + check_encodings() + + if use_pyreadline and pyreadline: + pyreadline_manager.set_codepage(sys.stdin.encoding) + # pyreadline assumes that encoding of all sys.stdio objects is the same + if not pyreadline_is_active(): + manager.install_hook(stdio_readline) + + else: + manager.install_hook(stdio_readline) + +def disable(): + if pyreadline: + pyreadline_manager.restore_original() + else: + manager.restore_original() diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/runner.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/runner.py new file mode 100644 index 0000000000..f3c04e685e --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/runner.py @@ -0,0 +1,199 @@ + +from __future__ import print_function # PY2 + +import __main__ +import argparse +import sys +import traceback +import tokenize +from ctypes import pythonapi, POINTER, c_long, cast +from types import CodeType as Code + +from . 
import console, enable, disable +from .info import PY2 + + +inspect_flag = cast(pythonapi.Py_InspectFlag, POINTER(c_long)).contents + +def set_inspect_flag(value): + inspect_flag.value = int(value) + + +CODE_FIELDS = ["argcount", "kwonlyargcount", "nlocals", "stacksize", + "flags", "code", "consts", "names", "varnames", "filename", + "name", "firstlineno", "lnotab", "freevars", "cellvars"] +if PY2: + CODE_FIELDS.remove("kwonlyargcount") + +def update_code(codeobj, **kwargs): + def field_values(): + for field in CODE_FIELDS: + original_value = getattr(codeobj, "co_{}".format(field)) + value = kwargs.get(field, original_value) + yield value + + return Code(*field_values()) + +def update_code_recursively(codeobj, **kwargs): + updated = {} + + def update(codeobj, **kwargs): + result = updated.get(codeobj, None) + if result is not None: + return result + + if any(isinstance(c, Code) for c in codeobj.co_consts): + consts = tuple(update(c, **kwargs) if isinstance(c, Code) else c + for c in codeobj.co_consts) + else: + consts = codeobj.co_consts + + result = update_code(codeobj, consts=consts, **kwargs) + updated[codeobj] = result + return result + + return update(codeobj, **kwargs) + + +def get_code(path): + if PY2: + from .tokenize_open import read_source_lines + source = u"".join(read_source_lines(path)) + else: + with tokenize.open(path) as f: # opens with detected source encoding + source = f.read() + + try: + code = compile(source, path, "exec", dont_inherit=True) + except UnicodeEncodeError: + code = compile(source, "<encoding error>", "exec", dont_inherit=True) + if PY2: + path = path.encode("utf-8") + code = update_code_recursively(code, filename=path) + # so code constains correct filename (even if it contains Unicode) + # and tracebacks show contents of code lines + + return code + + +def print_exception_without_first_line(etype, value, tb, limit=None, file=None, chain=True): + if file is None: + file = sys.stderr + + lines = iter(traceback.TracebackException( + 
type(value), value, tb, limit=limit).format(chain=chain)) + + next(lines) + for line in lines: + print(line, file=file, end="") + + +def run_script(args): + sys.argv = [args.script] + args.script_arguments + path = args.script + __main__.__file__ = path + + try: + code = get_code(path) + except Exception as e: + traceback.print_exception(e.__class__, e, None, file=sys.stderr) + else: + try: + exec(code, __main__.__dict__) + except BaseException as e: + if not sys.flags.inspect and isinstance(e, SystemExit): + raise + + elif PY2: # Python 2 produces tracebacks in mixed encoding (!) + etype, e, tb = sys.exc_info() + for line in traceback.format_exception(etype, e, tb.tb_next): + line = line.decode("utf-8", "replace") + try: + sys.stderr.write(line) + except UnicodeEncodeError: + line = line.encode(sys.stderr.encoding, "backslashreplace") + sys.stderr.write(line) + + sys.stderr.flush() # is this needed? + + else: # PY3 + traceback.print_exception(e.__class__, e, e.__traceback__.tb_next, file=sys.stderr) + +def run_init(args): + if args.init == "enable": + enable() + elif args.init == "disable": + disable() + elif args.init == "module": + __import__(args.module) + elif args.init == "none": + pass + else: + raise ValueError("unknown runner init mode {}".format(repr(args.init))) + +def run_with_custom_repl(args): + run_init(args) + + if args.script: + run_script(args) + + if sys.flags.interactive or not args.script: + if sys.flags.interactive and not args.script: + console.print_banner() + try: + console.enable() + finally: + set_inspect_flag(0) + +def run_with_standard_repl(args): + run_init(args) + + if args.script: + run_script(args) + + if sys.flags.interactive and not args.script: + console.print_banner() + +def run_arguments(): + parser = argparse.ArgumentParser(description="Runs a script after customizable initialization. 
By default, win_unicode_console is enabled.") + + init_group = parser.add_mutually_exclusive_group() + init_group.add_argument( + "-e", "--init-enable", dest="init", action="store_const", const="enable", + help="enable win_unicode_console on init (default)") + init_group.add_argument( + "-d", "--init-disable", dest="init", action="store_const", const="disable", + help="disable win_unicode_console on init") + init_group.add_argument( + "-m", "--init-module", dest="module", + help="import the given module on init") + init_group.add_argument( + "-n", "--no-init", dest="init", action="store_const", const="none", + help="do nothing special on init") + parser.set_defaults(init="enable") + + repl_group = parser.add_mutually_exclusive_group() + repl_group.add_argument( + "-s", "--standard-repl", dest="use_repl", action="store_false", + help="use the standard Python REPL (default)") + repl_group.add_argument( + "-c", "--custom-repl", dest="use_repl", action="store_true", + help="use win_unicode_console.console REPL") + parser.set_defaults(use_repl=False) + + parser.add_argument("script", nargs="?") + parser.add_argument("script_arguments", nargs=argparse.REMAINDER, metavar="script-arguments") + + try: + args = parser.parse_args(sys.argv[1:]) + except SystemExit: + set_inspect_flag(0) # don't go interactive after printing help + raise + + if args.module: + args.init = "module" + + if args.use_repl: + run_with_custom_repl(args) + else: + run_with_standard_repl(args) diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/streams.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/streams.py new file mode 100644 index 0000000000..6a5eda0c18 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/streams.py @@ -0,0 +1,337 @@ + +import io +import sys +import time +from ctypes import byref, c_ulong + +from .buffer import get_buffer +from .info import WINDOWS, PY2 + +if PY2: + from .file_object import 
FileObject + + +if WINDOWS: + from ctypes import WinDLL, get_last_error, set_last_error, WinError + from msvcrt import get_osfhandle + + kernel32 = WinDLL("kernel32", use_last_error=True) + ReadConsoleW = kernel32.ReadConsoleW + WriteConsoleW = kernel32.WriteConsoleW + GetConsoleMode = kernel32.GetConsoleMode + + +ERROR_SUCCESS = 0 +ERROR_INVALID_HANDLE = 6 +ERROR_NOT_ENOUGH_MEMORY = 8 +ERROR_OPERATION_ABORTED = 995 + +EOF = b"\x1a" + +MAX_BYTES_WRITTEN = 32767 # arbitrary because WriteConsoleW ability to write big buffers depends on heap usage + + +class StandardStreamInfo: + def __init__(self, name, standard_fileno): + self.name = name + self.fileno = standard_fileno + self.handle = get_osfhandle(standard_fileno) if WINDOWS else None + + def __repr__(self): + return "<{} '{}' fileno={} handle={}>".format(self.__class__.__name__, self.name, self.fileno, self.handle) + + @property + def stream(self): + return getattr(sys, self.name) + + def is_a_TTY(self): + # the test used in input() + try: + get_fileno = self.stream.fileno + except AttributeError: # e.g. StringIO in Python 2 + return False + + try: + fileno = get_fileno() + except io.UnsupportedOperation: + return False + else: + return fileno == self.fileno and self.stream.isatty() + + def is_a_console(self): + if self.handle is None: + return False + + if GetConsoleMode(self.handle, byref(c_ulong())): + return True + else: + last_error = get_last_error() + if last_error == ERROR_INVALID_HANDLE: + return False + else: + raise WinError(last_error) + + def should_be_fixed(self): + if self.stream is None: # e.g. 
with IDLE + return True + + return self.is_a_TTY() and self.is_a_console() + +STDIN = StandardStreamInfo("stdin", standard_fileno=0) +STDOUT = StandardStreamInfo("stdout", standard_fileno=1) +STDERR = StandardStreamInfo("stderr", standard_fileno=2) + + +class _ReprMixin: + def __repr__(self): + modname = self.__class__.__module__ + + if PY2: + clsname = self.__class__.__name__ + else: + clsname = self.__class__.__qualname__ + + attributes = [] + for name in ["name", "encoding"]: + try: + value = getattr(self, name) + except AttributeError: + pass + else: + attributes.append("{}={}".format(name, repr(value))) + + return "<{}.{} {}>".format(modname, clsname, " ".join(attributes)) + + +class WindowsConsoleRawIOBase(_ReprMixin, io.RawIOBase): + def __init__(self, name, handle, fileno): + self.name = name + self.handle = handle + self.file_no = fileno + + def fileno(self): + return self.file_no + + def isatty(self): + # PY3 # super().isatty() # for close check in default implementation + super(WindowsConsoleRawIOBase, self).isatty() + return True + +class WindowsConsoleRawReader(WindowsConsoleRawIOBase): + def readable(self): + return True + + def readinto(self, b): + bytes_to_be_read = len(b) + if not bytes_to_be_read: + return 0 + elif bytes_to_be_read % 2: + raise ValueError("cannot read odd number of bytes from UTF-16-LE encoded console") + + buffer = get_buffer(b, writable=True) + code_units_to_be_read = bytes_to_be_read // 2 + code_units_read = c_ulong() + + set_last_error(ERROR_SUCCESS) + ReadConsoleW(self.handle, buffer, code_units_to_be_read, byref(code_units_read), None) + last_error = get_last_error() + if last_error == ERROR_OPERATION_ABORTED: + time.sleep(0.1) # wait for KeyboardInterrupt + if last_error != ERROR_SUCCESS: + raise WinError(last_error) + + if buffer[0] == EOF: + return 0 + else: + return 2 * code_units_read.value # bytes read + +class WindowsConsoleRawWriter(WindowsConsoleRawIOBase): + def writable(self): + return True + + def write(self, b): 
+ bytes_to_be_written = len(b) + buffer = get_buffer(b) + code_units_to_be_written = min(bytes_to_be_written, MAX_BYTES_WRITTEN) // 2 + code_units_written = c_ulong() + + if code_units_to_be_written == 0 != bytes_to_be_written: + raise ValueError("two-byte code units expected, just one byte given") + + if not WriteConsoleW(self.handle, buffer, code_units_to_be_written, byref(code_units_written), None): + exc = WinError(get_last_error()) + if exc.winerror == ERROR_NOT_ENOUGH_MEMORY: + exc.strerror += " Try to lower `win_unicode_console.streams.MAX_BYTES_WRITTEN`." + raise exc + + return 2 * code_units_written.value # bytes written + + +class _TextStreamWrapperMixin(_ReprMixin): + def __init__(self, base): + self.base = base + + @property + def encoding(self): + return self.base.encoding + + @property + def errors(self): + return self.base.errors + + @property + def line_buffering(self): + return self.base.line_buffering + + def seekable(self): + return self.base.seekable() + + def readable(self): + return self.base.readable() + + def writable(self): + return self.base.writable() + + def flush(self): + self.base.flush() + + def close(self): + self.base.close() + + @property + def closed(self): + return self.base.closed + + @property + def name(self): + return self.base.name + + def fileno(self): + return self.base.fileno() + + def isatty(self): + return self.base.isatty() + + def write(self, s): + return self.base.write(s) + + def tell(self): + return self.base.tell() + + def truncate(self, pos=None): + return self.base.truncate(pos) + + def seek(self, cookie, whence=0): + return self.base.seek(cookie, whence) + + def read(self, size=None): + return self.base.read(size) + + def __next__(self): + return next(self.base) + + def readline(self, size=-1): + return self.base.readline(size) + + @property + def newlines(self): + return self.base.newlines + +class TextStreamWrapper(_TextStreamWrapperMixin, io.TextIOBase): + pass + +class 
TextTranscodingWrapper(TextStreamWrapper): + encoding = None # disable the descriptor + + def __init__(self, base, encoding): + # PY3 # super().__init__(base) + super(TextTranscodingWrapper, self).__init__(base) + self.encoding = encoding + +class StrStreamWrapper(TextStreamWrapper): + def write(self, s): + if isinstance(s, bytes): + s = s.decode(self.encoding) + + self.base.write(s) + +if PY2: + class FileobjWrapper(_TextStreamWrapperMixin, file): + def __init__(self, base, f): + super(FileobjWrapper, self).__init__(base) + fileobj = self._fileobj = FileObject.from_file(self) + fileobj.set_encoding(base.encoding) + fileobj.copy_file_pointer(f) + fileobj.readable = base.readable() + fileobj.writable = base.writable() + + # needed for the right interpretation of unicode literals in interactive mode when win_unicode_console is enabled in sitecustomize since Py_Initialize changes encoding afterwards + def _reset_encoding(self): + self._fileobj.set_encoding(self.base.encoding) + + def readline(self, size=-1): + self._reset_encoding() + return self.base.readline(size) + + +if WINDOWS: + stdin_raw = WindowsConsoleRawReader("<stdin>", STDIN.handle, STDIN.fileno) + stdout_raw = WindowsConsoleRawWriter("<stdout>", STDOUT.handle, STDOUT.fileno) + stderr_raw = WindowsConsoleRawWriter("<stderr>", STDERR.handle, STDERR.fileno) + + stdin_text = io.TextIOWrapper(io.BufferedReader(stdin_raw), encoding="utf-16-le", line_buffering=True) + stdout_text = io.TextIOWrapper(io.BufferedWriter(stdout_raw), encoding="utf-16-le", line_buffering=True) + stderr_text = io.TextIOWrapper(io.BufferedWriter(stderr_raw), encoding="utf-16-le", line_buffering=True) + + stdin_text_transcoded = TextTranscodingWrapper(stdin_text, encoding="utf-8") + stdout_text_transcoded = TextTranscodingWrapper(stdout_text, encoding="utf-8") + stderr_text_transcoded = TextTranscodingWrapper(stderr_text, encoding="utf-8") + + stdout_text_str = StrStreamWrapper(stdout_text_transcoded) + stderr_text_str = 
StrStreamWrapper(stderr_text_transcoded) + if PY2: + stdin_text_fileobj = FileobjWrapper(stdin_text_transcoded, sys.__stdin__) + stdout_text_str_fileobj = FileobjWrapper(stdout_text_str, sys.__stdout__) + + +def disable(): + sys.stdin.flush() + sys.stdout.flush() + sys.stderr.flush() + sys.stdin = sys.__stdin__ + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + +# PY3 # def enable(*, stdin=Ellipsis, stdout=Ellipsis, stderr=Ellipsis): +def enable(stdin=Ellipsis, stdout=Ellipsis, stderr=Ellipsis): + if not WINDOWS: + return + + # defaults + if PY2: + if stdin is Ellipsis: + stdin = stdin_text_fileobj + if stdout is Ellipsis: + stdout = stdout_text_str + if stderr is Ellipsis: + stderr = stderr_text_str + else: # transcoding because Python tokenizer cannot handle UTF-16 + if stdin is Ellipsis: + stdin = stdin_text_transcoded + if stdout is Ellipsis: + stdout = stdout_text_transcoded + if stderr is Ellipsis: + stderr = stderr_text_transcoded + + if stdin is not None and STDIN.should_be_fixed(): + sys.stdin = stdin + if stdout is not None and STDOUT.should_be_fixed(): + sys.stdout.flush() + sys.stdout = stdout + if stderr is not None and STDERR.should_be_fixed(): + sys.stderr.flush() + sys.stderr = stderr + +# PY3 # def enable_only(*, stdin=None, stdout=None, stderr=None): +def enable_only(stdin=None, stdout=None, stderr=None): + enable(stdin=stdin, stdout=stdout, stderr=stderr) diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/tokenize_open.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/tokenize_open.py new file mode 100644 index 0000000000..aa583dfa5f --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/tokenize_open.py @@ -0,0 +1,162 @@ +"""Backport of tokenize.open from Python 3.5 + +This is the exact Python 3.5 with the following differences: + - detect_encoding_ex is detect_encoding from Python 3.5 returning also a bool whether a cookie was found + - detect_encoding 
calls detect_encoding_ex, so that its signature is the same as in Python 3.5 + - function read_source_lines was added +""" + +from codecs import lookup, BOM_UTF8 +from io import TextIOWrapper, open as _builtin_open +import re + +re_ASCII = 256 # not present in Python 2 +cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re_ASCII) +blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re_ASCII) + + +def _get_normal_name(orig_enc): + """Imitates get_normal_name in tokenizer.c.""" + # Only care about the first 12 characters. + enc = orig_enc[:12].lower().replace("_", "-") + if enc == "utf-8" or enc.startswith("utf-8-"): + return "utf-8" + if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ + enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): + return "iso-8859-1" + return orig_enc + + +def detect_encoding(readline): + """ + The detect_encoding() function is used to detect the encoding that should + be used to decode a Python source file. It requires one argument, readline, + in the same way as the tokenize() generator. + + It will call readline a maximum of twice, and return the encoding used + (as a string) and a list of any lines (left as bytes) it has read in. + + It detects the encoding from the presence of a utf-8 bom or an encoding + cookie as specified in pep-0263. If both a bom and a cookie are present, + but disagree, a SyntaxError will be raised. If the encoding cookie is an + invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found, + 'utf-8-sig' is returned. + + If no encoding is specified, then the default of 'utf-8' will be returned. + """ + + return detect_encoding_ex(readline)[:2] + + +def detect_encoding_ex(readline): + try: + filename = readline.__self__.name + except AttributeError: + filename = None + bom_found = False + encoding = None + default = 'utf-8' + def read_or_stop(): + try: + return readline() + except StopIteration: + return b'' + + def find_cookie(line): + try: + # Decode as UTF-8. 
Either the line is an encoding declaration, + # in which case it should be pure ASCII, or it must be UTF-8 + # per default encoding. + line_string = line.decode('utf-8') + except UnicodeDecodeError: + msg = "invalid or missing encoding declaration" + if filename is not None: + msg = '{} for {!r}'.format(msg, filename) + raise SyntaxError(msg) + + match = cookie_re.match(line_string) + if not match: + return None + encoding = _get_normal_name(match.group(1)) + try: + codec = lookup(encoding) + except LookupError: + # This behaviour mimics the Python interpreter + if filename is None: + msg = "unknown encoding: " + encoding + else: + msg = "unknown encoding for {!r}: {}".format(filename, + encoding) + raise SyntaxError(msg) + + if bom_found: + if encoding != 'utf-8': + # This behaviour mimics the Python interpreter + if filename is None: + msg = 'encoding problem: utf-8' + else: + msg = 'encoding problem for {!r}: utf-8'.format(filename) + raise SyntaxError(msg) + encoding += '-sig' + return encoding + + first = read_or_stop() + if first.startswith(BOM_UTF8): + bom_found = True + first = first[3:] + default = 'utf-8-sig' + if not first: + return default, [], False + + encoding = find_cookie(first) + if encoding: + return encoding, [first], True + if not blank_re.match(first): + return default, [first], False + + second = read_or_stop() + if not second: + return default, [first], False + + encoding = find_cookie(second) + if encoding: + return encoding, [first, second], True + + return default, [first, second], False + + +def open(filename): + """Open a file in read only mode using the encoding detected by + detect_encoding(). 
+ """ + buffer = _builtin_open(filename, 'rb') + try: + encoding, lines = detect_encoding(buffer.readline) + buffer.seek(0) + text = TextIOWrapper(buffer, encoding, line_buffering=True) + text.mode = 'r' + return text + except: + buffer.close() + raise + +def read_source_lines(filename): + buffer = _builtin_open(filename, 'rb') + try: + encoding, lines, cookie_present = detect_encoding_ex(buffer.readline) + buffer.seek(0) + text = TextIOWrapper(buffer, encoding, line_buffering=True) + text.mode = 'r' + except: + buffer.close() + raise + + with text: + if cookie_present: + for i in lines: + yield text.readline().replace("coding", "Coding") + # so compile() won't complain about encoding declaration in a Unicode string + # see 2.7/Python/ast.c:228 + + for line in text: + yield line diff --git a/contrib/deprecated/python/win-unicode-console/win_unicode_console/unicode_argv.py b/contrib/deprecated/python/win-unicode-console/win_unicode_console/unicode_argv.py new file mode 100644 index 0000000000..d23bc05f12 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/win_unicode_console/unicode_argv.py @@ -0,0 +1,79 @@ +"""Get Unicode argv strings in Python 2 on Windows + +get_full_unicode_argv based on +http://code.activestate.com/recipes/572200/ + +argv_setter_hook based on +https://mail.python.org/pipermail/python-list/2016-June/710183.html +""" + +import sys +from ctypes import WinDLL, c_int, POINTER, byref +from ctypes.wintypes import LPCWSTR, LPWSTR + +kernel32 = WinDLL("kernel32", use_last_error=True) +shell32 = WinDLL("shell32", use_last_error=True) + +GetCommandLineW = kernel32.GetCommandLineW +GetCommandLineW.argtypes = () +GetCommandLineW.restype = LPCWSTR + +CommandLineToArgvW = shell32.CommandLineToArgvW +CommandLineToArgvW.argtypes = (LPCWSTR, POINTER(c_int)) +CommandLineToArgvW.restype = POINTER(LPWSTR) + +LocalFree = kernel32.LocalFree + + +def get_full_unicode_argv(): + cmd = GetCommandLineW() + argc = c_int(0) + argv = CommandLineToArgvW(cmd, 
byref(argc)) + py_argv = [arg for i, arg in zip(range(argc.value), argv)] + LocalFree(argv) + return py_argv + +def get_unicode_argv(): + if original_argv == [""]: + return [u""] + + new_argv = get_full_unicode_argv()[-len(original_argv):] + + if original_argv[0] == "-c": + new_argv[0] = u"-c" + + return new_argv + + +original_argv = None + +def argv_setter_hook(path): + global original_argv + + if original_argv is not None: # already got it + raise ImportError + + try: + original_argv = sys.argv + except AttributeError: + pass + else: + enable() + finally: + raise ImportError + +def enable(): + global original_argv + + if original_argv is None: + try: + original_argv = sys.argv + except AttributeError: # in sitecustomize in Python 2 + sys.path_hooks[:0] = [argv_setter_hook] + return + + sys.argv = get_unicode_argv() + +def disable(): + if original_argv is not None: + sys.argv = original_argv diff --git a/contrib/deprecated/python/win-unicode-console/ya.make b/contrib/deprecated/python/win-unicode-console/ya.make new file mode 100644 index 0000000000..b56f61a378 --- /dev/null +++ b/contrib/deprecated/python/win-unicode-console/ya.make @@ -0,0 +1,40 @@ +# Generated by devtools/yamaker (pypi). + +PY2_LIBRARY() + +VERSION(0.5) + +LICENSE(MIT) + +PEERDIR( + library/python/symbols/win_unicode_console +) + +NO_LINT() + +NO_CHECK_IMPORTS( + win_unicode_console.runner +) + +PY_SRCS( + TOP_LEVEL + win_unicode_console/__init__.py + win_unicode_console/buffer.py + win_unicode_console/console.py + win_unicode_console/file_object.py + win_unicode_console/info.py + win_unicode_console/raw_input.py + win_unicode_console/readline_hook.py + win_unicode_console/runner.py + win_unicode_console/streams.py + win_unicode_console/tokenize_open.py + win_unicode_console/unicode_argv.py +) + +RESOURCE_FILES( + PREFIX contrib/deprecated/python/win-unicode-console/ + .dist-info/METADATA + .dist-info/top_level.txt +) + +END() |