author | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:24:06 +0300
committer | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:41:34 +0300
commit | e0e3e1717e3d33762ce61950504f9637a6e669ed (patch)
tree | bca3ff6939b10ed60c3d5c12439963a1146b9711 /contrib/python/parso
parent | 38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff)
download | ydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz
add ydb deps
Diffstat (limited to 'contrib/python/parso')
188 files changed, 35668 insertions, 0 deletions
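The dependency added below is parso 0.7.1, a Python parser with error recovery. As a rough illustration of the API this vendored package provides (the broken snippet and the grammar version in the sketch are made up for the example, not part of this change), syntax problems can be listed with ``Grammar.iter_errors``, and each issue carries a code, a message and a position:

.. code-block:: python

    import parso

    # Load the grammar for a specific Python version and parse broken code;
    # error recovery is enabled by default, so parsing still returns a tree.
    grammar = parso.load_grammar(version="3.6")
    module = grammar.parse("def f(:\n    return 1\n")

    for issue in grammar.iter_errors(module):
        # Each Issue exposes .code, .message, .start_pos and .end_pos.
        print(issue.code, issue.start_pos, issue.message)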
diff --git a/contrib/python/parso/py2/.dist-info/METADATA b/contrib/python/parso/py2/.dist-info/METADATA new file mode 100644 index 0000000000..083385aabe --- /dev/null +++ b/contrib/python/parso/py2/.dist-info/METADATA @@ -0,0 +1,253 @@ +Metadata-Version: 2.1 +Name: parso +Version: 0.7.1 +Summary: A Python Parser +Home-page: https://github.com/davidhalter/parso +Author: David Halter +Author-email: davidhalter88@gmail.com +Maintainer: David Halter +Maintainer-email: davidhalter88@gmail.com +License: MIT +Keywords: python parser parsing +Platform: any +Classifier: Development Status :: 4 - Beta +Classifier: Environment :: Plugins +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Editors :: Integrated Development Environments (IDE) +Classifier: Topic :: Utilities +Classifier: Typing :: Typed +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* +Provides-Extra: testing +Requires-Dist: docopt ; extra == 'testing' +Requires-Dist: pytest (>=3.0.7) ; extra == 'testing' + +################################################################### +parso - A Python Parser +################################################################### + + +.. image:: https://travis-ci.org/davidhalter/parso.svg?branch=master + :target: https://travis-ci.org/davidhalter/parso + :alt: Travis CI build status + +.. image:: https://coveralls.io/repos/github/davidhalter/parso/badge.svg?branch=master + :target: https://coveralls.io/github/davidhalter/parso?branch=master + :alt: Coverage Status + +.. image:: https://pepy.tech/badge/parso + :target: https://pepy.tech/project/parso + :alt: PyPI Downloads + +.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png + +Parso is a Python parser that supports error recovery and round-trip parsing +for different Python versions (in multiple Python versions). Parso is also able +to list multiple syntax errors in your python file. + +Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful +for other projects as well. + +Parso consists of a small API to parse Python and analyse the syntax tree. + +A simple example: + +.. code-block:: python + + >>> import parso + >>> module = parso.parse('hello + 1', version="3.6") + >>> expr = module.children[0] + >>> expr + PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>]) + >>> print(expr.get_code()) + hello + 1 + >>> name = expr.children[0] + >>> name + <Name: hello@1,0> + >>> name.end_pos + (1, 5) + >>> expr.end_pos + (1, 9) + +To list multiple issues: + +.. 
code-block:: python + + >>> grammar = parso.load_grammar() + >>> module = grammar.parse('foo +\nbar\ncontinue') + >>> error1, error2 = grammar.iter_errors(module) + >>> error1.message + 'SyntaxError: invalid syntax' + >>> error2.message + "SyntaxError: 'continue' not properly in loop" + +Resources +========= + +- `Testing <https://parso.readthedocs.io/en/latest/docs/development.html#testing>`_ +- `PyPI <https://pypi.python.org/pypi/parso>`_ +- `Docs <https://parso.readthedocs.org/en/latest/>`_ +- Uses `semantic versioning <https://semver.org/>`_ + +Installation +============ + + pip install parso + +Future +====== + +- There will be better support for refactoring and comments. Stay tuned. +- There's a WIP PEP8 validator. It's however not in a good shape, yet. + +Known Issues +============ + +- `async`/`await` are already used as keywords in Python3.6. +- `from __future__ import print_function` is not ignored. + + +Acknowledgements +================ + +- Guido van Rossum (@gvanrossum) for creating the parser generator pgen2 + (originally used in lib2to3). +- `Salome Schneider <https://www.crepes-schnaegg.ch/cr%C3%AApes-schn%C3%A4gg/kunst-f%C3%BCrs-cr%C3%AApes-mobil/>`_ + for the extremely awesome parso logo. + + +.. _jedi: https://github.com/davidhalter/jedi + + +.. :changelog: + +Changelog +--------- + +0.7.1 (2020-07-24) +++++++++++++++++++ + +- Fixed a couple of smaller bugs (mostly syntax error detection in + ``Grammar.iter_errors``) + +This is going to be the last release that supports Python 2.7, 3.4 and 3.5. + +0.7.0 (2020-04-13) +++++++++++++++++++ + +- Fix a lot of annoying bugs in the diff parser. The fuzzer did not find + issues anymore even after running it for more than 24 hours (500k tests). +- Small grammar change: suites can now contain newlines even after a newline. + This should really not matter if you don't use error recovery. It allows for + nicer error recovery. 
+ +0.6.2 (2020-02-27) +++++++++++++++++++ + +- Bugfixes +- Add Grammar.refactor (might still be subject to change until 0.7.0) + +0.6.1 (2020-02-03) +++++++++++++++++++ + +- Add ``parso.normalizer.Issue.end_pos`` to make it possible to know where an + issue ends + +0.6.0 (2020-01-26) +++++++++++++++++++ + +- Dropped Python 2.6/Python 3.3 support +- del_stmt names are now considered as a definition + (for ``name.is_definition()``) +- Bugfixes + +0.5.2 (2019-12-15) +++++++++++++++++++ + +- Add include_setitem to get_definition/is_definition and get_defined_names (#66) +- Fix named expression error listing (#89, #90) +- Fix some f-string tokenizer issues (#93) + +0.5.1 (2019-07-13) +++++++++++++++++++ + +- Fix: Some unicode identifiers were not correctly tokenized +- Fix: Line continuations in f-strings are now working + +0.5.0 (2019-06-20) +++++++++++++++++++ + +- **Breaking Change** comp_for is now called sync_comp_for for all Python + versions to be compatible with the Python 3.8 Grammar +- Added .pyi stubs for a lot of the parso API +- Small FileIO changes + +0.4.0 (2019-04-05) +++++++++++++++++++ + +- Python 3.8 support +- FileIO support, it's now possible to use abstract file IO, support is alpha + +0.3.4 (2019-02-13) ++++++++++++++++++++ + +- Fix an f-string tokenizer error + +0.3.3 (2019-02-06) ++++++++++++++++++++ + +- Fix async errors in the diff parser +- A fix in iter_errors +- This is a very small bugfix release + +0.3.2 (2019-01-24) ++++++++++++++++++++ + +- 20+ bugfixes in the diff parser and 3 in the tokenizer +- A fuzzer for the diff parser, to give confidence that the diff parser is in a + good shape. +- Some bugfixes for f-string + +0.3.1 (2018-07-09) ++++++++++++++++++++ + +- Bugfixes in the diff parser and keyword-only arguments + +0.3.0 (2018-06-30) ++++++++++++++++++++ + +- Rewrote the pgen2 parser generator. + +0.2.1 (2018-05-21) ++++++++++++++++++++ + +- A bugfix for the diff parser. +- Grammar files can now be loaded from a specific path. + +0.2.0 (2018-04-15) ++++++++++++++++++++ + +- f-strings are now parsed as a part of the normal Python grammar. This makes + it way easier to deal with them. + +0.1.1 (2017-11-05) ++++++++++++++++++++ + +- Fixed a few bugs in the caching layer +- Added support for Python 3.7 + +0.1.0 (2017-09-04) ++++++++++++++++++++ + +- Pulling the library out of Jedi. Some APIs will definitely change. 
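The ``Grammar.refactor`` API mentioned in the 0.6.2 entry above is not demonstrated in the README; a minimal, illustrative sketch (the leaf selection via ``get_first_leaf`` and the replacement string are assumptions made for the example) would look roughly like this:

.. code-block:: python

    import parso

    grammar = parso.load_grammar(version="3.6")
    module = grammar.parse("x = 1\n")
    name = module.get_first_leaf()  # the Name leaf for ``x``
    # refactor() re-renders the tree, substituting mapped nodes/leaves
    # with the given replacement strings.
    print(grammar.refactor(module, {name: "y"}))  # -> "y = 1\n"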
+ + diff --git a/contrib/python/parso/py2/.dist-info/top_level.txt b/contrib/python/parso/py2/.dist-info/top_level.txt new file mode 100644 index 0000000000..0e23344047 --- /dev/null +++ b/contrib/python/parso/py2/.dist-info/top_level.txt @@ -0,0 +1 @@ +parso diff --git a/contrib/python/parso/py2/AUTHORS.txt b/contrib/python/parso/py2/AUTHORS.txt new file mode 100644 index 0000000000..4ca3d0b4ee --- /dev/null +++ b/contrib/python/parso/py2/AUTHORS.txt @@ -0,0 +1,57 @@ +Main Authors +============ + +David Halter (@davidhalter) <davidhalter88@gmail.com> + +Code Contributors +================= +Alisdair Robertson (@robodair) + + +Code Contributors (to Jedi and therefore possibly to this library) +================================================================== + +Takafumi Arakaki (@tkf) <aka.tkf@gmail.com> +Danilo Bargen (@dbrgn) <mail@dbrgn.ch> +Laurens Van Houtven (@lvh) <_@lvh.cc> +Aldo Stracquadanio (@Astrac) <aldo.strac@gmail.com> +Jean-Louis Fuchs (@ganwell) <ganwell@fangorn.ch> +tek (@tek) +Yasha Borevich (@jjay) <j.borevich@gmail.com> +Aaron Griffin <aaronmgriffin@gmail.com> +andviro (@andviro) +Mike Gilbert (@floppym) <floppym@gentoo.org> +Aaron Meurer (@asmeurer) <asmeurer@gmail.com> +Lubos Trilety <ltrilety@redhat.com> +Akinori Hattori (@hattya) <hattya@gmail.com> +srusskih (@srusskih) +Steven Silvester (@blink1073) +Colin Duquesnoy (@ColinDuquesnoy) <colin.duquesnoy@gmail.com> +Jorgen Schaefer (@jorgenschaefer) <contact@jorgenschaefer.de> +Fredrik Bergroth (@fbergroth) +Mathias Fußenegger (@mfussenegger) +Syohei Yoshida (@syohex) <syohex@gmail.com> +ppalucky (@ppalucky) +immerrr (@immerrr) immerrr@gmail.com +Albertas Agejevas (@alga) +Savor d'Isavano (@KenetJervet) <newelevenken@163.com> +Phillip Berndt (@phillipberndt) <phillip.berndt@gmail.com> +Ian Lee (@IanLee1521) <IanLee1521@gmail.com> +Farkhad Khatamov (@hatamov) <comsgn@gmail.com> +Kevin Kelley (@kelleyk) <kelleyk@kelleyk.net> +Sid Shanker (@squidarth) <sid.p.shanker@gmail.com> +Reinoud Elhorst (@reinhrst) +Guido van Rossum (@gvanrossum) <guido@python.org> +Dmytro Sadovnychyi (@sadovnychyi) <jedi@dmit.ro> +Cristi Burcă (@scribu) +bstaint (@bstaint) +Mathias Rav (@Mortal) <rav@cs.au.dk> +Daniel Fiterman (@dfit99) <fitermandaniel2@gmail.com> +Simon Ruggier (@sruggier) +Élie Gouzien (@ElieGouzien) +Tim Gates (@timgates42) <tim.gates@iress.com> +Batuhan Taskaya (@isidentical) <isidentical@gmail.com> +Jocelyn Boullier (@Kazy) <jocelyn@boullier.bzh> + + +Note: (@user) means a github user name. diff --git a/contrib/python/parso/py2/LICENSE.txt b/contrib/python/parso/py2/LICENSE.txt new file mode 100644 index 0000000000..08c41db014 --- /dev/null +++ b/contrib/python/parso/py2/LICENSE.txt @@ -0,0 +1,86 @@ +All contributions towards parso are MIT licensed. + +Some Python files have been taken from the standard library and are therefore +PSF licensed. Modifications on these files are dual licensed (both MIT and +PSF). These files are: + +- parso/pgen2/* +- parso/tokenize.py +- parso/token.py +- test/test_pgen2.py + +Also some test files under test/normalizer_issue_files have been copied from +https://github.com/PyCQA/pycodestyle (Expat License == MIT License). 
+ +------------------------------------------------------------------------------- +The MIT License (MIT) + +Copyright (c) <2013-2017> <David Halter and others, see AUTHORS.txt> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +------------------------------------------------------------------------------- + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015 Python Software Foundation; All Rights Reserved" +are retained in Python alone or in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. 
This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. diff --git a/contrib/python/parso/py2/README.rst b/contrib/python/parso/py2/README.rst new file mode 100644 index 0000000000..d87f2ef776 --- /dev/null +++ b/contrib/python/parso/py2/README.rst @@ -0,0 +1,95 @@ +################################################################### +parso - A Python Parser +################################################################### + + +.. image:: https://travis-ci.org/davidhalter/parso.svg?branch=master + :target: https://travis-ci.org/davidhalter/parso + :alt: Travis CI build status + +.. image:: https://coveralls.io/repos/github/davidhalter/parso/badge.svg?branch=master + :target: https://coveralls.io/github/davidhalter/parso?branch=master + :alt: Coverage Status + +.. image:: https://pepy.tech/badge/parso + :target: https://pepy.tech/project/parso + :alt: PyPI Downloads + +.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png + +Parso is a Python parser that supports error recovery and round-trip parsing +for different Python versions (in multiple Python versions). Parso is also able +to list multiple syntax errors in your python file. + +Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful +for other projects as well. + +Parso consists of a small API to parse Python and analyse the syntax tree. + +A simple example: + +.. code-block:: python + + >>> import parso + >>> module = parso.parse('hello + 1', version="3.6") + >>> expr = module.children[0] + >>> expr + PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>]) + >>> print(expr.get_code()) + hello + 1 + >>> name = expr.children[0] + >>> name + <Name: hello@1,0> + >>> name.end_pos + (1, 5) + >>> expr.end_pos + (1, 9) + +To list multiple issues: + +.. code-block:: python + + >>> grammar = parso.load_grammar() + >>> module = grammar.parse('foo +\nbar\ncontinue') + >>> error1, error2 = grammar.iter_errors(module) + >>> error1.message + 'SyntaxError: invalid syntax' + >>> error2.message + "SyntaxError: 'continue' not properly in loop" + +Resources +========= + +- `Testing <https://parso.readthedocs.io/en/latest/docs/development.html#testing>`_ +- `PyPI <https://pypi.python.org/pypi/parso>`_ +- `Docs <https://parso.readthedocs.org/en/latest/>`_ +- Uses `semantic versioning <https://semver.org/>`_ + +Installation +============ + + pip install parso + +Future +====== + +- There will be better support for refactoring and comments. Stay tuned. +- There's a WIP PEP8 validator. It's however not in a good shape, yet. + +Known Issues +============ + +- `async`/`await` are already used as keywords in Python3.6. +- `from __future__ import print_function` is not ignored. + + +Acknowledgements +================ + +- Guido van Rossum (@gvanrossum) for creating the parser generator pgen2 + (originally used in lib2to3). +- `Salome Schneider <https://www.crepes-schnaegg.ch/cr%C3%AApes-schn%C3%A4gg/kunst-f%C3%BCrs-cr%C3%AApes-mobil/>`_ + for the extremely awesome parso logo. + + +.. 
_jedi: https://github.com/davidhalter/jedi diff --git a/contrib/python/parso/py2/parso/__init__.py b/contrib/python/parso/py2/parso/__init__.py new file mode 100644 index 0000000000..f331984c3e --- /dev/null +++ b/contrib/python/parso/py2/parso/__init__.py @@ -0,0 +1,58 @@ +r""" +Parso is a Python parser that supports error recovery and round-trip parsing +for different Python versions (in multiple Python versions). Parso is also able +to list multiple syntax errors in your python file. + +Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful +for other projects as well. + +Parso consists of a small API to parse Python and analyse the syntax tree. + +.. _jedi: https://github.com/davidhalter/jedi + +A simple example: + +>>> import parso +>>> module = parso.parse('hello + 1', version="3.6") +>>> expr = module.children[0] +>>> expr +PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>]) +>>> print(expr.get_code()) +hello + 1 +>>> name = expr.children[0] +>>> name +<Name: hello@1,0> +>>> name.end_pos +(1, 5) +>>> expr.end_pos +(1, 9) + +To list multiple issues: + +>>> grammar = parso.load_grammar() +>>> module = grammar.parse('foo +\nbar\ncontinue') +>>> error1, error2 = grammar.iter_errors(module) +>>> error1.message +'SyntaxError: invalid syntax' +>>> error2.message +"SyntaxError: 'continue' not properly in loop" +""" + +from parso.parser import ParserSyntaxError +from parso.grammar import Grammar, load_grammar +from parso.utils import split_lines, python_bytes_to_unicode + + +__version__ = '0.7.1' + + +def parse(code=None, **kwargs): + """ + A utility function to avoid loading grammars. + Params are documented in :py:meth:`parso.Grammar.parse`. + + :param str version: The version used by :py:func:`parso.load_grammar`. + """ + version = kwargs.pop('version', None) + grammar = load_grammar(version=version) + return grammar.parse(code, **kwargs) diff --git a/contrib/python/parso/py2/parso/_compatibility.py b/contrib/python/parso/py2/parso/_compatibility.py new file mode 100644 index 0000000000..4c966d61b8 --- /dev/null +++ b/contrib/python/parso/py2/parso/_compatibility.py @@ -0,0 +1,101 @@ +""" +To ensure compatibility from Python ``2.7`` - ``3.3``, a module has been +created. Clearly there is huge need to use conforming syntax. +""" +import os +import sys +import platform + +# unicode function +try: + unicode = unicode +except NameError: + unicode = str + +is_pypy = platform.python_implementation() == 'PyPy' + + +def use_metaclass(meta, *bases): + """ Create a class with a metaclass. """ + if not bases: + bases = (object,) + return meta("HackClass", bases, {}) + + +try: + encoding = sys.stdout.encoding + if encoding is None: + encoding = 'utf-8' +except AttributeError: + encoding = 'ascii' + + +def u(string): + """Cast to unicode DAMMIT! + Written because Python2 repr always implicitly casts to a string, so we + have to cast back to a unicode (and we know that we always deal with valid + unicode, because we check that in the beginning). 
+ """ + if sys.version_info.major >= 3: + return str(string) + + if not isinstance(string, unicode): + return unicode(str(string), 'UTF-8') + return string + + +try: + # Python 3.3+ + FileNotFoundError = FileNotFoundError +except NameError: + # Python 2.7 (both IOError + OSError) + FileNotFoundError = EnvironmentError +try: + # Python 3.3+ + PermissionError = PermissionError +except NameError: + # Python 2.7 (both IOError + OSError) + PermissionError = EnvironmentError + + +def utf8_repr(func): + """ + ``__repr__`` methods in Python 2 don't allow unicode objects to be + returned. Therefore cast them to utf-8 bytes in this decorator. + """ + def wrapper(self): + result = func(self) + if isinstance(result, unicode): + return result.encode('utf-8') + else: + return result + + if sys.version_info.major >= 3: + return func + else: + return wrapper + + +if sys.version_info < (3, 5): + """ + A super-minimal shim around listdir that behave like + scandir for the information we need. + """ + class _DirEntry: + + def __init__(self, name, basepath): + self.name = name + self.basepath = basepath + + @property + def path(self): + return os.path.join(self.basepath, self.name) + + def stat(self): + # won't follow symlinks + return os.lstat(os.path.join(self.basepath, self.name)) + + def scandir(dir): + return [_DirEntry(name, dir) for name in os.listdir(dir)] +else: + from os import scandir diff --git a/contrib/python/parso/py2/parso/cache.py b/contrib/python/parso/py2/parso/cache.py new file mode 100644 index 0000000000..68ec32b4ed --- /dev/null +++ b/contrib/python/parso/py2/parso/cache.py @@ -0,0 +1,278 @@ +import time +import os +import sys +import hashlib +import gc +import shutil +import platform +import errno +import logging +import warnings + +try: + import cPickle as pickle +except: + import pickle + +from parso._compatibility import FileNotFoundError, PermissionError, scandir +from parso.file_io import FileIO + +LOG = logging.getLogger(__name__) + +_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10 # 10 minutes +""" +Cached files should survive at least a few minutes. +""" + +_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30 +""" +Maximum time for a cached file to survive if it is not +accessed within. +""" + +_CACHED_SIZE_TRIGGER = 600 +""" +This setting limits the amount of cached files. It's basically a way to start +garbage collection. + +The reasoning for this limit being as big as it is, is the following: + +Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This +makes Jedi use ~500mb of memory. Since we might want a bit more than those few +libraries, we just increase it a bit. +""" + +_PICKLE_VERSION = 33 +""" +Version number (integer) for file system cache. + +Increment this number when there are any incompatible changes in +the parser tree classes. For example, the following changes +are regarded as incompatible. + +- A class name is changed. +- A class is moved to another module. +- A __slot__ of a class is changed. +""" + +_VERSION_TAG = '%s-%s%s-%s' % ( + platform.python_implementation(), + sys.version_info[0], + sys.version_info[1], + _PICKLE_VERSION +) +""" +Short name for distinguish Python implementations and versions. + +It's like `sys.implementation.cache_tag` but for Python2 +we generate something similar. 
See: +http://docs.python.org/3/library/sys.html#sys.implementation +""" + + +def _get_default_cache_path(): + if platform.system().lower() == 'windows': + dir_ = os.path.join(os.getenv('LOCALAPPDATA') + or os.path.expanduser('~'), 'Parso', 'Parso') + elif platform.system().lower() == 'darwin': + dir_ = os.path.join('~', 'Library', 'Caches', 'Parso') + else: + dir_ = os.path.join(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso') + return os.path.expanduser(dir_) + + +_default_cache_path = _get_default_cache_path() +""" +The path where the cache is stored. + +On Linux, this defaults to ``~/.cache/parso/``, on OS X to +``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``. +On Linux, if environment variable ``$XDG_CACHE_HOME`` is set, +``$XDG_CACHE_HOME/parso`` is used instead of the default one. +""" + +_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24 + +def _get_cache_clear_lock(cache_path = None): + """ + The path where the cache lock is stored. + + Cache lock will prevent continous cache clearing and only allow garbage + collection once a day (can be configured in _CACHE_CLEAR_THRESHOLD). + """ + cache_path = cache_path or _get_default_cache_path() + return FileIO(os.path.join(cache_path, "PARSO-CACHE-LOCK")) + + +parser_cache = {} + + +class _NodeCacheItem(object): + def __init__(self, node, lines, change_time=None): + self.node = node + self.lines = lines + if change_time is None: + change_time = time.time() + self.change_time = change_time + self.last_used = change_time + + +def load_module(hashed_grammar, file_io, cache_path=None): + """ + Returns a module or None, if it fails. + """ + p_time = file_io.get_last_modified() + if p_time is None: + return None + + try: + module_cache_item = parser_cache[hashed_grammar][file_io.path] + if p_time <= module_cache_item.change_time: + module_cache_item.last_used = time.time() + return module_cache_item.node + except KeyError: + return _load_from_file_system( + hashed_grammar, + file_io.path, + p_time, + cache_path=cache_path + ) + + +def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None): + cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path) + try: + # SUBBOTNIK-2721 - Для безопасности отключаем загрузку с диска + raise FileNotFoundError + try: + if p_time > os.path.getmtime(cache_path): + # Cache is outdated + return None + except OSError as e: + if e.errno == errno.ENOENT: + # In Python 2 instead of an IOError here we get an OSError. + raise FileNotFoundError + else: + raise + + with open(cache_path, 'rb') as f: + gc.disable() + try: + module_cache_item = pickle.load(f) + finally: + gc.enable() + except FileNotFoundError: + return None + else: + _set_cache_item(hashed_grammar, path, module_cache_item) + LOG.debug('pickle loaded: %s', path) + return module_cache_item.node + + +def _set_cache_item(hashed_grammar, path, module_cache_item): + if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER: + # Garbage collection of old cache files. + # We are basically throwing everything away that hasn't been accessed + # in 10 minutes. 
+ cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL + for key, path_to_item_map in parser_cache.items(): + parser_cache[key] = { + path: node_item + for path, node_item in path_to_item_map.items() + if node_item.last_used > cutoff_time + } + + parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item + + +def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None): + path = file_io.path + try: + p_time = None if path is None else file_io.get_last_modified() + except OSError: + p_time = None + pickling = False + + item = _NodeCacheItem(module, lines, p_time) + _set_cache_item(hashed_grammar, path, item) + if pickling and path is not None: + try: + _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path) + except PermissionError: + # It's not really a big issue if the cache cannot be saved to the + # file system. It's still in RAM in that case. However we should + # still warn the user that this is happening. + warnings.warn( + 'Tried to save a file to %s, but got permission denied.', + Warning + ) + else: + _remove_cache_and_update_lock(cache_path=cache_path) + + +def _save_to_file_system(hashed_grammar, path, item, cache_path=None): + with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f: + pickle.dump(item, f, pickle.HIGHEST_PROTOCOL) + + +def clear_cache(cache_path=None): + if cache_path is None: + cache_path = _default_cache_path + shutil.rmtree(cache_path) + parser_cache.clear() + + +def clear_inactive_cache( + cache_path=None, + inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL, +): + if cache_path is None: + cache_path = _get_default_cache_path() + if not os.path.exists(cache_path): + return False + for version_path in os.listdir(cache_path): + version_path = os.path.join(cache_path, version_path) + if not os.path.isdir(version_path): + continue + for file in scandir(version_path): + if ( + file.stat().st_atime + _CACHED_FILE_MAXIMUM_SURVIVAL + <= time.time() + ): + try: + os.remove(file.path) + except OSError: # silently ignore all failures + continue + else: + return True + + +def _remove_cache_and_update_lock(cache_path = None): + lock = _get_cache_clear_lock(cache_path=cache_path) + clear_lock_time = lock.get_last_modified() + if ( + clear_lock_time is None # first time + or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time() + ): + if not lock._touch(): + # First make sure that as few as possible other cleanup jobs also + # get started. There is still a race condition but it's probably + # not a big problem. 
+ return False + + clear_inactive_cache(cache_path = cache_path) + +def _get_hashed_path(hashed_grammar, path, cache_path=None): + directory = _get_cache_directory_path(cache_path=cache_path) + + file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest() + return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash)) + + +def _get_cache_directory_path(cache_path=None): + if cache_path is None: + cache_path = _default_cache_path + directory = os.path.join(cache_path, _VERSION_TAG) + if not os.path.exists(directory): + os.makedirs(directory) + return directory diff --git a/contrib/python/parso/py2/parso/file_io.py b/contrib/python/parso/py2/parso/file_io.py new file mode 100644 index 0000000000..6eb16adfb1 --- /dev/null +++ b/contrib/python/parso/py2/parso/file_io.py @@ -0,0 +1,52 @@ +import os +from parso._compatibility import FileNotFoundError + +import __res as res + + +class FileIO(object): + def __init__(self, path): + self.path = path + + def read(self): # Returns bytes/str + # We would like to read unicode here, but we cannot, because we are not + # sure if it is a valid unicode file. Therefore just read whatever is + # here. + data = res.resfs_read(self.path) + if data: + return data + with open(self.path, 'rb') as f: + return f.read() + + def get_last_modified(self): + """ + Returns float - timestamp or None, if path doesn't exist. + """ + try: + return os.path.getmtime(self.path) + except OSError: + # Might raise FileNotFoundError, OSError for Python 2 + return None + + def _touch(self): + try: + os.utime(self.path, None) + except FileNotFoundError: + try: + file = open(self.path, 'a') + file.close() + except (OSError, IOError): # TODO Maybe log this? + return False + return True + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self.path) + + +class KnownContentFileIO(FileIO): + def __init__(self, path, content): + super(KnownContentFileIO, self).__init__(path) + self._content = content + + def read(self): + return self._content diff --git a/contrib/python/parso/py2/parso/grammar.py b/contrib/python/parso/py2/parso/grammar.py new file mode 100644 index 0000000000..fe1a65c411 --- /dev/null +++ b/contrib/python/parso/py2/parso/grammar.py @@ -0,0 +1,265 @@ +import hashlib +import os +import sys +import pkgutil + +from parso._compatibility import FileNotFoundError, is_pypy +from parso.pgen2 import generate_grammar +from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string +from parso.python.diff import DiffParser +from parso.python.tokenize import tokenize_lines, tokenize +from parso.python.token import PythonTokenTypes +from parso.cache import parser_cache, load_module, try_to_save_module +from parso.parser import BaseParser +from parso.python.parser import Parser as PythonParser +from parso.python.errors import ErrorFinderConfig +from parso.python import pep8 +from parso.file_io import FileIO, KnownContentFileIO +from parso.normalizer import RefactoringNormalizer + +_loaded_grammars = {} + + +class Grammar(object): + """ + :py:func:`parso.load_grammar` returns instances of this class. + + Creating custom none-python grammars by calling this is not supported, yet. + """ + #:param text: A BNF representation of your grammar. 
+ _error_normalizer_config = None + _token_namespace = None + _default_normalizer_config = pep8.PEP8NormalizerConfig() + + def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None): + self._pgen_grammar = generate_grammar( + text, + token_namespace=self._get_token_namespace() + ) + self._parser = parser + self._tokenizer = tokenizer + self._diff_parser = diff_parser + self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest() + + def parse(self, code=None, **kwargs): + """ + If you want to parse a Python file you want to start here, most likely. + + If you need finer grained control over the parsed instance, there will be + other ways to access it. + + :param str code: A unicode or bytes string. When it's not possible to + decode bytes to a string, returns a + :py:class:`UnicodeDecodeError`. + :param bool error_recovery: If enabled, any code will be returned. If + it is invalid, it will be returned as an error node. If disabled, + you will get a ParseError when encountering syntax errors in your + code. + :param str start_symbol: The grammar rule (nonterminal) that you want + to parse. Only allowed to be used when error_recovery is False. + :param str path: The path to the file you want to open. Only needed for caching. + :param bool cache: Keeps a copy of the parser tree in RAM and on disk + if a path is given. Returns the cached trees if the corresponding + files on disk have not changed. Note that this stores pickle files + on your file system (e.g. for Linux in ``~/.cache/parso/``). + :param bool diff_cache: Diffs the cached python module against the new + code and tries to parse only the parts that have changed. Returns + the same (changed) module that is found in cache. Using this option + requires you to not do anything anymore with the cached modules + under that path, because the contents of it might change. This + option is still somewhat experimental. If you want stability, + please don't use it. + :param bool cache_path: If given saves the parso cache in this + directory. If not given, defaults to the default cache places on + each platform. + + :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a + :py:class:`parso.python.tree.Module`. + """ + if 'start_pos' in kwargs: + raise TypeError("parse() got an unexpected keyword argument.") + return self._parse(code=code, **kwargs) + + def _parse(self, code=None, error_recovery=True, path=None, + start_symbol=None, cache=False, diff_cache=False, + cache_path=None, file_io=None, start_pos=(1, 0)): + """ + Wanted python3.5 * operator and keyword only arguments. Therefore just + wrap it all. + start_pos here is just a parameter internally used. Might be public + sometime in the future. 
+ """ + if code is None and path is None and file_io is None: + raise TypeError("Please provide either code or a path.") + + if start_symbol is None: + start_symbol = self._start_nonterminal + + if error_recovery and start_symbol != 'file_input': + raise NotImplementedError("This is currently not implemented.") + + if file_io is None: + if code is None: + file_io = FileIO(path) + else: + file_io = KnownContentFileIO(path, code) + + if cache and file_io.path is not None: + module_node = load_module(self._hashed, file_io, cache_path=cache_path) + if module_node is not None: + return module_node + + if code is None: + code = file_io.read() + code = python_bytes_to_unicode(code) + + lines = split_lines(code, keepends=True) + if diff_cache: + if self._diff_parser is None: + raise TypeError("You have to define a diff parser to be able " + "to use this option.") + try: + module_cache_item = parser_cache[self._hashed][file_io.path] + except KeyError: + pass + else: + module_node = module_cache_item.node + old_lines = module_cache_item.lines + if old_lines == lines: + return module_node + + new_node = self._diff_parser( + self._pgen_grammar, self._tokenizer, module_node + ).update( + old_lines=old_lines, + new_lines=lines + ) + try_to_save_module(self._hashed, file_io, new_node, lines, + # Never pickle in pypy, it's slow as hell. + pickling=cache and not is_pypy, + cache_path=cache_path) + return new_node + + tokens = self._tokenizer(lines, start_pos=start_pos) + + p = self._parser( + self._pgen_grammar, + error_recovery=error_recovery, + start_nonterminal=start_symbol + ) + root_node = p.parse(tokens=tokens) + + if cache or diff_cache: + try_to_save_module(self._hashed, file_io, root_node, lines, + # Never pickle in pypy, it's slow as hell. + pickling=cache and not is_pypy, + cache_path=cache_path) + return root_node + + def _get_token_namespace(self): + ns = self._token_namespace + if ns is None: + raise ValueError("The token namespace should be set.") + return ns + + def iter_errors(self, node): + """ + Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of + :py:class:`parso.normalizer.Issue` objects. For Python this is + a list of syntax/indentation errors. + """ + if self._error_normalizer_config is None: + raise ValueError("No error normalizer specified for this grammar.") + + return self._get_normalizer_issues(node, self._error_normalizer_config) + + def refactor(self, base_node, node_to_str_map): + return RefactoringNormalizer(node_to_str_map).walk(base_node) + + def _get_normalizer(self, normalizer_config): + if normalizer_config is None: + normalizer_config = self._default_normalizer_config + if normalizer_config is None: + raise ValueError("You need to specify a normalizer, because " + "there's no default normalizer for this tree.") + return normalizer_config.create_normalizer(self) + + def _normalize(self, node, normalizer_config=None): + """ + TODO this is not public, yet. + The returned code will be normalized, e.g. PEP8 for Python. + """ + normalizer = self._get_normalizer(normalizer_config) + return normalizer.walk(node) + + def _get_normalizer_issues(self, node, normalizer_config=None): + normalizer = self._get_normalizer(normalizer_config) + normalizer.walk(node) + return normalizer.issues + + def __repr__(self): + nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys() + txt = ' '.join(list(nonterminals)[:3]) + ' ...' 
+ return '<%s:%s>' % (self.__class__.__name__, txt) + + +class PythonGrammar(Grammar): + _error_normalizer_config = ErrorFinderConfig() + _token_namespace = PythonTokenTypes + _start_nonterminal = 'file_input' + + def __init__(self, version_info, bnf_text): + super(PythonGrammar, self).__init__( + bnf_text, + tokenizer=self._tokenize_lines, + parser=PythonParser, + diff_parser=DiffParser + ) + self.version_info = version_info + + def _tokenize_lines(self, lines, **kwargs): + return tokenize_lines(lines, self.version_info, **kwargs) + + def _tokenize(self, code): + # Used by Jedi. + return tokenize(code, self.version_info) + + +def load_grammar(**kwargs): + """ + Loads a :py:class:`parso.Grammar`. The default version is the current Python + version. + + :param str version: A python version string, e.g. ``version='3.8'``. + :param str path: A path to a grammar file + """ + def load_grammar(language='python', version=None, path=None): + if language == 'python': + version_info = parse_version_string(version) + + file = path or os.path.join( + 'python', + 'grammar%s%s.txt' % (version_info.major, version_info.minor) + ) + + global _loaded_grammars + path = os.path.join(os.path.dirname(__file__), file) + try: + return _loaded_grammars[path] + except KeyError: + try: + bnf_text = pkgutil.get_data("parso", file) + if bnf_text is None: + raise FileNotFoundError + if sys.version_info[0] == 3: + bnf_text = bnf_text.decode("ascii") + + grammar = PythonGrammar(version_info, bnf_text) + return _loaded_grammars.setdefault(path, grammar) + except (FileNotFoundError, IOError): + message = "Python version %s.%s is currently not supported." % (version_info.major, version_info.minor) + raise NotImplementedError(message) + else: + raise NotImplementedError("No support for language %s." 
% language) + + return load_grammar(**kwargs) diff --git a/contrib/python/parso/py2/parso/normalizer.py b/contrib/python/parso/py2/parso/normalizer.py new file mode 100644 index 0000000000..09fde99a67 --- /dev/null +++ b/contrib/python/parso/py2/parso/normalizer.py @@ -0,0 +1,203 @@ +from contextlib import contextmanager + +from parso._compatibility import use_metaclass + + +class _NormalizerMeta(type): + def __new__(cls, name, bases, dct): + new_cls = type.__new__(cls, name, bases, dct) + new_cls.rule_value_classes = {} + new_cls.rule_type_classes = {} + return new_cls + + +class Normalizer(use_metaclass(_NormalizerMeta)): + _rule_type_instances = {} + _rule_value_instances = {} + + def __init__(self, grammar, config): + self.grammar = grammar + self._config = config + self.issues = [] + + self._rule_type_instances = self._instantiate_rules('rule_type_classes') + self._rule_value_instances = self._instantiate_rules('rule_value_classes') + + def _instantiate_rules(self, attr): + dct = {} + for base in type(self).mro(): + rules_map = getattr(base, attr, {}) + for type_, rule_classes in rules_map.items(): + new = [rule_cls(self) for rule_cls in rule_classes] + dct.setdefault(type_, []).extend(new) + return dct + + def walk(self, node): + self.initialize(node) + value = self.visit(node) + self.finalize() + return value + + def visit(self, node): + try: + children = node.children + except AttributeError: + return self.visit_leaf(node) + else: + with self.visit_node(node): + return ''.join(self.visit(child) for child in children) + + @contextmanager + def visit_node(self, node): + self._check_type_rules(node) + yield + + def _check_type_rules(self, node): + for rule in self._rule_type_instances.get(node.type, []): + rule.feed_node(node) + + def visit_leaf(self, leaf): + self._check_type_rules(leaf) + + for rule in self._rule_value_instances.get(leaf.value, []): + rule.feed_node(leaf) + + return leaf.prefix + leaf.value + + def initialize(self, node): + pass + + def finalize(self): + pass + + def add_issue(self, node, code, message): + issue = Issue(node, code, message) + if issue not in self.issues: + self.issues.append(issue) + return True + + @classmethod + def register_rule(cls, **kwargs): + """ + Use it as a class decorator:: + + normalizer = Normalizer('grammar', 'config') + @normalizer.register_rule(value='foo') + class MyRule(Rule): + error_code = 42 + """ + return cls._register_rule(**kwargs) + + @classmethod + def _register_rule(cls, value=None, values=(), type=None, types=()): + values = list(values) + types = list(types) + if value is not None: + values.append(value) + if type is not None: + types.append(type) + + if not values and not types: + raise ValueError("You must register at least something.") + + def decorator(rule_cls): + for v in values: + cls.rule_value_classes.setdefault(v, []).append(rule_cls) + for t in types: + cls.rule_type_classes.setdefault(t, []).append(rule_cls) + return rule_cls + + return decorator + + +class NormalizerConfig(object): + normalizer_class = Normalizer + + def create_normalizer(self, grammar): + if self.normalizer_class is None: + return None + + return self.normalizer_class(grammar, self) + + +class Issue(object): + def __init__(self, node, code, message): + self.code = code + """ + An integer code that stands for the type of error. + """ + self.message = message + """ + A message (string) for the issue. + """ + self.start_pos = node.start_pos + """ + The start position position of the error as a tuple (line, column). 
As + always in |parso| the first line is 1 and the first column 0. + """ + self.end_pos = node.end_pos + + def __eq__(self, other): + return self.start_pos == other.start_pos and self.code == other.code + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash((self.code, self.start_pos)) + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.code) + + +class Rule(object): + code = None + message = None + + def __init__(self, normalizer): + self._normalizer = normalizer + + def is_issue(self, node): + raise NotImplementedError() + + def get_node(self, node): + return node + + def _get_message(self, message, node): + if message is None: + message = self.message + if message is None: + raise ValueError("The message on the class is not set.") + return message + + def add_issue(self, node, code=None, message=None): + if code is None: + code = self.code + if code is None: + raise ValueError("The error code on the class is not set.") + + message = self._get_message(message, node) + + self._normalizer.add_issue(node, code, message) + + def feed_node(self, node): + if self.is_issue(node): + issue_node = self.get_node(node) + self.add_issue(issue_node) + + +class RefactoringNormalizer(Normalizer): + def __init__(self, node_to_str_map): + self._node_to_str_map = node_to_str_map + + def visit(self, node): + try: + return self._node_to_str_map[node] + except KeyError: + return super(RefactoringNormalizer, self).visit(node) + + def visit_leaf(self, leaf): + try: + return self._node_to_str_map[leaf] + except KeyError: + return super(RefactoringNormalizer, self).visit_leaf(leaf) diff --git a/contrib/python/parso/py2/parso/parser.py b/contrib/python/parso/py2/parso/parser.py new file mode 100644 index 0000000000..66f5443b45 --- /dev/null +++ b/contrib/python/parso/py2/parso/parser.py @@ -0,0 +1,211 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright David Halter and Contributors +# Modifications are dual-licensed: MIT and PSF. +# 99% of the code is different from pgen2, now. + +""" +The ``Parser`` tries to convert the available Python code in an easy to read +format, something like an abstract syntax tree. The classes who represent this +tree, are sitting in the :mod:`parso.tree` module. + +The Python module ``tokenize`` is a very important part in the ``Parser``, +because it splits the code into different words (tokens). Sometimes it looks a +bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast`` +module for this? Well, ``ast`` does a very good job understanding proper Python +code, but fails to work as soon as there's a single line of broken code. + +There's one important optimization that needs to be known: Statements are not +being parsed completely. ``Statement`` is just a representation of the tokens +within the statement. This lowers memory usage and cpu time and reduces the +complexity of the ``Parser`` (there's another parser sitting inside +``Statement``, which produces ``Array`` and ``Call``). +""" +from parso import tree +from parso.pgen2.generator import ReservedString + + +class ParserSyntaxError(Exception): + """ + Contains error information about the parser tree. + + May be raised as an exception. 
+ """ + def __init__(self, message, error_leaf): + self.message = message + self.error_leaf = error_leaf + + +class InternalParseError(Exception): + """ + Exception to signal the parser is stuck and error recovery didn't help. + Basically this shouldn't happen. It's a sign that something is really + wrong. + """ + + def __init__(self, msg, type_, value, start_pos): + Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" % + (msg, type_.name, value, start_pos)) + self.msg = msg + self.type = type + self.value = value + self.start_pos = start_pos + + +class Stack(list): + def _allowed_transition_names_and_token_types(self): + def iterate(): + # An API just for Jedi. + for stack_node in reversed(self): + for transition in stack_node.dfa.transitions: + if isinstance(transition, ReservedString): + yield transition.value + else: + yield transition # A token type + + if not stack_node.dfa.is_final: + break + + return list(iterate()) + + +class StackNode(object): + def __init__(self, dfa): + self.dfa = dfa + self.nodes = [] + + @property + def nonterminal(self): + return self.dfa.from_rule + + def __repr__(self): + return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes) + + +def _token_to_transition(grammar, type_, value): + # Map from token to label + if type_.contains_syntax: + # Check for reserved words (keywords) + try: + return grammar.reserved_syntax_strings[value] + except KeyError: + pass + + return type_ + + +class BaseParser(object): + """Parser engine. + + A Parser instance contains state pertaining to the current token + sequence, and should not be used concurrently by different threads + to parse separate token sequences. + + See python/tokenize.py for how to get input tokens by a string. + + When a syntax error occurs, error_recovery() is called. + """ + + node_map = {} + default_node = tree.Node + + leaf_map = { + } + default_leaf = tree.Leaf + + def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False): + self._pgen_grammar = pgen_grammar + self._start_nonterminal = start_nonterminal + self._error_recovery = error_recovery + + def parse(self, tokens): + first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0] + self.stack = Stack([StackNode(first_dfa)]) + + for token in tokens: + self._add_token(token) + + while True: + tos = self.stack[-1] + if not tos.dfa.is_final: + # We never broke out -- EOF is too soon -- Unfinished statement. + # However, the error recovery might have added the token again, if + # the stack is empty, we're fine. 
+ raise InternalParseError( + "incomplete input", token.type, token.string, token.start_pos + ) + + if len(self.stack) > 1: + self._pop() + else: + return self.convert_node(tos.nonterminal, tos.nodes) + + def error_recovery(self, token): + if self._error_recovery: + raise NotImplementedError("Error Recovery is not implemented") + else: + type_, value, start_pos, prefix = token + error_leaf = tree.ErrorLeaf(type_, value, start_pos, prefix) + raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf) + + def convert_node(self, nonterminal, children): + try: + node = self.node_map[nonterminal](children) + except KeyError: + node = self.default_node(nonterminal, children) + for c in children: + c.parent = node + return node + + def convert_leaf(self, type_, value, prefix, start_pos): + try: + return self.leaf_map[type_](value, start_pos, prefix) + except KeyError: + return self.default_leaf(value, start_pos, prefix) + + def _add_token(self, token): + """ + This is the only core function for parsing. Here happens basically + everything. Everything is well prepared by the parser generator and we + only apply the necessary steps here. + """ + grammar = self._pgen_grammar + stack = self.stack + type_, value, start_pos, prefix = token + transition = _token_to_transition(grammar, type_, value) + + while True: + try: + plan = stack[-1].dfa.transitions[transition] + break + except KeyError: + if stack[-1].dfa.is_final: + self._pop() + else: + self.error_recovery(token) + return + except IndexError: + raise InternalParseError("too much input", type_, value, start_pos) + + stack[-1].dfa = plan.next_dfa + + for push in plan.dfa_pushes: + stack.append(StackNode(push)) + + leaf = self.convert_leaf(type_, value, prefix, start_pos) + stack[-1].nodes.append(leaf) + + def _pop(self): + tos = self.stack.pop() + # If there's exactly one child, return that child instead of + # creating a new node. We still create expr_stmt and + # file_input though, because a lot of Jedi depends on its + # logic. + if len(tos.nodes) == 1: + new_node = tos.nodes[0] + else: + new_node = self.convert_node(tos.dfa.from_rule, tos.nodes) + + self.stack[-1].nodes.append(new_node) diff --git a/contrib/python/parso/py2/parso/pgen2/__init__.py b/contrib/python/parso/py2/parso/pgen2/__init__.py new file mode 100644 index 0000000000..d4d9dcdc49 --- /dev/null +++ b/contrib/python/parso/py2/parso/pgen2/__init__.py @@ -0,0 +1,10 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +# Copyright 2014 David Halter and Contributors +# Modifications are dual-licensed: MIT and PSF. + +from parso.pgen2.generator import generate_grammar diff --git a/contrib/python/parso/py2/parso/pgen2/generator.py b/contrib/python/parso/py2/parso/pgen2/generator.py new file mode 100644 index 0000000000..9bf54ae308 --- /dev/null +++ b/contrib/python/parso/py2/parso/pgen2/generator.py @@ -0,0 +1,378 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright David Halter and Contributors +# Modifications are dual-licensed: MIT and PSF. + +""" +This module defines the data structures used to represent a grammar. 
+ +Specifying grammars in pgen is possible with this grammar:: + + grammar: (NEWLINE | rule)* ENDMARKER + rule: NAME ':' rhs NEWLINE + rhs: items ('|' items)* + items: item+ + item: '[' rhs ']' | atom ['+' | '*'] + atom: '(' rhs ')' | NAME | STRING + +This grammar is self-referencing. + +This parser generator (pgen2) was created by Guido Rossum and used for lib2to3. +Most of the code has been refactored to make it more Pythonic. Since this was a +"copy" of the CPython Parser parser "pgen", there was some work needed to make +it more readable. It should also be slightly faster than the original pgen2, +because we made some optimizations. +""" + +from ast import literal_eval + +from parso.pgen2.grammar_parser import GrammarParser, NFAState + + +class Grammar(object): + """ + Once initialized, this class supplies the grammar tables for the + parsing engine implemented by parse.py. The parsing engine + accesses the instance variables directly. + + The only important part in this parsers are dfas and transitions between + dfas. + """ + + def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings): + self.nonterminal_to_dfas = rule_to_dfas # Dict[str, List[DFAState]] + self.reserved_syntax_strings = reserved_syntax_strings + self.start_nonterminal = start_nonterminal + + +class DFAPlan(object): + """ + Plans are used for the parser to create stack nodes and do the proper + DFA state transitions. + """ + def __init__(self, next_dfa, dfa_pushes=[]): + self.next_dfa = next_dfa + self.dfa_pushes = dfa_pushes + + def __repr__(self): + return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) + + +class DFAState(object): + """ + The DFAState object is the core class for pretty much anything. DFAState + are the vertices of an ordered graph while arcs and transitions are the + edges. + + Arcs are the initial edges, where most DFAStates are not connected and + transitions are then calculated to connect the DFA state machines that have + different nonterminals. + """ + def __init__(self, from_rule, nfa_set, final): + assert isinstance(nfa_set, set) + assert isinstance(next(iter(nfa_set)), NFAState) + assert isinstance(final, NFAState) + self.from_rule = from_rule + self.nfa_set = nfa_set + self.arcs = {} # map from terminals/nonterminals to DFAState + # In an intermediary step we set these nonterminal arcs (which has the + # same structure as arcs). These don't contain terminals anymore. + self.nonterminal_arcs = {} + + # Transitions are basically the only thing that the parser is using + # with is_final. Everyting else is purely here to create a parser. + self.transitions = {} #: Dict[Union[TokenType, ReservedString], DFAPlan] + self.is_final = final in nfa_set + + def add_arc(self, next_, label): + assert isinstance(label, str) + assert label not in self.arcs + assert isinstance(next_, DFAState) + self.arcs[label] = next_ + + def unifystate(self, old, new): + for label, next_ in self.arcs.items(): + if next_ is old: + self.arcs[label] = new + + def __eq__(self, other): + # Equality test -- ignore the nfa_set instance variable + assert isinstance(other, DFAState) + if self.is_final != other.is_final: + return False + # Can't just return self.arcs == other.arcs, because that + # would invoke this method recursively, with cycles... + if len(self.arcs) != len(other.arcs): + return False + for label, next_ in self.arcs.items(): + if next_ is not other.arcs.get(label): + return False + return True + + __hash__ = None # For Py3 compatibility. 
+ + def __repr__(self): + return '<%s: %s is_final=%s>' % ( + self.__class__.__name__, self.from_rule, self.is_final + ) + + +class ReservedString(object): + """ + Most grammars will have certain keywords and operators that are mentioned + in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER). + This class basically is the former. + """ + + def __init__(self, value): + self.value = value + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self.value) + + +def _simplify_dfas(dfas): + """ + This is not theoretically optimal, but works well enough. + Algorithm: repeatedly look for two states that have the same + set of arcs (same labels pointing to the same nodes) and + unify them, until things stop changing. + + dfas is a list of DFAState instances + """ + changes = True + while changes: + changes = False + for i, state_i in enumerate(dfas): + for j in range(i + 1, len(dfas)): + state_j = dfas[j] + if state_i == state_j: + #print " unify", i, j + del dfas[j] + for state in dfas: + state.unifystate(state_j, state_i) + changes = True + break + + +def _make_dfas(start, finish): + """ + Uses the powerset construction algorithm to create DFA states from sets of + NFA states. + + Also does state reduction if some states are not needed. + """ + # To turn an NFA into a DFA, we define the states of the DFA + # to correspond to *sets* of states of the NFA. Then do some + # state reduction. + assert isinstance(start, NFAState) + assert isinstance(finish, NFAState) + + def addclosure(nfa_state, base_nfa_set): + assert isinstance(nfa_state, NFAState) + if nfa_state in base_nfa_set: + return + base_nfa_set.add(nfa_state) + for nfa_arc in nfa_state.arcs: + if nfa_arc.nonterminal_or_string is None: + addclosure(nfa_arc.next, base_nfa_set) + + base_nfa_set = set() + addclosure(start, base_nfa_set) + states = [DFAState(start.from_rule, base_nfa_set, finish)] + for state in states: # NB states grows while we're iterating + arcs = {} + # Find state transitions and store them in arcs. + for nfa_state in state.nfa_set: + for nfa_arc in nfa_state.arcs: + if nfa_arc.nonterminal_or_string is not None: + nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set()) + addclosure(nfa_arc.next, nfa_set) + + # Now create the dfa's with no None's in arcs anymore. All Nones have + # been eliminated and state transitions (arcs) are properly defined, we + # just need to create the dfa's. + for nonterminal_or_string, nfa_set in arcs.items(): + for nested_state in states: + if nested_state.nfa_set == nfa_set: + # The DFA state already exists for this rule. 
+ break + else: + nested_state = DFAState(start.from_rule, nfa_set, finish) + states.append(nested_state) + + state.add_arc(nested_state, nonterminal_or_string) + return states # List of DFAState instances; first one is start + + +def _dump_nfa(start, finish): + print("Dump of NFA for", start.from_rule) + todo = [start] + for i, state in enumerate(todo): + print(" State", i, state is finish and "(final)" or "") + for arc in state.arcs: + label, next_ = arc.nonterminal_or_string, arc.next + if next_ in todo: + j = todo.index(next_) + else: + j = len(todo) + todo.append(next_) + if label is None: + print(" -> %d" % j) + else: + print(" %s -> %d" % (label, j)) + + +def _dump_dfas(dfas): + print("Dump of DFA for", dfas[0].from_rule) + for i, state in enumerate(dfas): + print(" State", i, state.is_final and "(final)" or "") + for nonterminal, next_ in state.arcs.items(): + print(" %s -> %d" % (nonterminal, dfas.index(next_))) + + +def generate_grammar(bnf_grammar, token_namespace): + """ + ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for + at-least-once repetition, [] for optional parts, | for alternatives and () + for grouping). + + It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its + own parser. + """ + rule_to_dfas = {} + start_nonterminal = None + for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse(): + #_dump_nfa(nfa_a, nfa_z) + dfas = _make_dfas(nfa_a, nfa_z) + #_dump_dfas(dfas) + # oldlen = len(dfas) + _simplify_dfas(dfas) + # newlen = len(dfas) + rule_to_dfas[nfa_a.from_rule] = dfas + #print(nfa_a.from_rule, oldlen, newlen) + + if start_nonterminal is None: + start_nonterminal = nfa_a.from_rule + + reserved_strings = {} + for nonterminal, dfas in rule_to_dfas.items(): + for dfa_state in dfas: + for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): + if terminal_or_nonterminal in rule_to_dfas: + dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa + else: + transition = _make_transition( + token_namespace, + reserved_strings, + terminal_or_nonterminal + ) + dfa_state.transitions[transition] = DFAPlan(next_dfa) + + _calculate_tree_traversal(rule_to_dfas) + return Grammar(start_nonterminal, rule_to_dfas, reserved_strings) + + +def _make_transition(token_namespace, reserved_syntax_strings, label): + """ + Creates a reserved string ("if", "for", "*", ...) or returns the token type + (NUMBER, STRING, ...) for a given grammar terminal. + """ + if label[0].isalpha(): + # A named token (e.g. NAME, NUMBER, STRING) + return getattr(token_namespace, label) + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + assert not label.startswith('"""') and not label.startswith("'''") + value = literal_eval(label) + try: + return reserved_syntax_strings[value] + except KeyError: + r = reserved_syntax_strings[value] = ReservedString(value) + return r + + +def _calculate_tree_traversal(nonterminal_to_dfas): + """ + By this point we know how dfas can move around within a stack node, but we + don't know how we can add a new stack node (nonterminal transitions). + """ + # Map from grammar rule (nonterminal) name to a set of tokens. + first_plans = {} + + nonterminals = list(nonterminal_to_dfas.keys()) + nonterminals.sort() + for nonterminal in nonterminals: + if nonterminal not in first_plans: + _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal) + + # Now that we have calculated the first terminals, we are sure that + # there is no left recursion. 
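To make generate_grammar above concrete, here is a hypothetical two-rule grammar written in the pgen dialect from the module docstring; the rule names and the choice of PythonTokenTypes as the token namespace are assumptions for illustration (parso itself only calls this internally for its bundled grammar files)::

    from parso.pgen2.generator import generate_grammar
    from parso.python.token import PythonTokenTypes

    bnf_text = (
        "file_input: (NEWLINE | simple_stmt)* ENDMARKER\n"
        "simple_stmt: NAME '=' NUMBER NEWLINE\n"
    )
    grammar = generate_grammar(bnf_text, PythonTokenTypes)
    print(grammar.start_nonterminal)            # file_input
    print(sorted(grammar.nonterminal_to_dfas))  # ['file_input', 'simple_stmt']
    print(grammar.reserved_syntax_strings)      # e.g. {'=': ReservedString(=)}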
+ + for dfas in nonterminal_to_dfas.values(): + for dfa_state in dfas: + transitions = dfa_state.transitions + for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): + for transition, pushes in first_plans[nonterminal].items(): + if transition in transitions: + prev_plan = transitions[transition] + # Make sure these are sorted so that error messages are + # at least deterministic + choices = sorted([ + ( + prev_plan.dfa_pushes[0].from_rule + if prev_plan.dfa_pushes + else prev_plan.next_dfa.from_rule + ), + ( + pushes[0].from_rule + if pushes else next_dfa.from_rule + ), + ]) + raise ValueError( + "Rule %s is ambiguous; given a %s token, we " + "can't determine if we should evaluate %s or %s." + % ( + ( + dfa_state.from_rule, + transition, + ) + tuple(choices) + ) + ) + transitions[transition] = DFAPlan(next_dfa, pushes) + + +def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): + """ + Calculates the first plan in the first_plans dictionary for every given + nonterminal. This is going to be used to know when to create stack nodes. + """ + dfas = nonterminal_to_dfas[nonterminal] + new_first_plans = {} + first_plans[nonterminal] = None # dummy to detect left recursion + # We only need to check the first dfa. All the following ones are not + # interesting to find first terminals. + state = dfas[0] + for transition, next_ in state.transitions.items(): + # It's a string. We have finally found a possible first token. + new_first_plans[transition] = [next_.next_dfa] + + for nonterminal2, next_ in state.nonterminal_arcs.items(): + # It's a nonterminal and we have either a left recursion issue + # in the grammar or we have to recurse. + try: + first_plans2 = first_plans[nonterminal2] + except KeyError: + first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2) + else: + if first_plans2 is None: + raise ValueError("left recursion for rule %r" % nonterminal) + + for t, pushes in first_plans2.items(): + new_first_plans[t] = [next_] + pushes + + first_plans[nonterminal] = new_first_plans + return new_first_plans diff --git a/contrib/python/parso/py2/parso/pgen2/grammar_parser.py b/contrib/python/parso/py2/parso/pgen2/grammar_parser.py new file mode 100644 index 0000000000..0be72096f9 --- /dev/null +++ b/contrib/python/parso/py2/parso/pgen2/grammar_parser.py @@ -0,0 +1,159 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright David Halter and Contributors +# Modifications are dual-licensed: MIT and PSF. + +from parso.python.tokenize import tokenize +from parso.utils import parse_version_string +from parso.python.token import PythonTokenTypes + + +class GrammarParser(): + """ + The parser for Python grammar files. 
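The first-plan calculation above doubles as the left-recursion check mentioned in the comments; a made-up rule that starts with itself makes generate_grammar fail instead of recursing forever::

    from parso.pgen2.generator import generate_grammar
    from parso.python.token import PythonTokenTypes

    # Hypothetical left-recursive rule: 'expr' begins with 'expr'.
    try:
        generate_grammar("expr: expr '+' NAME | NAME\n", PythonTokenTypes)
    except ValueError as e:
        print(e)   # left recursion for rule 'expr'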
+ """ + def __init__(self, bnf_grammar): + self._bnf_grammar = bnf_grammar + self.generator = tokenize( + bnf_grammar, + version_info=parse_version_string('3.6') + ) + self._gettoken() # Initialize lookahead + + def parse(self): + # grammar: (NEWLINE | rule)* ENDMARKER + while self.type != PythonTokenTypes.ENDMARKER: + while self.type == PythonTokenTypes.NEWLINE: + self._gettoken() + + # rule: NAME ':' rhs NEWLINE + self._current_rule_name = self._expect(PythonTokenTypes.NAME) + self._expect(PythonTokenTypes.OP, ':') + + a, z = self._parse_rhs() + self._expect(PythonTokenTypes.NEWLINE) + + yield a, z + + def _parse_rhs(self): + # rhs: items ('|' items)* + a, z = self._parse_items() + if self.value != "|": + return a, z + else: + aa = NFAState(self._current_rule_name) + zz = NFAState(self._current_rule_name) + while True: + # Add the possibility to go into the state of a and come back + # to finish. + aa.add_arc(a) + z.add_arc(zz) + if self.value != "|": + break + + self._gettoken() + a, z = self._parse_items() + return aa, zz + + def _parse_items(self): + # items: item+ + a, b = self._parse_item() + while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \ + or self.value in ('(', '['): + c, d = self._parse_item() + # Need to end on the next item. + b.add_arc(c) + b = d + return a, b + + def _parse_item(self): + # item: '[' rhs ']' | atom ['+' | '*'] + if self.value == "[": + self._gettoken() + a, z = self._parse_rhs() + self._expect(PythonTokenTypes.OP, ']') + # Make it also possible that there is no token and change the + # state. + a.add_arc(z) + return a, z + else: + a, z = self._parse_atom() + value = self.value + if value not in ("+", "*"): + return a, z + self._gettoken() + # Make it clear that we can go back to the old state and repeat. + z.add_arc(a) + if value == "+": + return a, z + else: + # The end state is the same as the beginning, nothing must + # change. + return a, a + + def _parse_atom(self): + # atom: '(' rhs ')' | NAME | STRING + if self.value == "(": + self._gettoken() + a, z = self._parse_rhs() + self._expect(PythonTokenTypes.OP, ')') + return a, z + elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING): + a = NFAState(self._current_rule_name) + z = NFAState(self._current_rule_name) + # Make it clear that the state transition requires that value. + a.add_arc(z, self.value) + self._gettoken() + return a, z + else: + self._raise_error("expected (...) 
or NAME or STRING, got %s/%s", + self.type, self.value) + + def _expect(self, type_, value=None): + if self.type != type_: + self._raise_error("expected %s, got %s [%s]", + type_, self.type, self.value) + if value is not None and self.value != value: + self._raise_error("expected %s, got %s", value, self.value) + value = self.value + self._gettoken() + return value + + def _gettoken(self): + tup = next(self.generator) + self.type, self.value, self.begin, prefix = tup + + def _raise_error(self, msg, *args): + if args: + try: + msg = msg % args + except: + msg = " ".join([msg] + list(map(str, args))) + line = self._bnf_grammar.splitlines()[self.begin[0] - 1] + raise SyntaxError(msg, ('<grammar>', self.begin[0], + self.begin[1], line)) + + +class NFAArc(object): + def __init__(self, next_, nonterminal_or_string): + self.next = next_ + self.nonterminal_or_string = nonterminal_or_string + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.nonterminal_or_string) + + +class NFAState(object): + def __init__(self, from_rule): + self.from_rule = from_rule + self.arcs = [] # List[nonterminal (str), NFAState] + + def add_arc(self, next_, nonterminal_or_string=None): + assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str) + assert isinstance(next_, NFAState) + self.arcs.append(NFAArc(next_, nonterminal_or_string)) + + def __repr__(self): + return '<%s: from %s>' % (self.__class__.__name__, self.from_rule) diff --git a/contrib/python/parso/py2/parso/py.typed b/contrib/python/parso/py2/parso/py.typed new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/parso/py2/parso/py.typed diff --git a/contrib/python/parso/py2/parso/python/__init__.py b/contrib/python/parso/py2/parso/python/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/__init__.py diff --git a/contrib/python/parso/py2/parso/python/diff.py b/contrib/python/parso/py2/parso/python/diff.py new file mode 100644 index 0000000000..1863413685 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/diff.py @@ -0,0 +1,886 @@ +""" +The diff parser is trying to be a faster version of the normal parser by trying +to reuse the nodes of a previous pass over the same file. This is also called +incremental parsing in parser literature. The difference is mostly that with +incremental parsing you get a range that needs to be reparsed. Here we +calculate that range ourselves by using difflib. After that it's essentially +incremental parsing. + +The biggest issue of this approach is that we reuse nodes in a mutable way. The +intial design and idea is quite problematic for this parser, but it is also +pretty fast. Measurements showed that just copying nodes in Python is simply +quite a bit slower (especially for big files >3 kLOC). Therefore we did not +want to get rid of the mutable nodes, since this is usually not an issue. + +This is by far the hardest software I ever wrote, exactly because the initial +design is crappy. When you have to account for a lot of mutable state, it +creates a ton of issues that you would otherwise not have. This file took +probably 3-6 months to write, which is insane for a parser. + +There is a fuzzer in that helps test this whole thing. Please use it if you +make changes here. If you run the fuzzer like:: + + test/fuzz_diff_parser.py random -n 100000 + +you can be pretty sure that everything is still fine. I sometimes run the +fuzzer up to 24h to make sure everything is still ok. 
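In normal use the diff parser is not instantiated directly; it is reached through Grammar.parse when a module with the same cache key has already been parsed. A sketch assuming parso 0.7's diff_cache keyword and its in-memory parser cache; the code strings are made up::

    import parso

    grammar = parso.load_grammar(version='3.6')

    old = "def f():\n    return 1\n"
    new = "def f():\n    return 1\n\ndef g():\n    return 2\n"

    # The first call parses normally and remembers the module and its lines.
    grammar.parse(old, diff_cache=True)
    # The second call diffs the two line lists and reuses unchanged nodes.
    module = grammar.parse(new, diff_cache=True)
    print([c.type for c in module.children])   # e.g. ['funcdef', 'funcdef', 'endmarker']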
+""" +import re +import difflib +from collections import namedtuple +import logging + +from parso.utils import split_lines +from parso.python.parser import Parser +from parso.python.tree import EndMarker +from parso.python.tokenize import PythonToken, BOM_UTF8_STRING +from parso.python.token import PythonTokenTypes + +LOG = logging.getLogger(__name__) +DEBUG_DIFF_PARSER = False + +_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT' + +NEWLINE = PythonTokenTypes.NEWLINE +DEDENT = PythonTokenTypes.DEDENT +NAME = PythonTokenTypes.NAME +ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT +ENDMARKER = PythonTokenTypes.ENDMARKER + + +def _is_indentation_error_leaf(node): + return node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS + + +def _get_previous_leaf_if_indentation(leaf): + while leaf and _is_indentation_error_leaf(leaf): + leaf = leaf.get_previous_leaf() + return leaf + + +def _get_next_leaf_if_indentation(leaf): + while leaf and _is_indentation_error_leaf(leaf): + leaf = leaf.get_next_leaf() + return leaf + + +def _get_suite_indentation(tree_node): + return _get_indentation(tree_node.children[1]) + + +def _get_indentation(tree_node): + return tree_node.start_pos[1] + + +def _assert_valid_graph(node): + """ + Checks if the parent/children relationship is correct. + + This is a check that only runs during debugging/testing. + """ + try: + children = node.children + except AttributeError: + # Ignore INDENT is necessary, because indent/dedent tokens don't + # contain value/prefix and are just around, because of the tokenizer. + if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS: + assert not node.value + assert not node.prefix + return + + # Calculate the content between two start positions. + previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf()) + if previous_leaf is None: + content = node.prefix + previous_start_pos = 1, 0 + else: + assert previous_leaf.end_pos <= node.start_pos, \ + (previous_leaf, node) + + content = previous_leaf.value + node.prefix + previous_start_pos = previous_leaf.start_pos + + if '\n' in content or '\r' in content: + splitted = split_lines(content) + line = previous_start_pos[0] + len(splitted) - 1 + actual = line, len(splitted[-1]) + else: + actual = previous_start_pos[0], previous_start_pos[1] + len(content) + if content.startswith(BOM_UTF8_STRING) \ + and node.get_start_pos_of_prefix() == (1, 0): + # Remove the byte order mark + actual = actual[0], actual[1] - 1 + + assert node.start_pos == actual, (node.start_pos, actual) + else: + for child in children: + assert child.parent == node, (node, child) + _assert_valid_graph(child) + + +def _assert_nodes_are_equal(node1, node2): + try: + children1 = node1.children + except AttributeError: + assert not hasattr(node2, 'children'), (node1, node2) + assert node1.value == node2.value, (node1, node2) + assert node1.type == node2.type, (node1, node2) + assert node1.prefix == node2.prefix, (node1, node2) + assert node1.start_pos == node2.start_pos, (node1, node2) + return + else: + try: + children2 = node2.children + except AttributeError: + assert False, (node1, node2) + for n1, n2 in zip(children1, children2): + _assert_nodes_are_equal(n1, n2) + assert len(children1) == len(children2), '\n' + repr(children1) + '\n' + repr(children2) + + +def _get_debug_error_message(module, old_lines, new_lines): + current_lines = split_lines(module.get_code(), keepends=True) + current_diff = difflib.unified_diff(new_lines, current_lines) + old_new_diff = 
difflib.unified_diff(old_lines, new_lines) + import parso + return ( + "There's an issue with the diff parser. Please " + "report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s" + % (parso.__version__, ''.join(old_new_diff), ''.join(current_diff)) + ) + + +def _get_last_line(node_or_leaf): + last_leaf = node_or_leaf.get_last_leaf() + if _ends_with_newline(last_leaf): + return last_leaf.start_pos[0] + else: + n = last_leaf.get_next_leaf() + if n.type == 'endmarker' and '\n' in n.prefix: + # This is a very special case and has to do with error recovery in + # Parso. The problem is basically that there's no newline leaf at + # the end sometimes (it's required in the grammar, but not needed + # actually before endmarker, CPython just adds a newline to make + # source code pass the parser, to account for that Parso error + # recovery allows small_stmt instead of simple_stmt). + return last_leaf.end_pos[0] + 1 + return last_leaf.end_pos[0] + + +def _skip_dedent_error_leaves(leaf): + while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT': + leaf = leaf.get_previous_leaf() + return leaf + + +def _ends_with_newline(leaf, suffix=''): + leaf = _skip_dedent_error_leaves(leaf) + + if leaf.type == 'error_leaf': + typ = leaf.token_type.lower() + else: + typ = leaf.type + + return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r') + + +def _flows_finished(pgen_grammar, stack): + """ + if, while, for and try might not be finished, because another part might + still be parsed. + """ + for stack_node in stack: + if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'): + return False + return True + + +def _func_or_class_has_suite(node): + if node.type == 'decorated': + node = node.children[-1] + if node.type in ('async_funcdef', 'async_stmt'): + node = node.children[-1] + return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite' + + +def _suite_or_file_input_is_valid(pgen_grammar, stack): + if not _flows_finished(pgen_grammar, stack): + return False + + for stack_node in reversed(stack): + if stack_node.nonterminal == 'decorator': + # A decorator is only valid with the upcoming function. + return False + + if stack_node.nonterminal == 'suite': + # If only newline is in the suite, the suite is not valid, yet. + return len(stack_node.nodes) > 1 + # Not reaching a suite means that we're dealing with file_input levels + # where there's no need for a valid statement in it. It can also be empty. + return True + + +def _is_flow_node(node): + if node.type == 'async_stmt': + node = node.children[1] + try: + value = node.children[0].value + except AttributeError: + return False + return value in ('if', 'for', 'while', 'try', 'with') + + +class _PositionUpdatingFinished(Exception): + pass + + +def _update_positions(nodes, line_offset, last_leaf): + for node in nodes: + try: + children = node.children + except AttributeError: + # Is a leaf + node.line += line_offset + if node is last_leaf: + raise _PositionUpdatingFinished + else: + _update_positions(children, line_offset, last_leaf) + + +class DiffParser(object): + """ + An advanced form of parsing a file faster. Unfortunately comes with huge + side effects. It changes the given module. 
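The position bookkeeping in these helpers works on the line lists produced by parso.utils.split_lines, which splits on \n, \r\n and \r and appends an empty final entry after a trailing newline; the sample strings are arbitrary::

    from parso.utils import split_lines

    print(split_lines('a\nb\r\nc'))                 # ['a', 'b', 'c']
    print(split_lines('a\nb\r\nc', keepends=True))  # ['a\n', 'b\r\n', 'c']
    # A trailing newline yields a final empty entry, which is why update()
    # below checks for new_lines[-1] == ''.
    print(split_lines('x\n', keepends=True))        # ['x\n', '']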
+ """ + def __init__(self, pgen_grammar, tokenizer, module): + self._pgen_grammar = pgen_grammar + self._tokenizer = tokenizer + self._module = module + + def _reset(self): + self._copy_count = 0 + self._parser_count = 0 + + self._nodes_tree = _NodesTree(self._module) + + def update(self, old_lines, new_lines): + ''' + The algorithm works as follows: + + Equal: + - Assure that the start is a newline, otherwise parse until we get + one. + - Copy from parsed_until_line + 1 to max(i2 + 1) + - Make sure that the indentation is correct (e.g. add DEDENT) + - Add old and change positions + Insert: + - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not + much more. + + Returns the new module node. + ''' + LOG.debug('diff parser start') + # Reset the used names cache so they get regenerated. + self._module._used_names = None + + self._parser_lines_new = new_lines + + self._reset() + + line_length = len(new_lines) + sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new) + opcodes = sm.get_opcodes() + LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length)) + + for operation, i1, i2, j1, j2 in opcodes: + LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]', + operation, i1 + 1, i2, j1 + 1, j2) + + if j2 == line_length and new_lines[-1] == '': + # The empty part after the last newline is not relevant. + j2 -= 1 + + if operation == 'equal': + line_offset = j1 - i1 + self._copy_from_old_parser(line_offset, i1 + 1, i2, j2) + elif operation == 'replace': + self._parse(until_line=j2) + elif operation == 'insert': + self._parse(until_line=j2) + else: + assert operation == 'delete' + + # With this action all change will finally be applied and we have a + # changed module. + self._nodes_tree.close() + + if DEBUG_DIFF_PARSER: + # If there is reasonable suspicion that the diff parser is not + # behaving well, this should be enabled. + try: + code = ''.join(new_lines) + assert self._module.get_code() == code + _assert_valid_graph(self._module) + without_diff_parser_module = Parser( + self._pgen_grammar, + error_recovery=True + ).parse(self._tokenizer(new_lines)) + _assert_nodes_are_equal(self._module, without_diff_parser_module) + except AssertionError: + print(_get_debug_error_message(self._module, old_lines, new_lines)) + raise + + last_pos = self._module.end_pos[0] + if last_pos != line_length: + raise Exception( + ('(%s != %s) ' % (last_pos, line_length)) + + _get_debug_error_message(self._module, old_lines, new_lines) + ) + LOG.debug('diff parser end') + return self._module + + def _enabled_debugging(self, old_lines, lines_new): + if self._module.get_code() != ''.join(lines_new): + LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new)) + + def _copy_from_old_parser(self, line_offset, start_line_old, until_line_old, until_line_new): + last_until_line = -1 + while until_line_new > self._nodes_tree.parsed_until_line: + parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset + line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1) + if line_stmt is None: + # Parse 1 line at least. We don't need more, because we just + # want to get into a state where the old parser has statements + # again that can be copied (e.g. not lines within parentheses). 
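The equal/replace/insert/delete dispatch in update() above comes straight from difflib; a standalone look at the opcodes it produces for two made-up line lists::

    import difflib

    old_lines = ["def f():\n", "    return 1\n", "\n"]
    new_lines = ["def f():\n", "    return 2\n", "\n", "x = 3\n"]

    sm = difflib.SequenceMatcher(None, old_lines, new_lines)
    for op, i1, i2, j1, j2 in sm.get_opcodes():
        # 'equal' ranges are candidates for copying old tree nodes,
        # 'replace' and 'insert' ranges are reparsed, 'delete' needs no work.
        print(op, (i1, i2), (j1, j2))
    # roughly: equal (0, 1) (0, 1), replace (1, 2) (1, 2),
    #          equal (2, 3) (2, 3), insert (3, 3) (3, 4)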
+ self._parse(self._nodes_tree.parsed_until_line + 1) + else: + p_children = line_stmt.parent.children + index = p_children.index(line_stmt) + + if start_line_old == 1 \ + and p_children[0].get_first_leaf().prefix.startswith(BOM_UTF8_STRING): + # If there's a BOM in the beginning, just reparse. It's too + # complicated to account for it otherwise. + copied_nodes = [] + else: + from_ = self._nodes_tree.parsed_until_line + 1 + copied_nodes = self._nodes_tree.copy_nodes( + p_children[index:], + until_line_old, + line_offset + ) + # Match all the nodes that are in the wanted range. + if copied_nodes: + self._copy_count += 1 + + to = self._nodes_tree.parsed_until_line + + LOG.debug('copy old[%s:%s] new[%s:%s]', + copied_nodes[0].start_pos[0], + copied_nodes[-1].end_pos[0] - 1, from_, to) + else: + # We have copied as much as possible (but definitely not too + # much). Therefore we just parse a bit more. + self._parse(self._nodes_tree.parsed_until_line + 1) + # Since there are potential bugs that might loop here endlessly, we + # just stop here. + assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line + last_until_line = self._nodes_tree.parsed_until_line + + def _get_old_line_stmt(self, old_line): + leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True) + + if _ends_with_newline(leaf): + leaf = leaf.get_next_leaf() + if leaf.get_start_pos_of_prefix()[0] == old_line: + node = leaf + while node.parent.type not in ('file_input', 'suite'): + node = node.parent + + # Make sure that if only the `else:` line of an if statement is + # copied that not the whole thing is going to be copied. + if node.start_pos[0] >= old_line: + return node + # Must be on the same line. Otherwise we need to parse that bit. + return None + + def _parse(self, until_line): + """ + Parses at least until the given line, but might just parse more until a + valid state is reached. + """ + last_until_line = 0 + while until_line > self._nodes_tree.parsed_until_line: + node = self._try_parse_part(until_line) + nodes = node.children + + self._nodes_tree.add_parsed_nodes(nodes, self._keyword_token_indents) + if self._replace_tos_indent is not None: + self._nodes_tree.indents[-1] = self._replace_tos_indent + + LOG.debug( + 'parse_part from %s to %s (to %s in part parser)', + nodes[0].get_start_pos_of_prefix()[0], + self._nodes_tree.parsed_until_line, + node.end_pos[0] - 1 + ) + # Since the tokenizer sometimes has bugs, we cannot be sure that + # this loop terminates. Therefore assert that there's always a + # change. + assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line + last_until_line = self._nodes_tree.parsed_until_line + + def _try_parse_part(self, until_line): + """ + Sets up a normal parser that uses a spezialized tokenizer to only parse + until a certain position (or a bit longer if the statement hasn't + ended. + """ + self._parser_count += 1 + # TODO speed up, shouldn't copy the whole list all the time. + # memoryview? 
+ parsed_until_line = self._nodes_tree.parsed_until_line + lines_after = self._parser_lines_new[parsed_until_line:] + tokens = self._diff_tokenize( + lines_after, + until_line, + line_offset=parsed_until_line + ) + self._active_parser = Parser( + self._pgen_grammar, + error_recovery=True + ) + return self._active_parser.parse(tokens=tokens) + + def _diff_tokenize(self, lines, until_line, line_offset=0): + was_newline = False + indents = self._nodes_tree.indents + initial_indentation_count = len(indents) + + tokens = self._tokenizer( + lines, + start_pos=(line_offset + 1, 0), + indents=indents, + is_first_token=line_offset == 0, + ) + stack = self._active_parser.stack + self._replace_tos_indent = None + self._keyword_token_indents = {} + # print('start', line_offset + 1, indents) + for token in tokens: + # print(token, indents) + typ = token.type + if typ == DEDENT: + if len(indents) < initial_indentation_count: + # We are done here, only thing that can come now is an + # endmarker or another dedented code block. + while True: + typ, string, start_pos, prefix = token = next(tokens) + if typ in (DEDENT, ERROR_DEDENT): + if typ == ERROR_DEDENT: + # We want to force an error dedent in the next + # parser/pass. To make this possible we just + # increase the location by one. + self._replace_tos_indent = start_pos[1] + 1 + pass + else: + break + + if '\n' in prefix or '\r' in prefix: + prefix = re.sub(r'[^\n\r]+\Z', '', prefix) + else: + assert start_pos[1] >= len(prefix), repr(prefix) + if start_pos[1] - len(prefix) == 0: + prefix = '' + yield PythonToken( + ENDMARKER, '', + start_pos, + prefix + ) + break + elif typ == NEWLINE and token.start_pos[0] >= until_line: + was_newline = True + elif was_newline: + was_newline = False + if len(indents) == initial_indentation_count: + # Check if the parser is actually in a valid suite state. 
+ if _suite_or_file_input_is_valid(self._pgen_grammar, stack): + yield PythonToken(ENDMARKER, '', token.start_pos, '') + break + + if typ == NAME and token.string in ('class', 'def'): + self._keyword_token_indents[token.start_pos] = list(indents) + + yield token + + +class _NodesTreeNode(object): + _ChildrenGroup = namedtuple( + '_ChildrenGroup', + 'prefix children line_offset last_line_offset_leaf') + + def __init__(self, tree_node, parent=None, indentation=0): + self.tree_node = tree_node + self._children_groups = [] + self.parent = parent + self._node_children = [] + self.indentation = indentation + + def finish(self): + children = [] + for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups: + first_leaf = _get_next_leaf_if_indentation( + children_part[0].get_first_leaf() + ) + + first_leaf.prefix = prefix + first_leaf.prefix + if line_offset != 0: + try: + _update_positions( + children_part, line_offset, last_line_offset_leaf) + except _PositionUpdatingFinished: + pass + children += children_part + self.tree_node.children = children + # Reset the parents + for node in children: + node.parent = self.tree_node + + for node_child in self._node_children: + node_child.finish() + + def add_child_node(self, child_node): + self._node_children.append(child_node) + + def add_tree_nodes(self, prefix, children, line_offset=0, + last_line_offset_leaf=None): + if last_line_offset_leaf is None: + last_line_offset_leaf = children[-1].get_last_leaf() + group = self._ChildrenGroup( + prefix, children, line_offset, last_line_offset_leaf + ) + self._children_groups.append(group) + + def get_last_line(self, suffix): + line = 0 + if self._children_groups: + children_group = self._children_groups[-1] + last_leaf = _get_previous_leaf_if_indentation( + children_group.last_line_offset_leaf + ) + + line = last_leaf.end_pos[0] + children_group.line_offset + + # Newlines end on the next line, which means that they would cover + # the next line. That line is not fully parsed at this point. + if _ends_with_newline(last_leaf, suffix): + line -= 1 + line += len(split_lines(suffix)) - 1 + + if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'): + # This is the end of a file (that doesn't end with a newline). 
+ line += 1 + + if self._node_children: + return max(line, self._node_children[-1].get_last_line(suffix)) + return line + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.tree_node) + + +class _NodesTree(object): + def __init__(self, module): + self._base_node = _NodesTreeNode(module) + self._working_stack = [self._base_node] + self._module = module + self._prefix_remainder = '' + self.prefix = '' + self.indents = [0] + + @property + def parsed_until_line(self): + return self._working_stack[-1].get_last_line(self.prefix) + + def _update_insertion_node(self, indentation): + for node in reversed(list(self._working_stack)): + if node.indentation < indentation or node is self._working_stack[0]: + return node + self._working_stack.pop() + + def add_parsed_nodes(self, tree_nodes, keyword_token_indents): + old_prefix = self.prefix + tree_nodes = self._remove_endmarker(tree_nodes) + if not tree_nodes: + self.prefix = old_prefix + self.prefix + return + + assert tree_nodes[0].type != 'newline' + + node = self._update_insertion_node(tree_nodes[0].start_pos[1]) + assert node.tree_node.type in ('suite', 'file_input') + node.add_tree_nodes(old_prefix, tree_nodes) + # tos = Top of stack + self._update_parsed_node_tos(tree_nodes[-1], keyword_token_indents) + + def _update_parsed_node_tos(self, tree_node, keyword_token_indents): + if tree_node.type == 'suite': + def_leaf = tree_node.parent.children[0] + new_tos = _NodesTreeNode( + tree_node, + indentation=keyword_token_indents[def_leaf.start_pos][-1], + ) + new_tos.add_tree_nodes('', list(tree_node.children)) + + self._working_stack[-1].add_child_node(new_tos) + self._working_stack.append(new_tos) + + self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents) + elif _func_or_class_has_suite(tree_node): + self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents) + + def _remove_endmarker(self, tree_nodes): + """ + Helps cleaning up the tree nodes that get inserted. + """ + last_leaf = tree_nodes[-1].get_last_leaf() + is_endmarker = last_leaf.type == 'endmarker' + self._prefix_remainder = '' + if is_endmarker: + prefix = last_leaf.prefix + separation = max(prefix.rfind('\n'), prefix.rfind('\r')) + if separation > -1: + # Remove the whitespace part of the prefix after a newline. + # That is not relevant if parentheses were opened. Always parse + # until the end of a line. + last_leaf.prefix, self._prefix_remainder = \ + last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:] + + self.prefix = '' + + if is_endmarker: + self.prefix = last_leaf.prefix + + tree_nodes = tree_nodes[:-1] + return tree_nodes + + def _get_matching_indent_nodes(self, tree_nodes, is_new_suite): + # There might be a random dedent where we have to stop copying. + # Invalid indents are ok, because the parser handled that + # properly before. An invalid dedent can happen, because a few + # lines above there was an invalid indent. + node_iterator = iter(tree_nodes) + if is_new_suite: + yield next(node_iterator) + + first_node = next(node_iterator) + indent = _get_indentation(first_node) + if not is_new_suite and indent not in self.indents: + return + yield first_node + + for n in node_iterator: + if _get_indentation(n) != indent: + return + yield n + + def copy_nodes(self, tree_nodes, until_line, line_offset): + """ + Copies tree nodes from the old parser tree. + + Returns the number of tree nodes that were copied. 
+ """ + if tree_nodes[0].type in ('error_leaf', 'error_node'): + # Avoid copying errors in the beginning. Can lead to a lot of + # issues. + return [] + + indentation = _get_indentation(tree_nodes[0]) + old_working_stack = list(self._working_stack) + old_prefix = self.prefix + old_indents = self.indents + self.indents = [i for i in self.indents if i <= indentation] + + self._update_insertion_node(indentation) + + new_nodes, self._working_stack, self.prefix, added_indents = self._copy_nodes( + list(self._working_stack), + tree_nodes, + until_line, + line_offset, + self.prefix, + ) + if new_nodes: + self.indents += added_indents + else: + self._working_stack = old_working_stack + self.prefix = old_prefix + self.indents = old_indents + return new_nodes + + def _copy_nodes(self, working_stack, nodes, until_line, line_offset, + prefix='', is_nested=False): + new_nodes = [] + added_indents = [] + + nodes = list(self._get_matching_indent_nodes( + nodes, + is_new_suite=is_nested, + )) + + new_prefix = '' + for node in nodes: + if node.start_pos[0] > until_line: + break + + if node.type == 'endmarker': + break + + if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'): + break + # TODO this check might take a bit of time for large files. We + # might want to change this to do more intelligent guessing or + # binary search. + if _get_last_line(node) > until_line: + # We can split up functions and classes later. + if _func_or_class_has_suite(node): + new_nodes.append(node) + break + try: + c = node.children + except AttributeError: + pass + else: + # This case basically appears with error recovery of one line + # suites like `def foo(): bar.-`. In this case we might not + # include a newline in the statement and we need to take care + # of that. + n = node + if n.type == 'decorated': + n = n.children[-1] + if n.type in ('async_funcdef', 'async_stmt'): + n = n.children[-1] + if n.type in ('classdef', 'funcdef'): + suite_node = n.children[-1] + else: + suite_node = c[-1] + + if suite_node.type in ('error_leaf', 'error_node'): + break + + new_nodes.append(node) + + # Pop error nodes at the end from the list + if new_nodes: + while new_nodes: + last_node = new_nodes[-1] + if (last_node.type in ('error_leaf', 'error_node') + or _is_flow_node(new_nodes[-1])): + # Error leafs/nodes don't have a defined start/end. Error + # nodes might not end with a newline (e.g. if there's an + # open `(`). Therefore ignore all of them unless they are + # succeeded with valid parser state. + # If we copy flows at the end, they might be continued + # after the copy limit (in the new parser). + # In this while loop we try to remove until we find a newline. + new_prefix = '' + new_nodes.pop() + while new_nodes: + last_node = new_nodes[-1] + if last_node.get_last_leaf().type == 'newline': + break + new_nodes.pop() + continue + if len(new_nodes) > 1 and new_nodes[-2].type == 'error_node': + # The problem here is that Parso error recovery sometimes + # influences nodes before this node. + # Since the new last node is an error node this will get + # cleaned up in the next while iteration. 
+ new_nodes.pop() + continue + break + + if not new_nodes: + return [], working_stack, prefix, added_indents + + tos = working_stack[-1] + last_node = new_nodes[-1] + had_valid_suite_last = False + # Pop incomplete suites from the list + if _func_or_class_has_suite(last_node): + suite = last_node + while suite.type != 'suite': + suite = suite.children[-1] + + indent = _get_suite_indentation(suite) + added_indents.append(indent) + + suite_tos = _NodesTreeNode(suite, indentation=_get_indentation(last_node)) + # Don't need to pass line_offset here, it's already done by the + # parent. + suite_nodes, new_working_stack, new_prefix, ai = self._copy_nodes( + working_stack + [suite_tos], suite.children, until_line, line_offset, + is_nested=True, + ) + added_indents += ai + if len(suite_nodes) < 2: + # A suite only with newline is not valid. + new_nodes.pop() + new_prefix = '' + else: + assert new_nodes + tos.add_child_node(suite_tos) + working_stack = new_working_stack + had_valid_suite_last = True + + if new_nodes: + if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last: + p = new_nodes[-1].get_next_leaf().prefix + # We are not allowed to remove the newline at the end of the + # line, otherwise it's going to be missing. This happens e.g. + # if a bracket is around before that moves newlines to + # prefixes. + new_prefix = split_lines(p, keepends=True)[0] + + if had_valid_suite_last: + last = new_nodes[-1] + if last.type == 'decorated': + last = last.children[-1] + if last.type in ('async_funcdef', 'async_stmt'): + last = last.children[-1] + last_line_offset_leaf = last.children[-2].get_last_leaf() + assert last_line_offset_leaf == ':' + else: + last_line_offset_leaf = new_nodes[-1].get_last_leaf() + tos.add_tree_nodes( + prefix, new_nodes, line_offset, last_line_offset_leaf, + ) + prefix = new_prefix + self._prefix_remainder = '' + + return new_nodes, working_stack, prefix, added_indents + + def close(self): + self._base_node.finish() + + # Add an endmarker. + try: + last_leaf = self._module.get_last_leaf() + except IndexError: + end_pos = [1, 0] + else: + last_leaf = _skip_dedent_error_leaves(last_leaf) + end_pos = list(last_leaf.end_pos) + lines = split_lines(self.prefix) + assert len(lines) > 0 + if len(lines) == 1: + if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]: + end_pos[1] -= 1 + end_pos[1] += len(lines[0]) + else: + end_pos[0] += len(lines) - 1 + end_pos[1] = len(lines[-1]) + + endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder) + endmarker.parent = self._module + self._module.children.append(endmarker) diff --git a/contrib/python/parso/py2/parso/python/errors.py b/contrib/python/parso/py2/parso/python/errors.py new file mode 100644 index 0000000000..54a66417b7 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/errors.py @@ -0,0 +1,1277 @@ +# -*- coding: utf-8 -*- +import codecs +import warnings +import re +from contextlib import contextmanager + +from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule +from parso.python.tree import search_ancestor +from parso.python.tokenize import _get_token_collection + +_BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt') +_STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist') +# This is the maximal block size given by python. 
+_MAX_BLOCK_SIZE = 20 +_MAX_INDENT_COUNT = 100 +ALLOWED_FUTURES = ( + 'nested_scopes', 'generators', 'division', 'absolute_import', + 'with_statement', 'print_function', 'unicode_literals', +) +_COMP_FOR_TYPES = ('comp_for', 'sync_comp_for') + +def _get_rhs_name(node, version): + type_ = node.type + if type_ == "lambdef": + return "lambda" + elif type_ == "atom": + comprehension = _get_comprehension_type(node) + first, second = node.children[:2] + if comprehension is not None: + return comprehension + elif second.type == "dictorsetmaker": + if version < (3, 8): + return "literal" + else: + if second.children[1] == ":" or second.children[0] == "**": + return "dict display" + else: + return "set display" + elif ( + first == "(" + and (second == ")" + or (len(node.children) == 3 and node.children[1].type == "testlist_comp")) + ): + return "tuple" + elif first == "(": + return _get_rhs_name(_remove_parens(node), version=version) + elif first == "[": + return "list" + elif first == "{" and second == "}": + return "dict display" + elif first == "{" and len(node.children) > 2: + return "set display" + elif type_ == "keyword": + if "yield" in node.value: + return "yield expression" + if version < (3, 8): + return "keyword" + else: + return str(node.value) + elif type_ == "operator" and node.value == "...": + return "Ellipsis" + elif type_ == "comparison": + return "comparison" + elif type_ in ("string", "number", "strings"): + return "literal" + elif type_ == "yield_expr": + return "yield expression" + elif type_ == "test": + return "conditional expression" + elif type_ in ("atom_expr", "power"): + if node.children[0] == "await": + return "await expression" + elif node.children[-1].type == "trailer": + trailer = node.children[-1] + if trailer.children[0] == "(": + return "function call" + elif trailer.children[0] == "[": + return "subscript" + elif trailer.children[0] == ".": + return "attribute" + elif ( + ("expr" in type_ + and "star_expr" not in type_) # is a substring + or "_test" in type_ + or type_ in ("term", "factor") + ): + return "operator" + elif type_ == "star_expr": + return "starred" + elif type_ == "testlist_star_expr": + return "tuple" + elif type_ == "fstring": + return "f-string expression" + return type_ # shouldn't reach here + +def _iter_stmts(scope): + """ + Iterates over all statements and splits up simple_stmt. + """ + for child in scope.children: + if child.type == 'simple_stmt': + for child2 in child.children: + if child2.type == 'newline' or child2 == ';': + continue + yield child2 + else: + yield child + + +def _get_comprehension_type(atom): + first, second = atom.children[:2] + if second.type == 'testlist_comp' and second.children[1].type in _COMP_FOR_TYPES: + if first == '[': + return 'list comprehension' + else: + return 'generator expression' + elif second.type == 'dictorsetmaker' and second.children[-1].type in _COMP_FOR_TYPES: + if second.children[1] == ':': + return 'dict comprehension' + else: + return 'set comprehension' + return None + + +def _is_future_import(import_from): + # It looks like a __future__ import that is relative is still a future + # import. That feels kind of odd, but whatever. + # if import_from.level != 0: + # return False + from_names = import_from.get_from_names() + return [n.value for n in from_names] == ['__future__'] + + +def _remove_parens(atom): + """ + Returns the inner part of an expression like `(foo)`. Also removes nested + parens. 
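_get_rhs_name above only maps a node to the phrase later used in an error message; a small sketch of what it answers for a parsed call expression (the sample source, the version string and the version tuple are illustrative)::

    import parso
    from parso.python.errors import _get_rhs_name

    module = parso.parse("f(x)\n", version='3.8')
    call = module.children[0].children[0]        # the atom_expr for f(x)
    print(call.type)                             # atom_expr
    print(_get_rhs_name(call, version=(3, 8)))   # function call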
+ """ + try: + children = atom.children + except AttributeError: + pass + else: + if len(children) == 3 and children[0] == '(': + return _remove_parens(atom.children[1]) + return atom + + +def _iter_params(parent_node): + return (n for n in parent_node.children if n.type == 'param') + + +def _is_future_import_first(import_from): + """ + Checks if the import is the first statement of a file. + """ + found_docstring = False + for stmt in _iter_stmts(import_from.get_root_node()): + if stmt.type == 'string' and not found_docstring: + continue + found_docstring = True + + if stmt == import_from: + return True + if stmt.type == 'import_from' and _is_future_import(stmt): + continue + return False + + +def _iter_definition_exprs_from_lists(exprlist): + def check_expr(child): + if child.type == 'atom': + if child.children[0] == '(': + testlist_comp = child.children[1] + if testlist_comp.type == 'testlist_comp': + for expr in _iter_definition_exprs_from_lists(testlist_comp): + yield expr + return + else: + # It's a paren that doesn't do anything, like 1 + (1) + for c in check_expr(testlist_comp): + yield c + return + elif child.children[0] == '[': + yield testlist_comp + return + yield child + + if exprlist.type in _STAR_EXPR_PARENTS: + for child in exprlist.children[::2]: + for c in check_expr(child): # Python 2 sucks + yield c + else: + for c in check_expr(exprlist): # Python 2 sucks + yield c + + +def _get_expr_stmt_definition_exprs(expr_stmt): + exprs = [] + for list_ in expr_stmt.children[:-2:2]: + if list_.type in ('testlist_star_expr', 'testlist'): + exprs += _iter_definition_exprs_from_lists(list_) + else: + exprs.append(list_) + return exprs + + +def _get_for_stmt_definition_exprs(for_stmt): + exprlist = for_stmt.children[1] + return list(_iter_definition_exprs_from_lists(exprlist)) + + +def _is_argument_comprehension(argument): + return argument.children[1].type in _COMP_FOR_TYPES + + +def _any_fstring_error(version, node): + if version < (3, 9) or node is None: + return False + if node.type == "error_node": + return any(child.type == "fstring_start" for child in node.children) + elif node.type == "fstring": + return True + else: + return search_ancestor(node, "fstring") + + +class _Context(object): + def __init__(self, node, add_syntax_error, parent_context=None): + self.node = node + self.blocks = [] + self.parent_context = parent_context + self._used_name_dict = {} + self._global_names = [] + self._nonlocal_names = [] + self._nonlocal_names_in_subscopes = [] + self._add_syntax_error = add_syntax_error + + def is_async_funcdef(self): + # Stupidly enough async funcdefs can have two different forms, + # depending if a decorator is used or not. + return self.is_function() \ + and self.node.parent.type in ('async_funcdef', 'async_stmt') + + def is_function(self): + return self.node.type == 'funcdef' + + def add_name(self, name): + parent_type = name.parent.type + if parent_type == 'trailer': + # We are only interested in first level names. + return + + if parent_type == 'global_stmt': + self._global_names.append(name) + elif parent_type == 'nonlocal_stmt': + self._nonlocal_names.append(name) + else: + self._used_name_dict.setdefault(name.value, []).append(name) + + def finalize(self): + """ + Returns a list of nonlocal names that need to be part of that scope. 
+ """ + self._analyze_names(self._global_names, 'global') + self._analyze_names(self._nonlocal_names, 'nonlocal') + + global_name_strs = {n.value: n for n in self._global_names} + for nonlocal_name in self._nonlocal_names: + try: + global_name = global_name_strs[nonlocal_name.value] + except KeyError: + continue + + message = "name '%s' is nonlocal and global" % global_name.value + if global_name.start_pos < nonlocal_name.start_pos: + error_name = global_name + else: + error_name = nonlocal_name + self._add_syntax_error(error_name, message) + + nonlocals_not_handled = [] + for nonlocal_name in self._nonlocal_names_in_subscopes: + search = nonlocal_name.value + if search in global_name_strs or self.parent_context is None: + message = "no binding for nonlocal '%s' found" % nonlocal_name.value + self._add_syntax_error(nonlocal_name, message) + elif not self.is_function() or \ + nonlocal_name.value not in self._used_name_dict: + nonlocals_not_handled.append(nonlocal_name) + return self._nonlocal_names + nonlocals_not_handled + + def _analyze_names(self, globals_or_nonlocals, type_): + def raise_(message): + self._add_syntax_error(base_name, message % (base_name.value, type_)) + + params = [] + if self.node.type == 'funcdef': + params = self.node.get_params() + + for base_name in globals_or_nonlocals: + found_global_or_nonlocal = False + # Somehow Python does it the reversed way. + for name in reversed(self._used_name_dict.get(base_name.value, [])): + if name.start_pos > base_name.start_pos: + # All following names don't have to be checked. + found_global_or_nonlocal = True + + parent = name.parent + if parent.type == 'param' and parent.name == name: + # Skip those here, these definitions belong to the next + # scope. + continue + + if name.is_definition(): + if parent.type == 'expr_stmt' \ + and parent.children[1].type == 'annassign': + if found_global_or_nonlocal: + # If it's after the global the error seems to be + # placed there. + base_name = name + raise_("annotated name '%s' can't be %s") + break + else: + message = "name '%s' is assigned to before %s declaration" + else: + message = "name '%s' is used prior to %s declaration" + + if not found_global_or_nonlocal: + raise_(message) + # Only add an error for the first occurence. + break + + for param in params: + if param.name.value == base_name.value: + raise_("name '%s' is parameter and %s"), + + @contextmanager + def add_block(self, node): + self.blocks.append(node) + yield + self.blocks.pop() + + def add_context(self, node): + return _Context(node, self._add_syntax_error, parent_context=self) + + def close_child_context(self, child_context): + self._nonlocal_names_in_subscopes += child_context.finalize() + + +class ErrorFinder(Normalizer): + """ + Searches for errors in the syntax tree. + """ + def __init__(self, *args, **kwargs): + super(ErrorFinder, self).__init__(*args, **kwargs) + self._error_dict = {} + self.version = self.grammar.version_info + + def initialize(self, node): + def create_context(node): + if node is None: + return None + + parent_context = create_context(node.parent) + if node.type in ('classdef', 'funcdef', 'file_input'): + return _Context(node, self._add_syntax_error, parent_context) + return parent_context + + self.context = create_context(node) or _Context(node, self._add_syntax_error) + self._indentation_count = 0 + + def visit(self, node): + if node.type == 'error_node': + with self.visit_node(node): + # Don't need to investigate the inners of an error node. 
We + # might find errors in there that should be ignored, because + # the error node itself already shows that there's an issue. + return '' + return super(ErrorFinder, self).visit(node) + + @contextmanager + def visit_node(self, node): + self._check_type_rules(node) + + if node.type in _BLOCK_STMTS: + with self.context.add_block(node): + if len(self.context.blocks) == _MAX_BLOCK_SIZE: + self._add_syntax_error(node, "too many statically nested blocks") + yield + return + elif node.type == 'suite': + self._indentation_count += 1 + if self._indentation_count == _MAX_INDENT_COUNT: + self._add_indentation_error(node.children[1], "too many levels of indentation") + + yield + + if node.type == 'suite': + self._indentation_count -= 1 + elif node.type in ('classdef', 'funcdef'): + context = self.context + self.context = context.parent_context + self.context.close_child_context(context) + + def visit_leaf(self, leaf): + if leaf.type == 'error_leaf': + if leaf.token_type in ('INDENT', 'ERROR_DEDENT'): + # Indents/Dedents itself never have a prefix. They are just + # "pseudo" tokens that get removed by the syntax tree later. + # Therefore in case of an error we also have to check for this. + spacing = list(leaf.get_next_leaf()._split_prefix())[-1] + if leaf.token_type == 'INDENT': + message = 'unexpected indent' + else: + message = 'unindent does not match any outer indentation level' + self._add_indentation_error(spacing, message) + else: + if leaf.value.startswith('\\'): + message = 'unexpected character after line continuation character' + else: + match = re.match('\\w{,2}("{1,3}|\'{1,3})', leaf.value) + if match is None: + message = 'invalid syntax' + if ( + self.version >= (3, 9) + and leaf.value in _get_token_collection(self.version).always_break_tokens + ): + message = "f-string: " + message + else: + if len(match.group(1)) == 1: + message = 'EOL while scanning string literal' + else: + message = 'EOF while scanning triple-quoted string literal' + self._add_syntax_error(leaf, message) + return '' + elif leaf.value == ':': + parent = leaf.parent + if parent.type in ('classdef', 'funcdef'): + self.context = self.context.add_context(parent) + + # The rest is rule based. + return super(ErrorFinder, self).visit_leaf(leaf) + + def _add_indentation_error(self, spacing, message): + self.add_issue(spacing, 903, "IndentationError: " + message) + + def _add_syntax_error(self, node, message): + self.add_issue(node, 901, "SyntaxError: " + message) + + def add_issue(self, node, code, message): + # Overwrite the default behavior. + # Check if the issues are on the same line. + line = node.start_pos[0] + args = (code, message, node) + self._error_dict.setdefault(line, args) + + def finalize(self): + self.context.finalize() + + for code, message, node in self._error_dict.values(): + self.issues.append(Issue(node, code, message)) + + +class IndentationRule(Rule): + code = 903 + + def _get_message(self, message, node): + message = super(IndentationRule, self)._get_message(message, node) + return "IndentationError: " + message + + +@ErrorFinder.register_rule(type='error_node') +class _ExpectIndentedBlock(IndentationRule): + message = 'expected an indented block' + + def get_node(self, node): + leaf = node.get_next_leaf() + return list(leaf._split_prefix())[-1] + + def is_issue(self, node): + # This is the beginning of a suite that is not indented. 
+ return node.children[-1].type == 'newline' + + +class ErrorFinderConfig(NormalizerConfig): + normalizer_class = ErrorFinder + + +class SyntaxRule(Rule): + code = 901 + + def _get_message(self, message, node): + message = super(SyntaxRule, self)._get_message(message, node) + if ( + "f-string" not in message + and _any_fstring_error(self._normalizer.version, node) + ): + message = "f-string: " + message + return "SyntaxError: " + message + + +@ErrorFinder.register_rule(type='error_node') +class _InvalidSyntaxRule(SyntaxRule): + message = "invalid syntax" + fstring_message = "f-string: invalid syntax" + + def get_node(self, node): + return node.get_next_leaf() + + def is_issue(self, node): + error = node.get_next_leaf().type != 'error_leaf' + if ( + error + and _any_fstring_error(self._normalizer.version, node) + ): + self.add_issue(node, message=self.fstring_message) + else: + # Error leafs will be added later as an error. + return error + + +@ErrorFinder.register_rule(value='await') +class _AwaitOutsideAsync(SyntaxRule): + message = "'await' outside async function" + + def is_issue(self, leaf): + return not self._normalizer.context.is_async_funcdef() + + def get_error_node(self, node): + # Return the whole await statement. + return node.parent + + +@ErrorFinder.register_rule(value='break') +class _BreakOutsideLoop(SyntaxRule): + message = "'break' outside loop" + + def is_issue(self, leaf): + in_loop = False + for block in self._normalizer.context.blocks: + if block.type in ('for_stmt', 'while_stmt'): + in_loop = True + return not in_loop + + +@ErrorFinder.register_rule(value='continue') +class _ContinueChecks(SyntaxRule): + message = "'continue' not properly in loop" + message_in_finally = "'continue' not supported inside 'finally' clause" + + def is_issue(self, leaf): + in_loop = False + for block in self._normalizer.context.blocks: + if block.type in ('for_stmt', 'while_stmt'): + in_loop = True + if block.type == 'try_stmt': + last_block = block.children[-3] + if ( + last_block == "finally" + and leaf.start_pos > last_block.start_pos + and self._normalizer.version < (3, 8) + ): + self.add_issue(leaf, message=self.message_in_finally) + return False # Error already added + if not in_loop: + return True + + +@ErrorFinder.register_rule(value='from') +class _YieldFromCheck(SyntaxRule): + message = "'yield from' inside async function" + + def get_node(self, leaf): + return leaf.parent.parent # This is the actual yield statement. + + def is_issue(self, leaf): + return leaf.parent.type == 'yield_arg' \ + and self._normalizer.context.is_async_funcdef() + + +@ErrorFinder.register_rule(type='name') +class _NameChecks(SyntaxRule): + message = 'cannot assign to __debug__' + message_none = 'cannot assign to None' + + def is_issue(self, leaf): + self._normalizer.context.add_name(leaf) + + if leaf.value == '__debug__' and leaf.is_definition(): + return True + if leaf.value == 'None' and self._normalizer.version < (3, 0) \ + and leaf.is_definition(): + self.add_issue(leaf, message=self.message_none) + + +@ErrorFinder.register_rule(type='string') +class _StringChecks(SyntaxRule): + message = "bytes can only contain ASCII literal characters." + + def is_issue(self, leaf): + string_prefix = leaf.string_prefix.lower() + if 'b' in string_prefix \ + and self._normalizer.version >= (3, 0) \ + and any(c for c in leaf.value if ord(c) > 127): + # b'ä' + return True + + if 'r' not in string_prefix: + # Raw strings don't need to be checked if they have proper + # escaping. 
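Rules such as _BreakOutsideLoop and _ContinueChecks above surface through parso's public error listing; a short sketch, with the code string and grammar version chosen only for illustration::

    import parso

    grammar = parso.load_grammar(version='3.6')
    module = grammar.parse("continue\nbreak\n")
    for issue in grammar.iter_errors(module):
        print(issue.code, issue.start_pos, issue.message)
    # e.g. 901 (1, 0) SyntaxError: 'continue' not properly in loop
    #      901 (2, 0) SyntaxError: 'break' outside loop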
+ is_bytes = self._normalizer.version < (3, 0) + if 'b' in string_prefix: + is_bytes = True + if 'u' in string_prefix: + is_bytes = False + + payload = leaf._get_payload() + if is_bytes: + payload = payload.encode('utf-8') + func = codecs.escape_decode + else: + func = codecs.unicode_escape_decode + + try: + with warnings.catch_warnings(): + # The warnings from parsing strings are not relevant. + warnings.filterwarnings('ignore') + func(payload) + except UnicodeDecodeError as e: + self.add_issue(leaf, message='(unicode error) ' + str(e)) + except ValueError as e: + self.add_issue(leaf, message='(value error) ' + str(e)) + + +@ErrorFinder.register_rule(value='*') +class _StarCheck(SyntaxRule): + message = "named arguments must follow bare *" + + def is_issue(self, leaf): + params = leaf.parent + if params.type == 'parameters' and params: + after = params.children[params.children.index(leaf) + 1:] + after = [child for child in after + if child not in (',', ')') and not child.star_count] + return len(after) == 0 + + +@ErrorFinder.register_rule(value='**') +class _StarStarCheck(SyntaxRule): + # e.g. {**{} for a in [1]} + # TODO this should probably get a better end_pos including + # the next sibling of leaf. + message = "dict unpacking cannot be used in dict comprehension" + + def is_issue(self, leaf): + if leaf.parent.type == 'dictorsetmaker': + comp_for = leaf.get_next_sibling().get_next_sibling() + return comp_for is not None and comp_for.type in _COMP_FOR_TYPES + + +@ErrorFinder.register_rule(value='yield') +@ErrorFinder.register_rule(value='return') +class _ReturnAndYieldChecks(SyntaxRule): + message = "'return' with value in async generator" + message_async_yield = "'yield' inside async function" + + def get_node(self, leaf): + return leaf.parent + + def is_issue(self, leaf): + if self._normalizer.context.node.type != 'funcdef': + self.add_issue(self.get_node(leaf), message="'%s' outside function" % leaf.value) + elif self._normalizer.context.is_async_funcdef() \ + and any(self._normalizer.context.node.iter_yield_exprs()): + if leaf.value == 'return' and leaf.parent.type == 'return_stmt': + return True + elif leaf.value == 'yield' \ + and leaf.get_next_leaf() != 'from' \ + and self._normalizer.version == (3, 5): + self.add_issue(self.get_node(leaf), message=self.message_async_yield) + + +@ErrorFinder.register_rule(type='strings') +class _BytesAndStringMix(SyntaxRule): + # e.g. 's' b'' + message = "cannot mix bytes and nonbytes literals" + + def _is_bytes_literal(self, string): + if string.type == 'fstring': + return False + return 'b' in string.string_prefix.lower() + + def is_issue(self, node): + first = node.children[0] + # In Python 2 it's allowed to mix bytes and unicode. + if self._normalizer.version >= (3, 0): + first_is_bytes = self._is_bytes_literal(first) + for string in node.children[1:]: + if first_is_bytes != self._is_bytes_literal(string): + return True + + +@ErrorFinder.register_rule(type='import_as_names') +class _TrailingImportComma(SyntaxRule): + # e.g. 
from foo import a, + message = "trailing comma not allowed without surrounding parentheses" + + def is_issue(self, node): + if node.children[-1] == ',' and node.parent.children[-1] != ')': + return True + + +@ErrorFinder.register_rule(type='import_from') +class _ImportStarInFunction(SyntaxRule): + message = "import * only allowed at module level" + + def is_issue(self, node): + return node.is_star_import() and self._normalizer.context.parent_context is not None + + +@ErrorFinder.register_rule(type='import_from') +class _FutureImportRule(SyntaxRule): + message = "from __future__ imports must occur at the beginning of the file" + + def is_issue(self, node): + if _is_future_import(node): + if not _is_future_import_first(node): + return True + + for from_name, future_name in node.get_paths(): + name = future_name.value + allowed_futures = list(ALLOWED_FUTURES) + if self._normalizer.version >= (3, 5): + allowed_futures.append('generator_stop') + if self._normalizer.version >= (3, 7): + allowed_futures.append('annotations') + if name == 'braces': + self.add_issue(node, message="not a chance") + elif name == 'barry_as_FLUFL': + m = "Seriously I'm not implementing this :) ~ Dave" + self.add_issue(node, message=m) + elif name not in allowed_futures: + message = "future feature %s is not defined" % name + self.add_issue(node, message=message) + + +@ErrorFinder.register_rule(type='star_expr') +class _StarExprRule(SyntaxRule): + message_iterable_unpacking = "iterable unpacking cannot be used in comprehension" + message_assignment = "can use starred expression only as assignment target" + + def is_issue(self, node): + if node.parent.type == 'testlist_comp': + # [*[] for a in [1]] + if node.parent.children[1].type in _COMP_FOR_TYPES: + self.add_issue(node, message=self.message_iterable_unpacking) + if self._normalizer.version <= (3, 4): + n = search_ancestor(node, 'for_stmt', 'expr_stmt') + found_definition = False + if n is not None: + if n.type == 'expr_stmt': + exprs = _get_expr_stmt_definition_exprs(n) + else: + exprs = _get_for_stmt_definition_exprs(n) + if node in exprs: + found_definition = True + + if not found_definition: + self.add_issue(node, message=self.message_assignment) + + +@ErrorFinder.register_rule(types=_STAR_EXPR_PARENTS) +class _StarExprParentRule(SyntaxRule): + def is_issue(self, node): + if node.parent.type == 'del_stmt': + if self._normalizer.version >= (3, 9): + self.add_issue(node.parent, message="cannot delete starred") + else: + self.add_issue(node.parent, message="can't use starred expression here") + else: + def is_definition(node, ancestor): + if ancestor is None: + return False + + type_ = ancestor.type + if type_ == 'trailer': + return False + + if type_ == 'expr_stmt': + return node.start_pos < ancestor.children[-1].start_pos + + return is_definition(node, ancestor.parent) + + if is_definition(node, node.parent): + args = [c for c in node.children if c != ','] + starred = [c for c in args if c.type == 'star_expr'] + if len(starred) > 1: + if self._normalizer.version < (3, 9): + message = "two starred expressions in assignment" + else: + message = "multiple starred expressions in assignment" + self.add_issue(starred[1], message=message) + elif starred: + count = args.index(starred[0]) + if count >= 256: + message = "too many expressions in star-unpacking assignment" + self.add_issue(starred[0], message=message) + + +@ErrorFinder.register_rule(type='annassign') +class _AnnotatorRule(SyntaxRule): + # True: int + # {}: float + message = "illegal target for annotation" + + 
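+    # For the default message the issue is reported on the whole statement (node.parent).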
def get_node(self, node): + return node.parent + + def is_issue(self, node): + type_ = None + lhs = node.parent.children[0] + lhs = _remove_parens(lhs) + try: + children = lhs.children + except AttributeError: + pass + else: + if ',' in children or lhs.type == 'atom' and children[0] == '(': + type_ = 'tuple' + elif lhs.type == 'atom' and children[0] == '[': + type_ = 'list' + trailer = children[-1] + + if type_ is None: + if not (lhs.type == 'name' + # subscript/attributes are allowed + or lhs.type in ('atom_expr', 'power') + and trailer.type == 'trailer' + and trailer.children[0] != '('): + return True + else: + # x, y: str + message = "only single target (not %s) can be annotated" + self.add_issue(lhs.parent, message=message % type_) + + +@ErrorFinder.register_rule(type='argument') +class _ArgumentRule(SyntaxRule): + def is_issue(self, node): + first = node.children[0] + if self._normalizer.version < (3, 8): + # a((b)=c) is valid in <3.8 + first = _remove_parens(first) + if node.children[1] == '=' and first.type != 'name': + if first.type == 'lambdef': + # f(lambda: 1=1) + if self._normalizer.version < (3, 8): + message = "lambda cannot contain assignment" + else: + message = 'expression cannot contain assignment, perhaps you meant "=="?' + else: + # f(+x=1) + if self._normalizer.version < (3, 8): + message = "keyword can't be an expression" + else: + message = 'expression cannot contain assignment, perhaps you meant "=="?' + self.add_issue(first, message=message) + + if _is_argument_comprehension(node) and node.parent.type == 'classdef': + self.add_issue(node, message='invalid syntax') + + +@ErrorFinder.register_rule(type='nonlocal_stmt') +class _NonlocalModuleLevelRule(SyntaxRule): + message = "nonlocal declaration not allowed at module level" + + def is_issue(self, node): + return self._normalizer.context.parent_context is None + + +@ErrorFinder.register_rule(type='arglist') +class _ArglistRule(SyntaxRule): + @property + def message(self): + if self._normalizer.version < (3, 7): + return "Generator expression must be parenthesized if not sole argument" + else: + return "Generator expression must be parenthesized" + + def is_issue(self, node): + arg_set = set() + kw_only = False + kw_unpacking_only = False + is_old_starred = False + # In python 3 this would be a bit easier (stars are part of + # argument), but we have to understand both. + for argument in node.children: + if argument == ',': + continue + + if argument in ('*', '**'): + # Python < 3.5 has the order engraved in the grammar + # file. No need to do anything here. + is_old_starred = True + continue + if is_old_starred: + is_old_starred = False + continue + + if argument.type == 'argument': + first = argument.children[0] + if _is_argument_comprehension(argument) and len(node.children) >= 2: + # a(a, b for b in c) + return True + + if first in ('*', '**'): + if first == '*': + if kw_unpacking_only: + # foo(**kwargs, *args) + message = "iterable argument unpacking " \ + "follows keyword argument unpacking" + self.add_issue(argument, message=message) + else: + kw_unpacking_only = True + else: # Is a keyword argument. 
+ kw_only = True + if first.type == 'name': + if first.value in arg_set: + # f(x=1, x=2) + message = "keyword argument repeated" + if self._normalizer.version >= (3, 9): + message += ": {}".format(first.value) + self.add_issue(first, message=message) + else: + arg_set.add(first.value) + else: + if kw_unpacking_only: + # f(**x, y) + message = "positional argument follows keyword argument unpacking" + self.add_issue(argument, message=message) + elif kw_only: + # f(x=2, y) + message = "positional argument follows keyword argument" + self.add_issue(argument, message=message) + + +@ErrorFinder.register_rule(type='parameters') +@ErrorFinder.register_rule(type='lambdef') +class _ParameterRule(SyntaxRule): + # def f(x=3, y): pass + message = "non-default argument follows default argument" + + def is_issue(self, node): + param_names = set() + default_only = False + for p in _iter_params(node): + if p.name.value in param_names: + message = "duplicate argument '%s' in function definition" + self.add_issue(p.name, message=message % p.name.value) + param_names.add(p.name.value) + + if p.default is None and not p.star_count: + if default_only: + return True + else: + default_only = True + + +@ErrorFinder.register_rule(type='try_stmt') +class _TryStmtRule(SyntaxRule): + message = "default 'except:' must be last" + + def is_issue(self, try_stmt): + default_except = None + for except_clause in try_stmt.children[3::3]: + if except_clause in ('else', 'finally'): + break + if except_clause == 'except': + default_except = except_clause + elif default_except is not None: + self.add_issue(default_except, message=self.message) + + +@ErrorFinder.register_rule(type='fstring') +class _FStringRule(SyntaxRule): + _fstring_grammar = None + message_expr = "f-string expression part cannot include a backslash" + message_nested = "f-string: expressions nested too deeply" + message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'" + + def _check_format_spec(self, format_spec, depth): + self._check_fstring_contents(format_spec.children[1:], depth) + + def _check_fstring_expr(self, fstring_expr, depth): + if depth >= 2: + self.add_issue(fstring_expr, message=self.message_nested) + + expr = fstring_expr.children[1] + if '\\' in expr.get_code(): + self.add_issue(expr, message=self.message_expr) + + conversion = fstring_expr.children[2] + if conversion.type == 'fstring_conversion': + name = conversion.children[1] + if name.value not in ('s', 'r', 'a'): + self.add_issue(name, message=self.message_conversion) + + format_spec = fstring_expr.children[-2] + if format_spec.type == 'fstring_format_spec': + self._check_format_spec(format_spec, depth + 1) + + def is_issue(self, fstring): + self._check_fstring_contents(fstring.children[1:-1]) + + def _check_fstring_contents(self, children, depth=0): + for fstring_content in children: + if fstring_content.type == 'fstring_expr': + self._check_fstring_expr(fstring_content, depth) + + +class _CheckAssignmentRule(SyntaxRule): + def _check_assignment(self, node, is_deletion=False, is_namedexpr=False, is_aug_assign=False): + error = None + type_ = node.type + if type_ == 'lambdef': + error = 'lambda' + elif type_ == 'atom': + first, second = node.children[:2] + error = _get_comprehension_type(node) + if error is None: + if second.type == 'dictorsetmaker': + if self._normalizer.version < (3, 8): + error = 'literal' + else: + if second.children[1] == ':': + error = 'dict display' + else: + error = 'set display' + elif first == "{" and second == "}": + if 
self._normalizer.version < (3, 8): + error = 'literal' + else: + error = "dict display" + elif first == "{" and len(node.children) > 2: + if self._normalizer.version < (3, 8): + error = 'literal' + else: + error = "set display" + elif first in ('(', '['): + if second.type == 'yield_expr': + error = 'yield expression' + elif second.type == 'testlist_comp': + # ([a, b] := [1, 2]) + # ((a, b) := [1, 2]) + if is_namedexpr: + if first == '(': + error = 'tuple' + elif first == '[': + error = 'list' + + # This is not a comprehension, they were handled + # further above. + for child in second.children[::2]: + self._check_assignment(child, is_deletion, is_namedexpr, is_aug_assign) + else: # Everything handled, must be useless brackets. + self._check_assignment(second, is_deletion, is_namedexpr, is_aug_assign) + elif type_ == 'keyword': + if node.value == "yield": + error = "yield expression" + elif self._normalizer.version < (3, 8): + error = 'keyword' + else: + error = str(node.value) + elif type_ == 'operator': + if node.value == '...': + error = 'Ellipsis' + elif type_ == 'comparison': + error = 'comparison' + elif type_ in ('string', 'number', 'strings'): + error = 'literal' + elif type_ == 'yield_expr': + # This one seems to be a slightly different warning in Python. + message = 'assignment to yield expression not possible' + self.add_issue(node, message=message) + elif type_ == 'test': + error = 'conditional expression' + elif type_ in ('atom_expr', 'power'): + if node.children[0] == 'await': + error = 'await expression' + elif node.children[-2] == '**': + error = 'operator' + else: + # Has a trailer + trailer = node.children[-1] + assert trailer.type == 'trailer' + if trailer.children[0] == '(': + error = 'function call' + elif is_namedexpr and trailer.children[0] == '[': + error = 'subscript' + elif is_namedexpr and trailer.children[0] == '.': + error = 'attribute' + elif type_ == "fstring": + if self._normalizer.version < (3, 8): + error = 'literal' + else: + error = "f-string expression" + elif type_ in ('testlist_star_expr', 'exprlist', 'testlist'): + for child in node.children[::2]: + self._check_assignment(child, is_deletion, is_namedexpr, is_aug_assign) + elif ('expr' in type_ and type_ != 'star_expr' # is a substring + or '_test' in type_ + or type_ in ('term', 'factor')): + error = 'operator' + elif type_ == "star_expr": + if is_deletion: + if self._normalizer.version >= (3, 9): + error = "starred" + else: + self.add_issue(node, message="can't use starred expression here") + elif not search_ancestor(node, *_STAR_EXPR_PARENTS) and not is_aug_assign: + self.add_issue(node, message="starred assignment target must be in a list or tuple") + + self._check_assignment(node.children[1]) + + if error is not None: + if is_namedexpr: + message = 'cannot use assignment expressions with %s' % error + else: + cannot = "can't" if self._normalizer.version < (3, 8) else "cannot" + message = ' '.join([cannot, "delete" if is_deletion else "assign to", error]) + self.add_issue(node, message=message) + + +@ErrorFinder.register_rule(type='sync_comp_for') +class _CompForRule(_CheckAssignmentRule): + message = "asynchronous comprehension outside of an asynchronous function" + + def is_issue(self, node): + expr_list = node.children[1] + if expr_list.type != 'expr_list': # Already handled. 
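+            # expr_list targets are covered by _ExprListRule; only single targets are checked here.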
+ self._check_assignment(expr_list) + + return node.parent.children[0] == 'async' \ + and not self._normalizer.context.is_async_funcdef() + + +@ErrorFinder.register_rule(type='expr_stmt') +class _ExprStmtRule(_CheckAssignmentRule): + message = "illegal expression for augmented assignment" + extended_message = "'{target}' is an " + message + def is_issue(self, node): + augassign = node.children[1] + is_aug_assign = augassign != '=' and augassign.type != 'annassign' + + if self._normalizer.version <= (3, 8) or not is_aug_assign: + for before_equal in node.children[:-2:2]: + self._check_assignment(before_equal, is_aug_assign=is_aug_assign) + + if is_aug_assign: + target = _remove_parens(node.children[0]) + # a, a[b], a.b + + if target.type == "name" or ( + target.type in ("atom_expr", "power") + and target.children[1].type == "trailer" + and target.children[-1].children[0] != "(" + ): + return False + + if self._normalizer.version <= (3, 8): + return True + else: + self.add_issue( + node, + message=self.extended_message.format( + target=_get_rhs_name(node.children[0], self._normalizer.version) + ), + ) + +@ErrorFinder.register_rule(type='with_item') +class _WithItemRule(_CheckAssignmentRule): + def is_issue(self, with_item): + self._check_assignment(with_item.children[2]) + + +@ErrorFinder.register_rule(type='del_stmt') +class _DelStmtRule(_CheckAssignmentRule): + def is_issue(self, del_stmt): + child = del_stmt.children[1] + + if child.type != 'expr_list': # Already handled. + self._check_assignment(child, is_deletion=True) + + +@ErrorFinder.register_rule(type='expr_list') +class _ExprListRule(_CheckAssignmentRule): + def is_issue(self, expr_list): + for expr in expr_list.children[::2]: + self._check_assignment(expr) + + +@ErrorFinder.register_rule(type='for_stmt') +class _ForStmtRule(_CheckAssignmentRule): + def is_issue(self, for_stmt): + # Some of the nodes here are already used, so no else if + expr_list = for_stmt.children[1] + if expr_list.type != 'expr_list': # Already handled. 
+ self._check_assignment(expr_list) + + +@ErrorFinder.register_rule(type='namedexpr_test') +class _NamedExprRule(_CheckAssignmentRule): + # namedexpr_test: test [':=' test] + + def is_issue(self, namedexpr_test): + # assigned name + first = namedexpr_test.children[0] + + def search_namedexpr_in_comp_for(node): + while True: + parent = node.parent + if parent is None: + return parent + if parent.type == 'sync_comp_for' and parent.children[3] == node: + return parent + node = parent + + if search_namedexpr_in_comp_for(namedexpr_test): + # [i+1 for i in (i := range(5))] + # [i+1 for i in (j := range(5))] + # [i+1 for i in (lambda: (j := range(5)))()] + message = 'assignment expression cannot be used in a comprehension iterable expression' + self.add_issue(namedexpr_test, message=message) + + # defined names + exprlist = list() + + def process_comp_for(comp_for): + if comp_for.type == 'sync_comp_for': + comp = comp_for + elif comp_for.type == 'comp_for': + comp = comp_for.children[1] + exprlist.extend(_get_for_stmt_definition_exprs(comp)) + + def search_all_comp_ancestors(node): + has_ancestors = False + while True: + node = search_ancestor(node, 'testlist_comp', 'dictorsetmaker') + if node is None: + break + for child in node.children: + if child.type in _COMP_FOR_TYPES: + process_comp_for(child) + has_ancestors = True + break + return has_ancestors + + # check assignment expressions in comprehensions + search_all = search_all_comp_ancestors(namedexpr_test) + if search_all: + if self._normalizer.context.node.type == 'classdef': + message = 'assignment expression within a comprehension ' \ + 'cannot be used in a class body' + self.add_issue(namedexpr_test, message=message) + + namelist = [expr.value for expr in exprlist if expr.type == 'name'] + if first.type == 'name' and first.value in namelist: + # [i := 0 for i, j in range(5)] + # [[(i := i) for j in range(5)] for i in range(5)] + # [i for i, j in range(5) if True or (i := 1)] + # [False and (i := 0) for i, j in range(5)] + message = 'assignment expression cannot rebind ' \ + 'comprehension iteration variable %r' % first.value + self.add_issue(namedexpr_test, message=message) + + self._check_assignment(first, is_namedexpr=True) diff --git a/contrib/python/parso/py2/parso/python/grammar27.txt b/contrib/python/parso/py2/parso/python/grammar27.txt new file mode 100644 index 0000000000..29f1b827c0 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/grammar27.txt @@ -0,0 +1,143 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake <fdrake@acm.org> will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() and input() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ':' suite +parameters: '(' [varargslist] ')' +varargslist: ((fpdef ['=' test] ',')* + ('*' NAME [',' '**' NAME] | '**' NAME) | + fpdef ['=' test] (',' fpdef ['=' test])* [',']) +fpdef: NAME | '(' fplist ')' +fplist: fpdef (',' fpdef)* [','] + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | exec_stmt | assert_stmt) +expr_stmt: testlist (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist))*) +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +print_stmt: 'print' ( [ test (',' test)* [','] ] | + '>>' test [ (',' test)+ [','] ] ) +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test [',' test [',' test]]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +import_from: ('from' ('.'* dotted_name | '.'+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +exec_stmt: 'exec' expr ['in' test [',' test]] +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test [('as' | ',') test]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +# Backward compatibility cruft to support: +# [ x for x in lambda: True, lambda: False if x() ] +# even while also allowing: +# lambda x: 5 if x else 2 +# (But not a mix of the two) +testlist_safe: old_test [(',' old_test)+ [',']] +old_test: or_test | old_lambdef +old_lambdef: 'lambda' [varargslist] ':' old_test + +test: or_test ['if' or_test 'else' test] | lambdef +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [listmaker] ']' | + '{' [dictorsetmaker] '}' | + '`' testlist1 '`' | + NAME | NUMBER | strings) +strings: STRING+ +listmaker: test ( list_for | (',' test)* [','] ) +testlist_comp: test ( sync_comp_for | (',' test)* [','] ) +lambdef: 'lambda' [varargslist] ':' test +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: expr (',' expr)* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) | + (test (sync_comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. 
+argument: test [sync_comp_for] | test '=' test + +list_iter: list_for | list_if +list_for: 'for' exprlist 'in' testlist_safe [list_iter] +list_if: 'if' old_test [list_iter] + +comp_iter: sync_comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' old_test [comp_iter] + +testlist1: test (',' test)* + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [testlist] diff --git a/contrib/python/parso/py2/parso/python/grammar310.txt b/contrib/python/parso/py2/parso/python/grammar310.txt new file mode 100644 index 0000000000..3c39bb51b4 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/grammar310.txt @@ -0,0 +1,171 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' namedexpr_test NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: ( + (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] ( + ',' tfpdef ['=' test])* ([',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]]) + | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]]) + | '**' tfpdef [',']]] ) +| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +) +tfpdef: NAME [':' test] +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] 
+yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +namedexpr_test: test [':=' test] +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. 
+# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. +argument: ( test [comp_for] | + test ':=' test | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py2/parso/python/grammar33.txt b/contrib/python/parso/py2/parso/python/grammar33.txt new file mode 100644 index 0000000000..dd93d8b5ce --- /dev/null +++ b/contrib/python/parso/py2/parso/python/grammar33.txt @@ -0,0 +1,134 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake <fdrake@acm.org> will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ['->' test] ':' suite +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' + ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' + ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. 
It's here for the +# sake of a __future__ import described in PEP 401 +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +strings: STRING+ +testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) | + (test (sync_comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +argument: test [sync_comp_for] | test '=' test # Really [keyword '='] test +comp_iter: sync_comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist diff --git a/contrib/python/parso/py2/parso/python/grammar34.txt b/contrib/python/parso/py2/parso/python/grammar34.txt new file mode 100644 index 0000000000..999f4cd121 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/grammar34.txt @@ -0,0 +1,134 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake <fdrake@acm.org> will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef) +funcdef: 'def' NAME parameters ['->' test] ':' suite +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' + ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' + ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. 
It's here for the +# sake of a __future__ import described in PEP 401 +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom trailer* ['**' factor] +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +strings: STRING+ +testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) | + (test (sync_comp_for | (',' test)* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: (argument ',')* (argument [','] + |'*' test (',' argument)* [',' '**' test] + |'**' test) +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +argument: test [sync_comp_for] | test '=' test # Really [keyword '='] test +comp_iter: sync_comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist diff --git a/contrib/python/parso/py2/parso/python/grammar35.txt b/contrib/python/parso/py2/parso/python/grammar35.txt new file mode 100644 index 0000000000..29f49e4f73 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/grammar35.txt @@ -0,0 +1,153 @@ +# Grammar for Python + +# Note: Changing the grammar specified in this file will most likely +# require corresponding changes in the parser module +# (../Modules/parsermodule.c). If you can't make the changes to +# that module yourself, please co-ordinate the required changes +# with someone who can; ask around on python-dev for help. Fred +# Drake <fdrake@acm.org> will probably be listening there. + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +# NOTE: Reinoud Elhorst, using ASYNC/AWAIT keywords instead of tokens +# skipping python3.5 compatibility, in favour of 3.7 solution +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' + ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' + ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +strings: STRING+ +testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (sync_comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (sync_comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
+argument: ( test [sync_comp_for] | + test '=' test | + '**' test | + '*' test ) + +comp_iter: sync_comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist diff --git a/contrib/python/parso/py2/parso/python/grammar36.txt b/contrib/python/parso/py2/parso/python/grammar36.txt new file mode 100644 index 0000000000..2fec1f9908 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/grammar36.txt @@ -0,0 +1,158 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of +# skipping python3.5+ compatibility, in favour of 3.7 solution +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' test] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' 
| '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
+argument: ( test [comp_for] | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py2/parso/python/grammar37.txt b/contrib/python/parso/py2/parso/python/grammar37.txt new file mode 100644 index 0000000000..8799b84357 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/grammar37.txt @@ -0,0 +1,156 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' test] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' 
| '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
+argument: ( test [comp_for] | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py2/parso/python/grammar38.txt b/contrib/python/parso/py2/parso/python/grammar38.txt new file mode 100644 index 0000000000..3e943099f9 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/grammar38.txt @@ -0,0 +1,171 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: ( + (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] ( + ',' tfpdef ['=' test])* ([',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]]) + | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]]) + | '**' tfpdef [',']]] ) +| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +) +tfpdef: NAME [':' test] +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions 
enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +namedexpr_test: test [':=' test] +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' 
NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. +argument: ( test [comp_for] | + test ':=' test | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py2/parso/python/grammar39.txt b/contrib/python/parso/py2/parso/python/grammar39.txt new file mode 100644 index 0000000000..3c39bb51b4 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/grammar39.txt @@ -0,0 +1,171 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' namedexpr_test NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: ( + (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] ( + ',' tfpdef ['=' test])* ([',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]]) + | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]]) + | '**' tfpdef [',']]] ) +| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +) +tfpdef: NAME [':' test] +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +namedexpr_test: test [':=' test] +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
+argument: ( test [comp_for] | + test ':=' test | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py2/parso/python/parser.py b/contrib/python/parso/py2/parso/python/parser.py new file mode 100644 index 0000000000..ada60fbb0f --- /dev/null +++ b/contrib/python/parso/py2/parso/python/parser.py @@ -0,0 +1,217 @@ +from parso.python import tree +from parso.python.token import PythonTokenTypes +from parso.parser import BaseParser + + +NAME = PythonTokenTypes.NAME +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT + + +class Parser(BaseParser): + """ + This class is used to parse a Python file, it then divides them into a + class structure of different scopes. + + :param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar. + """ + + node_map = { + 'expr_stmt': tree.ExprStmt, + 'classdef': tree.Class, + 'funcdef': tree.Function, + 'file_input': tree.Module, + 'import_name': tree.ImportName, + 'import_from': tree.ImportFrom, + 'break_stmt': tree.KeywordStatement, + 'continue_stmt': tree.KeywordStatement, + 'return_stmt': tree.ReturnStmt, + 'raise_stmt': tree.KeywordStatement, + 'yield_expr': tree.YieldExpr, + 'del_stmt': tree.KeywordStatement, + 'pass_stmt': tree.KeywordStatement, + 'global_stmt': tree.GlobalStmt, + 'nonlocal_stmt': tree.KeywordStatement, + 'print_stmt': tree.KeywordStatement, + 'assert_stmt': tree.AssertStmt, + 'if_stmt': tree.IfStmt, + 'with_stmt': tree.WithStmt, + 'for_stmt': tree.ForStmt, + 'while_stmt': tree.WhileStmt, + 'try_stmt': tree.TryStmt, + 'sync_comp_for': tree.SyncCompFor, + # Not sure if this is the best idea, but IMO it's the easiest way to + # avoid extreme amounts of work around the subtle difference of 2/3 + # grammar in list comoprehensions. 
+ 'list_for': tree.SyncCompFor, + 'decorator': tree.Decorator, + 'lambdef': tree.Lambda, + 'old_lambdef': tree.Lambda, + 'lambdef_nocond': tree.Lambda, + } + default_node = tree.PythonNode + + # Names/Keywords are handled separately + _leaf_map = { + PythonTokenTypes.STRING: tree.String, + PythonTokenTypes.NUMBER: tree.Number, + PythonTokenTypes.NEWLINE: tree.Newline, + PythonTokenTypes.ENDMARKER: tree.EndMarker, + PythonTokenTypes.FSTRING_STRING: tree.FStringString, + PythonTokenTypes.FSTRING_START: tree.FStringStart, + PythonTokenTypes.FSTRING_END: tree.FStringEnd, + } + + def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'): + super(Parser, self).__init__(pgen_grammar, start_nonterminal, + error_recovery=error_recovery) + + self.syntax_errors = [] + self._omit_dedent_list = [] + self._indent_counter = 0 + + def parse(self, tokens): + if self._error_recovery: + if self._start_nonterminal != 'file_input': + raise NotImplementedError + + tokens = self._recovery_tokenize(tokens) + + return super(Parser, self).parse(tokens) + + def convert_node(self, nonterminal, children): + """ + Convert raw node information to a PythonBaseNode instance. + + This is passed to the parser driver which calls it whenever a reduction of a + grammar rule produces a new complete node, so that the tree is build + strictly bottom-up. + """ + try: + node = self.node_map[nonterminal](children) + except KeyError: + if nonterminal == 'suite': + # We don't want the INDENT/DEDENT in our parser tree. Those + # leaves are just cancer. They are virtual leaves and not real + # ones and therefore have pseudo start/end positions and no + # prefixes. Just ignore them. + children = [children[0]] + children[2:-1] + elif nonterminal == 'list_if': + # Make transitioning from 2 to 3 easier. + nonterminal = 'comp_if' + elif nonterminal == 'listmaker': + # Same as list_if above. + nonterminal = 'testlist_comp' + node = self.default_node(nonterminal, children) + for c in children: + c.parent = node + return node + + def convert_leaf(self, type, value, prefix, start_pos): + # print('leaf', repr(value), token.tok_name[type]) + if type == NAME: + if value in self._pgen_grammar.reserved_syntax_strings: + return tree.Keyword(value, start_pos, prefix) + else: + return tree.Name(value, start_pos, prefix) + + return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix) + + def error_recovery(self, token): + tos_nodes = self.stack[-1].nodes + if tos_nodes: + last_leaf = tos_nodes[-1].get_last_leaf() + else: + last_leaf = None + + if self._start_nonterminal == 'file_input' and \ + (token.type == PythonTokenTypes.ENDMARKER + or token.type == DEDENT and not last_leaf.value.endswith('\n') + and not last_leaf.value.endswith('\r')): + # In Python statements need to end with a newline. But since it's + # possible (and valid in Python) that there's no newline at the + # end of a file, we have to recover even if the user doesn't want + # error recovery. + if self.stack[-1].dfa.from_rule == 'simple_stmt': + try: + plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE] + except KeyError: + pass + else: + if plan.next_dfa.is_final and not plan.dfa_pushes: + # We are ignoring here that the newline would be + # required for a simple_stmt. 
+ self.stack[-1].dfa = plan.next_dfa + self._add_token(token) + return + + if not self._error_recovery: + return super(Parser, self).error_recovery(token) + + def current_suite(stack): + # For now just discard everything that is not a suite or + # file_input, if we detect an error. + for until_index, stack_node in reversed(list(enumerate(stack))): + # `suite` can sometimes be only simple_stmt, not stmt. + if stack_node.nonterminal == 'file_input': + break + elif stack_node.nonterminal == 'suite': + # In the case where we just have a newline we don't want to + # do error recovery here. In all other cases, we want to do + # error recovery. + if len(stack_node.nodes) != 1: + break + return until_index + + until_index = current_suite(self.stack) + + if self._stack_removal(until_index + 1): + self._add_token(token) + else: + typ, value, start_pos, prefix = token + if typ == INDENT: + # For every deleted INDENT we have to delete a DEDENT as well. + # Otherwise the parser will get into trouble and DEDENT too early. + self._omit_dedent_list.append(self._indent_counter) + + error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix) + self.stack[-1].nodes.append(error_leaf) + + tos = self.stack[-1] + if tos.nonterminal == 'suite': + # Need at least one statement in the suite. This happend with the + # error recovery above. + try: + tos.dfa = tos.dfa.arcs['stmt'] + except KeyError: + # We're already in a final state. + pass + + def _stack_removal(self, start_index): + all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes] + + if all_nodes: + node = tree.PythonErrorNode(all_nodes) + for n in all_nodes: + n.parent = node + self.stack[start_index - 1].nodes.append(node) + + self.stack[start_index:] = [] + return bool(all_nodes) + + def _recovery_tokenize(self, tokens): + for token in tokens: + typ = token[0] + if typ == DEDENT: + # We need to count indents, because if we just omit any DEDENT, + # we might omit them in the wrong place. 
+ o = self._omit_dedent_list + if o and o[-1] == self._indent_counter: + o.pop() + self._indent_counter -= 1 + continue + + self._indent_counter -= 1 + elif typ == INDENT: + self._indent_counter += 1 + yield token diff --git a/contrib/python/parso/py2/parso/python/pep8.py b/contrib/python/parso/py2/parso/python/pep8.py new file mode 100644 index 0000000000..2a037f9c2c --- /dev/null +++ b/contrib/python/parso/py2/parso/python/pep8.py @@ -0,0 +1,727 @@ +import re +from contextlib import contextmanager + +from parso.python.errors import ErrorFinder, ErrorFinderConfig +from parso.normalizer import Rule +from parso.python.tree import search_ancestor, Flow, Scope + + +_IMPORT_TYPES = ('import_name', 'import_from') +_SUITE_INTRODUCERS = ('classdef', 'funcdef', 'if_stmt', 'while_stmt', + 'for_stmt', 'try_stmt', 'with_stmt') +_NON_STAR_TYPES = ('term', 'import_from', 'power') +_OPENING_BRACKETS = '(', '[', '{' +_CLOSING_BRACKETS = ')', ']', '}' +_FACTOR = '+', '-', '~' +_ALLOW_SPACE = '*', '+', '-', '**', '/', '//', '@' +_BITWISE_OPERATOR = '<<', '>>', '|', '&', '^' +_NEEDS_SPACE = ('=', '%', '->', + '<', '>', '==', '>=', '<=', '<>', '!=', + '+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=', '<<=', + '>>=', '**=', '//=') +_NEEDS_SPACE += _BITWISE_OPERATOR +_IMPLICIT_INDENTATION_TYPES = ('dictorsetmaker', 'argument') +_POSSIBLE_SLICE_PARENTS = ('subscript', 'subscriptlist', 'sliceop') + + +class IndentationTypes(object): + VERTICAL_BRACKET = object() + HANGING_BRACKET = object() + BACKSLASH = object() + SUITE = object() + IMPLICIT = object() + + +class IndentationNode(object): + type = IndentationTypes.SUITE + + def __init__(self, config, indentation, parent=None): + self.bracket_indentation = self.indentation = indentation + self.parent = parent + + def __repr__(self): + return '<%s>' % self.__class__.__name__ + + def get_latest_suite_node(self): + n = self + while n is not None: + if n.type == IndentationTypes.SUITE: + return n + + n = n.parent + + +class BracketNode(IndentationNode): + def __init__(self, config, leaf, parent, in_suite_introducer=False): + self.leaf = leaf + + # Figure out here what the indentation is. For chained brackets + # we can basically use the previous indentation. + previous_leaf = leaf + n = parent + if n.type == IndentationTypes.IMPLICIT: + n = n.parent + while True: + if hasattr(n, 'leaf') and previous_leaf.line != n.leaf.line: + break + + previous_leaf = previous_leaf.get_previous_leaf() + if not isinstance(n, BracketNode) or previous_leaf != n.leaf: + break + n = n.parent + parent_indentation = n.indentation + + + next_leaf = leaf.get_next_leaf() + if '\n' in next_leaf.prefix: + # This implies code like: + # foobarbaz( + # a, + # b, + # ) + self.bracket_indentation = parent_indentation \ + + config.closing_bracket_hanging_indentation + self.indentation = parent_indentation + config.indentation + self.type = IndentationTypes.HANGING_BRACKET + else: + # Implies code like: + # foobarbaz( + # a, + # b, + # ) + expected_end_indent = leaf.end_pos[1] + if '\t' in config.indentation: + self.indentation = None + else: + self.indentation = ' ' * expected_end_indent + self.bracket_indentation = self.indentation + self.type = IndentationTypes.VERTICAL_BRACKET + + if in_suite_introducer and parent.type == IndentationTypes.SUITE \ + and self.indentation == parent_indentation + config.indentation: + self.indentation += config.indentation + # The closing bracket should have the same indentation. 
+ self.bracket_indentation = self.indentation + self.parent = parent + + +class ImplicitNode(BracketNode): + """ + Implicit indentation after keyword arguments, default arguments, + annotations and dict values. + """ + def __init__(self, config, leaf, parent): + super(ImplicitNode, self).__init__(config, leaf, parent) + self.type = IndentationTypes.IMPLICIT + + next_leaf = leaf.get_next_leaf() + if leaf == ':' and '\n' not in next_leaf.prefix: + self.indentation += ' ' + + +class BackslashNode(IndentationNode): + type = IndentationTypes.BACKSLASH + + def __init__(self, config, parent_indentation, containing_leaf, spacing, parent=None): + expr_stmt = search_ancestor(containing_leaf, 'expr_stmt') + if expr_stmt is not None: + equals = expr_stmt.children[-2] + + if '\t' in config.indentation: + # TODO unite with the code of BracketNode + self.indentation = None + else: + # If the backslash follows the equals, use normal indentation + # otherwise it should align with the equals. + if equals.end_pos == spacing.start_pos: + self.indentation = parent_indentation + config.indentation + else: + # +1 because there is a space. + self.indentation = ' ' * (equals.end_pos[1] + 1) + else: + self.indentation = parent_indentation + config.indentation + self.bracket_indentation = self.indentation + self.parent = parent + + +def _is_magic_name(name): + return name.value.startswith('__') and name.value.endswith('__') + + +class PEP8Normalizer(ErrorFinder): + def __init__(self, *args, **kwargs): + super(PEP8Normalizer, self).__init__(*args, **kwargs) + self._previous_part = None + self._previous_leaf = None + self._on_newline = True + self._newline_count = 0 + self._wanted_newline_count = None + self._max_new_lines_in_prefix = 0 + self._new_statement = True + self._implicit_indentation_possible = False + # The top of stack of the indentation nodes. + self._indentation_tos = self._last_indentation_tos = \ + IndentationNode(self._config, indentation='') + self._in_suite_introducer = False + + if ' ' in self._config.indentation: + self._indentation_type = 'spaces' + self._wrong_indentation_char = '\t' + else: + self._indentation_type = 'tabs' + self._wrong_indentation_char = ' ' + + @contextmanager + def visit_node(self, node): + with super(PEP8Normalizer, self).visit_node(node): + with self._visit_node(node): + yield + + @contextmanager + def _visit_node(self, node): + typ = node.type + + if typ in 'import_name': + names = node.get_defined_names() + if len(names) > 1: + for name in names[:1]: + self.add_issue(name, 401, 'Multiple imports on one line') + elif typ == 'lambdef': + expr_stmt = node.parent + # Check if it's simply defining a single name, not something like + # foo.bar or x[1], where using a lambda could make more sense. + if expr_stmt.type == 'expr_stmt' and any(n.type == 'name' for n in expr_stmt.children[:-2:2]): + self.add_issue(node, 731, 'Do not assign a lambda expression, use a def') + elif typ == 'try_stmt': + for child in node.children: + # Here we can simply check if it's an except, because otherwise + # it would be an except_clause. 
+ if child.type == 'keyword' and child.value == 'except': + self.add_issue(child, 722, 'Do not use bare except, specify exception instead') + elif typ == 'comparison': + for child in node.children: + if child.type not in ('atom_expr', 'power'): + continue + if len(child.children) > 2: + continue + trailer = child.children[1] + atom = child.children[0] + if trailer.type == 'trailer' and atom.type == 'name' \ + and atom.value == 'type': + self.add_issue(node, 721, "Do not compare types, use 'isinstance()") + break + elif typ == 'file_input': + endmarker = node.children[-1] + prev = endmarker.get_previous_leaf() + prefix = endmarker.prefix + if (not prefix.endswith('\n') and ( + prefix or prev is None or prev.value != '\n')): + self.add_issue(endmarker, 292, "No newline at end of file") + + if typ in _IMPORT_TYPES: + simple_stmt = node.parent + module = simple_stmt.parent + #if module.type == 'simple_stmt': + if module.type == 'file_input': + index = module.children.index(simple_stmt) + for child in module.children[:index]: + children = [child] + if child.type == 'simple_stmt': + # Remove the newline. + children = child.children[:-1] + + found_docstring = False + for c in children: + if c.type == 'string' and not found_docstring: + continue + found_docstring = True + + if c.type == 'expr_stmt' and \ + all(_is_magic_name(n) for n in c.get_defined_names()): + continue + + if c.type in _IMPORT_TYPES or isinstance(c, Flow): + continue + + self.add_issue(node, 402, 'Module level import not at top of file') + break + else: + continue + break + + implicit_indentation_possible = typ in _IMPLICIT_INDENTATION_TYPES + in_introducer = typ in _SUITE_INTRODUCERS + if in_introducer: + self._in_suite_introducer = True + elif typ == 'suite': + if self._indentation_tos.type == IndentationTypes.BACKSLASH: + self._indentation_tos = self._indentation_tos.parent + + self._indentation_tos = IndentationNode( + self._config, + self._indentation_tos.indentation + self._config.indentation, + parent=self._indentation_tos + ) + elif implicit_indentation_possible: + self._implicit_indentation_possible = True + yield + if typ == 'suite': + assert self._indentation_tos.type == IndentationTypes.SUITE + self._indentation_tos = self._indentation_tos.parent + # If we dedent, no lines are needed anymore. + self._wanted_newline_count = None + elif implicit_indentation_possible: + self._implicit_indentation_possible = False + if self._indentation_tos.type == IndentationTypes.IMPLICIT: + self._indentation_tos = self._indentation_tos.parent + elif in_introducer: + self._in_suite_introducer = False + if typ in ('classdef', 'funcdef'): + self._wanted_newline_count = self._get_wanted_blank_lines_count() + + def _check_tabs_spaces(self, spacing): + if self._wrong_indentation_char in spacing.value: + self.add_issue(spacing, 101, 'Indentation contains ' + self._indentation_type) + return True + return False + + def _get_wanted_blank_lines_count(self): + suite_node = self._indentation_tos.get_latest_suite_node() + return int(suite_node.parent is None) + 1 + + def _reset_newlines(self, spacing, leaf, is_comment=False): + self._max_new_lines_in_prefix = \ + max(self._max_new_lines_in_prefix, self._newline_count) + + wanted = self._wanted_newline_count + if wanted is not None: + # Need to substract one + blank_lines = self._newline_count - 1 + if wanted > blank_lines and leaf.type != 'endmarker': + # In case of a comment we don't need to add the issue, yet. + if not is_comment: + # TODO end_pos wrong. 
+ code = 302 if wanted == 2 else 301 + message = "expected %s blank line, found %s" \ + % (wanted, blank_lines) + self.add_issue(spacing, code, message) + self._wanted_newline_count = None + else: + self._wanted_newline_count = None + + if not is_comment: + wanted = self._get_wanted_blank_lines_count() + actual = self._max_new_lines_in_prefix - 1 + + val = leaf.value + needs_lines = ( + val == '@' and leaf.parent.type == 'decorator' + or ( + val == 'class' + or val == 'async' and leaf.get_next_leaf() == 'def' + or val == 'def' and self._previous_leaf != 'async' + ) and leaf.parent.parent.type != 'decorated' + ) + if needs_lines and actual < wanted: + func_or_cls = leaf.parent + suite = func_or_cls.parent + if suite.type == 'decorated': + suite = suite.parent + + # The first leaf of a file or a suite should not need blank + # lines. + if suite.children[int(suite.type == 'suite')] != func_or_cls: + code = 302 if wanted == 2 else 301 + message = "expected %s blank line, found %s" \ + % (wanted, actual) + self.add_issue(spacing, code, message) + + self._max_new_lines_in_prefix = 0 + + self._newline_count = 0 + + def visit_leaf(self, leaf): + super(PEP8Normalizer, self).visit_leaf(leaf) + for part in leaf._split_prefix(): + if part.type == 'spacing': + # This part is used for the part call after for. + break + self._visit_part(part, part.create_spacing_part(), leaf) + + self._analyse_non_prefix(leaf) + self._visit_part(leaf, part, leaf) + + # Cleanup + self._last_indentation_tos = self._indentation_tos + + self._new_statement = leaf.type == 'newline' + + # TODO does this work? with brackets and stuff? + if leaf.type == 'newline' and \ + self._indentation_tos.type == IndentationTypes.BACKSLASH: + self._indentation_tos = self._indentation_tos.parent + + if leaf.value == ':' and leaf.parent.type in _SUITE_INTRODUCERS: + self._in_suite_introducer = False + elif leaf.value == 'elif': + self._in_suite_introducer = True + + if not self._new_statement: + self._reset_newlines(part, leaf) + self._max_blank_lines = 0 + + self._previous_leaf = leaf + + return leaf.value + + def _visit_part(self, part, spacing, leaf): + value = part.value + type_ = part.type + if type_ == 'error_leaf': + return + + if value == ',' and part.parent.type == 'dictorsetmaker': + self._indentation_tos = self._indentation_tos.parent + + node = self._indentation_tos + + if type_ == 'comment': + if value.startswith('##'): + # Whole blocks of # should not raise an error. + if value.lstrip('#'): + self.add_issue(part, 266, "Too many leading '#' for block comment.") + elif self._on_newline: + if not re.match(r'#:? ', value) and not value == '#' \ + and not (value.startswith('#!') and part.start_pos == (1, 0)): + self.add_issue(part, 265, "Block comment should start with '# '") + else: + if not re.match(r'#:? [^ ]', value): + self.add_issue(part, 262, "Inline comment should start with '# '") + + self._reset_newlines(spacing, leaf, is_comment=True) + elif type_ == 'newline': + if self._newline_count > self._get_wanted_blank_lines_count(): + self.add_issue(part, 303, "Too many blank lines (%s)" % self._newline_count) + elif leaf in ('def', 'class') \ + and leaf.parent.parent.type == 'decorated': + self.add_issue(part, 304, "Blank lines found after function decorator") + + + self._newline_count += 1 + + if type_ == 'backslash': + # TODO is this enough checking? What about ==? 
+ if node.type != IndentationTypes.BACKSLASH: + if node.type != IndentationTypes.SUITE: + self.add_issue(part, 502, 'The backslash is redundant between brackets') + else: + indentation = node.indentation + if self._in_suite_introducer and node.type == IndentationTypes.SUITE: + indentation += self._config.indentation + + self._indentation_tos = BackslashNode( + self._config, + indentation, + part, + spacing, + parent=self._indentation_tos + ) + elif self._on_newline: + indentation = spacing.value + if node.type == IndentationTypes.BACKSLASH \ + and self._previous_part.type == 'newline': + self._indentation_tos = self._indentation_tos.parent + + if not self._check_tabs_spaces(spacing): + should_be_indentation = node.indentation + if type_ == 'comment': + # Comments can be dedented. So we have to care for that. + n = self._last_indentation_tos + while True: + if len(indentation) > len(n.indentation): + break + + should_be_indentation = n.indentation + + self._last_indentation_tos = n + if n == node: + break + n = n.parent + + if self._new_statement: + if type_ == 'newline': + if indentation: + self.add_issue(spacing, 291, 'Trailing whitespace') + elif indentation != should_be_indentation: + s = '%s %s' % (len(self._config.indentation), self._indentation_type) + self.add_issue(part, 111, 'Indentation is not a multiple of ' + s) + else: + if value in '])}': + should_be_indentation = node.bracket_indentation + else: + should_be_indentation = node.indentation + if self._in_suite_introducer and indentation == \ + node.get_latest_suite_node().indentation \ + + self._config.indentation: + self.add_issue(part, 129, "Line with same indent as next logical block") + elif indentation != should_be_indentation: + if not self._check_tabs_spaces(spacing) and part.value != '\n': + if value in '])}': + if node.type == IndentationTypes.VERTICAL_BRACKET: + self.add_issue(part, 124, "Closing bracket does not match visual indentation") + else: + self.add_issue(part, 123, "Losing bracket does not match indentation of opening bracket's line") + else: + if len(indentation) < len(should_be_indentation): + if node.type == IndentationTypes.VERTICAL_BRACKET: + self.add_issue(part, 128, 'Continuation line under-indented for visual indent') + elif node.type == IndentationTypes.BACKSLASH: + self.add_issue(part, 122, 'Continuation line missing indentation or outdented') + elif node.type == IndentationTypes.IMPLICIT: + self.add_issue(part, 135, 'xxx') + else: + self.add_issue(part, 121, 'Continuation line under-indented for hanging indent') + else: + if node.type == IndentationTypes.VERTICAL_BRACKET: + self.add_issue(part, 127, 'Continuation line over-indented for visual indent') + elif node.type == IndentationTypes.IMPLICIT: + self.add_issue(part, 136, 'xxx') + else: + self.add_issue(part, 126, 'Continuation line over-indented for hanging indent') + else: + self._check_spacing(part, spacing) + + self._check_line_length(part, spacing) + # ------------------------------- + # Finalizing. Updating the state. 
+ # ------------------------------- + if value and value in '()[]{}' and type_ != 'error_leaf' \ + and part.parent.type != 'error_node': + if value in _OPENING_BRACKETS: + self._indentation_tos = BracketNode( + self._config, part, + parent=self._indentation_tos, + in_suite_introducer=self._in_suite_introducer + ) + else: + assert node.type != IndentationTypes.IMPLICIT + self._indentation_tos = self._indentation_tos.parent + elif value in ('=', ':') and self._implicit_indentation_possible \ + and part.parent.type in _IMPLICIT_INDENTATION_TYPES: + indentation = node.indentation + self._indentation_tos = ImplicitNode( + self._config, part, parent=self._indentation_tos + ) + + self._on_newline = type_ in ('newline', 'backslash', 'bom') + + self._previous_part = part + self._previous_spacing = spacing + + def _check_line_length(self, part, spacing): + if part.type == 'backslash': + last_column = part.start_pos[1] + 1 + else: + last_column = part.end_pos[1] + if last_column > self._config.max_characters \ + and spacing.start_pos[1] <= self._config.max_characters : + # Special case for long URLs in multi-line docstrings or comments, + # but still report the error when the 72 first chars are whitespaces. + report = True + if part.type == 'comment': + splitted = part.value[1:].split() + if len(splitted) == 1 \ + and (part.end_pos[1] - len(splitted[0])) < 72: + report = False + if report: + self.add_issue( + part, + 501, + 'Line too long (%s > %s characters)' % + (last_column, self._config.max_characters), + ) + + def _check_spacing(self, part, spacing): + def add_if_spaces(*args): + if spaces: + return self.add_issue(*args) + + def add_not_spaces(*args): + if not spaces: + return self.add_issue(*args) + + spaces = spacing.value + prev = self._previous_part + if prev is not None and prev.type == 'error_leaf' or part.type == 'error_leaf': + return + + type_ = part.type + if '\t' in spaces: + self.add_issue(spacing, 223, 'Used tab to separate tokens') + elif type_ == 'comment': + if len(spaces) < self._config.spaces_before_comment: + self.add_issue(spacing, 261, 'At least two spaces before inline comment') + elif type_ == 'newline': + add_if_spaces(spacing, 291, 'Trailing whitespace') + elif len(spaces) > 1: + self.add_issue(spacing, 221, 'Multiple spaces used') + else: + if prev in _OPENING_BRACKETS: + message = "Whitespace after '%s'" % part.value + add_if_spaces(spacing, 201, message) + elif part in _CLOSING_BRACKETS: + message = "Whitespace before '%s'" % part.value + add_if_spaces(spacing, 202, message) + elif part in (',', ';') or part == ':' \ + and part.parent.type not in _POSSIBLE_SLICE_PARENTS: + message = "Whitespace before '%s'" % part.value + add_if_spaces(spacing, 203, message) + elif prev == ':' and prev.parent.type in _POSSIBLE_SLICE_PARENTS: + pass # TODO + elif prev in (',', ';', ':'): + add_not_spaces(spacing, 231, "missing whitespace after '%s'") + elif part == ':': # Is a subscript + # TODO + pass + elif part in ('*', '**') and part.parent.type not in _NON_STAR_TYPES \ + or prev in ('*', '**') \ + and prev.parent.type not in _NON_STAR_TYPES: + # TODO + pass + elif prev in _FACTOR and prev.parent.type == 'factor': + pass + elif prev == '@' and prev.parent.type == 'decorator': + pass # TODO should probably raise an error if there's a space here + elif part in _NEEDS_SPACE or prev in _NEEDS_SPACE: + if part == '=' and part.parent.type in ('argument', 'param') \ + or prev == '=' and prev.parent.type in ('argument', 'param'): + if part == '=': + param = part.parent + else: + param = 
prev.parent + if param.type == 'param' and param.annotation: + add_not_spaces(spacing, 252, 'Expected spaces around annotation equals') + else: + add_if_spaces(spacing, 251, 'Unexpected spaces around keyword / parameter equals') + elif part in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR: + add_not_spaces(spacing, 227, 'Missing whitespace around bitwise or shift operator') + elif part == '%' or prev == '%': + add_not_spaces(spacing, 228, 'Missing whitespace around modulo operator') + else: + message_225 = 'Missing whitespace between tokens' + add_not_spaces(spacing, 225, message_225) + elif type_ == 'keyword' or prev.type == 'keyword': + add_not_spaces(spacing, 275, 'Missing whitespace around keyword') + else: + prev_spacing = self._previous_spacing + if prev in _ALLOW_SPACE and spaces != prev_spacing.value \ + and '\n' not in self._previous_leaf.prefix: + message = "Whitespace before operator doesn't match with whitespace after" + self.add_issue(spacing, 229, message) + + if spaces and part not in _ALLOW_SPACE and prev not in _ALLOW_SPACE: + message_225 = 'Missing whitespace between tokens' + #print('xy', spacing) + #self.add_issue(spacing, 225, message_225) + # TODO why only brackets? + if part in _OPENING_BRACKETS: + message = "Whitespace before '%s'" % part.value + add_if_spaces(spacing, 211, message) + + def _analyse_non_prefix(self, leaf): + typ = leaf.type + if typ == 'name' and leaf.value in ('l', 'O', 'I'): + if leaf.is_definition(): + message = "Do not define %s named 'l', 'O', or 'I' one line" + if leaf.parent.type == 'class' and leaf.parent.name == leaf: + self.add_issue(leaf, 742, message % 'classes') + elif leaf.parent.type == 'function' and leaf.parent.name == leaf: + self.add_issue(leaf, 743, message % 'function') + else: + self.add_issuadd_issue(741, message % 'variables', leaf) + elif leaf.value == ':': + if isinstance(leaf.parent, (Flow, Scope)) and leaf.parent.type != 'lambdef': + next_leaf = leaf.get_next_leaf() + if next_leaf.type != 'newline': + if leaf.parent.type == 'funcdef': + self.add_issue(next_leaf, 704, 'Multiple statements on one line (def)') + else: + self.add_issue(next_leaf, 701, 'Multiple statements on one line (colon)') + elif leaf.value == ';': + if leaf.get_next_leaf().type in ('newline', 'endmarker'): + self.add_issue(leaf, 703, 'Statement ends with a semicolon') + else: + self.add_issue(leaf, 702, 'Multiple statements on one line (semicolon)') + elif leaf.value in ('==', '!='): + comparison = leaf.parent + index = comparison.children.index(leaf) + left = comparison.children[index - 1] + right = comparison.children[index + 1] + for node in left, right: + if node.type == 'keyword' or node.type == 'name': + if node.value == 'None': + message = "comparison to None should be 'if cond is None:'" + self.add_issue(leaf, 711, message) + break + elif node.value in ('True', 'False'): + message = "comparison to False/True should be 'if cond is True:' or 'if cond:'" + self.add_issue(leaf, 712, message) + break + elif leaf.value in ('in', 'is'): + comparison = leaf.parent + if comparison.type == 'comparison' and comparison.parent.type == 'not_test': + if leaf.value == 'in': + self.add_issue(leaf, 713, "test for membership should be 'not in'") + else: + self.add_issue(leaf, 714, "test for object identity should be 'is not'") + elif typ == 'string': + # Checking multiline strings + for i, line in enumerate(leaf.value.splitlines()[1:]): + indentation = re.match(r'[ \t]*', line).group(0) + start_pos = leaf.line + i, len(indentation) + # TODO check multiline 
indentation. + elif typ == 'endmarker': + if self._newline_count >= 2: + self.add_issue(leaf, 391, 'Blank line at end of file') + + def add_issue(self, node, code, message): + if self._previous_leaf is not None: + if search_ancestor(self._previous_leaf, 'error_node') is not None: + return + if self._previous_leaf.type == 'error_leaf': + return + if search_ancestor(node, 'error_node') is not None: + return + if code in (901, 903): + # 901 and 903 are raised by the ErrorFinder. + super(PEP8Normalizer, self).add_issue(node, code, message) + else: + # Skip ErrorFinder here, because it has custom behavior. + super(ErrorFinder, self).add_issue(node, code, message) + + +class PEP8NormalizerConfig(ErrorFinderConfig): + normalizer_class = PEP8Normalizer + """ + Normalizing to PEP8. Not really implemented, yet. + """ + def __init__(self, indentation=' ' * 4, hanging_indentation=None, + max_characters=79, spaces_before_comment=2): + self.indentation = indentation + if hanging_indentation is None: + hanging_indentation = indentation + self.hanging_indentation = hanging_indentation + self.closing_bracket_hanging_indentation = '' + self.break_after_binary = False + self.max_characters = max_characters + self.spaces_before_comment = spaces_before_comment + + +# TODO this is not yet ready. +#@PEP8Normalizer.register_rule(type='endmarker') +class BlankLineAtEnd(Rule): + code = 392 + message = 'Blank line at end of file' + + def is_issue(self, leaf): + return self._newline_count >= 2 diff --git a/contrib/python/parso/py2/parso/python/prefix.py b/contrib/python/parso/py2/parso/python/prefix.py new file mode 100644 index 0000000000..b7f1e1bc4d --- /dev/null +++ b/contrib/python/parso/py2/parso/python/prefix.py @@ -0,0 +1,97 @@ +import re +from codecs import BOM_UTF8 + +from parso.python.tokenize import group + +unicode_bom = BOM_UTF8.decode('utf-8') + + +class PrefixPart(object): + def __init__(self, leaf, typ, value, spacing='', start_pos=None): + assert start_pos is not None + self.parent = leaf + self.type = typ + self.value = value + self.spacing = spacing + self.start_pos = start_pos + + @property + def end_pos(self): + if self.value.endswith('\n'): + return self.start_pos[0] + 1, 0 + if self.value == unicode_bom: + # The bom doesn't have a length at the start of a Python file. 
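+ # Treat the BOM as zero-width, so its end_pos equals its start_pos.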
+ return self.start_pos + return self.start_pos[0], self.start_pos[1] + len(self.value) + + def create_spacing_part(self): + column = self.start_pos[1] - len(self.spacing) + return PrefixPart( + self.parent, 'spacing', self.spacing, + start_pos=(self.start_pos[0], column) + ) + + def __repr__(self): + return '%s(%s, %s, %s)' % ( + self.__class__.__name__, + self.type, + repr(self.value), + self.start_pos + ) + + +_comment = r'#[^\n\r\f]*' +_backslash = r'\\\r?\n' +_newline = r'\r?\n' +_form_feed = r'\f' +_only_spacing = '$' +_spacing = r'[ \t]*' +_bom = unicode_bom + +_regex = group( + _comment, _backslash, _newline, _form_feed, _only_spacing, _bom, + capture=True +) +_regex = re.compile(group(_spacing, capture=True) + _regex) + + +_types = { + '#': 'comment', + '\\': 'backslash', + '\f': 'formfeed', + '\n': 'newline', + '\r': 'newline', + unicode_bom: 'bom' +} + + +def split_prefix(leaf, start_pos): + line, column = start_pos + start = 0 + value = spacing = '' + bom = False + while start != len(leaf.prefix): + match =_regex.match(leaf.prefix, start) + spacing = match.group(1) + value = match.group(2) + if not value: + break + type_ = _types[value[0]] + yield PrefixPart( + leaf, type_, value, spacing, + start_pos=(line, column + start - int(bom) + len(spacing)) + ) + if type_ == 'bom': + bom = True + + start = match.end(0) + if value.endswith('\n'): + line += 1 + column = -start + + if value: + spacing = '' + yield PrefixPart( + leaf, 'spacing', spacing, + start_pos=(line, column + start) + ) diff --git a/contrib/python/parso/py2/parso/python/token.py b/contrib/python/parso/py2/parso/python/token.py new file mode 100644 index 0000000000..bb86ec978d --- /dev/null +++ b/contrib/python/parso/py2/parso/python/token.py @@ -0,0 +1,27 @@ +from __future__ import absolute_import + + +class TokenType(object): + def __init__(self, name, contains_syntax=False): + self.name = name + self.contains_syntax = contains_syntax + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self.name) + + +class TokenTypes(object): + """ + Basically an enum, but Python 2 doesn't have enums in the standard library. + """ + def __init__(self, names, contains_syntax): + for name in names: + setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax)) + + +PythonTokenTypes = TokenTypes(( + 'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT', + 'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP', + 'ENDMARKER'), + contains_syntax=('NAME', 'OP'), +) diff --git a/contrib/python/parso/py2/parso/python/tokenize.py b/contrib/python/parso/py2/parso/python/tokenize.py new file mode 100644 index 0000000000..fdcd8e04d4 --- /dev/null +++ b/contrib/python/parso/py2/parso/python/tokenize.py @@ -0,0 +1,722 @@ +# -*- coding: utf-8 -*- +""" +This tokenizer has been copied from the ``tokenize.py`` standard library +tokenizer. The reason was simple: The standard library tokenizer fails +if the indentation is not right. To make it possible to do error recovery the + tokenizer needed to be rewritten. + +Basically this is a stripped down version of the standard library module, so +you can read the documentation there. Additionally we included some speed and +memory optimizations here. 
+""" +from __future__ import absolute_import + +import sys +import re +from collections import namedtuple +import itertools as _itertools +from codecs import BOM_UTF8 + +from parso.python.token import PythonTokenTypes +from parso.utils import split_lines + + +# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111) +MAX_UNICODE = '\U0010ffff' + +STRING = PythonTokenTypes.STRING +NAME = PythonTokenTypes.NAME +NUMBER = PythonTokenTypes.NUMBER +OP = PythonTokenTypes.OP +NEWLINE = PythonTokenTypes.NEWLINE +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT +ENDMARKER = PythonTokenTypes.ENDMARKER +ERRORTOKEN = PythonTokenTypes.ERRORTOKEN +ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT +FSTRING_START = PythonTokenTypes.FSTRING_START +FSTRING_STRING = PythonTokenTypes.FSTRING_STRING +FSTRING_END = PythonTokenTypes.FSTRING_END + +TokenCollection = namedtuple( + 'TokenCollection', + 'pseudo_token single_quoted triple_quoted endpats whitespace ' + 'fstring_pattern_map always_break_tokens', +) + +BOM_UTF8_STRING = BOM_UTF8.decode('utf-8') + +_token_collection_cache = {} + +if sys.version_info.major >= 3: + # Python 3 has str.isidentifier() to check if a char is a valid identifier + is_identifier = str.isidentifier +else: + # Python 2 doesn't, but it's not that important anymore and if you tokenize + # Python 2 code with this, it's still ok. It's just that parsing Python 3 + # code with this function is not 100% correct. + # This just means that Python 2 code matches a few identifiers too much, + # but that doesn't really matter. + def is_identifier(s): + return True + + +def group(*choices, **kwargs): + capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :( + assert not kwargs + + start = '(' + if not capture: + start += '?:' + return start + '|'.join(choices) + ')' + + +def maybe(*choices): + return group(*choices) + '?' + + +# Return the empty string, plus all of the valid string prefixes. +def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False): + def different_case_versions(prefix): + for s in _itertools.product(*[(c, c.upper()) for c in prefix]): + yield ''.join(s) + # The valid string prefixes. Only contain the lower case versions, + # and don't contain any permuations (include 'fr', but not + # 'rf'). The various permutations will be generated. + valid_string_prefixes = ['b', 'r', 'u'] + if version_info.major >= 3: + valid_string_prefixes.append('br') + + result = set(['']) + if version_info >= (3, 6) and include_fstring: + f = ['f', 'fr'] + if only_fstring: + valid_string_prefixes = f + result = set() + else: + valid_string_prefixes += f + elif only_fstring: + return set() + + # if we add binary f-strings, add: ['fb', 'fbr'] + for prefix in valid_string_prefixes: + for t in _itertools.permutations(prefix): + # create a list with upper and lower versions of each + # character + result.update(different_case_versions(t)) + if version_info.major == 2: + # In Python 2 the order cannot just be random. 
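+ # Only 'ur' and 'br' are legal there; the reversed forms 'ru' and 'rb' are not.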
+ result.update(different_case_versions('ur')) + result.update(different_case_versions('br')) + return result + + +def _compile(expr): + return re.compile(expr, re.UNICODE) + + +def _get_token_collection(version_info): + try: + return _token_collection_cache[tuple(version_info)] + except KeyError: + _token_collection_cache[tuple(version_info)] = result = \ + _create_token_collection(version_info) + return result + + +fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+') +fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+') +fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+') +fstring_format_spec_multi_line = _compile(r'[^{}]+') + + +def _create_token_collection(version_info): + # Note: we use unicode matching for names ("\w") but ascii matching for + # number literals. + Whitespace = r'[ \f\t]*' + whitespace = _compile(Whitespace) + Comment = r'#[^\r\n]*' + # Python 2 is pretty much not working properly anymore, we just ignore + # parsing unicode properly, which is fine, I guess. + if version_info[0] == 2: + Name = r'([A-Za-z_0-9]+)' + elif sys.version_info[0] == 2: + # Unfortunately the regex engine cannot deal with the regex below, so + # just use this one. + Name = r'(\w+)' + else: + Name = u'([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)' + + if version_info >= (3, 6): + Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' + Binnumber = r'0[bB](?:_?[01])+' + Octnumber = r'0[oO](?:_?[0-7])+' + Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' + Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) + Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' + Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', + r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) + Expfloat = r'[0-9](?:_?[0-9])*' + Exponent + Floatnumber = group(Pointfloat, Expfloat) + Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') + else: + Hexnumber = r'0[xX][0-9a-fA-F]+' + Binnumber = r'0[bB][01]+' + if version_info.major >= 3: + Octnumber = r'0[oO][0-7]+' + else: + Octnumber = '0[oO]?[0-7]+' + Decnumber = r'(?:0+|[1-9][0-9]*)' + Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) + if version_info[0] < 3: + Intnumber += '[lL]?' + Exponent = r'[eE][-+]?[0-9]+' + Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) + Expfloat = r'[0-9]+' + Exponent + Floatnumber = group(Pointfloat, Expfloat) + Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') + Number = group(Imagnumber, Floatnumber, Intnumber) + + # Note that since _all_string_prefixes includes the empty string, + # StringPrefix can be the empty string (making it optional). + possible_prefixes = _all_string_prefixes(version_info) + StringPrefix = group(*possible_prefixes) + StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True)) + fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True) + FStringStart = group(*fstring_prefixes) + + # Tail end of ' string. + Single = r"(?:\\.|[^'\\])*'" + # Tail end of " string. + Double = r'(?:\\.|[^"\\])*"' + # Tail end of ''' string. + Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''" + # Tail end of """ string. + Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""' + Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""') + + # Because of leftmost-then-longest match semantics, be sure to put the + # longest operators first (e.g., if = came before ==, == would get + # recognized as two instances of =). 
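+ # For example, '**' and '**=' are matched by the first alternative before a lone '*' can match.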
+ Operator = group(r"\*\*=?", r">>=?", r"<<=?", + r"//=?", r"->", + r"[+\-*/%&@`|^!=<>]=?", + r"~") + + Bracket = '[][(){}]' + + special_args = [r'\r\n?', r'\n', r'[;.,@]'] + if version_info >= (3, 0): + special_args.insert(0, r'\.\.\.') + if version_info >= (3, 8): + special_args.insert(0, ":=?") + else: + special_args.insert(0, ":") + Special = group(*special_args) + + Funny = group(Operator, Bracket, Special) + + # First (or only) line of ' or " string. + ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" + + group("'", r'\\(?:\r\n?|\n)'), + StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' + + group('"', r'\\(?:\r\n?|\n)')) + pseudo_extra_pool = [Comment, Triple] + all_quotes = '"', "'", '"""', "'''" + if fstring_prefixes: + pseudo_extra_pool.append(FStringStart + group(*all_quotes)) + + PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool) + PseudoToken = group(Whitespace, capture=True) + \ + group(PseudoExtras, Number, Funny, ContStr, Name, capture=True) + + # For a given string prefix plus quotes, endpats maps it to a regex + # to match the remainder of that string. _prefix can be empty, for + # a normal single or triple quoted string (with no prefix). + endpats = {} + for _prefix in possible_prefixes: + endpats[_prefix + "'"] = _compile(Single) + endpats[_prefix + '"'] = _compile(Double) + endpats[_prefix + "'''"] = _compile(Single3) + endpats[_prefix + '"""'] = _compile(Double3) + + # A set of all of the single and triple quoted string prefixes, + # including the opening quotes. + single_quoted = set() + triple_quoted = set() + fstring_pattern_map = {} + for t in possible_prefixes: + for quote in '"', "'": + single_quoted.add(t + quote) + + for quote in '"""', "'''": + triple_quoted.add(t + quote) + + for t in fstring_prefixes: + for quote in all_quotes: + fstring_pattern_map[t + quote] = quote + + ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except', + 'finally', 'while', 'with', 'return', 'continue', + 'break', 'del', 'pass', 'global', 'assert') + if version_info >= (3, 5): + ALWAYS_BREAK_TOKENS += ('nonlocal', ) + pseudo_token_compiled = _compile(PseudoToken) + return TokenCollection( + pseudo_token_compiled, single_quoted, triple_quoted, endpats, + whitespace, fstring_pattern_map, set(ALWAYS_BREAK_TOKENS) + ) + + +class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])): + @property + def end_pos(self): + lines = split_lines(self.string) + if len(lines) > 1: + return self.start_pos[0] + len(lines) - 1, 0 + else: + return self.start_pos[0], self.start_pos[1] + len(self.string) + + +class PythonToken(Token): + def __repr__(self): + return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' % + self._replace(type=self.type.name)) + + +class FStringNode(object): + def __init__(self, quote): + self.quote = quote + self.parentheses_count = 0 + self.previous_lines = '' + self.last_string_start_pos = None + # In the syntax there can be multiple format_spec's nested: + # {x:{y:3}} + self.format_spec_count = 0 + + def open_parentheses(self, character): + self.parentheses_count += 1 + + def close_parentheses(self, character): + self.parentheses_count -= 1 + if self.parentheses_count == 0: + # No parentheses means that the format spec is also finished. 
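+ # (i.e. every bracket opened inside the f-string, including the expression's '{', has been closed again)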
+ self.format_spec_count = 0 + + def allow_multiline(self): + return len(self.quote) == 3 + + def is_in_expr(self): + return self.parentheses_count > self.format_spec_count + + def is_in_format_spec(self): + return not self.is_in_expr() and self.format_spec_count + + +def _close_fstring_if_necessary(fstring_stack, string, line_nr, column, additional_prefix): + for fstring_stack_index, node in enumerate(fstring_stack): + lstripped_string = string.lstrip() + len_lstrip = len(string) - len(lstripped_string) + if lstripped_string.startswith(node.quote): + token = PythonToken( + FSTRING_END, + node.quote, + (line_nr, column + len_lstrip), + prefix=additional_prefix+string[:len_lstrip], + ) + additional_prefix = '' + assert not node.previous_lines + del fstring_stack[fstring_stack_index:] + return token, '', len(node.quote) + len_lstrip + return None, additional_prefix, 0 + + +def _find_fstring_string(endpats, fstring_stack, line, lnum, pos): + tos = fstring_stack[-1] + allow_multiline = tos.allow_multiline() + if tos.is_in_format_spec(): + if allow_multiline: + regex = fstring_format_spec_multi_line + else: + regex = fstring_format_spec_single_line + else: + if allow_multiline: + regex = fstring_string_multi_line + else: + regex = fstring_string_single_line + + match = regex.match(line, pos) + if match is None: + return tos.previous_lines, pos + + if not tos.previous_lines: + tos.last_string_start_pos = (lnum, pos) + + string = match.group(0) + for fstring_stack_node in fstring_stack: + end_match = endpats[fstring_stack_node.quote].match(string) + if end_match is not None: + string = end_match.group(0)[:-len(fstring_stack_node.quote)] + + new_pos = pos + new_pos += len(string) + # even if allow_multiline is False, we still need to check for trailing + # newlines, because a single-line f-string can contain line continuations + if string.endswith('\n') or string.endswith('\r'): + tos.previous_lines += string + string = '' + else: + string = tos.previous_lines + string + + return string, new_pos + + +def tokenize(code, version_info, start_pos=(1, 0)): + """Generate tokens from a the source code (string).""" + lines = split_lines(code, keepends=True) + return tokenize_lines(lines, version_info, start_pos=start_pos) + + +def _print_tokens(func): + """ + A small helper function to help debug the tokenize_lines function. + """ + def wrapper(*args, **kwargs): + for token in func(*args, **kwargs): + print(token) # This print is intentional for debugging! + yield token + + return wrapper + + +# @_print_tokens +def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first_token=True): + """ + A heavily modified Python standard library tokenizer. + + Additionally to the default information, yields also the prefix of each + token. This idea comes from lib2to3. The prefix contains all information + that is irrelevant for the parser like newlines in parentheses or comments. + """ + def dedent_if_necessary(start): + while start < indents[-1]: + if start > indents[-2]: + yield PythonToken(ERROR_DEDENT, '', (lnum, start), '') + indents[-1] = start + break + indents.pop() + yield PythonToken(DEDENT, '', spos, '') + + pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \ + fstring_pattern_map, always_break_tokens, = \ + _get_token_collection(version_info) + paren_level = 0 # count parentheses + if indents is None: + indents = [0] + max_ = 0 + numchars = '0123456789' + contstr = '' + contline = None + # We start with a newline. This makes indent at the first position + # possible. 
It's not valid Python, but still better than an INDENT in the + # second line (and not in the first). This makes quite a few things in + # Jedi's fast parser possible. + new_line = True + prefix = '' # Should never be required, but here for safety + additional_prefix = '' + lnum = start_pos[0] - 1 + fstring_stack = [] + for line in lines: # loop over lines in stream + lnum += 1 + pos = 0 + max_ = len(line) + if is_first_token: + if line.startswith(BOM_UTF8_STRING): + additional_prefix = BOM_UTF8_STRING + line = line[1:] + max_ = len(line) + + # Fake that the part before was already parsed. + line = '^' * start_pos[1] + line + pos = start_pos[1] + max_ += start_pos[1] + + is_first_token = False + + if contstr: # continued string + endmatch = endprog.match(line) + if endmatch: + pos = endmatch.end(0) + yield PythonToken( + STRING, contstr + line[:pos], + contstr_start, prefix) + contstr = '' + contline = None + else: + contstr = contstr + line + contline = contline + line + continue + + while pos < max_: + if fstring_stack: + tos = fstring_stack[-1] + if not tos.is_in_expr(): + string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos) + if string: + yield PythonToken( + FSTRING_STRING, string, + tos.last_string_start_pos, + # Never has a prefix because it can start anywhere and + # include whitespace. + prefix='' + ) + tos.previous_lines = '' + continue + if pos == max_: + break + + rest = line[pos:] + fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary( + fstring_stack, + rest, + lnum, + pos, + additional_prefix, + ) + pos += quote_length + if fstring_end_token is not None: + yield fstring_end_token + continue + + # in an f-string, match until the end of the string + if fstring_stack: + string_line = line + for fstring_stack_node in fstring_stack: + quote = fstring_stack_node.quote + end_match = endpats[quote].match(line, pos) + if end_match is not None: + end_match_string = end_match.group(0) + if len(end_match_string) - len(quote) + pos < len(string_line): + string_line = line[:pos] + end_match_string[:-len(quote)] + pseudomatch = pseudo_token.match(string_line, pos) + else: + pseudomatch = pseudo_token.match(line, pos) + + if pseudomatch: + prefix = additional_prefix + pseudomatch.group(1) + additional_prefix = '' + start, pos = pseudomatch.span(2) + spos = (lnum, start) + token = pseudomatch.group(2) + if token == '': + assert prefix + additional_prefix = prefix + # This means that we have a line with whitespace/comments at + # the end, which just results in an endmarker. + break + initial = token[0] + else: + match = whitespace.match(line, pos) + initial = line[match.end()] + start = match.end() + spos = (lnum, start) + + if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None): + new_line = False + if paren_level == 0 and not fstring_stack: + indent_start = start + if indent_start > indents[-1]: + yield PythonToken(INDENT, '', spos, '') + indents.append(indent_start) + for t in dedent_if_necessary(indent_start): + yield t + + if not pseudomatch: # scan for tokens + match = whitespace.match(line, pos) + if new_line and paren_level == 0 and not fstring_stack: + for t in dedent_if_necessary(match.end()): + yield t + pos = match.end() + new_line = False + yield PythonToken( + ERRORTOKEN, line[pos], (lnum, pos), + additional_prefix + match.group(0) + ) + additional_prefix = '' + pos += 1 + continue + + if (initial in numchars # ordinary number + or (initial == '.' and token != '.' 
and token != '...')): + yield PythonToken(NUMBER, token, spos, prefix) + elif pseudomatch.group(3) is not None: # ordinary name + if token in always_break_tokens and (fstring_stack or paren_level): + fstring_stack[:] = [] + paren_level = 0 + # We only want to dedent if the token is on a new line. + m = re.match(r'[ \f\t]*$', line[:start]) + if m is not None: + for t in dedent_if_necessary(m.end()): + yield t + if is_identifier(token): + yield PythonToken(NAME, token, spos, prefix) + else: + for t in _split_illegal_unicode_name(token, spos, prefix): + yield t # yield from Python 2 + elif initial in '\r\n': + if any(not f.allow_multiline() for f in fstring_stack): + # Would use fstring_stack.clear, but that's not available + # in Python 2. + fstring_stack[:] = [] + + if not new_line and paren_level == 0 and not fstring_stack: + yield PythonToken(NEWLINE, token, spos, prefix) + else: + additional_prefix = prefix + token + new_line = True + elif initial == '#': # Comments + assert not token.endswith("\n") + if fstring_stack and fstring_stack[-1].is_in_expr(): + # `#` is not allowed in f-string expressions + yield PythonToken(ERRORTOKEN, initial, spos, prefix) + pos = start + 1 + else: + additional_prefix = prefix + token + elif token in triple_quoted: + endprog = endpats[token] + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + yield PythonToken(STRING, token, spos, prefix) + else: + contstr_start = spos # multiple lines + contstr = line[start:] + contline = line + break + + # Check up to the first 3 chars of the token to see if + # they're in the single_quoted set. If so, they start + # a string. + # We're using the first 3, because we're looking for + # "rb'" (for example) at the start of the token. If + # we switch to longer prefixes, this needs to be + # adjusted. + # Note that initial == token[:1]. + # Also note that single quote checking must come after + # triple quote checking (above). + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] in '\r\n': # continued string + # This means that a single quoted string ends with a + # backslash and is continued. + contstr_start = lnum, start + endprog = (endpats.get(initial) or endpats.get(token[1]) + or endpats.get(token[2])) + contstr = line[start:] + contline = line + break + else: # ordinary string + yield PythonToken(STRING, token, spos, prefix) + elif token in fstring_pattern_map: # The start of an fstring. 
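+ # fstring_pattern_map maps the start token (e.g. f" or rf') to its bare quote.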
+ fstring_stack.append(FStringNode(fstring_pattern_map[token])) + yield PythonToken(FSTRING_START, token, spos, prefix) + elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'): # continued stmt + additional_prefix += prefix + line[start:] + break + else: + if token in '([{': + if fstring_stack: + fstring_stack[-1].open_parentheses(token) + else: + paren_level += 1 + elif token in ')]}': + if fstring_stack: + fstring_stack[-1].close_parentheses(token) + else: + if paren_level: + paren_level -= 1 + elif token.startswith(':') and fstring_stack \ + and fstring_stack[-1].parentheses_count \ + - fstring_stack[-1].format_spec_count == 1: + # `:` and `:=` both count + fstring_stack[-1].format_spec_count += 1 + token = ':' + pos = start + 1 + + yield PythonToken(OP, token, spos, prefix) + + if contstr: + yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix) + if contstr.endswith('\n') or contstr.endswith('\r'): + new_line = True + + if fstring_stack: + tos = fstring_stack[-1] + if tos.previous_lines: + yield PythonToken( + FSTRING_STRING, tos.previous_lines, + tos.last_string_start_pos, + # Never has a prefix because it can start anywhere and + # include whitespace. + prefix='' + ) + + end_pos = lnum, max_ + # As the last position we just take the maximally possible position. We + # remove -1 for the last new line. + for indent in indents[1:]: + indents.pop() + yield PythonToken(DEDENT, '', end_pos, '') + yield PythonToken(ENDMARKER, '', end_pos, additional_prefix) + + +def _split_illegal_unicode_name(token, start_pos, prefix): + def create_token(): + return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix) + + found = '' + is_illegal = False + pos = start_pos + for i, char in enumerate(token): + if is_illegal: + if is_identifier(char): + yield create_token() + found = char + is_illegal = False + prefix = '' + pos = start_pos[0], start_pos[1] + i + else: + found += char + else: + new_found = found + char + if is_identifier(new_found): + found = new_found + else: + if found: + yield create_token() + prefix = '' + pos = start_pos[0], start_pos[1] + i + found = char + is_illegal = True + + if found: + yield create_token() + + +if __name__ == "__main__": + if len(sys.argv) >= 2: + path = sys.argv[1] + with open(path) as f: + code = f.read() + else: + code = sys.stdin.read() + + from parso.utils import python_bytes_to_unicode, parse_version_string + + if isinstance(code, bytes): + code = python_bytes_to_unicode(code) + + for token in tokenize(code, parse_version_string()): + print(token) diff --git a/contrib/python/parso/py2/parso/python/tree.py b/contrib/python/parso/py2/parso/python/tree.py new file mode 100644 index 0000000000..42fb9be69b --- /dev/null +++ b/contrib/python/parso/py2/parso/python/tree.py @@ -0,0 +1,1270 @@ +""" +This is the syntax tree for Python syntaxes (2 & 3). The classes represent +syntax elements like functions and imports. + +All of the nodes can be traced back to the `Python grammar file +<https://docs.python.org/3/reference/grammar.html>`_. If you want to know how +a tree is structured, just analyse that file (for each Python version it's a +bit different). + +There's a lot of logic here that makes it easier for Jedi (and other libraries) +to deal with a Python syntax tree. + +By using :py:meth:`parso.tree.NodeOrLeaf.get_code` on a module, you can get +back the 1-to-1 representation of the input given to the parser. This is +important if you want to refactor a parser tree. 
+ +>>> from parso import parse +>>> parser = parse('import os') +>>> module = parser.get_root_node() +>>> module +<Module: @1-1> + +Any subclasses of :class:`Scope`, including :class:`Module` has an attribute +:attr:`iter_imports <Scope.iter_imports>`: + +>>> list(module.iter_imports()) +[<ImportName: import os@1,0>] + +Changes to the Python Grammar +----------------------------- + +A few things have changed when looking at Python grammar files: + +- :class:`Param` does not exist in Python grammar files. It is essentially a + part of a ``parameters`` node. |parso| splits it up to make it easier to + analyse parameters. However this just makes it easier to deal with the syntax + tree, it doesn't actually change the valid syntax. +- A few nodes like `lambdef` and `lambdef_nocond` have been merged in the + syntax tree to make it easier to do deal with them. + +Parser Tree Classes +------------------- +""" + +import re +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping + +from parso._compatibility import utf8_repr, unicode +from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \ + search_ancestor +from parso.python.prefix import split_prefix +from parso.utils import split_lines + +_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', + 'with_stmt', 'async_stmt', 'suite']) +_RETURN_STMT_CONTAINERS = set(['suite', 'simple_stmt']) | _FLOW_CONTAINERS + +_FUNC_CONTAINERS = set( + ['suite', 'simple_stmt', 'decorated', 'async_funcdef'] +) | _FLOW_CONTAINERS + +_GET_DEFINITION_TYPES = set([ + 'expr_stmt', 'sync_comp_for', 'with_stmt', 'for_stmt', 'import_name', + 'import_from', 'param', 'del_stmt', +]) +_IMPORTS = set(['import_name', 'import_from']) + + +class DocstringMixin(object): + __slots__ = () + + def get_doc_node(self): + """ + Returns the string leaf of a docstring. e.g. ``r'''foo'''``. + """ + if self.type == 'file_input': + node = self.children[0] + elif self.type in ('funcdef', 'classdef'): + node = self.children[self.children.index(':') + 1] + if node.type == 'suite': # Normally a suite + node = node.children[1] # -> NEWLINE stmt + else: # ExprStmt + simple_stmt = self.parent + c = simple_stmt.parent.children + index = c.index(simple_stmt) + if not index: + return None + node = c[index - 1] + + if node.type == 'simple_stmt': + node = node.children[0] + if node.type == 'string': + return node + return None + + +class PythonMixin(object): + """ + Some Python specific utilities. + """ + __slots__ = () + + def get_name_of_position(self, position): + """ + Given a (line, column) tuple, returns a :py:class:`Name` or ``None`` if + there is no name at that position. + """ + for c in self.children: + if isinstance(c, Leaf): + if c.type == 'name' and c.start_pos <= position <= c.end_pos: + return c + else: + result = c.get_name_of_position(position) + if result is not None: + return result + return None + + +class PythonLeaf(PythonMixin, Leaf): + __slots__ = () + + def _split_prefix(self): + return split_prefix(self, self.get_start_pos_of_prefix()) + + def get_start_pos_of_prefix(self): + """ + Basically calls :py:meth:`parso.tree.NodeOrLeaf.get_start_pos_of_prefix`. + """ + # TODO it is really ugly that we have to override it. Maybe change + # indent error leafs somehow? No idea how, though. 
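+ # Error leafs created from INDENT/DEDENT tokens have an empty value, so start from the leaf before them.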
+ previous_leaf = self.get_previous_leaf() + if previous_leaf is not None and previous_leaf.type == 'error_leaf' \ + and previous_leaf.token_type in ('INDENT', 'DEDENT', 'ERROR_DEDENT'): + previous_leaf = previous_leaf.get_previous_leaf() + + if previous_leaf is None: # It's the first leaf. + lines = split_lines(self.prefix) + # + 1 is needed because split_lines always returns at least ['']. + return self.line - len(lines) + 1, 0 # It's the first leaf. + return previous_leaf.end_pos + + +class _LeafWithoutNewlines(PythonLeaf): + """ + Simply here to optimize performance. + """ + __slots__ = () + + @property + def end_pos(self): + return self.line, self.column + len(self.value) + + +# Python base classes +class PythonBaseNode(PythonMixin, BaseNode): + __slots__ = () + + +class PythonNode(PythonMixin, Node): + __slots__ = () + + +class PythonErrorNode(PythonMixin, ErrorNode): + __slots__ = () + + +class PythonErrorLeaf(ErrorLeaf, PythonLeaf): + __slots__ = () + + +class EndMarker(_LeafWithoutNewlines): + __slots__ = () + type = 'endmarker' + + @utf8_repr + def __repr__(self): + return "<%s: prefix=%s end_pos=%s>" % ( + type(self).__name__, repr(self.prefix), self.end_pos + ) + + +class Newline(PythonLeaf): + """Contains NEWLINE and ENDMARKER tokens.""" + __slots__ = () + type = 'newline' + + @utf8_repr + def __repr__(self): + return "<%s: %s>" % (type(self).__name__, repr(self.value)) + + +class Name(_LeafWithoutNewlines): + """ + A string. Sometimes it is important to know if the string belongs to a name + or not. + """ + type = 'name' + __slots__ = () + + def __repr__(self): + return "<%s: %s@%s,%s>" % (type(self).__name__, self.value, + self.line, self.column) + + def is_definition(self, include_setitem=False): + """ + Returns True if the name is being defined. + """ + return self.get_definition(include_setitem=include_setitem) is not None + + def get_definition(self, import_name_always=False, include_setitem=False): + """ + Returns None if there's no definition for a name. + + :param import_name_always: Specifies if an import name is always a + definition. Normally foo in `from foo import bar` is not a + definition. + """ + node = self.parent + type_ = node.type + + if type_ in ('funcdef', 'classdef'): + if self == node.name: + return node + return None + + if type_ == 'except_clause': + # TODO in Python 2 this doesn't work correctly. See grammar file. + # I think we'll just let it be. Python 2 will be gone in a few + # years. + if self.get_previous_sibling() == 'as': + return node.parent # The try_stmt. + return None + + while node is not None: + if node.type == 'suite': + return None + if node.type == 'namedexpr_test': + return node.children[0] + if node.type in _GET_DEFINITION_TYPES: + if self in node.get_defined_names(include_setitem): + return node + if import_name_always and node.type in _IMPORTS: + return node + return None + node = node.parent + return None + + +class Literal(PythonLeaf): + __slots__ = () + + +class Number(Literal): + type = 'number' + __slots__ = () + + +class String(Literal): + type = 'string' + __slots__ = () + + @property + def string_prefix(self): + return re.match(r'\w*(?=[\'"])', self.value).group(0) + + def _get_payload(self): + match = re.search( + r'''('{3}|"{3}|'|")(.*)$''', + self.value, + flags=re.DOTALL + ) + return match.group(2)[:-len(match.group(1))] + + +class FStringString(PythonLeaf): + """ + f-strings contain f-string expressions and normal python strings. These are + the string parts of f-strings. 
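+ For example, in ``f'a{b}c'`` the leaves for ``a`` and ``c`` have this type.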
+ """ + type = 'fstring_string' + __slots__ = () + + +class FStringStart(PythonLeaf): + """ + f-strings contain f-string expressions and normal python strings. These are + the string parts of f-strings. + """ + type = 'fstring_start' + __slots__ = () + + +class FStringEnd(PythonLeaf): + """ + f-strings contain f-string expressions and normal python strings. These are + the string parts of f-strings. + """ + type = 'fstring_end' + __slots__ = () + + +class _StringComparisonMixin(object): + def __eq__(self, other): + """ + Make comparisons with strings easy. + Improves the readability of the parser. + """ + if isinstance(other, (str, unicode)): + return self.value == other + + return self is other + + def __ne__(self, other): + """Python 2 compatibility.""" + return not self.__eq__(other) + + def __hash__(self): + return hash(self.value) + + +class Operator(_LeafWithoutNewlines, _StringComparisonMixin): + type = 'operator' + __slots__ = () + + +class Keyword(_LeafWithoutNewlines, _StringComparisonMixin): + type = 'keyword' + __slots__ = () + + +class Scope(PythonBaseNode, DocstringMixin): + """ + Super class for the parser tree, which represents the state of a python + text file. + A Scope is either a function, class or lambda. + """ + __slots__ = () + + def __init__(self, children): + super(Scope, self).__init__(children) + + def iter_funcdefs(self): + """ + Returns a generator of `funcdef` nodes. + """ + return self._search_in_scope('funcdef') + + def iter_classdefs(self): + """ + Returns a generator of `classdef` nodes. + """ + return self._search_in_scope('classdef') + + def iter_imports(self): + """ + Returns a generator of `import_name` and `import_from` nodes. + """ + return self._search_in_scope('import_name', 'import_from') + + def _search_in_scope(self, *names): + def scan(children): + for element in children: + if element.type in names: + yield element + if element.type in _FUNC_CONTAINERS: + for e in scan(element.children): + yield e + + return scan(self.children) + + def get_suite(self): + """ + Returns the part that is executed by the function. + """ + return self.children[-1] + + def __repr__(self): + try: + name = self.name.value + except AttributeError: + name = '' + + return "<%s: %s@%s-%s>" % (type(self).__name__, name, + self.start_pos[0], self.end_pos[0]) + + +class Module(Scope): + """ + The top scope, which is always a module. + Depending on the underlying parser this may be a full module or just a part + of a module. + """ + __slots__ = ('_used_names',) + type = 'file_input' + + def __init__(self, children): + super(Module, self).__init__(children) + self._used_names = None + + def _iter_future_import_names(self): + """ + :return: A list of future import names. + :rtype: list of str + """ + # In Python it's not allowed to use future imports after the first + # actual (non-future) statement. However this is not a linter here, + # just return all future imports. If people want to scan for issues + # they should use the API. + for imp in self.iter_imports(): + if imp.type == 'import_from' and imp.level == 0: + for path in imp.get_paths(): + names = [name.value for name in path] + if len(names) == 2 and names[0] == '__future__': + yield names[1] + + def _has_explicit_absolute_import(self): + """ + Checks if imports in this module are explicitly absolute, i.e. there + is a ``__future__`` import. + Currently not public, might be in the future. 
+ :return bool: + """ + for name in self._iter_future_import_names(): + if name == 'absolute_import': + return True + return False + + def get_used_names(self): + """ + Returns all the :class:`Name` leafs that exist in this module. This + includes both definitions and references of names. + """ + if self._used_names is None: + # Don't directly use self._used_names to eliminate a lookup. + dct = {} + + def recurse(node): + try: + children = node.children + except AttributeError: + if node.type == 'name': + arr = dct.setdefault(node.value, []) + arr.append(node) + else: + for child in children: + recurse(child) + + recurse(self) + self._used_names = UsedNamesMapping(dct) + return self._used_names + + +class Decorator(PythonBaseNode): + type = 'decorator' + __slots__ = () + + +class ClassOrFunc(Scope): + __slots__ = () + + @property + def name(self): + """ + Returns the `Name` leaf that defines the function or class name. + """ + return self.children[1] + + def get_decorators(self): + """ + :rtype: list of :class:`Decorator` + """ + decorated = self.parent + if decorated.type == 'async_funcdef': + decorated = decorated.parent + + if decorated.type == 'decorated': + if decorated.children[0].type == 'decorators': + return decorated.children[0].children + else: + return decorated.children[:1] + else: + return [] + + +class Class(ClassOrFunc): + """ + Used to store the parsed contents of a python class. + """ + type = 'classdef' + __slots__ = () + + def __init__(self, children): + super(Class, self).__init__(children) + + def get_super_arglist(self): + """ + Returns the `arglist` node that defines the super classes. It returns + None if there are no arguments. + """ + if self.children[2] != '(': # Has no parentheses + return None + else: + if self.children[3] == ')': # Empty parentheses + return None + else: + return self.children[3] + + +def _create_params(parent, argslist_list): + """ + `argslist_list` is a list that can contain an argslist as a first item, but + most not. It's basically the items between the parameter brackets (which is + at most one item). + This function modifies the parser structure. It generates `Param` objects + from the normal ast. Those param objects do not exist in a normal ast, but + make the evaluation of the ast tree so much easier. + You could also say that this function replaces the argslist node with a + list of Param objects. + """ + def check_python2_nested_param(node): + """ + Python 2 allows params to look like ``def x(a, (b, c))``, which is + basically a way of unpacking tuples in params. Python 3 has ditched + this behavior. Jedi currently just ignores those constructs. + """ + return node.type == 'fpdef' and node.children[0] == '(' + + try: + first = argslist_list[0] + except IndexError: + return [] + + if first.type in ('name', 'fpdef'): + if check_python2_nested_param(first): + return [first] + else: + return [Param([first], parent)] + elif first == '*': + return [first] + else: # argslist is a `typedargslist` or a `varargslist`. + if first.type == 'tfpdef': + children = [first] + else: + children = first.children + new_children = [] + start = 0 + # Start with offset 1, because the end is higher. + for end, child in enumerate(children + [None], 1): + if child is None or child == ',': + param_children = children[start:end] + if param_children: # Could as well be comma and then end. 
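+ # Bare '*' and '/' markers (and Python 2 tuple params) stay as plain children; everything else becomes a Param.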
+ if param_children[0] == '*' \ + and (len(param_children) == 1 + or param_children[1] == ',') \ + or check_python2_nested_param(param_children[0]) \ + or param_children[0] == '/': + for p in param_children: + p.parent = parent + new_children += param_children + else: + new_children.append(Param(param_children, parent)) + start = end + return new_children + + +class Function(ClassOrFunc): + """ + Used to store the parsed contents of a python function. + + Children:: + + 0. <Keyword: def> + 1. <Name> + 2. parameter list (including open-paren and close-paren <Operator>s) + 3. or 5. <Operator: :> + 4. or 6. Node() representing function body + 3. -> (if annotation is also present) + 4. annotation (if present) + """ + type = 'funcdef' + + def __init__(self, children): + super(Function, self).__init__(children) + parameters = self.children[2] # After `def foo` + parameters.children[1:-1] = _create_params(parameters, parameters.children[1:-1]) + + def _get_param_nodes(self): + return self.children[2].children + + def get_params(self): + """ + Returns a list of `Param()`. + """ + return [p for p in self._get_param_nodes() if p.type == 'param'] + + @property + def name(self): + return self.children[1] # First token after `def` + + def iter_yield_exprs(self): + """ + Returns a generator of `yield_expr`. + """ + def scan(children): + for element in children: + if element.type in ('classdef', 'funcdef', 'lambdef'): + continue + + try: + nested_children = element.children + except AttributeError: + if element.value == 'yield': + if element.parent.type == 'yield_expr': + yield element.parent + else: + yield element + else: + for result in scan(nested_children): + yield result + + return scan(self.children) + + def iter_return_stmts(self): + """ + Returns a generator of `return_stmt`. + """ + def scan(children): + for element in children: + if element.type == 'return_stmt' \ + or element.type == 'keyword' and element.value == 'return': + yield element + if element.type in _RETURN_STMT_CONTAINERS: + for e in scan(element.children): + yield e + + return scan(self.children) + + def iter_raise_stmts(self): + """ + Returns a generator of `raise_stmt`. Includes raise statements inside try-except blocks + """ + def scan(children): + for element in children: + if element.type == 'raise_stmt' \ + or element.type == 'keyword' and element.value == 'raise': + yield element + if element.type in _RETURN_STMT_CONTAINERS: + for e in scan(element.children): + yield e + + return scan(self.children) + + def is_generator(self): + """ + :return bool: Checks if a function is a generator or not. + """ + return next(self.iter_yield_exprs(), None) is not None + + @property + def annotation(self): + """ + Returns the test node after `->` or `None` if there is no annotation. + """ + try: + if self.children[3] == "->": + return self.children[4] + assert self.children[3] == ":" + return None + except IndexError: + return None + + +class Lambda(Function): + """ + Lambdas are basically trimmed functions, so give it the same interface. + + Children:: + + 0. <Keyword: lambda> + *. <Param x> for each argument x + -2. <Operator: :> + -1. Node() representing body + """ + type = 'lambdef' + __slots__ = () + + def __init__(self, children): + # We don't want to call the Function constructor, call its parent. + super(Function, self).__init__(children) + # Everything between `lambda` and the `:` operator is a parameter. 
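+ # e.g. in `lambda x, y=3: x` both `x` and `y=3` become Param nodes.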
+ self.children[1:-2] = _create_params(self, self.children[1:-2]) + + @property + def name(self): + """ + Raises an AttributeError. Lambdas don't have a defined name. + """ + raise AttributeError("lambda is not named.") + + def _get_param_nodes(self): + return self.children[1:-2] + + @property + def annotation(self): + """ + Returns `None`, lambdas don't have annotations. + """ + return None + + def __repr__(self): + return "<%s@%s>" % (self.__class__.__name__, self.start_pos) + + +class Flow(PythonBaseNode): + __slots__ = () + + +class IfStmt(Flow): + type = 'if_stmt' + __slots__ = () + + def get_test_nodes(self): + """ + E.g. returns all the `test` nodes that are named as x, below: + + if x: + pass + elif x: + pass + """ + for i, c in enumerate(self.children): + if c in ('elif', 'if'): + yield self.children[i + 1] + + def get_corresponding_test_node(self, node): + """ + Searches for the branch in which the node is and returns the + corresponding test node (see function above). However if the node is in + the test node itself and not in the suite return None. + """ + start_pos = node.start_pos + for check_node in reversed(list(self.get_test_nodes())): + if check_node.start_pos < start_pos: + if start_pos < check_node.end_pos: + return None + # In this case the node is within the check_node itself, + # not in the suite + else: + return check_node + + def is_node_after_else(self, node): + """ + Checks if a node is defined after `else`. + """ + for c in self.children: + if c == 'else': + if node.start_pos > c.start_pos: + return True + else: + return False + + +class WhileStmt(Flow): + type = 'while_stmt' + __slots__ = () + + +class ForStmt(Flow): + type = 'for_stmt' + __slots__ = () + + def get_testlist(self): + """ + Returns the input node ``y`` from: ``for x in y:``. + """ + return self.children[3] + + def get_defined_names(self, include_setitem=False): + return _defined_names(self.children[1], include_setitem) + + +class TryStmt(Flow): + type = 'try_stmt' + __slots__ = () + + def get_except_clause_tests(self): + """ + Returns the ``test`` nodes found in ``except_clause`` nodes. + Returns ``[None]`` for except clauses without an exception given. + """ + for node in self.children: + if node.type == 'except_clause': + yield node.children[1] + elif node == 'except': + yield None + + +class WithStmt(Flow): + type = 'with_stmt' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns the a list of `Name` that the with statement defines. The + defined names are set after `as`. + """ + names = [] + for with_item in self.children[1:-2:2]: + # Check with items for 'as' names. + if with_item.type == 'with_item': + names += _defined_names(with_item.children[2], include_setitem) + return names + + def get_test_node_from_name(self, name): + node = name.parent + if node.type != 'with_item': + raise ValueError('The name is not actually part of a with statement.') + return node.children[0] + + +class Import(PythonBaseNode): + __slots__ = () + + def get_path_for_name(self, name): + """ + The path is the list of names that leads to the searched name. + + :return list of Name: + """ + try: + # The name may be an alias. If it is, just map it back to the name. 
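+ # e.g. for `import foo as bar`, `bar` is mapped back to `foo`.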
+ name = self._aliases()[name] + except KeyError: + pass + + for path in self.get_paths(): + if name in path: + return path[:path.index(name) + 1] + raise ValueError('Name should be defined in the import itself') + + def is_nested(self): + return False # By default, sub classes may overwrite this behavior + + def is_star_import(self): + return self.children[-1] == '*' + + +class ImportFrom(Import): + type = 'import_from' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns the a list of `Name` that the import defines. The + defined names are set after `import` or in case an alias - `as` - is + present that name is returned. + """ + return [alias or name for name, alias in self._as_name_tuples()] + + def _aliases(self): + """Mapping from alias to its corresponding name.""" + return dict((alias, name) for name, alias in self._as_name_tuples() + if alias is not None) + + def get_from_names(self): + for n in self.children[1:]: + if n not in ('.', '...'): + break + if n.type == 'dotted_name': # from x.y import + return n.children[::2] + elif n == 'import': # from . import + return [] + else: # from x import + return [n] + + @property + def level(self): + """The level parameter of ``__import__``.""" + level = 0 + for n in self.children[1:]: + if n in ('.', '...'): + level += len(n.value) + else: + break + return level + + def _as_name_tuples(self): + last = self.children[-1] + if last == ')': + last = self.children[-2] + elif last == '*': + return # No names defined directly. + + if last.type == 'import_as_names': + as_names = last.children[::2] + else: + as_names = [last] + for as_name in as_names: + if as_name.type == 'name': + yield as_name, None + else: + yield as_name.children[::2] # yields x, y -> ``x as y`` + + def get_paths(self): + """ + The import paths defined in an import statement. Typically an array + like this: ``[<Name: datetime>, <Name: date>]``. + + :return list of list of Name: + """ + dotted = self.get_from_names() + + if self.children[-1] == '*': + return [dotted] + return [dotted + [name] for name, alias in self._as_name_tuples()] + + +class ImportName(Import): + """For ``import_name`` nodes. Covers normal imports without ``from``.""" + type = 'import_name' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns the a list of `Name` that the import defines. The defined names + is always the first name after `import` or in case an alias - `as` - is + present that name is returned. + """ + return [alias or path[0] for path, alias in self._dotted_as_names()] + + @property + def level(self): + """The level parameter of ``__import__``.""" + return 0 # Obviously 0 for imports without from. 
+ + def get_paths(self): + return [path for path, alias in self._dotted_as_names()] + + def _dotted_as_names(self): + """Generator of (list(path), alias) where alias may be None.""" + dotted_as_names = self.children[1] + if dotted_as_names.type == 'dotted_as_names': + as_names = dotted_as_names.children[::2] + else: + as_names = [dotted_as_names] + + for as_name in as_names: + if as_name.type == 'dotted_as_name': + alias = as_name.children[2] + as_name = as_name.children[0] + else: + alias = None + if as_name.type == 'name': + yield [as_name], alias + else: + # dotted_names + yield as_name.children[::2], alias + + def is_nested(self): + """ + This checks for the special case of nested imports, without aliases and + from statement:: + + import foo.bar + """ + return bool([1 for path, alias in self._dotted_as_names() + if alias is None and len(path) > 1]) + + def _aliases(self): + """ + :return list of Name: Returns all the alias + """ + return dict((alias, path[-1]) for path, alias in self._dotted_as_names() + if alias is not None) + + +class KeywordStatement(PythonBaseNode): + """ + For the following statements: `assert`, `del`, `global`, `nonlocal`, + `raise`, `return`, `yield`. + + `pass`, `continue` and `break` are not in there, because they are just + simple keywords and the parser reduces it to a keyword. + """ + __slots__ = () + + @property + def type(self): + """ + Keyword statements start with the keyword and end with `_stmt`. You can + crosscheck this with the Python grammar. + """ + return '%s_stmt' % self.keyword + + @property + def keyword(self): + return self.children[0].value + + def get_defined_names(self, include_setitem=False): + keyword = self.keyword + if keyword == 'del': + return _defined_names(self.children[1], include_setitem) + if keyword in ('global', 'nonlocal'): + return self.children[1::2] + return [] + + +class AssertStmt(KeywordStatement): + __slots__ = () + + @property + def assertion(self): + return self.children[1] + + +class GlobalStmt(KeywordStatement): + __slots__ = () + + def get_global_names(self): + return self.children[1::2] + + +class ReturnStmt(KeywordStatement): + __slots__ = () + + +class YieldExpr(PythonBaseNode): + type = 'yield_expr' + __slots__ = () + + +def _defined_names(current, include_setitem): + """ + A helper function to find the defined names in statements, for loops and + list comprehensions. + """ + names = [] + if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist', 'testlist'): + for child in current.children[::2]: + names += _defined_names(child, include_setitem) + elif current.type in ('atom', 'star_expr'): + names += _defined_names(current.children[1], include_setitem) + elif current.type in ('power', 'atom_expr'): + if current.children[-2] != '**': # Just if there's no operation + trailer = current.children[-1] + if trailer.children[0] == '.': + names.append(trailer.children[1]) + elif trailer.children[0] == '[' and include_setitem: + for node in current.children[-2::-1]: + if node.type == 'trailer': + names.append(node.children[1]) + break + if node.type == 'name': + names.append(node) + break + else: + names.append(current) + return names + + +class ExprStmt(PythonBaseNode, DocstringMixin): + type = 'expr_stmt' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns a list of `Name` defined before the `=` sign. 
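+ E.g. for ``x = y = 3`` this returns the names ``x`` and ``y``.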
+ """ + names = [] + if self.children[1].type == 'annassign': + names = _defined_names(self.children[0], include_setitem) + return [ + name + for i in range(0, len(self.children) - 2, 2) + if '=' in self.children[i + 1].value + for name in _defined_names(self.children[i], include_setitem) + ] + names + + def get_rhs(self): + """Returns the right-hand-side of the equals.""" + node = self.children[-1] + if node.type == 'annassign': + if len(node.children) == 4: + node = node.children[3] + else: + node = node.children[1] + return node + + def yield_operators(self): + """ + Returns a generator of `+=`, `=`, etc. or None if there is no operation. + """ + first = self.children[1] + if first.type == 'annassign': + if len(first.children) <= 2: + return # No operator is available, it's just PEP 484. + + first = first.children[2] + yield first + + for operator in self.children[3::2]: + yield operator + + +class Param(PythonBaseNode): + """ + It's a helper class that makes business logic with params much easier. The + Python grammar defines no ``param`` node. It defines it in a different way + that is not really suited to working with parameters. + """ + type = 'param' + + def __init__(self, children, parent): + super(Param, self).__init__(children) + self.parent = parent + for child in children: + child.parent = self + + @property + def star_count(self): + """ + Is `0` in case of `foo`, `1` in case of `*foo` or `2` in case of + `**foo`. + """ + first = self.children[0] + if first in ('*', '**'): + return len(first.value) + return 0 + + @property + def default(self): + """ + The default is the test node that appears after the `=`. Is `None` in + case no default is present. + """ + has_comma = self.children[-1] == ',' + try: + if self.children[-2 - int(has_comma)] == '=': + return self.children[-1 - int(has_comma)] + except IndexError: + return None + + @property + def annotation(self): + """ + The default is the test node that appears after `:`. Is `None` in case + no annotation is present. + """ + tfpdef = self._tfpdef() + if tfpdef.type == 'tfpdef': + assert tfpdef.children[1] == ":" + assert len(tfpdef.children) == 3 + annotation = tfpdef.children[2] + return annotation + else: + return None + + def _tfpdef(self): + """ + tfpdef: see e.g. grammar36.txt. + """ + offset = int(self.children[0] in ('*', '**')) + return self.children[offset] + + @property + def name(self): + """ + The `Name` leaf of the param. + """ + if self._tfpdef().type == 'tfpdef': + return self._tfpdef().children[0] + else: + return self._tfpdef() + + def get_defined_names(self, include_setitem=False): + return [self.name] + + @property + def position_index(self): + """ + Property for the positional index of a paramter. + """ + index = self.parent.children.index(self) + try: + keyword_only_index = self.parent.children.index('*') + if index > keyword_only_index: + # Skip the ` *, ` + index -= 2 + except ValueError: + pass + try: + keyword_only_index = self.parent.children.index('/') + if index > keyword_only_index: + # Skip the ` /, ` + index -= 2 + except ValueError: + pass + return index - 1 + + def get_parent_function(self): + """ + Returns the function/lambda of a parameter. + """ + return search_ancestor(self, 'funcdef', 'lambdef') + + def get_code(self, include_prefix=True, include_comma=True): + """ + Like all the other get_code functions, but includes the param + `include_comma`. + + :param include_comma bool: If enabled includes the comma in the string output. 
+ """ + if include_comma: + return super(Param, self).get_code(include_prefix) + + children = self.children + if children[-1] == ',': + children = children[:-1] + return self._get_code_for_children( + children, + include_prefix=include_prefix + ) + + def __repr__(self): + default = '' if self.default is None else '=%s' % self.default.get_code() + return '<%s: %s>' % (type(self).__name__, str(self._tfpdef()) + default) + + +class SyncCompFor(PythonBaseNode): + type = 'sync_comp_for' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns the a list of `Name` that the comprehension defines. + """ + # allow async for + return _defined_names(self.children[1], include_setitem) + + +# This is simply here so an older Jedi version can work with this new parso +# version. Can be deleted in the next release. +CompFor = SyncCompFor + + +class UsedNamesMapping(Mapping): + """ + This class exists for the sole purpose of creating an immutable dict. + """ + def __init__(self, dct): + self._dict = dct + + def __getitem__(self, key): + return self._dict[key] + + def __len__(self): + return len(self._dict) + + def __iter__(self): + return iter(self._dict) + + def __hash__(self): + return id(self) + + def __eq__(self, other): + # Comparing these dicts does not make sense. + return self is other diff --git a/contrib/python/parso/py2/parso/tree.py b/contrib/python/parso/py2/parso/tree.py new file mode 100644 index 0000000000..b1151977c9 --- /dev/null +++ b/contrib/python/parso/py2/parso/tree.py @@ -0,0 +1,381 @@ +import sys +from abc import abstractmethod, abstractproperty + +from parso._compatibility import utf8_repr, encoding +from parso.utils import split_lines + + +def search_ancestor(node, *node_types): + """ + Recursively looks at the parents of a node and returns the first found node + that matches node_types. Returns ``None`` if no matching node is found. + + :param node: The ancestors of this node will be checked. + :param node_types: type names that are searched for. + :type node_types: tuple of str + """ + while True: + node = node.parent + if node is None or node.type in node_types: + return node + + +class NodeOrLeaf(object): + """ + The base class for nodes and leaves. + """ + __slots__ = () + type = None + ''' + The type is a string that typically matches the types of the grammar file. + ''' + + def get_root_node(self): + """ + Returns the root node of a parser tree. The returned node doesn't have + a parent node like all the other nodes/leaves. + """ + scope = self + while scope.parent is not None: + scope = scope.parent + return scope + + def get_next_sibling(self): + """ + Returns the node immediately following this node in this parent's + children list. If this node does not have a next sibling, it is None + """ + parent = self.parent + if parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(parent.children): + if child is self: + try: + return self.parent.children[i + 1] + except IndexError: + return None + + def get_previous_sibling(self): + """ + Returns the node immediately preceding this node in this parent's + children list. If this node does not have a previous sibling, it is + None. 
+ """ + parent = self.parent + if parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(parent.children): + if child is self: + if i == 0: + return None + return self.parent.children[i - 1] + + def get_previous_leaf(self): + """ + Returns the previous leaf in the parser tree. + Returns `None` if this is the first element in the parser tree. + """ + if self.parent is None: + return None + + node = self + while True: + c = node.parent.children + i = c.index(node) + if i == 0: + node = node.parent + if node.parent is None: + return None + else: + node = c[i - 1] + break + + while True: + try: + node = node.children[-1] + except AttributeError: # A Leaf doesn't have children. + return node + + def get_next_leaf(self): + """ + Returns the next leaf in the parser tree. + Returns None if this is the last element in the parser tree. + """ + if self.parent is None: + return None + + node = self + while True: + c = node.parent.children + i = c.index(node) + if i == len(c) - 1: + node = node.parent + if node.parent is None: + return None + else: + node = c[i + 1] + break + + while True: + try: + node = node.children[0] + except AttributeError: # A Leaf doesn't have children. + return node + + @abstractproperty + def start_pos(self): + """ + Returns the starting position of the prefix as a tuple, e.g. `(3, 4)`. + + :return tuple of int: (line, column) + """ + + @abstractproperty + def end_pos(self): + """ + Returns the end position of the prefix as a tuple, e.g. `(3, 4)`. + + :return tuple of int: (line, column) + """ + + @abstractmethod + def get_start_pos_of_prefix(self): + """ + Returns the start_pos of the prefix. This means basically it returns + the end_pos of the last prefix. The `get_start_pos_of_prefix()` of the + prefix `+` in `2 + 1` would be `(1, 1)`, while the start_pos is + `(1, 2)`. + + :return tuple of int: (line, column) + """ + + @abstractmethod + def get_first_leaf(self): + """ + Returns the first leaf of a node or itself if this is a leaf. + """ + + @abstractmethod + def get_last_leaf(self): + """ + Returns the last leaf of a node or itself if this is a leaf. + """ + + @abstractmethod + def get_code(self, include_prefix=True): + """ + Returns the code that was the input for the parser for this node. + + :param include_prefix: Removes the prefix (whitespace and comments) of + e.g. a statement. + """ + + +class Leaf(NodeOrLeaf): + ''' + Leafs are basically tokens with a better API. Leafs exactly know where they + were defined and what text preceeds them. + ''' + __slots__ = ('value', 'parent', 'line', 'column', 'prefix') + + def __init__(self, value, start_pos, prefix=''): + self.value = value + ''' + :py:func:`str` The value of the current token. + ''' + self.start_pos = start_pos + self.prefix = prefix + ''' + :py:func:`str` Typically a mixture of whitespace and comments. Stuff + that is syntactically irrelevant for the syntax tree. + ''' + self.parent = None + ''' + The parent :class:`BaseNode` of this leaf. + ''' + + @property + def start_pos(self): + return self.line, self.column + + @start_pos.setter + def start_pos(self, value): + self.line = value[0] + self.column = value[1] + + def get_start_pos_of_prefix(self): + previous_leaf = self.get_previous_leaf() + if previous_leaf is None: + lines = split_lines(self.prefix) + # + 1 is needed because split_lines always returns at least ['']. + return self.line - len(lines) + 1, 0 # It's the first leaf. 
+ return previous_leaf.end_pos + + def get_first_leaf(self): + return self + + def get_last_leaf(self): + return self + + def get_code(self, include_prefix=True): + if include_prefix: + return self.prefix + self.value + else: + return self.value + + @property + def end_pos(self): + lines = split_lines(self.value) + end_pos_line = self.line + len(lines) - 1 + # Check for multiline token + if self.line == end_pos_line: + end_pos_column = self.column + len(lines[-1]) + else: + end_pos_column = len(lines[-1]) + return end_pos_line, end_pos_column + + @utf8_repr + def __repr__(self): + value = self.value + if not value: + value = self.type + return "<%s: %s>" % (type(self).__name__, value) + + +class TypedLeaf(Leaf): + __slots__ = ('type',) + + def __init__(self, type, value, start_pos, prefix=''): + super(TypedLeaf, self).__init__(value, start_pos, prefix) + self.type = type + + +class BaseNode(NodeOrLeaf): + """ + The super class for all nodes. + A node has children, a type and possibly a parent node. + """ + __slots__ = ('children', 'parent') + type = None + + def __init__(self, children): + self.children = children + """ + A list of :class:`NodeOrLeaf` child nodes. + """ + self.parent = None + ''' + The parent :class:`BaseNode` of this leaf. + None if this is the root node. + ''' + + @property + def start_pos(self): + return self.children[0].start_pos + + def get_start_pos_of_prefix(self): + return self.children[0].get_start_pos_of_prefix() + + @property + def end_pos(self): + return self.children[-1].end_pos + + def _get_code_for_children(self, children, include_prefix): + if include_prefix: + return "".join(c.get_code() for c in children) + else: + first = children[0].get_code(include_prefix=False) + return first + "".join(c.get_code() for c in children[1:]) + + def get_code(self, include_prefix=True): + return self._get_code_for_children(self.children, include_prefix) + + def get_leaf_for_position(self, position, include_prefixes=False): + """ + Get the :py:class:`parso.tree.Leaf` at ``position`` + + :param tuple position: A position tuple, row, column. Rows start from 1 + :param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls + on whitespace or comments before a leaf + :return: :py:class:`parso.tree.Leaf` at ``position``, or ``None`` + """ + def binary_search(lower, upper): + if lower == upper: + element = self.children[lower] + if not include_prefixes and position < element.start_pos: + # We're on a prefix. 
+ return None + # In case we have prefixes, a leaf always matches + try: + return element.get_leaf_for_position(position, include_prefixes) + except AttributeError: + return element + + + index = int((lower + upper) / 2) + element = self.children[index] + if position <= element.end_pos: + return binary_search(lower, index) + else: + return binary_search(index + 1, upper) + + if not ((1, 0) <= position <= self.children[-1].end_pos): + raise ValueError('Please provide a position that exists within this node.') + return binary_search(0, len(self.children) - 1) + + def get_first_leaf(self): + return self.children[0].get_first_leaf() + + def get_last_leaf(self): + return self.children[-1].get_last_leaf() + + @utf8_repr + def __repr__(self): + code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip() + if not sys.version_info.major >= 3: + code = code.encode(encoding, 'replace') + return "<%s: %s@%s,%s>" % \ + (type(self).__name__, code, self.start_pos[0], self.start_pos[1]) + + +class Node(BaseNode): + """Concrete implementation for interior nodes.""" + __slots__ = ('type',) + + def __init__(self, type, children): + super(Node, self).__init__(children) + self.type = type + + def __repr__(self): + return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children) + + +class ErrorNode(BaseNode): + """ + A node that contains valid nodes/leaves that we're follow by a token that + was invalid. This basically means that the leaf after this node is where + Python would mark a syntax error. + """ + __slots__ = () + type = 'error_node' + + +class ErrorLeaf(Leaf): + """ + A leaf that is either completely invalid in a language (like `$` in Python) + or is invalid at that position. Like the star in `1 +* 1`. + """ + __slots__ = ('token_type',) + type = 'error_leaf' + + def __init__(self, token_type, value, start_pos, prefix=''): + super(ErrorLeaf, self).__init__(value, start_pos, prefix) + self.token_type = token_type + + def __repr__(self): + return "<%s: %s:%s, %s>" % \ + (type(self).__name__, self.token_type, repr(self.value), self.start_pos) diff --git a/contrib/python/parso/py2/parso/utils.py b/contrib/python/parso/py2/parso/utils.py new file mode 100644 index 0000000000..99a1307b6e --- /dev/null +++ b/contrib/python/parso/py2/parso/utils.py @@ -0,0 +1,185 @@ +from collections import namedtuple +import re +import sys +from ast import literal_eval +from functools import total_ordering + +from parso._compatibility import unicode + +# The following is a list in Python that are line breaks in str.splitlines, but +# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed, +# 0xA) are allowed to split lines. +_NON_LINE_BREAKS = ( + u'\v', # Vertical Tabulation 0xB + u'\f', # Form Feed 0xC + u'\x1C', # File Separator + u'\x1D', # Group Separator + u'\x1E', # Record Separator + u'\x85', # Next Line (NEL - Equivalent to CR+LF. + # Used to mark end-of-line on some IBM mainframes.) + u'\u2028', # Line Separator + u'\u2029', # Paragraph Separator +) + +Version = namedtuple('Version', 'major, minor, micro') + + +def split_lines(string, keepends=False): + r""" + Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`, + looks at form feeds and other special characters as normal text. Just + splits ``\n`` and ``\r\n``. + Also different: Returns ``[""]`` for an empty string input. + + In Python 2.7 form feeds are used as normal characters when using + str.splitlines. However in Python 3 somewhere there was a decision to split + also on form feeds. 
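+
+    A few illustrative cases (an addition, not upstream documentation)::
+
+        split_lines('asd\r\n')                 # ['asd', '']
+        split_lines('asd\r\n', keepends=True)  # ['asd\r\n', '']
+        split_lines('a\fb')                    # ['a\fb']  (form feed kept)
+        split_lines('')                        # ['']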
+ """ + if keepends: + lst = string.splitlines(True) + + # We have to merge lines that were broken by form feed characters. + merge = [] + for i, line in enumerate(lst): + try: + last_chr = line[-1] + except IndexError: + pass + else: + if last_chr in _NON_LINE_BREAKS: + merge.append(i) + + for index in reversed(merge): + try: + lst[index] = lst[index] + lst[index + 1] + del lst[index + 1] + except IndexError: + # index + 1 can be empty and therefore there's no need to + # merge. + pass + + # The stdlib's implementation of the end is inconsistent when calling + # it with/without keepends. One time there's an empty string in the + # end, one time there's none. + if string.endswith('\n') or string.endswith('\r') or string == '': + lst.append('') + return lst + else: + return re.split(r'\n|\r\n|\r', string) + + +def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'): + """ + Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a + unicode object like in :py:meth:`bytes.decode`. + + :param encoding: See :py:meth:`bytes.decode` documentation. + :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be + ``'strict'``, ``'replace'`` or ``'ignore'``. + """ + def detect_encoding(): + """ + For the implementation of encoding definitions in Python, look at: + - http://www.python.org/dev/peps/pep-0263/ + - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations + """ + byte_mark = literal_eval(r"b'\xef\xbb\xbf'") + if source.startswith(byte_mark): + # UTF-8 byte-order mark + return 'utf-8' + + first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0) + possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)", + first_two_lines) + if possible_encoding: + return possible_encoding.group(1) + else: + # the default if nothing else has been set -> PEP 263 + return encoding + + if isinstance(source, unicode): + # only cast str/bytes + return source + + encoding = detect_encoding() + if not isinstance(encoding, unicode): + encoding = unicode(encoding, 'utf-8', 'replace') + + try: + # Cast to unicode + return unicode(source, encoding, errors) + except LookupError: + if errors == 'replace': + # This is a weird case that can happen if the given encoding is not + # a valid encoding. This usually shouldn't happen with provided + # encodings, but can happen if somebody uses encoding declarations + # like `# coding: foo-8`. + return unicode(source, 'utf-8', errors) + raise + + +def version_info(): + """ + Returns a namedtuple of parso's version, similar to Python's + ``sys.version_info``. + """ + from parso import __version__ + tupl = re.findall(r'[a-z]+|\d+', __version__) + return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)]) + + +def _parse_version(version): + match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version) + if match is None: + raise ValueError('The given version is not in the right format. ' + 'Use something like "3.8" or "3".') + + major = int(match.group(1)) + minor = match.group(2) + if minor is None: + # Use the latest Python in case it's not exactly defined, because the + # grammars are typically backwards compatible? 
+ if major == 2: + minor = "7" + elif major == 3: + minor = "6" + else: + raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.") + minor = int(minor) + return PythonVersionInfo(major, minor) + + +@total_ordering +class PythonVersionInfo(namedtuple('Version', 'major, minor')): + def __gt__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) > other + super(PythonVersionInfo, self).__gt__(other) + + return (self.major, self.minor) + + def __eq__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) == other + super(PythonVersionInfo, self).__eq__(other) + + def __ne__(self, other): + return not self.__eq__(other) + + +def parse_version_string(version=None): + """ + Checks for a valid version number (e.g. `3.8` or `2.7.1` or `3`) and + returns a corresponding version info that is always two characters long in + decimal. + """ + if version is None: + version = '%s.%s' % sys.version_info[:2] + if not isinstance(version, (unicode, str)): + raise TypeError('version must be a string like "3.8"') + + return _parse_version(version) diff --git a/contrib/python/parso/py2/tests/__init__.py b/contrib/python/parso/py2/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/parso/py2/tests/__init__.py diff --git a/contrib/python/parso/py2/tests/conftest.py b/contrib/python/parso/py2/tests/conftest.py new file mode 100644 index 0000000000..a135cb0084 --- /dev/null +++ b/contrib/python/parso/py2/tests/conftest.py @@ -0,0 +1,175 @@ +import re +import tempfile +import shutil +import logging +import sys +import os + +import pytest +import yatest.common + +import parso +from parso import cache +from parso.utils import parse_version_string + +collect_ignore = ["setup.py"] + +VERSIONS_2 = '2.7', +VERSIONS_3 = '3.4', '3.5', '3.6', '3.7', '3.8' + + +@pytest.fixture(scope='session') +def clean_parso_cache(): + """ + Set the default cache directory to a temporary directory during tests. + + Note that you can't use built-in `tmpdir` and `monkeypatch` + fixture here because their scope is 'function', which is not used + in 'session' scope fixture. + + This fixture is activated in ../pytest.ini. 
+ """ + old = cache._default_cache_path + tmp = tempfile.mkdtemp(prefix='parso-test-') + cache._default_cache_path = tmp + yield + cache._default_cache_path = old + shutil.rmtree(tmp) + + +def pytest_addoption(parser): + parser.addoption("--logging", "-L", action='store_true', + help="Enables the logging output.") + + +def pytest_generate_tests(metafunc): + if 'normalizer_issue_case' in metafunc.fixturenames: + base_dir = os.path.join(yatest.common.test_source_path(), 'normalizer_issue_files') + + cases = list(colllect_normalizer_tests(base_dir)) + metafunc.parametrize( + 'normalizer_issue_case', + cases, + ids=[c.name for c in cases] + ) + elif 'each_version' in metafunc.fixturenames: + metafunc.parametrize('each_version', VERSIONS_2 + VERSIONS_3) + elif 'each_py2_version' in metafunc.fixturenames: + metafunc.parametrize('each_py2_version', VERSIONS_2) + elif 'each_py3_version' in metafunc.fixturenames: + metafunc.parametrize('each_py3_version', VERSIONS_3) + elif 'version_ge_py36' in metafunc.fixturenames: + metafunc.parametrize('version_ge_py36', ['3.6', '3.7', '3.8']) + elif 'version_ge_py38' in metafunc.fixturenames: + metafunc.parametrize('version_ge_py38', ['3.8']) + + +class NormalizerIssueCase(object): + """ + Static Analysis cases lie in the static_analysis folder. + The tests also start with `#!`, like the goto_definition tests. + """ + def __init__(self, path): + self.path = path + self.name = os.path.basename(path) + match = re.search(r'python([\d.]+)\.py', self.name) + self.python_version = match and match.group(1) + + +def colllect_normalizer_tests(base_dir): + for f_name in os.listdir(base_dir): + if f_name.endswith(".py"): + path = os.path.join(base_dir, f_name) + yield NormalizerIssueCase(path) + + +def pytest_configure(config): + if config.option.logging: + root = logging.getLogger() + root.setLevel(logging.DEBUG) + + #ch = logging.StreamHandler(sys.stdout) + #ch.setLevel(logging.DEBUG) + #formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + #ch.setFormatter(formatter) + + #root.addHandler(ch) + + +class Checker(): + def __init__(self, version, is_passing): + self.version = version + self._is_passing = is_passing + self.grammar = parso.load_grammar(version=self.version) + + def parse(self, code): + if self._is_passing: + return parso.parse(code, version=self.version, error_recovery=False) + else: + self._invalid_syntax(code) + + def _invalid_syntax(self, code): + with pytest.raises(parso.ParserSyntaxError): + module = parso.parse(code, version=self.version, error_recovery=False) + # For debugging + print(module.children) + + def get_error(self, code): + errors = list(self.grammar.iter_errors(self.grammar.parse(code))) + assert bool(errors) != self._is_passing + if errors: + return errors[0] + + def get_error_message(self, code): + error = self.get_error(code) + if error is None: + return + return error.message + + def assert_no_error_in_passing(self, code): + if self._is_passing: + module = self.grammar.parse(code) + assert not list(self.grammar.iter_errors(module)) + + +@pytest.fixture +def works_not_in_py(each_version): + return Checker(each_version, False) + + +@pytest.fixture +def works_in_py2(each_version): + return Checker(each_version, each_version.startswith('2')) + + +@pytest.fixture +def works_ge_py27(each_version): + version_info = parse_version_string(each_version) + return Checker(each_version, version_info >= (2, 7)) + + +@pytest.fixture +def works_ge_py3(each_version): + version_info = parse_version_string(each_version) + 
return Checker(each_version, version_info >= (3, 0)) + + +@pytest.fixture +def works_ge_py35(each_version): + version_info = parse_version_string(each_version) + return Checker(each_version, version_info >= (3, 5)) + +@pytest.fixture +def works_ge_py36(each_version): + version_info = parse_version_string(each_version) + return Checker(each_version, version_info >= (3, 6)) + +@pytest.fixture +def works_ge_py38(each_version): + version_info = parse_version_string(each_version) + return Checker(each_version, version_info >= (3, 8)) + +@pytest.fixture +def works_ge_py39(each_version): + version_info = parse_version_string(each_version) + return Checker(each_version, version_info >= (3, 9)) diff --git a/contrib/python/parso/py2/tests/failing_examples.py b/contrib/python/parso/py2/tests/failing_examples.py new file mode 100644 index 0000000000..2c67cb715b --- /dev/null +++ b/contrib/python/parso/py2/tests/failing_examples.py @@ -0,0 +1,397 @@ +# -*- coding: utf-8 -*- +import sys +from textwrap import dedent + + +def indent(code): + lines = code.splitlines(True) + return ''.join([' ' * 2 + line for line in lines]) + + +def build_nested(code, depth, base='def f():\n'): + if depth == 0: + return code + + new_code = base + indent(code) + return build_nested(new_code, depth - 1, base=base) + + +FAILING_EXAMPLES = [ + '1 +', + '?', + 'continue', + 'break', + 'return', + 'yield', + + # SyntaxError from Python/ast.c + 'f(x for x in bar, 1)', + 'from foo import a,', + 'from __future__ import whatever', + 'from __future__ import braces', + 'from .__future__ import whatever', + 'def f(x=3, y): pass', + 'lambda x=3, y: x', + '__debug__ = 1', + 'with x() as __debug__: pass', + # Mostly 3.6 relevant + '[]: int', + '[a, b]: int', + '(): int', + '(()): int', + '((())): int', + '{}: int', + 'True: int', + '(a, b): int', + '*star,: int', + 'a, b: int = 3', + 'foo(+a=3)', + 'f(lambda: 1=1)', + 'f(x=1, x=2)', + 'f(**x, y)', + 'f(x=2, y)', + 'f(**x, *y)', + 'f(**x, y=3, z)', + # augassign + 'a, b += 3', + '(a, b) += 3', + '[a, b] += 3', + 'f() += 1', + 'lambda x:None+=1', + '{} += 1', + '{a:b} += 1', + '{1} += 1', + '{*x} += 1', + '(x,) += 1', + '(x, y if a else q) += 1', + '[] += 1', + '[1,2] += 1', + '[] += 1', + 'None += 1', + '... += 1', + 'a > 1 += 1', + '"test" += 1', + '1 += 1', + '1.0 += 1', + '(yield) += 1', + '(yield from x) += 1', + '(x if x else y) += 1', + 'a() += 1', + 'a + b += 1', + '+a += 1', + 'a and b += 1', + '*a += 1', + 'a, b += 1', + 'f"xxx" += 1', + # All assignment tests + 'lambda a: 1 = 1', + '[x for x in y] = 1', + '{x for x in y} = 1', + '{x:x for x in y} = 1', + '(x for x in y) = 1', + 'None = 1', + '... = 1', + 'a == b = 1', + '{a, b} = 1', + '{a: b} = 1', + '1 = 1', + '"" = 1', + 'b"" = 1', + 'b"" = 1', + '"" "" = 1', + '1 | 1 = 3', + '1**1 = 3', + '~ 1 = 3', + 'not 1 = 3', + '1 and 1 = 3', + 'def foo(): (yield 1) = 3', + 'def foo(): x = yield 1 = 3', + 'async def foo(): await x = 3', + '(a if a else a) = a', + 'a, 1 = x', + 'foo() = 1', + # Cases without the equals but other assignments. + 'with x as foo(): pass', + 'del bar, 1', + 'for x, 1 in []: pass', + 'for (not 1) in []: pass', + '[x for 1 in y]', + '[x for a, 3 in y]', + '(x for 1 in y)', + '{x for 1 in y}', + '{x:x for 1 in y}', + # Unicode/Bytes issues. 
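+    # (Added note) The raw strings keep the backslashes literal, so the code
+    # under test contains escape sequences that are invalid inside
+    # unicode/bytes literals.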
+ r'u"\x"', + r'u"\"', + r'u"\u"', + r'u"""\U"""', + r'u"\Uffffffff"', + r"u'''\N{}'''", + r"u'\N{foo}'", + r'b"\x"', + r'b"\"', + '*a, *b = 3, 3', + 'async def foo(): yield from []', + 'yield from []', + '*a = 3', + 'del *a, b', + 'def x(*): pass', + '(%s *d) = x' % ('a,' * 256), + '{**{} for a in [1]}', + + # Parser/tokenize.c + r'"""', + r'"', + r"'''", + r"'", + r"\blub", + # IndentationError: too many levels of indentation + build_nested('pass', 100), + + # SyntaxErrors from Python/symtable.c + 'def f(x, x): pass', + 'nonlocal a', + + # IndentationError + ' foo', + 'def x():\n 1\n 2', + 'def x():\n 1\n 2', + 'if 1:\nfoo', + 'if 1: blubb\nif 1:\npass\nTrue and False', + + # f-strings + 'f"{}"', + r'f"{\}"', + 'f"{\'\\\'}"', + 'f"{#}"', + "f'{1!b}'", + "f'{1:{5:{3}}}'", + "f'{'", + "f'{'", + "f'}'", + "f'{\"}'", + "f'{\"}'", + # Now nested parsing + "f'{continue}'", + "f'{1;1}'", + "f'{a;}'", + "f'{b\"\" \"\"}'", +] + +GLOBAL_NONLOCAL_ERROR = [ + dedent(''' + def glob(): + x = 3 + x.z + global x'''), + dedent(''' + def glob(): + x = 3 + global x'''), + dedent(''' + def glob(): + x + global x'''), + dedent(''' + def glob(): + x = 3 + x.z + nonlocal x'''), + dedent(''' + def glob(): + x = 3 + nonlocal x'''), + dedent(''' + def glob(): + x + nonlocal x'''), + # Annotation issues + dedent(''' + def glob(): + x[0]: foo + global x'''), + dedent(''' + def glob(): + x.a: foo + global x'''), + dedent(''' + def glob(): + x: foo + global x'''), + dedent(''' + def glob(): + x: foo = 5 + global x'''), + dedent(''' + def glob(): + x: foo = 5 + x + global x'''), + dedent(''' + def glob(): + global x + x: foo = 3 + '''), + # global/nonlocal + param + dedent(''' + def glob(x): + global x + '''), + dedent(''' + def glob(x): + nonlocal x + '''), + dedent(''' + def x(): + a =3 + def z(): + nonlocal a + a = 3 + nonlocal a + '''), + dedent(''' + def x(): + a = 4 + def y(): + global a + nonlocal a + '''), + # Missing binding of nonlocal + dedent(''' + def x(): + nonlocal a + '''), + dedent(''' + def x(): + def y(): + nonlocal a + '''), + dedent(''' + def x(): + a = 4 + def y(): + global a + print(a) + def z(): + nonlocal a + '''), +] + +if sys.version_info >= (3, 6): + FAILING_EXAMPLES += GLOBAL_NONLOCAL_ERROR +if sys.version_info >= (3, 5): + FAILING_EXAMPLES += [ + # Raises different errors so just ignore them for now. + '[*[] for a in [1]]', + # Raises multiple errors in previous versions. + 'async def bla():\n def x(): await bla()', + ] +if sys.version_info >= (3, 4): + # Before that del None works like del list, it gives a NameError. + FAILING_EXAMPLES.append('del None') +if sys.version_info >= (3,): + FAILING_EXAMPLES += [ + # Unfortunately assigning to False and True do not raise an error in + # 2.x. + '(True,) = x', + '([False], a) = x', + # A symtable error that raises only a SyntaxWarning in Python 2. + 'def x(): from math import *', + # unicode chars in bytes are allowed in python 2 + 'b"ä"', + # combining strings and unicode is allowed in Python 2. + '"s" b""', + '"s" b"" ""', + 'b"" "" b"" ""', + ] +if sys.version_info >= (3, 6): + FAILING_EXAMPLES += [ + # Same as above, but for f-strings. + 'f"s" b""', + 'b"s" f""', + + # f-string expression part cannot include a backslash + r'''f"{'\n'}"''', + ] +FAILING_EXAMPLES.append('[a, 1] += 3') + +if sys.version_info[:2] == (3, 5): + # yields are not allowed in 3.5 async functions. Therefore test them + # separately, here. 
+ FAILING_EXAMPLES += [ + 'async def foo():\n yield x', + 'async def foo():\n yield x', + ] +else: + FAILING_EXAMPLES += [ + 'async def foo():\n yield x\n return 1', + 'async def foo():\n yield x\n return 1', + ] + + +if sys.version_info[:2] <= (3, 4): + # Python > 3.4 this is valid code. + FAILING_EXAMPLES += [ + 'a = *[1], 2', + '(*[1], 2)', + ] + +if sys.version_info[:2] >= (3, 7): + # This is somehow ok in previous versions. + FAILING_EXAMPLES += [ + 'class X(base for base in bases): pass', + ] + +if sys.version_info[:2] < (3, 8): + FAILING_EXAMPLES += [ + # Python/compile.c + dedent('''\ + for a in [1]: + try: + pass + finally: + continue + '''), # 'continue' not supported inside 'finally' clause" + ] + +if sys.version_info[:2] >= (3, 8): + # assignment expressions from issue#89 + FAILING_EXAMPLES += [ + # Case 2 + '(lambda: x := 1)', + '((lambda: x) := 1)', + # Case 3 + '(a[i] := x)', + '((a[i]) := x)', + '(a(i) := x)', + # Case 4 + '(a.b := c)', + '[(i.i:= 0) for ((i), j) in range(5)]', + # Case 5 + '[i:= 0 for i, j in range(5)]', + '[(i:= 0) for ((i), j) in range(5)]', + '[(i:= 0) for ((i), j), in range(5)]', + '[(i:= 0) for ((i), j.i), in range(5)]', + '[[(i:= i) for j in range(5)] for i in range(5)]', + '[i for i, j in range(5) if True or (i:= 1)]', + '[False and (i:= 0) for i, j in range(5)]', + # Case 6 + '[i+1 for i in (i:= range(5))]', + '[i+1 for i in (j:= range(5))]', + '[i+1 for i in (lambda: (j:= range(5)))()]', + # Case 7 + 'class Example:\n [(j := i) for i in range(5)]', + # Not in that issue + '(await a := x)', + '((await a) := x)', + # new discoveries + '((a, b) := (1, 2))', + '([a, b] := [1, 2])', + '({a, b} := {1, 2})', + '({a: b} := {1: 2})', + '(a + b := 1)', + '(True := 1)', + '(False := 1)', + '(None := 1)', + '(__debug__ := 1)', + ] diff --git a/contrib/python/parso/py2/tests/fuzz_diff_parser.py b/contrib/python/parso/py2/tests/fuzz_diff_parser.py new file mode 100644 index 0000000000..6137ab6eb7 --- /dev/null +++ b/contrib/python/parso/py2/tests/fuzz_diff_parser.py @@ -0,0 +1,307 @@ +""" +A script to find bugs in the diff parser. + +This script is extremely useful if changes are made to the diff parser. By +running a few thousand iterations, we can assure that the diff parser is in +good shape. + +Usage: + fuzz_diff_parser.py [--pdb|--ipdb] [-l] [-n=<nr>] [-x=<nr>] random [<path>] + fuzz_diff_parser.py [--pdb|--ipdb] [-l] redo [-o=<nr>] [-p] + fuzz_diff_parser.py -h | --help + +Options: + -h --help Show this screen + -n, --maxtries=<nr> Maximum of random tries [default: 1000] + -x, --changes=<nr> Amount of changes to be done to a file per try [default: 5] + -l, --logging Prints all the logs + -o, --only-last=<nr> Only runs the last n iterations; Defaults to running all + -p, --print-code Print all test diffs + --pdb Launch pdb when error is raised + --ipdb Launch ipdb when error is raised +""" + +from __future__ import print_function +import logging +import sys +import os +import random +import pickle + +import parso +from parso.utils import split_lines +from test.test_diff_parser import _check_error_leaves_nodes + +_latest_grammar = parso.load_grammar(version='3.8') +_python_reserved_strings = tuple( + # Keywords are ususally only interesting in combination with spaces after + # them. We don't put a space before keywords, to avoid indentation errors. 
+ s + (' ' if s.isalpha() else '') + for s in _latest_grammar._pgen_grammar.reserved_syntax_strings.keys() +) +_random_python_fragments = _python_reserved_strings + ( + ' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'", + "'''", ';', ' some_random_word ', '\\', '#', +) + + +def find_python_files_in_tree(file_path): + if not os.path.isdir(file_path): + yield file_path + return + for root, dirnames, filenames in os.walk(file_path): + if 'chardet' in root: + # Stuff like chardet/langcyrillicmodel.py is just very slow to + # parse and machine generated, so ignore those. + continue + + for name in filenames: + if name.endswith('.py'): + yield os.path.join(root, name) + + +def _print_copyable_lines(lines): + for line in lines: + line = repr(line)[1:-1] + if line.endswith(r'\n'): + line = line[:-2] + '\n' + print(line, end='') + + +def _get_first_error_start_pos_or_none(module): + error_leaf = _check_error_leaves_nodes(module) + return None if error_leaf is None else error_leaf.start_pos + + +class LineReplacement: + def __init__(self, line_nr, new_line): + self._line_nr = line_nr + self._new_line = new_line + + def apply(self, code_lines): + # print(repr(self._new_line)) + code_lines[self._line_nr] = self._new_line + + +class LineDeletion: + def __init__(self, line_nr): + self.line_nr = line_nr + + def apply(self, code_lines): + del code_lines[self.line_nr] + + +class LineCopy: + def __init__(self, copy_line, insertion_line): + self._copy_line = copy_line + self._insertion_line = insertion_line + + def apply(self, code_lines): + code_lines.insert( + self._insertion_line, + # Use some line from the file. This doesn't feel totally + # random, but for the diff parser it will feel like it. + code_lines[self._copy_line] + ) + + +class FileModification: + @classmethod + def generate(cls, code_lines, change_count, previous_file_modification=None): + if previous_file_modification is not None and random.random() > 0.5: + # We want to keep the previous modifications in some cases to make + # more complex parser issues visible. + code_lines = previous_file_modification.apply(code_lines) + added_modifications = previous_file_modification.modification_list + else: + added_modifications = [] + return cls( + added_modifications + + list(cls._generate_line_modifications(code_lines, change_count)), + # work with changed trees more than with normal ones. + check_original=random.random() > 0.8, + ) + + @staticmethod + def _generate_line_modifications(lines, change_count): + def random_line(include_end=False): + return random.randint(0, len(lines) - (not include_end)) + + lines = list(lines) + for _ in range(change_count): + rand = random.randint(1, 4) + if rand == 1: + if len(lines) == 1: + # We cannot delete every line, that doesn't make sense to + # fuzz and it would be annoying to rewrite everything here. + continue + l = LineDeletion(random_line()) + elif rand == 2: + # Copy / Insertion + # Make it possible to insert into the first and the last line + l = LineCopy(random_line(), random_line(include_end=True)) + elif rand in (3, 4): + # Modify a line in some weird random ways. + line_nr = random_line() + line = lines[line_nr] + column = random.randint(0, len(line)) + random_string = '' + for _ in range(random.randint(1, 3)): + if random.random() > 0.8: + # The lower characters cause way more issues. 
+ unicode_range = 0x1f if random.randint(0, 1) else 0x3000 + random_string += chr(random.randint(0, unicode_range)) + else: + # These insertions let us understand how random + # keyword/operator insertions work. Theoretically this + # could also be done with unicode insertions, but the + # fuzzer is just way more effective here. + random_string += random.choice(_random_python_fragments) + if random.random() > 0.5: + # In this case we insert at a very random place that + # probably breaks syntax. + line = line[:column] + random_string + line[column:] + else: + # Here we have better chances to not break syntax, because + # we really replace the line with something that has + # indentation. + line = ' ' * random.randint(0, 12) + random_string + '\n' + l = LineReplacement(line_nr, line) + l.apply(lines) + yield l + + def __init__(self, modification_list, check_original): + self.modification_list = modification_list + self._check_original = check_original + + def apply(self, code_lines): + changed_lines = list(code_lines) + for modification in self.modification_list: + modification.apply(changed_lines) + return changed_lines + + def run(self, grammar, code_lines, print_code): + code = ''.join(code_lines) + modified_lines = self.apply(code_lines) + modified_code = ''.join(modified_lines) + + if print_code: + if self._check_original: + print('Original:') + _print_copyable_lines(code_lines) + + print('\nModified:') + _print_copyable_lines(modified_lines) + print() + + if self._check_original: + m = grammar.parse(code, diff_cache=True) + start1 = _get_first_error_start_pos_or_none(m) + + grammar.parse(modified_code, diff_cache=True) + + if self._check_original: + # Also check if it's possible to "revert" the changes. + m = grammar.parse(code, diff_cache=True) + start2 = _get_first_error_start_pos_or_none(m) + assert start1 == start2, (start1, start2) + + +class FileTests: + def __init__(self, file_path, test_count, change_count): + self._path = file_path + with open(file_path, errors='replace') as f: + code = f.read() + self._code_lines = split_lines(code, keepends=True) + self._test_count = test_count + self._code_lines = self._code_lines + self._change_count = change_count + self._file_modifications = [] + + def _run(self, grammar, file_modifications, debugger, print_code=False): + try: + for i, fm in enumerate(file_modifications, 1): + fm.run(grammar, self._code_lines, print_code=print_code) + print('.', end='') + sys.stdout.flush() + print() + except Exception: + print("Issue in file: %s" % self._path) + if debugger: + einfo = sys.exc_info() + pdb = __import__(debugger) + pdb.post_mortem(einfo[2]) + raise + + def redo(self, grammar, debugger, only_last, print_code): + mods = self._file_modifications + if only_last is not None: + mods = mods[-only_last:] + self._run(grammar, mods, debugger, print_code=print_code) + + def run(self, grammar, debugger): + def iterate(): + fm = None + for _ in range(self._test_count): + fm = FileModification.generate( + self._code_lines, self._change_count, + previous_file_modification=fm + ) + self._file_modifications.append(fm) + yield fm + + self._run(grammar, iterate(), debugger) + + +def main(arguments): + debugger = 'pdb' if arguments['--pdb'] else \ + 'ipdb' if arguments['--ipdb'] else None + redo_file = os.path.join(os.path.dirname(__file__), 'fuzz-redo.pickle') + + if arguments['--logging']: + root = logging.getLogger() + root.setLevel(logging.DEBUG) + + ch = logging.StreamHandler(sys.stdout) + ch.setLevel(logging.DEBUG) + root.addHandler(ch) + + grammar = 
parso.load_grammar() + parso.python.diff.DEBUG_DIFF_PARSER = True + if arguments['redo']: + with open(redo_file, 'rb') as f: + file_tests_obj = pickle.load(f) + only_last = arguments['--only-last'] and int(arguments['--only-last']) + file_tests_obj.redo( + grammar, + debugger, + only_last=only_last, + print_code=arguments['--print-code'] + ) + elif arguments['random']: + # A random file is used to do diff parser checks if no file is given. + # This helps us to find errors in a lot of different files. + file_paths = list(find_python_files_in_tree(arguments['<path>'] or '.')) + max_tries = int(arguments['--maxtries']) + tries = 0 + try: + while tries < max_tries: + path = random.choice(file_paths) + print("Checking %s: %s tries" % (path, tries)) + now_tries = min(1000, max_tries - tries) + file_tests_obj = FileTests(path, now_tries, int(arguments['--changes'])) + file_tests_obj.run(grammar, debugger) + tries += now_tries + except Exception: + with open(redo_file, 'wb') as f: + pickle.dump(file_tests_obj, f) + raise + else: + raise NotImplementedError('Command is not implemented') + + +if __name__ == '__main__': + from docopt import docopt + + arguments = docopt(__doc__) + main(arguments) diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E10.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E10.py new file mode 100644 index 0000000000..38d7a19043 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E10.py @@ -0,0 +1,51 @@ +for a in 'abc': + for b in 'xyz': + hello(a) # indented with 8 spaces + #: E903:0 + hello(b) # indented with 1 tab +if True: + #: E101:0 + pass + +#: E122+1 +change_2_log = \ +"""Change 2 by slamb@testclient on 2006/04/13 21:46:23 + + creation +""" + +p4change = { + 2: change_2_log, +} + + +class TestP4Poller(unittest.TestCase): + def setUp(self): + self.setUpGetProcessOutput() + return self.setUpChangeSource() + + def tearDown(self): + pass + + +# +if True: + #: E101:0 E101+1:0 + foo(1, + 2) + + +def test_keys(self): + """areas.json - All regions are accounted for.""" + expected = set([ + #: E101:0 + u'Norrbotten', + #: E101:0 + u'V\xe4sterbotten', + ]) + + +if True: + hello(""" + tab at start of this line +""") diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E101.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E101.py new file mode 100644 index 0000000000..cc24719873 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E101.py @@ -0,0 +1,137 @@ +# Used to be the file for W191 + +#: E101+1 +if False: + print # indented with 1 tab + +#: E101+1 +y = x == 2 \ + or x == 3 +#: E101+5 +if ( + x == ( + 3 + ) or + y == 4): + pass +#: E101+3 +if x == 2 \ + or y > 1 \ + or x == 3: + pass +#: E101+3 +if x == 2 \ + or y > 1 \ + or x == 3: + pass + +#: E101+1 +if (foo == bar and baz == frop): + pass +#: E101+1 +if (foo == bar and baz == frop): + pass + +#: E101+2 E101+3 +if start[1] > end_col and not ( + over_indent == 4 and indent_next): + assert (0, "E121 continuation line over-" + "indented for visual indent") + + +#: E101+3 +def long_function_name( + var_one, var_two, var_three, + var_four): + hello(var_one) + + +#: E101+2 +if ((row < 0 or self.moduleCount <= row or + col < 0 or self.moduleCount <= col)): + raise Exception("%s,%s - %s" % (row, col, self.moduleCount)) +#: E101+1 E101+2 E101+3 E101+4 E101+5 E101+6 +if bar: + assert ( + start, 'E121 lines starting with a ' + 'closing bracket should be indented ' + "to match that of the opening " + "bracket's line" + ) + +# you want 
vertical alignment, so use a parens +#: E101+3 +if ((foo.bar("baz") and + foo.bar("frop") + )): + hello("yes") +#: E101+3 +# also ok, but starting to look like LISP +if ((foo.bar("baz") and + foo.bar("frop"))): + hello("yes") +#: E101+1 +if (a == 2 or b == "abc def ghi" "jkl mno"): + assert True +#: E101+2 +if (a == 2 or b == """abc def ghi +jkl mno"""): + assert True +#: E101+1 E101+2 +if length > options.max_line_length: + assert options.max_line_length, \ + "E501 line too long (%d characters)" % length + + +#: E101+1 E101+2 +if os.path.exists(os.path.join(path, PEP8_BIN)): + cmd = ([os.path.join(path, PEP8_BIN)] + + self._pep8_options(targetfile)) +# TODO Tabs in docstrings shouldn't be there, use \t. +''' + multiline string with tab in it''' +# Same here. +'''multiline string + with tabs + and spaces +''' +# Okay +'''sometimes, you just need to go nuts in a multiline string + and allow all sorts of crap + like mixed tabs and spaces + +or trailing whitespace +or long long long long long long long long long long long long long long long long long lines +''' # noqa +# Okay +'''this one + will get no warning +even though the noqa comment is not immediately after the string +''' + foo # noqa + +#: E101+2 +if foo is None and bar is "frop" and \ + blah == 'yeah': + blah = 'yeahnah' + + +#: E101+1 E101+2 E101+3 +if True: + foo( + 1, + 2) + + +#: E101+1 E101+2 E101+3 E101+4 E101+5 +def test_keys(self): + """areas.json - All regions are accounted for.""" + expected = set([ + u'Norrbotten', + u'V\xe4sterbotten', + ]) + + +#: E101+1 +x = [ + 'abc' +] diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E11.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E11.py new file mode 100644 index 0000000000..9b97f3980c --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E11.py @@ -0,0 +1,60 @@ +if x > 2: + #: E111:2 + hello(x) +if True: + #: E111:5 + print + #: E111:6 + # + #: E111:2 + # what + # Comment is fine +# Comment is also fine + +if False: + pass +print +print +#: E903:0 + print +mimetype = 'application/x-directory' +#: E111:5 + # 'httpd/unix-directory' +create_date = False + + +def start(self): + # foo + #: E111:8 + # bar + if True: # Hello + self.master.start() # Comment + # try: + #: E111:12 + # self.master.start() + # except MasterExit: + #: E111:12 + # self.shutdown() + # finally: + #: E111:12 + # sys.exit() + # Dedent to the first level + #: E111:6 + # error +# Dedent to the base level +#: E111:2 + # Also wrongly indented. +# Indent is correct. 
+ + +def start(self): # Correct comment + if True: + #: E111:0 +# try: + #: E111:0 +# self.master.start() + #: E111:0 +# except MasterExit: + #: E111:0 +# self.shutdown() + self.master.start() # comment diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E12_first.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_first.py new file mode 100644 index 0000000000..8dc65a5a42 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_first.py @@ -0,0 +1,78 @@ +abc = "E121", ( + #: E121:2 + "dent") +abc = "E122", ( + #: E121:0 +"dent") +my_list = [ + 1, 2, 3, + 4, 5, 6, + #: E123 + ] +abc = "E124", ("visual", + "indent_two" + #: E124:14 + ) +abc = "E124", ("visual", + "indent_five" + #: E124:0 +) +a = (123, + #: E124:0 +) +#: E129+1:4 +if (row < 0 or self.moduleCount <= row or + col < 0 or self.moduleCount <= col): + raise Exception("%s,%s - %s" % (row, col, self.moduleCount)) + +abc = "E126", ( + #: E126:12 + "dent") +abc = "E126", ( + #: E126:8 + "dent") +abc = "E127", ("over-", + #: E127:18 + "over-indent") +abc = "E128", ("visual", + #: E128:4 + "hanging") +abc = "E128", ("under-", + #: E128:14 + "under-indent") + + +my_list = [ + 1, 2, 3, + 4, 5, 6, + #: E123:5 + ] +result = { + #: E121:3 + 'key1': 'value', + #: E121:3 + 'key2': 'value', +} +rv.update(dict.fromkeys(( + 'qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), + #: E128:10 + '?'), + "foo") + +abricot = 3 + \ + 4 + \ + 5 + 6 +abc = "hello", ( + + "there", + #: E126:5 + # "john", + "dude") +part = set_mimetype(( + a.get('mime_type', 'text')), + 'default') +part = set_mimetype(( + a.get('mime_type', 'text')), + #: E127:21 + 'default') diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E12_not_first.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_not_first.py new file mode 100644 index 0000000000..fc3b5f9339 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_not_first.py @@ -0,0 +1,356 @@ +# The issue numbers described in this file are part of the pycodestyle tracker +# and not of parso. +# Originally there were no issues in here, I (dave) added the ones that were +# necessary and IMO useful. +if ( + x == ( + 3 + ) or + y == 4): + pass + +y = x == 2 \ + or x == 3 + +#: E129+1:4 +if x == 2 \ + or y > 1 \ + or x == 3: + pass + +if x == 2 \ + or y > 1 \ + or x == 3: + pass + + +if (foo == bar and + baz == frop): + pass + +#: E129+1:4 E129+2:4 E123+3 +if ( + foo == bar and + baz == frop +): + pass + +if ( + foo == bar and + baz == frop + #: E129:4 + ): + pass + +a = ( +) + +a = (123, + ) + + +if start[1] > end_col and not ( + over_indent == 4 and indent_next): + assert (0, "E121 continuation line over-" + "indented for visual indent") + + +abc = "OK", ("visual", + "indent") + +abc = "Okay", ("visual", + "indent_three" + ) + +abc = "a-ok", ( + "there", + "dude", +) + +abc = "hello", ( + "there", + "dude") + +abc = "hello", ( + + "there", + # "john", + "dude") + +abc = "hello", ( + "there", "dude") + +abc = "hello", ( + "there", "dude", +) + +# Aligned with opening delimiter +foo = long_function_name(var_one, var_two, + var_three, var_four) + +# Extra indentation is not necessary. 
+foo = long_function_name( + var_one, var_two, + var_three, var_four) + + +arm = 'AAA' \ + 'BBB' \ + 'CCC' + +bbb = 'AAA' \ + 'BBB' \ + 'CCC' + +cc = ('AAA' + 'BBB' + 'CCC') + +cc = {'text': 'AAA' + 'BBB' + 'CCC'} + +cc = dict(text='AAA' + 'BBB') + +sat = 'AAA' \ + 'BBB' \ + 'iii' \ + 'CCC' + +abricot = (3 + + 4 + + 5 + 6) + +#: E122+1:4 +abricot = 3 + \ + 4 + \ + 5 + 6 + +part = [-1, 2, 3, + 4, 5, 6] + +#: E128+1:8 +part = [-1, (2, 3, + 4, 5, 6), 7, + 8, 9, 0] + +fnct(1, 2, 3, + 4, 5, 6) + +fnct(1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10, 11) + + +def long_function_name( + var_one, var_two, var_three, + var_four): + hello(var_one) + + +if ((row < 0 or self.moduleCount <= row or + col < 0 or self.moduleCount <= col)): + raise Exception("%s,%s - %s" % (row, col, self.moduleCount)) + + +result = { + 'foo': [ + 'bar', { + 'baz': 'frop', + } + ] +} + + +foo = my.func({ + "foo": "bar", +}, "baz") + + +fooff(aaaa, + cca( + vvv, + dadd + ), fff, + ggg) + +fooff(aaaa, + abbb, + cca( + vvv, + aaa, + dadd), + "visual indentation is not a multiple of four",) + +if bar: + assert ( + start, 'E121 lines starting with a ' + 'closing bracket should be indented ' + "to match that of the opening " + "bracket's line" + ) + +# you want vertical alignment, so use a parens +if ((foo.bar("baz") and + foo.bar("frop") + )): + hello("yes") + +# also ok, but starting to look like LISP +if ((foo.bar("baz") and + foo.bar("frop"))): + hello("yes") + +#: E129+1:4 E127+2:9 +if (a == 2 or + b == "abc def ghi" + "jkl mno"): + assert True + +#: E129+1:4 +if (a == 2 or + b == """abc def ghi +jkl mno"""): + assert True + +if length > options.max_line_length: + assert options.max_line_length, \ + "E501 line too long (%d characters)" % length + + +# blub + + +asd = 'l.{line}\t{pos}\t{name}\t{text}'.format( + line=token[2][0], + pos=pos, + name=tokenize.tok_name[token[0]], + text=repr(token[1]), +) + +#: E121+1:6 E121+2:6 +hello('%-7d %s per second (%d total)' % ( + options.counters[key] / elapsed, key, + options.counters[key])) + + +if os.path.exists(os.path.join(path, PEP8_BIN)): + cmd = ([os.path.join(path, PEP8_BIN)] + + self._pep8_options(targetfile)) + + +fixed = (re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] + + target[c + 1:]) + +fixed = ( + re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] + + target[c + 1:] +) + + +if foo is None and bar is "frop" and \ + blah == 'yeah': + blah = 'yeahnah' + + +"""This is a multi-line + docstring.""" + + +if blah: + # is this actually readable? :) + multiline_literal = """ +while True: + if True: + 1 +""".lstrip() + multiline_literal = ( + """ +while True: + if True: + 1 +""".lstrip() + ) + multiline_literal = ( + """ +while True: + if True: + 1 +""" + .lstrip() + ) + + +if blah: + multiline_visual = (""" +while True: + if True: + 1 +""" + .lstrip()) + + +rv = {'aaa': 42} +rv.update(dict.fromkeys(( + #: E121:4 E121+1:4 + 'qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), '?')) + +rv.update(dict.fromkeys(('qualif_nr', 'reasonComment_en', + 'reasonComment_fr', 'reasonComment_de', + 'reasonComment_it'), '?')) + +#: E128+1:10 +rv.update(dict.fromkeys(('qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), '?')) + + +rv.update(dict.fromkeys( + ('qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), '?' 
+ ), "foo", context={ + 'alpha': 4, 'beta': 53242234, 'gamma': 17, + }) + + +rv.update( + dict.fromkeys(( + 'qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), '?'), + "foo", + context={ + 'alpha': 4, 'beta': 53242234, 'gamma': 17, + }, +) + + +event_obj.write(cursor, user_id, { + 'user': user, + 'summary': text, + 'data': data, + }) + +event_obj.write(cursor, user_id, { + 'user': user, + 'summary': text, + 'data': {'aaa': 1, 'bbb': 2}, + }) + +event_obj.write(cursor, user_id, { + 'user': user, + 'summary': text, + 'data': { + 'aaa': 1, + 'bbb': 2}, + }) + +event_obj.write(cursor, user_id, { + 'user': user, + 'summary': text, + 'data': {'timestamp': now, 'content': { + 'aaa': 1, + 'bbb': 2 + }}, + }) diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E12_not_second.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_not_second.py new file mode 100644 index 0000000000..e7c18e0ec0 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_not_second.py @@ -0,0 +1,294 @@ + +def qualify_by_address( + self, cr, uid, ids, context=None, + params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)): + """ This gets called by the web server """ + + +def qualify_by_address(self, cr, uid, ids, context=None, + params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)): + """ This gets called by the web server """ + + +_ipv4_re = re.compile('^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.' + '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.' + '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.' + '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$') + + +fct(""" + AAA """ + status_2_string) + + +if context: + msg = """\ +action: GET-CONFIG +payload: + ip_address: "%(ip)s" + username: "%(username)s" +""" % context + + +if context: + msg = """\ +action: \ +GET-CONFIG +""" % context + + +if context: + #: E122+2:0 + msg = """\ +action: """\ +"""GET-CONFIG +""" % context + + +def unicode2html(s): + """Convert the characters &<>'" in string s to HTML-safe sequences. 
+ Convert newline to <br> too.""" + #: E127+1:28 + return unicode((s or '').replace('&', '&') + .replace('\n', '<br>\n')) + + +parser.add_option('--count', action='store_true', + help="print total number of errors and warnings " + "to standard error and set exit code to 1 if " + "total is not null") + +parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % + DEFAULT_EXCLUDE) + +add_option('--count', + #: E135+1 + help="print total number of errors " + "to standard error total is not null") + +add_option('--count', + #: E135+2:11 + help="print total number of errors " + "to standard error " + "total is not null") + + +help = ("print total number of errors " + + "to standard error") + +help = "print total number of errors " \ + "to standard error" + +help = u"print total number of errors " \ + u"to standard error" + +help = b"print total number of errors " \ + b"to standard error" + +#: E122+1:5 +help = br"print total number of errors " \ + br"to standard error" + +d = dict('foo', help="exclude files or directories which match these " + #: E135:9 + "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE) + +d = dict('foo', help=u"exclude files or directories which match these " + u"comma separated patterns (default: %s)" + % DEFAULT_EXCLUDE) + +#: E135+1:9 E135+2:9 +d = dict('foo', help=b"exclude files or directories which match these " + b"comma separated patterns (default: %s)" + % DEFAULT_EXCLUDE) + +d = dict('foo', help=br"exclude files or directories which match these " + br"comma separated patterns (default: %s)" % + DEFAULT_EXCLUDE) + +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % + DEFAULT_EXCLUDE) + +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s, %s)" % + (DEFAULT_EXCLUDE, DEFAULT_IGNORE) + ) + +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s, %s)" % + # who knows what might happen here? + (DEFAULT_EXCLUDE, DEFAULT_IGNORE) + ) + +# parens used to allow the indenting. +troublefree_hash = { + "hash": "value", + "long": ("the quick brown fox jumps over the lazy dog before doing a " + "somersault"), + "long key that tends to happen more when you're indented": ( + "stringwithalongtoken you don't want to break" + ), +} + +# another accepted form +troublefree_hash = { + "hash": "value", + "long": "the quick brown fox jumps over the lazy dog before doing " + "a somersault", + ("long key that tends to happen more " + "when you're indented"): "stringwithalongtoken you don't want to break", +} +# confusing but accepted... 
don't do that +troublesome_hash = { + "hash": "value", + "long": "the quick brown fox jumps over the lazy dog before doing a " + #: E135:4 + "somersault", + "longer": + "the quick brown fox jumps over the lazy dog before doing a " + "somersaulty", + "long key that tends to happen more " + "when you're indented": "stringwithalongtoken you don't want to break", +} + +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % + DEFAULT_EXCLUDE + ) +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE, + foobar="this clearly should work, because it is at " + "the right indent level", + ) + +rv.update(dict.fromkeys( + ('qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), + '?'), "foo", + context={'alpha': 4, 'beta': 53242234, 'gamma': 17}) + + +def f(): + try: + if not Debug: + hello(''' +If you would like to see debugging output, +try: %s -d5 +''' % sys.argv[0]) + + +# The try statement above was not finished. +#: E901 +d = { # comment + 1: 2 +} + +# issue 138 (we won't allow this in parso) +#: E126+2:9 +[ + 12, # this is a multi-line inline + # comment +] +# issue 151 +#: E122+1:3 +if a > b and \ + c > d: + moo_like_a_cow() + +my_list = [ + 1, 2, 3, + 4, 5, 6, +] + +my_list = [1, 2, 3, + 4, 5, 6, + ] + +result = some_function_that_takes_arguments( + 'a', 'b', 'c', + 'd', 'e', 'f', +) + +result = some_function_that_takes_arguments('a', 'b', 'c', + 'd', 'e', 'f', + ) + +# issue 203 +dica = { + ('abc' + 'def'): ( + 'abc'), +} + +(abcdef[0] + [1]) = ( + 'abc') + +('abc' + 'def') == ( + 'abc') + +# issue 214 +bar( + 1).zap( + 2) + +bar( + 1).zap( + 2) + +if True: + + def example_issue254(): + return [node.copy( + ( + replacement + # First, look at all the node's current children. + for child in node.children + # Replace them. + for replacement in replace(child) + ), + dict(name=token.undefined) + )] + + +def valid_example(): + return [node.copy(properties=dict( + (key, val if val is not None else token.undefined) + for key, val in node.items() + ))] + + +foo([ + 'bug' +]) + +# issue 144, finally! +some_hash = { + "long key that tends to happen more when you're indented": + "stringwithalongtoken you don't want to break", +} + +{ + 1: + 999999 if True + else 0, +} + + +abc = dedent( + ''' + mkdir -p ./{build}/ + mv ./build/ ./{build}/%(revision)s/ + '''.format( + build='build', + # more stuff + ) +) diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E12_second.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_second.py new file mode 100644 index 0000000000..5488ea40eb --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_second.py @@ -0,0 +1,195 @@ +if True: + result = some_function_that_takes_arguments( + 'a', 'b', 'c', + 'd', 'e', 'f', + #: E123:0 +) +#: E122+1 +if some_very_very_very_long_variable_name or var \ +or another_very_long_variable_name: + raise Exception() +#: E122+1 +if some_very_very_very_long_variable_name or var[0] \ +or another_very_long_variable_name: + raise Exception() +if True: + #: E122+1 + if some_very_very_very_long_variable_name or var \ + or another_very_long_variable_name: + raise Exception() +if True: + #: E122+1 + if some_very_very_very_long_variable_name or var[0] \ + or another_very_long_variable_name: + raise Exception() + +#: E901+1:8 +dictionary = [ + "is": { + # Might be a E122:4, but is not because the code is invalid Python. 
+ "nested": yes(), + }, +] +setup('', + scripts=[''], + classifiers=[ + #: E121:6 + 'Development Status :: 4 - Beta', + 'Environment :: Console', + 'Intended Audience :: Developers', + ]) + + +#: E123+2:4 E291:15 +abc = "E123", ( + "bad", "hanging", "close" + ) + +result = { + 'foo': [ + 'bar', { + 'baz': 'frop', + #: E123 + } + #: E123 + ] + #: E123 + } +result = some_function_that_takes_arguments( + 'a', 'b', 'c', + 'd', 'e', 'f', + #: E123 + ) +my_list = [1, 2, 3, + 4, 5, 6, + #: E124:0 +] +my_list = [1, 2, 3, + 4, 5, 6, + #: E124:19 + ] +#: E124+2 +result = some_function_that_takes_arguments('a', 'b', 'c', + 'd', 'e', 'f', +) +fooff(aaaa, + cca( + vvv, + dadd + ), fff, + #: E124:0 +) +fooff(aaaa, + ccaaa( + vvv, + dadd + ), + fff, + #: E124:0 +) +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE + #: E124:14 + ) + +if line_removed: + self.event(cr, uid, + #: E128:8 + name="Removing the option for contract", + #: E128:8 + description="contract line has been removed", + #: E124:8 + ) + +#: E129+1:4 +if foo is None and bar is "frop" and \ + blah == 'yeah': + blah = 'yeahnah' + + +#: E129+1:4 E129+2:4 +def long_function_name( + var_one, var_two, var_three, + var_four): + hello(var_one) + + +def qualify_by_address( + #: E129:4 E129+1:4 + self, cr, uid, ids, context=None, + params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)): + """ This gets called by the web server """ + + +#: E129+1:4 E129+2:4 +if (a == 2 or + b == "abc def ghi" + "jkl mno"): + True + +my_list = [ + 1, 2, 3, + 4, 5, 6, + #: E123:8 + ] + +abris = 3 + \ + 4 + \ + 5 + 6 + +fixed = re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] + \ + target[c + 1:] + +rv.update(dict.fromkeys(( + 'qualif_nr', 'reasonComment_en', 'reasonComment_fr', + #: E121:12 + 'reasonComment_de', 'reasonComment_it'), + '?'), + #: E128:4 + "foo") +#: E126+1:8 +eat_a_dict_a_day({ + "foo": "bar", +}) +#: E129+1:4 +if ( + x == ( + 3 + #: E129:4 + ) or + y == 4): + pass +#: E129+1:4 E121+2:8 E129+3:4 +if ( + x == ( + 3 + ) or + x == ( + # This one has correct indentation. 
+ 3 + #: E129:4 + ) or + y == 4): + pass +troublesome_hash = { + "hash": "value", + #: E135+1:8 + "long": "the quick brown fox jumps over the lazy dog before doing a " + "somersault", +} + +# Arguments on first line forbidden when not using vertical alignment +#: E128+1:4 +foo = long_function_name(var_one, var_two, + var_three, var_four) + +#: E128+1:4 +hello('l.%s\t%s\t%s\t%r' % + (token[2][0], pos, tokenize.tok_name[token[0]], token[1])) + + +def qualify_by_address(self, cr, uid, ids, context=None, + #: E128:8 + params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)): + """ This gets called by the web server """ diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E12_third.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_third.py new file mode 100644 index 0000000000..26697fed73 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E12_third.py @@ -0,0 +1,116 @@ +#: E128+1 +foo(1, 2, 3, +4, 5, 6) +#: E128+1:1 +foo(1, 2, 3, + 4, 5, 6) +#: E128+1:2 +foo(1, 2, 3, + 4, 5, 6) +#: E128+1:3 +foo(1, 2, 3, + 4, 5, 6) +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:5 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:6 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:7 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:8 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:9 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:10 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:11 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:12 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:13 +foo(1, 2, 3, + 4, 5, 6) +if line_removed: + #: E128+1:14 E128+2:14 + self.event(cr, uid, + name="Removing the option for contract", + description="contract line has been removed", + ) + +if line_removed: + self.event(cr, uid, + #: E127:16 + name="Removing the option for contract", + #: E127:16 + description="contract line has been removed", + #: E124:16 + ) +rv.update(d=('a', 'b', 'c'), + #: E127:13 + e=42) + +#: E135+2:17 +rv.update(d=('a' + 'b', 'c'), + e=42, f=42 + + 42) +rv.update(d=('a' + 'b', 'c'), + e=42, f=42 + + 42) +#: E127+1:26 +input1 = {'a': {'calc': 1 + 2}, 'b': 1 + + 42} +#: E128+2:17 +rv.update(d=('a' + 'b', 'c'), + e=42, f=(42 + + 42)) + +if True: + def example_issue254(): + #: + return [node.copy( + ( + #: E121:16 E121+3:20 + replacement + # First, look at all the node's current children. + for child in node.children + for replacement in replace(child) + ), + dict(name=token.undefined) + )] +# TODO multiline docstring are currently not handled. E125+1:4? 
+if (""" + """): + pass + +# TODO same +for foo in """ + abc + 123 + """.strip().split(): + hello(foo) +abc = dedent( + ''' + mkdir -p ./{build}/ + mv ./build/ ./{build}/%(revision)s/ + '''.format( + #: E121:4 E121+1:4 E123+2:0 + build='build', + # more stuff +) +) +#: E701+1: E122+1 +if True:\ +hello(True) + +#: E128+1 +foobar(a +, end=' ') diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E20.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E20.py new file mode 100644 index 0000000000..44986fa963 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E20.py @@ -0,0 +1,52 @@ +#: E201:5 +spam( ham[1], {eggs: 2}) +#: E201:9 +spam(ham[ 1], {eggs: 2}) +#: E201:14 +spam(ham[1], { eggs: 2}) + +# Okay +spam(ham[1], {eggs: 2}) + + +#: E202:22 +spam(ham[1], {eggs: 2} ) +#: E202:21 +spam(ham[1], {eggs: 2 }) +#: E202:10 +spam(ham[1 ], {eggs: 2}) +# Okay +spam(ham[1], {eggs: 2}) + +result = func( + arg1='some value', + arg2='another value', +) + +result = func( + arg1='some value', + arg2='another value' +) + +result = [ + item for item in items + if item > 5 +] + +#: E203:9 +if x == 4 : + foo(x, y) + x, y = y, x +if x == 4: + #: E203:12 E702:13 + a = x, y ; x, y = y, x +if x == 4: + foo(x, y) + #: E203:12 + x, y = y , x +# Okay +if x == 4: + foo(x, y) + x, y = y, x +a[b1, :1] == 3 +b = a[:, b1] diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E21.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E21.py new file mode 100644 index 0000000000..f65616e8ab --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E21.py @@ -0,0 +1,16 @@ +#: E211:4 +spam (1) +#: E211:4 E211:19 +dict ['key'] = list [index] +#: E211:11 +dict['key'] ['subkey'] = list[index] +# Okay +spam(1) +dict['key'] = list[index] + + +# This is not prohibited by PEP8, but avoid it. +# Dave: I think this is extremely stupid. Use the same convention everywhere. +#: E211:9 +class Foo (Bar, Baz): + pass diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E22.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E22.py new file mode 100644 index 0000000000..82ff6a440a --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E22.py @@ -0,0 +1,156 @@ +a = 12 + 3 +#: E221:5 E229:8 +b = 4 + 5 +#: E221:1 +x = 1 +#: E221:1 +y = 2 +long_variable = 3 +#: E221:4 +x[0] = 1 +#: E221:4 +x[1] = 2 +long_variable = 3 +#: E221:8 E229:19 +x = f(x) + 1 +y = long_variable + 2 +#: E221:8 E229:19 +z = x[0] + 3 +#: E221+2:13 +text = """ + bar + foo %s""" % rofl +# Okay +x = 1 +y = 2 +long_variable = 3 + + +#: E221:7 +a = a + 1 +b = b + 10 +#: E221:3 +x = -1 +#: E221:3 +y = -2 +long_variable = 3 +#: E221:6 +x[0] = 1 +#: E221:6 +x[1] = 2 +long_variable = 3 + + +#: E223+1:1 +foobart = 4 +a = 3 # aligned with tab + + +#: E223:4 +a += 1 +b += 1000 + + +#: E225:12 +submitted +=1 +#: E225:9 +submitted+= 1 +#: E225:3 +c =-1 +#: E229:7 +x = x /2 - 1 +#: E229:11 +c = alpha -4 +#: E229:10 +c = alpha- 4 +#: E229:8 +z = x **y +#: E229:14 +z = (x + 1) **y +#: E229:13 +z = (x + 1)** y +#: E227:14 +_1kB = _1MB >>10 +#: E227:11 +_1kB = _1MB>> 10 +#: E225:1 E225:2 E229:4 +i=i+ 1 +#: E225:1 E225:2 E229:5 +i=i +1 +#: E225:1 E225:2 +i=i+1 +#: E225:3 +i =i+1 +#: E225:1 +i= i+1 +#: E229:8 +c = (a +b)*(a - b) +#: E229:7 +c = (a+ b)*(a - b) + +z = 2//30 +c = (a+b) * (a-b) +x = x*2 - 1 +x = x/2 - 1 +# TODO whitespace should be the other way around according to pep8. 
+x = x / 2-1 + +hypot2 = x*x + y*y +c = (a + b)*(a - b) + + +def halves(n): + return (i//2 for i in range(n)) + + +#: E227:11 E227:13 +_1kB = _1MB>>10 +#: E227:11 E227:13 +_1MB = _1kB<<10 +#: E227:5 E227:6 +a = b|c +#: E227:5 E227:6 +b = c&a +#: E227:5 E227:6 +c = b^a +#: E228:5 E228:6 +a = b%c +#: E228:9 E228:10 +msg = fmt%(errno, errmsg) +#: E228:25 E228:26 +msg = "Error %d occurred"%errno + +#: E228:7 +a = b %c +a = b % c + +# Okay +i = i + 1 +submitted += 1 +x = x * 2 - 1 +hypot2 = x * x + y * y +c = (a + b) * (a - b) +_1MiB = 2 ** 20 +_1TiB = 2**30 +foo(bar, key='word', *args, **kwargs) +baz(**kwargs) +negative = -1 +spam(-1) +-negative +func1(lambda *args, **kw: (args, kw)) +func2(lambda a, b=h[:], c=0: (a, b, c)) +if not -5 < x < +5: + #: E227:12 + print >>sys.stderr, "x is out of range." +print >> sys.stdout, "x is an integer." +x = x / 2 - 1 + + +def squares(n): + return (i**2 for i in range(n)) + + +ENG_PREFIXES = { + -6: "\u03bc", # Greek letter mu + -3: "m", +} diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E23.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E23.py new file mode 100644 index 0000000000..47f1447a23 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E23.py @@ -0,0 +1,16 @@ +#: E231:7 +a = (1,2) +#: E231:5 +a[b1,:] +#: E231:10 +a = [{'a':''}] +# Okay +a = (4,) +#: E202:7 +b = (5, ) +c = {'text': text[5:]} + +result = { + 'key1': 'value', + 'key2': 'value', +} diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E25.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E25.py new file mode 100644 index 0000000000..8cf53147f7 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E25.py @@ -0,0 +1,36 @@ +#: E251:11 E251:13 +def foo(bar = False): + '''Test function with an error in declaration''' + pass + + +#: E251:8 +foo(bar= True) +#: E251:7 +foo(bar =True) +#: E251:7 E251:9 +foo(bar = True) +#: E251:13 +y = bar(root= "sdasd") +parser.add_argument('--long-option', + #: E135+1:20 + default= + "/rather/long/filesystem/path/here/blah/blah/blah") +parser.add_argument('--long-option', + default= + "/rather/long/filesystem") +# TODO this looks so stupid. +parser.add_argument('--long-option', default + ="/rather/long/filesystem/path/here/blah/blah/blah") +#: E251+2:7 E251+2:9 +foo(True, + baz=(1, 2), + biz = 'foo' + ) +# Okay +foo(bar=(1 == 1)) +foo(bar=(1 != 1)) +foo(bar=(1 >= 1)) +foo(bar=(1 <= 1)) +(options, args) = parser.parse_args() +d[type(None)] = _deepcopy_atomic diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E26.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E26.py new file mode 100644 index 0000000000..4774852a07 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E26.py @@ -0,0 +1,78 @@ +#: E261:4 +pass # an inline comment +#: E261:4 +pass# an inline comment + +# Okay +pass # an inline comment +pass # an inline comment +#: E262:11 +x = x + 1 #Increment x +#: E262:11 +x = x + 1 # Increment x +#: E262:11 +x = y + 1 #: Increment x +#: E265 +#Block comment +a = 1 +#: E265+1 +m = 42 +#! This is important +mx = 42 - 42 + +# Comment without anything is not an issue. +# +# However if there are comments at the end without anything it obviously +# doesn't make too much sense. 
+#: E262:9 +foo = 1 # + + +#: E266+2:4 E266+5:4 +def how_it_feel(r): + + ### This is a variable ### + a = 42 + + ### Of course it is unused + return + + +#: E266 E266+1 +##if DEBUG: +## logging.error() +#: E266 +######################################### + +# Not at the beginning of a file +#: E265 +#!/usr/bin/env python + +# Okay + +pass # an inline comment +x = x + 1 # Increment x +y = y + 1 #: Increment x + +# Block comment +a = 1 + +# Block comment1 + +# Block comment2 +aaa = 1 + + +# example of docstring (not parsed) +def oof(): + """ + #foo not parsed + """ + + ########################################################################### + # A SEPARATOR # + ########################################################################### + + # ####################################################################### # + # ########################## another separator ########################## # + # ####################################################################### # diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E27.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E27.py new file mode 100644 index 0000000000..9149f0aa52 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E27.py @@ -0,0 +1,49 @@ +# Okay +from u import (a, b) +from v import c, d +#: E221:13 +from w import (e, f) +#: E275:13 +from w import(e, f) +#: E275:29 +from importable.module import(e, f) +try: + #: E275:33 + from importable.module import(e, f) +except ImportError: + pass +# Okay +True and False +#: E221:8 +True and False +#: E221:4 +True and False +#: E221:2 +if 1: + pass +# Syntax Error, no indentation +#: E903+1 +if 1: +pass +#: E223:8 +True and False +#: E223:4 E223:9 +True and False +#: E221:5 +a and b +#: E221:5 +1 and b +#: E221:5 +a and 2 +#: E221:1 E221:6 +1 and b +#: E221:1 E221:6 +a and 2 +#: E221:4 +this and False +#: E223:5 +a and b +#: E223:1 +a and b +#: E223:4 E223:9 +this and False diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E29.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E29.py new file mode 100644 index 0000000000..cebbb7bba1 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E29.py @@ -0,0 +1,15 @@ +# Okay +# 情 +#: W291:5 +print + + +#: W291+1 +class Foo(object): + + bang = 12 + + +#: W291+1:34 +'''multiline +string with trailing whitespace''' diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E30.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E30.py new file mode 100644 index 0000000000..31e241cd44 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E30.py @@ -0,0 +1,177 @@ +#: E301+4 +class X: + + def a(): + pass + def b(): + pass + + +#: E301+5 +class X: + + def a(): + pass + # comment + def b(): + pass + + +# -*- coding: utf-8 -*- +def a(): + pass + + +#: E302+1:0 +"""Main module.""" +def _main(): + pass + + +#: E302+1:0 +foo = 1 +def get_sys_path(): + return sys.path + + +#: E302+3:0 +def a(): + pass + +def b(): + pass + + +#: E302+5:0 +def a(): + pass + +# comment + +def b(): + pass + + +#: E303+3:0 +print + + + +#: E303+3:0 E303+4:0 +print + + + + +print +#: E303+3:0 +print + + + +# comment + +print + + +#: E303+3 E303+6 +def a(): + print + + + # comment + + + # another comment + + print + + +#: E302+2 +a = 3 +#: E304+1 +@decorator + +def function(): + pass + + +#: E303+3 +# something + + + +"""This class docstring comes on line 5. 
+It gives error E303: too many blank lines (3) +""" + + +#: E302+6 +def a(): + print + + # comment + + # another comment +a() + + +#: E302+7 +def a(): + print + + # comment + + # another comment + +try: + a() +except Exception: + pass + + +#: E302+4 +def a(): + print + +# Two spaces before comments, too. +if a(): + a() + + +#: E301+2 +def a(): + x = 1 + def b(): + pass + + +#: E301+2 E301+4 +def a(): + x = 2 + def b(): + x = 1 + def c(): + pass + + +#: E301+2 E301+4 E301+5 +def a(): + x = 1 + class C: + pass + x = 2 + def b(): + pass + + +#: E302+7 +# Example from https://github.com/PyCQA/pycodestyle/issues/400 +foo = 2 + + +def main(): + blah, blah + +if __name__ == '__main__': + main() diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E30not.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E30not.py new file mode 100644 index 0000000000..c0c005ccd2 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E30not.py @@ -0,0 +1,175 @@ +# Okay +class X: + pass +# Okay + + +def foo(): + pass + + +# Okay +# -*- coding: utf-8 -*- +class X: + pass + + +# Okay +# -*- coding: utf-8 -*- +def foo(): + pass + + +# Okay +class X: + + def a(): + pass + + # comment + def b(): + pass + + # This is a + # ... multi-line comment + + def c(): + pass + + +# This is a +# ... multi-line comment + +@some_decorator +class Y: + + def a(): + pass + + # comment + + def b(): + pass + + @property + def c(): + pass + + +try: + from nonexistent import Bar +except ImportError: + class Bar(object): + """This is a Bar replacement""" + + +def with_feature(f): + """Some decorator""" + wrapper = f + if has_this_feature(f): + def wrapper(*args): + call_feature(args[0]) + return f(*args) + return wrapper + + +try: + next +except NameError: + def next(iterator, default): + for item in iterator: + return item + return default + + +def a(): + pass + + +class Foo(): + """Class Foo""" + + def b(): + + pass + + +# comment +def c(): + pass + + +# comment + + +def d(): + pass + +# This is a +# ... multi-line comment + +# And this one is +# ... a second paragraph +# ... 
which spans on 3 lines + + +# Function `e` is below +# NOTE: Hey this is a testcase + +def e(): + pass + + +def a(): + print + + # comment + + print + + print + +# Comment 1 + +# Comment 2 + + +# Comment 3 + +def b(): + + pass + + +# Okay +def foo(): + pass + + +def bar(): + pass + + +class Foo(object): + pass + + +class Bar(object): + pass + + +if __name__ == '__main__': + foo() +# Okay +classification_errors = None +# Okay +defined_properly = True +# Okay +defaults = {} +defaults.update({}) + + +# Okay +def foo(x): + classification = x + definitely = not classification diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E40.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E40.py new file mode 100644 index 0000000000..93a2ccf386 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E40.py @@ -0,0 +1,39 @@ +#: E401:7 +import os, sys +# Okay +import os +import sys + +from subprocess import Popen, PIPE + +from myclass import MyClass +from foo.bar.yourclass import YourClass + +import myclass +import foo.bar.yourclass +# All Okay from here until the definition of VERSION +__all__ = ['abc'] + +import foo +__version__ = "42" + +import foo +__author__ = "Simon Gomizelj" + +import foo +try: + import foo +except ImportError: + pass +else: + hello('imported foo') +finally: + hello('made attempt to import foo') + +import bar +VERSION = '1.2.3' + +#: E402 +import foo +#: E402 +import foo diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E50.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E50.py new file mode 100644 index 0000000000..67fd55833c --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E50.py @@ -0,0 +1,126 @@ +#: E501:4 +a = '12345678901234567890123456789012345678901234567890123456789012345678901234567890' +#: E501:80 +a = '1234567890123456789012345678901234567890123456789012345678901234567890' or \ + 6 +#: E501+1:80 +a = 7 or \ + '1234567890123456789012345678901234567890123456789012345678901234567890' or \ + 6 +#: E501+1:80 E501+2:80 +a = 7 or \ + '1234567890123456789012345678901234567890123456789012345678901234567890' or \ + '1234567890123456789012345678901234567890123456789012345678901234567890' or \ + 6 +#: E501:78 +a = '1234567890123456789012345678901234567890123456789012345678901234567890' # \ +#: E502:78 +a = ('123456789012345678901234567890123456789012345678901234567890123456789' \ + '01234567890') +#: E502+1:11 +a = ('AAA \ + BBB' \ + 'CCC') +#: E502:38 +if (foo is None and bar is "e000" and \ + blah == 'yeah'): + blah = 'yeahnah' +# +# Okay +a = ('AAA' + 'BBB') + +a = ('AAA \ + BBB' + 'CCC') + +a = 'AAA' \ + 'BBB' \ + 'CCC' + +a = ('AAA\ +BBBBBBBBB\ +CCCCCCCCC\ +DDDDDDDDD') +# +# Okay +if aaa: + pass +elif bbb or \ + ccc: + pass + +ddd = \ + ccc + +('\ + ' + ' \ +') +(''' + ''' + ' \ +') +#: E501:67 E225:21 E225:22 +very_long_identifiers=and_terrible_whitespace_habits(are_no_excuse+for_long_lines) +# +# TODO Long multiline strings are not handled. E501? +'''multiline string +with a long long long long long long long long long long long long long long long long line +''' +#: E501 +'''same thing, but this time without a terminal newline in the string +long long long long long long long long long long long long long long long long line''' +# +# issue 224 (unavoidable long lines in docstrings) +# Okay +""" +I'm some great documentation. 
Because I'm some great documentation, I'm +going to give you a reference to some valuable information about some API +that I'm calling: + + http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx +""" +#: E501 +""" +longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces""" + + +# Regression test for #622 +def foo(): + """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis pulvinar vitae + """ + + +# Okay +""" +This + almost_empty_line +""" + +""" +This + almost_empty_line +""" +# A basic comment +#: E501 +# with a long long long long long long long long long long long long long long long long line + +# +# Okay +# I'm some great comment. Because I'm so great, I'm going to give you a +# reference to some valuable information about some API that I'm calling: +# +# http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx + +x = 3 + +# longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces + +# +# Okay +# This +# almost_empty_line + +# +#: E501+1 +# This +# almost_empty_line diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E70.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E70.py new file mode 100644 index 0000000000..be11fb1def --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E70.py @@ -0,0 +1,25 @@ +#: E701:6 +if a: a = False +#: E701:41 +if not header or header[:6] != 'bytes=': pass +#: E702:9 +a = False; b = True +#: E702:16 E402 +import bdist_egg; bdist_egg.write_safety_flag(cmd.egg_info, safe) +#: E703:12 E402 +import shlex; +#: E702:8 E703:22 +del a[:]; a.append(42); + + +#: E704:10 +def f(x): return 2 + + +#: E704:10 +def f(x): return 2 * x + + +while all is round: + #: E704:14 + def f(x): return 2 * x diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E71.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E71.py new file mode 100644 index 0000000000..109dcd6c77 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E71.py @@ -0,0 +1,93 @@ +#: E711:7 +if res == None: + pass +#: E711:7 +if res != None: + pass +#: E711:8 +if None == res: + pass +#: E711:8 +if None != res: + pass +#: E711:10 +if res[1] == None: + pass +#: E711:10 +if res[1] != None: + pass +#: E711:8 +if None != res[1]: + pass +#: E711:8 +if None == res[1]: + pass + +# +#: E712:7 +if res == True: + pass +#: E712:7 +if res != False: + pass +#: E712:8 +if True != res: + pass +#: E712:9 +if False == res: + pass +#: E712:10 +if res[1] == True: + pass +#: E712:10 +if res[1] != False: + pass + +if x is False: + pass + +# +#: E713:9 +if not X in Y: + pass +#: E713:11 +if not X.B in Y: + pass +#: E713:9 +if not X in Y and Z == "zero": + pass +#: E713:24 +if X == "zero" or not Y in Z: + pass + +# +#: E714:9 +if not X is Y: + pass +#: E714:11 +if not X.B is Y: + pass + +# +# Okay +if x not in y: + pass + +if not (X in Y or X is Z): + pass + +if not (X in Y): + pass + +if x is not y: + pass + +if TrueElement.get_element(True) == TrueElement.get_element(False): + pass + +if (True) == TrueElement or x == TrueElement: + pass + +assert (not foo) in bar +assert {'x': not foo} in bar +assert [42, not foo] in bar diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E72.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E72.py new file mode 100644 index 0000000000..2e9ef9151d --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E72.py @@ -0,0 +1,79 @@ +#: E721:3 +if 
type(res) == type(42): + pass +#: E721:3 +if type(res) != type(""): + pass + +import types + +if res == types.IntType: + pass + +import types + +#: E721:3 +if type(res) is not types.ListType: + pass +#: E721:7 E721:35 +assert type(res) == type(False) or type(res) == type(None) +#: E721:7 +assert type(res) == type([]) +#: E721:7 +assert type(res) == type(()) +#: E721:7 +assert type(res) == type((0,)) +#: E721:7 +assert type(res) == type((0)) +#: E721:7 +assert type(res) != type((1,)) +#: E721:7 +assert type(res) is type((1,)) +#: E721:7 +assert type(res) is not type((1,)) + +# Okay +#: E402 +import types + +if isinstance(res, int): + pass +if isinstance(res, str): + pass +if isinstance(res, types.MethodType): + pass + +#: E721:3 E721:25 +if type(a) != type(b) or type(a) == type(ccc): + pass +#: E721 +type(a) != type(b) +#: E721 +1 != type(b) +#: E721 +type(b) != 1 +1 != 1 + +try: + pass +#: E722 +except: + pass +try: + pass +except Exception: + pass +#: E722 +except: + pass +# Okay +fake_code = """" +try: + do_something() +except: + pass +""" +try: + pass +except Exception: + pass diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/E73.py b/contrib/python/parso/py2/tests/normalizer_issue_files/E73.py new file mode 100644 index 0000000000..77e2e9043a --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/E73.py @@ -0,0 +1,16 @@ +#: E731:4 +f = lambda x: 2 * x +while False: + #: E731:10 + foo = lambda y, z: 2 * x +# Okay +f = object() +f.method = lambda: 'Method' + +f = {} +f['a'] = lambda x: x ** 2 + +f = [] +f.append(lambda x: x ** 2) + +lambda: 'no-op' diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/LICENSE b/contrib/python/parso/py2/tests/normalizer_issue_files/LICENSE new file mode 100644 index 0000000000..142a508a63 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/LICENSE @@ -0,0 +1,29 @@ +Copyright © 2006-2009 Johann C. Rocholl <johann@rocholl.net> +Copyright © 2009-2014 Florent Xicluna <florent.xicluna@gmail.com> +Copyright © 2014-2016 Ian Lee <IanLee1521@gmail.com> +Copyright © 2017-???? Dave Halter <davidhalter88@gmail.com> + +Dave: The files in this folder were ported from pydocstyle and some +modifications where made. + +Licensed under the terms of the Expat License + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation files +(the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax.py b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax.py new file mode 100644 index 0000000000..a73b84cfc9 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax.py @@ -0,0 +1,46 @@ +""" +Some syntax errors are a bit complicated and need exact checking. Here we +gather some of the potentially dangerous ones. +""" + +from __future__ import division + +# With a dot it's not a future import anymore. +from .__future__ import absolute_import + +'' '' +''r''u'' +b'' BR'' + + +for x in [1]: + break + continue + +try: + pass +except ZeroDivisionError: + pass + #: E722:0 +except: + pass + +try: + pass + #: E722:0 E901:0 +except: + pass +except ZeroDivisionError: + pass + + +r'\n' +r'\x' +b'\n' + + +a = 3 + + +def x(b=a): + global a diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python2.py b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python2.py new file mode 100644 index 0000000000..81736bc4f8 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python2.py @@ -0,0 +1,2 @@ +'s' b'' +u's' b'ä' diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python3.4.py b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python3.4.py new file mode 100644 index 0000000000..175957592f --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python3.4.py @@ -0,0 +1,3 @@ +*foo, a = (1,) +*foo[0], a = (1,) +*[], a = (1,) diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python3.5.py b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python3.5.py new file mode 100644 index 0000000000..cc0385bdb9 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python3.5.py @@ -0,0 +1,23 @@ +""" +Mostly allowed syntax in Python 3.5. +""" + + +async def foo(): + await bar() + #: E901 + yield from [] + return + #: E901 + return '' + + +# With decorator it's a different statement. 
+@bla +async def foo(): + await bar() + #: E901 + yield from [] + return + #: E901 + return '' diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python3.6.py b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python3.6.py new file mode 100644 index 0000000000..1bbe071d97 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/allowed_syntax_python3.6.py @@ -0,0 +1,45 @@ +foo: int = 4 +(foo): int = 3 +((foo)): int = 3 +foo.bar: int +foo[3]: int + + +def glob(): + global x + y: foo = x + + +def c(): + a = 3 + + def d(): + class X(): + nonlocal a + + +def x(): + a = 3 + + def y(): + nonlocal a + + +def x(): + def y(): + nonlocal a + + a = 3 + + +def x(): + a = 3 + + def y(): + class z(): + nonlocal a + + +a = *args, *args +error[(*args, *args)] = 3 +*args, *args diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/latin-1.py b/contrib/python/parso/py2/tests/normalizer_issue_files/latin-1.py new file mode 100644 index 0000000000..8328cfba9e --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/latin-1.py @@ -0,0 +1,6 @@ +# -*- coding: latin-1 -*- +# Test non-UTF8 encoding +latin1 = ('' + '') + +c = ("w") diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/python2.7.py b/contrib/python/parso/py2/tests/normalizer_issue_files/python2.7.py new file mode 100644 index 0000000000..5d10739749 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/python2.7.py @@ -0,0 +1,14 @@ +import sys + +print 1, 2 >> sys.stdout + + +foo = ur'This is not possible in Python 3.' + +# This is actually printing a tuple. +#: E275:5 +print(1, 2) + +# True and False are not keywords in Python 2 and therefore there's no need for +# a space. +norman = True+False diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/python3.py b/contrib/python/parso/py2/tests/normalizer_issue_files/python3.py new file mode 100644 index 0000000000..566e90360a --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/python3.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +from typing import ClassVar, List + +print(1, 2) + + +# Annotated function (Issue #29) +def foo(x: int) -> int: + return x + 1 + + +# Annotated variables #575 +CONST: int = 42 + + +class Class: + cls_var: ClassVar[str] + + def m(self): + xs: List[int] = [] + + +# True and False are keywords in Python 3 and therefore need a space. +#: E275:13 E275:14 +norman = True+False + + +#: E302+3:0 +def a(): + pass + +async def b(): + pass + + +# Okay +async def add(a: int = 0, b: int = 0) -> int: + return a + b + + +# Previously E251 four times +#: E221:5 +async def add(a: int = 0, b: int = 0) -> int: + return a + b + + +# Previously just E272+1:5 E272+4:5 +#: E302+3 E221:5 E221+3:5 +async def x(): + pass + +async def x(y: int = 1): + pass + + +#: E704:16 +async def f(x): return 2 + + +a[b1, :] == a[b1, ...] 
+ + +# Annotated Function Definitions +# Okay +def munge(input: AnyStr, sep: AnyStr = None, limit=1000, + extra: Union[str, dict] = None) -> AnyStr: + pass + + +#: E225:24 E225:26 +def x(b: tuple = (1, 2))->int: + return a + b + + +#: E252:11 E252:12 E231:8 +def b(a:int=1): + pass + + +if alpha[:-i]: + *a, b = (1, 2, 3) + + +# Named only arguments +def foo(*, asdf): + pass + + +def foo2(bar, *, asdf=2): + pass diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/utf-8-bom.py b/contrib/python/parso/py2/tests/normalizer_issue_files/utf-8-bom.py new file mode 100644 index 0000000000..9c065c9494 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/utf-8-bom.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +hello = 'こんにちわ' + +# EOF diff --git a/contrib/python/parso/py2/tests/normalizer_issue_files/utf-8.py b/contrib/python/parso/py2/tests/normalizer_issue_files/utf-8.py new file mode 100644 index 0000000000..73ea9a2827 --- /dev/null +++ b/contrib/python/parso/py2/tests/normalizer_issue_files/utf-8.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +# Some random text with multi-byte characters (utf-8 encoded) +# +# Εδώ μάτσο κειμένων τη, τρόπο πιθανό διευθυντές ώρα μη. Νέων απλό παράγει ροή +# κι, το επί δεδομένη καθορίζουν. Πάντως ζητήσεις περιβάλλοντος ένα με, τη +# ξέχασε αρπάζεις φαινόμενο όλη. Τρέξει εσφαλμένη χρησιμοποίησέ νέα τι. Θα όρο +# πετάνε φακέλους, άρα με διακοπής λαμβάνουν εφαμοργής. Λες κι μειώσει +# καθυστερεί. + +# 79 narrow chars +# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 [79] + +# 78 narrow chars (Na) + 1 wide char (W) +# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8情 + +# 3 narrow chars (Na) + 40 wide chars (W) +# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情 + +# 3 narrow chars (Na) + 76 wide chars (W) +# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情 + +# +# 80 narrow chars (Na) +#: E501 +# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 [80] +# +# 78 narrow chars (Na) + 2 wide char (W) +#: E501 +# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8情情 +# +# 3 narrow chars (Na) + 77 wide chars (W) +#: E501 +# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情 +# diff --git a/contrib/python/parso/py2/tests/test_absolute_import.py b/contrib/python/parso/py2/tests/test_absolute_import.py new file mode 100644 index 0000000000..c959ea5340 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_absolute_import.py @@ -0,0 +1,29 @@ +""" +Tests ``from __future__ import absolute_import`` (only important for +Python 2.X) +""" +from parso import parse + + +def test_explicit_absolute_imports(): + """ + Detect modules with ``from __future__ import absolute_import``. + """ + module = parse("from __future__ import absolute_import") + assert module._has_explicit_absolute_import() + + +def test_no_explicit_absolute_imports(): + """ + Detect modules without ``from __future__ import absolute_import``. + """ + assert not parse("1")._has_explicit_absolute_import() + + +def test_dont_break_imports_without_namespaces(): + """ + The code checking for ``from __future__ import absolute_import`` shouldn't + assume that all imports have non-``None`` namespaces. 
+ """ + src = "from __future__ import absolute_import\nimport xyzzy" + assert parse(src)._has_explicit_absolute_import() diff --git a/contrib/python/parso/py2/tests/test_cache.py b/contrib/python/parso/py2/tests/test_cache.py new file mode 100644 index 0000000000..1cdc86da08 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_cache.py @@ -0,0 +1,195 @@ +""" +Test all things related to the ``jedi.cache`` module. +""" + +import os +import os.path + +import pytest +import time + +from parso.cache import (_CACHED_FILE_MAXIMUM_SURVIVAL, _VERSION_TAG, + _get_cache_clear_lock, _get_hashed_path, + _load_from_file_system, _NodeCacheItem, + _remove_cache_and_update_lock, _save_to_file_system, + load_module, parser_cache, try_to_save_module) +from parso._compatibility import is_pypy, PermissionError +from parso import load_grammar +from parso import cache +from parso import file_io +from parso import parse + +skip_pypy = pytest.mark.skipif( + is_pypy, + reason="pickling in pypy is slow, since we don't pickle," + "we never go into path of auto-collecting garbage" +) + + +@pytest.fixture() +def isolated_parso_cache(monkeypatch, tmpdir): + """Set `parso.cache._default_cache_path` to a temporary directory + during the test. """ + cache_path = str(os.path.join(str(tmpdir), "__parso_cache")) + monkeypatch.setattr(cache, '_default_cache_path', cache_path) + monkeypatch.setattr(cache, '_get_default_cache_path', lambda *args, **kwargs: cache_path) + return cache_path + + +@pytest.mark.skip("SUBBOTNIK-2721 Disable load cache from disk") +def test_modulepickling_change_cache_dir(tmpdir): + """ + ParserPickling should not save old cache when cache_directory is changed. + + See: `#168 <https://github.com/davidhalter/jedi/pull/168>`_ + """ + dir_1 = str(tmpdir.mkdir('first')) + dir_2 = str(tmpdir.mkdir('second')) + + item_1 = _NodeCacheItem('bla', []) + item_2 = _NodeCacheItem('bla', []) + path_1 = 'fake path 1' + path_2 = 'fake path 2' + + hashed_grammar = load_grammar()._hashed + _save_to_file_system(hashed_grammar, path_1, item_1, cache_path=dir_1) + parser_cache.clear() + cached = load_stored_item(hashed_grammar, path_1, item_1, cache_path=dir_1) + assert cached == item_1.node + + _save_to_file_system(hashed_grammar, path_2, item_2, cache_path=dir_2) + cached = load_stored_item(hashed_grammar, path_1, item_1, cache_path=dir_2) + assert cached is None + + +def load_stored_item(hashed_grammar, path, item, cache_path): + """Load `item` stored at `path` in `cache`.""" + item = _load_from_file_system(hashed_grammar, path, item.change_time - 1, cache_path) + return item + + +@pytest.mark.usefixtures("isolated_parso_cache") +def test_modulepickling_simulate_deleted_cache(tmpdir): + """ + Tests loading from a cache file after it is deleted. + According to macOS `dev docs`__, + + Note that the system may delete the Caches/ directory to free up disk + space, so your app must be able to re-create or download these files as + needed. + + It is possible that other supported platforms treat cache files the same + way. 
+ + __ https://developer.apple.com/library/content/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html + """ + grammar = load_grammar() + module = 'fake parser' + + # Create the file + path = tmpdir.dirname + '/some_path' + with open(path, 'w'): + pass + io = file_io.FileIO(path) + + try_to_save_module(grammar._hashed, io, module, lines=[]) + assert load_module(grammar._hashed, io) == module + + os.unlink(_get_hashed_path(grammar._hashed, path)) + parser_cache.clear() + + cached2 = load_module(grammar._hashed, io) + assert cached2 is None + + +@pytest.mark.skip +def test_cache_limit(): + def cache_size(): + return sum(len(v) for v in parser_cache.values()) + + try: + parser_cache.clear() + future_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() + 10e6) + old_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() - 10e4) + parser_cache['some_hash_old'] = { + '/path/%s' % i: old_node_cache_item for i in range(300) + } + parser_cache['some_hash_new'] = { + '/path/%s' % i: future_node_cache_item for i in range(300) + } + assert cache_size() == 600 + parse('somecode', cache=True, path='/path/somepath') + assert cache_size() == 301 + finally: + parser_cache.clear() + + +class _FixedTimeFileIO(file_io.KnownContentFileIO): + def __init__(self, path, content, last_modified): + super(_FixedTimeFileIO, self).__init__(path, content) + self._last_modified = last_modified + + def get_last_modified(self): + return self._last_modified + + +@pytest.mark.skip +@pytest.mark.parametrize('diff_cache', [False, True]) +@pytest.mark.parametrize('use_file_io', [False, True]) +def test_cache_last_used_update(diff_cache, use_file_io): + p = '/path/last-used' + parser_cache.clear() # Clear, because then it's easier to find stuff. 
+ parse('somecode', cache=True, path=p) + node_cache_item = next(iter(parser_cache.values()))[p] + now = time.time() + assert node_cache_item.last_used < now + + if use_file_io: + f = _FixedTimeFileIO(p, 'code', node_cache_item.last_used - 10) + parse(file_io=f, cache=True, diff_cache=diff_cache) + else: + parse('somecode2', cache=True, path=p, diff_cache=diff_cache) + + node_cache_item = next(iter(parser_cache.values()))[p] + assert now < node_cache_item.last_used < time.time() + + +@skip_pypy +def test_inactive_cache(tmpdir, isolated_parso_cache): + parser_cache.clear() + test_subjects = "abcdef" + for path in test_subjects: + parse('somecode', cache=True, path=os.path.join(str(tmpdir), path)) + raw_cache_path = os.path.join(isolated_parso_cache, _VERSION_TAG) + assert os.path.exists(raw_cache_path) + paths = os.listdir(raw_cache_path) + a_while_ago = time.time() - _CACHED_FILE_MAXIMUM_SURVIVAL + old_paths = set() + for path in paths[:len(test_subjects) // 2]: # make certain number of paths old + os.utime(os.path.join(raw_cache_path, path), (a_while_ago, a_while_ago)) + old_paths.add(path) + # nothing should be cleared while the lock is on + assert os.path.exists(_get_cache_clear_lock().path) + _remove_cache_and_update_lock() # it shouldn't clear anything + assert len(os.listdir(raw_cache_path)) == len(test_subjects) + assert old_paths.issubset(os.listdir(raw_cache_path)) + + os.utime(_get_cache_clear_lock().path, (a_while_ago, a_while_ago)) + _remove_cache_and_update_lock() + assert len(os.listdir(raw_cache_path)) == len(test_subjects) // 2 + assert not old_paths.intersection(os.listdir(raw_cache_path)) + + +@pytest.mark.skip +@skip_pypy +def test_permission_error(monkeypatch): + def save(*args, **kwargs): + was_called[0] = True # Python 2... Use nonlocal instead + raise PermissionError + + was_called = [False] + + monkeypatch.setattr(cache, '_save_to_file_system', save) + with pytest.warns(Warning): + parse(path=__file__, cache=True, diff_cache=True) + assert was_called[0] diff --git a/contrib/python/parso/py2/tests/test_diff_parser.py b/contrib/python/parso/py2/tests/test_diff_parser.py new file mode 100644 index 0000000000..1904314971 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_diff_parser.py @@ -0,0 +1,1750 @@ +# -*- coding: utf-8 -*- +from textwrap import dedent +import logging +import sys + +import pytest + +from parso.utils import split_lines +from parso import cache +from parso import load_grammar +from parso.python.diff import DiffParser, _assert_valid_graph, _assert_nodes_are_equal +from parso import parse + +ANY = object() + + +def test_simple(): + """ + The diff parser reuses modules. So check for that. 
+ """ + grammar = load_grammar() + module_a = grammar.parse('a', diff_cache=True) + assert grammar.parse('b', diff_cache=True) == module_a + + +def _check_error_leaves_nodes(node): + if node.type in ('error_leaf', 'error_node'): + return node + + try: + children = node.children + except AttributeError: + pass + else: + for child in children: + x_node = _check_error_leaves_nodes(child) + if x_node is not None: + return x_node + return None + + +class Differ(object): + grammar = load_grammar() + + def initialize(self, code): + logging.debug('differ: initialize') + try: + del cache.parser_cache[self.grammar._hashed][None] + except KeyError: + pass + + self.lines = split_lines(code, keepends=True) + self.module = parse(code, diff_cache=True, cache=True) + assert code == self.module.get_code() + _assert_valid_graph(self.module) + return self.module + + def parse(self, code, copies=0, parsers=0, expect_error_leaves=False): + logging.debug('differ: parse copies=%s parsers=%s', copies, parsers) + lines = split_lines(code, keepends=True) + diff_parser = DiffParser( + self.grammar._pgen_grammar, + self.grammar._tokenizer, + self.module, + ) + new_module = diff_parser.update(self.lines, lines) + self.lines = lines + assert code == new_module.get_code() + + _assert_valid_graph(new_module) + + without_diff_parser_module = parse(code) + _assert_nodes_are_equal(new_module, without_diff_parser_module) + + error_node = _check_error_leaves_nodes(new_module) + assert expect_error_leaves == (error_node is not None), error_node + if parsers is not ANY: + assert diff_parser._parser_count == parsers + if copies is not ANY: + assert diff_parser._copy_count == copies + return new_module + + +@pytest.fixture() +def differ(): + return Differ() + + +def test_change_and_undo(differ): + func_before = 'def func():\n pass\n' + # Parse the function and a. + differ.initialize(func_before + 'a') + # Parse just b. + differ.parse(func_before + 'b', copies=1, parsers=2) + # b has changed to a again, so parse that. + differ.parse(func_before + 'a', copies=1, parsers=2) + # Same as before parsers should not be used. Just a simple copy. + differ.parse(func_before + 'a', copies=1) + + # Now that we have a newline at the end, everything is easier in Python + # syntax, we can parse once and then get a copy. + differ.parse(func_before + 'a\n', copies=1, parsers=2) + differ.parse(func_before + 'a\n', copies=1) + + # Getting rid of an old parser: Still no parsers used. + differ.parse('a\n', copies=1) + # Now the file has completely changed and we need to parse. + differ.parse('b\n', parsers=1) + # And again. + differ.parse('a\n', parsers=1) + + +def test_positions(differ): + func_before = 'class A:\n pass\n' + m = differ.initialize(func_before + 'a') + assert m.start_pos == (1, 0) + assert m.end_pos == (3, 1) + + m = differ.parse('a', copies=1) + assert m.start_pos == (1, 0) + assert m.end_pos == (1, 1) + + m = differ.parse('a\n\n', parsers=1) + assert m.end_pos == (3, 0) + m = differ.parse('a\n\n ', copies=1, parsers=2) + assert m.end_pos == (3, 1) + m = differ.parse('a ', parsers=1) + assert m.end_pos == (1, 2) + + +def test_if_simple(differ): + src = dedent('''\ + if 1: + a = 3 + ''') + else_ = "else:\n a = ''\n" + + differ.initialize(src + 'a') + differ.parse(src + else_ + "a", copies=0, parsers=1) + + differ.parse(else_, parsers=2, expect_error_leaves=True) + differ.parse(src + else_, parsers=1) + + +def test_func_with_for_and_comment(differ): + # The first newline is important, leave it. 
It should not trigger another + # parser split. + src = dedent("""\ + + def func(): + pass + + + for a in [1]: + # COMMENT + a""") + differ.initialize(src) + differ.parse('a\n' + src, copies=1, parsers=3) + + +def test_one_statement_func(differ): + src = dedent("""\ + first + def func(): a + """) + differ.initialize(src + 'second') + differ.parse(src + 'def second():\n a', parsers=1, copies=1) + + +def test_for_on_one_line(differ): + src = dedent("""\ + foo = 1 + for x in foo: pass + + def hi(): + pass + """) + differ.initialize(src) + + src = dedent("""\ + def hi(): + for x in foo: pass + pass + + pass + """) + differ.parse(src, parsers=2) + + src = dedent("""\ + def hi(): + for x in foo: pass + pass + + def nested(): + pass + """) + # The second parser is for parsing the `def nested()` which is an `equal` + # operation in the SequenceMatcher. + differ.parse(src, parsers=1, copies=1) + + +def test_open_parentheses(differ): + func = 'def func():\n a\n' + code = 'isinstance(\n\n' + func + new_code = 'isinstance(\n' + func + differ.initialize(code) + + differ.parse(new_code, parsers=1, expect_error_leaves=True) + + new_code = 'a = 1\n' + new_code + differ.parse(new_code, parsers=2, expect_error_leaves=True) + + func += 'def other_func():\n pass\n' + differ.initialize('isinstance(\n' + func) + # Cannot copy all, because the prefix of the function is once a newline and + # once not. + differ.parse('isinstance()\n' + func, parsers=2, copies=1) + + +def test_open_parentheses_at_end(differ): + code = "a['" + differ.initialize(code) + differ.parse(code, parsers=1, expect_error_leaves=True) + + +def test_backslash(differ): + src = dedent(r""" + a = 1\ + if 1 else 2 + def x(): + pass + """) + differ.initialize(src) + + src = dedent(r""" + def x(): + a = 1\ + if 1 else 2 + def y(): + pass + """) + differ.parse(src, parsers=1) + + src = dedent(r""" + def first(): + if foo \ + and bar \ + or baz: + pass + def second(): + pass + """) + differ.parse(src, parsers=2) + + +def test_full_copy(differ): + code = 'def foo(bar, baz):\n pass\n bar' + differ.initialize(code) + differ.parse(code, copies=1) + + +def test_wrong_whitespace(differ): + code = ''' + hello + ''' + differ.initialize(code) + differ.parse(code + 'bar\n ', parsers=2, expect_error_leaves=True) + + code += """abc(\npass\n """ + differ.parse(code, parsers=2, expect_error_leaves=True) + + +def test_issues_with_error_leaves(differ): + code = dedent(''' + def ints(): + str.. + str + ''') + code2 = dedent(''' + def ints(): + str. + str + ''') + differ.initialize(code) + differ.parse(code2, parsers=1, expect_error_leaves=True) + + +def test_unfinished_nodes(differ): + code = dedent(''' + class a(): + def __init__(self, a): + self.a = a + def p(self): + a(1) + ''') + code2 = dedent(''' + class a(): + def __init__(self, a): + self.a = a + def p(self): + self + a(1) + ''') + differ.initialize(code) + differ.parse(code2, parsers=2, copies=2) + + +def test_nested_if_and_scopes(differ): + code = dedent(''' + class a(): + if 1: + def b(): + 2 + ''') + code2 = code + ' else:\n 3' + differ.initialize(code) + differ.parse(code2, parsers=1, copies=0) + + +def test_word_before_def(differ): + code1 = 'blub def x():\n' + code2 = code1 + ' s' + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=0, expect_error_leaves=True) + + +def test_classes_with_error_leaves(differ): + code1 = dedent(''' + class X(): + def x(self): + blablabla + assert 3 + self. 
+ + class Y(): + pass + ''') + code2 = dedent(''' + class X(): + def x(self): + blablabla + assert 3 + str( + + class Y(): + pass + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + + +def test_totally_wrong_whitespace(differ): + code1 = ''' + class X(): + raise n + + class Y(): + pass + ''' + code2 = ''' + class X(): + raise n + str( + + class Y(): + pass + ''' + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=0, expect_error_leaves=True) + + +def test_node_insertion(differ): + code1 = dedent(''' + class X(): + def y(self): + a = 1 + b = 2 + + c = 3 + d = 4 + ''') + code2 = dedent(''' + class X(): + def y(self): + a = 1 + b = 2 + str + + c = 3 + d = 4 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=2) + + +def test_whitespace_at_end(differ): + code = dedent('str\n\n') + + differ.initialize(code) + differ.parse(code + '\n', parsers=1, copies=1) + + +def test_endless_while_loop(differ): + """ + This was a bug in Jedi #878. + """ + code = '#dead' + differ.initialize(code) + module = differ.parse(code, parsers=1) + assert module.end_pos == (1, 5) + + code = '#dead\n' + differ.initialize(code) + module = differ.parse(code + '\n', parsers=1) + assert module.end_pos == (3, 0) + + +def test_in_class_movements(differ): + code1 = dedent("""\ + class PlaybookExecutor: + p + b + def run(self): + 1 + try: + x + except: + pass + """) + code2 = dedent("""\ + class PlaybookExecutor: + b + def run(self): + 1 + try: + x + except: + pass + """) + + differ.initialize(code1) + differ.parse(code2, parsers=1) + + +def test_in_parentheses_newlines(differ): + code1 = dedent(""" + x = str( + True) + + a = 1 + + def foo(): + pass + + b = 2""") + + code2 = dedent(""" + x = str(True) + + a = 1 + + def foo(): + pass + + b = 2""") + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1) + + +def test_indentation_issue(differ): + code1 = dedent(""" + import module + """) + + code2 = dedent(""" + class L1: + class L2: + class L3: + def f(): pass + def f(): pass + def f(): pass + def f(): pass + """) + + differ.initialize(code1) + differ.parse(code2, parsers=2) + + +def test_endmarker_newline(differ): + code1 = dedent('''\ + docu = None + # some comment + result = codet + incomplete_dctassign = { + "module" + + if "a": + x = 3 # asdf + ''') + + code2 = code1.replace('codet', 'coded') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + + +def test_newlines_at_end(differ): + differ.initialize('a\n\n') + differ.parse('a\n', copies=1) + + +def test_end_newline_with_decorator(differ): + code = dedent('''\ + @staticmethod + def spam(): + import json + json.l''') + + differ.initialize(code) + module = differ.parse(code + '\n', copies=1, parsers=1) + decorated, endmarker = module.children + assert decorated.type == 'decorated' + decorator, func = decorated.children + suite = func.children[-1] + assert suite.type == 'suite' + newline, first_stmt, second_stmt = suite.children + assert first_stmt.get_code() == ' import json\n' + assert second_stmt.get_code() == ' json.l\n' + + +def test_invalid_to_valid_nodes(differ): + code1 = dedent('''\ + def a(): + foo = 3 + def b(): + la = 3 + else: + la + return + foo + base + ''') + code2 = dedent('''\ + def a(): + foo = 3 + def b(): + la = 3 + if foo: + latte = 3 + else: + la + return + foo + base + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=3) + + +def test_if_removal_and_reappearence(differ): + 
code1 = dedent('''\ + la = 3 + if foo: + latte = 3 + else: + la + pass + ''') + + code2 = dedent('''\ + la = 3 + latte = 3 + else: + la + pass + ''') + + code3 = dedent('''\ + la = 3 + if foo: + latte = 3 + else: + la + ''') + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=2, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + differ.parse(code3, parsers=1, copies=1) + + +def test_add_error_indentation(differ): + code = 'if x:\n 1\n' + differ.initialize(code) + differ.parse(code + ' 2\n', parsers=1, copies=0, expect_error_leaves=True) + + +def test_differing_docstrings(differ): + code1 = dedent('''\ + def foobar(x, y): + 1 + return x + + def bazbiz(): + foobar() + lala + ''') + + code2 = dedent('''\ + def foobar(x, y): + 2 + return x + y + + def bazbiz(): + z = foobar() + lala + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1) + differ.parse(code1, parsers=2, copies=1) + + +def test_one_call_in_function_change(differ): + code1 = dedent('''\ + def f(self): + mro = [self] + for a in something: + yield a + + def g(self): + return C( + a=str, + b=self, + ) + ''') + + code2 = dedent('''\ + def f(self): + mro = [self] + + def g(self): + return C( + a=str, + t + b=self, + ) + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_function_deletion(differ): + code1 = dedent('''\ + class C(list): + def f(self): + def iterate(): + for x in b: + break + + return list(iterate()) + ''') + + code2 = dedent('''\ + class C(): + def f(self): + for x in b: + break + + return list(iterate()) + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=0, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=0) + + +def test_docstring_removal(differ): + code1 = dedent('''\ + class E(Exception): + """ + 1 + 2 + 3 + """ + + class S(object): + @property + def f(self): + return cmd + def __repr__(self): + return cmd2 + ''') + + code2 = dedent('''\ + class E(Exception): + """ + 1 + 3 + """ + + class S(object): + @property + def f(self): + return cmd + return cmd2 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=2) + differ.parse(code1, parsers=3, copies=1) + + +def test_paren_in_strange_position(differ): + code1 = dedent('''\ + class C: + """ ha """ + def __init__(self, message): + self.message = message + ''') + + code2 = dedent('''\ + class C: + """ ha """ + ) + def __init__(self, message): + self.message = message + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=2, expect_error_leaves=True) + differ.parse(code1, parsers=0, copies=2) + + +def insert_line_into_code(code, index, line): + lines = split_lines(code, keepends=True) + lines.insert(index, line) + return ''.join(lines) + + +def test_paren_before_docstring(differ): + code1 = dedent('''\ + # comment + """ + The + """ + from parso import tree + from parso import python + ''') + + code2 = insert_line_into_code(code1, 1, ' ' * 16 + 'raise InternalParseError(\n') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_parentheses_before_method(differ): + code1 = dedent('''\ + class A: + def a(self): + pass + + class B: + def b(self): + if 1: + pass + ''') + + code2 = dedent('''\ + class A: + def a(self): + pass + Exception.__init__(self, "x" % + + def b(self): + if 1: + pass + ''') + + differ.initialize(code1) + 
differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_indentation_issues(differ): + code1 = dedent('''\ + class C: + def f(): + 1 + if 2: + return 3 + + def g(): + to_be_removed + pass + ''') + + code2 = dedent('''\ + class C: + def f(): + 1 + ``something``, very ``weird``). + if 2: + return 3 + + def g(): + to_be_removed + pass + ''') + + code3 = dedent('''\ + class C: + def f(): + 1 + if 2: + return 3 + + def g(): + pass + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=2) + differ.parse(code3, parsers=2, copies=1) + differ.parse(code1, parsers=2, copies=1) + + +def test_error_dedent_issues(differ): + code1 = dedent('''\ + while True: + try: + 1 + except KeyError: + if 2: + 3 + except IndexError: + 4 + + 5 + ''') + + code2 = dedent('''\ + while True: + try: + except KeyError: + 1 + except KeyError: + if 2: + 3 + except IndexError: + 4 + + something_inserted + 5 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=0) + + +def test_random_text_insertion(differ): + code1 = dedent('''\ +class C: + def f(): + return node + + def g(): + try: + 1 + except KeyError: + 2 + ''') + + code2 = dedent('''\ +class C: + def f(): + return node +Some'random text: yeah + for push in plan.dfa_pushes: + + def g(): + try: + 1 + except KeyError: + 2 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_many_nested_ifs(differ): + code1 = dedent('''\ + class C: + def f(self): + def iterate(): + if 1: + yield t + else: + yield + return + + def g(): + 3 + ''') + + code2 = dedent('''\ + def f(self): + def iterate(): + if 1: + yield t + hahahaha + if 2: + else: + yield + return + + def g(): + 3 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + + +@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5") +@pytest.mark.parametrize('prefix', ['', 'async ']) +def test_with_and_funcdef_in_call(differ, prefix): + code1 = prefix + dedent('''\ + with x: + la = C( + a=1, + b=2, + c=3, + ) + ''') + + code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n') + + differ.initialize(code1) + differ.parse(code2, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1) + + +def test_wrong_backslash(differ): + code1 = dedent('''\ + def y(): + 1 + for x in y: + continue + ''') + + code2 = insert_line_into_code(code1, 3, '\\.whl$\n') + + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + + +def test_random_unicode_characters(differ): + """ + Those issues were all found with the fuzzer. 
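+ Each parse call below feeds mostly invalid input to the diff parser; the
+ checks only require that parsing does not crash and that error leaves show
+ up where they are expected.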
+ """ + differ.initialize('') + differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, + expect_error_leaves=True) + differ.parse(u'\r\r', parsers=1) + differ.parse(u"˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True) + differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1, + expect_error_leaves=sys.version_info[0] == 2) + s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):' + differ.parse(s, parsers=1, expect_error_leaves=True) + differ.parse('') + differ.parse(s + '\n', parsers=1, expect_error_leaves=True) + differ.parse(u' result = (\r\f\x17\t\x11res)', parsers=1, expect_error_leaves=True) + differ.parse('') + differ.parse(' a( # xx\ndef', parsers=1, expect_error_leaves=True) + + +def test_dedent_end_positions(differ): + code1 = dedent('''\ + if 1: + if b: + 2 + c = { + 5} + ''') + code2 = dedent(u'''\ + if 1: + if ⌟ഒᜈྡྷṭb: + 2 + 'l': ''} + c = { + 5} + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1) + + +def test_special_no_newline_ending(differ): + code1 = dedent('''\ + 1 + ''') + code2 = dedent('''\ + 1 + is ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=0) + + +def test_random_character_insertion(differ): + code1 = dedent('''\ + def create(self): + 1 + if self.path is not None: + return + # 3 + # 4 + ''') + code2 = dedent('''\ + def create(self): + 1 + if 2: + x return + # 3 + # 4 + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1) + + +def test_import_opening_bracket(differ): + code1 = dedent('''\ + 1 + 2 + from bubu import (X, + ''') + code2 = dedent('''\ + 11 + 2 + from bubu import (X, + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=2, expect_error_leaves=True) + + +def test_opening_bracket_at_end(differ): + code1 = dedent('''\ + class C: + 1 + [ + ''') + code2 = dedent('''\ + 3 + class C: + 1 + [ + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True) + + +def test_all_sorts_of_indentation(differ): + code1 = dedent('''\ + class C: + 1 + def f(): + 'same' + + if foo: + a = b + end + ''') + code2 = dedent('''\ + class C: + 1 + def f(yield await %|( + 'same' + + \x02\x06\x0f\x1c\x11 + if foo: + a = b + + end + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True) + + code3 = dedent('''\ + if 1: + a + b + c + d + \x00 + ''') + differ.parse(code3, parsers=1, expect_error_leaves=True) + differ.parse('') + + +def test_dont_copy_dedents_in_beginning(differ): + code1 = dedent('''\ + a + 4 + ''') + code2 = dedent('''\ + 1 + 2 + 3 + 4 + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + + +def test_dont_copy_error_leaves(differ): + code1 = dedent('''\ + def f(n): + x + if 2: + 3 + ''') + code2 = dedent('''\ + def f(n): + def if 1: + indent + x + if 2: + 3 + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1) + + +def test_error_dedent_in_between(differ): + code1 = dedent('''\ + class C: + def f(): + a + if something: + x + z 
+ ''') + code2 = dedent('''\ + class C: + def f(): + a + dedent + if other_thing: + b + if something: + x + z + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=2) + + +def test_some_other_indentation_issues(differ): + code1 = dedent('''\ + class C: + x + def f(): + "" + copied + a + ''') + code2 = dedent('''\ + try: + de + a + b + c + d + def f(): + "" + copied + a + ''') + differ.initialize(code1) + differ.parse(code2, copies=0, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1) + + +def test_open_bracket_case1(differ): + code1 = dedent('''\ + class C: + 1 + 2 # ha + ''') + code2 = insert_line_into_code(code1, 2, ' [str\n') + code3 = insert_line_into_code(code2, 4, ' str\n') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code3, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1) + + +def test_open_bracket_case2(differ): + code1 = dedent('''\ + class C: + def f(self): + ( + b + c + + def g(self): + d + ''') + code2 = dedent('''\ + class C: + def f(self): + ( + b + c + self. + + def g(self): + d + ''') + differ.initialize(code1) + differ.parse(code2, copies=0, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=0, parsers=1, expect_error_leaves=True) + + +def test_some_weird_removals(differ): + code1 = dedent('''\ + class C: + 1 + ''') + code2 = dedent('''\ + class C: + 1 + @property + A + return + # x + omega + ''') + code3 = dedent('''\ + class C: + 1 + ; + omega + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code3, copies=1, parsers=3, expect_error_leaves=True) + differ.parse(code1, copies=1) + + +@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5") +def test_async_copy(differ): + code1 = dedent('''\ + async def main(): + x = 3 + print( + ''') + code2 = dedent('''\ + async def main(): + x = 3 + print() + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1) + differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True) + + +def test_parent_on_decorator(differ): + code1 = dedent('''\ + class AClass: + @decorator() + def b_test(self): + print("Hello") + print("world") + + def a_test(self): + pass''') + code2 = dedent('''\ + class AClass: + @decorator() + def b_test(self): + print("Hello") + print("world") + + def a_test(self): + pass''') + differ.initialize(code1) + module_node = differ.parse(code2, parsers=1) + cls = module_node.children[0] + cls_suite = cls.children[-1] + assert len(cls_suite.children) == 3 + + +def test_wrong_indent_in_def(differ): + code1 = dedent('''\ + def x(): + a + b + ''') + + code2 = dedent('''\ + def x(): + // + b + c + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1) + + +def test_backslash_issue(differ): + code1 = dedent(''' + pre = ( + '') + after = 'instead' + ''') + code2 = dedent(''' + pre = ( + '') + \\if + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + + +def test_paren_with_indentation(differ): + code1 = dedent(''' + class C: + def f(self, fullname, path=None): + x + + def load_module(self, fullname): + a + for prefix in self.search_path: + try: + b + except ImportError: + c + else: + raise + def x(): + pass + ''') + 
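+ # In code2 the "def load_module" line is replaced by a bare "(", so the rest
+ # of the class body ends up inside an unclosed bracket and error leaves are
+ # expected when it is parsed.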
code2 = dedent(''' + class C: + def f(self, fullname, path=None): + x + + ( + a + for prefix in self.search_path: + try: + b + except ImportError: + c + else: + raise + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=3, copies=1) + + +def test_error_dedent_in_function(differ): + code1 = dedent('''\ + def x(): + a + b + c + d + ''') + code2 = dedent('''\ + def x(): + a + b + c + d + e + ''') + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + + +def test_with_formfeed(differ): + code1 = dedent('''\ + @bla + async def foo(): + 1 + yield from [] + return + return '' + ''') + code2 = dedent('''\ + @bla + async def foo(): + 1 + \x0cimport + return + return '' + ''') + differ.initialize(code1) + differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True) + + +def test_repeating_invalid_indent(differ): + code1 = dedent('''\ + def foo(): + return + + @bla + a + def foo(): + a + b + c + ''') + code2 = dedent('''\ + def foo(): + return + + @bla + a + b + c + ''') + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + + +def test_another_random_indent(differ): + code1 = dedent('''\ + def foo(): + a + b + c + return + def foo(): + d + ''') + code2 = dedent('''\ + def foo(): + a + c + return + def foo(): + d + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=3) + + +def test_invalid_function(differ): + code1 = dedent('''\ + a + def foo(): + def foo(): + b + ''') + code2 = dedent('''\ + a + def foo(): + def foo(): + b + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_async_func2(differ): + code1 = dedent('''\ + async def foo(): + return '' + @bla + async def foo(): + x + ''') + code2 = dedent('''\ + async def foo(): + return '' + + { + @bla + async def foo(): + x + y + ''') + differ.initialize(code1) + differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True) + + +def test_weird_ending(differ): + code1 = dedent('''\ + def foo(): + a + return + ''') + code2 = dedent('''\ + def foo(): + a + nonlocal xF""" + y"""''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_nested_class(differ): + code1 = dedent('''\ +def c(): + a = 3 + class X: + b + ''') + code2 = dedent('''\ +def c(): + a = 3 + class X: + elif + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_class_with_paren_breaker(differ): + code1 = dedent('''\ +class Grammar: + x + def parse(): + y + parser( + ) + z + ''') + code2 = dedent('''\ +class Grammar: + x + def parse(): + y + parser( + finally ; + ) + z + ''') + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True) + + +def test_byte_order_mark(differ): + code2 = dedent('''\ + + x + \ufeff + else : + ''') + differ.initialize('\n') + differ.parse(code2, parsers=2, expect_error_leaves=True) + + code3 = dedent('''\ + \ufeff + if: + + x + ''') + differ.initialize('\n') + differ.parse(code3, parsers=2, expect_error_leaves=True) + + +def test_byte_order_mark2(differ): + code = u'\ufeff# foo' + differ.initialize(code) + differ.parse(code + 'x', parsers=ANY) + + +def test_byte_order_mark3(differ): + code1 = u"\ufeff#\ny\n" + code2 = u'x\n\ufeff#\n\ufeff#\ny\n' + differ.initialize(code1) + differ.parse(code2, expect_error_leaves=True, parsers=ANY, copies=ANY) + 
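+ # Going back to code1 (just the BOM-prefixed comment and "y") must parse
+ # again without error leaves.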
differ.parse(code1, parsers=1) + + +def test_backslash_insertion(differ): + code1 = dedent(''' + def f(): + x + def g(): + base = "" \\ + "" + return + ''') + code2 = dedent(''' + def f(): + x + def g(): + base = "" \\ + def h(): + "" + return + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_fstring_with_error_leaf(differ): + code1 = dedent("""\ + def f(): + x + def g(): + y + """) + code2 = dedent("""\ + def f(): + x + F''' + def g(): + y + {a + \x01 + """) + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_yet_another_backslash(differ): + code1 = dedent('''\ + def f(): + x + def g(): + y + base = "" \\ + "" % to + return + ''') + code2 = dedent('''\ + def f(): + x + def g(): + y + base = "" \\ + \x0f + return + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True) + differ.parse(code1, parsers=ANY, copies=ANY) + + +def test_backslash_before_def(differ): + code1 = dedent('''\ + def f(): + x + + def g(): + y + z + ''') + code2 = dedent('''\ + def f(): + x + >\\ + def g(): + y + x + z + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True) + + +def test_backslash_with_imports(differ): + code1 = dedent('''\ + from x import y, \\ + ''') + code2 = dedent('''\ + from x import y, \\ + z + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1) + differ.parse(code1, parsers=1) + + +def test_one_line_function_error_recovery(differ): + code1 = dedent('''\ + class X: + x + def y(): word """ + # a + # b + c(self) + ''') + code2 = dedent('''\ + class X: + x + def y(): word """ + # a + # b + c(\x01+self) + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_one_line_property_error_recovery(differ): + code1 = dedent('''\ + class X: + x + @property + def encoding(self): True - + return 1 + ''') + code2 = dedent('''\ + class X: + x + @property + def encoding(self): True - + return 1 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) diff --git a/contrib/python/parso/py2/tests/test_error_recovery.py b/contrib/python/parso/py2/tests/test_error_recovery.py new file mode 100644 index 0000000000..d0d3f7bbb2 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_error_recovery.py @@ -0,0 +1,149 @@ +from textwrap import dedent + +from parso import parse, load_grammar + + +def test_with_stmt(): + module = parse('with x: f.\na') + assert module.children[0].type == 'with_stmt' + w, with_item, colon, f = module.children[0].children + assert f.type == 'error_node' + assert f.get_code(include_prefix=False) == 'f.' 
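+ # The "a" on the second input line is untouched by the error recovery and is
+ # parsed as a plain name node.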
+ + assert module.children[2].type == 'name' + + +def test_one_line_function(each_version): + module = parse('def x(): f.', version=each_version) + assert module.children[0].type == 'funcdef' + def_, name, parameters, colon, f = module.children[0].children + assert f.type == 'error_node' + + module = parse('def x(a:', version=each_version) + func = module.children[0] + assert func.type == 'error_node' + if each_version.startswith('2'): + assert func.children[-1].value == 'a' + else: + assert func.children[-1] == ':' + + +def test_if_else(): + module = parse('if x:\n f.\nelse:\n g(') + if_stmt = module.children[0] + if_, test, colon, suite1, else_, colon, suite2 = if_stmt.children + f = suite1.children[1] + assert f.type == 'error_node' + assert f.children[0].value == 'f' + assert f.children[1].value == '.' + g = suite2.children[1] + assert g.children[0].value == 'g' + assert g.children[1].value == '(' + + +def test_if_stmt(): + module = parse('if x: f.\nelse: g(') + if_stmt = module.children[0] + assert if_stmt.type == 'if_stmt' + if_, test, colon, f = if_stmt.children + assert f.type == 'error_node' + assert f.children[0].value == 'f' + assert f.children[1].value == '.' + + assert module.children[1].type == 'newline' + assert module.children[1].value == '\n' + assert module.children[2].type == 'error_leaf' + assert module.children[2].value == 'else' + assert module.children[3].type == 'error_leaf' + assert module.children[3].value == ':' + + in_else_stmt = module.children[4] + assert in_else_stmt.type == 'error_node' + assert in_else_stmt.children[0].value == 'g' + assert in_else_stmt.children[1].value == '(' + + +def test_invalid_token(): + module = parse('a + ? + b') + error_node, q, plus_b, endmarker = module.children + assert error_node.get_code() == 'a +' + assert q.value == '?' + assert q.type == 'error_leaf' + assert plus_b.type == 'factor' + assert plus_b.get_code() == ' + b' + + +def test_invalid_token_in_fstr(): + module = load_grammar(version='3.6').parse('f"{a + ? + b}"') + error_node, q, plus_b, error1, error2, endmarker = module.children + assert error_node.get_code() == 'f"{a +' + assert q.value == '?' 
+ assert q.type == 'error_leaf' + assert plus_b.type == 'error_node' + assert plus_b.get_code() == ' + b' + assert error1.value == '}' + assert error1.type == 'error_leaf' + assert error2.value == '"' + assert error2.type == 'error_leaf' + + +def test_dedent_issues1(): + code = dedent('''\ + class C: + @property + f + g + end + ''') + module = load_grammar(version='3.8').parse(code) + klass, endmarker = module.children + suite = klass.children[-1] + assert suite.children[2].type == 'error_leaf' + assert suite.children[3].get_code(include_prefix=False) == 'f\n' + assert suite.children[5].get_code(include_prefix=False) == 'g\n' + assert suite.type == 'suite' + + +def test_dedent_issues2(): + code = dedent('''\ + class C: + @property + if 1: + g + else: + h + end + ''') + module = load_grammar(version='3.8').parse(code) + klass, endmarker = module.children + suite = klass.children[-1] + assert suite.children[2].type == 'error_leaf' + if_ = suite.children[3] + assert if_.children[0] == 'if' + assert if_.children[3].type == 'suite' + assert if_.children[3].get_code() == '\n g\n' + assert if_.children[4] == 'else' + assert if_.children[6].type == 'suite' + assert if_.children[6].get_code() == '\n h\n' + + assert suite.children[4].get_code(include_prefix=False) == 'end\n' + assert suite.type == 'suite' + + +def test_dedent_issues3(): + code = dedent('''\ + class C: + f + g + ''') + module = load_grammar(version='3.8').parse(code) + klass, endmarker = module.children + suite = klass.children[-1] + assert len(suite.children) == 4 + assert suite.children[1].get_code() == ' f\n' + assert suite.children[1].type == 'simple_stmt' + assert suite.children[2].get_code() == '' + assert suite.children[2].type == 'error_leaf' + assert suite.children[2].token_type == 'ERROR_DEDENT' + assert suite.children[3].get_code() == ' g\n' + assert suite.children[3].type == 'simple_stmt' diff --git a/contrib/python/parso/py2/tests/test_file_python_errors.py b/contrib/python/parso/py2/tests/test_file_python_errors.py new file mode 100644 index 0000000000..7083dfeb46 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_file_python_errors.py @@ -0,0 +1,23 @@ +import os + +import parso + + +def get_python_files(path): + for dir_path, dir_names, file_names in os.walk(path): + for file_name in file_names: + if file_name.endswith('.py'): + yield os.path.join(dir_path, file_name) + + +def test_on_itself(each_version): + """ + There are obviously no syntax erros in the Python code of parso. However + parso should output the same for all versions. 
+ """ + grammar = parso.load_grammar(version=each_version) + path = os.path.dirname(os.path.dirname(__file__)) + '/parso' + for file in get_python_files(path): + tree = grammar.parse(path=file) + errors = list(grammar.iter_errors(tree)) + assert not errors diff --git a/contrib/python/parso/py2/tests/test_fstring.py b/contrib/python/parso/py2/tests/test_fstring.py new file mode 100644 index 0000000000..2a07ce7015 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_fstring.py @@ -0,0 +1,138 @@ +import pytest +from textwrap import dedent + +from parso import load_grammar, ParserSyntaxError +from parso.python.tokenize import tokenize + + +@pytest.fixture +def grammar(): + return load_grammar(version='3.8') + + +@pytest.mark.parametrize( + 'code', [ + # simple cases + 'f"{1}"', + 'f"""{1}"""', + 'f"{foo} {bar}"', + + # empty string + 'f""', + 'f""""""', + + # empty format specifier is okay + 'f"{1:}"', + + # use of conversion options + 'f"{1!a}"', + 'f"{1!a:1}"', + + # format specifiers + 'f"{1:1}"', + 'f"{1:1.{32}}"', + 'f"{1::>4}"', + 'f"{x:{y}}"', + 'f"{x:{y:}}"', + 'f"{x:{y:1}}"', + + # Escapes + 'f"{{}}"', + 'f"{{{1}}}"', + 'f"{{{1}"', + 'f"1{{2{{3"', + 'f"}}"', + + # New Python 3.8 syntax f'{a=}' + 'f"{a=}"', + 'f"{a()=}"', + + # multiline f-string + 'f"""abc\ndef"""', + 'f"""abc{\n123}def"""', + + # a line continuation inside of an fstring_string + 'f"abc\\\ndef"', + 'f"\\\n{123}\\\n"', + + # a line continuation inside of an fstring_expr + 'f"{\\\n123}"', + + # a line continuation inside of an format spec + 'f"{123:.2\\\nf}"', + ] +) +def test_valid(code, grammar): + module = grammar.parse(code, error_recovery=False) + fstring = module.children[0] + assert fstring.type == 'fstring' + assert fstring.get_code() == code + + +@pytest.mark.parametrize( + 'code', [ + # an f-string can't contain unmatched curly braces + 'f"}"', + 'f"{"', + 'f"""}"""', + 'f"""{"""', + + # invalid conversion characters + 'f"{1!{a}}"', + 'f"{!{a}}"', + + # The curly braces must contain an expression + 'f"{}"', + 'f"{:}"', + 'f"{:}}}"', + 'f"{:1}"', + 'f"{!:}"', + 'f"{!}"', + 'f"{!a}"', + + # invalid (empty) format specifiers + 'f"{1:{}}"', + 'f"{1:{:}}"', + + # a newline without a line continuation inside a single-line string + 'f"abc\ndef"', + ] +) +def test_invalid(code, grammar): + with pytest.raises(ParserSyntaxError): + grammar.parse(code, error_recovery=False) + + # It should work with error recovery. + grammar.parse(code, error_recovery=True) + + +@pytest.mark.parametrize( + ('code', 'positions'), [ + # 2 times 2, 5 because python expr and endmarker. 
+ ('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]), + ('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9), + (1, 10), (1, 11), (1, 12), (1, 13)]), + ('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1), + (4, 2), (4, 5)]), + ] +) +def test_tokenize_start_pos(code, positions): + tokens = list(tokenize(code, version_info=(3, 6))) + assert positions == [p.start_pos for p in tokens] + + +@pytest.mark.parametrize( + 'code', [ + dedent("""\ + f'''s{ + str.uppe + ''' + """), + 'f"foo', + 'f"""foo', + 'f"abc\ndef"', + ] +) +def test_roundtrip(grammar, code): + tree = grammar.parse(code) + assert tree.get_code() == code diff --git a/contrib/python/parso/py2/tests/test_get_code.py b/contrib/python/parso/py2/tests/test_get_code.py new file mode 100644 index 0000000000..d99d792b93 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_get_code.py @@ -0,0 +1,133 @@ +import difflib + +import pytest + +from parso import parse + +code_basic_features = ''' +"""A mod docstring""" + +def a_function(a_argument, a_default = "default"): + """A func docstring""" + + a_result = 3 * a_argument + print(a_result) # a comment + b = """ +from +to""" + "huhu" + + + if a_default == "default": + return str(a_result) + else + return None +''' + + +def diff_code_assert(a, b, n=4): + if a != b: + diff = "\n".join(difflib.unified_diff( + a.splitlines(), + b.splitlines(), + n=n, + lineterm="" + )) + assert False, "Code does not match:\n%s\n\ncreated code:\n%s" % ( + diff, + b + ) + pass + + +def test_basic_parsing(): + """Validate the parsing features""" + + m = parse(code_basic_features) + diff_code_assert( + code_basic_features, + m.get_code() + ) + + +def test_operators(): + src = '5 * 3' + module = parse(src) + diff_code_assert(src, module.get_code()) + + +def test_get_code(): + """Use the same code that the parser also generates, to compare""" + s = '''"""a docstring""" +class SomeClass(object, mixin): + def __init__(self): + self.xy = 3.0 + """statement docstr""" + def some_method(self): + return 1 + def yield_method(self): + while hasattr(self, 'xy'): + yield True + for x in [1, 2]: + yield x + def empty(self): + pass +class Empty: + pass +class WithDocstring: + """class docstr""" + pass +def method_with_docstring(): + """class docstr""" + pass +''' + assert parse(s).get_code() == s + + +def test_end_newlines(): + """ + The Python grammar explicitly needs a newline at the end. Jedi though still + wants to be able, to return the exact same code without the additional new + line the parser needs. + """ + def test(source, end_pos): + module = parse(source) + assert module.get_code() == source + assert module.end_pos == end_pos + + test('a', (1, 1)) + test('a\n', (2, 0)) + test('a\nb', (2, 1)) + test('a\n#comment\n', (3, 0)) + test('a\n#comment', (2, 8)) + test('a#comment', (1, 9)) + test('def a():\n pass', (2, 5)) + + test('def a(', (1, 6)) + + +@pytest.mark.parametrize(('code', 'types'), [ + ('\r', ['endmarker']), + ('\n\r', ['endmarker']) +]) +def test_carriage_return_at_end(code, types): + """ + By adding an artificial newline this created weird side effects for + \r at the end of files. 
+ """ + tree = parse(code) + assert tree.get_code() == code + assert [c.type for c in tree.children] == types + assert tree.end_pos == (len(code) + 1, 0) + + +@pytest.mark.parametrize('code', [ + ' ', + ' F"""', + ' F"""\n', + ' F""" \n', + ' F""" \n3', + ' f"""\n"""', + ' f"""\n"""\n', +]) +def test_full_code_round_trip(code): + assert parse(code).get_code() == code diff --git a/contrib/python/parso/py2/tests/test_grammar.py b/contrib/python/parso/py2/tests/test_grammar.py new file mode 100644 index 0000000000..60a249b8f1 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_grammar.py @@ -0,0 +1,8 @@ +import parso + +import pytest + + +def test_non_unicode(): + with pytest.raises(UnicodeDecodeError): + parso.parse(b'\xe4') diff --git a/contrib/python/parso/py2/tests/test_load_grammar.py b/contrib/python/parso/py2/tests/test_load_grammar.py new file mode 100644 index 0000000000..0ea648eb3e --- /dev/null +++ b/contrib/python/parso/py2/tests/test_load_grammar.py @@ -0,0 +1,31 @@ +import pytest +from parso.grammar import load_grammar +from parso import utils + + +def test_load_inexisting_grammar(): + # This version shouldn't be out for a while, but if we ever do, wow! + with pytest.raises(NotImplementedError): + load_grammar(version='15.8') + # The same is true for very old grammars (even though this is probably not + # going to be an issue. + with pytest.raises(NotImplementedError): + load_grammar(version='1.5') + + +@pytest.mark.parametrize(('string', 'result'), [ + ('2', (2, 7)), ('3', (3, 6)), ('1.1', (1, 1)), ('1.1.1', (1, 1)), ('300.1.31', (300, 1)) +]) +def test_parse_version(string, result): + assert utils._parse_version(string) == result + + +@pytest.mark.parametrize('string', ['1.', 'a', '#', '1.3.4.5']) +def test_invalid_grammar_version(string): + with pytest.raises(ValueError): + load_grammar(version=string) + + +def test_grammar_int_version(): + with pytest.raises(TypeError): + load_grammar(version=3.8) diff --git a/contrib/python/parso/py2/tests/test_normalizer_issues_files.py b/contrib/python/parso/py2/tests/test_normalizer_issues_files.py new file mode 100644 index 0000000000..2aea1dadaf --- /dev/null +++ b/contrib/python/parso/py2/tests/test_normalizer_issues_files.py @@ -0,0 +1,70 @@ +""" +To easily verify if our normalizer raises the right error codes, just use the +tests of pydocstyle. 
+""" + +import difflib +import re +from functools import total_ordering + +import parso +from parso.utils import python_bytes_to_unicode + + +@total_ordering +class WantedIssue(object): + def __init__(self, code, line, column): + self.code = code + self._line = line + self._column = column + + def __eq__(self, other): + return self.code == other.code and self.start_pos == other.start_pos + + def __lt__(self, other): + return self.start_pos < other.start_pos or self.code < other.code + + def __hash__(self): + return hash(str(self.code) + str(self._line) + str(self._column)) + + @property + def start_pos(self): + return self._line, self._column + + +def collect_errors(code): + for line_nr, line in enumerate(code.splitlines(), 1): + match = re.match(r'(\s*)#: (.*)$', line) + if match is not None: + codes = match.group(2) + for code in codes.split(): + code, _, add_indent = code.partition(':') + column = int(add_indent or len(match.group(1))) + + code, _, add_line = code.partition('+') + l = line_nr + 1 + int(add_line or 0) + + yield WantedIssue(code[1:], l, column) + + +def test_normalizer_issue(normalizer_issue_case): + def sort(issues): + issues = sorted(issues, key=lambda i: (i.start_pos, i.code)) + return ["(%s, %s): %s" % (i.start_pos[0], i.start_pos[1], i.code) + for i in issues] + + with open(normalizer_issue_case.path, 'rb') as f: + code = python_bytes_to_unicode(f.read()) + + desired = sort(collect_errors(code)) + + grammar = parso.load_grammar(version=normalizer_issue_case.python_version) + module = grammar.parse(code) + issues = grammar._get_normalizer_issues(module) + actual = sort(issues) + + diff = '\n'.join(difflib.ndiff(desired, actual)) + # To make the pytest -v diff a bit prettier, stop pytest to rewrite assert + # statements by executing the comparison earlier. + _bool = desired == actual + assert _bool, '\n' + diff diff --git a/contrib/python/parso/py2/tests/test_old_fast_parser.py b/contrib/python/parso/py2/tests/test_old_fast_parser.py new file mode 100644 index 0000000000..7e12a0335a --- /dev/null +++ b/contrib/python/parso/py2/tests/test_old_fast_parser.py @@ -0,0 +1,210 @@ +""" +These tests test the cases that the old fast parser tested with the normal +parser. + +The old fast parser doesn't exist anymore and was replaced with a diff parser. +However the tests might still be relevant for the parser. 
+""" + +from textwrap import dedent + +from parso._compatibility import u +from parso import parse + + +def test_carriage_return_splitting(): + source = u(dedent(''' + + + + "string" + + class Foo(): + pass + ''')) + source = source.replace('\n', '\r\n') + module = parse(source) + assert [n.value for lst in module.get_used_names().values() for n in lst] == ['Foo'] + + +def check_p(src, number_parsers_used, number_of_splits=None, number_of_misses=0): + if number_of_splits is None: + number_of_splits = number_parsers_used + + module_node = parse(src) + + assert src == module_node.get_code() + return module_node + + +def test_for(): + src = dedent("""\ + for a in [1,2]: + a + + for a1 in 1,"": + a1 + """) + check_p(src, 1) + + +def test_class_with_class_var(): + src = dedent("""\ + class SuperClass: + class_super = 3 + def __init__(self): + self.foo = 4 + pass + """) + check_p(src, 3) + + +def test_func_with_if(): + src = dedent("""\ + def recursion(a): + if foo: + return recursion(a) + else: + if bar: + return inexistent + else: + return a + """) + check_p(src, 1) + + +def test_decorator(): + src = dedent("""\ + class Decorator(): + @memoize + def dec(self, a): + return a + """) + check_p(src, 2) + + +def test_nested_funcs(): + src = dedent("""\ + def memoize(func): + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + return wrapper + """) + check_p(src, 3) + + +def test_multi_line_params(): + src = dedent("""\ + def x(a, + b): + pass + + foo = 1 + """) + check_p(src, 2) + + +def test_class_func_if(): + src = dedent("""\ + class Class: + def func(self): + if 1: + a + else: + b + + pass + """) + check_p(src, 3) + + +def test_multi_line_for(): + src = dedent("""\ + for x in [1, + 2]: + pass + + pass + """) + check_p(src, 1) + + +def test_wrong_indentation(): + src = dedent("""\ + def func(): + a + b + a + """) + #check_p(src, 1) + + src = dedent("""\ + def complex(): + def nested(): + a + b + a + + def other(): + pass + """) + check_p(src, 3) + + +def test_strange_parentheses(): + src = dedent(""" + class X(): + a = (1 + if 1 else 2) + def x(): + pass + """) + check_p(src, 2) + + +def test_fake_parentheses(): + """ + The fast parser splitting counts parentheses, but not as correct tokens. + Therefore parentheses in string tokens are included as well. This needs to + be accounted for. + """ + src = dedent(r""" + def x(): + a = (')' + if 1 else 2) + def y(): + pass + def z(): + pass + """) + check_p(src, 3, 2, 1) + + +def test_additional_indent(): + source = dedent('''\ + int( + def x(): + pass + ''') + + check_p(source, 2) + + +def test_round_trip(): + code = dedent(''' + def x(): + """hahaha""" + func''') + + assert parse(code).get_code() == code + + +def test_parentheses_in_string(): + code = dedent(''' + def x(): + '(' + + import abc + + abc.''') + check_p(code, 2, 1, 1) diff --git a/contrib/python/parso/py2/tests/test_param_splitting.py b/contrib/python/parso/py2/tests/test_param_splitting.py new file mode 100644 index 0000000000..f04fea7d45 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_param_splitting.py @@ -0,0 +1,47 @@ +''' +To make the life of any analysis easier, we are generating Param objects +instead of simple parser objects. 
+''' + +from textwrap import dedent + +from parso import parse + + +def assert_params(param_string, version=None, **wanted_dct): + source = dedent(''' + def x(%s): + pass + ''') % param_string + + module = parse(source, version=version) + funcdef = next(module.iter_funcdefs()) + dct = dict((p.name.value, p.default and p.default.get_code()) + for p in funcdef.get_params()) + assert dct == wanted_dct + assert module.get_code() == source + + +def test_split_params_with_separation_star(): + assert_params(u'x, y=1, *, z=3', x=None, y='1', z='3', version='3.5') + assert_params(u'*, x', x=None, version='3.5') + assert_params(u'*', version='3.5') + + +def test_split_params_with_stars(): + assert_params(u'x, *args', x=None, args=None) + assert_params(u'**kwargs', kwargs=None) + assert_params(u'*args, **kwargs', args=None, kwargs=None) + + +def test_kw_only_no_kw(works_ge_py3): + """ + Parsing this should be working. In CPython the parser also parses this and + in a later step the AST complains. + """ + module = works_ge_py3.parse('def test(arg, *):\n pass') + if module is not None: + func = module.children[0] + open_, p1, asterisk, close = func._get_param_nodes() + assert p1.get_code('arg,') + assert asterisk.value == '*' diff --git a/contrib/python/parso/py2/tests/test_parser.py b/contrib/python/parso/py2/tests/test_parser.py new file mode 100644 index 0000000000..e9a9ddab47 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_parser.py @@ -0,0 +1,223 @@ +# -*- coding: utf-8 -*- +from textwrap import dedent + +import pytest + +from parso._compatibility import u +from parso import parse +from parso.python import tree +from parso.utils import split_lines + + +def test_basic_parsing(each_version): + def compare(string): + """Generates the AST object and then regenerates the code.""" + assert parse(string, version=each_version).get_code() == string + + compare('\na #pass\n') + compare('wblabla* 1\t\n') + compare('def x(a, b:3): pass\n') + compare('assert foo\n') + + +def test_subscope_names(each_version): + def get_sub(source): + return parse(source, version=each_version).children[0] + + name = get_sub('class Foo: pass').name + assert name.start_pos == (1, len('class ')) + assert name.end_pos == (1, len('class Foo')) + assert name.value == 'Foo' + + name = get_sub('def foo(): pass').name + assert name.start_pos == (1, len('def ')) + assert name.end_pos == (1, len('def foo')) + assert name.value == 'foo' + + +def test_import_names(each_version): + def get_import(source): + return next(parse(source, version=each_version).iter_imports()) + + imp = get_import('import math\n') + names = imp.get_defined_names() + assert len(names) == 1 + assert names[0].value == 'math' + assert names[0].start_pos == (1, len('import ')) + assert names[0].end_pos == (1, len('import math')) + + assert imp.start_pos == (1, 0) + assert imp.end_pos == (1, len('import math')) + + +def test_end_pos(each_version): + s = dedent(''' + x = ['a', 'b', 'c'] + def func(): + y = None + ''') + parser = parse(s, version=each_version) + scope = next(parser.iter_funcdefs()) + assert scope.start_pos == (3, 0) + assert scope.end_pos == (5, 0) + + +def test_carriage_return_statements(each_version): + source = dedent(''' + foo = 'ns1!' + + # this is a namespace package + ''') + source = source.replace('\n', '\r\n') + stmt = parse(source, version=each_version).children[0] + assert '#' not in stmt.get_code() + + +def test_incomplete_list_comprehension(each_version): + """ Shouldn't raise an error, same bug as #418. 
""" + # With the old parser this actually returned a statement. With the new + # parser only valid statements generate one. + children = parse('(1 for def', version=each_version).children + assert [c.type for c in children] == \ + ['error_node', 'error_node', 'endmarker'] + + +def test_newline_positions(each_version): + endmarker = parse('a\n', version=each_version).children[-1] + assert endmarker.end_pos == (2, 0) + new_line = endmarker.get_previous_leaf() + assert new_line.start_pos == (1, 1) + assert new_line.end_pos == (2, 0) + + +def test_end_pos_error_correction(each_version): + """ + Source code without ending newline are given one, because the Python + grammar needs it. However, they are removed again. We still want the right + end_pos, even if something breaks in the parser (error correction). + """ + s = 'def x():\n .' + m = parse(s, version=each_version) + func = m.children[0] + assert func.type == 'funcdef' + assert func.end_pos == (2, 2) + assert m.end_pos == (2, 2) + + +def test_param_splitting(each_version): + """ + Jedi splits parameters into params, this is not what the grammar does, + but Jedi does this to simplify argument parsing. + """ + def check(src, result): + # Python 2 tuple params should be ignored for now. + m = parse(src, version=each_version) + if each_version.startswith('2'): + # We don't want b and c to be a part of the param enumeration. Just + # ignore them, because it's not what we want to support in the + # future. + func = next(m.iter_funcdefs()) + assert [param.name.value for param in func.get_params()] == result + else: + assert not list(m.iter_funcdefs()) + + check('def x(a, (b, c)):\n pass', ['a']) + check('def x((b, c)):\n pass', []) + + +def test_unicode_string(): + s = tree.String(None, u('bö'), (0, 0)) + assert repr(s) # Should not raise an Error! + + +def test_backslash_dos_style(each_version): + assert parse('\\\r\n', version=each_version) + + +def test_started_lambda_stmt(each_version): + m = parse(u'lambda a, b: a i', version=each_version) + assert m.children[0].type == 'error_node' + + +def test_python2_octal(each_version): + module = parse('0660', version=each_version) + first = module.children[0] + if each_version.startswith('2'): + assert first.type == 'number' + else: + assert first.type == 'error_node' + + +@pytest.mark.parametrize('code', ['foo "', 'foo """\n', 'foo """\nbar']) +def test_open_string_literal(each_version, code): + """ + Testing mostly if removing the last newline works. 
+ """ + lines = split_lines(code, keepends=True) + end_pos = (len(lines), len(lines[-1])) + module = parse(code, version=each_version) + assert module.get_code() == code + assert module.end_pos == end_pos == module.children[1].end_pos + + +def test_too_many_params(): + with pytest.raises(TypeError): + parse('asdf', hello=3) + + +def test_dedent_at_end(each_version): + code = dedent(''' + for foobar in [1]: + foobar''') + module = parse(code, version=each_version) + assert module.get_code() == code + suite = module.children[0].children[-1] + foobar = suite.children[-1] + assert foobar.type == 'name' + + +def test_no_error_nodes(each_version): + def check(node): + assert node.type not in ('error_leaf', 'error_node') + + try: + children = node.children + except AttributeError: + pass + else: + for child in children: + check(child) + + check(parse("if foo:\n bar", version=each_version)) + + +def test_named_expression(works_ge_py38): + works_ge_py38.parse("(a := 1, a + 1)") + +def test_extended_rhs_annassign(works_ge_py38): + works_ge_py38.parse("x: y = z,") + works_ge_py38.parse("x: Tuple[int, ...] = z, *q, w") + +@pytest.mark.parametrize( + 'param_code', [ + 'a=1, /', + 'a, /', + 'a=1, /, b=3', + 'a, /, b', + 'a, /, b', + 'a, /, *, b', + 'a, /, **kwargs', + ] +) +def test_positional_only_arguments(works_ge_py38, param_code): + works_ge_py38.parse("def x(%s): pass" % param_code) + +@pytest.mark.parametrize( + 'expression', [ + 'a + a', + 'lambda x: x', + 'a := lambda x: x' + ] +) +def test_decorator_expression(works_ge_py39, expression): + works_ge_py39.parse("@%s\ndef x(): pass" % expression) diff --git a/contrib/python/parso/py2/tests/test_parser_tree.py b/contrib/python/parso/py2/tests/test_parser_tree.py new file mode 100644 index 0000000000..74084794cb --- /dev/null +++ b/contrib/python/parso/py2/tests/test_parser_tree.py @@ -0,0 +1,240 @@ +# -*- coding: utf-8 # This file contains Unicode characters. 
+ +from textwrap import dedent + +import pytest + +from parso import parse +from parso.python import tree + + +class TestsFunctionAndLambdaParsing(object): + + FIXTURES = [ + ('def my_function(x, y, z) -> str:\n return x + y * z\n', { + 'name': 'my_function', + 'call_sig': 'my_function(x, y, z)', + 'params': ['x', 'y', 'z'], + 'annotation': "str", + }), + ('lambda x, y, z: x + y * z\n', { + 'name': '<lambda>', + 'call_sig': '<lambda>(x, y, z)', + 'params': ['x', 'y', 'z'], + }), + ] + + @pytest.fixture(params=FIXTURES) + def node(self, request): + parsed = parse(dedent(request.param[0]), version='3.5') + request.keywords['expected'] = request.param[1] + child = parsed.children[0] + if child.type == 'simple_stmt': + child = child.children[0] + return child + + @pytest.fixture() + def expected(self, request, node): + return request.keywords['expected'] + + def test_name(self, node, expected): + if node.type != 'lambdef': + assert isinstance(node.name, tree.Name) + assert node.name.value == expected['name'] + + def test_params(self, node, expected): + assert isinstance(node.get_params(), list) + assert all(isinstance(x, tree.Param) for x in node.get_params()) + assert [str(x.name.value) for x in node.get_params()] == [x for x in expected['params']] + + def test_is_generator(self, node, expected): + assert node.is_generator() is expected.get('is_generator', False) + + def test_yields(self, node, expected): + assert node.is_generator() == expected.get('yields', False) + + def test_annotation(self, node, expected): + expected_annotation = expected.get('annotation', None) + if expected_annotation is None: + assert node.annotation is None + else: + assert node.annotation.value == expected_annotation + + +def test_end_pos_line(each_version): + # jedi issue #150 + s = "x()\nx( )\nx( )\nx ( )\n" + + module = parse(s, version=each_version) + for i, simple_stmt in enumerate(module.children[:-1]): + expr_stmt = simple_stmt.children[0] + assert expr_stmt.end_pos == (i + 1, i + 3) + + +def test_default_param(each_version): + func = parse('def x(foo=42): pass', version=each_version).children[0] + param, = func.get_params() + assert param.default.value == '42' + assert param.annotation is None + assert not param.star_count + + +def test_annotation_param(each_py3_version): + func = parse('def x(foo: 3): pass', version=each_py3_version).children[0] + param, = func.get_params() + assert param.default is None + assert param.annotation.value == '3' + assert not param.star_count + + +def test_annotation_params(each_py3_version): + func = parse('def x(foo: 3, bar: 4): pass', version=each_py3_version).children[0] + param1, param2 = func.get_params() + + assert param1.default is None + assert param1.annotation.value == '3' + assert not param1.star_count + + assert param2.default is None + assert param2.annotation.value == '4' + assert not param2.star_count + + +def test_default_and_annotation_param(each_py3_version): + func = parse('def x(foo:3=42): pass', version=each_py3_version).children[0] + param, = func.get_params() + assert param.default.value == '42' + assert param.annotation.value == '3' + assert not param.star_count + + +def test_ellipsis_py2(each_py2_version): + module = parse('[0][...]', version=each_py2_version, error_recovery=False) + expr = module.children[0] + trailer = expr.children[-1] + subscript = trailer.children[1] + assert subscript.type == 'subscript' + assert [leaf.value for leaf in subscript.children] == ['.', '.', '.'] + + +def get_yield_exprs(code, version): + return list(parse(code, 
version=version).children[0].iter_yield_exprs()) + + +def get_return_stmts(code): + return list(parse(code).children[0].iter_return_stmts()) + + +def get_raise_stmts(code, child): + return list(parse(code).children[child].iter_raise_stmts()) + + +def test_yields(each_version): + y, = get_yield_exprs('def x(): yield', each_version) + assert y.value == 'yield' + assert y.type == 'keyword' + + y, = get_yield_exprs('def x(): (yield 1)', each_version) + assert y.type == 'yield_expr' + + y, = get_yield_exprs('def x(): [1, (yield)]', each_version) + assert y.type == 'keyword' + + +def test_yield_from(): + y, = get_yield_exprs('def x(): (yield from 1)', '3.8') + assert y.type == 'yield_expr' + + +def test_returns(): + r, = get_return_stmts('def x(): return') + assert r.value == 'return' + assert r.type == 'keyword' + + r, = get_return_stmts('def x(): return 1') + assert r.type == 'return_stmt' + + +def test_raises(): + code = """ +def single_function(): + raise Exception +def top_function(): + def inner_function(): + raise NotImplementedError() + inner_function() + raise Exception +def top_function_three(): + try: + raise NotImplementedError() + except NotImplementedError: + pass + raise Exception + """ + + r = get_raise_stmts(code, 0) # Lists in a simple Function + assert len(list(r)) == 1 + + r = get_raise_stmts(code, 1) # Doesn't Exceptions list in closures + assert len(list(r)) == 1 + + r = get_raise_stmts(code, 2) # Lists inside try-catch + assert len(list(r)) == 2 + + +@pytest.mark.parametrize( + 'code, name_index, is_definition, include_setitem', [ + ('x = 3', 0, True, False), + ('x.y = 3', 0, False, False), + ('x.y = 3', 1, True, False), + ('x.y = u.v = z', 0, False, False), + ('x.y = u.v = z', 1, True, False), + ('x.y = u.v = z', 2, False, False), + ('x.y = u.v, w = z', 3, True, False), + ('x.y = u.v, w = z', 4, True, False), + ('x.y = u.v, w = z', 5, False, False), + + ('x, y = z', 0, True, False), + ('x, y = z', 1, True, False), + ('x, y = z', 2, False, False), + ('x, y = z', 2, False, False), + ('x[0], y = z', 2, False, False), + ('x[0] = z', 0, False, False), + ('x[0], y = z', 0, False, False), + ('x[0], y = z', 2, False, True), + ('x[0] = z', 0, True, True), + ('x[0], y = z', 0, True, True), + ('x: int = z', 0, True, False), + ('x: int = z', 1, False, False), + ('x: int = z', 2, False, False), + ('x: int', 0, True, False), + ('x: int', 1, False, False), + ] +) +def test_is_definition(code, name_index, is_definition, include_setitem): + module = parse(code, version='3.8') + name = module.get_first_leaf() + while True: + if name.type == 'name': + if name_index == 0: + break + name_index -= 1 + name = name.get_next_leaf() + + assert name.is_definition(include_setitem=include_setitem) == is_definition + + +def test_iter_funcdefs(): + code = dedent(''' + def normal(): ... + async def asyn(): ... + @dec + def dec_normal(): ... + @dec1 + @dec2 + async def dec_async(): ... 
+ def broken + ''') + module = parse(code, version='3.8') + func_names = [f.name.value for f in module.iter_funcdefs()] + assert func_names == ['normal', 'asyn', 'dec_normal', 'dec_async'] diff --git a/contrib/python/parso/py2/tests/test_pep8.py b/contrib/python/parso/py2/tests/test_pep8.py new file mode 100644 index 0000000000..44c11f4f55 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_pep8.py @@ -0,0 +1,39 @@ +import parso + + +def issues(code): + grammar = parso.load_grammar() + module = parso.parse(code) + return grammar._get_normalizer_issues(module) + + +def test_eof_newline(): + def assert_issue(code): + found = issues(code) + assert len(found) == 1 + issue, = found + assert issue.code == 292 + + assert not issues('asdf = 1\n') + assert_issue('asdf = 1') + assert_issue('asdf = 1\n# foo') + assert_issue('# foobar') + assert_issue('') + assert_issue('foo = 1 # comment') + + +def test_eof_blankline(): + def assert_issue(code): + found = issues(code) + assert len(found) == 1 + issue, = found + assert issue.code == 391 + + assert_issue('asdf = 1\n\n') + assert_issue('# foobar\n\n') + assert_issue('\n\n') + +def test_shebang(): + assert not issues('#!\n') + assert not issues('#!/foo\n') + assert not issues('#! python\n') diff --git a/contrib/python/parso/py2/tests/test_pgen2.py b/contrib/python/parso/py2/tests/test_pgen2.py new file mode 100644 index 0000000000..158ec29d9d --- /dev/null +++ b/contrib/python/parso/py2/tests/test_pgen2.py @@ -0,0 +1,350 @@ +"""Test suite for 2to3's parser and grammar files. + +This is the place to add tests for changes to 2to3's grammar, such as those +merging the grammars for Python 2 and 3. In addition to specific tests for +parts of the grammar we've changed, we also make sure we can parse the +test_grammar.py files from both Python 2 and Python 3. 
+""" + +from textwrap import dedent + +import pytest + +from parso import load_grammar +from parso import ParserSyntaxError +from parso.pgen2 import generate_grammar +from parso.python import tokenize + + +def _parse(code, version=None): + code = dedent(code) + "\n\n" + grammar = load_grammar(version=version) + return grammar.parse(code, error_recovery=False) + + +def _invalid_syntax(code, version=None, **kwargs): + with pytest.raises(ParserSyntaxError): + module = _parse(code, version=version, **kwargs) + # For debugging + print(module.children) + + +def test_formfeed(each_version): + s = u"foo\n\x0c\nfoo\n" + t = _parse(s, each_version) + assert t.children[0].children[0].type == 'name' + assert t.children[1].children[0].type == 'name' + s = u"1\n\x0c\x0c\n2\n" + t = _parse(s, each_version) + + with pytest.raises(ParserSyntaxError): + s = u"\n\x0c2\n" + _parse(s, each_version) + + +def test_matrix_multiplication_operator(works_ge_py35): + works_ge_py35.parse("a @ b") + works_ge_py35.parse("a @= b") + + +def test_yield_from(works_ge_py3, each_version): + works_ge_py3.parse("yield from x") + works_ge_py3.parse("(yield from x) + y") + _invalid_syntax("yield from", each_version) + + +def test_await_expr(works_ge_py35): + works_ge_py35.parse("""async def foo(): + await x + """) + + works_ge_py35.parse("""async def foo(): + + def foo(): pass + + def foo(): pass + + await x + """) + + works_ge_py35.parse("""async def foo(): return await a""") + + works_ge_py35.parse("""def foo(): + def foo(): pass + async def foo(): await x + """) + + +@pytest.mark.skipif('sys.version_info[:2] < (3, 5)') +@pytest.mark.xfail(reason="acting like python 3.7") +def test_async_var(): + _parse("""async = 1""", "3.5") + _parse("""await = 1""", "3.5") + _parse("""def async(): pass""", "3.5") + + +def test_async_for(works_ge_py35): + works_ge_py35.parse("async def foo():\n async for a in b: pass") + + +@pytest.mark.parametrize("body", [ + """[1 async for a in b + ]""", + """[1 async + for a in b + ]""", + """[ + 1 + async for a in b + ]""", + """[ + 1 + async for a + in b + ]""", + """[ + 1 + async + for + a + in + b + ]""", + """ [ + 1 async for a in b + ]""", +]) +def test_async_for_comprehension_newline(works_ge_py36, body): + # Issue #139 + works_ge_py36.parse("""async def foo(): + {}""".format(body)) + + +def test_async_with(works_ge_py35): + works_ge_py35.parse("async def foo():\n async with a: pass") + + @pytest.mark.skipif('sys.version_info[:2] < (3, 5)') + @pytest.mark.xfail(reason="acting like python 3.7") + def test_async_with_invalid(): + _invalid_syntax("""def foo(): + async with a: pass""", version="3.5") + + +def test_raise_3x_style_1(each_version): + _parse("raise", each_version) + + +def test_raise_2x_style_2(works_in_py2): + works_in_py2.parse("raise E, V") + +def test_raise_2x_style_3(works_in_py2): + works_in_py2.parse("raise E, V, T") + +def test_raise_2x_style_invalid_1(each_version): + _invalid_syntax("raise E, V, T, Z", version=each_version) + +def test_raise_3x_style(works_ge_py3): + works_ge_py3.parse("raise E1 from E2") + +def test_raise_3x_style_invalid_1(each_version): + _invalid_syntax("raise E, V from E1", each_version) + +def test_raise_3x_style_invalid_2(each_version): + _invalid_syntax("raise E from E1, E2", each_version) + +def test_raise_3x_style_invalid_3(each_version): + _invalid_syntax("raise from E1, E2", each_version) + +def test_raise_3x_style_invalid_4(each_version): + _invalid_syntax("raise E from", each_version) + + +# Adapted from Python 3's 
Lib/test/test_grammar.py:GrammarTests.testFuncdef +def test_annotation_1(works_ge_py3): + works_ge_py3.parse("""def f(x) -> list: pass""") + +def test_annotation_2(works_ge_py3): + works_ge_py3.parse("""def f(x:int): pass""") + +def test_annotation_3(works_ge_py3): + works_ge_py3.parse("""def f(*x:str): pass""") + +def test_annotation_4(works_ge_py3): + works_ge_py3.parse("""def f(**x:float): pass""") + +def test_annotation_5(works_ge_py3): + works_ge_py3.parse("""def f(x, y:1+2): pass""") + +def test_annotation_6(each_py3_version): + _invalid_syntax("""def f(a, (b:1, c:2, d)): pass""", each_py3_version) + +def test_annotation_7(each_py3_version): + _invalid_syntax("""def f(a, (b:1, c:2, d), e:3=4, f=5, *g:6): pass""", each_py3_version) + +def test_annotation_8(each_py3_version): + s = """def f(a, (b:1, c:2, d), e:3=4, f=5, + *g:6, h:7, i=8, j:9=10, **k:11) -> 12: pass""" + _invalid_syntax(s, each_py3_version) + + +def test_except_new(each_version): + s = dedent(""" + try: + x + except E as N: + y""") + _parse(s, each_version) + +def test_except_old(works_in_py2): + s = dedent(""" + try: + x + except E, N: + y""") + works_in_py2.parse(s) + + +# Adapted from Python 3's Lib/test/test_grammar.py:GrammarTests.testAtoms +def test_set_literal_1(works_ge_py27): + works_ge_py27.parse("""x = {'one'}""") + +def test_set_literal_2(works_ge_py27): + works_ge_py27.parse("""x = {'one', 1,}""") + +def test_set_literal_3(works_ge_py27): + works_ge_py27.parse("""x = {'one', 'two', 'three'}""") + +def test_set_literal_4(works_ge_py27): + works_ge_py27.parse("""x = {2, 3, 4,}""") + + +def test_new_octal_notation(each_version): + _parse("""0o7777777777777""", each_version) + _invalid_syntax("""0o7324528887""", each_version) + + +def test_old_octal_notation(works_in_py2): + works_in_py2.parse("07") + + +def test_long_notation(works_in_py2): + works_in_py2.parse("0xFl") + works_in_py2.parse("0xFL") + works_in_py2.parse("0b1l") + works_in_py2.parse("0B1L") + works_in_py2.parse("0o7l") + works_in_py2.parse("0O7L") + works_in_py2.parse("0l") + works_in_py2.parse("0L") + works_in_py2.parse("10l") + works_in_py2.parse("10L") + + +def test_new_binary_notation(each_version): + _parse("""0b101010""", each_version) + _invalid_syntax("""0b0101021""", each_version) + + +def test_class_new_syntax(works_ge_py3): + works_ge_py3.parse("class B(t=7): pass") + works_ge_py3.parse("class B(t, *args): pass") + works_ge_py3.parse("class B(t, **kwargs): pass") + works_ge_py3.parse("class B(t, *args, **kwargs): pass") + works_ge_py3.parse("class B(t, y=9, *args, **kwargs): pass") + + +def test_parser_idempotency_extended_unpacking(works_ge_py3): + """A cut-down version of pytree_idempotency.py.""" + works_ge_py3.parse("a, *b, c = x\n") + works_ge_py3.parse("[*a, b] = x\n") + works_ge_py3.parse("(z, *y, w) = m\n") + works_ge_py3.parse("for *z, m in d: pass\n") + + +def test_multiline_bytes_literals(each_version): + """ + It's not possible to get the same result when using \xaa in Python 2/3, + because it's treated differently. 
+ """ + s = u""" + md5test(b"\xaa" * 80, + (b"Test Using Larger Than Block-Size Key " + b"and Larger Than One Block-Size Data"), + "6f630fad67cda0ee1fb1f562db3aa53e") + """ + _parse(s, each_version) + + +def test_multiline_bytes_tripquote_literals(each_version): + s = ''' + b""" + <?xml version="1.0" encoding="UTF-8"?> + <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN"> + """ + ''' + _parse(s, each_version) + + +def test_ellipsis(works_ge_py3, each_version): + works_ge_py3.parse("...") + _parse("[0][...]", version=each_version) + + +def test_dict_unpacking(works_ge_py35): + works_ge_py35.parse("{**dict(a=3), foo:2}") + + +def test_multiline_str_literals(each_version): + s = u""" + md5test("\xaa" * 80, + ("Test Using Larger Than Block-Size Key " + "and Larger Than One Block-Size Data"), + "6f630fad67cda0ee1fb1f562db3aa53e") + """ + _parse(s, each_version) + + +def test_py2_backticks(works_in_py2): + works_in_py2.parse("`1`") + + +def test_py2_string_prefixes(works_in_py2): + works_in_py2.parse("ur'1'") + works_in_py2.parse("Ur'1'") + works_in_py2.parse("UR'1'") + _invalid_syntax("ru'1'", works_in_py2.version) + + +def py_br(each_version): + _parse('br""', each_version) + + +def test_py3_rb(works_ge_py3): + works_ge_py3.parse("rb'1'") + works_ge_py3.parse("RB'1'") + + +def test_left_recursion(): + with pytest.raises(ValueError, match='left recursion'): + generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes) + + +@pytest.mark.parametrize( + 'grammar, error_match', [ + ['foo: bar | baz\nbar: NAME\nbaz: NAME\n', + r"foo is ambiguous.*given a TokenType\(NAME\).*bar or baz"], + ['''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', + r"foo is ambiguous.*given a ReservedString\(x\).*bar or baz"], + ['''foo: bar | 'x'\nbar: 'x'\n''', + r"foo is ambiguous.*given a ReservedString\(x\).*bar or foo"], + # An ambiguity with the second (not the first) child of a production + ['outer: "a" [inner] "b" "c"\ninner: "b" "c" [inner]\n', + r"outer is ambiguous.*given a ReservedString\(b\).*inner or outer"], + # An ambiguity hidden by a level of indirection (middle) + ['outer: "a" [middle] "b" "c"\nmiddle: inner\ninner: "b" "c" [inner]\n', + r"outer is ambiguous.*given a ReservedString\(b\).*middle or outer"], + ] +) +def test_ambiguities(grammar, error_match): + with pytest.raises(ValueError, match=error_match): + generate_grammar(grammar, tokenize.PythonTokenTypes) diff --git a/contrib/python/parso/py2/tests/test_prefix.py b/contrib/python/parso/py2/tests/test_prefix.py new file mode 100644 index 0000000000..0c79958aeb --- /dev/null +++ b/contrib/python/parso/py2/tests/test_prefix.py @@ -0,0 +1,79 @@ +try: + from itertools import zip_longest +except ImportError: + # Python 2 + from itertools import izip_longest as zip_longest + +from codecs import BOM_UTF8 + +import pytest + +import parso + +unicode_bom = BOM_UTF8.decode('utf-8') + + +@pytest.mark.parametrize(('string', 'tokens'), [ + ('', ['']), + ('#', ['#', '']), + (' # ', ['# ', '']), + (' # \n', ['# ', '\n', '']), + (' # \f\n', ['# ', '\f', '\n', '']), + (' \n', ['\n', '']), + (' \n ', ['\n', ' ']), + (' \f ', ['\f', ' ']), + (' \f ', ['\f', ' ']), + (' \r\n', ['\r\n', '']), + ('\\\n', ['\\\n', '']), + ('\\\r\n', ['\\\r\n', '']), + ('\t\t\n\t', ['\n', '\t']), +]) +def test_simple_prefix_splitting(string, tokens): + tree = parso.parse(string) + leaf = tree.children[0] + assert leaf.type == 'endmarker' + + parsed_tokens = list(leaf._split_prefix()) + start_pos = (1, 0) + for pt, expected in zip_longest(parsed_tokens, tokens): + assert pt.value 
== expected + + # Calculate the estimated end_pos + if expected.endswith('\n'): + end_pos = start_pos[0] + 1, 0 + else: + end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing) + + #assert start_pos == pt.start_pos + assert end_pos == pt.end_pos + start_pos = end_pos + + +@pytest.mark.parametrize(('string', 'types'), [ + ('# ', ['comment', 'spacing']), + ('\r\n', ['newline', 'spacing']), + ('\f', ['formfeed', 'spacing']), + ('\\\n', ['backslash', 'spacing']), + (' \t', ['spacing']), + (' \t ', ['spacing']), + (unicode_bom + ' # ', ['bom', 'comment', 'spacing']), +]) +def test_prefix_splitting_types(string, types): + tree = parso.parse(string) + leaf = tree.children[0] + assert leaf.type == 'endmarker' + parsed_tokens = list(leaf._split_prefix()) + assert [t.type for t in parsed_tokens] == types + + +def test_utf8_bom(): + tree = parso.parse(unicode_bom + 'a = 1') + expr_stmt = tree.children[0] + assert expr_stmt.start_pos == (1, 0) + + tree = parso.parse(unicode_bom + '\n') + endmarker = tree.children[0] + parts = list(endmarker._split_prefix()) + assert [p.type for p in parts] == ['bom', 'newline', 'spacing'] + assert [p.start_pos for p in parts] == [(1, 0), (1, 0), (2, 0)] + assert [p.end_pos for p in parts] == [(1, 0), (2, 0), (2, 0)] diff --git a/contrib/python/parso/py2/tests/test_python_errors.py b/contrib/python/parso/py2/tests/test_python_errors.py new file mode 100644 index 0000000000..0b45f112d8 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_python_errors.py @@ -0,0 +1,416 @@ +""" +Testing if parso finds syntax errors and indentation errors. +""" +import sys +import warnings + +import pytest + +import parso + +from textwrap import dedent +from parso._compatibility import is_pypy +from .failing_examples import FAILING_EXAMPLES, indent, build_nested + + +if is_pypy: + # The errors in PyPy might be different. Just skip the module for now. + pytestmark = pytest.mark.skip() + + +def _get_error_list(code, version=None): + grammar = parso.load_grammar(version=version) + tree = grammar.parse(code) + return list(grammar.iter_errors(tree)) + + +def assert_comparison(code, error_code, positions): + errors = [(error.start_pos, error.code) for error in _get_error_list(code)] + assert [(pos, error_code) for pos in positions] == errors + + +@pytest.mark.parametrize('code', FAILING_EXAMPLES) +def test_python_exception_matches(code): + wanted, line_nr = _get_actual_exception(code) + + errors = _get_error_list(code) + actual = None + if errors: + error, = errors + actual = error.message + assert actual in wanted + # Somehow in Python2.7 the SyntaxError().lineno is sometimes None + assert line_nr is None or line_nr == error.start_pos[0] + + +def test_non_async_in_async(): + """ + This example doesn't work with FAILING_EXAMPLES, because the line numbers + are not always the same / incorrect in Python 3.8. + """ + if sys.version_info[:2] < (3, 5): + pytest.skip() + + # Raises multiple errors in previous versions. 
+ code = 'async def foo():\n def nofoo():[x async for x in []]' + wanted, line_nr = _get_actual_exception(code) + + errors = _get_error_list(code) + if errors: + error, = errors + actual = error.message + assert actual in wanted + if sys.version_info[:2] < (3, 8): + assert line_nr == error.start_pos[0] + else: + assert line_nr == 0 # For whatever reason this is zero in Python 3.8+ + + +@pytest.mark.parametrize( + ('code', 'positions'), [ + ('1 +', [(1, 3)]), + ('1 +\n', [(1, 3)]), + ('1 +\n2 +', [(1, 3), (2, 3)]), + ('x + 2', []), + ('[\n', [(2, 0)]), + ('[\ndef x(): pass', [(2, 0)]), + ('[\nif 1: pass', [(2, 0)]), + ('1+?', [(1, 2)]), + ('?', [(1, 0)]), + ('??', [(1, 0)]), + ('? ?', [(1, 0)]), + ('?\n?', [(1, 0), (2, 0)]), + ('? * ?', [(1, 0)]), + ('1 + * * 2', [(1, 4)]), + ('?\n1\n?', [(1, 0), (3, 0)]), + ] +) +def test_syntax_errors(code, positions): + assert_comparison(code, 901, positions) + + +@pytest.mark.parametrize( + ('code', 'positions'), [ + (' 1', [(1, 0)]), + ('def x():\n 1\n 2', [(3, 0)]), + ('def x():\n 1\n 2', [(3, 0)]), + ('def x():\n1', [(2, 0)]), + ] +) +def test_indentation_errors(code, positions): + assert_comparison(code, 903, positions) + + +def _get_actual_exception(code): + with warnings.catch_warnings(): + # We don't care about warnings where locals/globals misbehave here. + # It's as simple as either an error or not. + warnings.filterwarnings('ignore', category=SyntaxWarning) + try: + compile(code, '<unknown>', 'exec') + except (SyntaxError, IndentationError) as e: + wanted = e.__class__.__name__ + ': ' + e.msg + line_nr = e.lineno + except ValueError as e: + # The ValueError comes from byte literals in Python 2 like '\x' + # that are oddly enough not SyntaxErrors. + wanted = 'SyntaxError: (value error) ' + str(e) + line_nr = None + else: + assert False, "The piece of code should raise an exception." + + # SyntaxError + if wanted == 'SyntaxError: non-keyword arg after keyword arg': + # The python 3.5+ way, a bit nicer. + wanted = 'SyntaxError: positional argument follows keyword argument' + elif wanted == 'SyntaxError: assignment to keyword': + return [wanted, "SyntaxError: can't assign to keyword", + 'SyntaxError: cannot assign to __debug__'], line_nr + elif wanted == 'SyntaxError: can use starred expression only as assignment target': + # Python 3.4/3.4 have a bit of a different warning than 3.5/3.6 in + # certain places. But in others this error makes sense. + return [wanted, "SyntaxError: can't use starred expression here"], line_nr + elif wanted == 'SyntaxError: f-string: unterminated string': + wanted = 'SyntaxError: EOL while scanning string literal' + elif wanted == 'SyntaxError: f-string expression part cannot include a backslash': + return [ + wanted, + "SyntaxError: EOL while scanning string literal", + "SyntaxError: unexpected character after line continuation character", + ], line_nr + elif wanted == "SyntaxError: f-string: expecting '}'": + wanted = 'SyntaxError: EOL while scanning string literal' + elif wanted == 'SyntaxError: f-string: empty expression not allowed': + wanted = 'SyntaxError: invalid syntax' + elif wanted == "SyntaxError: f-string expression part cannot include '#'": + wanted = 'SyntaxError: invalid syntax' + elif wanted == "SyntaxError: f-string: single '}' is not allowed": + wanted = 'SyntaxError: invalid syntax' + return [wanted], line_nr + + +def test_default_except_error_postition(): + # For this error the position seemed to be one line off, but that doesn't + # really matter. 
+ code = 'try: pass\nexcept: pass\nexcept X: pass' + wanted, line_nr = _get_actual_exception(code) + error, = _get_error_list(code) + assert error.message in wanted + assert line_nr != error.start_pos[0] + # I think this is the better position. + assert error.start_pos[0] == 2 + + +def test_statically_nested_blocks(): + def build(code, depth): + if depth == 0: + return code + + new_code = 'if 1:\n' + indent(code) + return build(new_code, depth - 1) + + def get_error(depth, add_func=False): + code = build('foo', depth) + if add_func: + code = 'def bar():\n' + indent(code) + errors = _get_error_list(code) + if errors: + assert errors[0].message == 'SyntaxError: too many statically nested blocks' + return errors[0] + return None + + assert get_error(19) is None + assert get_error(19, add_func=True) is None + + assert get_error(20) + assert get_error(20, add_func=True) + + +def test_future_import_first(): + def is_issue(code, *args, **kwargs): + code = code % args + return bool(_get_error_list(code, **kwargs)) + + i1 = 'from __future__ import division' + i2 = 'from __future__ import absolute_import' + i3 = 'from __future__ import annotations' + assert not is_issue(i1) + assert not is_issue(i1 + ';' + i2) + assert not is_issue(i1 + '\n' + i2) + assert not is_issue('"";' + i1) + assert not is_issue('"";' + i1) + assert not is_issue('""\n' + i1) + assert not is_issue('""\n%s\n%s', i1, i2) + assert not is_issue('""\n%s;%s', i1, i2) + assert not is_issue('"";%s;%s ', i1, i2) + assert not is_issue('"";%s\n%s ', i1, i2) + assert not is_issue(i3, version="3.7") + assert is_issue(i3, version="3.6") + assert is_issue('1;' + i1) + assert is_issue('1\n' + i1) + assert is_issue('"";1\n' + i1) + assert is_issue('""\n%s\nfrom x import a\n%s', i1, i2) + assert is_issue('%s\n""\n%s', i1, i2) + + +def test_named_argument_issues(works_not_in_py): + message = works_not_in_py.get_error_message('def foo(*, **dict): pass') + message = works_not_in_py.get_error_message('def foo(*): pass') + if works_not_in_py.version.startswith('2'): + assert message == 'SyntaxError: invalid syntax' + else: + assert message == 'SyntaxError: named arguments must follow bare *' + + works_not_in_py.assert_no_error_in_passing('def foo(*, name): pass') + works_not_in_py.assert_no_error_in_passing('def foo(bar, *, name=1): pass') + works_not_in_py.assert_no_error_in_passing('def foo(bar, *, name=1, **dct): pass') + + +def test_escape_decode_literals(each_version): + """ + We are using internal functions to assure that unicode/bytes escaping is + without syntax errors. Here we make a bit of quality assurance that this + works through versions, because the internal function might change over + time. 
+ """ + def get_msg(end, to=1): + base = "SyntaxError: (unicode error) 'unicodeescape' " \ + "codec can't decode bytes in position 0-%s: " % to + return base + end + + def get_msgs(escape): + return (get_msg('end of string in escape sequence'), + get_msg(r"truncated %s escape" % escape)) + + error, = _get_error_list(r'u"\x"', version=each_version) + assert error.message in get_msgs(r'\xXX') + + error, = _get_error_list(r'u"\u"', version=each_version) + assert error.message in get_msgs(r'\uXXXX') + + error, = _get_error_list(r'u"\U"', version=each_version) + assert error.message in get_msgs(r'\UXXXXXXXX') + + error, = _get_error_list(r'u"\N{}"', version=each_version) + assert error.message == get_msg(r'malformed \N character escape', to=2) + + error, = _get_error_list(r'u"\N{foo}"', version=each_version) + assert error.message == get_msg(r'unknown Unicode character name', to=6) + + # Finally bytes. + error, = _get_error_list(r'b"\x"', version=each_version) + wanted = r'SyntaxError: (value error) invalid \x escape' + if sys.version_info >= (3, 0): + # The positioning information is only available in Python 3. + wanted += ' at position 0' + assert error.message == wanted + + +def test_too_many_levels_of_indentation(): + assert not _get_error_list(build_nested('pass', 99)) + assert _get_error_list(build_nested('pass', 100)) + base = 'def x():\n if x:\n' + assert not _get_error_list(build_nested('pass', 49, base=base)) + assert _get_error_list(build_nested('pass', 50, base=base)) + +def test_paren_kwarg(): + assert _get_error_list("print((sep)=seperator)", version="3.8") + assert not _get_error_list("print((sep)=seperator)", version="3.7") + +@pytest.mark.parametrize( + 'code', [ + "f'{*args,}'", + r'f"\""', + r'f"\\\""', + r'fr"\""', + r'fr"\\\""', + r"print(f'Some {x:.2f} and some {y}')", + ] +) +def test_valid_fstrings(code): + assert not _get_error_list(code, version='3.6') + + +@pytest.mark.parametrize( + 'code', [ + 'a = (b := 1)', + '[x4 := x ** 5 for x in range(7)]', + '[total := total + v for v in range(10)]', + 'while chunk := file.read(2):\n pass', + 'numbers = [y := math.factorial(x), y**2, y**3]', + ] +) +def test_valid_namedexpr(code): + assert not _get_error_list(code, version='3.8') + + +@pytest.mark.parametrize( + ('code', 'message'), [ + ("f'{1+}'", ('invalid syntax')), + (r'f"\"', ('invalid syntax')), + (r'fr"\"', ('invalid syntax')), + ] +) +def test_invalid_fstrings(code, message): + """ + Some fstring errors are handled differntly in 3.6 and other versions. + Therefore check specifically for these errors here. 
+ """ + error, = _get_error_list(code, version='3.6') + assert message in error.message + + +@pytest.mark.parametrize( + 'code', [ + "from foo import (\nbar,\n rab,\n)", + "from foo import (bar, rab, )", + ] +) +def test_trailing_comma(code): + errors = _get_error_list(code) + assert not errors + +def test_continue_in_finally(): + code = dedent('''\ + for a in [1]: + try: + pass + finally: + continue + ''') + assert not _get_error_list(code, version="3.8") + assert _get_error_list(code, version="3.7") + + +@pytest.mark.parametrize( + 'template', [ + "a, b, {target}, c = d", + "a, b, *{target}, c = d", + "(a, *{target}), c = d", + "for x, {target} in y: pass", + "for x, q, {target} in y: pass", + "for x, q, *{target} in y: pass", + "for (x, *{target}), q in y: pass", + ] +) +@pytest.mark.parametrize( + 'target', [ + "True", + "False", + "None", + "__debug__" + ] +) +def test_forbidden_name(template, target): + assert _get_error_list(template.format(target=target), version="3") + + +def test_repeated_kwarg(): + # python 3.9+ shows which argument is repeated + assert ( + _get_error_list("f(q=1, q=2)", version="3.8")[0].message + == "SyntaxError: keyword argument repeated" + ) + assert ( + _get_error_list("f(q=1, q=2)", version="3.9")[0].message + == "SyntaxError: keyword argument repeated: q" + ) + + +@pytest.mark.parametrize( + ('source', 'no_errors'), [ + ('a(a for a in b,)', False), + ('a(a for a in b, a)', False), + ('a(a, a for a in b)', False), + ('a(a, b, a for a in b, c, d)', False), + ('a(a for a in b)', True), + ('a((a for a in b), c)', True), + ('a(c, (a for a in b))', True), + ('a(a, b, (a for a in b), c, d)', True), + ] +) +def test_unparenthesized_genexp(source, no_errors): + assert bool(_get_error_list(source)) ^ no_errors + +@pytest.mark.parametrize( + ('source', 'no_errors'), [ + ('*x = 2', False), + ('(*y) = 1', False), + ('((*z)) = 1', False), + ('a, *b = 1', True), + ('a, *b, c = 1', True), + ('a, (*b), c = 1', True), + ('a, ((*b)), c = 1', True), + ('a, (*b, c), d = 1', True), + ('[*(1,2,3)]', True), + ('{*(1,2,3)}', True), + ('[*(1,2,3),]', True), + ('[*(1,2,3), *(4,5,6)]', True), + ('[0, *(1,2,3)]', True), + ('{*(1,2,3),}', True), + ('{*(1,2,3), *(4,5,6)}', True), + ('{0, *(4,5,6)}', True) + ] +) +def test_starred_expr(source, no_errors): + assert bool(_get_error_list(source, version="3")) ^ no_errors diff --git a/contrib/python/parso/py2/tests/test_tokenize.py b/contrib/python/parso/py2/tests/test_tokenize.py new file mode 100644 index 0000000000..7afa3737d2 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_tokenize.py @@ -0,0 +1,443 @@ +# -*- coding: utf-8 # This file contains Unicode characters. + +import sys +from textwrap import dedent + +import pytest + +from parso.utils import split_lines, parse_version_string +from parso.python.token import PythonTokenTypes +from parso.python import tokenize +from parso import parse +from parso.python.tokenize import PythonToken + + +# To make it easier to access some of the token types, just put them here. 
+NAME = PythonTokenTypes.NAME +NEWLINE = PythonTokenTypes.NEWLINE +STRING = PythonTokenTypes.STRING +NUMBER = PythonTokenTypes.NUMBER +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT +ERRORTOKEN = PythonTokenTypes.ERRORTOKEN +OP = PythonTokenTypes.OP +ENDMARKER = PythonTokenTypes.ENDMARKER +ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT +FSTRING_START = PythonTokenTypes.FSTRING_START +FSTRING_STRING = PythonTokenTypes.FSTRING_STRING +FSTRING_END = PythonTokenTypes.FSTRING_END + + +def _get_token_list(string, version=None): + # Load the current version. + version_info = parse_version_string(version) + return list(tokenize.tokenize(string, version_info)) + + +def test_end_pos_one_line(): + parsed = parse(dedent(''' + def testit(): + a = "huhu" + ''')) + simple_stmt = next(parsed.iter_funcdefs()).get_suite().children[-1] + string = simple_stmt.children[0].get_rhs() + assert string.end_pos == (3, 14) + + +def test_end_pos_multi_line(): + parsed = parse(dedent(''' + def testit(): + a = """huhu + asdfasdf""" + "h" + ''')) + expr_stmt = next(parsed.iter_funcdefs()).get_suite().children[1].children[0] + string_leaf = expr_stmt.get_rhs().children[0] + assert string_leaf.end_pos == (4, 11) + + +def test_simple_no_whitespace(): + # Test a simple one line string, no preceding whitespace + simple_docstring = '"""simple one line docstring"""' + token_list = _get_token_list(simple_docstring) + _, value, _, prefix = token_list[0] + assert prefix == '' + assert value == '"""simple one line docstring"""' + + +def test_simple_with_whitespace(): + # Test a simple one line string with preceding whitespace and newline + simple_docstring = ' """simple one line docstring""" \r\n' + token_list = _get_token_list(simple_docstring) + assert token_list[0][0] == INDENT + typ, value, start_pos, prefix = token_list[1] + assert prefix == ' ' + assert value == '"""simple one line docstring"""' + assert typ == STRING + typ, value, start_pos, prefix = token_list[2] + assert prefix == ' ' + assert typ == NEWLINE + + +def test_function_whitespace(): + # Test function definition whitespace identification + fundef = dedent(''' + def test_whitespace(*args, **kwargs): + x = 1 + if x > 0: + print(True) + ''') + token_list = _get_token_list(fundef) + for _, value, _, prefix in token_list: + if value == 'test_whitespace': + assert prefix == ' ' + if value == '(': + assert prefix == '' + if value == '*': + assert prefix == '' + if value == '**': + assert prefix == ' ' + if value == 'print': + assert prefix == ' ' + if value == 'if': + assert prefix == ' ' + + +def test_tokenize_multiline_I(): + # Make sure multiline string having newlines have the end marker on the + # next line + fundef = '''""""\n''' + token_list = _get_token_list(fundef) + assert token_list == [PythonToken(ERRORTOKEN, '""""\n', (1, 0), ''), + PythonToken(ENDMARKER , '', (2, 0), '')] + + +def test_tokenize_multiline_II(): + # Make sure multiline string having no newlines have the end marker on + # same line + fundef = '''""""''' + token_list = _get_token_list(fundef) + assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''), + PythonToken(ENDMARKER, '', (1, 4), '')] + + +def test_tokenize_multiline_III(): + # Make sure multiline string having newlines have the end marker on the + # next line even if several newline + fundef = '''""""\n\n''' + token_list = _get_token_list(fundef) + assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''), + PythonToken(ENDMARKER, '', (3, 0), '')] + + +def test_identifier_contains_unicode(): + 
fundef = dedent(''' + def 我あφ(): + pass + ''') + token_list = _get_token_list(fundef) + unicode_token = token_list[1] + if sys.version_info.major >= 3: + assert unicode_token[0] == NAME + else: + # Unicode tokens in Python 2 seem to be identified as operators. + # They will be ignored in the parser, that's ok. + assert unicode_token[0] == ERRORTOKEN + + +def test_quoted_strings(): + string_tokens = [ + 'u"test"', + 'u"""test"""', + 'U"""test"""', + "u'''test'''", + "U'''test'''", + ] + + for s in string_tokens: + module = parse('''a = %s\n''' % s) + simple_stmt = module.children[0] + expr_stmt = simple_stmt.children[0] + assert len(expr_stmt.children) == 3 + string_tok = expr_stmt.children[2] + assert string_tok.type == 'string' + assert string_tok.value == s + + +def test_ur_literals(): + """ + Decided to parse `u''` literals regardless of Python version. This makes + probably sense: + + - Python 3+ doesn't support it, but it doesn't hurt + not be. While this is incorrect, it's just incorrect for one "old" and in + the future not very important version. + - All the other Python versions work very well with it. + """ + def check(literal, is_literal=True): + token_list = _get_token_list(literal) + typ, result_literal, _, _ = token_list[0] + if is_literal: + if typ != FSTRING_START: + assert typ == STRING + assert result_literal == literal + else: + assert typ == NAME + + check('u""') + check('ur""', is_literal=not sys.version_info.major >= 3) + check('Ur""', is_literal=not sys.version_info.major >= 3) + check('UR""', is_literal=not sys.version_info.major >= 3) + check('bR""') + # Starting with Python 3.3 this ordering is also possible. + if sys.version_info.major >= 3: + check('Rb""') + + # Starting with Python 3.6 format strings where introduced. + check('fr""', is_literal=sys.version_info >= (3, 6)) + check('rF""', is_literal=sys.version_info >= (3, 6)) + check('f""', is_literal=sys.version_info >= (3, 6)) + check('F""', is_literal=sys.version_info >= (3, 6)) + + +def test_error_literal(): + error_token, newline, endmarker = _get_token_list('"\n') + assert error_token.type == ERRORTOKEN + assert error_token.string == '"' + assert newline.type == NEWLINE + assert endmarker.type == ENDMARKER + assert endmarker.prefix == '' + + bracket, error_token, endmarker = _get_token_list('( """') + assert error_token.type == ERRORTOKEN + assert error_token.prefix == ' ' + assert error_token.string == '"""' + assert endmarker.type == ENDMARKER + assert endmarker.prefix == '' + + +def test_endmarker_end_pos(): + def check(code): + tokens = _get_token_list(code) + lines = split_lines(code) + assert tokens[-1].end_pos == (len(lines), len(lines[-1])) + + check('#c') + check('#c\n') + check('a\n') + check('a') + check(r'a\\n') + check('a\\') + + +xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Python 2')]) + + +@pytest.mark.parametrize( + ('code', 'types'), [ + # Indentation + (' foo', [INDENT, NAME, DEDENT]), + (' foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]), + (' foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, + NEWLINE, NAME, DEDENT]), + (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]), + + # Name stuff + ('1foo1', [NUMBER, NAME]), + pytest.param( + u'மெல்லினம்', [NAME], + **xfail_py2), + pytest.param(u'²', [ERRORTOKEN], **xfail_py2), + pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2), + pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2), + (' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]), + (dedent('''\ + class 
BaseCache: + a + def + b + def + c + '''), [NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, + ERROR_DEDENT, NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, + NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, DEDENT]), + (' )\n foo', [INDENT, OP, NEWLINE, ERROR_DEDENT, NAME, DEDENT]), + ('a\n b\n )\n c', [NAME, NEWLINE, INDENT, NAME, NEWLINE, INDENT, OP, + NEWLINE, DEDENT, NAME, DEDENT]), + (' 1 \\\ndef', [INDENT, NUMBER, NAME, DEDENT]), + ] +) +def test_token_types(code, types): + actual_types = [t.type for t in _get_token_list(code)] + assert actual_types == types + [ENDMARKER] + + +def test_error_string(): + indent, t1, newline, token, endmarker = _get_token_list(' "\n') + assert t1.type == ERRORTOKEN + assert t1.prefix == ' ' + assert t1.string == '"' + assert newline.type == NEWLINE + assert endmarker.prefix == '' + assert endmarker.string == '' + + +def test_indent_error_recovery(): + code = dedent("""\ + str( + from x import a + def + """) + lst = _get_token_list(code) + expected = [ + # `str(` + INDENT, NAME, OP, + # `from parso` + NAME, NAME, + # `import a` on same line as the previous from parso + NAME, NAME, NEWLINE, + # Dedent happens, because there's an import now and the import + # statement "breaks" out of the opening paren on the first line. + DEDENT, + # `b` + NAME, NEWLINE, ENDMARKER] + assert [t.type for t in lst] == expected + + +def test_error_token_after_dedent(): + code = dedent("""\ + class C: + pass + $foo + """) + lst = _get_token_list(code) + expected = [ + NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, + # $foo\n + ERRORTOKEN, NAME, NEWLINE, ENDMARKER + ] + assert [t.type for t in lst] == expected + + +def test_brackets_no_indentation(): + """ + There used to be an issue that the parentheses counting would go below + zero. This should not happen. 
+ """ + code = dedent("""\ + } + { + } + """) + lst = _get_token_list(code) + assert [t.type for t in lst] == [OP, NEWLINE, OP, OP, NEWLINE, ENDMARKER] + + +def test_form_feed(): + indent, error_token, dedent_, endmarker = _get_token_list(dedent('''\ + \f"""''')) + assert error_token.prefix == '\f' + assert error_token.string == '"""' + assert endmarker.prefix == '' + assert indent.type == INDENT + assert dedent_.type == DEDENT + + +def test_carriage_return(): + lst = _get_token_list(' =\\\rclass') + assert [t.type for t in lst] == [INDENT, OP, NAME, DEDENT, ENDMARKER] + + +def test_backslash(): + code = '\\\n# 1 \n' + endmarker, = _get_token_list(code) + assert endmarker.prefix == code + + +@pytest.mark.parametrize( + ('code', 'types'), [ + # f-strings + ('f"', [FSTRING_START]), + ('f""', [FSTRING_START, FSTRING_END]), + ('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]), + ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]), + (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + + # format spec + (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP, + FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]), + + # multiline f-string + ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + ('f"""abc{\n123}def"""', [ + FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING, + FSTRING_END + ]), + + # a line continuation inside of an fstring_string + ('f"abc\\\ndef"', [ + FSTRING_START, FSTRING_STRING, FSTRING_END + ]), + ('f"\\\n{123}\\\n"', [ + FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING, + FSTRING_END + ]), + + # a line continuation inside of an fstring_expr + ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]), + + # a line continuation inside of an format spec + ('f"{123:.2\\\nf}"', [ + FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END + ]), + + # a newline without a line continuation inside a single-line string is + # wrong, and will generate an ERRORTOKEN + ('f"abc\ndef"', [ + FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN + ]), + + # a more complex example + (r'print(f"Some {x:.2f}a{y}")', [ + NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP, + FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP + ]), + # issue #86, a string-like in an f-string expression + ('f"{ ""}"', [ + FSTRING_START, OP, FSTRING_END, STRING + ]), + ('f"{ f""}"', [ + FSTRING_START, OP, NAME, FSTRING_END, STRING + ]), + ] +) +def test_fstring_token_types(code, types, version_ge_py36): + actual_types = [t.type for t in _get_token_list(code, version_ge_py36)] + assert types + [ENDMARKER] == actual_types + + +@pytest.mark.parametrize( + ('code', 'types'), [ + # issue #87, `:=` in the outest paratheses should be tokenized + # as a format spec marker and part of the format + ('f"{x:=10}"', [ + FSTRING_START, OP, NAME, OP, FSTRING_STRING, OP, FSTRING_END + ]), + ('f"{(x:=10)}"', [ + FSTRING_START, OP, OP, NAME, OP, NUMBER, OP, OP, FSTRING_END + ]), + ] +) +def test_fstring_assignment_expression(code, types, version_ge_py38): + actual_types = [t.type for t in _get_token_list(code, version_ge_py38)] + assert types + [ENDMARKER] == actual_types + + +def test_fstring_end_error_pos(version_ge_py38): + f_start, f_string, bracket, f_end, endmarker = \ + _get_token_list('f" { "', version_ge_py38) + assert f_start.start_pos == (1, 0) + assert f_string.start_pos == (1, 2) + assert bracket.start_pos == (1, 3) + assert f_end.start_pos == (1, 
5) + assert endmarker.start_pos == (1, 6) diff --git a/contrib/python/parso/py2/tests/test_utils.py b/contrib/python/parso/py2/tests/test_utils.py new file mode 100644 index 0000000000..541d81f995 --- /dev/null +++ b/contrib/python/parso/py2/tests/test_utils.py @@ -0,0 +1,102 @@ +from codecs import BOM_UTF8 + +from parso.utils import ( + split_lines, + parse_version_string, + python_bytes_to_unicode, +) + +import parso + +import pytest + + +@pytest.mark.parametrize( + ('string', 'expected_result', 'keepends'), [ + ('asd\r\n', ['asd', ''], False), + ('asd\r\n', ['asd\r\n', ''], True), + ('asd\r', ['asd', ''], False), + ('asd\r', ['asd\r', ''], True), + ('asd\n', ['asd', ''], False), + ('asd\n', ['asd\n', ''], True), + + ('asd\r\n\f', ['asd', '\f'], False), + ('asd\r\n\f', ['asd\r\n', '\f'], True), + + ('\fasd\r\n', ['\fasd', ''], False), + ('\fasd\r\n', ['\fasd\r\n', ''], True), + + ('', [''], False), + ('', [''], True), + + ('\n', ['', ''], False), + ('\n', ['\n', ''], True), + + ('\r', ['', ''], False), + ('\r', ['\r', ''], True), + + # Invalid line breaks + ('a\vb', ['a\vb'], False), + ('a\vb', ['a\vb'], True), + ('\x1C', ['\x1C'], False), + ('\x1C', ['\x1C'], True), + ] +) +def test_split_lines(string, expected_result, keepends): + assert split_lines(string, keepends=keepends) == expected_result + + +def test_python_bytes_to_unicode_unicode_text(): + source = ( + b"# vim: fileencoding=utf-8\n" + b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n" + ) + actual = python_bytes_to_unicode(source) + expected = source.decode('utf-8') + assert actual == expected + + +def test_utf8_bom(): + unicode_bom = BOM_UTF8.decode('utf-8') + + module = parso.parse(unicode_bom) + endmarker = module.children[0] + assert endmarker.type == 'endmarker' + assert unicode_bom == endmarker.prefix + + module = parso.parse(unicode_bom + 'foo = 1') + expr_stmt = module.children[0] + assert expr_stmt.type == 'expr_stmt' + assert unicode_bom == expr_stmt.get_first_leaf().prefix + + +@pytest.mark.parametrize( + ('code', 'errors'), [ + (b'# coding: wtf-12\nfoo', 'strict'), + (b'# coding: wtf-12\nfoo', 'replace'), + ] +) +def test_bytes_to_unicode_failing_encoding(code, errors): + if errors == 'strict': + with pytest.raises(LookupError): + python_bytes_to_unicode(code, errors=errors) + else: + python_bytes_to_unicode(code, errors=errors) + +@pytest.mark.parametrize( + ('version_str', 'version'), [ + ('3', (3,)), + ('3.6', (3, 6)), + ('3.6.10', (3, 6)), + ('3.10', (3, 10)), + ('3.10a9', (3, 10)), + ('3.10b9', (3, 10)), + ('3.10rc9', (3, 10)), + ] +) +def test_parse_version_string(version_str, version): + parsed_version = parse_version_string(version_str) + if len(version) == 1: + assert parsed_version[0] == version[0] + else: + assert parsed_version == version diff --git a/contrib/python/parso/py2/tests/ya.make b/contrib/python/parso/py2/tests/ya.make new file mode 100644 index 0000000000..da9af307d8 --- /dev/null +++ b/contrib/python/parso/py2/tests/ya.make @@ -0,0 +1,39 @@ +PY2TEST() + +PEERDIR( + contrib/python/parso +) + +DATA( + arcadia/contrib/python/parso/py2/tests +) + +TEST_SRCS( + __init__.py + conftest.py + failing_examples.py + test_absolute_import.py + test_cache.py + test_diff_parser.py + test_error_recovery.py + test_file_python_errors.py + test_fstring.py + test_get_code.py + test_grammar.py + test_load_grammar.py + test_normalizer_issues_files.py + test_old_fast_parser.py + test_param_splitting.py + test_parser.py + test_parser_tree.py + test_pep8.py + test_pgen2.py + test_prefix.py + 
test_python_errors.py + test_tokenize.py + test_utils.py +) + +NO_LINT() + +END() diff --git a/contrib/python/parso/py2/ya.make b/contrib/python/parso/py2/ya.make new file mode 100644 index 0000000000..361aa9e32e --- /dev/null +++ b/contrib/python/parso/py2/ya.make @@ -0,0 +1,64 @@ +# Generated by devtools/yamaker (pypi). + +PY2_LIBRARY() + +VERSION(0.7.1) + +LICENSE(PSF-2.0) + +NO_LINT() + +PY_SRCS( + TOP_LEVEL + parso/__init__.py + parso/__init__.pyi + parso/_compatibility.py + parso/cache.py + parso/file_io.py + parso/grammar.py + parso/grammar.pyi + parso/normalizer.py + parso/parser.py + parso/pgen2/__init__.py + parso/pgen2/__init__.pyi + parso/pgen2/generator.py + parso/pgen2/generator.pyi + parso/pgen2/grammar_parser.py + parso/pgen2/grammar_parser.pyi + parso/python/__init__.py + parso/python/diff.py + parso/python/errors.py + parso/python/parser.py + parso/python/pep8.py + parso/python/prefix.py + parso/python/token.py + parso/python/token.pyi + parso/python/tokenize.py + parso/python/tokenize.pyi + parso/python/tree.py + parso/tree.py + parso/utils.py + parso/utils.pyi +) + +RESOURCE_FILES( + PREFIX contrib/python/parso/py2/ + .dist-info/METADATA + .dist-info/top_level.txt + parso/py.typed + parso/python/grammar27.txt + parso/python/grammar310.txt + parso/python/grammar33.txt + parso/python/grammar34.txt + parso/python/grammar35.txt + parso/python/grammar36.txt + parso/python/grammar37.txt + parso/python/grammar38.txt + parso/python/grammar39.txt +) + +END() + +RECURSE_FOR_TESTS( + tests +) diff --git a/contrib/python/parso/py3/.dist-info/METADATA b/contrib/python/parso/py3/.dist-info/METADATA new file mode 100644 index 0000000000..331fef3a49 --- /dev/null +++ b/contrib/python/parso/py3/.dist-info/METADATA @@ -0,0 +1,281 @@ +Metadata-Version: 2.1 +Name: parso +Version: 0.8.3 +Summary: A Python Parser +Home-page: https://github.com/davidhalter/parso +Author: David Halter +Author-email: davidhalter88@gmail.com +Maintainer: David Halter +Maintainer-email: davidhalter88@gmail.com +License: MIT +Keywords: python parser parsing +Platform: any +Classifier: Development Status :: 4 - Beta +Classifier: Environment :: Plugins +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Editors :: Integrated Development Environments (IDE) +Classifier: Topic :: Utilities +Classifier: Typing :: Typed +Requires-Python: >=3.6 +Provides-Extra: qa +Requires-Dist: flake8 (==3.8.3) ; extra == 'qa' +Requires-Dist: mypy (==0.782) ; extra == 'qa' +Provides-Extra: testing +Requires-Dist: docopt ; extra == 'testing' +Requires-Dist: pytest (<6.0.0) ; extra == 'testing' + +################################################################### +parso - A Python Parser +################################################################### + + +.. image:: https://github.com/davidhalter/parso/workflows/Build/badge.svg?branch=master + :target: https://github.com/davidhalter/parso/actions + :alt: GitHub Actions build status + +.. 
image:: https://coveralls.io/repos/github/davidhalter/parso/badge.svg?branch=master + :target: https://coveralls.io/github/davidhalter/parso?branch=master + :alt: Coverage Status + +.. image:: https://pepy.tech/badge/parso + :target: https://pepy.tech/project/parso + :alt: PyPI Downloads + +.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png + +Parso is a Python parser that supports error recovery and round-trip parsing +for different Python versions (in multiple Python versions). Parso is also able +to list multiple syntax errors in your python file. + +Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful +for other projects as well. + +Parso consists of a small API to parse Python and analyse the syntax tree. + +A simple example: + +.. code-block:: python + + >>> import parso + >>> module = parso.parse('hello + 1', version="3.9") + >>> expr = module.children[0] + >>> expr + PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>]) + >>> print(expr.get_code()) + hello + 1 + >>> name = expr.children[0] + >>> name + <Name: hello@1,0> + >>> name.end_pos + (1, 5) + >>> expr.end_pos + (1, 9) + +To list multiple issues: + +.. code-block:: python + + >>> grammar = parso.load_grammar() + >>> module = grammar.parse('foo +\nbar\ncontinue') + >>> error1, error2 = grammar.iter_errors(module) + >>> error1.message + 'SyntaxError: invalid syntax' + >>> error2.message + "SyntaxError: 'continue' not properly in loop" + +Resources +========= + +- `Testing <https://parso.readthedocs.io/en/latest/docs/development.html#testing>`_ +- `PyPI <https://pypi.python.org/pypi/parso>`_ +- `Docs <https://parso.readthedocs.org/en/latest/>`_ +- Uses `semantic versioning <https://semver.org/>`_ + +Installation +============ + + pip install parso + +Future +====== + +- There will be better support for refactoring and comments. Stay tuned. +- There's a WIP PEP8 validator. It's however not in a good shape, yet. + +Known Issues +============ + +- `async`/`await` are already used as keywords in Python3.6. +- `from __future__ import print_function` is not ignored. + + +Acknowledgements +================ + +- Guido van Rossum (@gvanrossum) for creating the parser generator pgen2 + (originally used in lib2to3). +- `Salome Schneider <https://www.crepes-schnaegg.ch/cr%C3%AApes-schn%C3%A4gg/kunst-f%C3%BCrs-cr%C3%AApes-mobil/>`_ + for the extremely awesome parso logo. + + +.. _jedi: https://github.com/davidhalter/jedi + + +.. :changelog: + +Changelog +--------- + +Unreleased +++++++++++ + +0.8.3 (2021-11-30) +++++++++++++++++++ + +- Add basic support for Python 3.11 and 3.12 + +0.8.2 (2021-03-30) +++++++++++++++++++ + +- Various small bugfixes + +0.8.1 (2020-12-10) +++++++++++++++++++ + +- Various small bugfixes + +0.8.0 (2020-08-05) +++++++++++++++++++ + +- Dropped Support for Python 2.7, 3.4, 3.5 +- It's possible to use ``pathlib.Path`` objects now in the API +- The stubs are gone, we are now using annotations +- ``namedexpr_test`` nodes are now a proper class called ``NamedExpr`` +- A lot of smaller refactorings + +0.7.1 (2020-07-24) +++++++++++++++++++ + +- Fixed a couple of smaller bugs (mostly syntax error detection in + ``Grammar.iter_errors``) + +This is going to be the last release that supports Python 2.7, 3.4 and 3.5. + +0.7.0 (2020-04-13) +++++++++++++++++++ + +- Fix a lot of annoying bugs in the diff parser. The fuzzer did not find + issues anymore even after running it for more than 24 hours (500k tests). 
+- Small grammar change: suites can now contain newlines even after a newline. + This should really not matter if you don't use error recovery. It allows for + nicer error recovery. + +0.6.2 (2020-02-27) +++++++++++++++++++ + +- Bugfixes +- Add Grammar.refactor (might still be subject to change until 0.7.0) + +0.6.1 (2020-02-03) +++++++++++++++++++ + +- Add ``parso.normalizer.Issue.end_pos`` to make it possible to know where an + issue ends + +0.6.0 (2020-01-26) +++++++++++++++++++ + +- Dropped Python 2.6/Python 3.3 support +- del_stmt names are now considered as a definition + (for ``name.is_definition()``) +- Bugfixes + +0.5.2 (2019-12-15) +++++++++++++++++++ + +- Add include_setitem to get_definition/is_definition and get_defined_names (#66) +- Fix named expression error listing (#89, #90) +- Fix some f-string tokenizer issues (#93) + +0.5.1 (2019-07-13) +++++++++++++++++++ + +- Fix: Some unicode identifiers were not correctly tokenized +- Fix: Line continuations in f-strings are now working + +0.5.0 (2019-06-20) +++++++++++++++++++ + +- **Breaking Change** comp_for is now called sync_comp_for for all Python + versions to be compatible with the Python 3.8 Grammar +- Added .pyi stubs for a lot of the parso API +- Small FileIO changes + +0.4.0 (2019-04-05) +++++++++++++++++++ + +- Python 3.8 support +- FileIO support, it's now possible to use abstract file IO, support is alpha + +0.3.4 (2019-02-13) ++++++++++++++++++++ + +- Fix an f-string tokenizer error + +0.3.3 (2019-02-06) ++++++++++++++++++++ + +- Fix async errors in the diff parser +- A fix in iter_errors +- This is a very small bugfix release + +0.3.2 (2019-01-24) ++++++++++++++++++++ + +- 20+ bugfixes in the diff parser and 3 in the tokenizer +- A fuzzer for the diff parser, to give confidence that the diff parser is in a + good shape. +- Some bugfixes for f-string + +0.3.1 (2018-07-09) ++++++++++++++++++++ + +- Bugfixes in the diff parser and keyword-only arguments + +0.3.0 (2018-06-30) ++++++++++++++++++++ + +- Rewrote the pgen2 parser generator. + +0.2.1 (2018-05-21) ++++++++++++++++++++ + +- A bugfix for the diff parser. +- Grammar files can now be loaded from a specific path. + +0.2.0 (2018-04-15) ++++++++++++++++++++ + +- f-strings are now parsed as a part of the normal Python grammar. This makes + it way easier to deal with them. + +0.1.1 (2017-11-05) ++++++++++++++++++++ + +- Fixed a few bugs in the caching layer +- Added support for Python 3.7 + +0.1.0 (2017-09-04) ++++++++++++++++++++ + +- Pulling the library out of Jedi. Some APIs will definitely change. 
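The test suites added in this change exercise both of parso's parsing modes. As a minimal sketch (assuming only the vendored parso 0.8.3 and an arbitrary piece of broken code, not taken from the test data), strict parsing with ``error_recovery=False`` raises ``ParserSyntaxError``, while the default mode recovers and reports problems through ``Grammar.iter_errors`` — this is what the ``_parse()`` / ``_invalid_syntax()`` and ``_get_error_list()`` helpers in the test files above build on:

.. code-block:: python

    import parso
    from parso import ParserSyntaxError

    grammar = parso.load_grammar(version="3.9")
    broken = "def f(:\n    pass\n"   # illustrative invalid code, not from the test data

    # Default mode: error recovery keeps a tree and reports issues separately.
    module = grammar.parse(broken)
    for issue in grammar.iter_errors(module):
        print(issue.start_pos, issue.message)

    # Strict mode: the same input raises instead of producing a recovered tree.
    try:
        grammar.parse(broken, error_recovery=False)
    except ParserSyntaxError as exc:
        print(exc.error_leaf.start_pos)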
+ + diff --git a/contrib/python/parso/py3/.dist-info/top_level.txt b/contrib/python/parso/py3/.dist-info/top_level.txt new file mode 100644 index 0000000000..0e23344047 --- /dev/null +++ b/contrib/python/parso/py3/.dist-info/top_level.txt @@ -0,0 +1 @@ +parso diff --git a/contrib/python/parso/py3/AUTHORS.txt b/contrib/python/parso/py3/AUTHORS.txt new file mode 100644 index 0000000000..9737530ba9 --- /dev/null +++ b/contrib/python/parso/py3/AUTHORS.txt @@ -0,0 +1,58 @@ +Main Authors +============ + +David Halter (@davidhalter) <davidhalter88@gmail.com> + +Code Contributors +================= +Alisdair Robertson (@robodair) +Bryan Forbes (@bryanforbes) <bryan@reigndropsfall.net> + + +Code Contributors (to Jedi and therefore possibly to this library) +================================================================== + +Takafumi Arakaki (@tkf) <aka.tkf@gmail.com> +Danilo Bargen (@dbrgn) <mail@dbrgn.ch> +Laurens Van Houtven (@lvh) <_@lvh.cc> +Aldo Stracquadanio (@Astrac) <aldo.strac@gmail.com> +Jean-Louis Fuchs (@ganwell) <ganwell@fangorn.ch> +tek (@tek) +Yasha Borevich (@jjay) <j.borevich@gmail.com> +Aaron Griffin <aaronmgriffin@gmail.com> +andviro (@andviro) +Mike Gilbert (@floppym) <floppym@gentoo.org> +Aaron Meurer (@asmeurer) <asmeurer@gmail.com> +Lubos Trilety <ltrilety@redhat.com> +Akinori Hattori (@hattya) <hattya@gmail.com> +srusskih (@srusskih) +Steven Silvester (@blink1073) +Colin Duquesnoy (@ColinDuquesnoy) <colin.duquesnoy@gmail.com> +Jorgen Schaefer (@jorgenschaefer) <contact@jorgenschaefer.de> +Fredrik Bergroth (@fbergroth) +Mathias Fußenegger (@mfussenegger) +Syohei Yoshida (@syohex) <syohex@gmail.com> +ppalucky (@ppalucky) +immerrr (@immerrr) immerrr@gmail.com +Albertas Agejevas (@alga) +Savor d'Isavano (@KenetJervet) <newelevenken@163.com> +Phillip Berndt (@phillipberndt) <phillip.berndt@gmail.com> +Ian Lee (@IanLee1521) <IanLee1521@gmail.com> +Farkhad Khatamov (@hatamov) <comsgn@gmail.com> +Kevin Kelley (@kelleyk) <kelleyk@kelleyk.net> +Sid Shanker (@squidarth) <sid.p.shanker@gmail.com> +Reinoud Elhorst (@reinhrst) +Guido van Rossum (@gvanrossum) <guido@python.org> +Dmytro Sadovnychyi (@sadovnychyi) <jedi@dmit.ro> +Cristi Burcă (@scribu) +bstaint (@bstaint) +Mathias Rav (@Mortal) <rav@cs.au.dk> +Daniel Fiterman (@dfit99) <fitermandaniel2@gmail.com> +Simon Ruggier (@sruggier) +Élie Gouzien (@ElieGouzien) +Tim Gates (@timgates42) <tim.gates@iress.com> +Batuhan Taskaya (@isidentical) <isidentical@gmail.com> +Jocelyn Boullier (@Kazy) <jocelyn@boullier.bzh> + + +Note: (@user) means a github user name. diff --git a/contrib/python/parso/py3/LICENSE.txt b/contrib/python/parso/py3/LICENSE.txt new file mode 100644 index 0000000000..08c41db014 --- /dev/null +++ b/contrib/python/parso/py3/LICENSE.txt @@ -0,0 +1,86 @@ +All contributions towards parso are MIT licensed. + +Some Python files have been taken from the standard library and are therefore +PSF licensed. Modifications on these files are dual licensed (both MIT and +PSF). These files are: + +- parso/pgen2/* +- parso/tokenize.py +- parso/token.py +- test/test_pgen2.py + +Also some test files under test/normalizer_issue_files have been copied from +https://github.com/PyCQA/pycodestyle (Expat License == MIT License). 
+ +------------------------------------------------------------------------------- +The MIT License (MIT) + +Copyright (c) <2013-2017> <David Halter and others, see AUTHORS.txt> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +------------------------------------------------------------------------------- + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015 Python Software Foundation; All Rights Reserved" +are retained in Python alone or in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. 
This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. diff --git a/contrib/python/parso/py3/README.rst b/contrib/python/parso/py3/README.rst new file mode 100644 index 0000000000..98abc73605 --- /dev/null +++ b/contrib/python/parso/py3/README.rst @@ -0,0 +1,95 @@ +################################################################### +parso - A Python Parser +################################################################### + + +.. image:: https://github.com/davidhalter/parso/workflows/Build/badge.svg?branch=master + :target: https://github.com/davidhalter/parso/actions + :alt: GitHub Actions build status + +.. image:: https://coveralls.io/repos/github/davidhalter/parso/badge.svg?branch=master + :target: https://coveralls.io/github/davidhalter/parso?branch=master + :alt: Coverage Status + +.. image:: https://pepy.tech/badge/parso + :target: https://pepy.tech/project/parso + :alt: PyPI Downloads + +.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png + +Parso is a Python parser that supports error recovery and round-trip parsing +for different Python versions (in multiple Python versions). Parso is also able +to list multiple syntax errors in your python file. + +Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful +for other projects as well. + +Parso consists of a small API to parse Python and analyse the syntax tree. + +A simple example: + +.. code-block:: python + + >>> import parso + >>> module = parso.parse('hello + 1', version="3.9") + >>> expr = module.children[0] + >>> expr + PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>]) + >>> print(expr.get_code()) + hello + 1 + >>> name = expr.children[0] + >>> name + <Name: hello@1,0> + >>> name.end_pos + (1, 5) + >>> expr.end_pos + (1, 9) + +To list multiple issues: + +.. code-block:: python + + >>> grammar = parso.load_grammar() + >>> module = grammar.parse('foo +\nbar\ncontinue') + >>> error1, error2 = grammar.iter_errors(module) + >>> error1.message + 'SyntaxError: invalid syntax' + >>> error2.message + "SyntaxError: 'continue' not properly in loop" + +Resources +========= + +- `Testing <https://parso.readthedocs.io/en/latest/docs/development.html#testing>`_ +- `PyPI <https://pypi.python.org/pypi/parso>`_ +- `Docs <https://parso.readthedocs.org/en/latest/>`_ +- Uses `semantic versioning <https://semver.org/>`_ + +Installation +============ + + pip install parso + +Future +====== + +- There will be better support for refactoring and comments. Stay tuned. +- There's a WIP PEP8 validator. It's however not in a good shape, yet. + +Known Issues +============ + +- `async`/`await` are already used as keywords in Python3.6. +- `from __future__ import print_function` is not ignored. + + +Acknowledgements +================ + +- Guido van Rossum (@gvanrossum) for creating the parser generator pgen2 + (originally used in lib2to3). +- `Salome Schneider <https://www.crepes-schnaegg.ch/cr%C3%AApes-schn%C3%A4gg/kunst-f%C3%BCrs-cr%C3%AApes-mobil/>`_ + for the extremely awesome parso logo. + + +.. 
_jedi: https://github.com/davidhalter/jedi diff --git a/contrib/python/parso/py3/parso/__init__.py b/contrib/python/parso/py3/parso/__init__.py new file mode 100644 index 0000000000..0cceabedca --- /dev/null +++ b/contrib/python/parso/py3/parso/__init__.py @@ -0,0 +1,58 @@ +r""" +Parso is a Python parser that supports error recovery and round-trip parsing +for different Python versions (in multiple Python versions). Parso is also able +to list multiple syntax errors in your python file. + +Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful +for other projects as well. + +Parso consists of a small API to parse Python and analyse the syntax tree. + +.. _jedi: https://github.com/davidhalter/jedi + +A simple example: + +>>> import parso +>>> module = parso.parse('hello + 1', version="3.9") +>>> expr = module.children[0] +>>> expr +PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>]) +>>> print(expr.get_code()) +hello + 1 +>>> name = expr.children[0] +>>> name +<Name: hello@1,0> +>>> name.end_pos +(1, 5) +>>> expr.end_pos +(1, 9) + +To list multiple issues: + +>>> grammar = parso.load_grammar() +>>> module = grammar.parse('foo +\nbar\ncontinue') +>>> error1, error2 = grammar.iter_errors(module) +>>> error1.message +'SyntaxError: invalid syntax' +>>> error2.message +"SyntaxError: 'continue' not properly in loop" +""" + +from parso.parser import ParserSyntaxError +from parso.grammar import Grammar, load_grammar +from parso.utils import split_lines, python_bytes_to_unicode + + +__version__ = '0.8.3' + + +def parse(code=None, **kwargs): + """ + A utility function to avoid loading grammars. + Params are documented in :py:meth:`parso.Grammar.parse`. + + :param str version: The version used by :py:func:`parso.load_grammar`. + """ + version = kwargs.pop('version', None) + grammar = load_grammar(version=version) + return grammar.parse(code, **kwargs) diff --git a/contrib/python/parso/py3/parso/_compatibility.py b/contrib/python/parso/py3/parso/_compatibility.py new file mode 100644 index 0000000000..58b186fc43 --- /dev/null +++ b/contrib/python/parso/py3/parso/_compatibility.py @@ -0,0 +1,3 @@ +import platform + +is_pypy = platform.python_implementation() == 'PyPy' diff --git a/contrib/python/parso/py3/parso/cache.py b/contrib/python/parso/py3/parso/cache.py new file mode 100644 index 0000000000..1b9f33d78e --- /dev/null +++ b/contrib/python/parso/py3/parso/cache.py @@ -0,0 +1,277 @@ +import time +import os +import sys +import hashlib +import gc +import shutil +import platform +import logging +import warnings +import pickle +from pathlib import Path +from typing import Dict, Any + +LOG = logging.getLogger(__name__) + +_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10 # 10 minutes +""" +Cached files should survive at least a few minutes. +""" + +_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30 +""" +Maximum time for a cached file to survive if it is not +accessed within. +""" + +_CACHED_SIZE_TRIGGER = 600 +""" +This setting limits the amount of cached files. It's basically a way to start +garbage collection. + +The reasoning for this limit being as big as it is, is the following: + +Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This +makes Jedi use ~500mb of memory. Since we might want a bit more than those few +libraries, we just increase it a bit. +""" + +_PICKLE_VERSION = 33 +""" +Version number (integer) for file system cache. + +Increment this number when there are any incompatible changes in +the parser tree classes. 
For example, the following changes +are regarded as incompatible. + +- A class name is changed. +- A class is moved to another module. +- A __slot__ of a class is changed. +""" + +_VERSION_TAG = '%s-%s%s-%s' % ( + platform.python_implementation(), + sys.version_info[0], + sys.version_info[1], + _PICKLE_VERSION +) +""" +Short name for distinguish Python implementations and versions. + +It's a bit similar to `sys.implementation.cache_tag`. +See: http://docs.python.org/3/library/sys.html#sys.implementation +""" + + +def _get_default_cache_path(): + if platform.system().lower() == 'windows': + dir_ = Path(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso') + elif platform.system().lower() == 'darwin': + dir_ = Path('~', 'Library', 'Caches', 'Parso') + else: + dir_ = Path(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso') + return dir_.expanduser() + + +_default_cache_path = _get_default_cache_path() +""" +The path where the cache is stored. + +On Linux, this defaults to ``~/.cache/parso/``, on OS X to +``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``. +On Linux, if environment variable ``$XDG_CACHE_HOME`` is set, +``$XDG_CACHE_HOME/parso`` is used instead of the default one. +""" + +_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24 + + +def _get_cache_clear_lock_path(cache_path=None): + """ + The path where the cache lock is stored. + + Cache lock will prevent continous cache clearing and only allow garbage + collection once a day (can be configured in _CACHE_CLEAR_THRESHOLD). + """ + cache_path = cache_path or _default_cache_path + return cache_path.joinpath("PARSO-CACHE-LOCK") + + +parser_cache: Dict[str, Any] = {} + + +class _NodeCacheItem: + def __init__(self, node, lines, change_time=None): + self.node = node + self.lines = lines + if change_time is None: + change_time = time.time() + self.change_time = change_time + self.last_used = change_time + + +def load_module(hashed_grammar, file_io, cache_path=None): + """ + Returns a module or None, if it fails. + """ + p_time = file_io.get_last_modified() + if p_time is None: + return None + + try: + module_cache_item = parser_cache[hashed_grammar][file_io.path] + if p_time <= module_cache_item.change_time: + module_cache_item.last_used = time.time() + return module_cache_item.node + except KeyError: + return _load_from_file_system( + hashed_grammar, + file_io.path, + p_time, + cache_path=cache_path + ) + + +def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None): + cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path) + try: + # SUBBOTNIK-2721 - Для безопасности отключаем загрузку с диска + raise FileNotFoundError + if p_time > os.path.getmtime(cache_path): + # Cache is outdated + return None + + with open(cache_path, 'rb') as f: + gc.disable() + try: + module_cache_item = pickle.load(f) + finally: + gc.enable() + except FileNotFoundError: + return None + else: + _set_cache_item(hashed_grammar, path, module_cache_item) + LOG.debug('pickle loaded: %s', path) + return module_cache_item.node + + +def _set_cache_item(hashed_grammar, path, module_cache_item): + if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER: + # Garbage collection of old cache files. + # We are basically throwing everything away that hasn't been accessed + # in 10 minutes. 
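+        # ("Accessed" refers to each item's ``last_used`` timestamp: anything
+        # whose last use is older than _CACHED_FILE_MINIMUM_SURVIVAL is
+        # dropped from the in-memory parser_cache below.)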
+ cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL + for key, path_to_item_map in parser_cache.items(): + parser_cache[key] = { + path: node_item + for path, node_item in path_to_item_map.items() + if node_item.last_used > cutoff_time + } + + parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item + + +def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None): + path = file_io.path + try: + p_time = None if path is None else file_io.get_last_modified() + except OSError: + p_time = None + pickling = False + + item = _NodeCacheItem(module, lines, p_time) + _set_cache_item(hashed_grammar, path, item) + if pickling and path is not None: + try: + _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path) + except PermissionError: + # It's not really a big issue if the cache cannot be saved to the + # file system. It's still in RAM in that case. However we should + # still warn the user that this is happening. + warnings.warn( + 'Tried to save a file to %s, but got permission denied.' % path, + Warning + ) + else: + _remove_cache_and_update_lock(cache_path=cache_path) + + +def _save_to_file_system(hashed_grammar, path, item, cache_path=None): + with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f: + pickle.dump(item, f, pickle.HIGHEST_PROTOCOL) + + +def clear_cache(cache_path=None): + if cache_path is None: + cache_path = _default_cache_path + shutil.rmtree(cache_path) + parser_cache.clear() + + +def clear_inactive_cache( + cache_path=None, + inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL, +): + if cache_path is None: + cache_path = _default_cache_path + if not cache_path.exists(): + return False + for dirname in os.listdir(cache_path): + version_path = cache_path.joinpath(dirname) + if not version_path.is_dir(): + continue + for file in os.scandir(version_path): + if file.stat().st_atime + _CACHED_FILE_MAXIMUM_SURVIVAL <= time.time(): + try: + os.remove(file.path) + except OSError: # silently ignore all failures + continue + else: + return True + + +def _touch(path): + try: + os.utime(path, None) + except FileNotFoundError: + try: + file = open(path, 'a') + file.close() + except (OSError, IOError): # TODO Maybe log this? + return False + return True + + +def _remove_cache_and_update_lock(cache_path=None): + lock_path = _get_cache_clear_lock_path(cache_path=cache_path) + try: + clear_lock_time = os.path.getmtime(lock_path) + except FileNotFoundError: + clear_lock_time = None + if ( + clear_lock_time is None # first time + or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time() + ): + if not _touch(lock_path): + # First make sure that as few as possible other cleanup jobs also + # get started. There is still a race condition but it's probably + # not a big problem. 
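+            # Could not create or update the lock file, so skip clearing this
+            # time around.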
+ return False + + clear_inactive_cache(cache_path=cache_path) + + +def _get_hashed_path(hashed_grammar, path, cache_path=None): + directory = _get_cache_directory_path(cache_path=cache_path) + + file_hash = hashlib.sha256(str(path).encode("utf-8")).hexdigest() + return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash)) + + +def _get_cache_directory_path(cache_path=None): + if cache_path is None: + cache_path = _default_cache_path + directory = cache_path.joinpath(_VERSION_TAG) + if not directory.exists(): + os.makedirs(directory) + return directory diff --git a/contrib/python/parso/py3/parso/file_io.py b/contrib/python/parso/py3/parso/file_io.py new file mode 100644 index 0000000000..1220f3356b --- /dev/null +++ b/contrib/python/parso/py3/parso/file_io.py @@ -0,0 +1,43 @@ +import os +from pathlib import Path +from typing import Union + +import __res as res + + +class FileIO: + def __init__(self, path: Union[os.PathLike, str]): + if isinstance(path, str): + path = Path(path) + self.path = path + + def read(self): # Returns bytes/str + # We would like to read unicode here, but we cannot, because we are not + # sure if it is a valid unicode file. Therefore just read whatever is + # here. + data = res.resfs_read(self.path) + if data: + return data + with open(self.path, 'rb') as f: + return f.read() + + def get_last_modified(self): + """ + Returns float - timestamp or None, if path doesn't exist. + """ + try: + return os.path.getmtime(self.path) + except FileNotFoundError: + return None + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self.path) + + +class KnownContentFileIO(FileIO): + def __init__(self, path, content): + super().__init__(path) + self._content = content + + def read(self): + return self._content diff --git a/contrib/python/parso/py3/parso/grammar.py b/contrib/python/parso/py3/parso/grammar.py new file mode 100644 index 0000000000..1f81148682 --- /dev/null +++ b/contrib/python/parso/py3/parso/grammar.py @@ -0,0 +1,266 @@ +import hashlib +import os +import pkgutil +from typing import Generic, TypeVar, Union, Dict, Optional, Any +from pathlib import Path + +from parso._compatibility import is_pypy +from parso.pgen2 import generate_grammar +from parso.utils import split_lines, python_bytes_to_unicode, \ + PythonVersionInfo, parse_version_string +from parso.python.diff import DiffParser +from parso.python.tokenize import tokenize_lines, tokenize +from parso.python.token import PythonTokenTypes +from parso.cache import parser_cache, load_module, try_to_save_module +from parso.parser import BaseParser +from parso.python.parser import Parser as PythonParser +from parso.python.errors import ErrorFinderConfig +from parso.python import pep8 +from parso.file_io import FileIO, KnownContentFileIO +from parso.normalizer import RefactoringNormalizer, NormalizerConfig + +_loaded_grammars: Dict[str, 'Grammar'] = {} + +_NodeT = TypeVar("_NodeT") + + +class Grammar(Generic[_NodeT]): + """ + :py:func:`parso.load_grammar` returns instances of this class. + + Creating custom none-python grammars by calling this is not supported, yet. + + :param text: A BNF representation of your grammar. 
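+
+    A short usage sketch, going through the public helpers rather than
+    constructing a grammar directly::
+
+        >>> import parso
+        >>> grammar = parso.load_grammar(version='3.9')
+        >>> module = grammar.parse('x = 1')
+        >>> list(grammar.iter_errors(module))
+        []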
+ """ + _start_nonterminal: str + _error_normalizer_config: Optional[ErrorFinderConfig] = None + _token_namespace: Any = None + _default_normalizer_config: NormalizerConfig = pep8.PEP8NormalizerConfig() + + def __init__(self, text: str, *, tokenizer, parser=BaseParser, diff_parser=None): + self._pgen_grammar = generate_grammar( + text, + token_namespace=self._get_token_namespace() + ) + self._parser = parser + self._tokenizer = tokenizer + self._diff_parser = diff_parser + self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest() + + def parse(self, + code: Union[str, bytes] = None, + *, + error_recovery=True, + path: Union[os.PathLike, str] = None, + start_symbol: str = None, + cache=False, + diff_cache=False, + cache_path: Union[os.PathLike, str] = None, + file_io: FileIO = None) -> _NodeT: + """ + If you want to parse a Python file you want to start here, most likely. + + If you need finer grained control over the parsed instance, there will be + other ways to access it. + + :param str code: A unicode or bytes string. When it's not possible to + decode bytes to a string, returns a + :py:class:`UnicodeDecodeError`. + :param bool error_recovery: If enabled, any code will be returned. If + it is invalid, it will be returned as an error node. If disabled, + you will get a ParseError when encountering syntax errors in your + code. + :param str start_symbol: The grammar rule (nonterminal) that you want + to parse. Only allowed to be used when error_recovery is False. + :param str path: The path to the file you want to open. Only needed for caching. + :param bool cache: Keeps a copy of the parser tree in RAM and on disk + if a path is given. Returns the cached trees if the corresponding + files on disk have not changed. Note that this stores pickle files + on your file system (e.g. for Linux in ``~/.cache/parso/``). + :param bool diff_cache: Diffs the cached python module against the new + code and tries to parse only the parts that have changed. Returns + the same (changed) module that is found in cache. Using this option + requires you to not do anything anymore with the cached modules + under that path, because the contents of it might change. This + option is still somewhat experimental. If you want stability, + please don't use it. + :param bool cache_path: If given saves the parso cache in this + directory. If not given, defaults to the default cache places on + each platform. + + :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a + :py:class:`parso.python.tree.Module`. 
+ """ + if code is None and path is None and file_io is None: + raise TypeError("Please provide either code or a path.") + + if isinstance(path, str): + path = Path(path) + if isinstance(cache_path, str): + cache_path = Path(cache_path) + + if start_symbol is None: + start_symbol = self._start_nonterminal + + if error_recovery and start_symbol != 'file_input': + raise NotImplementedError("This is currently not implemented.") + + if file_io is None: + if code is None: + file_io = FileIO(path) # type: ignore + else: + file_io = KnownContentFileIO(path, code) + + if cache and file_io.path is not None: + module_node = load_module(self._hashed, file_io, cache_path=cache_path) + if module_node is not None: + return module_node # type: ignore + + if code is None: + code = file_io.read() + code = python_bytes_to_unicode(code) + + lines = split_lines(code, keepends=True) + if diff_cache: + if self._diff_parser is None: + raise TypeError("You have to define a diff parser to be able " + "to use this option.") + try: + module_cache_item = parser_cache[self._hashed][file_io.path] + except KeyError: + pass + else: + module_node = module_cache_item.node + old_lines = module_cache_item.lines + if old_lines == lines: + return module_node # type: ignore + + new_node = self._diff_parser( + self._pgen_grammar, self._tokenizer, module_node + ).update( + old_lines=old_lines, + new_lines=lines + ) + try_to_save_module(self._hashed, file_io, new_node, lines, + # Never pickle in pypy, it's slow as hell. + pickling=cache and not is_pypy, + cache_path=cache_path) + return new_node # type: ignore + + tokens = self._tokenizer(lines) + + p = self._parser( + self._pgen_grammar, + error_recovery=error_recovery, + start_nonterminal=start_symbol + ) + root_node = p.parse(tokens=tokens) + + if cache or diff_cache: + try_to_save_module(self._hashed, file_io, root_node, lines, + # Never pickle in pypy, it's slow as hell. + pickling=cache and not is_pypy, + cache_path=cache_path) + return root_node # type: ignore + + def _get_token_namespace(self): + ns = self._token_namespace + if ns is None: + raise ValueError("The token namespace should be set.") + return ns + + def iter_errors(self, node): + """ + Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of + :py:class:`parso.normalizer.Issue` objects. For Python this is + a list of syntax/indentation errors. + """ + if self._error_normalizer_config is None: + raise ValueError("No error normalizer specified for this grammar.") + + return self._get_normalizer_issues(node, self._error_normalizer_config) + + def refactor(self, base_node, node_to_str_map): + return RefactoringNormalizer(node_to_str_map).walk(base_node) + + def _get_normalizer(self, normalizer_config): + if normalizer_config is None: + normalizer_config = self._default_normalizer_config + if normalizer_config is None: + raise ValueError("You need to specify a normalizer, because " + "there's no default normalizer for this tree.") + return normalizer_config.create_normalizer(self) + + def _normalize(self, node, normalizer_config=None): + """ + TODO this is not public, yet. + The returned code will be normalized, e.g. PEP8 for Python. 
+ """ + normalizer = self._get_normalizer(normalizer_config) + return normalizer.walk(node) + + def _get_normalizer_issues(self, node, normalizer_config=None): + normalizer = self._get_normalizer(normalizer_config) + normalizer.walk(node) + return normalizer.issues + + def __repr__(self): + nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys() + txt = ' '.join(list(nonterminals)[:3]) + ' ...' + return '<%s:%s>' % (self.__class__.__name__, txt) + + +class PythonGrammar(Grammar): + _error_normalizer_config = ErrorFinderConfig() + _token_namespace = PythonTokenTypes + _start_nonterminal = 'file_input' + + def __init__(self, version_info: PythonVersionInfo, bnf_text: str): + super().__init__( + bnf_text, + tokenizer=self._tokenize_lines, + parser=PythonParser, + diff_parser=DiffParser + ) + self.version_info = version_info + + def _tokenize_lines(self, lines, **kwargs): + return tokenize_lines(lines, version_info=self.version_info, **kwargs) + + def _tokenize(self, code): + # Used by Jedi. + return tokenize(code, version_info=self.version_info) + + +def load_grammar(*, version: str = None, path: str = None): + """ + Loads a :py:class:`parso.Grammar`. The default version is the current Python + version. + + :param str version: A python version string, e.g. ``version='3.8'``. + :param str path: A path to a grammar file + """ + version_info = parse_version_string(version) + + file = path or os.path.join( + 'python', + 'grammar%s%s.txt' % (version_info.major, version_info.minor) + ) + + global _loaded_grammars + path = os.path.join(os.path.dirname(__file__), file) + try: + return _loaded_grammars[path] + except KeyError: + try: + bnf_text = pkgutil.get_data("parso", file).decode("utf-8") + if bnf_text is None: + raise FileNotFoundError + + grammar = PythonGrammar(version_info, bnf_text) + return _loaded_grammars.setdefault(path, grammar) + except (FileNotFoundError, IOError): + message = "Python version %s.%s is currently not supported." 
% ( + version_info.major, version_info.minor + ) + raise NotImplementedError(message) diff --git a/contrib/python/parso/py3/parso/normalizer.py b/contrib/python/parso/py3/parso/normalizer.py new file mode 100644 index 0000000000..a95f029eb8 --- /dev/null +++ b/contrib/python/parso/py3/parso/normalizer.py @@ -0,0 +1,198 @@ +from contextlib import contextmanager +from typing import Dict, List + + +class _NormalizerMeta(type): + def __new__(cls, name, bases, dct): + new_cls = type.__new__(cls, name, bases, dct) + new_cls.rule_value_classes = {} + new_cls.rule_type_classes = {} + return new_cls + + +class Normalizer(metaclass=_NormalizerMeta): + _rule_type_instances: Dict[str, List[type]] = {} + _rule_value_instances: Dict[str, List[type]] = {} + + def __init__(self, grammar, config): + self.grammar = grammar + self._config = config + self.issues = [] + + self._rule_type_instances = self._instantiate_rules('rule_type_classes') + self._rule_value_instances = self._instantiate_rules('rule_value_classes') + + def _instantiate_rules(self, attr): + dct = {} + for base in type(self).mro(): + rules_map = getattr(base, attr, {}) + for type_, rule_classes in rules_map.items(): + new = [rule_cls(self) for rule_cls in rule_classes] + dct.setdefault(type_, []).extend(new) + return dct + + def walk(self, node): + self.initialize(node) + value = self.visit(node) + self.finalize() + return value + + def visit(self, node): + try: + children = node.children + except AttributeError: + return self.visit_leaf(node) + else: + with self.visit_node(node): + return ''.join(self.visit(child) for child in children) + + @contextmanager + def visit_node(self, node): + self._check_type_rules(node) + yield + + def _check_type_rules(self, node): + for rule in self._rule_type_instances.get(node.type, []): + rule.feed_node(node) + + def visit_leaf(self, leaf): + self._check_type_rules(leaf) + + for rule in self._rule_value_instances.get(leaf.value, []): + rule.feed_node(leaf) + + return leaf.prefix + leaf.value + + def initialize(self, node): + pass + + def finalize(self): + pass + + def add_issue(self, node, code, message): + issue = Issue(node, code, message) + if issue not in self.issues: + self.issues.append(issue) + return True + + @classmethod + def register_rule(cls, *, value=None, values=(), type=None, types=()): + """ + Use it as a class decorator:: + + normalizer = Normalizer('grammar', 'config') + @normalizer.register_rule(value='foo') + class MyRule(Rule): + error_code = 42 + """ + values = list(values) + types = list(types) + if value is not None: + values.append(value) + if type is not None: + types.append(type) + + if not values and not types: + raise ValueError("You must register at least something.") + + def decorator(rule_cls): + for v in values: + cls.rule_value_classes.setdefault(v, []).append(rule_cls) + for t in types: + cls.rule_type_classes.setdefault(t, []).append(rule_cls) + return rule_cls + + return decorator + + +class NormalizerConfig: + normalizer_class = Normalizer + + def create_normalizer(self, grammar): + if self.normalizer_class is None: + return None + + return self.normalizer_class(grammar, self) + + +class Issue: + def __init__(self, node, code, message): + self.code = code + """ + An integer code that stands for the type of error. + """ + self.message = message + """ + A message (string) for the issue. + """ + self.start_pos = node.start_pos + """ + The start position position of the error as a tuple (line, column). As + always in |parso| the first line is 1 and the first column 0. 
+ """ + self.end_pos = node.end_pos + + def __eq__(self, other): + return self.start_pos == other.start_pos and self.code == other.code + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash((self.code, self.start_pos)) + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.code) + + +class Rule: + code: int + message: str + + def __init__(self, normalizer): + self._normalizer = normalizer + + def is_issue(self, node): + raise NotImplementedError() + + def get_node(self, node): + return node + + def _get_message(self, message, node): + if message is None: + message = self.message + if message is None: + raise ValueError("The message on the class is not set.") + return message + + def add_issue(self, node, code=None, message=None): + if code is None: + code = self.code + if code is None: + raise ValueError("The error code on the class is not set.") + + message = self._get_message(message, node) + + self._normalizer.add_issue(node, code, message) + + def feed_node(self, node): + if self.is_issue(node): + issue_node = self.get_node(node) + self.add_issue(issue_node) + + +class RefactoringNormalizer(Normalizer): + def __init__(self, node_to_str_map): + self._node_to_str_map = node_to_str_map + + def visit(self, node): + try: + return self._node_to_str_map[node] + except KeyError: + return super().visit(node) + + def visit_leaf(self, leaf): + try: + return self._node_to_str_map[leaf] + except KeyError: + return super().visit_leaf(leaf) diff --git a/contrib/python/parso/py3/parso/parser.py b/contrib/python/parso/py3/parso/parser.py new file mode 100644 index 0000000000..37466435dd --- /dev/null +++ b/contrib/python/parso/py3/parso/parser.py @@ -0,0 +1,210 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright David Halter and Contributors +# Modifications are dual-licensed: MIT and PSF. +# 99% of the code is different from pgen2, now. + +""" +The ``Parser`` tries to convert the available Python code in an easy to read +format, something like an abstract syntax tree. The classes who represent this +tree, are sitting in the :mod:`parso.tree` module. + +The Python module ``tokenize`` is a very important part in the ``Parser``, +because it splits the code into different words (tokens). Sometimes it looks a +bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast`` +module for this? Well, ``ast`` does a very good job understanding proper Python +code, but fails to work as soon as there's a single line of broken code. + +There's one important optimization that needs to be known: Statements are not +being parsed completely. ``Statement`` is just a representation of the tokens +within the statement. This lowers memory usage and cpu time and reduces the +complexity of the ``Parser`` (there's another parser sitting inside +``Statement``, which produces ``Array`` and ``Call``). +""" +from typing import Dict, Type + +from parso import tree +from parso.pgen2.generator import ReservedString + + +class ParserSyntaxError(Exception): + """ + Contains error information about the parser tree. + + May be raised as an exception. + """ + def __init__(self, message, error_leaf): + self.message = message + self.error_leaf = error_leaf + + +class InternalParseError(Exception): + """ + Exception to signal the parser is stuck and error recovery didn't help. + Basically this shouldn't happen. It's a sign that something is really + wrong. 
+ """ + + def __init__(self, msg, type_, value, start_pos): + Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" % + (msg, type_.name, value, start_pos)) + self.msg = msg + self.type = type + self.value = value + self.start_pos = start_pos + + +class Stack(list): + def _allowed_transition_names_and_token_types(self): + def iterate(): + # An API just for Jedi. + for stack_node in reversed(self): + for transition in stack_node.dfa.transitions: + if isinstance(transition, ReservedString): + yield transition.value + else: + yield transition # A token type + + if not stack_node.dfa.is_final: + break + + return list(iterate()) + + +class StackNode: + def __init__(self, dfa): + self.dfa = dfa + self.nodes = [] + + @property + def nonterminal(self): + return self.dfa.from_rule + + def __repr__(self): + return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes) + + +def _token_to_transition(grammar, type_, value): + # Map from token to label + if type_.value.contains_syntax: + # Check for reserved words (keywords) + try: + return grammar.reserved_syntax_strings[value] + except KeyError: + pass + + return type_ + + +class BaseParser: + """Parser engine. + + A Parser instance contains state pertaining to the current token + sequence, and should not be used concurrently by different threads + to parse separate token sequences. + + See python/tokenize.py for how to get input tokens by a string. + + When a syntax error occurs, error_recovery() is called. + """ + + node_map: Dict[str, Type[tree.BaseNode]] = {} + default_node = tree.Node + + leaf_map: Dict[str, Type[tree.Leaf]] = {} + default_leaf = tree.Leaf + + def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False): + self._pgen_grammar = pgen_grammar + self._start_nonterminal = start_nonterminal + self._error_recovery = error_recovery + + def parse(self, tokens): + first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0] + self.stack = Stack([StackNode(first_dfa)]) + + for token in tokens: + self._add_token(token) + + while True: + tos = self.stack[-1] + if not tos.dfa.is_final: + # We never broke out -- EOF is too soon -- Unfinished statement. + # However, the error recovery might have added the token again, if + # the stack is empty, we're fine. + raise InternalParseError( + "incomplete input", token.type, token.string, token.start_pos + ) + + if len(self.stack) > 1: + self._pop() + else: + return self.convert_node(tos.nonterminal, tos.nodes) + + def error_recovery(self, token): + if self._error_recovery: + raise NotImplementedError("Error Recovery is not implemented") + else: + type_, value, start_pos, prefix = token + error_leaf = tree.ErrorLeaf(type_, value, start_pos, prefix) + raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf) + + def convert_node(self, nonterminal, children): + try: + node = self.node_map[nonterminal](children) + except KeyError: + node = self.default_node(nonterminal, children) + return node + + def convert_leaf(self, type_, value, prefix, start_pos): + try: + return self.leaf_map[type_](value, start_pos, prefix) + except KeyError: + return self.default_leaf(value, start_pos, prefix) + + def _add_token(self, token): + """ + This is the only core function for parsing. Here happens basically + everything. Everything is well prepared by the parser generator and we + only apply the necessary steps here. 
+ """ + grammar = self._pgen_grammar + stack = self.stack + type_, value, start_pos, prefix = token + transition = _token_to_transition(grammar, type_, value) + + while True: + try: + plan = stack[-1].dfa.transitions[transition] + break + except KeyError: + if stack[-1].dfa.is_final: + self._pop() + else: + self.error_recovery(token) + return + except IndexError: + raise InternalParseError("too much input", type_, value, start_pos) + + stack[-1].dfa = plan.next_dfa + + for push in plan.dfa_pushes: + stack.append(StackNode(push)) + + leaf = self.convert_leaf(type_, value, prefix, start_pos) + stack[-1].nodes.append(leaf) + + def _pop(self): + tos = self.stack.pop() + # If there's exactly one child, return that child instead of + # creating a new node. We still create expr_stmt and + # file_input though, because a lot of Jedi depends on its + # logic. + if len(tos.nodes) == 1: + new_node = tos.nodes[0] + else: + new_node = self.convert_node(tos.dfa.from_rule, tos.nodes) + + self.stack[-1].nodes.append(new_node) diff --git a/contrib/python/parso/py3/parso/pgen2/__init__.py b/contrib/python/parso/py3/parso/pgen2/__init__.py new file mode 100644 index 0000000000..d4d9dcdc49 --- /dev/null +++ b/contrib/python/parso/py3/parso/pgen2/__init__.py @@ -0,0 +1,10 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. +# Copyright 2014 David Halter and Contributors +# Modifications are dual-licensed: MIT and PSF. + +from parso.pgen2.generator import generate_grammar diff --git a/contrib/python/parso/py3/parso/pgen2/generator.py b/contrib/python/parso/py3/parso/pgen2/generator.py new file mode 100644 index 0000000000..db6e1cb326 --- /dev/null +++ b/contrib/python/parso/py3/parso/pgen2/generator.py @@ -0,0 +1,382 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright David Halter and Contributors +# Modifications are dual-licensed: MIT and PSF. + +""" +This module defines the data structures used to represent a grammar. + +Specifying grammars in pgen is possible with this grammar:: + + grammar: (NEWLINE | rule)* ENDMARKER + rule: NAME ':' rhs NEWLINE + rhs: items ('|' items)* + items: item+ + item: '[' rhs ']' | atom ['+' | '*'] + atom: '(' rhs ')' | NAME | STRING + +This grammar is self-referencing. + +This parser generator (pgen2) was created by Guido Rossum and used for lib2to3. +Most of the code has been refactored to make it more Pythonic. Since this was a +"copy" of the CPython Parser parser "pgen", there was some work needed to make +it more readable. It should also be slightly faster than the original pgen2, +because we made some optimizations. +""" + +from ast import literal_eval +from typing import TypeVar, Generic, Mapping, Sequence, Set, Union + +from parso.pgen2.grammar_parser import GrammarParser, NFAState + +_TokenTypeT = TypeVar("_TokenTypeT") + + +class Grammar(Generic[_TokenTypeT]): + """ + Once initialized, this class supplies the grammar tables for the + parsing engine implemented by parse.py. The parsing engine + accesses the instance variables directly. + + The only important part in this parsers are dfas and transitions between + dfas. 
+ """ + + def __init__(self, + start_nonterminal: str, + rule_to_dfas: Mapping[str, Sequence['DFAState[_TokenTypeT]']], + reserved_syntax_strings: Mapping[str, 'ReservedString']): + self.nonterminal_to_dfas = rule_to_dfas + self.reserved_syntax_strings = reserved_syntax_strings + self.start_nonterminal = start_nonterminal + + +class DFAPlan: + """ + Plans are used for the parser to create stack nodes and do the proper + DFA state transitions. + """ + def __init__(self, next_dfa: 'DFAState', dfa_pushes: Sequence['DFAState'] = []): + self.next_dfa = next_dfa + self.dfa_pushes = dfa_pushes + + def __repr__(self): + return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) + + +class DFAState(Generic[_TokenTypeT]): + """ + The DFAState object is the core class for pretty much anything. DFAState + are the vertices of an ordered graph while arcs and transitions are the + edges. + + Arcs are the initial edges, where most DFAStates are not connected and + transitions are then calculated to connect the DFA state machines that have + different nonterminals. + """ + def __init__(self, from_rule: str, nfa_set: Set[NFAState], final: NFAState): + assert isinstance(nfa_set, set) + assert isinstance(next(iter(nfa_set)), NFAState) + assert isinstance(final, NFAState) + self.from_rule = from_rule + self.nfa_set = nfa_set + # map from terminals/nonterminals to DFAState + self.arcs: Mapping[str, DFAState] = {} + # In an intermediary step we set these nonterminal arcs (which has the + # same structure as arcs). These don't contain terminals anymore. + self.nonterminal_arcs: Mapping[str, DFAState] = {} + + # Transitions are basically the only thing that the parser is using + # with is_final. Everyting else is purely here to create a parser. + self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {} + self.is_final = final in nfa_set + + def add_arc(self, next_, label): + assert isinstance(label, str) + assert label not in self.arcs + assert isinstance(next_, DFAState) + self.arcs[label] = next_ + + def unifystate(self, old, new): + for label, next_ in self.arcs.items(): + if next_ is old: + self.arcs[label] = new + + def __eq__(self, other): + # Equality test -- ignore the nfa_set instance variable + assert isinstance(other, DFAState) + if self.is_final != other.is_final: + return False + # Can't just return self.arcs == other.arcs, because that + # would invoke this method recursively, with cycles... + if len(self.arcs) != len(other.arcs): + return False + for label, next_ in self.arcs.items(): + if next_ is not other.arcs.get(label): + return False + return True + + def __repr__(self): + return '<%s: %s is_final=%s>' % ( + self.__class__.__name__, self.from_rule, self.is_final + ) + + +class ReservedString: + """ + Most grammars will have certain keywords and operators that are mentioned + in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER). + This class basically is the former. + """ + + def __init__(self, value: str): + self.value = value + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self.value) + + +def _simplify_dfas(dfas): + """ + This is not theoretically optimal, but works well enough. + Algorithm: repeatedly look for two states that have the same + set of arcs (same labels pointing to the same nodes) and + unify them, until things stop changing. 
+ + dfas is a list of DFAState instances + """ + changes = True + while changes: + changes = False + for i, state_i in enumerate(dfas): + for j in range(i + 1, len(dfas)): + state_j = dfas[j] + if state_i == state_j: + del dfas[j] + for state in dfas: + state.unifystate(state_j, state_i) + changes = True + break + + +def _make_dfas(start, finish): + """ + Uses the powerset construction algorithm to create DFA states from sets of + NFA states. + + Also does state reduction if some states are not needed. + """ + # To turn an NFA into a DFA, we define the states of the DFA + # to correspond to *sets* of states of the NFA. Then do some + # state reduction. + assert isinstance(start, NFAState) + assert isinstance(finish, NFAState) + + def addclosure(nfa_state, base_nfa_set): + assert isinstance(nfa_state, NFAState) + if nfa_state in base_nfa_set: + return + base_nfa_set.add(nfa_state) + for nfa_arc in nfa_state.arcs: + if nfa_arc.nonterminal_or_string is None: + addclosure(nfa_arc.next, base_nfa_set) + + base_nfa_set = set() + addclosure(start, base_nfa_set) + states = [DFAState(start.from_rule, base_nfa_set, finish)] + for state in states: # NB states grows while we're iterating + arcs = {} + # Find state transitions and store them in arcs. + for nfa_state in state.nfa_set: + for nfa_arc in nfa_state.arcs: + if nfa_arc.nonterminal_or_string is not None: + nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set()) + addclosure(nfa_arc.next, nfa_set) + + # Now create the dfa's with no None's in arcs anymore. All Nones have + # been eliminated and state transitions (arcs) are properly defined, we + # just need to create the dfa's. + for nonterminal_or_string, nfa_set in arcs.items(): + for nested_state in states: + if nested_state.nfa_set == nfa_set: + # The DFA state already exists for this rule. + break + else: + nested_state = DFAState(start.from_rule, nfa_set, finish) + states.append(nested_state) + + state.add_arc(nested_state, nonterminal_or_string) + return states # List of DFAState instances; first one is start + + +def _dump_nfa(start, finish): + print("Dump of NFA for", start.from_rule) + todo = [start] + for i, state in enumerate(todo): + print(" State", i, state is finish and "(final)" or "") + for arc in state.arcs: + label, next_ = arc.nonterminal_or_string, arc.next + if next_ in todo: + j = todo.index(next_) + else: + j = len(todo) + todo.append(next_) + if label is None: + print(" -> %d" % j) + else: + print(" %s -> %d" % (label, j)) + + +def _dump_dfas(dfas): + print("Dump of DFA for", dfas[0].from_rule) + for i, state in enumerate(dfas): + print(" State", i, state.is_final and "(final)" or "") + for nonterminal, next_ in state.arcs.items(): + print(" %s -> %d" % (nonterminal, dfas.index(next_))) + + +def generate_grammar(bnf_grammar: str, token_namespace) -> Grammar: + """ + ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for + at-least-once repetition, [] for optional parts, | for alternatives and () + for grouping). + + It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its + own parser. 
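+
+    A tiny illustrative grammar in this dialect (not one of the bundled
+    Python grammars)::
+
+        simple_assign: NAME '=' NUMBER NEWLINE
+        file_input: (NEWLINE | simple_assign)* ENDMARKER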
+ """ + rule_to_dfas = {} + start_nonterminal = None + for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse(): + # _dump_nfa(nfa_a, nfa_z) + dfas = _make_dfas(nfa_a, nfa_z) + # _dump_dfas(dfas) + # oldlen = len(dfas) + _simplify_dfas(dfas) + # newlen = len(dfas) + rule_to_dfas[nfa_a.from_rule] = dfas + # print(nfa_a.from_rule, oldlen, newlen) + + if start_nonterminal is None: + start_nonterminal = nfa_a.from_rule + + reserved_strings: Mapping[str, ReservedString] = {} + for nonterminal, dfas in rule_to_dfas.items(): + for dfa_state in dfas: + for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): + if terminal_or_nonterminal in rule_to_dfas: + dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa + else: + transition = _make_transition( + token_namespace, + reserved_strings, + terminal_or_nonterminal + ) + dfa_state.transitions[transition] = DFAPlan(next_dfa) + + _calculate_tree_traversal(rule_to_dfas) + return Grammar(start_nonterminal, rule_to_dfas, reserved_strings) # type: ignore + + +def _make_transition(token_namespace, reserved_syntax_strings, label): + """ + Creates a reserved string ("if", "for", "*", ...) or returns the token type + (NUMBER, STRING, ...) for a given grammar terminal. + """ + if label[0].isalpha(): + # A named token (e.g. NAME, NUMBER, STRING) + return getattr(token_namespace, label) + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + assert not label.startswith('"""') and not label.startswith("'''") + value = literal_eval(label) + try: + return reserved_syntax_strings[value] + except KeyError: + r = reserved_syntax_strings[value] = ReservedString(value) + return r + + +def _calculate_tree_traversal(nonterminal_to_dfas): + """ + By this point we know how dfas can move around within a stack node, but we + don't know how we can add a new stack node (nonterminal transitions). + """ + # Map from grammar rule (nonterminal) name to a set of tokens. + first_plans = {} + + nonterminals = list(nonterminal_to_dfas.keys()) + nonterminals.sort() + for nonterminal in nonterminals: + if nonterminal not in first_plans: + _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal) + + # Now that we have calculated the first terminals, we are sure that + # there is no left recursion. + + for dfas in nonterminal_to_dfas.values(): + for dfa_state in dfas: + transitions = dfa_state.transitions + for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): + for transition, pushes in first_plans[nonterminal].items(): + if transition in transitions: + prev_plan = transitions[transition] + # Make sure these are sorted so that error messages are + # at least deterministic + choices = sorted([ + ( + prev_plan.dfa_pushes[0].from_rule + if prev_plan.dfa_pushes + else prev_plan.next_dfa.from_rule + ), + ( + pushes[0].from_rule + if pushes else next_dfa.from_rule + ), + ]) + raise ValueError( + "Rule %s is ambiguous; given a %s token, we " + "can't determine if we should evaluate %s or %s." + % ( + ( + dfa_state.from_rule, + transition, + ) + tuple(choices) + ) + ) + transitions[transition] = DFAPlan(next_dfa, pushes) + + +def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): + """ + Calculates the first plan in the first_plans dictionary for every given + nonterminal. This is going to be used to know when to create stack nodes. + """ + dfas = nonterminal_to_dfas[nonterminal] + new_first_plans = {} + first_plans[nonterminal] = None # dummy to detect left recursion + # We only need to check the first dfa. 
All the following ones are not + # interesting to find first terminals. + state = dfas[0] + for transition, next_ in state.transitions.items(): + # It's a string. We have finally found a possible first token. + new_first_plans[transition] = [next_.next_dfa] + + for nonterminal2, next_ in state.nonterminal_arcs.items(): + # It's a nonterminal and we have either a left recursion issue + # in the grammar or we have to recurse. + try: + first_plans2 = first_plans[nonterminal2] + except KeyError: + first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2) + else: + if first_plans2 is None: + raise ValueError("left recursion for rule %r" % nonterminal) + + for t, pushes in first_plans2.items(): + new_first_plans[t] = [next_] + pushes + + first_plans[nonterminal] = new_first_plans + return new_first_plans diff --git a/contrib/python/parso/py3/parso/pgen2/grammar_parser.py b/contrib/python/parso/py3/parso/pgen2/grammar_parser.py new file mode 100644 index 0000000000..582efb43ac --- /dev/null +++ b/contrib/python/parso/py3/parso/pgen2/grammar_parser.py @@ -0,0 +1,160 @@ +# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +# Modifications: +# Copyright David Halter and Contributors +# Modifications are dual-licensed: MIT and PSF. +from typing import Optional, Iterator, Tuple, List + +from parso.python.tokenize import tokenize +from parso.utils import parse_version_string +from parso.python.token import PythonTokenTypes + + +class NFAArc: + def __init__(self, next_: 'NFAState', nonterminal_or_string: Optional[str]): + self.next: NFAState = next_ + self.nonterminal_or_string: Optional[str] = nonterminal_or_string + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.nonterminal_or_string) + + +class NFAState: + def __init__(self, from_rule: str): + self.from_rule: str = from_rule + self.arcs: List[NFAArc] = [] + + def add_arc(self, next_, nonterminal_or_string=None): + assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str) + assert isinstance(next_, NFAState) + self.arcs.append(NFAArc(next_, nonterminal_or_string)) + + def __repr__(self): + return '<%s: from %s>' % (self.__class__.__name__, self.from_rule) + + +class GrammarParser: + """ + The parser for Python grammar files. + """ + def __init__(self, bnf_grammar: str): + self._bnf_grammar = bnf_grammar + self.generator = tokenize( + bnf_grammar, + version_info=parse_version_string('3.9') + ) + self._gettoken() # Initialize lookahead + + def parse(self) -> Iterator[Tuple[NFAState, NFAState]]: + # grammar: (NEWLINE | rule)* ENDMARKER + while self.type != PythonTokenTypes.ENDMARKER: + while self.type == PythonTokenTypes.NEWLINE: + self._gettoken() + + # rule: NAME ':' rhs NEWLINE + self._current_rule_name = self._expect(PythonTokenTypes.NAME) + self._expect(PythonTokenTypes.OP, ':') + + a, z = self._parse_rhs() + self._expect(PythonTokenTypes.NEWLINE) + + yield a, z + + def _parse_rhs(self): + # rhs: items ('|' items)* + a, z = self._parse_items() + if self.value != "|": + return a, z + else: + aa = NFAState(self._current_rule_name) + zz = NFAState(self._current_rule_name) + while True: + # Add the possibility to go into the state of a and come back + # to finish. 
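+                # (aa and zz are the shared entry and exit of the whole
+                # alternation: each alternative gets an epsilon arc aa -> a
+                # into it and z -> zz out of it.)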
+ aa.add_arc(a) + z.add_arc(zz) + if self.value != "|": + break + + self._gettoken() + a, z = self._parse_items() + return aa, zz + + def _parse_items(self): + # items: item+ + a, b = self._parse_item() + while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \ + or self.value in ('(', '['): + c, d = self._parse_item() + # Need to end on the next item. + b.add_arc(c) + b = d + return a, b + + def _parse_item(self): + # item: '[' rhs ']' | atom ['+' | '*'] + if self.value == "[": + self._gettoken() + a, z = self._parse_rhs() + self._expect(PythonTokenTypes.OP, ']') + # Make it also possible that there is no token and change the + # state. + a.add_arc(z) + return a, z + else: + a, z = self._parse_atom() + value = self.value + if value not in ("+", "*"): + return a, z + self._gettoken() + # Make it clear that we can go back to the old state and repeat. + z.add_arc(a) + if value == "+": + return a, z + else: + # The end state is the same as the beginning, nothing must + # change. + return a, a + + def _parse_atom(self): + # atom: '(' rhs ')' | NAME | STRING + if self.value == "(": + self._gettoken() + a, z = self._parse_rhs() + self._expect(PythonTokenTypes.OP, ')') + return a, z + elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING): + a = NFAState(self._current_rule_name) + z = NFAState(self._current_rule_name) + # Make it clear that the state transition requires that value. + a.add_arc(z, self.value) + self._gettoken() + return a, z + else: + self._raise_error("expected (...) or NAME or STRING, got %s/%s", + self.type, self.value) + + def _expect(self, type_, value=None): + if self.type != type_: + self._raise_error("expected %s, got %s [%s]", + type_, self.type, self.value) + if value is not None and self.value != value: + self._raise_error("expected %s, got %s", value, self.value) + value = self.value + self._gettoken() + return value + + def _gettoken(self): + tup = next(self.generator) + self.type, self.value, self.begin, prefix = tup + + def _raise_error(self, msg, *args): + if args: + try: + msg = msg % args + except: + msg = " ".join([msg] + list(map(str, args))) + line = self._bnf_grammar.splitlines()[self.begin[0] - 1] + raise SyntaxError(msg, ('<grammar>', self.begin[0], + self.begin[1], line)) diff --git a/contrib/python/parso/py3/parso/py.typed b/contrib/python/parso/py3/parso/py.typed new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/parso/py3/parso/py.typed diff --git a/contrib/python/parso/py3/parso/python/__init__.py b/contrib/python/parso/py3/parso/python/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/__init__.py diff --git a/contrib/python/parso/py3/parso/python/diff.py b/contrib/python/parso/py3/parso/python/diff.py new file mode 100644 index 0000000000..ba999fa4b5 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/diff.py @@ -0,0 +1,886 @@ +""" +The diff parser is trying to be a faster version of the normal parser by trying +to reuse the nodes of a previous pass over the same file. This is also called +incremental parsing in parser literature. The difference is mostly that with +incremental parsing you get a range that needs to be reparsed. Here we +calculate that range ourselves by using difflib. After that it's essentially +incremental parsing. + +The biggest issue of this approach is that we reuse nodes in a mutable way. The +intial design and idea is quite problematic for this parser, but it is also +pretty fast. 
Measurements showed that just copying nodes in Python is simply +quite a bit slower (especially for big files >3 kLOC). Therefore we did not +want to get rid of the mutable nodes, since this is usually not an issue. + +This is by far the hardest software I ever wrote, exactly because the initial +design is crappy. When you have to account for a lot of mutable state, it +creates a ton of issues that you would otherwise not have. This file took +probably 3-6 months to write, which is insane for a parser. + +There is a fuzzer in that helps test this whole thing. Please use it if you +make changes here. If you run the fuzzer like:: + + test/fuzz_diff_parser.py random -n 100000 + +you can be pretty sure that everything is still fine. I sometimes run the +fuzzer up to 24h to make sure everything is still ok. +""" +import re +import difflib +from collections import namedtuple +import logging + +from parso.utils import split_lines +from parso.python.parser import Parser +from parso.python.tree import EndMarker +from parso.python.tokenize import PythonToken, BOM_UTF8_STRING +from parso.python.token import PythonTokenTypes + +LOG = logging.getLogger(__name__) +DEBUG_DIFF_PARSER = False + +_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT' + +NEWLINE = PythonTokenTypes.NEWLINE +DEDENT = PythonTokenTypes.DEDENT +NAME = PythonTokenTypes.NAME +ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT +ENDMARKER = PythonTokenTypes.ENDMARKER + + +def _is_indentation_error_leaf(node): + return node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS + + +def _get_previous_leaf_if_indentation(leaf): + while leaf and _is_indentation_error_leaf(leaf): + leaf = leaf.get_previous_leaf() + return leaf + + +def _get_next_leaf_if_indentation(leaf): + while leaf and _is_indentation_error_leaf(leaf): + leaf = leaf.get_next_leaf() + return leaf + + +def _get_suite_indentation(tree_node): + return _get_indentation(tree_node.children[1]) + + +def _get_indentation(tree_node): + return tree_node.start_pos[1] + + +def _assert_valid_graph(node): + """ + Checks if the parent/children relationship is correct. + + This is a check that only runs during debugging/testing. + """ + try: + children = node.children + except AttributeError: + # Ignore INDENT is necessary, because indent/dedent tokens don't + # contain value/prefix and are just around, because of the tokenizer. + if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS: + assert not node.value + assert not node.prefix + return + + # Calculate the content between two start positions. 
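+        # (Recompute this leaf's start_pos from the previous leaf's end plus
+        # the prefix in between and assert it matches the stored position.)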
+ previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf()) + if previous_leaf is None: + content = node.prefix + previous_start_pos = 1, 0 + else: + assert previous_leaf.end_pos <= node.start_pos, \ + (previous_leaf, node) + + content = previous_leaf.value + node.prefix + previous_start_pos = previous_leaf.start_pos + + if '\n' in content or '\r' in content: + splitted = split_lines(content) + line = previous_start_pos[0] + len(splitted) - 1 + actual = line, len(splitted[-1]) + else: + actual = previous_start_pos[0], previous_start_pos[1] + len(content) + if content.startswith(BOM_UTF8_STRING) \ + and node.get_start_pos_of_prefix() == (1, 0): + # Remove the byte order mark + actual = actual[0], actual[1] - 1 + + assert node.start_pos == actual, (node.start_pos, actual) + else: + for child in children: + assert child.parent == node, (node, child) + _assert_valid_graph(child) + + +def _assert_nodes_are_equal(node1, node2): + try: + children1 = node1.children + except AttributeError: + assert not hasattr(node2, 'children'), (node1, node2) + assert node1.value == node2.value, (node1, node2) + assert node1.type == node2.type, (node1, node2) + assert node1.prefix == node2.prefix, (node1, node2) + assert node1.start_pos == node2.start_pos, (node1, node2) + return + else: + try: + children2 = node2.children + except AttributeError: + assert False, (node1, node2) + for n1, n2 in zip(children1, children2): + _assert_nodes_are_equal(n1, n2) + assert len(children1) == len(children2), '\n' + repr(children1) + '\n' + repr(children2) + + +def _get_debug_error_message(module, old_lines, new_lines): + current_lines = split_lines(module.get_code(), keepends=True) + current_diff = difflib.unified_diff(new_lines, current_lines) + old_new_diff = difflib.unified_diff(old_lines, new_lines) + import parso + return ( + "There's an issue with the diff parser. Please " + "report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s" + % (parso.__version__, ''.join(old_new_diff), ''.join(current_diff)) + ) + + +def _get_last_line(node_or_leaf): + last_leaf = node_or_leaf.get_last_leaf() + if _ends_with_newline(last_leaf): + return last_leaf.start_pos[0] + else: + n = last_leaf.get_next_leaf() + if n.type == 'endmarker' and '\n' in n.prefix: + # This is a very special case and has to do with error recovery in + # Parso. The problem is basically that there's no newline leaf at + # the end sometimes (it's required in the grammar, but not needed + # actually before endmarker, CPython just adds a newline to make + # source code pass the parser, to account for that Parso error + # recovery allows small_stmt instead of simple_stmt). + return last_leaf.end_pos[0] + 1 + return last_leaf.end_pos[0] + + +def _skip_dedent_error_leaves(leaf): + while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT': + leaf = leaf.get_previous_leaf() + return leaf + + +def _ends_with_newline(leaf, suffix=''): + leaf = _skip_dedent_error_leaves(leaf) + + if leaf.type == 'error_leaf': + typ = leaf.token_type.lower() + else: + typ = leaf.type + + return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r') + + +def _flows_finished(pgen_grammar, stack): + """ + if, while, for and try might not be finished, because another part might + still be parsed. 
+ """ + for stack_node in stack: + if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'): + return False + return True + + +def _func_or_class_has_suite(node): + if node.type == 'decorated': + node = node.children[-1] + if node.type in ('async_funcdef', 'async_stmt'): + node = node.children[-1] + return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite' + + +def _suite_or_file_input_is_valid(pgen_grammar, stack): + if not _flows_finished(pgen_grammar, stack): + return False + + for stack_node in reversed(stack): + if stack_node.nonterminal == 'decorator': + # A decorator is only valid with the upcoming function. + return False + + if stack_node.nonterminal == 'suite': + # If only newline is in the suite, the suite is not valid, yet. + return len(stack_node.nodes) > 1 + # Not reaching a suite means that we're dealing with file_input levels + # where there's no need for a valid statement in it. It can also be empty. + return True + + +def _is_flow_node(node): + if node.type == 'async_stmt': + node = node.children[1] + try: + value = node.children[0].value + except AttributeError: + return False + return value in ('if', 'for', 'while', 'try', 'with') + + +class _PositionUpdatingFinished(Exception): + pass + + +def _update_positions(nodes, line_offset, last_leaf): + for node in nodes: + try: + children = node.children + except AttributeError: + # Is a leaf + node.line += line_offset + if node is last_leaf: + raise _PositionUpdatingFinished + else: + _update_positions(children, line_offset, last_leaf) + + +class DiffParser: + """ + An advanced form of parsing a file faster. Unfortunately comes with huge + side effects. It changes the given module. + """ + def __init__(self, pgen_grammar, tokenizer, module): + self._pgen_grammar = pgen_grammar + self._tokenizer = tokenizer + self._module = module + + def _reset(self): + self._copy_count = 0 + self._parser_count = 0 + + self._nodes_tree = _NodesTree(self._module) + + def update(self, old_lines, new_lines): + ''' + The algorithm works as follows: + + Equal: + - Assure that the start is a newline, otherwise parse until we get + one. + - Copy from parsed_until_line + 1 to max(i2 + 1) + - Make sure that the indentation is correct (e.g. add DEDENT) + - Add old and change positions + Insert: + - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not + much more. + + Returns the new module node. + ''' + LOG.debug('diff parser start') + # Reset the used names cache so they get regenerated. + self._module._used_names = None + + self._parser_lines_new = new_lines + + self._reset() + + line_length = len(new_lines) + sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new) + opcodes = sm.get_opcodes() + LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length)) + + for operation, i1, i2, j1, j2 in opcodes: + LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]', + operation, i1 + 1, i2, j1 + 1, j2) + + if j2 == line_length and new_lines[-1] == '': + # The empty part after the last newline is not relevant. + j2 -= 1 + + if operation == 'equal': + line_offset = j1 - i1 + self._copy_from_old_parser(line_offset, i1 + 1, i2, j2) + elif operation == 'replace': + self._parse(until_line=j2) + elif operation == 'insert': + self._parse(until_line=j2) + else: + assert operation == 'delete' + + # With this action all change will finally be applied and we have a + # changed module. 
+ self._nodes_tree.close() + + if DEBUG_DIFF_PARSER: + # If there is reasonable suspicion that the diff parser is not + # behaving well, this should be enabled. + try: + code = ''.join(new_lines) + assert self._module.get_code() == code + _assert_valid_graph(self._module) + without_diff_parser_module = Parser( + self._pgen_grammar, + error_recovery=True + ).parse(self._tokenizer(new_lines)) + _assert_nodes_are_equal(self._module, without_diff_parser_module) + except AssertionError: + print(_get_debug_error_message(self._module, old_lines, new_lines)) + raise + + last_pos = self._module.end_pos[0] + if last_pos != line_length: + raise Exception( + ('(%s != %s) ' % (last_pos, line_length)) + + _get_debug_error_message(self._module, old_lines, new_lines) + ) + LOG.debug('diff parser end') + return self._module + + def _enabled_debugging(self, old_lines, lines_new): + if self._module.get_code() != ''.join(lines_new): + LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new)) + + def _copy_from_old_parser(self, line_offset, start_line_old, until_line_old, until_line_new): + last_until_line = -1 + while until_line_new > self._nodes_tree.parsed_until_line: + parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset + line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1) + if line_stmt is None: + # Parse 1 line at least. We don't need more, because we just + # want to get into a state where the old parser has statements + # again that can be copied (e.g. not lines within parentheses). + self._parse(self._nodes_tree.parsed_until_line + 1) + else: + p_children = line_stmt.parent.children + index = p_children.index(line_stmt) + + if start_line_old == 1 \ + and p_children[0].get_first_leaf().prefix.startswith(BOM_UTF8_STRING): + # If there's a BOM in the beginning, just reparse. It's too + # complicated to account for it otherwise. + copied_nodes = [] + else: + from_ = self._nodes_tree.parsed_until_line + 1 + copied_nodes = self._nodes_tree.copy_nodes( + p_children[index:], + until_line_old, + line_offset + ) + # Match all the nodes that are in the wanted range. + if copied_nodes: + self._copy_count += 1 + + to = self._nodes_tree.parsed_until_line + + LOG.debug('copy old[%s:%s] new[%s:%s]', + copied_nodes[0].start_pos[0], + copied_nodes[-1].end_pos[0] - 1, from_, to) + else: + # We have copied as much as possible (but definitely not too + # much). Therefore we just parse a bit more. + self._parse(self._nodes_tree.parsed_until_line + 1) + # Since there are potential bugs that might loop here endlessly, we + # just stop here. + assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line + last_until_line = self._nodes_tree.parsed_until_line + + def _get_old_line_stmt(self, old_line): + leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True) + + if _ends_with_newline(leaf): + leaf = leaf.get_next_leaf() + if leaf.get_start_pos_of_prefix()[0] == old_line: + node = leaf + while node.parent.type not in ('file_input', 'suite'): + node = node.parent + + # Make sure that if only the `else:` line of an if statement is + # copied that not the whole thing is going to be copied. + if node.start_pos[0] >= old_line: + return node + # Must be on the same line. Otherwise we need to parse that bit. + return None + + def _parse(self, until_line): + """ + Parses at least until the given line, but might just parse more until a + valid state is reached. 
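+        For example, if ``until_line`` falls inside an open bracket or an
+        unfinished ``if``/``for``/``while``/``try`` block, parsing continues
+        past that line until the suite or file input is valid again (see
+        ``_suite_or_file_input_is_valid``).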
+ """ + last_until_line = 0 + while until_line > self._nodes_tree.parsed_until_line: + node = self._try_parse_part(until_line) + nodes = node.children + + self._nodes_tree.add_parsed_nodes(nodes, self._keyword_token_indents) + if self._replace_tos_indent is not None: + self._nodes_tree.indents[-1] = self._replace_tos_indent + + LOG.debug( + 'parse_part from %s to %s (to %s in part parser)', + nodes[0].get_start_pos_of_prefix()[0], + self._nodes_tree.parsed_until_line, + node.end_pos[0] - 1 + ) + # Since the tokenizer sometimes has bugs, we cannot be sure that + # this loop terminates. Therefore assert that there's always a + # change. + assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line + last_until_line = self._nodes_tree.parsed_until_line + + def _try_parse_part(self, until_line): + """ + Sets up a normal parser that uses a spezialized tokenizer to only parse + until a certain position (or a bit longer if the statement hasn't + ended. + """ + self._parser_count += 1 + # TODO speed up, shouldn't copy the whole list all the time. + # memoryview? + parsed_until_line = self._nodes_tree.parsed_until_line + lines_after = self._parser_lines_new[parsed_until_line:] + tokens = self._diff_tokenize( + lines_after, + until_line, + line_offset=parsed_until_line + ) + self._active_parser = Parser( + self._pgen_grammar, + error_recovery=True + ) + return self._active_parser.parse(tokens=tokens) + + def _diff_tokenize(self, lines, until_line, line_offset=0): + was_newline = False + indents = self._nodes_tree.indents + initial_indentation_count = len(indents) + + tokens = self._tokenizer( + lines, + start_pos=(line_offset + 1, 0), + indents=indents, + is_first_token=line_offset == 0, + ) + stack = self._active_parser.stack + self._replace_tos_indent = None + self._keyword_token_indents = {} + # print('start', line_offset + 1, indents) + for token in tokens: + # print(token, indents) + typ = token.type + if typ == DEDENT: + if len(indents) < initial_indentation_count: + # We are done here, only thing that can come now is an + # endmarker or another dedented code block. + while True: + typ, string, start_pos, prefix = token = next(tokens) + if typ in (DEDENT, ERROR_DEDENT): + if typ == ERROR_DEDENT: + # We want to force an error dedent in the next + # parser/pass. To make this possible we just + # increase the location by one. + self._replace_tos_indent = start_pos[1] + 1 + pass + else: + break + + if '\n' in prefix or '\r' in prefix: + prefix = re.sub(r'[^\n\r]+\Z', '', prefix) + else: + assert start_pos[1] >= len(prefix), repr(prefix) + if start_pos[1] - len(prefix) == 0: + prefix = '' + yield PythonToken( + ENDMARKER, '', + start_pos, + prefix + ) + break + elif typ == NEWLINE and token.start_pos[0] >= until_line: + was_newline = True + elif was_newline: + was_newline = False + if len(indents) == initial_indentation_count: + # Check if the parser is actually in a valid suite state. 
+ if _suite_or_file_input_is_valid(self._pgen_grammar, stack): + yield PythonToken(ENDMARKER, '', token.start_pos, '') + break + + if typ == NAME and token.string in ('class', 'def'): + self._keyword_token_indents[token.start_pos] = list(indents) + + yield token + + +class _NodesTreeNode: + _ChildrenGroup = namedtuple( + '_ChildrenGroup', + 'prefix children line_offset last_line_offset_leaf') + + def __init__(self, tree_node, parent=None, indentation=0): + self.tree_node = tree_node + self._children_groups = [] + self.parent = parent + self._node_children = [] + self.indentation = indentation + + def finish(self): + children = [] + for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups: + first_leaf = _get_next_leaf_if_indentation( + children_part[0].get_first_leaf() + ) + + first_leaf.prefix = prefix + first_leaf.prefix + if line_offset != 0: + try: + _update_positions( + children_part, line_offset, last_line_offset_leaf) + except _PositionUpdatingFinished: + pass + children += children_part + self.tree_node.children = children + # Reset the parents + for node in children: + node.parent = self.tree_node + + for node_child in self._node_children: + node_child.finish() + + def add_child_node(self, child_node): + self._node_children.append(child_node) + + def add_tree_nodes(self, prefix, children, line_offset=0, + last_line_offset_leaf=None): + if last_line_offset_leaf is None: + last_line_offset_leaf = children[-1].get_last_leaf() + group = self._ChildrenGroup( + prefix, children, line_offset, last_line_offset_leaf + ) + self._children_groups.append(group) + + def get_last_line(self, suffix): + line = 0 + if self._children_groups: + children_group = self._children_groups[-1] + last_leaf = _get_previous_leaf_if_indentation( + children_group.last_line_offset_leaf + ) + + line = last_leaf.end_pos[0] + children_group.line_offset + + # Newlines end on the next line, which means that they would cover + # the next line. That line is not fully parsed at this point. + if _ends_with_newline(last_leaf, suffix): + line -= 1 + line += len(split_lines(suffix)) - 1 + + if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'): + # This is the end of a file (that doesn't end with a newline). 
+ line += 1 + + if self._node_children: + return max(line, self._node_children[-1].get_last_line(suffix)) + return line + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.tree_node) + + +class _NodesTree: + def __init__(self, module): + self._base_node = _NodesTreeNode(module) + self._working_stack = [self._base_node] + self._module = module + self._prefix_remainder = '' + self.prefix = '' + self.indents = [0] + + @property + def parsed_until_line(self): + return self._working_stack[-1].get_last_line(self.prefix) + + def _update_insertion_node(self, indentation): + for node in reversed(list(self._working_stack)): + if node.indentation < indentation or node is self._working_stack[0]: + return node + self._working_stack.pop() + + def add_parsed_nodes(self, tree_nodes, keyword_token_indents): + old_prefix = self.prefix + tree_nodes = self._remove_endmarker(tree_nodes) + if not tree_nodes: + self.prefix = old_prefix + self.prefix + return + + assert tree_nodes[0].type != 'newline' + + node = self._update_insertion_node(tree_nodes[0].start_pos[1]) + assert node.tree_node.type in ('suite', 'file_input') + node.add_tree_nodes(old_prefix, tree_nodes) + # tos = Top of stack + self._update_parsed_node_tos(tree_nodes[-1], keyword_token_indents) + + def _update_parsed_node_tos(self, tree_node, keyword_token_indents): + if tree_node.type == 'suite': + def_leaf = tree_node.parent.children[0] + new_tos = _NodesTreeNode( + tree_node, + indentation=keyword_token_indents[def_leaf.start_pos][-1], + ) + new_tos.add_tree_nodes('', list(tree_node.children)) + + self._working_stack[-1].add_child_node(new_tos) + self._working_stack.append(new_tos) + + self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents) + elif _func_or_class_has_suite(tree_node): + self._update_parsed_node_tos(tree_node.children[-1], keyword_token_indents) + + def _remove_endmarker(self, tree_nodes): + """ + Helps cleaning up the tree nodes that get inserted. + """ + last_leaf = tree_nodes[-1].get_last_leaf() + is_endmarker = last_leaf.type == 'endmarker' + self._prefix_remainder = '' + if is_endmarker: + prefix = last_leaf.prefix + separation = max(prefix.rfind('\n'), prefix.rfind('\r')) + if separation > -1: + # Remove the whitespace part of the prefix after a newline. + # That is not relevant if parentheses were opened. Always parse + # until the end of a line. + last_leaf.prefix, self._prefix_remainder = \ + last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:] + + self.prefix = '' + + if is_endmarker: + self.prefix = last_leaf.prefix + + tree_nodes = tree_nodes[:-1] + return tree_nodes + + def _get_matching_indent_nodes(self, tree_nodes, is_new_suite): + # There might be a random dedent where we have to stop copying. + # Invalid indents are ok, because the parser handled that + # properly before. An invalid dedent can happen, because a few + # lines above there was an invalid indent. + node_iterator = iter(tree_nodes) + if is_new_suite: + yield next(node_iterator) + + first_node = next(node_iterator) + indent = _get_indentation(first_node) + if not is_new_suite and indent not in self.indents: + return + yield first_node + + for n in node_iterator: + if _get_indentation(n) != indent: + return + yield n + + def copy_nodes(self, tree_nodes, until_line, line_offset): + """ + Copies tree nodes from the old parser tree. + + Returns the number of tree nodes that were copied. + """ + if tree_nodes[0].type in ('error_leaf', 'error_node'): + # Avoid copying errors in the beginning. 
Can lead to a lot of + # issues. + return [] + + indentation = _get_indentation(tree_nodes[0]) + old_working_stack = list(self._working_stack) + old_prefix = self.prefix + old_indents = self.indents + self.indents = [i for i in self.indents if i <= indentation] + + self._update_insertion_node(indentation) + + new_nodes, self._working_stack, self.prefix, added_indents = self._copy_nodes( + list(self._working_stack), + tree_nodes, + until_line, + line_offset, + self.prefix, + ) + if new_nodes: + self.indents += added_indents + else: + self._working_stack = old_working_stack + self.prefix = old_prefix + self.indents = old_indents + return new_nodes + + def _copy_nodes(self, working_stack, nodes, until_line, line_offset, + prefix='', is_nested=False): + new_nodes = [] + added_indents = [] + + nodes = list(self._get_matching_indent_nodes( + nodes, + is_new_suite=is_nested, + )) + + new_prefix = '' + for node in nodes: + if node.start_pos[0] > until_line: + break + + if node.type == 'endmarker': + break + + if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'): + break + # TODO this check might take a bit of time for large files. We + # might want to change this to do more intelligent guessing or + # binary search. + if _get_last_line(node) > until_line: + # We can split up functions and classes later. + if _func_or_class_has_suite(node): + new_nodes.append(node) + break + try: + c = node.children + except AttributeError: + pass + else: + # This case basically appears with error recovery of one line + # suites like `def foo(): bar.-`. In this case we might not + # include a newline in the statement and we need to take care + # of that. + n = node + if n.type == 'decorated': + n = n.children[-1] + if n.type in ('async_funcdef', 'async_stmt'): + n = n.children[-1] + if n.type in ('classdef', 'funcdef'): + suite_node = n.children[-1] + else: + suite_node = c[-1] + + if suite_node.type in ('error_leaf', 'error_node'): + break + + new_nodes.append(node) + + # Pop error nodes at the end from the list + if new_nodes: + while new_nodes: + last_node = new_nodes[-1] + if (last_node.type in ('error_leaf', 'error_node') + or _is_flow_node(new_nodes[-1])): + # Error leafs/nodes don't have a defined start/end. Error + # nodes might not end with a newline (e.g. if there's an + # open `(`). Therefore ignore all of them unless they are + # succeeded with valid parser state. + # If we copy flows at the end, they might be continued + # after the copy limit (in the new parser). + # In this while loop we try to remove until we find a newline. + new_prefix = '' + new_nodes.pop() + while new_nodes: + last_node = new_nodes[-1] + if last_node.get_last_leaf().type == 'newline': + break + new_nodes.pop() + continue + if len(new_nodes) > 1 and new_nodes[-2].type == 'error_node': + # The problem here is that Parso error recovery sometimes + # influences nodes before this node. + # Since the new last node is an error node this will get + # cleaned up in the next while iteration. 
+ new_nodes.pop() + continue + break + + if not new_nodes: + return [], working_stack, prefix, added_indents + + tos = working_stack[-1] + last_node = new_nodes[-1] + had_valid_suite_last = False + # Pop incomplete suites from the list + if _func_or_class_has_suite(last_node): + suite = last_node + while suite.type != 'suite': + suite = suite.children[-1] + + indent = _get_suite_indentation(suite) + added_indents.append(indent) + + suite_tos = _NodesTreeNode(suite, indentation=_get_indentation(last_node)) + # Don't need to pass line_offset here, it's already done by the + # parent. + suite_nodes, new_working_stack, new_prefix, ai = self._copy_nodes( + working_stack + [suite_tos], suite.children, until_line, line_offset, + is_nested=True, + ) + added_indents += ai + if len(suite_nodes) < 2: + # A suite only with newline is not valid. + new_nodes.pop() + new_prefix = '' + else: + assert new_nodes + tos.add_child_node(suite_tos) + working_stack = new_working_stack + had_valid_suite_last = True + + if new_nodes: + if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last: + p = new_nodes[-1].get_next_leaf().prefix + # We are not allowed to remove the newline at the end of the + # line, otherwise it's going to be missing. This happens e.g. + # if a bracket is around before that moves newlines to + # prefixes. + new_prefix = split_lines(p, keepends=True)[0] + + if had_valid_suite_last: + last = new_nodes[-1] + if last.type == 'decorated': + last = last.children[-1] + if last.type in ('async_funcdef', 'async_stmt'): + last = last.children[-1] + last_line_offset_leaf = last.children[-2].get_last_leaf() + assert last_line_offset_leaf == ':' + else: + last_line_offset_leaf = new_nodes[-1].get_last_leaf() + tos.add_tree_nodes( + prefix, new_nodes, line_offset, last_line_offset_leaf, + ) + prefix = new_prefix + self._prefix_remainder = '' + + return new_nodes, working_stack, prefix, added_indents + + def close(self): + self._base_node.finish() + + # Add an endmarker. + try: + last_leaf = self._module.get_last_leaf() + except IndexError: + end_pos = [1, 0] + else: + last_leaf = _skip_dedent_error_leaves(last_leaf) + end_pos = list(last_leaf.end_pos) + lines = split_lines(self.prefix) + assert len(lines) > 0 + if len(lines) == 1: + if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]: + end_pos[1] -= 1 + end_pos[1] += len(lines[0]) + else: + end_pos[0] += len(lines) - 1 + end_pos[1] = len(lines[-1]) + + endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder) + endmarker.parent = self._module + self._module.children.append(endmarker) diff --git a/contrib/python/parso/py3/parso/python/errors.py b/contrib/python/parso/py3/parso/python/errors.py new file mode 100644 index 0000000000..5da046ab01 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/errors.py @@ -0,0 +1,1295 @@ +# -*- coding: utf-8 -*- +import codecs +import warnings +import re +from contextlib import contextmanager + +from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule +from parso.python.tokenize import _get_token_collection + +_BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt') +_STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist') +# This is the maximal block size given by python. 
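+# (CPython's compiler limits the block stack via CO_MAXBLOCKS = 20; nesting
+# deeper raises "SyntaxError: too many statically nested blocks", the same
+# message ErrorFinder reproduces below.)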
+_MAX_BLOCK_SIZE = 20 +_MAX_INDENT_COUNT = 100 +ALLOWED_FUTURES = ( + 'nested_scopes', 'generators', 'division', 'absolute_import', + 'with_statement', 'print_function', 'unicode_literals', 'generator_stop', +) +_COMP_FOR_TYPES = ('comp_for', 'sync_comp_for') + + +def _get_rhs_name(node, version): + type_ = node.type + if type_ == "lambdef": + return "lambda" + elif type_ == "atom": + comprehension = _get_comprehension_type(node) + first, second = node.children[:2] + if comprehension is not None: + return comprehension + elif second.type == "dictorsetmaker": + if version < (3, 8): + return "literal" + else: + if second.children[1] == ":" or second.children[0] == "**": + return "dict display" + else: + return "set display" + elif ( + first == "(" + and (second == ")" + or (len(node.children) == 3 and node.children[1].type == "testlist_comp")) + ): + return "tuple" + elif first == "(": + return _get_rhs_name(_remove_parens(node), version=version) + elif first == "[": + return "list" + elif first == "{" and second == "}": + return "dict display" + elif first == "{" and len(node.children) > 2: + return "set display" + elif type_ == "keyword": + if "yield" in node.value: + return "yield expression" + if version < (3, 8): + return "keyword" + else: + return str(node.value) + elif type_ == "operator" and node.value == "...": + return "Ellipsis" + elif type_ == "comparison": + return "comparison" + elif type_ in ("string", "number", "strings"): + return "literal" + elif type_ == "yield_expr": + return "yield expression" + elif type_ == "test": + return "conditional expression" + elif type_ in ("atom_expr", "power"): + if node.children[0] == "await": + return "await expression" + elif node.children[-1].type == "trailer": + trailer = node.children[-1] + if trailer.children[0] == "(": + return "function call" + elif trailer.children[0] == "[": + return "subscript" + elif trailer.children[0] == ".": + return "attribute" + elif ( + ("expr" in type_ and "star_expr" not in type_) # is a substring + or "_test" in type_ + or type_ in ("term", "factor") + ): + return "operator" + elif type_ == "star_expr": + return "starred" + elif type_ == "testlist_star_expr": + return "tuple" + elif type_ == "fstring": + return "f-string expression" + return type_ # shouldn't reach here + + +def _iter_stmts(scope): + """ + Iterates over all statements and splits up simple_stmt. + """ + for child in scope.children: + if child.type == 'simple_stmt': + for child2 in child.children: + if child2.type == 'newline' or child2 == ';': + continue + yield child2 + else: + yield child + + +def _get_comprehension_type(atom): + first, second = atom.children[:2] + if second.type == 'testlist_comp' and second.children[1].type in _COMP_FOR_TYPES: + if first == '[': + return 'list comprehension' + else: + return 'generator expression' + elif second.type == 'dictorsetmaker' and second.children[-1].type in _COMP_FOR_TYPES: + if second.children[1] == ':': + return 'dict comprehension' + else: + return 'set comprehension' + return None + + +def _is_future_import(import_from): + # It looks like a __future__ import that is relative is still a future + # import. That feels kind of odd, but whatever. + # if import_from.level != 0: + # return False + from_names = import_from.get_from_names() + return [n.value for n in from_names] == ['__future__'] + + +def _remove_parens(atom): + """ + Returns the inner part of an expression like `(foo)`. Also removes nested + parens. 
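+    For example, the atom for ``((foo))`` is reduced to the inner ``foo``
+    name node; anything that is not a parenthesized atom is returned
+    unchanged.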
+ """ + try: + children = atom.children + except AttributeError: + pass + else: + if len(children) == 3 and children[0] == '(': + return _remove_parens(atom.children[1]) + return atom + + +def _skip_parens_bottom_up(node): + """ + Returns an ancestor node of an expression, skipping all levels of parens + bottom-up. + """ + while node.parent is not None: + node = node.parent + if node.type != 'atom' or node.children[0] != '(': + return node + return None + + +def _iter_params(parent_node): + return (n for n in parent_node.children if n.type == 'param' or n.type == 'operator') + + +def _is_future_import_first(import_from): + """ + Checks if the import is the first statement of a file. + """ + found_docstring = False + for stmt in _iter_stmts(import_from.get_root_node()): + if stmt.type == 'string' and not found_docstring: + continue + found_docstring = True + + if stmt == import_from: + return True + if stmt.type == 'import_from' and _is_future_import(stmt): + continue + return False + + +def _iter_definition_exprs_from_lists(exprlist): + def check_expr(child): + if child.type == 'atom': + if child.children[0] == '(': + testlist_comp = child.children[1] + if testlist_comp.type == 'testlist_comp': + yield from _iter_definition_exprs_from_lists(testlist_comp) + return + else: + # It's a paren that doesn't do anything, like 1 + (1) + yield from check_expr(testlist_comp) + return + elif child.children[0] == '[': + yield testlist_comp + return + yield child + + if exprlist.type in _STAR_EXPR_PARENTS: + for child in exprlist.children[::2]: + yield from check_expr(child) + else: + yield from check_expr(exprlist) + + +def _get_expr_stmt_definition_exprs(expr_stmt): + exprs = [] + for list_ in expr_stmt.children[:-2:2]: + if list_.type in ('testlist_star_expr', 'testlist'): + exprs += _iter_definition_exprs_from_lists(list_) + else: + exprs.append(list_) + return exprs + + +def _get_for_stmt_definition_exprs(for_stmt): + exprlist = for_stmt.children[1] + return list(_iter_definition_exprs_from_lists(exprlist)) + + +def _is_argument_comprehension(argument): + return argument.children[1].type in _COMP_FOR_TYPES + + +def _any_fstring_error(version, node): + if version < (3, 9) or node is None: + return False + if node.type == "error_node": + return any(child.type == "fstring_start" for child in node.children) + elif node.type == "fstring": + return True + else: + return node.search_ancestor("fstring") + + +class _Context: + def __init__(self, node, add_syntax_error, parent_context=None): + self.node = node + self.blocks = [] + self.parent_context = parent_context + self._used_name_dict = {} + self._global_names = [] + self._local_params_names = [] + self._nonlocal_names = [] + self._nonlocal_names_in_subscopes = [] + self._add_syntax_error = add_syntax_error + + def is_async_funcdef(self): + # Stupidly enough async funcdefs can have two different forms, + # depending if a decorator is used or not. + return self.is_function() \ + and self.node.parent.type in ('async_funcdef', 'async_stmt') + + def is_function(self): + return self.node.type == 'funcdef' + + def add_name(self, name): + parent_type = name.parent.type + if parent_type == 'trailer': + # We are only interested in first level names. 
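+            # (e.g. for ``foo.bar`` only ``foo`` is recorded; ``bar`` sits
+            # inside a trailer and is skipped here)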
+ return + + if parent_type == 'global_stmt': + self._global_names.append(name) + elif parent_type == 'nonlocal_stmt': + self._nonlocal_names.append(name) + elif parent_type == 'funcdef': + self._local_params_names.extend( + [param.name.value for param in name.parent.get_params()] + ) + else: + self._used_name_dict.setdefault(name.value, []).append(name) + + def finalize(self): + """ + Returns a list of nonlocal names that need to be part of that scope. + """ + self._analyze_names(self._global_names, 'global') + self._analyze_names(self._nonlocal_names, 'nonlocal') + + global_name_strs = {n.value: n for n in self._global_names} + for nonlocal_name in self._nonlocal_names: + try: + global_name = global_name_strs[nonlocal_name.value] + except KeyError: + continue + + message = "name '%s' is nonlocal and global" % global_name.value + if global_name.start_pos < nonlocal_name.start_pos: + error_name = global_name + else: + error_name = nonlocal_name + self._add_syntax_error(error_name, message) + + nonlocals_not_handled = [] + for nonlocal_name in self._nonlocal_names_in_subscopes: + search = nonlocal_name.value + if search in self._local_params_names: + continue + if search in global_name_strs or self.parent_context is None: + message = "no binding for nonlocal '%s' found" % nonlocal_name.value + self._add_syntax_error(nonlocal_name, message) + elif not self.is_function() or \ + nonlocal_name.value not in self._used_name_dict: + nonlocals_not_handled.append(nonlocal_name) + return self._nonlocal_names + nonlocals_not_handled + + def _analyze_names(self, globals_or_nonlocals, type_): + def raise_(message): + self._add_syntax_error(base_name, message % (base_name.value, type_)) + + params = [] + if self.node.type == 'funcdef': + params = self.node.get_params() + + for base_name in globals_or_nonlocals: + found_global_or_nonlocal = False + # Somehow Python does it the reversed way. + for name in reversed(self._used_name_dict.get(base_name.value, [])): + if name.start_pos > base_name.start_pos: + # All following names don't have to be checked. + found_global_or_nonlocal = True + + parent = name.parent + if parent.type == 'param' and parent.name == name: + # Skip those here, these definitions belong to the next + # scope. + continue + + if name.is_definition(): + if parent.type == 'expr_stmt' \ + and parent.children[1].type == 'annassign': + if found_global_or_nonlocal: + # If it's after the global the error seems to be + # placed there. + base_name = name + raise_("annotated name '%s' can't be %s") + break + else: + message = "name '%s' is assigned to before %s declaration" + else: + message = "name '%s' is used prior to %s declaration" + + if not found_global_or_nonlocal: + raise_(message) + # Only add an error for the first occurence. + break + + for param in params: + if param.name.value == base_name.value: + raise_("name '%s' is parameter and %s"), + + @contextmanager + def add_block(self, node): + self.blocks.append(node) + yield + self.blocks.pop() + + def add_context(self, node): + return _Context(node, self._add_syntax_error, parent_context=self) + + def close_child_context(self, child_context): + self._nonlocal_names_in_subscopes += child_context.finalize() + + +class ErrorFinder(Normalizer): + """ + Searches for errors in the syntax tree. 
+ """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._error_dict = {} + self.version = self.grammar.version_info + + def initialize(self, node): + def create_context(node): + if node is None: + return None + + parent_context = create_context(node.parent) + if node.type in ('classdef', 'funcdef', 'file_input'): + return _Context(node, self._add_syntax_error, parent_context) + return parent_context + + self.context = create_context(node) or _Context(node, self._add_syntax_error) + self._indentation_count = 0 + + def visit(self, node): + if node.type == 'error_node': + with self.visit_node(node): + # Don't need to investigate the inners of an error node. We + # might find errors in there that should be ignored, because + # the error node itself already shows that there's an issue. + return '' + return super().visit(node) + + @contextmanager + def visit_node(self, node): + self._check_type_rules(node) + + if node.type in _BLOCK_STMTS: + with self.context.add_block(node): + if len(self.context.blocks) == _MAX_BLOCK_SIZE: + self._add_syntax_error(node, "too many statically nested blocks") + yield + return + elif node.type == 'suite': + self._indentation_count += 1 + if self._indentation_count == _MAX_INDENT_COUNT: + self._add_indentation_error(node.children[1], "too many levels of indentation") + + yield + + if node.type == 'suite': + self._indentation_count -= 1 + elif node.type in ('classdef', 'funcdef'): + context = self.context + self.context = context.parent_context + self.context.close_child_context(context) + + def visit_leaf(self, leaf): + if leaf.type == 'error_leaf': + if leaf.token_type in ('INDENT', 'ERROR_DEDENT'): + # Indents/Dedents itself never have a prefix. They are just + # "pseudo" tokens that get removed by the syntax tree later. + # Therefore in case of an error we also have to check for this. + spacing = list(leaf.get_next_leaf()._split_prefix())[-1] + if leaf.token_type == 'INDENT': + message = 'unexpected indent' + else: + message = 'unindent does not match any outer indentation level' + self._add_indentation_error(spacing, message) + else: + if leaf.value.startswith('\\'): + message = 'unexpected character after line continuation character' + else: + match = re.match('\\w{,2}("{1,3}|\'{1,3})', leaf.value) + if match is None: + message = 'invalid syntax' + if ( + self.version >= (3, 9) + and leaf.value in _get_token_collection( + self.version + ).always_break_tokens + ): + message = "f-string: " + message + else: + if len(match.group(1)) == 1: + message = 'EOL while scanning string literal' + else: + message = 'EOF while scanning triple-quoted string literal' + self._add_syntax_error(leaf, message) + return '' + elif leaf.value == ':': + parent = leaf.parent + if parent.type in ('classdef', 'funcdef'): + self.context = self.context.add_context(parent) + + # The rest is rule based. + return super().visit_leaf(leaf) + + def _add_indentation_error(self, spacing, message): + self.add_issue(spacing, 903, "IndentationError: " + message) + + def _add_syntax_error(self, node, message): + self.add_issue(node, 901, "SyntaxError: " + message) + + def add_issue(self, node, code, message): + # Overwrite the default behavior. + # Check if the issues are on the same line. 
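+        # Only the first issue recorded for a given line is kept; the
+        # setdefault() call below never overwrites an existing entry.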
+ line = node.start_pos[0] + args = (code, message, node) + self._error_dict.setdefault(line, args) + + def finalize(self): + self.context.finalize() + + for code, message, node in self._error_dict.values(): + self.issues.append(Issue(node, code, message)) + + +class IndentationRule(Rule): + code = 903 + + def _get_message(self, message, node): + message = super()._get_message(message, node) + return "IndentationError: " + message + + +@ErrorFinder.register_rule(type='error_node') +class _ExpectIndentedBlock(IndentationRule): + message = 'expected an indented block' + + def get_node(self, node): + leaf = node.get_next_leaf() + return list(leaf._split_prefix())[-1] + + def is_issue(self, node): + # This is the beginning of a suite that is not indented. + return node.children[-1].type == 'newline' + + +class ErrorFinderConfig(NormalizerConfig): + normalizer_class = ErrorFinder + + +class SyntaxRule(Rule): + code = 901 + + def _get_message(self, message, node): + message = super()._get_message(message, node) + if ( + "f-string" not in message + and _any_fstring_error(self._normalizer.version, node) + ): + message = "f-string: " + message + return "SyntaxError: " + message + + +@ErrorFinder.register_rule(type='error_node') +class _InvalidSyntaxRule(SyntaxRule): + message = "invalid syntax" + fstring_message = "f-string: invalid syntax" + + def get_node(self, node): + return node.get_next_leaf() + + def is_issue(self, node): + error = node.get_next_leaf().type != 'error_leaf' + if ( + error + and _any_fstring_error(self._normalizer.version, node) + ): + self.add_issue(node, message=self.fstring_message) + else: + # Error leafs will be added later as an error. + return error + + +@ErrorFinder.register_rule(value='await') +class _AwaitOutsideAsync(SyntaxRule): + message = "'await' outside async function" + + def is_issue(self, leaf): + return not self._normalizer.context.is_async_funcdef() + + def get_error_node(self, node): + # Return the whole await statement. + return node.parent + + +@ErrorFinder.register_rule(value='break') +class _BreakOutsideLoop(SyntaxRule): + message = "'break' outside loop" + + def is_issue(self, leaf): + in_loop = False + for block in self._normalizer.context.blocks: + if block.type in ('for_stmt', 'while_stmt'): + in_loop = True + return not in_loop + + +@ErrorFinder.register_rule(value='continue') +class _ContinueChecks(SyntaxRule): + message = "'continue' not properly in loop" + message_in_finally = "'continue' not supported inside 'finally' clause" + + def is_issue(self, leaf): + in_loop = False + for block in self._normalizer.context.blocks: + if block.type in ('for_stmt', 'while_stmt'): + in_loop = True + if block.type == 'try_stmt': + last_block = block.children[-3] + if ( + last_block == "finally" + and leaf.start_pos > last_block.start_pos + and self._normalizer.version < (3, 8) + ): + self.add_issue(leaf, message=self.message_in_finally) + return False # Error already added + if not in_loop: + return True + + +@ErrorFinder.register_rule(value='from') +class _YieldFromCheck(SyntaxRule): + message = "'yield from' inside async function" + + def get_node(self, leaf): + return leaf.parent.parent # This is the actual yield statement. 
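+        # e.g. for ``async def f(): yield from it`` the issue is attached to
+        # the whole ``yield from it`` expression, not just to ``from``.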
+ + def is_issue(self, leaf): + return leaf.parent.type == 'yield_arg' \ + and self._normalizer.context.is_async_funcdef() + + +@ErrorFinder.register_rule(type='name') +class _NameChecks(SyntaxRule): + message = 'cannot assign to __debug__' + message_none = 'cannot assign to None' + + def is_issue(self, leaf): + self._normalizer.context.add_name(leaf) + + if leaf.value == '__debug__' and leaf.is_definition(): + return True + + +@ErrorFinder.register_rule(type='string') +class _StringChecks(SyntaxRule): + message = "bytes can only contain ASCII literal characters." + + def is_issue(self, leaf): + string_prefix = leaf.string_prefix.lower() + if 'b' in string_prefix \ + and any(c for c in leaf.value if ord(c) > 127): + # b'ä' + return True + + if 'r' not in string_prefix: + # Raw strings don't need to be checked if they have proper + # escaping. + + payload = leaf._get_payload() + if 'b' in string_prefix: + payload = payload.encode('utf-8') + func = codecs.escape_decode + else: + func = codecs.unicode_escape_decode + + try: + with warnings.catch_warnings(): + # The warnings from parsing strings are not relevant. + warnings.filterwarnings('ignore') + func(payload) + except UnicodeDecodeError as e: + self.add_issue(leaf, message='(unicode error) ' + str(e)) + except ValueError as e: + self.add_issue(leaf, message='(value error) ' + str(e)) + + +@ErrorFinder.register_rule(value='*') +class _StarCheck(SyntaxRule): + message = "named arguments must follow bare *" + + def is_issue(self, leaf): + params = leaf.parent + if params.type == 'parameters' and params: + after = params.children[params.children.index(leaf) + 1:] + after = [child for child in after + if child not in (',', ')') and not child.star_count] + return len(after) == 0 + + +@ErrorFinder.register_rule(value='**') +class _StarStarCheck(SyntaxRule): + # e.g. {**{} for a in [1]} + # TODO this should probably get a better end_pos including + # the next sibling of leaf. + message = "dict unpacking cannot be used in dict comprehension" + + def is_issue(self, leaf): + if leaf.parent.type == 'dictorsetmaker': + comp_for = leaf.get_next_sibling().get_next_sibling() + return comp_for is not None and comp_for.type in _COMP_FOR_TYPES + + +@ErrorFinder.register_rule(value='yield') +@ErrorFinder.register_rule(value='return') +class _ReturnAndYieldChecks(SyntaxRule): + message = "'return' with value in async generator" + message_async_yield = "'yield' inside async function" + + def get_node(self, leaf): + return leaf.parent + + def is_issue(self, leaf): + if self._normalizer.context.node.type != 'funcdef': + self.add_issue(self.get_node(leaf), message="'%s' outside function" % leaf.value) + elif self._normalizer.context.is_async_funcdef() \ + and any(self._normalizer.context.node.iter_yield_exprs()): + if leaf.value == 'return' and leaf.parent.type == 'return_stmt': + return True + + +@ErrorFinder.register_rule(type='strings') +class _BytesAndStringMix(SyntaxRule): + # e.g. 's' b'' + message = "cannot mix bytes and nonbytes literals" + + def _is_bytes_literal(self, string): + if string.type == 'fstring': + return False + return 'b' in string.string_prefix.lower() + + def is_issue(self, node): + first = node.children[0] + first_is_bytes = self._is_bytes_literal(first) + for string in node.children[1:]: + if first_is_bytes != self._is_bytes_literal(string): + return True + + +@ErrorFinder.register_rule(type='import_as_names') +class _TrailingImportComma(SyntaxRule): + # e.g. 
from foo import a, + message = "trailing comma not allowed without surrounding parentheses" + + def is_issue(self, node): + if node.children[-1] == ',' and node.parent.children[-1] != ')': + return True + + +@ErrorFinder.register_rule(type='import_from') +class _ImportStarInFunction(SyntaxRule): + message = "import * only allowed at module level" + + def is_issue(self, node): + return node.is_star_import() and self._normalizer.context.parent_context is not None + + +@ErrorFinder.register_rule(type='import_from') +class _FutureImportRule(SyntaxRule): + message = "from __future__ imports must occur at the beginning of the file" + + def is_issue(self, node): + if _is_future_import(node): + if not _is_future_import_first(node): + return True + + for from_name, future_name in node.get_paths(): + name = future_name.value + allowed_futures = list(ALLOWED_FUTURES) + if self._normalizer.version >= (3, 7): + allowed_futures.append('annotations') + if name == 'braces': + self.add_issue(node, message="not a chance") + elif name == 'barry_as_FLUFL': + m = "Seriously I'm not implementing this :) ~ Dave" + self.add_issue(node, message=m) + elif name not in allowed_futures: + message = "future feature %s is not defined" % name + self.add_issue(node, message=message) + + +@ErrorFinder.register_rule(type='star_expr') +class _StarExprRule(SyntaxRule): + message_iterable_unpacking = "iterable unpacking cannot be used in comprehension" + + def is_issue(self, node): + def check_delete_starred(node): + while node.parent is not None: + node = node.parent + if node.type == 'del_stmt': + return True + if node.type not in (*_STAR_EXPR_PARENTS, 'atom'): + return False + return False + + if self._normalizer.version >= (3, 9): + ancestor = node.parent + else: + ancestor = _skip_parens_bottom_up(node) + # starred expression not in tuple/list/set + if ancestor.type not in (*_STAR_EXPR_PARENTS, 'dictorsetmaker') \ + and not (ancestor.type == 'atom' and ancestor.children[0] != '('): + self.add_issue(node, message="can't use starred expression here") + return + + if check_delete_starred(node): + if self._normalizer.version >= (3, 9): + self.add_issue(node, message="cannot delete starred") + else: + self.add_issue(node, message="can't use starred expression here") + return + + if node.parent.type == 'testlist_comp': + # [*[] for a in [1]] + if node.parent.children[1].type in _COMP_FOR_TYPES: + self.add_issue(node, message=self.message_iterable_unpacking) + + +@ErrorFinder.register_rule(types=_STAR_EXPR_PARENTS) +class _StarExprParentRule(SyntaxRule): + def is_issue(self, node): + def is_definition(node, ancestor): + if ancestor is None: + return False + + type_ = ancestor.type + if type_ == 'trailer': + return False + + if type_ == 'expr_stmt': + return node.start_pos < ancestor.children[-1].start_pos + + return is_definition(node, ancestor.parent) + + if is_definition(node, node.parent): + args = [c for c in node.children if c != ','] + starred = [c for c in args if c.type == 'star_expr'] + if len(starred) > 1: + if self._normalizer.version < (3, 9): + message = "two starred expressions in assignment" + else: + message = "multiple starred expressions in assignment" + self.add_issue(starred[1], message=message) + elif starred: + count = args.index(starred[0]) + if count >= 256: + message = "too many expressions in star-unpacking assignment" + self.add_issue(starred[0], message=message) + + +@ErrorFinder.register_rule(type='annassign') +class _AnnotatorRule(SyntaxRule): + # True: int + # {}: float + message = "illegal target 
for annotation" + + def get_node(self, node): + return node.parent + + def is_issue(self, node): + type_ = None + lhs = node.parent.children[0] + lhs = _remove_parens(lhs) + try: + children = lhs.children + except AttributeError: + pass + else: + if ',' in children or lhs.type == 'atom' and children[0] == '(': + type_ = 'tuple' + elif lhs.type == 'atom' and children[0] == '[': + type_ = 'list' + trailer = children[-1] + + if type_ is None: + if not (lhs.type == 'name' + # subscript/attributes are allowed + or lhs.type in ('atom_expr', 'power') + and trailer.type == 'trailer' + and trailer.children[0] != '('): + return True + else: + # x, y: str + message = "only single target (not %s) can be annotated" + self.add_issue(lhs.parent, message=message % type_) + + +@ErrorFinder.register_rule(type='argument') +class _ArgumentRule(SyntaxRule): + def is_issue(self, node): + first = node.children[0] + if self._normalizer.version < (3, 8): + # a((b)=c) is valid in <3.8 + first = _remove_parens(first) + if node.children[1] == '=' and first.type != 'name': + if first.type == 'lambdef': + # f(lambda: 1=1) + if self._normalizer.version < (3, 8): + message = "lambda cannot contain assignment" + else: + message = 'expression cannot contain assignment, perhaps you meant "=="?' + else: + # f(+x=1) + if self._normalizer.version < (3, 8): + message = "keyword can't be an expression" + else: + message = 'expression cannot contain assignment, perhaps you meant "=="?' + self.add_issue(first, message=message) + + if _is_argument_comprehension(node) and node.parent.type == 'classdef': + self.add_issue(node, message='invalid syntax') + + +@ErrorFinder.register_rule(type='nonlocal_stmt') +class _NonlocalModuleLevelRule(SyntaxRule): + message = "nonlocal declaration not allowed at module level" + + def is_issue(self, node): + return self._normalizer.context.parent_context is None + + +@ErrorFinder.register_rule(type='arglist') +class _ArglistRule(SyntaxRule): + @property + def message(self): + if self._normalizer.version < (3, 7): + return "Generator expression must be parenthesized if not sole argument" + else: + return "Generator expression must be parenthesized" + + def is_issue(self, node): + arg_set = set() + kw_only = False + kw_unpacking_only = False + for argument in node.children: + if argument == ',': + continue + + if argument.type == 'argument': + first = argument.children[0] + if _is_argument_comprehension(argument) and len(node.children) >= 2: + # a(a, b for b in c) + return True + + if first in ('*', '**'): + if first == '*': + if kw_unpacking_only: + # foo(**kwargs, *args) + message = "iterable argument unpacking " \ + "follows keyword argument unpacking" + self.add_issue(argument, message=message) + else: + kw_unpacking_only = True + else: # Is a keyword argument. 
+ kw_only = True + if first.type == 'name': + if first.value in arg_set: + # f(x=1, x=2) + message = "keyword argument repeated" + if self._normalizer.version >= (3, 9): + message += ": {}".format(first.value) + self.add_issue(first, message=message) + else: + arg_set.add(first.value) + else: + if kw_unpacking_only: + # f(**x, y) + message = "positional argument follows keyword argument unpacking" + self.add_issue(argument, message=message) + elif kw_only: + # f(x=2, y) + message = "positional argument follows keyword argument" + self.add_issue(argument, message=message) + + +@ErrorFinder.register_rule(type='parameters') +@ErrorFinder.register_rule(type='lambdef') +class _ParameterRule(SyntaxRule): + # def f(x=3, y): pass + message = "non-default argument follows default argument" + + def is_issue(self, node): + param_names = set() + default_only = False + star_seen = False + for p in _iter_params(node): + if p.type == 'operator': + if p.value == '*': + star_seen = True + default_only = False + continue + + if p.name.value in param_names: + message = "duplicate argument '%s' in function definition" + self.add_issue(p.name, message=message % p.name.value) + param_names.add(p.name.value) + + if not star_seen: + if p.default is None and not p.star_count: + if default_only: + return True + elif p.star_count: + star_seen = True + default_only = False + else: + default_only = True + + +@ErrorFinder.register_rule(type='try_stmt') +class _TryStmtRule(SyntaxRule): + message = "default 'except:' must be last" + + def is_issue(self, try_stmt): + default_except = None + for except_clause in try_stmt.children[3::3]: + if except_clause in ('else', 'finally'): + break + if except_clause == 'except': + default_except = except_clause + elif default_except is not None: + self.add_issue(default_except, message=self.message) + + +@ErrorFinder.register_rule(type='fstring') +class _FStringRule(SyntaxRule): + _fstring_grammar = None + message_expr = "f-string expression part cannot include a backslash" + message_nested = "f-string: expressions nested too deeply" + message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'" + + def _check_format_spec(self, format_spec, depth): + self._check_fstring_contents(format_spec.children[1:], depth) + + def _check_fstring_expr(self, fstring_expr, depth): + if depth >= 2: + self.add_issue(fstring_expr, message=self.message_nested) + + expr = fstring_expr.children[1] + if '\\' in expr.get_code(): + self.add_issue(expr, message=self.message_expr) + + children_2 = fstring_expr.children[2] + if children_2.type == 'operator' and children_2.value == '=': + conversion = fstring_expr.children[3] + else: + conversion = children_2 + if conversion.type == 'fstring_conversion': + name = conversion.children[1] + if name.value not in ('s', 'r', 'a'): + self.add_issue(name, message=self.message_conversion) + + format_spec = fstring_expr.children[-2] + if format_spec.type == 'fstring_format_spec': + self._check_format_spec(format_spec, depth + 1) + + def is_issue(self, fstring): + self._check_fstring_contents(fstring.children[1:-1]) + + def _check_fstring_contents(self, children, depth=0): + for fstring_content in children: + if fstring_content.type == 'fstring_expr': + self._check_fstring_expr(fstring_content, depth) + + +class _CheckAssignmentRule(SyntaxRule): + def _check_assignment(self, node, is_deletion=False, is_namedexpr=False, is_aug_assign=False): + error = None + type_ = node.type + if type_ == 'lambdef': + error = 'lambda' + elif type_ == 'atom': + 
first, second = node.children[:2] + error = _get_comprehension_type(node) + if error is None: + if second.type == 'dictorsetmaker': + if self._normalizer.version < (3, 8): + error = 'literal' + else: + if second.children[1] == ':': + error = 'dict display' + else: + error = 'set display' + elif first == "{" and second == "}": + if self._normalizer.version < (3, 8): + error = 'literal' + else: + error = "dict display" + elif first == "{" and len(node.children) > 2: + if self._normalizer.version < (3, 8): + error = 'literal' + else: + error = "set display" + elif first in ('(', '['): + if second.type == 'yield_expr': + error = 'yield expression' + elif second.type == 'testlist_comp': + # ([a, b] := [1, 2]) + # ((a, b) := [1, 2]) + if is_namedexpr: + if first == '(': + error = 'tuple' + elif first == '[': + error = 'list' + + # This is not a comprehension, they were handled + # further above. + for child in second.children[::2]: + self._check_assignment(child, is_deletion, is_namedexpr, is_aug_assign) + else: # Everything handled, must be useless brackets. + self._check_assignment(second, is_deletion, is_namedexpr, is_aug_assign) + elif type_ == 'keyword': + if node.value == "yield": + error = "yield expression" + elif self._normalizer.version < (3, 8): + error = 'keyword' + else: + error = str(node.value) + elif type_ == 'operator': + if node.value == '...': + error = 'Ellipsis' + elif type_ == 'comparison': + error = 'comparison' + elif type_ in ('string', 'number', 'strings'): + error = 'literal' + elif type_ == 'yield_expr': + # This one seems to be a slightly different warning in Python. + message = 'assignment to yield expression not possible' + self.add_issue(node, message=message) + elif type_ == 'test': + error = 'conditional expression' + elif type_ in ('atom_expr', 'power'): + if node.children[0] == 'await': + error = 'await expression' + elif node.children[-2] == '**': + error = 'operator' + else: + # Has a trailer + trailer = node.children[-1] + assert trailer.type == 'trailer' + if trailer.children[0] == '(': + error = 'function call' + elif is_namedexpr and trailer.children[0] == '[': + error = 'subscript' + elif is_namedexpr and trailer.children[0] == '.': + error = 'attribute' + elif type_ == "fstring": + if self._normalizer.version < (3, 8): + error = 'literal' + else: + error = "f-string expression" + elif type_ in ('testlist_star_expr', 'exprlist', 'testlist'): + for child in node.children[::2]: + self._check_assignment(child, is_deletion, is_namedexpr, is_aug_assign) + elif ('expr' in type_ and type_ != 'star_expr' # is a substring + or '_test' in type_ + or type_ in ('term', 'factor')): + error = 'operator' + elif type_ == "star_expr": + if is_deletion: + if self._normalizer.version >= (3, 9): + error = "starred" + else: + self.add_issue(node, message="can't use starred expression here") + else: + if self._normalizer.version >= (3, 9): + ancestor = node.parent + else: + ancestor = _skip_parens_bottom_up(node) + if ancestor.type not in _STAR_EXPR_PARENTS and not is_aug_assign \ + and not (ancestor.type == 'atom' and ancestor.children[0] == '['): + message = "starred assignment target must be in a list or tuple" + self.add_issue(node, message=message) + + self._check_assignment(node.children[1]) + + if error is not None: + if is_namedexpr: + message = 'cannot use assignment expressions with %s' % error + else: + cannot = "can't" if self._normalizer.version < (3, 8) else "cannot" + message = ' '.join([cannot, "delete" if is_deletion else "assign to", error]) + 
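+            # e.g. ``del f()`` is reported as "cannot delete function call"
+            # and ``f() = 1`` as "cannot assign to function call" on 3.8+
+            # (older versions use "can't").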
self.add_issue(node, message=message) + + +@ErrorFinder.register_rule(type='sync_comp_for') +class _CompForRule(_CheckAssignmentRule): + message = "asynchronous comprehension outside of an asynchronous function" + + def is_issue(self, node): + expr_list = node.children[1] + if expr_list.type != 'expr_list': # Already handled. + self._check_assignment(expr_list) + + return node.parent.children[0] == 'async' \ + and not self._normalizer.context.is_async_funcdef() + + +@ErrorFinder.register_rule(type='expr_stmt') +class _ExprStmtRule(_CheckAssignmentRule): + message = "illegal expression for augmented assignment" + extended_message = "'{target}' is an " + message + + def is_issue(self, node): + augassign = node.children[1] + is_aug_assign = augassign != '=' and augassign.type != 'annassign' + + if self._normalizer.version <= (3, 8) or not is_aug_assign: + for before_equal in node.children[:-2:2]: + self._check_assignment(before_equal, is_aug_assign=is_aug_assign) + + if is_aug_assign: + target = _remove_parens(node.children[0]) + # a, a[b], a.b + + if target.type == "name" or ( + target.type in ("atom_expr", "power") + and target.children[1].type == "trailer" + and target.children[-1].children[0] != "(" + ): + return False + + if self._normalizer.version <= (3, 8): + return True + else: + self.add_issue( + node, + message=self.extended_message.format( + target=_get_rhs_name(node.children[0], self._normalizer.version) + ), + ) + + +@ErrorFinder.register_rule(type='with_item') +class _WithItemRule(_CheckAssignmentRule): + def is_issue(self, with_item): + self._check_assignment(with_item.children[2]) + + +@ErrorFinder.register_rule(type='del_stmt') +class _DelStmtRule(_CheckAssignmentRule): + def is_issue(self, del_stmt): + child = del_stmt.children[1] + + if child.type != 'expr_list': # Already handled. + self._check_assignment(child, is_deletion=True) + + +@ErrorFinder.register_rule(type='expr_list') +class _ExprListRule(_CheckAssignmentRule): + def is_issue(self, expr_list): + for expr in expr_list.children[::2]: + self._check_assignment(expr) + + +@ErrorFinder.register_rule(type='for_stmt') +class _ForStmtRule(_CheckAssignmentRule): + def is_issue(self, for_stmt): + # Some of the nodes here are already used, so no else if + expr_list = for_stmt.children[1] + if expr_list.type != 'expr_list': # Already handled. 
+ self._check_assignment(expr_list) + + +@ErrorFinder.register_rule(type='namedexpr_test') +class _NamedExprRule(_CheckAssignmentRule): + # namedexpr_test: test [':=' test] + + def is_issue(self, namedexpr_test): + # assigned name + first = namedexpr_test.children[0] + + def search_namedexpr_in_comp_for(node): + while True: + parent = node.parent + if parent is None: + return parent + if parent.type == 'sync_comp_for' and parent.children[3] == node: + return parent + node = parent + + if search_namedexpr_in_comp_for(namedexpr_test): + # [i+1 for i in (i := range(5))] + # [i+1 for i in (j := range(5))] + # [i+1 for i in (lambda: (j := range(5)))()] + message = 'assignment expression cannot be used in a comprehension iterable expression' + self.add_issue(namedexpr_test, message=message) + + # defined names + exprlist = list() + + def process_comp_for(comp_for): + if comp_for.type == 'sync_comp_for': + comp = comp_for + elif comp_for.type == 'comp_for': + comp = comp_for.children[1] + exprlist.extend(_get_for_stmt_definition_exprs(comp)) + + def search_all_comp_ancestors(node): + has_ancestors = False + while True: + node = node.search_ancestor('testlist_comp', 'dictorsetmaker') + if node is None: + break + for child in node.children: + if child.type in _COMP_FOR_TYPES: + process_comp_for(child) + has_ancestors = True + break + return has_ancestors + + # check assignment expressions in comprehensions + search_all = search_all_comp_ancestors(namedexpr_test) + if search_all: + if self._normalizer.context.node.type == 'classdef': + message = 'assignment expression within a comprehension ' \ + 'cannot be used in a class body' + self.add_issue(namedexpr_test, message=message) + + namelist = [expr.value for expr in exprlist if expr.type == 'name'] + if first.type == 'name' and first.value in namelist: + # [i := 0 for i, j in range(5)] + # [[(i := i) for j in range(5)] for i in range(5)] + # [i for i, j in range(5) if True or (i := 1)] + # [False and (i := 0) for i, j in range(5)] + message = 'assignment expression cannot rebind ' \ + 'comprehension iteration variable %r' % first.value + self.add_issue(namedexpr_test, message=message) + + self._check_assignment(first, is_namedexpr=True) diff --git a/contrib/python/parso/py3/parso/python/grammar310.txt b/contrib/python/parso/py3/parso/python/grammar310.txt new file mode 100644 index 0000000000..f092050d88 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/grammar310.txt @@ -0,0 +1,169 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
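+
+# Notation (pgen EBNF): UPPERCASE names are terminals coming from the
+# tokenizer, lowercase names are nonterminals, 'quoted' strings are literal
+# tokens, [x] is optional, x* / x+ mean repetition and | separates
+# alternatives.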
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' namedexpr_test NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: ( + (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] ( + ',' tfpdef ['=' test])* ([',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]]) + | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]]) + | '**' tfpdef [',']]] ) +| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +) +tfpdef: NAME [':' test] +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +namedexpr_test: test [':=' test] +test: or_test ['if' or_test 'else' test] | lambdef +lambdef: 'lambda' [varargslist] ':' test +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test [':=' test] | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test [':=' test] | star_expr) + (comp_for | (',' (test [':=' test] | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
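As the comment block above notes, this grammar intentionally over-accepts: argument matches any test '=' test, and combinations such as an unparenthesized generator expression next to other arguments are only rejected later, by CPython's ast.c and by the errors.py rules shown earlier. The compiler-level check is easy to observe with the standard library alone:

.. code-block:: python

    >>> try:
    ...     compile("f(x for x in y, 1)", "<demo>", "eval")
    ... except SyntaxError as exc:
    ...     print("rejected:", type(exc).__name__)
    rejected: SyntaxError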
+argument: ( test [comp_for] | + test ':=' test | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' or_test [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' (testlist_comp | yield_expr) ['='] [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py3/parso/python/grammar311.txt b/contrib/python/parso/py3/parso/python/grammar311.txt new file mode 100644 index 0000000000..f092050d88 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/grammar311.txt @@ -0,0 +1,169 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' namedexpr_test NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: ( + (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] ( + ',' tfpdef ['=' test])* ([',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]]) + | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]]) + | '**' tfpdef [',']]] ) +| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +) +tfpdef: NAME [':' test] +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated 
assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +namedexpr_test: test [':=' test] +test: or_test ['if' or_test 'else' test] | lambdef +lambdef: 'lambda' [varargslist] ':' test +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' 
NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test [':=' test] | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test [':=' test] | star_expr) + (comp_for | (',' (test [':=' test] | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. +argument: ( test [comp_for] | + test ':=' test | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' or_test [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' (testlist_comp | yield_expr) ['='] [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py3/parso/python/grammar312.txt b/contrib/python/parso/py3/parso/python/grammar312.txt new file mode 100644 index 0000000000..f092050d88 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/grammar312.txt @@ -0,0 +1,169 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
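Of the three start symbols listed above, parso's top-level parse() always starts from file_input (see the Parser defaults in parser.py later in this patch), so even a single expression comes back wrapped in a module node. A short illustration:

.. code-block:: python

    >>> import parso
    >>> tree = parso.parse("1 + 1\n")
    >>> tree.type
    'file_input'
    >>> tree.get_code()
    '1 + 1\n'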
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' namedexpr_test NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: ( + (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] ( + ',' tfpdef ['=' test])* ([',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]]) + | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]]) + | '**' tfpdef [',']]] ) +| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +) +tfpdef: NAME [':' test] +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +namedexpr_test: test [':=' test] +test: or_test ['if' or_test 'else' test] | lambdef +lambdef: 'lambda' [varargslist] ':' test +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test [':=' test] | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test [':=' test] | star_expr) + (comp_for | (',' (test [':=' test] | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
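One consequence of the argument rule defined just below (argument: ... | test ':=' test) is that assignment expressions are legal directly in call-argument position, and parso round-trips them losslessly via get_code(). A brief sketch, assuming a parso build that accepts the 3.10 grammar bundled here:

.. code-block:: python

    >>> import parso
    >>> module = parso.parse("f(y := 10)\n", version="3.10")
    >>> module.get_code()
    'f(y := 10)\n'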
+argument: ( test [comp_for] | + test ':=' test | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' or_test [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' (testlist_comp | yield_expr) ['='] [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py3/parso/python/grammar36.txt b/contrib/python/parso/py3/parso/python/grammar36.txt new file mode 100644 index 0000000000..e79620668d --- /dev/null +++ b/contrib/python/parso/py3/parso/python/grammar36.txt @@ -0,0 +1,158 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of +# skipping python3.5+ compatibility, in favour of 3.7 solution +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' test] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' 
| '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. 
+# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. +argument: ( test [comp_for] | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' (testlist_comp | yield_expr) [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py3/parso/python/grammar37.txt b/contrib/python/parso/py3/parso/python/grammar37.txt new file mode 100644 index 0000000000..f4a929fe98 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/grammar37.txt @@ -0,0 +1,156 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://docs.python.org/devguide/grammar.html + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +tfpdef: NAME [':' test] +varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' test] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' 
| '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +while_stmt: 'while' test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. 
+# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. +argument: ( test [comp_for] | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' (testlist_comp | yield_expr) [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py3/parso/python/grammar38.txt b/contrib/python/parso/py3/parso/python/grammar38.txt new file mode 100644 index 0000000000..7288d556f9 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/grammar38.txt @@ -0,0 +1,171 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: ( + (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] ( + ',' tfpdef ['=' test])* ([',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]]) + | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]]) + | '**' tfpdef [',']]] ) +| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +) +tfpdef: NAME [':' test] +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] +testlist_star_expr: (test|star_expr) (',' 
(test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +namedexpr_test: test [':=' test] +test: or_test ['if' or_test 'else' test] | lambdef +test_nocond: or_test | lambdef_nocond +lambdef: 'lambda' [varargslist] ':' test +lambdef_nocond: 'lambda' [varargslist] ':' test_nocond +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' 
NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. +argument: ( test [comp_for] | + test ':=' test | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' test_nocond [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' (testlist_comp | yield_expr) ['='] [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py3/parso/python/grammar39.txt b/contrib/python/parso/py3/parso/python/grammar39.txt new file mode 100644 index 0000000000..ae46033cf3 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/grammar39.txt @@ -0,0 +1,169 @@ +# Grammar for Python + +# NOTE WELL: You should also follow all the steps listed at +# https://devguide.python.org/grammar/ + +# Start symbols for the grammar: +# single_input is a single interactive statement; +# file_input is a module or sequence of commands read from an input file; +# eval_input is the input for the eval() functions. +# NB: compound_stmt in single_input is followed by extra NEWLINE! 
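Each grammarXY.txt file is selected by the version string passed to load_grammar, which is how syntax that only exists in newer grammars, such as the '/' positional-only marker in typedargslist (present from grammar38.txt on), gets flagged under older versions. A hedged sketch, assuming that a failed parse under the older grammar is reported through iter_errors after error recovery:

.. code-block:: python

    >>> import parso
    >>> src = "def f(a, /, b): ...\n"
    >>> old = parso.load_grammar(version="3.7")
    >>> new = parso.load_grammar(version="3.8")
    >>> bool(list(old.iter_errors(old.parse(src))))
    True
    >>> bool(list(new.iter_errors(new.parse(src))))
    False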
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE +file_input: stmt* ENDMARKER +eval_input: testlist NEWLINE* ENDMARKER + +decorator: '@' namedexpr_test NEWLINE +decorators: decorator+ +decorated: decorators (classdef | funcdef | async_funcdef) + +async_funcdef: 'async' funcdef +funcdef: 'def' NAME parameters ['->' test] ':' suite + +parameters: '(' [typedargslist] ')' +typedargslist: ( + (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] ( + ',' tfpdef ['=' test])* ([',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]]) + | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]]) + | '**' tfpdef [',']]] ) +| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ + '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']]] + | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] + | '**' tfpdef [',']) +) +tfpdef: NAME [':' test] +varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ + '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [',']]] + | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] + | '**' vfpdef [','] +) +vfpdef: NAME + +stmt: simple_stmt | compound_stmt | NEWLINE +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt) +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*) +annassign: ':' test ['=' (yield_expr|testlist_star_expr)] +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//=') +# For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist +pass_stmt: 'pass' +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt +break_stmt: 'break' +continue_stmt: 'continue' +return_stmt: 'return' [testlist_star_expr] +yield_stmt: yield_expr +raise_stmt: 'raise' [test ['from' test]] +import_stmt: import_name | import_from +import_name: 'import' dotted_as_names +# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)) +import_as_name: NAME ['as' NAME] +dotted_as_name: dotted_name ['as' NAME] +import_as_names: import_as_name (',' import_as_name)* [','] +dotted_as_names: dotted_as_name (',' dotted_as_name)* +dotted_name: NAME ('.' 
NAME)* +global_stmt: 'global' NAME (',' NAME)* +nonlocal_stmt: 'nonlocal' NAME (',' NAME)* +assert_stmt: 'assert' test [',' test] + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt +async_stmt: 'async' (funcdef | with_stmt | for_stmt) +if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite] +while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite] +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ['else' ':' suite] + ['finally' ':' suite] | + 'finally' ':' suite)) +with_stmt: 'with' with_item (',' with_item)* ':' suite +with_item: test ['as' expr] +# NB compile.c makes sure that the default except clause is last +except_clause: 'except' [test ['as' NAME]] +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT + +namedexpr_test: test [':=' test] +test: or_test ['if' or_test 'else' test] | lambdef +lambdef: 'lambda' [varargslist] ':' test +or_test: and_test ('or' and_test)* +and_test: not_test ('and' not_test)* +not_test: 'not' not_test | comparison +comparison: expr (comp_op expr)* +# <> isn't actually a valid comparison operator in Python. It's here for the +# sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' +star_expr: '*' expr +expr: xor_expr ('|' xor_expr)* +xor_expr: and_expr ('^' and_expr)* +and_expr: shift_expr ('&' shift_expr)* +shift_expr: arith_expr (('<<'|'>>') arith_expr)* +arith_expr: term (('+'|'-') term)* +term: factor (('*'|'@'|'/'|'%'|'//') factor)* +factor: ('+'|'-'|'~') factor | power +power: atom_expr ['**' factor] +atom_expr: ['await'] atom trailer* +atom: ('(' [yield_expr|testlist_comp] ')' | + '[' [testlist_comp] ']' | + '{' [dictorsetmaker] '}' | + NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') +testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] ) +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +subscriptlist: subscript (',' subscript)* [','] +subscript: test | [test] ':' [test] [sliceop] +sliceop: ':' [test] +exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] +testlist: test (',' test)* [','] +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* [','])) | + ((test [':=' test] | star_expr) + (comp_for | (',' (test [':=' test] | star_expr))* [','])) ) + +classdef: 'class' NAME ['(' [arglist] ')'] ':' suite + +arglist: argument (',' argument)* [','] + +# The reason that keywords are test nodes instead of NAME is that using NAME +# results in an ambiguity. ast.c makes sure it's a NAME. +# "test '=' test" is really "keyword '=' test", but we have no such token. +# These need to be in a single rule to avoid grammar that is ambiguous +# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +# we explicitly match '*' here, too, to give it proper precedence. +# Illegal combinations and orderings are blocked in ast.c: +# multiple (test comp_for) arguments are blocked; keyword unpackings +# that precede iterable unpackings are blocked; etc. 
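The decorator rule near the top of this file ('@' namedexpr_test NEWLINE) reflects PEP 614: from 3.9 on any expression may be used as a decorator, whereas grammar38.txt still limits decorators to a dotted name with an optional call. A sketch of the difference, under the same assumption as before that parse failures show up in iter_errors:

.. code-block:: python

    >>> import parso
    >>> src = "@buttons[0].clicked.connect\ndef f(): ...\n"
    >>> g38 = parso.load_grammar(version="3.8")
    >>> g39 = parso.load_grammar(version="3.9")
    >>> bool(list(g38.iter_errors(g38.parse(src))))
    True
    >>> bool(list(g39.iter_errors(g39.parse(src))))
    False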
+argument: ( test [comp_for] | + test ':=' test | + test '=' test | + '**' test | + '*' test ) + +comp_iter: comp_for | comp_if +sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] +comp_for: ['async'] sync_comp_for +comp_if: 'if' or_test [comp_iter] + +# not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME + +yield_expr: 'yield' [yield_arg] +yield_arg: 'from' test | testlist_star_expr + +strings: (STRING | fstring)+ +fstring: FSTRING_START fstring_content* FSTRING_END +fstring_content: FSTRING_STRING | fstring_expr +fstring_conversion: '!' NAME +fstring_expr: '{' (testlist_comp | yield_expr) ['='] [ fstring_conversion ] [ fstring_format_spec ] '}' +fstring_format_spec: ':' fstring_content* diff --git a/contrib/python/parso/py3/parso/python/parser.py b/contrib/python/parso/py3/parso/python/parser.py new file mode 100644 index 0000000000..fa45e8b153 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/parser.py @@ -0,0 +1,206 @@ +from parso.python import tree +from parso.python.token import PythonTokenTypes +from parso.parser import BaseParser + + +NAME = PythonTokenTypes.NAME +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT + + +class Parser(BaseParser): + """ + This class is used to parse a Python file, it then divides them into a + class structure of different scopes. + + :param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar. + """ + + node_map = { + 'expr_stmt': tree.ExprStmt, + 'classdef': tree.Class, + 'funcdef': tree.Function, + 'file_input': tree.Module, + 'import_name': tree.ImportName, + 'import_from': tree.ImportFrom, + 'break_stmt': tree.KeywordStatement, + 'continue_stmt': tree.KeywordStatement, + 'return_stmt': tree.ReturnStmt, + 'raise_stmt': tree.KeywordStatement, + 'yield_expr': tree.YieldExpr, + 'del_stmt': tree.KeywordStatement, + 'pass_stmt': tree.KeywordStatement, + 'global_stmt': tree.GlobalStmt, + 'nonlocal_stmt': tree.KeywordStatement, + 'print_stmt': tree.KeywordStatement, + 'assert_stmt': tree.AssertStmt, + 'if_stmt': tree.IfStmt, + 'with_stmt': tree.WithStmt, + 'for_stmt': tree.ForStmt, + 'while_stmt': tree.WhileStmt, + 'try_stmt': tree.TryStmt, + 'sync_comp_for': tree.SyncCompFor, + # Not sure if this is the best idea, but IMO it's the easiest way to + # avoid extreme amounts of work around the subtle difference of 2/3 + # grammar in list comoprehensions. + 'decorator': tree.Decorator, + 'lambdef': tree.Lambda, + 'lambdef_nocond': tree.Lambda, + 'namedexpr_test': tree.NamedExpr, + } + default_node = tree.PythonNode + + # Names/Keywords are handled separately + _leaf_map = { + PythonTokenTypes.STRING: tree.String, + PythonTokenTypes.NUMBER: tree.Number, + PythonTokenTypes.NEWLINE: tree.Newline, + PythonTokenTypes.ENDMARKER: tree.EndMarker, + PythonTokenTypes.FSTRING_STRING: tree.FStringString, + PythonTokenTypes.FSTRING_START: tree.FStringStart, + PythonTokenTypes.FSTRING_END: tree.FStringEnd, + } + + def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'): + super().__init__(pgen_grammar, start_nonterminal, + error_recovery=error_recovery) + + self.syntax_errors = [] + self._omit_dedent_list = [] + self._indent_counter = 0 + + def parse(self, tokens): + if self._error_recovery: + if self._start_nonterminal != 'file_input': + raise NotImplementedError + + tokens = self._recovery_tokenize(tokens) + + return super().parse(tokens) + + def convert_node(self, nonterminal, children): + """ + Convert raw node information to a PythonBaseNode instance. 
+ + This is passed to the parser driver which calls it whenever a reduction of a + grammar rule produces a new complete node, so that the tree is build + strictly bottom-up. + """ + try: + node = self.node_map[nonterminal](children) + except KeyError: + if nonterminal == 'suite': + # We don't want the INDENT/DEDENT in our parser tree. Those + # leaves are just cancer. They are virtual leaves and not real + # ones and therefore have pseudo start/end positions and no + # prefixes. Just ignore them. + children = [children[0]] + children[2:-1] + node = self.default_node(nonterminal, children) + return node + + def convert_leaf(self, type, value, prefix, start_pos): + # print('leaf', repr(value), token.tok_name[type]) + if type == NAME: + if value in self._pgen_grammar.reserved_syntax_strings: + return tree.Keyword(value, start_pos, prefix) + else: + return tree.Name(value, start_pos, prefix) + + return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix) + + def error_recovery(self, token): + tos_nodes = self.stack[-1].nodes + if tos_nodes: + last_leaf = tos_nodes[-1].get_last_leaf() + else: + last_leaf = None + + if self._start_nonterminal == 'file_input' and \ + (token.type == PythonTokenTypes.ENDMARKER + or token.type == DEDENT and not last_leaf.value.endswith('\n') + and not last_leaf.value.endswith('\r')): + # In Python statements need to end with a newline. But since it's + # possible (and valid in Python) that there's no newline at the + # end of a file, we have to recover even if the user doesn't want + # error recovery. + if self.stack[-1].dfa.from_rule == 'simple_stmt': + try: + plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE] + except KeyError: + pass + else: + if plan.next_dfa.is_final and not plan.dfa_pushes: + # We are ignoring here that the newline would be + # required for a simple_stmt. + self.stack[-1].dfa = plan.next_dfa + self._add_token(token) + return + + if not self._error_recovery: + return super().error_recovery(token) + + def current_suite(stack): + # For now just discard everything that is not a suite or + # file_input, if we detect an error. + for until_index, stack_node in reversed(list(enumerate(stack))): + # `suite` can sometimes be only simple_stmt, not stmt. + if stack_node.nonterminal == 'file_input': + break + elif stack_node.nonterminal == 'suite': + # In the case where we just have a newline we don't want to + # do error recovery here. In all other cases, we want to do + # error recovery. + if len(stack_node.nodes) != 1: + break + return until_index + + until_index = current_suite(self.stack) + + if self._stack_removal(until_index + 1): + self._add_token(token) + else: + typ, value, start_pos, prefix = token + if typ == INDENT: + # For every deleted INDENT we have to delete a DEDENT as well. + # Otherwise the parser will get into trouble and DEDENT too early. + self._omit_dedent_list.append(self._indent_counter) + + error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix) + self.stack[-1].nodes.append(error_leaf) + + tos = self.stack[-1] + if tos.nonterminal == 'suite': + # Need at least one statement in the suite. This happend with the + # error recovery above. + try: + tos.dfa = tos.dfa.arcs['stmt'] + except KeyError: + # We're already in a final state. 
+ pass + + def _stack_removal(self, start_index): + all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes] + + if all_nodes: + node = tree.PythonErrorNode(all_nodes) + self.stack[start_index - 1].nodes.append(node) + + self.stack[start_index:] = [] + return bool(all_nodes) + + def _recovery_tokenize(self, tokens): + for token in tokens: + typ = token[0] + if typ == DEDENT: + # We need to count indents, because if we just omit any DEDENT, + # we might omit them in the wrong place. + o = self._omit_dedent_list + if o and o[-1] == self._indent_counter: + o.pop() + self._indent_counter -= 1 + continue + + self._indent_counter -= 1 + elif typ == INDENT: + self._indent_counter += 1 + yield token diff --git a/contrib/python/parso/py3/parso/python/pep8.py b/contrib/python/parso/py3/parso/python/pep8.py new file mode 100644 index 0000000000..c492dae682 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/pep8.py @@ -0,0 +1,767 @@ +import re +from contextlib import contextmanager +from typing import Tuple + +from parso.python.errors import ErrorFinder, ErrorFinderConfig +from parso.normalizer import Rule +from parso.python.tree import Flow, Scope + + +_IMPORT_TYPES = ('import_name', 'import_from') +_SUITE_INTRODUCERS = ('classdef', 'funcdef', 'if_stmt', 'while_stmt', + 'for_stmt', 'try_stmt', 'with_stmt') +_NON_STAR_TYPES = ('term', 'import_from', 'power') +_OPENING_BRACKETS = '(', '[', '{' +_CLOSING_BRACKETS = ')', ']', '}' +_FACTOR = '+', '-', '~' +_ALLOW_SPACE = '*', '+', '-', '**', '/', '//', '@' +_BITWISE_OPERATOR = '<<', '>>', '|', '&', '^' +_NEEDS_SPACE: Tuple[str, ...] = ( + '=', '%', '->', + '<', '>', '==', '>=', '<=', '<>', '!=', + '+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=', '<<=', + '>>=', '**=', '//=') +_NEEDS_SPACE += _BITWISE_OPERATOR +_IMPLICIT_INDENTATION_TYPES = ('dictorsetmaker', 'argument') +_POSSIBLE_SLICE_PARENTS = ('subscript', 'subscriptlist', 'sliceop') + + +class IndentationTypes: + VERTICAL_BRACKET = object() + HANGING_BRACKET = object() + BACKSLASH = object() + SUITE = object() + IMPLICIT = object() + + +class IndentationNode(object): + type = IndentationTypes.SUITE + + def __init__(self, config, indentation, parent=None): + self.bracket_indentation = self.indentation = indentation + self.parent = parent + + def __repr__(self): + return '<%s>' % self.__class__.__name__ + + def get_latest_suite_node(self): + n = self + while n is not None: + if n.type == IndentationTypes.SUITE: + return n + + n = n.parent + + +class BracketNode(IndentationNode): + def __init__(self, config, leaf, parent, in_suite_introducer=False): + self.leaf = leaf + + # Figure out here what the indentation is. For chained brackets + # we can basically use the previous indentation. 
+ previous_leaf = leaf + n = parent + if n.type == IndentationTypes.IMPLICIT: + n = n.parent + while True: + if hasattr(n, 'leaf') and previous_leaf.line != n.leaf.line: + break + + previous_leaf = previous_leaf.get_previous_leaf() + if not isinstance(n, BracketNode) or previous_leaf != n.leaf: + break + n = n.parent + parent_indentation = n.indentation + + next_leaf = leaf.get_next_leaf() + if '\n' in next_leaf.prefix or '\r' in next_leaf.prefix: + # This implies code like: + # foobarbaz( + # a, + # b, + # ) + self.bracket_indentation = parent_indentation \ + + config.closing_bracket_hanging_indentation + self.indentation = parent_indentation + config.indentation + self.type = IndentationTypes.HANGING_BRACKET + else: + # Implies code like: + # foobarbaz( + # a, + # b, + # ) + expected_end_indent = leaf.end_pos[1] + if '\t' in config.indentation: + self.indentation = None + else: + self.indentation = ' ' * expected_end_indent + self.bracket_indentation = self.indentation + self.type = IndentationTypes.VERTICAL_BRACKET + + if in_suite_introducer and parent.type == IndentationTypes.SUITE \ + and self.indentation == parent_indentation + config.indentation: + self.indentation += config.indentation + # The closing bracket should have the same indentation. + self.bracket_indentation = self.indentation + self.parent = parent + + +class ImplicitNode(BracketNode): + """ + Implicit indentation after keyword arguments, default arguments, + annotations and dict values. + """ + def __init__(self, config, leaf, parent): + super().__init__(config, leaf, parent) + self.type = IndentationTypes.IMPLICIT + + next_leaf = leaf.get_next_leaf() + if leaf == ':' and '\n' not in next_leaf.prefix and '\r' not in next_leaf.prefix: + self.indentation += ' ' + + +class BackslashNode(IndentationNode): + type = IndentationTypes.BACKSLASH + + def __init__(self, config, parent_indentation, containing_leaf, spacing, parent=None): + expr_stmt = containing_leaf.search_ancestor('expr_stmt') + if expr_stmt is not None: + equals = expr_stmt.children[-2] + + if '\t' in config.indentation: + # TODO unite with the code of BracketNode + self.indentation = None + else: + # If the backslash follows the equals, use normal indentation + # otherwise it should align with the equals. + if equals.end_pos == spacing.start_pos: + self.indentation = parent_indentation + config.indentation + else: + # +1 because there is a space. + self.indentation = ' ' * (equals.end_pos[1] + 1) + else: + self.indentation = parent_indentation + config.indentation + self.bracket_indentation = self.indentation + self.parent = parent + + +def _is_magic_name(name): + return name.value.startswith('__') and name.value.endswith('__') + + +class PEP8Normalizer(ErrorFinder): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._previous_part = None + self._previous_leaf = None + self._on_newline = True + self._newline_count = 0 + self._wanted_newline_count = None + self._max_new_lines_in_prefix = 0 + self._new_statement = True + self._implicit_indentation_possible = False + # The top of stack of the indentation nodes. 
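+        # Suites push an IndentationNode, brackets push a BracketNode (or an
+        # ImplicitNode after '=' / ':'), and backslash continuations push a
+        # BackslashNode, so the currently expected indentation can always be
+        # read from the top of this stack.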
+ self._indentation_tos = self._last_indentation_tos = \ + IndentationNode(self._config, indentation='') + self._in_suite_introducer = False + + if ' ' in self._config.indentation: + self._indentation_type = 'spaces' + self._wrong_indentation_char = '\t' + else: + self._indentation_type = 'tabs' + self._wrong_indentation_char = ' ' + + @contextmanager + def visit_node(self, node): + with super().visit_node(node): + with self._visit_node(node): + yield + + @contextmanager + def _visit_node(self, node): + typ = node.type + + if typ in 'import_name': + names = node.get_defined_names() + if len(names) > 1: + for name in names[:1]: + self.add_issue(name, 401, 'Multiple imports on one line') + elif typ == 'lambdef': + expr_stmt = node.parent + # Check if it's simply defining a single name, not something like + # foo.bar or x[1], where using a lambda could make more sense. + if expr_stmt.type == 'expr_stmt' and any(n.type == 'name' + for n in expr_stmt.children[:-2:2]): + self.add_issue(node, 731, 'Do not assign a lambda expression, use a def') + elif typ == 'try_stmt': + for child in node.children: + # Here we can simply check if it's an except, because otherwise + # it would be an except_clause. + if child.type == 'keyword' and child.value == 'except': + self.add_issue(child, 722, 'Do not use bare except, specify exception instead') + elif typ == 'comparison': + for child in node.children: + if child.type not in ('atom_expr', 'power'): + continue + if len(child.children) > 2: + continue + trailer = child.children[1] + atom = child.children[0] + if trailer.type == 'trailer' and atom.type == 'name' \ + and atom.value == 'type': + self.add_issue(node, 721, "Do not compare types, use 'isinstance()") + break + elif typ == 'file_input': + endmarker = node.children[-1] + prev = endmarker.get_previous_leaf() + prefix = endmarker.prefix + if (not prefix.endswith('\n') and not prefix.endswith('\r') and ( + prefix or prev is None or prev.value not in {'\n', '\r\n', '\r'})): + self.add_issue(endmarker, 292, "No newline at end of file") + + if typ in _IMPORT_TYPES: + simple_stmt = node.parent + module = simple_stmt.parent + if module.type == 'file_input': + index = module.children.index(simple_stmt) + for child in module.children[:index]: + children = [child] + if child.type == 'simple_stmt': + # Remove the newline. + children = child.children[:-1] + + found_docstring = False + for c in children: + if c.type == 'string' and not found_docstring: + continue + found_docstring = True + + if c.type == 'expr_stmt' and \ + all(_is_magic_name(n) for n in c.get_defined_names()): + continue + + if c.type in _IMPORT_TYPES or isinstance(c, Flow): + continue + + self.add_issue(node, 402, 'Module level import not at top of file') + break + else: + continue + break + + implicit_indentation_possible = typ in _IMPLICIT_INDENTATION_TYPES + in_introducer = typ in _SUITE_INTRODUCERS + if in_introducer: + self._in_suite_introducer = True + elif typ == 'suite': + if self._indentation_tos.type == IndentationTypes.BACKSLASH: + self._indentation_tos = self._indentation_tos.parent + + self._indentation_tos = IndentationNode( + self._config, + self._indentation_tos.indentation + self._config.indentation, + parent=self._indentation_tos + ) + elif implicit_indentation_possible: + self._implicit_indentation_possible = True + yield + if typ == 'suite': + assert self._indentation_tos.type == IndentationTypes.SUITE + self._indentation_tos = self._indentation_tos.parent + # If we dedent, no lines are needed anymore. 
+ self._wanted_newline_count = None + elif implicit_indentation_possible: + self._implicit_indentation_possible = False + if self._indentation_tos.type == IndentationTypes.IMPLICIT: + self._indentation_tos = self._indentation_tos.parent + elif in_introducer: + self._in_suite_introducer = False + if typ in ('classdef', 'funcdef'): + self._wanted_newline_count = self._get_wanted_blank_lines_count() + + def _check_tabs_spaces(self, spacing): + if self._wrong_indentation_char in spacing.value: + self.add_issue(spacing, 101, 'Indentation contains ' + self._indentation_type) + return True + return False + + def _get_wanted_blank_lines_count(self): + suite_node = self._indentation_tos.get_latest_suite_node() + return int(suite_node.parent is None) + 1 + + def _reset_newlines(self, spacing, leaf, is_comment=False): + self._max_new_lines_in_prefix = \ + max(self._max_new_lines_in_prefix, self._newline_count) + + wanted = self._wanted_newline_count + if wanted is not None: + # Need to substract one + blank_lines = self._newline_count - 1 + if wanted > blank_lines and leaf.type != 'endmarker': + # In case of a comment we don't need to add the issue, yet. + if not is_comment: + # TODO end_pos wrong. + code = 302 if wanted == 2 else 301 + message = "expected %s blank line, found %s" \ + % (wanted, blank_lines) + self.add_issue(spacing, code, message) + self._wanted_newline_count = None + else: + self._wanted_newline_count = None + + if not is_comment: + wanted = self._get_wanted_blank_lines_count() + actual = self._max_new_lines_in_prefix - 1 + + val = leaf.value + needs_lines = ( + val == '@' and leaf.parent.type == 'decorator' + or ( + val == 'class' + or val == 'async' and leaf.get_next_leaf() == 'def' + or val == 'def' and self._previous_leaf != 'async' + ) and leaf.parent.parent.type != 'decorated' + ) + if needs_lines and actual < wanted: + func_or_cls = leaf.parent + suite = func_or_cls.parent + if suite.type == 'decorated': + suite = suite.parent + + # The first leaf of a file or a suite should not need blank + # lines. + if suite.children[int(suite.type == 'suite')] != func_or_cls: + code = 302 if wanted == 2 else 301 + message = "expected %s blank line, found %s" \ + % (wanted, actual) + self.add_issue(spacing, code, message) + + self._max_new_lines_in_prefix = 0 + + self._newline_count = 0 + + def visit_leaf(self, leaf): + super().visit_leaf(leaf) + for part in leaf._split_prefix(): + if part.type == 'spacing': + # This part is used for the part call after for. + break + self._visit_part(part, part.create_spacing_part(), leaf) + + self._analyse_non_prefix(leaf) + self._visit_part(leaf, part, leaf) + + # Cleanup + self._last_indentation_tos = self._indentation_tos + + self._new_statement = leaf.type == 'newline' + + # TODO does this work? with brackets and stuff? 
+ if leaf.type == 'newline' and \ + self._indentation_tos.type == IndentationTypes.BACKSLASH: + self._indentation_tos = self._indentation_tos.parent + + if leaf.value == ':' and leaf.parent.type in _SUITE_INTRODUCERS: + self._in_suite_introducer = False + elif leaf.value == 'elif': + self._in_suite_introducer = True + + if not self._new_statement: + self._reset_newlines(part, leaf) + self._max_blank_lines = 0 + + self._previous_leaf = leaf + + return leaf.value + + def _visit_part(self, part, spacing, leaf): + value = part.value + type_ = part.type + if type_ == 'error_leaf': + return + + if value == ',' and part.parent.type == 'dictorsetmaker': + self._indentation_tos = self._indentation_tos.parent + + node = self._indentation_tos + + if type_ == 'comment': + if value.startswith('##'): + # Whole blocks of # should not raise an error. + if value.lstrip('#'): + self.add_issue(part, 266, "Too many leading '#' for block comment.") + elif self._on_newline: + if not re.match(r'#:? ', value) and not value == '#' \ + and not (value.startswith('#!') and part.start_pos == (1, 0)): + self.add_issue(part, 265, "Block comment should start with '# '") + else: + if not re.match(r'#:? [^ ]', value): + self.add_issue(part, 262, "Inline comment should start with '# '") + + self._reset_newlines(spacing, leaf, is_comment=True) + elif type_ == 'newline': + if self._newline_count > self._get_wanted_blank_lines_count(): + self.add_issue(part, 303, "Too many blank lines (%s)" % self._newline_count) + elif leaf in ('def', 'class') \ + and leaf.parent.parent.type == 'decorated': + self.add_issue(part, 304, "Blank lines found after function decorator") + + self._newline_count += 1 + + if type_ == 'backslash': + # TODO is this enough checking? What about ==? + if node.type != IndentationTypes.BACKSLASH: + if node.type != IndentationTypes.SUITE: + self.add_issue(part, 502, 'The backslash is redundant between brackets') + else: + indentation = node.indentation + if self._in_suite_introducer and node.type == IndentationTypes.SUITE: + indentation += self._config.indentation + + self._indentation_tos = BackslashNode( + self._config, + indentation, + part, + spacing, + parent=self._indentation_tos + ) + elif self._on_newline: + indentation = spacing.value + if node.type == IndentationTypes.BACKSLASH \ + and self._previous_part.type == 'newline': + self._indentation_tos = self._indentation_tos.parent + + if not self._check_tabs_spaces(spacing): + should_be_indentation = node.indentation + if type_ == 'comment': + # Comments can be dedented. So we have to care for that. 
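+                    # (A comment that lines up with one of the enclosing
+                    # blocks, rather than with the current one, is accepted
+                    # as well.)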
+ n = self._last_indentation_tos + while True: + if len(indentation) > len(n.indentation): + break + + should_be_indentation = n.indentation + + self._last_indentation_tos = n + if n == node: + break + n = n.parent + + if self._new_statement: + if type_ == 'newline': + if indentation: + self.add_issue(spacing, 291, 'Trailing whitespace') + elif indentation != should_be_indentation: + s = '%s %s' % (len(self._config.indentation), self._indentation_type) + self.add_issue(part, 111, 'Indentation is not a multiple of ' + s) + else: + if value in '])}': + should_be_indentation = node.bracket_indentation + else: + should_be_indentation = node.indentation + if self._in_suite_introducer and indentation == \ + node.get_latest_suite_node().indentation \ + + self._config.indentation: + self.add_issue(part, 129, "Line with same indent as next logical block") + elif indentation != should_be_indentation: + if not self._check_tabs_spaces(spacing) and part.value not in \ + {'\n', '\r\n', '\r'}: + if value in '])}': + if node.type == IndentationTypes.VERTICAL_BRACKET: + self.add_issue( + part, + 124, + "Closing bracket does not match visual indentation" + ) + else: + self.add_issue( + part, + 123, + "Losing bracket does not match " + "indentation of opening bracket's line" + ) + else: + if len(indentation) < len(should_be_indentation): + if node.type == IndentationTypes.VERTICAL_BRACKET: + self.add_issue( + part, + 128, + 'Continuation line under-indented for visual indent' + ) + elif node.type == IndentationTypes.BACKSLASH: + self.add_issue( + part, + 122, + 'Continuation line missing indentation or outdented' + ) + elif node.type == IndentationTypes.IMPLICIT: + self.add_issue(part, 135, 'xxx') + else: + self.add_issue( + part, + 121, + 'Continuation line under-indented for hanging indent' + ) + else: + if node.type == IndentationTypes.VERTICAL_BRACKET: + self.add_issue( + part, + 127, + 'Continuation line over-indented for visual indent' + ) + elif node.type == IndentationTypes.IMPLICIT: + self.add_issue(part, 136, 'xxx') + else: + self.add_issue( + part, + 126, + 'Continuation line over-indented for hanging indent' + ) + else: + self._check_spacing(part, spacing) + + self._check_line_length(part, spacing) + # ------------------------------- + # Finalizing. Updating the state. + # ------------------------------- + if value and value in '()[]{}' and type_ != 'error_leaf' \ + and part.parent.type != 'error_node': + if value in _OPENING_BRACKETS: + self._indentation_tos = BracketNode( + self._config, part, + parent=self._indentation_tos, + in_suite_introducer=self._in_suite_introducer + ) + else: + assert node.type != IndentationTypes.IMPLICIT + self._indentation_tos = self._indentation_tos.parent + elif value in ('=', ':') and self._implicit_indentation_possible \ + and part.parent.type in _IMPLICIT_INDENTATION_TYPES: + indentation = node.indentation + self._indentation_tos = ImplicitNode( + self._config, part, parent=self._indentation_tos + ) + + self._on_newline = type_ in ('newline', 'backslash', 'bom') + + self._previous_part = part + self._previous_spacing = spacing + + def _check_line_length(self, part, spacing): + if part.type == 'backslash': + last_column = part.start_pos[1] + 1 + else: + last_column = part.end_pos[1] + if last_column > self._config.max_characters \ + and spacing.start_pos[1] <= self._config.max_characters: + # Special case for long URLs in multi-line docstrings or comments, + # but still report the error when the 72 first chars are whitespaces. 
+ report = True + if part.type == 'comment': + splitted = part.value[1:].split() + if len(splitted) == 1 \ + and (part.end_pos[1] - len(splitted[0])) < 72: + report = False + if report: + self.add_issue( + part, + 501, + 'Line too long (%s > %s characters)' % + (last_column, self._config.max_characters), + ) + + def _check_spacing(self, part, spacing): + def add_if_spaces(*args): + if spaces: + return self.add_issue(*args) + + def add_not_spaces(*args): + if not spaces: + return self.add_issue(*args) + + spaces = spacing.value + prev = self._previous_part + if prev is not None and prev.type == 'error_leaf' or part.type == 'error_leaf': + return + + type_ = part.type + if '\t' in spaces: + self.add_issue(spacing, 223, 'Used tab to separate tokens') + elif type_ == 'comment': + if len(spaces) < self._config.spaces_before_comment: + self.add_issue(spacing, 261, 'At least two spaces before inline comment') + elif type_ == 'newline': + add_if_spaces(spacing, 291, 'Trailing whitespace') + elif len(spaces) > 1: + self.add_issue(spacing, 221, 'Multiple spaces used') + else: + if prev in _OPENING_BRACKETS: + message = "Whitespace after '%s'" % part.value + add_if_spaces(spacing, 201, message) + elif part in _CLOSING_BRACKETS: + message = "Whitespace before '%s'" % part.value + add_if_spaces(spacing, 202, message) + elif part in (',', ';') or part == ':' \ + and part.parent.type not in _POSSIBLE_SLICE_PARENTS: + message = "Whitespace before '%s'" % part.value + add_if_spaces(spacing, 203, message) + elif prev == ':' and prev.parent.type in _POSSIBLE_SLICE_PARENTS: + pass # TODO + elif prev in (',', ';', ':'): + add_not_spaces(spacing, 231, "missing whitespace after '%s'") + elif part == ':': # Is a subscript + # TODO + pass + elif part in ('*', '**') and part.parent.type not in _NON_STAR_TYPES \ + or prev in ('*', '**') \ + and prev.parent.type not in _NON_STAR_TYPES: + # TODO + pass + elif prev in _FACTOR and prev.parent.type == 'factor': + pass + elif prev == '@' and prev.parent.type == 'decorator': + pass # TODO should probably raise an error if there's a space here + elif part in _NEEDS_SPACE or prev in _NEEDS_SPACE: + if part == '=' and part.parent.type in ('argument', 'param') \ + or prev == '=' and prev.parent.type in ('argument', 'param'): + if part == '=': + param = part.parent + else: + param = prev.parent + if param.type == 'param' and param.annotation: + add_not_spaces(spacing, 252, 'Expected spaces around annotation equals') + else: + add_if_spaces( + spacing, + 251, + 'Unexpected spaces around keyword / parameter equals' + ) + elif part in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR: + add_not_spaces( + spacing, + 227, + 'Missing whitespace around bitwise or shift operator' + ) + elif part == '%' or prev == '%': + add_not_spaces(spacing, 228, 'Missing whitespace around modulo operator') + else: + message_225 = 'Missing whitespace between tokens' + add_not_spaces(spacing, 225, message_225) + elif type_ == 'keyword' or prev.type == 'keyword': + add_not_spaces(spacing, 275, 'Missing whitespace around keyword') + else: + prev_spacing = self._previous_spacing + if prev in _ALLOW_SPACE and spaces != prev_spacing.value \ + and '\n' not in self._previous_leaf.prefix \ + and '\r' not in self._previous_leaf.prefix: + message = "Whitespace before operator doesn't match with whitespace after" + self.add_issue(spacing, 229, message) + + if spaces and part not in _ALLOW_SPACE and prev not in _ALLOW_SPACE: + message_225 = 'Missing whitespace between tokens' + # self.add_issue(spacing, 225, 
message_225)
+                # TODO why only brackets?
+                if part in _OPENING_BRACKETS:
+                    message = "Whitespace before '%s'" % part.value
+                    add_if_spaces(spacing, 211, message)
+
+    def _analyse_non_prefix(self, leaf):
+        typ = leaf.type
+        if typ == 'name' and leaf.value in ('l', 'O', 'I'):
+            if leaf.is_definition():
+                message = "Do not define %s named 'l', 'O', or 'I' one line"
+                if leaf.parent.type == 'class' and leaf.parent.name == leaf:
+                    self.add_issue(leaf, 742, message % 'classes')
+                elif leaf.parent.type == 'function' and leaf.parent.name == leaf:
+                    self.add_issue(leaf, 743, message % 'function')
+                else:
+                    self.add_issue(leaf, 741, message % 'variables')
+        elif leaf.value == ':':
+            if isinstance(leaf.parent, (Flow, Scope)) and leaf.parent.type != 'lambdef':
+                next_leaf = leaf.get_next_leaf()
+                if next_leaf.type != 'newline':
+                    if leaf.parent.type == 'funcdef':
+                        self.add_issue(next_leaf, 704, 'Multiple statements on one line (def)')
+                    else:
+                        self.add_issue(next_leaf, 701, 'Multiple statements on one line (colon)')
+        elif leaf.value == ';':
+            if leaf.get_next_leaf().type in ('newline', 'endmarker'):
+                self.add_issue(leaf, 703, 'Statement ends with a semicolon')
+            else:
+                self.add_issue(leaf, 702, 'Multiple statements on one line (semicolon)')
+        elif leaf.value in ('==', '!='):
+            comparison = leaf.parent
+            index = comparison.children.index(leaf)
+            left = comparison.children[index - 1]
+            right = comparison.children[index + 1]
+            for node in left, right:
+                if node.type == 'keyword' or node.type == 'name':
+                    if node.value == 'None':
+                        message = "comparison to None should be 'if cond is None:'"
+                        self.add_issue(leaf, 711, message)
+                        break
+                    elif node.value in ('True', 'False'):
+                        message = "comparison to False/True should be " \
+                                  "'if cond is True:' or 'if cond:'"
+                        self.add_issue(leaf, 712, message)
+                        break
+        elif leaf.value in ('in', 'is'):
+            comparison = leaf.parent
+            if comparison.type == 'comparison' and comparison.parent.type == 'not_test':
+                if leaf.value == 'in':
+                    self.add_issue(leaf, 713, "test for membership should be 'not in'")
+                else:
+                    self.add_issue(leaf, 714, "test for object identity should be 'is not'")
+        elif typ == 'string':
+            # Checking multiline strings
+            for i, line in enumerate(leaf.value.splitlines()[1:]):
+                indentation = re.match(r'[ \t]*', line).group(0)
+                start_pos = leaf.line + i, len(indentation)
+                # TODO check multiline indentation.
+                start_pos
+        elif typ == 'endmarker':
+            if self._newline_count >= 2:
+                self.add_issue(leaf, 391, 'Blank line at end of file')
+
+    def add_issue(self, node, code, message):
+        if self._previous_leaf is not None:
+            if self._previous_leaf.search_ancestor('error_node') is not None:
+                return
+            if self._previous_leaf.type == 'error_leaf':
+                return
+        if node.search_ancestor('error_node') is not None:
+            return
+        if code in (901, 903):
+            # 901 and 903 are raised by the ErrorFinder.
+            super().add_issue(node, code, message)
+        else:
+            # Skip ErrorFinder here, because it has custom behavior.
+            super(ErrorFinder, self).add_issue(node, code, message)
+
+
+class PEP8NormalizerConfig(ErrorFinderConfig):
+    normalizer_class = PEP8Normalizer
+    """
+    Normalizing to PEP8. Not really implemented, yet.
+ """ + def __init__(self, indentation=' ' * 4, hanging_indentation=None, + max_characters=79, spaces_before_comment=2): + self.indentation = indentation + if hanging_indentation is None: + hanging_indentation = indentation + self.hanging_indentation = hanging_indentation + self.closing_bracket_hanging_indentation = '' + self.break_after_binary = False + self.max_characters = max_characters + self.spaces_before_comment = spaces_before_comment + + +# TODO this is not yet ready. +# @PEP8Normalizer.register_rule(type='endmarker') +class BlankLineAtEnd(Rule): + code = 392 + message = 'Blank line at end of file' + + def is_issue(self, leaf): + return self._newline_count >= 2 diff --git a/contrib/python/parso/py3/parso/python/prefix.py b/contrib/python/parso/py3/parso/python/prefix.py new file mode 100644 index 0000000000..6b8d59e69e --- /dev/null +++ b/contrib/python/parso/py3/parso/python/prefix.py @@ -0,0 +1,106 @@ +import re +from codecs import BOM_UTF8 +from typing import Tuple + +from parso.python.tokenize import group + +unicode_bom = BOM_UTF8.decode('utf-8') + + +class PrefixPart: + def __init__(self, leaf, typ, value, spacing='', start_pos=None): + assert start_pos is not None + self.parent = leaf + self.type = typ + self.value = value + self.spacing = spacing + self.start_pos: Tuple[int, int] = start_pos + + @property + def end_pos(self) -> Tuple[int, int]: + if self.value.endswith('\n') or self.value.endswith('\r'): + return self.start_pos[0] + 1, 0 + if self.value == unicode_bom: + # The bom doesn't have a length at the start of a Python file. + return self.start_pos + return self.start_pos[0], self.start_pos[1] + len(self.value) + + def create_spacing_part(self): + column = self.start_pos[1] - len(self.spacing) + return PrefixPart( + self.parent, 'spacing', self.spacing, + start_pos=(self.start_pos[0], column) + ) + + def __repr__(self): + return '%s(%s, %s, %s)' % ( + self.__class__.__name__, + self.type, + repr(self.value), + self.start_pos + ) + + def search_ancestor(self, *node_types): + node = self.parent + while node is not None: + if node.type in node_types: + return node + node = node.parent + return None + + +_comment = r'#[^\n\r\f]*' +_backslash = r'\\\r?\n|\\\r' +_newline = r'\r?\n|\r' +_form_feed = r'\f' +_only_spacing = '$' +_spacing = r'[ \t]*' +_bom = unicode_bom + +_regex = group( + _comment, _backslash, _newline, _form_feed, _only_spacing, _bom, + capture=True +) +_regex = re.compile(group(_spacing, capture=True) + _regex) + + +_types = { + '#': 'comment', + '\\': 'backslash', + '\f': 'formfeed', + '\n': 'newline', + '\r': 'newline', + unicode_bom: 'bom' +} + + +def split_prefix(leaf, start_pos): + line, column = start_pos + start = 0 + value = spacing = '' + bom = False + while start != len(leaf.prefix): + match = _regex.match(leaf.prefix, start) + spacing = match.group(1) + value = match.group(2) + if not value: + break + type_ = _types[value[0]] + yield PrefixPart( + leaf, type_, value, spacing, + start_pos=(line, column + start - int(bom) + len(spacing)) + ) + if type_ == 'bom': + bom = True + + start = match.end(0) + if value.endswith('\n') or value.endswith('\r'): + line += 1 + column = -start + + if value: + spacing = '' + yield PrefixPart( + leaf, 'spacing', spacing, + start_pos=(line, column + start) + ) diff --git a/contrib/python/parso/py3/parso/python/token.py b/contrib/python/parso/py3/parso/python/token.py new file mode 100644 index 0000000000..9b6f4c7c19 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/token.py @@ -0,0 +1,31 @@ +from 
__future__ import absolute_import + +from enum import Enum + + +class TokenType: + name: str + contains_syntax: bool + + def __init__(self, name: str, contains_syntax: bool = False): + self.name = name + self.contains_syntax = contains_syntax + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self.name) + + +class PythonTokenTypes(Enum): + STRING = TokenType('STRING') + NUMBER = TokenType('NUMBER') + NAME = TokenType('NAME', contains_syntax=True) + ERRORTOKEN = TokenType('ERRORTOKEN') + NEWLINE = TokenType('NEWLINE') + INDENT = TokenType('INDENT') + DEDENT = TokenType('DEDENT') + ERROR_DEDENT = TokenType('ERROR_DEDENT') + FSTRING_STRING = TokenType('FSTRING_STRING') + FSTRING_START = TokenType('FSTRING_START') + FSTRING_END = TokenType('FSTRING_END') + OP = TokenType('OP', contains_syntax=True) + ENDMARKER = TokenType('ENDMARKER') diff --git a/contrib/python/parso/py3/parso/python/tokenize.py b/contrib/python/parso/py3/parso/python/tokenize.py new file mode 100644 index 0000000000..e3ffe440b9 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/tokenize.py @@ -0,0 +1,687 @@ +# -*- coding: utf-8 -*- +""" +This tokenizer has been copied from the ``tokenize.py`` standard library +tokenizer. The reason was simple: The standard library tokenizer fails +if the indentation is not right. To make it possible to do error recovery the + tokenizer needed to be rewritten. + +Basically this is a stripped down version of the standard library module, so +you can read the documentation there. Additionally we included some speed and +memory optimizations here. +""" +from __future__ import absolute_import + +import sys +import re +import itertools as _itertools +from codecs import BOM_UTF8 +from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \ + Pattern, Set + +from parso.python.token import PythonTokenTypes +from parso.utils import split_lines, PythonVersionInfo, parse_version_string + + +# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111) +MAX_UNICODE = '\U0010ffff' + +STRING = PythonTokenTypes.STRING +NAME = PythonTokenTypes.NAME +NUMBER = PythonTokenTypes.NUMBER +OP = PythonTokenTypes.OP +NEWLINE = PythonTokenTypes.NEWLINE +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT +ENDMARKER = PythonTokenTypes.ENDMARKER +ERRORTOKEN = PythonTokenTypes.ERRORTOKEN +ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT +FSTRING_START = PythonTokenTypes.FSTRING_START +FSTRING_STRING = PythonTokenTypes.FSTRING_STRING +FSTRING_END = PythonTokenTypes.FSTRING_END + + +class TokenCollection(NamedTuple): + pseudo_token: Pattern + single_quoted: Set[str] + triple_quoted: Set[str] + endpats: Dict[str, Pattern] + whitespace: Pattern + fstring_pattern_map: Dict[str, str] + always_break_tokens: Tuple[str] + + +BOM_UTF8_STRING = BOM_UTF8.decode('utf-8') + +_token_collection_cache: Dict[PythonVersionInfo, TokenCollection] = {} + + +def group(*choices, capture=False, **kwargs): + assert not kwargs + + start = '(' + if not capture: + start += '?:' + return start + '|'.join(choices) + ')' + + +def maybe(*choices): + return group(*choices) + '?' + + +# Return the empty string, plus all of the valid string prefixes. +def _all_string_prefixes(*, include_fstring=False, only_fstring=False): + def different_case_versions(prefix): + for s in _itertools.product(*[(c, c.upper()) for c in prefix]): + yield ''.join(s) + # The valid string prefixes. Only contain the lower case versions, + # and don't contain any permuations (include 'fr', but not + # 'rf'). 
The various permutations will be generated. + valid_string_prefixes = ['b', 'r', 'u', 'br'] + + result = {''} + if include_fstring: + f = ['f', 'fr'] + if only_fstring: + valid_string_prefixes = f + result = set() + else: + valid_string_prefixes += f + elif only_fstring: + return set() + + # if we add binary f-strings, add: ['fb', 'fbr'] + for prefix in valid_string_prefixes: + for t in _itertools.permutations(prefix): + # create a list with upper and lower versions of each + # character + result.update(different_case_versions(t)) + return result + + +def _compile(expr): + return re.compile(expr, re.UNICODE) + + +def _get_token_collection(version_info): + try: + return _token_collection_cache[tuple(version_info)] + except KeyError: + _token_collection_cache[tuple(version_info)] = result = \ + _create_token_collection(version_info) + return result + + +unicode_character_name = r'[A-Za-z0-9\-]+(?: [A-Za-z0-9\-]+)*' +fstring_string_single_line = _compile( + r'(?:\{\{|\}\}|\\N\{' + unicode_character_name + + r'\}|\\(?:\r\n?|\n)|\\[^\r\nN]|[^{}\r\n\\])+' +) +fstring_string_multi_line = _compile( + r'(?:\{\{|\}\}|\\N\{' + unicode_character_name + r'\}|\\[^N]|[^{}\\])+' +) +fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+') +fstring_format_spec_multi_line = _compile(r'[^{}]+') + + +def _create_token_collection(version_info): + # Note: we use unicode matching for names ("\w") but ascii matching for + # number literals. + Whitespace = r'[ \f\t]*' + whitespace = _compile(Whitespace) + Comment = r'#[^\r\n]*' + Name = '([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)' + + Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' + Binnumber = r'0[bB](?:_?[01])+' + Octnumber = r'0[oO](?:_?[0-7])+' + Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' + Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) + Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' + Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', + r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) + Expfloat = r'[0-9](?:_?[0-9])*' + Exponent + Floatnumber = group(Pointfloat, Expfloat) + Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') + Number = group(Imagnumber, Floatnumber, Intnumber) + + # Note that since _all_string_prefixes includes the empty string, + # StringPrefix can be the empty string (making it optional). + possible_prefixes = _all_string_prefixes() + StringPrefix = group(*possible_prefixes) + StringPrefixWithF = group(*_all_string_prefixes(include_fstring=True)) + fstring_prefixes = _all_string_prefixes(include_fstring=True, only_fstring=True) + FStringStart = group(*fstring_prefixes) + + # Tail end of ' string. + Single = r"(?:\\.|[^'\\])*'" + # Tail end of " string. + Double = r'(?:\\.|[^"\\])*"' + # Tail end of ''' string. + Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''" + # Tail end of """ string. + Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""' + Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""') + + # Because of leftmost-then-longest match semantics, be sure to put the + # longest operators first (e.g., if = came before ==, == would get + # recognized as two instances of =). + Operator = group(r"\*\*=?", r">>=?", r"<<=?", + r"//=?", r"->", + r"[+\-*/%&@`|^!=<>]=?", + r"~") + + Bracket = '[][(){}]' + + special_args = [r'\.\.\.', r'\r\n?', r'\n', r'[;.,@]'] + if version_info >= (3, 8): + special_args.insert(0, ":=?") + else: + special_args.insert(0, ":") + Special = group(*special_args) + + Funny = group(Operator, Bracket, Special) + + # First (or only) line of ' or " string. 
+ ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" + + group("'", r'\\(?:\r\n?|\n)'), + StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' + + group('"', r'\\(?:\r\n?|\n)')) + pseudo_extra_pool = [Comment, Triple] + all_quotes = '"', "'", '"""', "'''" + if fstring_prefixes: + pseudo_extra_pool.append(FStringStart + group(*all_quotes)) + + PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool) + PseudoToken = group(Whitespace, capture=True) + \ + group(PseudoExtras, Number, Funny, ContStr, Name, capture=True) + + # For a given string prefix plus quotes, endpats maps it to a regex + # to match the remainder of that string. _prefix can be empty, for + # a normal single or triple quoted string (with no prefix). + endpats = {} + for _prefix in possible_prefixes: + endpats[_prefix + "'"] = _compile(Single) + endpats[_prefix + '"'] = _compile(Double) + endpats[_prefix + "'''"] = _compile(Single3) + endpats[_prefix + '"""'] = _compile(Double3) + + # A set of all of the single and triple quoted string prefixes, + # including the opening quotes. + single_quoted = set() + triple_quoted = set() + fstring_pattern_map = {} + for t in possible_prefixes: + for quote in '"', "'": + single_quoted.add(t + quote) + + for quote in '"""', "'''": + triple_quoted.add(t + quote) + + for t in fstring_prefixes: + for quote in all_quotes: + fstring_pattern_map[t + quote] = quote + + ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except', + 'finally', 'while', 'with', 'return', 'continue', + 'break', 'del', 'pass', 'global', 'assert', 'nonlocal') + pseudo_token_compiled = _compile(PseudoToken) + return TokenCollection( + pseudo_token_compiled, single_quoted, triple_quoted, endpats, + whitespace, fstring_pattern_map, set(ALWAYS_BREAK_TOKENS) + ) + + +class Token(NamedTuple): + type: PythonTokenTypes + string: str + start_pos: Tuple[int, int] + prefix: str + + @property + def end_pos(self) -> Tuple[int, int]: + lines = split_lines(self.string) + if len(lines) > 1: + return self.start_pos[0] + len(lines) - 1, 0 + else: + return self.start_pos[0], self.start_pos[1] + len(self.string) + + +class PythonToken(Token): + def __repr__(self): + return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' % + self._replace(type=self.type.name)) + + +class FStringNode: + def __init__(self, quote): + self.quote = quote + self.parentheses_count = 0 + self.previous_lines = '' + self.last_string_start_pos = None + # In the syntax there can be multiple format_spec's nested: + # {x:{y:3}} + self.format_spec_count = 0 + + def open_parentheses(self, character): + self.parentheses_count += 1 + + def close_parentheses(self, character): + self.parentheses_count -= 1 + if self.parentheses_count == 0: + # No parentheses means that the format spec is also finished. 
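+            # e.g. in f"{x:{width}}" the inner '}' only lowers the count to 1,
+            # while the outer '}' brings it to 0 and thereby also closes the
+            # nested format spec.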
+ self.format_spec_count = 0 + + def allow_multiline(self): + return len(self.quote) == 3 + + def is_in_expr(self): + return self.parentheses_count > self.format_spec_count + + def is_in_format_spec(self): + return not self.is_in_expr() and self.format_spec_count + + +def _close_fstring_if_necessary(fstring_stack, string, line_nr, column, additional_prefix): + for fstring_stack_index, node in enumerate(fstring_stack): + lstripped_string = string.lstrip() + len_lstrip = len(string) - len(lstripped_string) + if lstripped_string.startswith(node.quote): + token = PythonToken( + FSTRING_END, + node.quote, + (line_nr, column + len_lstrip), + prefix=additional_prefix+string[:len_lstrip], + ) + additional_prefix = '' + assert not node.previous_lines + del fstring_stack[fstring_stack_index:] + return token, '', len(node.quote) + len_lstrip + return None, additional_prefix, 0 + + +def _find_fstring_string(endpats, fstring_stack, line, lnum, pos): + tos = fstring_stack[-1] + allow_multiline = tos.allow_multiline() + if tos.is_in_format_spec(): + if allow_multiline: + regex = fstring_format_spec_multi_line + else: + regex = fstring_format_spec_single_line + else: + if allow_multiline: + regex = fstring_string_multi_line + else: + regex = fstring_string_single_line + + match = regex.match(line, pos) + if match is None: + return tos.previous_lines, pos + + if not tos.previous_lines: + tos.last_string_start_pos = (lnum, pos) + + string = match.group(0) + for fstring_stack_node in fstring_stack: + end_match = endpats[fstring_stack_node.quote].match(string) + if end_match is not None: + string = end_match.group(0)[:-len(fstring_stack_node.quote)] + + new_pos = pos + new_pos += len(string) + # even if allow_multiline is False, we still need to check for trailing + # newlines, because a single-line f-string can contain line continuations + if string.endswith('\n') or string.endswith('\r'): + tos.previous_lines += string + string = '' + else: + string = tos.previous_lines + string + + return string, new_pos + + +def tokenize( + code: str, *, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0) +) -> Iterator[PythonToken]: + """Generate tokens from a the source code (string).""" + lines = split_lines(code, keepends=True) + return tokenize_lines(lines, version_info=version_info, start_pos=start_pos) + + +def _print_tokens(func): + """ + A small helper function to help debug the tokenize_lines function. + """ + def wrapper(*args, **kwargs): + for token in func(*args, **kwargs): + print(token) # This print is intentional for debugging! + yield token + + return wrapper + + +# @_print_tokens +def tokenize_lines( + lines: Iterable[str], + *, + version_info: PythonVersionInfo, + indents: List[int] = None, + start_pos: Tuple[int, int] = (1, 0), + is_first_token=True, +) -> Iterator[PythonToken]: + """ + A heavily modified Python standard library tokenizer. + + Additionally to the default information, yields also the prefix of each + token. This idea comes from lib2to3. The prefix contains all information + that is irrelevant for the parser like newlines in parentheses or comments. 
+ """ + def dedent_if_necessary(start): + while start < indents[-1]: + if start > indents[-2]: + yield PythonToken(ERROR_DEDENT, '', (lnum, start), '') + indents[-1] = start + break + indents.pop() + yield PythonToken(DEDENT, '', spos, '') + + pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \ + fstring_pattern_map, always_break_tokens, = \ + _get_token_collection(version_info) + paren_level = 0 # count parentheses + if indents is None: + indents = [0] + max_ = 0 + numchars = '0123456789' + contstr = '' + contline: str + contstr_start: Tuple[int, int] + endprog: Pattern + # We start with a newline. This makes indent at the first position + # possible. It's not valid Python, but still better than an INDENT in the + # second line (and not in the first). This makes quite a few things in + # Jedi's fast parser possible. + new_line = True + prefix = '' # Should never be required, but here for safety + additional_prefix = '' + lnum = start_pos[0] - 1 + fstring_stack: List[FStringNode] = [] + for line in lines: # loop over lines in stream + lnum += 1 + pos = 0 + max_ = len(line) + if is_first_token: + if line.startswith(BOM_UTF8_STRING): + additional_prefix = BOM_UTF8_STRING + line = line[1:] + max_ = len(line) + + # Fake that the part before was already parsed. + line = '^' * start_pos[1] + line + pos = start_pos[1] + max_ += start_pos[1] + + is_first_token = False + + if contstr: # continued string + endmatch = endprog.match(line) # noqa: F821 + if endmatch: + pos = endmatch.end(0) + yield PythonToken( + STRING, contstr + line[:pos], + contstr_start, prefix) # noqa: F821 + contstr = '' + contline = '' + else: + contstr = contstr + line + contline = contline + line + continue + + while pos < max_: + if fstring_stack: + tos = fstring_stack[-1] + if not tos.is_in_expr(): + string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos) + if string: + yield PythonToken( + FSTRING_STRING, string, + tos.last_string_start_pos, + # Never has a prefix because it can start anywhere and + # include whitespace. + prefix='' + ) + tos.previous_lines = '' + continue + if pos == max_: + break + + rest = line[pos:] + fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary( + fstring_stack, + rest, + lnum, + pos, + additional_prefix, + ) + pos += quote_length + if fstring_end_token is not None: + yield fstring_end_token + continue + + # in an f-string, match until the end of the string + if fstring_stack: + string_line = line + for fstring_stack_node in fstring_stack: + quote = fstring_stack_node.quote + end_match = endpats[quote].match(line, pos) + if end_match is not None: + end_match_string = end_match.group(0) + if len(end_match_string) - len(quote) + pos < len(string_line): + string_line = line[:pos] + end_match_string[:-len(quote)] + pseudomatch = pseudo_token.match(string_line, pos) + else: + pseudomatch = pseudo_token.match(line, pos) + + if pseudomatch: + prefix = additional_prefix + pseudomatch.group(1) + additional_prefix = '' + start, pos = pseudomatch.span(2) + spos = (lnum, start) + token = pseudomatch.group(2) + if token == '': + assert prefix + additional_prefix = prefix + # This means that we have a line with whitespace/comments at + # the end, which just results in an endmarker. 
+ break + initial = token[0] + else: + match = whitespace.match(line, pos) + initial = line[match.end()] + start = match.end() + spos = (lnum, start) + + if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None): + new_line = False + if paren_level == 0 and not fstring_stack: + indent_start = start + if indent_start > indents[-1]: + yield PythonToken(INDENT, '', spos, '') + indents.append(indent_start) + yield from dedent_if_necessary(indent_start) + + if not pseudomatch: # scan for tokens + match = whitespace.match(line, pos) + if new_line and paren_level == 0 and not fstring_stack: + yield from dedent_if_necessary(match.end()) + pos = match.end() + new_line = False + yield PythonToken( + ERRORTOKEN, line[pos], (lnum, pos), + additional_prefix + match.group(0) + ) + additional_prefix = '' + pos += 1 + continue + + if (initial in numchars # ordinary number + or (initial == '.' and token != '.' and token != '...')): + yield PythonToken(NUMBER, token, spos, prefix) + elif pseudomatch.group(3) is not None: # ordinary name + if token in always_break_tokens and (fstring_stack or paren_level): + fstring_stack[:] = [] + paren_level = 0 + # We only want to dedent if the token is on a new line. + m = re.match(r'[ \f\t]*$', line[:start]) + if m is not None: + yield from dedent_if_necessary(m.end()) + if token.isidentifier(): + yield PythonToken(NAME, token, spos, prefix) + else: + yield from _split_illegal_unicode_name(token, spos, prefix) + elif initial in '\r\n': + if any(not f.allow_multiline() for f in fstring_stack): + fstring_stack.clear() + + if not new_line and paren_level == 0 and not fstring_stack: + yield PythonToken(NEWLINE, token, spos, prefix) + else: + additional_prefix = prefix + token + new_line = True + elif initial == '#': # Comments + assert not token.endswith("\n") and not token.endswith("\r") + if fstring_stack and fstring_stack[-1].is_in_expr(): + # `#` is not allowed in f-string expressions + yield PythonToken(ERRORTOKEN, initial, spos, prefix) + pos = start + 1 + else: + additional_prefix = prefix + token + elif token in triple_quoted: + endprog = endpats[token] + endmatch = endprog.match(line, pos) + if endmatch: # all on one line + pos = endmatch.end(0) + token = line[start:pos] + yield PythonToken(STRING, token, spos, prefix) + else: + contstr_start = spos # multiple lines + contstr = line[start:] + contline = line + break + + # Check up to the first 3 chars of the token to see if + # they're in the single_quoted set. If so, they start + # a string. + # We're using the first 3, because we're looking for + # "rb'" (for example) at the start of the token. If + # we switch to longer prefixes, this needs to be + # adjusted. + # Note that initial == token[:1]. + # Also note that single quote checking must come after + # triple quote checking (above). + elif initial in single_quoted or \ + token[:2] in single_quoted or \ + token[:3] in single_quoted: + if token[-1] in '\r\n': # continued string + # This means that a single quoted string ends with a + # backslash and is continued. + contstr_start = lnum, start + endprog = (endpats.get(initial) or endpats.get(token[1]) + or endpats.get(token[2])) + contstr = line[start:] + contline = line + break + else: # ordinary string + yield PythonToken(STRING, token, spos, prefix) + elif token in fstring_pattern_map: # The start of an fstring. 
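+                # fstring_pattern_map maps the complete start token (prefix
+                # plus opening quote) to the bare quote, so the FStringNode
+                # knows which quote will terminate it.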
+ fstring_stack.append(FStringNode(fstring_pattern_map[token])) + yield PythonToken(FSTRING_START, token, spos, prefix) + elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'): # continued stmt + additional_prefix += prefix + line[start:] + break + else: + if token in '([{': + if fstring_stack: + fstring_stack[-1].open_parentheses(token) + else: + paren_level += 1 + elif token in ')]}': + if fstring_stack: + fstring_stack[-1].close_parentheses(token) + else: + if paren_level: + paren_level -= 1 + elif token.startswith(':') and fstring_stack \ + and fstring_stack[-1].parentheses_count \ + - fstring_stack[-1].format_spec_count == 1: + # `:` and `:=` both count + fstring_stack[-1].format_spec_count += 1 + token = ':' + pos = start + 1 + + yield PythonToken(OP, token, spos, prefix) + + if contstr: + yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix) + if contstr.endswith('\n') or contstr.endswith('\r'): + new_line = True + + if fstring_stack: + tos = fstring_stack[-1] + if tos.previous_lines: + yield PythonToken( + FSTRING_STRING, tos.previous_lines, + tos.last_string_start_pos, + # Never has a prefix because it can start anywhere and + # include whitespace. + prefix='' + ) + + end_pos = lnum, max_ + # As the last position we just take the maximally possible position. We + # remove -1 for the last new line. + for indent in indents[1:]: + indents.pop() + yield PythonToken(DEDENT, '', end_pos, '') + yield PythonToken(ENDMARKER, '', end_pos, additional_prefix) + + +def _split_illegal_unicode_name(token, start_pos, prefix): + def create_token(): + return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix) + + found = '' + is_illegal = False + pos = start_pos + for i, char in enumerate(token): + if is_illegal: + if char.isidentifier(): + yield create_token() + found = char + is_illegal = False + prefix = '' + pos = start_pos[0], start_pos[1] + i + else: + found += char + else: + new_found = found + char + if new_found.isidentifier(): + found = new_found + else: + if found: + yield create_token() + prefix = '' + pos = start_pos[0], start_pos[1] + i + found = char + is_illegal = True + + if found: + yield create_token() + + +if __name__ == "__main__": + path = sys.argv[1] + with open(path) as f: + code = f.read() + + for token in tokenize(code, version_info=parse_version_string('3.10')): + print(token) diff --git a/contrib/python/parso/py3/parso/python/tree.py b/contrib/python/parso/py3/parso/python/tree.py new file mode 100644 index 0000000000..ebb4087030 --- /dev/null +++ b/contrib/python/parso/py3/parso/python/tree.py @@ -0,0 +1,1242 @@ +""" +This is the syntax tree for Python 3 syntaxes. The classes represent +syntax elements like functions and imports. + +All of the nodes can be traced back to the `Python grammar file +<https://docs.python.org/3/reference/grammar.html>`_. If you want to know how +a tree is structured, just analyse that file (for each Python version it's a +bit different). + +There's a lot of logic here that makes it easier for Jedi (and other libraries) +to deal with a Python syntax tree. + +By using :py:meth:`parso.tree.NodeOrLeaf.get_code` on a module, you can get +back the 1-to-1 representation of the input given to the parser. This is +important if you want to refactor a parser tree. 
+
+>>> from parso import parse
+>>> parser = parse('import os')
+>>> module = parser.get_root_node()
+>>> module
+<Module: @1-1>
+
+Any subclass of :class:`Scope`, including :class:`Module`, has an attribute
+:attr:`iter_imports <Scope.iter_imports>`:
+
+>>> list(module.iter_imports())
+[<ImportName: import os@1,0>]
+
+Changes to the Python Grammar
+-----------------------------
+
+A few things have changed when looking at Python grammar files:
+
+- :class:`Param` does not exist in Python grammar files. It is essentially a
+  part of a ``parameters`` node. |parso| splits it up to make it easier to
+  analyse parameters. However, this just makes it easier to deal with the
+  syntax tree; it doesn't actually change the valid syntax.
+- A few nodes like `lambdef` and `lambdef_nocond` have been merged in the
+  syntax tree to make it easier to deal with them.
+
+Parser Tree Classes
+-------------------
+"""
+
+import re
+try:
+    from collections.abc import Mapping
+except ImportError:
+    from collections import Mapping
+from typing import Tuple
+
+from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, search_ancestor  # noqa
+from parso.python.prefix import split_prefix
+from parso.utils import split_lines
+
+_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt',
+                        'with_stmt', 'async_stmt', 'suite'])
+_RETURN_STMT_CONTAINERS = set(['suite', 'simple_stmt']) | _FLOW_CONTAINERS
+
+_FUNC_CONTAINERS = set(
+    ['suite', 'simple_stmt', 'decorated', 'async_funcdef']
+) | _FLOW_CONTAINERS
+
+_GET_DEFINITION_TYPES = set([
+    'expr_stmt', 'sync_comp_for', 'with_stmt', 'for_stmt', 'import_name',
+    'import_from', 'param', 'del_stmt', 'namedexpr_test',
+])
+_IMPORTS = set(['import_name', 'import_from'])
+
+
+class DocstringMixin:
+    __slots__ = ()
+
+    def get_doc_node(self):
+        """
+        Returns the string leaf of a docstring. e.g. ``r'''foo'''``.
+        """
+        if self.type == 'file_input':
+            node = self.children[0]
+        elif self.type in ('funcdef', 'classdef'):
+            node = self.children[self.children.index(':') + 1]
+            if node.type == 'suite':  # Normally a suite
+                node = node.children[1]  # -> NEWLINE stmt
+        else:  # ExprStmt
+            simple_stmt = self.parent
+            c = simple_stmt.parent.children
+            index = c.index(simple_stmt)
+            if not index:
+                return None
+            node = c[index - 1]
+
+        if node.type == 'simple_stmt':
+            node = node.children[0]
+        if node.type == 'string':
+            return node
+        return None
+
+
+class PythonMixin:
+    """
+    Some Python specific utilities.
+    """
+    __slots__ = ()
+
+    def get_name_of_position(self, position):
+        """
+        Given a (line, column) tuple, returns a :py:class:`Name` or ``None`` if
+        there is no name at that position.
+        """
+        for c in self.children:
+            if isinstance(c, Leaf):
+                if c.type == 'name' and c.start_pos <= position <= c.end_pos:
+                    return c
+            else:
+                result = c.get_name_of_position(position)
+                if result is not None:
+                    return result
+        return None
+
+
+class PythonLeaf(PythonMixin, Leaf):
+    __slots__ = ()
+
+    def _split_prefix(self):
+        return split_prefix(self, self.get_start_pos_of_prefix())
+
+    def get_start_pos_of_prefix(self):
+        """
+        Basically calls :py:meth:`parso.tree.NodeOrLeaf.get_start_pos_of_prefix`.
+        """
+        # TODO it is really ugly that we have to override it. Maybe change
+        # indent error leafs somehow? No idea how, though.
+ previous_leaf = self.get_previous_leaf() + if previous_leaf is not None and previous_leaf.type == 'error_leaf' \ + and previous_leaf.token_type in ('INDENT', 'DEDENT', 'ERROR_DEDENT'): + previous_leaf = previous_leaf.get_previous_leaf() + + if previous_leaf is None: # It's the first leaf. + lines = split_lines(self.prefix) + # + 1 is needed because split_lines always returns at least ['']. + return self.line - len(lines) + 1, 0 # It's the first leaf. + return previous_leaf.end_pos + + +class _LeafWithoutNewlines(PythonLeaf): + """ + Simply here to optimize performance. + """ + __slots__ = () + + @property + def end_pos(self) -> Tuple[int, int]: + return self.line, self.column + len(self.value) + + +# Python base classes +class PythonBaseNode(PythonMixin, BaseNode): + __slots__ = () + + +class PythonNode(PythonMixin, Node): + __slots__ = () + + +class PythonErrorNode(PythonMixin, ErrorNode): + __slots__ = () + + +class PythonErrorLeaf(ErrorLeaf, PythonLeaf): + __slots__ = () + + +class EndMarker(_LeafWithoutNewlines): + __slots__ = () + type = 'endmarker' + + def __repr__(self): + return "<%s: prefix=%s end_pos=%s>" % ( + type(self).__name__, repr(self.prefix), self.end_pos + ) + + +class Newline(PythonLeaf): + """Contains NEWLINE and ENDMARKER tokens.""" + __slots__ = () + type = 'newline' + + def __repr__(self): + return "<%s: %s>" % (type(self).__name__, repr(self.value)) + + +class Name(_LeafWithoutNewlines): + """ + A string. Sometimes it is important to know if the string belongs to a name + or not. + """ + type = 'name' + __slots__ = () + + def __repr__(self): + return "<%s: %s@%s,%s>" % (type(self).__name__, self.value, + self.line, self.column) + + def is_definition(self, include_setitem=False): + """ + Returns True if the name is being defined. + """ + return self.get_definition(include_setitem=include_setitem) is not None + + def get_definition(self, import_name_always=False, include_setitem=False): + """ + Returns None if there's no definition for a name. + + :param import_name_always: Specifies if an import name is always a + definition. Normally foo in `from foo import bar` is not a + definition. + """ + node = self.parent + type_ = node.type + + if type_ in ('funcdef', 'classdef'): + if self == node.name: + return node + return None + + if type_ == 'except_clause': + if self.get_previous_sibling() == 'as': + return node.parent # The try_stmt. + return None + + while node is not None: + if node.type == 'suite': + return None + if node.type in _GET_DEFINITION_TYPES: + if self in node.get_defined_names(include_setitem): + return node + if import_name_always and node.type in _IMPORTS: + return node + return None + node = node.parent + return None + + +class Literal(PythonLeaf): + __slots__ = () + + +class Number(Literal): + type = 'number' + __slots__ = () + + +class String(Literal): + type = 'string' + __slots__ = () + + @property + def string_prefix(self): + return re.match(r'\w*(?=[\'"])', self.value).group(0) + + def _get_payload(self): + match = re.search( + r'''('{3}|"{3}|'|")(.*)$''', + self.value, + flags=re.DOTALL + ) + return match.group(2)[:-len(match.group(1))] + + +class FStringString(PythonLeaf): + """ + f-strings contain f-string expressions and normal python strings. These are + the string parts of f-strings. + """ + type = 'fstring_string' + __slots__ = () + + +class FStringStart(PythonLeaf): + """ + f-strings contain f-string expressions and normal python strings. These are + the string parts of f-strings. 
+ """ + type = 'fstring_start' + __slots__ = () + + +class FStringEnd(PythonLeaf): + """ + f-strings contain f-string expressions and normal python strings. These are + the string parts of f-strings. + """ + type = 'fstring_end' + __slots__ = () + + +class _StringComparisonMixin: + def __eq__(self, other): + """ + Make comparisons with strings easy. + Improves the readability of the parser. + """ + if isinstance(other, str): + return self.value == other + + return self is other + + def __hash__(self): + return hash(self.value) + + +class Operator(_LeafWithoutNewlines, _StringComparisonMixin): + type = 'operator' + __slots__ = () + + +class Keyword(_LeafWithoutNewlines, _StringComparisonMixin): + type = 'keyword' + __slots__ = () + + +class Scope(PythonBaseNode, DocstringMixin): + """ + Super class for the parser tree, which represents the state of a python + text file. + A Scope is either a function, class or lambda. + """ + __slots__ = () + + def __init__(self, children): + super().__init__(children) + + def iter_funcdefs(self): + """ + Returns a generator of `funcdef` nodes. + """ + return self._search_in_scope('funcdef') + + def iter_classdefs(self): + """ + Returns a generator of `classdef` nodes. + """ + return self._search_in_scope('classdef') + + def iter_imports(self): + """ + Returns a generator of `import_name` and `import_from` nodes. + """ + return self._search_in_scope('import_name', 'import_from') + + def _search_in_scope(self, *names): + def scan(children): + for element in children: + if element.type in names: + yield element + if element.type in _FUNC_CONTAINERS: + yield from scan(element.children) + + return scan(self.children) + + def get_suite(self): + """ + Returns the part that is executed by the function. + """ + return self.children[-1] + + def __repr__(self): + try: + name = self.name.value + except AttributeError: + name = '' + + return "<%s: %s@%s-%s>" % (type(self).__name__, name, + self.start_pos[0], self.end_pos[0]) + + +class Module(Scope): + """ + The top scope, which is always a module. + Depending on the underlying parser this may be a full module or just a part + of a module. + """ + __slots__ = ('_used_names',) + type = 'file_input' + + def __init__(self, children): + super().__init__(children) + self._used_names = None + + def _iter_future_import_names(self): + """ + :return: A list of future import names. + :rtype: list of str + """ + # In Python it's not allowed to use future imports after the first + # actual (non-future) statement. However this is not a linter here, + # just return all future imports. If people want to scan for issues + # they should use the API. + for imp in self.iter_imports(): + if imp.type == 'import_from' and imp.level == 0: + for path in imp.get_paths(): + names = [name.value for name in path] + if len(names) == 2 and names[0] == '__future__': + yield names[1] + + def get_used_names(self): + """ + Returns all the :class:`Name` leafs that exist in this module. This + includes both definitions and references of names. + """ + if self._used_names is None: + # Don't directly use self._used_names to eliminate a lookup. 
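+            # The result maps each name string to all Name leafs with that
+            # value, e.g. for ``x = 1\nx`` the key 'x' maps to both leaves.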
+ dct = {} + + def recurse(node): + try: + children = node.children + except AttributeError: + if node.type == 'name': + arr = dct.setdefault(node.value, []) + arr.append(node) + else: + for child in children: + recurse(child) + + recurse(self) + self._used_names = UsedNamesMapping(dct) + return self._used_names + + +class Decorator(PythonBaseNode): + type = 'decorator' + __slots__ = () + + +class ClassOrFunc(Scope): + __slots__ = () + + @property + def name(self): + """ + Returns the `Name` leaf that defines the function or class name. + """ + return self.children[1] + + def get_decorators(self): + """ + :rtype: list of :class:`Decorator` + """ + decorated = self.parent + if decorated.type == 'async_funcdef': + decorated = decorated.parent + + if decorated.type == 'decorated': + if decorated.children[0].type == 'decorators': + return decorated.children[0].children + else: + return decorated.children[:1] + else: + return [] + + +class Class(ClassOrFunc): + """ + Used to store the parsed contents of a python class. + """ + type = 'classdef' + __slots__ = () + + def __init__(self, children): + super().__init__(children) + + def get_super_arglist(self): + """ + Returns the `arglist` node that defines the super classes. It returns + None if there are no arguments. + """ + if self.children[2] != '(': # Has no parentheses + return None + else: + if self.children[3] == ')': # Empty parentheses + return None + else: + return self.children[3] + + +def _create_params(parent, argslist_list): + """ + `argslist_list` is a list that can contain an argslist as a first item, but + most not. It's basically the items between the parameter brackets (which is + at most one item). + This function modifies the parser structure. It generates `Param` objects + from the normal ast. Those param objects do not exist in a normal ast, but + make the evaluation of the ast tree so much easier. + You could also say that this function replaces the argslist node with a + list of Param objects. + """ + try: + first = argslist_list[0] + except IndexError: + return [] + + if first.type in ('name', 'fpdef'): + return [Param([first], parent)] + elif first == '*': + return [first] + else: # argslist is a `typedargslist` or a `varargslist`. + if first.type == 'tfpdef': + children = [first] + else: + children = first.children + new_children = [] + start = 0 + # Start with offset 1, because the end is higher. + for end, child in enumerate(children + [None], 1): + if child is None or child == ',': + param_children = children[start:end] + if param_children: # Could as well be comma and then end. + if param_children[0] == '*' \ + and (len(param_children) == 1 + or param_children[1] == ',') \ + or param_children[0] == '/': + for p in param_children: + p.parent = parent + new_children += param_children + else: + new_children.append(Param(param_children, parent)) + start = end + return new_children + + +class Function(ClassOrFunc): + """ + Used to store the parsed contents of a python function. + + Children:: + + 0. <Keyword: def> + 1. <Name> + 2. parameter list (including open-paren and close-paren <Operator>s) + 3. or 5. <Operator: :> + 4. or 6. Node() representing function body + 3. -> (if annotation is also present) + 4. 
annotation (if present) + """ + type = 'funcdef' + + def __init__(self, children): + super().__init__(children) + parameters = self.children[2] # After `def foo` + parameters_children = parameters.children[1:-1] + # If input parameters list already has Param objects, keep it as is; + # otherwise, convert it to a list of Param objects. + if not any(isinstance(child, Param) for child in parameters_children): + parameters.children[1:-1] = _create_params(parameters, parameters_children) + + def _get_param_nodes(self): + return self.children[2].children + + def get_params(self): + """ + Returns a list of `Param()`. + """ + return [p for p in self._get_param_nodes() if p.type == 'param'] + + @property + def name(self): + return self.children[1] # First token after `def` + + def iter_yield_exprs(self): + """ + Returns a generator of `yield_expr`. + """ + def scan(children): + for element in children: + if element.type in ('classdef', 'funcdef', 'lambdef'): + continue + + try: + nested_children = element.children + except AttributeError: + if element.value == 'yield': + if element.parent.type == 'yield_expr': + yield element.parent + else: + yield element + else: + yield from scan(nested_children) + + return scan(self.children) + + def iter_return_stmts(self): + """ + Returns a generator of `return_stmt`. + """ + def scan(children): + for element in children: + if element.type == 'return_stmt' \ + or element.type == 'keyword' and element.value == 'return': + yield element + if element.type in _RETURN_STMT_CONTAINERS: + yield from scan(element.children) + + return scan(self.children) + + def iter_raise_stmts(self): + """ + Returns a generator of `raise_stmt`. Includes raise statements inside try-except blocks + """ + def scan(children): + for element in children: + if element.type == 'raise_stmt' \ + or element.type == 'keyword' and element.value == 'raise': + yield element + if element.type in _RETURN_STMT_CONTAINERS: + yield from scan(element.children) + + return scan(self.children) + + def is_generator(self): + """ + :return bool: Checks if a function is a generator or not. + """ + return next(self.iter_yield_exprs(), None) is not None + + @property + def annotation(self): + """ + Returns the test node after `->` or `None` if there is no annotation. + """ + try: + if self.children[3] == "->": + return self.children[4] + assert self.children[3] == ":" + return None + except IndexError: + return None + + +class Lambda(Function): + """ + Lambdas are basically trimmed functions, so give it the same interface. + + Children:: + + 0. <Keyword: lambda> + *. <Param x> for each argument x + -2. <Operator: :> + -1. Node() representing body + """ + type = 'lambdef' + __slots__ = () + + def __init__(self, children): + # We don't want to call the Function constructor, call its parent. + super(Function, self).__init__(children) + # Everything between `lambda` and the `:` operator is a parameter. + parameters_children = self.children[1:-2] + # If input children list already has Param objects, keep it as is; + # otherwise, convert it to a list of Param objects. + if not any(isinstance(child, Param) for child in parameters_children): + self.children[1:-2] = _create_params(self, parameters_children) + + @property + def name(self): + """ + Raises an AttributeError. Lambdas don't have a defined name. + """ + raise AttributeError("lambda is not named.") + + def _get_param_nodes(self): + return self.children[1:-2] + + @property + def annotation(self): + """ + Returns `None`, lambdas don't have annotations. 
+ """ + return None + + def __repr__(self): + return "<%s@%s>" % (self.__class__.__name__, self.start_pos) + + +class Flow(PythonBaseNode): + __slots__ = () + + +class IfStmt(Flow): + type = 'if_stmt' + __slots__ = () + + def get_test_nodes(self): + """ + E.g. returns all the `test` nodes that are named as x, below: + + if x: + pass + elif x: + pass + """ + for i, c in enumerate(self.children): + if c in ('elif', 'if'): + yield self.children[i + 1] + + def get_corresponding_test_node(self, node): + """ + Searches for the branch in which the node is and returns the + corresponding test node (see function above). However if the node is in + the test node itself and not in the suite return None. + """ + start_pos = node.start_pos + for check_node in reversed(list(self.get_test_nodes())): + if check_node.start_pos < start_pos: + if start_pos < check_node.end_pos: + return None + # In this case the node is within the check_node itself, + # not in the suite + else: + return check_node + + def is_node_after_else(self, node): + """ + Checks if a node is defined after `else`. + """ + for c in self.children: + if c == 'else': + if node.start_pos > c.start_pos: + return True + else: + return False + + +class WhileStmt(Flow): + type = 'while_stmt' + __slots__ = () + + +class ForStmt(Flow): + type = 'for_stmt' + __slots__ = () + + def get_testlist(self): + """ + Returns the input node ``y`` from: ``for x in y:``. + """ + return self.children[3] + + def get_defined_names(self, include_setitem=False): + return _defined_names(self.children[1], include_setitem) + + +class TryStmt(Flow): + type = 'try_stmt' + __slots__ = () + + def get_except_clause_tests(self): + """ + Returns the ``test`` nodes found in ``except_clause`` nodes. + Returns ``[None]`` for except clauses without an exception given. + """ + for node in self.children: + if node.type == 'except_clause': + yield node.children[1] + elif node == 'except': + yield None + + +class WithStmt(Flow): + type = 'with_stmt' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns the a list of `Name` that the with statement defines. The + defined names are set after `as`. + """ + names = [] + for with_item in self.children[1:-2:2]: + # Check with items for 'as' names. + if with_item.type == 'with_item': + names += _defined_names(with_item.children[2], include_setitem) + return names + + def get_test_node_from_name(self, name): + node = name.search_ancestor("with_item") + if node is None: + raise ValueError('The name is not actually part of a with statement.') + return node.children[0] + + +class Import(PythonBaseNode): + __slots__ = () + + def get_path_for_name(self, name): + """ + The path is the list of names that leads to the searched name. + + :return list of Name: + """ + try: + # The name may be an alias. If it is, just map it back to the name. + name = self._aliases()[name] + except KeyError: + pass + + for path in self.get_paths(): + if name in path: + return path[:path.index(name) + 1] + raise ValueError('Name should be defined in the import itself') + + def is_nested(self): + return False # By default, sub classes may overwrite this behavior + + def is_star_import(self): + return self.children[-1] == '*' + + +class ImportFrom(Import): + type = 'import_from' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns the a list of `Name` that the import defines. The + defined names are set after `import` or in case an alias - `as` - is + present that name is returned. 
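+        E.g. for ``from foo import bar as baz`` only the name ``baz`` is
+        returned.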
+ """ + return [alias or name for name, alias in self._as_name_tuples()] + + def _aliases(self): + """Mapping from alias to its corresponding name.""" + return dict((alias, name) for name, alias in self._as_name_tuples() + if alias is not None) + + def get_from_names(self): + for n in self.children[1:]: + if n not in ('.', '...'): + break + if n.type == 'dotted_name': # from x.y import + return n.children[::2] + elif n == 'import': # from . import + return [] + else: # from x import + return [n] + + @property + def level(self): + """The level parameter of ``__import__``.""" + level = 0 + for n in self.children[1:]: + if n in ('.', '...'): + level += len(n.value) + else: + break + return level + + def _as_name_tuples(self): + last = self.children[-1] + if last == ')': + last = self.children[-2] + elif last == '*': + return # No names defined directly. + + if last.type == 'import_as_names': + as_names = last.children[::2] + else: + as_names = [last] + for as_name in as_names: + if as_name.type == 'name': + yield as_name, None + else: + yield as_name.children[::2] # yields x, y -> ``x as y`` + + def get_paths(self): + """ + The import paths defined in an import statement. Typically an array + like this: ``[<Name: datetime>, <Name: date>]``. + + :return list of list of Name: + """ + dotted = self.get_from_names() + + if self.children[-1] == '*': + return [dotted] + return [dotted + [name] for name, alias in self._as_name_tuples()] + + +class ImportName(Import): + """For ``import_name`` nodes. Covers normal imports without ``from``.""" + type = 'import_name' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns the a list of `Name` that the import defines. The defined names + is always the first name after `import` or in case an alias - `as` - is + present that name is returned. + """ + return [alias or path[0] for path, alias in self._dotted_as_names()] + + @property + def level(self): + """The level parameter of ``__import__``.""" + return 0 # Obviously 0 for imports without from. + + def get_paths(self): + return [path for path, alias in self._dotted_as_names()] + + def _dotted_as_names(self): + """Generator of (list(path), alias) where alias may be None.""" + dotted_as_names = self.children[1] + if dotted_as_names.type == 'dotted_as_names': + as_names = dotted_as_names.children[::2] + else: + as_names = [dotted_as_names] + + for as_name in as_names: + if as_name.type == 'dotted_as_name': + alias = as_name.children[2] + as_name = as_name.children[0] + else: + alias = None + if as_name.type == 'name': + yield [as_name], alias + else: + # dotted_names + yield as_name.children[::2], alias + + def is_nested(self): + """ + This checks for the special case of nested imports, without aliases and + from statement:: + + import foo.bar + """ + return bool([1 for path, alias in self._dotted_as_names() + if alias is None and len(path) > 1]) + + def _aliases(self): + """ + :return list of Name: Returns all the alias + """ + return dict((alias, path[-1]) for path, alias in self._dotted_as_names() + if alias is not None) + + +class KeywordStatement(PythonBaseNode): + """ + For the following statements: `assert`, `del`, `global`, `nonlocal`, + `raise`, `return`, `yield`. + + `pass`, `continue` and `break` are not in there, because they are just + simple keywords and the parser reduces it to a keyword. + """ + __slots__ = () + + @property + def type(self): + """ + Keyword statements start with the keyword and end with `_stmt`. 
You can + crosscheck this with the Python grammar. + """ + return '%s_stmt' % self.keyword + + @property + def keyword(self): + return self.children[0].value + + def get_defined_names(self, include_setitem=False): + keyword = self.keyword + if keyword == 'del': + return _defined_names(self.children[1], include_setitem) + if keyword in ('global', 'nonlocal'): + return self.children[1::2] + return [] + + +class AssertStmt(KeywordStatement): + __slots__ = () + + @property + def assertion(self): + return self.children[1] + + +class GlobalStmt(KeywordStatement): + __slots__ = () + + def get_global_names(self): + return self.children[1::2] + + +class ReturnStmt(KeywordStatement): + __slots__ = () + + +class YieldExpr(PythonBaseNode): + type = 'yield_expr' + __slots__ = () + + +def _defined_names(current, include_setitem): + """ + A helper function to find the defined names in statements, for loops and + list comprehensions. + """ + names = [] + if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist', 'testlist'): + for child in current.children[::2]: + names += _defined_names(child, include_setitem) + elif current.type in ('atom', 'star_expr'): + names += _defined_names(current.children[1], include_setitem) + elif current.type in ('power', 'atom_expr'): + if current.children[-2] != '**': # Just if there's no operation + trailer = current.children[-1] + if trailer.children[0] == '.': + names.append(trailer.children[1]) + elif trailer.children[0] == '[' and include_setitem: + for node in current.children[-2::-1]: + if node.type == 'trailer': + names.append(node.children[1]) + break + if node.type == 'name': + names.append(node) + break + else: + names.append(current) + return names + + +class ExprStmt(PythonBaseNode, DocstringMixin): + type = 'expr_stmt' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns a list of `Name` defined before the `=` sign. + """ + names = [] + if self.children[1].type == 'annassign': + names = _defined_names(self.children[0], include_setitem) + return [ + name + for i in range(0, len(self.children) - 2, 2) + if '=' in self.children[i + 1].value + for name in _defined_names(self.children[i], include_setitem) + ] + names + + def get_rhs(self): + """Returns the right-hand-side of the equals.""" + node = self.children[-1] + if node.type == 'annassign': + if len(node.children) == 4: + node = node.children[3] + else: + node = node.children[1] + return node + + def yield_operators(self): + """ + Returns a generator of `+=`, `=`, etc. or None if there is no operation. + """ + first = self.children[1] + if first.type == 'annassign': + if len(first.children) <= 2: + return # No operator is available, it's just PEP 484. + + first = first.children[2] + yield first + + yield from self.children[3::2] + + +class NamedExpr(PythonBaseNode): + type = 'namedexpr_test' + + def get_defined_names(self, include_setitem=False): + return _defined_names(self.children[0], include_setitem) + + +class Param(PythonBaseNode): + """ + It's a helper class that makes business logic with params much easier. The + Python grammar defines no ``param`` node. It defines it in a different way + that is not really suited to working with parameters. + """ + type = 'param' + + def __init__(self, children, parent=None): + super().__init__(children) + self.parent = parent + + @property + def star_count(self): + """ + Is `0` in case of `foo`, `1` in case of `*foo` or `2` in case of + `**foo`. 
+ """ + first = self.children[0] + if first in ('*', '**'): + return len(first.value) + return 0 + + @property + def default(self): + """ + The default is the test node that appears after the `=`. Is `None` in + case no default is present. + """ + has_comma = self.children[-1] == ',' + try: + if self.children[-2 - int(has_comma)] == '=': + return self.children[-1 - int(has_comma)] + except IndexError: + return None + + @property + def annotation(self): + """ + The default is the test node that appears after `:`. Is `None` in case + no annotation is present. + """ + tfpdef = self._tfpdef() + if tfpdef.type == 'tfpdef': + assert tfpdef.children[1] == ":" + assert len(tfpdef.children) == 3 + annotation = tfpdef.children[2] + return annotation + else: + return None + + def _tfpdef(self): + """ + tfpdef: see e.g. grammar36.txt. + """ + offset = int(self.children[0] in ('*', '**')) + return self.children[offset] + + @property + def name(self): + """ + The `Name` leaf of the param. + """ + if self._tfpdef().type == 'tfpdef': + return self._tfpdef().children[0] + else: + return self._tfpdef() + + def get_defined_names(self, include_setitem=False): + return [self.name] + + @property + def position_index(self): + """ + Property for the positional index of a paramter. + """ + index = self.parent.children.index(self) + try: + keyword_only_index = self.parent.children.index('*') + if index > keyword_only_index: + # Skip the ` *, ` + index -= 2 + except ValueError: + pass + try: + keyword_only_index = self.parent.children.index('/') + if index > keyword_only_index: + # Skip the ` /, ` + index -= 2 + except ValueError: + pass + return index - 1 + + def get_parent_function(self): + """ + Returns the function/lambda of a parameter. + """ + return self.search_ancestor('funcdef', 'lambdef') + + def get_code(self, include_prefix=True, include_comma=True): + """ + Like all the other get_code functions, but includes the param + `include_comma`. + + :param include_comma bool: If enabled includes the comma in the string output. + """ + if include_comma: + return super().get_code(include_prefix) + + children = self.children + if children[-1] == ',': + children = children[:-1] + return self._get_code_for_children( + children, + include_prefix=include_prefix + ) + + def __repr__(self): + default = '' if self.default is None else '=%s' % self.default.get_code() + return '<%s: %s>' % (type(self).__name__, str(self._tfpdef()) + default) + + +class SyncCompFor(PythonBaseNode): + type = 'sync_comp_for' + __slots__ = () + + def get_defined_names(self, include_setitem=False): + """ + Returns the a list of `Name` that the comprehension defines. + """ + # allow async for + return _defined_names(self.children[1], include_setitem) + + +# This is simply here so an older Jedi version can work with this new parso +# version. Can be deleted in the next release. +CompFor = SyncCompFor + + +class UsedNamesMapping(Mapping): + """ + This class exists for the sole purpose of creating an immutable dict. + """ + def __init__(self, dct): + self._dict = dct + + def __getitem__(self, key): + return self._dict[key] + + def __len__(self): + return len(self._dict) + + def __iter__(self): + return iter(self._dict) + + def __hash__(self): + return id(self) + + def __eq__(self, other): + # Comparing these dicts does not make sense. 
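+        # Identity comparison is enough here: the mapping is built once per
+        # module in ``get_used_names()`` and never mutated afterwards.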
+ return self is other diff --git a/contrib/python/parso/py3/parso/tree.py b/contrib/python/parso/py3/parso/tree.py new file mode 100644 index 0000000000..e5298711ea --- /dev/null +++ b/contrib/python/parso/py3/parso/tree.py @@ -0,0 +1,488 @@ +from abc import abstractmethod, abstractproperty +from typing import List, Optional, Tuple, Union + +from parso.utils import split_lines + + +def search_ancestor(node: 'NodeOrLeaf', *node_types: str) -> 'Optional[BaseNode]': + """ + Recursively looks at the parents of a node and returns the first found node + that matches ``node_types``. Returns ``None`` if no matching node is found. + + This function is deprecated, use :meth:`NodeOrLeaf.search_ancestor` instead. + + :param node: The ancestors of this node will be checked. + :param node_types: type names that are searched for. + """ + n = node.parent + while n is not None: + if n.type in node_types: + return n + n = n.parent + return None + + +class NodeOrLeaf: + """ + The base class for nodes and leaves. + """ + __slots__ = ('parent',) + type: str + ''' + The type is a string that typically matches the types of the grammar file. + ''' + parent: 'Optional[BaseNode]' + ''' + The parent :class:`BaseNode` of this node or leaf. + None if this is the root node. + ''' + + def get_root_node(self): + """ + Returns the root node of a parser tree. The returned node doesn't have + a parent node like all the other nodes/leaves. + """ + scope = self + while scope.parent is not None: + scope = scope.parent + return scope + + def get_next_sibling(self): + """ + Returns the node immediately following this node in this parent's + children list. If this node does not have a next sibling, it is None + """ + parent = self.parent + if parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(parent.children): + if child is self: + try: + return self.parent.children[i + 1] + except IndexError: + return None + + def get_previous_sibling(self): + """ + Returns the node immediately preceding this node in this parent's + children list. If this node does not have a previous sibling, it is + None. + """ + parent = self.parent + if parent is None: + return None + + # Can't use index(); we need to test by identity + for i, child in enumerate(parent.children): + if child is self: + if i == 0: + return None + return self.parent.children[i - 1] + + def get_previous_leaf(self): + """ + Returns the previous leaf in the parser tree. + Returns `None` if this is the first element in the parser tree. + """ + if self.parent is None: + return None + + node = self + while True: + c = node.parent.children + i = c.index(node) + if i == 0: + node = node.parent + if node.parent is None: + return None + else: + node = c[i - 1] + break + + while True: + try: + node = node.children[-1] + except AttributeError: # A Leaf doesn't have children. + return node + + def get_next_leaf(self): + """ + Returns the next leaf in the parser tree. + Returns None if this is the last element in the parser tree. + """ + if self.parent is None: + return None + + node = self + while True: + c = node.parent.children + i = c.index(node) + if i == len(c) - 1: + node = node.parent + if node.parent is None: + return None + else: + node = c[i + 1] + break + + while True: + try: + node = node.children[0] + except AttributeError: # A Leaf doesn't have children. + return node + + @abstractproperty + def start_pos(self) -> Tuple[int, int]: + """ + Returns the starting position of the prefix as a tuple, e.g. `(3, 4)`. 
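+        (For a leaf this is the position of its value, i.e. directly after
+        its prefix.)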
+ + :return tuple of int: (line, column) + """ + + @abstractproperty + def end_pos(self) -> Tuple[int, int]: + """ + Returns the end position of the prefix as a tuple, e.g. `(3, 4)`. + + :return tuple of int: (line, column) + """ + + @abstractmethod + def get_start_pos_of_prefix(self): + """ + Returns the start_pos of the prefix. This means basically it returns + the end_pos of the last prefix. The `get_start_pos_of_prefix()` of the + prefix `+` in `2 + 1` would be `(1, 1)`, while the start_pos is + `(1, 2)`. + + :return tuple of int: (line, column) + """ + + @abstractmethod + def get_first_leaf(self): + """ + Returns the first leaf of a node or itself if this is a leaf. + """ + + @abstractmethod + def get_last_leaf(self): + """ + Returns the last leaf of a node or itself if this is a leaf. + """ + + @abstractmethod + def get_code(self, include_prefix=True): + """ + Returns the code that was the input for the parser for this node. + + :param include_prefix: Removes the prefix (whitespace and comments) of + e.g. a statement. + """ + + def search_ancestor(self, *node_types: str) -> 'Optional[BaseNode]': + """ + Recursively looks at the parents of this node or leaf and returns the + first found node that matches ``node_types``. Returns ``None`` if no + matching node is found. + + :param node_types: type names that are searched for. + """ + node = self.parent + while node is not None: + if node.type in node_types: + return node + node = node.parent + return None + + def dump(self, *, indent: Optional[Union[int, str]] = 4) -> str: + """ + Returns a formatted dump of the parser tree rooted at this node or leaf. This is + mainly useful for debugging purposes. + + The ``indent`` parameter is interpreted in a similar way as :py:func:`ast.dump`. + If ``indent`` is a non-negative integer or string, then the tree will be + pretty-printed with that indent level. An indent level of 0, negative, or ``""`` + will only insert newlines. ``None`` selects the single line representation. + Using a positive integer indent indents that many spaces per level. If + ``indent`` is a string (such as ``"\\t"``), that string is used to indent each + level. + + :param indent: Indentation style as described above. The default indentation is + 4 spaces, which yields a pretty-printed dump. 
+ + >>> import parso + >>> print(parso.parse("lambda x, y: x + y").dump()) + Module([ + Lambda([ + Keyword('lambda', (1, 0)), + Param([ + Name('x', (1, 7), prefix=' '), + Operator(',', (1, 8)), + ]), + Param([ + Name('y', (1, 10), prefix=' '), + ]), + Operator(':', (1, 11)), + PythonNode('arith_expr', [ + Name('x', (1, 13), prefix=' '), + Operator('+', (1, 15), prefix=' '), + Name('y', (1, 17), prefix=' '), + ]), + ]), + EndMarker('', (1, 18)), + ]) + """ + if indent is None: + newline = False + indent_string = '' + elif isinstance(indent, int): + newline = True + indent_string = ' ' * indent + elif isinstance(indent, str): + newline = True + indent_string = indent + else: + raise TypeError(f"expect 'indent' to be int, str or None, got {indent!r}") + + def _format_dump(node: NodeOrLeaf, indent: str = '', top_level: bool = True) -> str: + result = '' + node_type = type(node).__name__ + if isinstance(node, Leaf): + result += f'{indent}{node_type}(' + if isinstance(node, ErrorLeaf): + result += f'{node.token_type!r}, ' + elif isinstance(node, TypedLeaf): + result += f'{node.type!r}, ' + result += f'{node.value!r}, {node.start_pos!r}' + if node.prefix: + result += f', prefix={node.prefix!r}' + result += ')' + elif isinstance(node, BaseNode): + result += f'{indent}{node_type}(' + if isinstance(node, Node): + result += f'{node.type!r}, ' + result += '[' + if newline: + result += '\n' + for child in node.children: + result += _format_dump(child, indent=indent + indent_string, top_level=False) + result += f'{indent}])' + else: # pragma: no cover + # We shouldn't ever reach here, unless: + # - `NodeOrLeaf` is incorrectly subclassed else where + # - or a node's children list contains invalid nodes or leafs + # Both are unexpected internal errors. + raise TypeError(f'unsupported node encountered: {node!r}') + if not top_level: + if newline: + result += ',\n' + else: + result += ', ' + return result + + return _format_dump(self) + + +class Leaf(NodeOrLeaf): + ''' + Leafs are basically tokens with a better API. Leafs exactly know where they + were defined and what text preceeds them. + ''' + __slots__ = ('value', 'line', 'column', 'prefix') + prefix: str + + def __init__(self, value: str, start_pos: Tuple[int, int], prefix: str = '') -> None: + self.value = value + ''' + :py:func:`str` The value of the current token. + ''' + self.start_pos = start_pos + self.prefix = prefix + ''' + :py:func:`str` Typically a mixture of whitespace and comments. Stuff + that is syntactically irrelevant for the syntax tree. + ''' + self.parent: Optional[BaseNode] = None + ''' + The parent :class:`BaseNode` of this leaf. + ''' + + @property + def start_pos(self) -> Tuple[int, int]: + return self.line, self.column + + @start_pos.setter + def start_pos(self, value: Tuple[int, int]) -> None: + self.line = value[0] + self.column = value[1] + + def get_start_pos_of_prefix(self): + previous_leaf = self.get_previous_leaf() + if previous_leaf is None: + lines = split_lines(self.prefix) + # + 1 is needed because split_lines always returns at least ['']. + return self.line - len(lines) + 1, 0 # It's the first leaf. 
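+        # Otherwise the prefix starts exactly where the previous leaf ends.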
+ return previous_leaf.end_pos + + def get_first_leaf(self): + return self + + def get_last_leaf(self): + return self + + def get_code(self, include_prefix=True): + if include_prefix: + return self.prefix + self.value + else: + return self.value + + @property + def end_pos(self) -> Tuple[int, int]: + lines = split_lines(self.value) + end_pos_line = self.line + len(lines) - 1 + # Check for multiline token + if self.line == end_pos_line: + end_pos_column = self.column + len(lines[-1]) + else: + end_pos_column = len(lines[-1]) + return end_pos_line, end_pos_column + + def __repr__(self): + value = self.value + if not value: + value = self.type + return "<%s: %s>" % (type(self).__name__, value) + + +class TypedLeaf(Leaf): + __slots__ = ('type',) + + def __init__(self, type, value, start_pos, prefix=''): + super().__init__(value, start_pos, prefix) + self.type = type + + +class BaseNode(NodeOrLeaf): + """ + The super class for all nodes. + A node has children, a type and possibly a parent node. + """ + __slots__ = ('children',) + + def __init__(self, children: List[NodeOrLeaf]) -> None: + self.children = children + """ + A list of :class:`NodeOrLeaf` child nodes. + """ + self.parent: Optional[BaseNode] = None + ''' + The parent :class:`BaseNode` of this node. + None if this is the root node. + ''' + for child in children: + child.parent = self + + @property + def start_pos(self) -> Tuple[int, int]: + return self.children[0].start_pos + + def get_start_pos_of_prefix(self): + return self.children[0].get_start_pos_of_prefix() + + @property + def end_pos(self) -> Tuple[int, int]: + return self.children[-1].end_pos + + def _get_code_for_children(self, children, include_prefix): + if include_prefix: + return "".join(c.get_code() for c in children) + else: + first = children[0].get_code(include_prefix=False) + return first + "".join(c.get_code() for c in children[1:]) + + def get_code(self, include_prefix=True): + return self._get_code_for_children(self.children, include_prefix) + + def get_leaf_for_position(self, position, include_prefixes=False): + """ + Get the :py:class:`parso.tree.Leaf` at ``position`` + + :param tuple position: A position tuple, row, column. Rows start from 1 + :param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls + on whitespace or comments before a leaf + :return: :py:class:`parso.tree.Leaf` at ``position``, or ``None`` + """ + def binary_search(lower, upper): + if lower == upper: + element = self.children[lower] + if not include_prefixes and position < element.start_pos: + # We're on a prefix. 
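+                    # I.e. ``position`` lies in the whitespace/comments
+                    # before ``element``.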
+ return None + # In case we have prefixes, a leaf always matches + try: + return element.get_leaf_for_position(position, include_prefixes) + except AttributeError: + return element + + index = int((lower + upper) / 2) + element = self.children[index] + if position <= element.end_pos: + return binary_search(lower, index) + else: + return binary_search(index + 1, upper) + + if not ((1, 0) <= position <= self.children[-1].end_pos): + raise ValueError('Please provide a position that exists within this node.') + return binary_search(0, len(self.children) - 1) + + def get_first_leaf(self): + return self.children[0].get_first_leaf() + + def get_last_leaf(self): + return self.children[-1].get_last_leaf() + + def __repr__(self): + code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip() + return "<%s: %s@%s,%s>" % \ + (type(self).__name__, code, self.start_pos[0], self.start_pos[1]) + + +class Node(BaseNode): + """Concrete implementation for interior nodes.""" + __slots__ = ('type',) + + def __init__(self, type, children): + super().__init__(children) + self.type = type + + def __repr__(self): + return "%s(%s, %r)" % (self.__class__.__name__, self.type, self.children) + + +class ErrorNode(BaseNode): + """ + A node that contains valid nodes/leaves that we're follow by a token that + was invalid. This basically means that the leaf after this node is where + Python would mark a syntax error. + """ + __slots__ = () + type = 'error_node' + + +class ErrorLeaf(Leaf): + """ + A leaf that is either completely invalid in a language (like `$` in Python) + or is invalid at that position. Like the star in `1 +* 1`. + """ + __slots__ = ('token_type',) + type = 'error_leaf' + + def __init__(self, token_type, value, start_pos, prefix=''): + super().__init__(value, start_pos, prefix) + self.token_type = token_type + + def __repr__(self): + return "<%s: %s:%s, %s>" % \ + (type(self).__name__, self.token_type, repr(self.value), self.start_pos) diff --git a/contrib/python/parso/py3/parso/utils.py b/contrib/python/parso/py3/parso/utils.py new file mode 100644 index 0000000000..6ad1ea26ad --- /dev/null +++ b/contrib/python/parso/py3/parso/utils.py @@ -0,0 +1,194 @@ +import re +import sys +from ast import literal_eval +from functools import total_ordering +from typing import NamedTuple, Sequence, Union + +# The following is a list in Python that are line breaks in str.splitlines, but +# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed, +# 0xA) are allowed to split lines. +_NON_LINE_BREAKS = ( + '\v', # Vertical Tabulation 0xB + '\f', # Form Feed 0xC + '\x1C', # File Separator + '\x1D', # Group Separator + '\x1E', # Record Separator + '\x85', # Next Line (NEL - Equivalent to CR+LF. + # Used to mark end-of-line on some IBM mainframes.) + '\u2028', # Line Separator + '\u2029', # Paragraph Separator +) + + +class Version(NamedTuple): + major: int + minor: int + micro: int + + +def split_lines(string: str, keepends: bool = False) -> Sequence[str]: + r""" + Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`, + looks at form feeds and other special characters as normal text. Just + splits ``\n`` and ``\r\n``. + Also different: Returns ``[""]`` for an empty string input. + + In Python 2.7 form feeds are used as normal characters when using + str.splitlines. However in Python 3 somewhere there was a decision to split + also on form feeds. + """ + if keepends: + lst = string.splitlines(True) + + # We have to merge lines that were broken by form feed characters. 
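+        # E.g. 'a\fb\n'.splitlines(True) == ['a\x0c', 'b\n']; the loop below
+        # merges them back into a single 'a\fb\n' entry.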
+ merge = [] + for i, line in enumerate(lst): + try: + last_chr = line[-1] + except IndexError: + pass + else: + if last_chr in _NON_LINE_BREAKS: + merge.append(i) + + for index in reversed(merge): + try: + lst[index] = lst[index] + lst[index + 1] + del lst[index + 1] + except IndexError: + # index + 1 can be empty and therefore there's no need to + # merge. + pass + + # The stdlib's implementation of the end is inconsistent when calling + # it with/without keepends. One time there's an empty string in the + # end, one time there's none. + if string.endswith('\n') or string.endswith('\r') or string == '': + lst.append('') + return lst + else: + return re.split(r'\n|\r\n|\r', string) + + +def python_bytes_to_unicode( + source: Union[str, bytes], encoding: str = 'utf-8', errors: str = 'strict' +) -> str: + """ + Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a + unicode object like in :py:meth:`bytes.decode`. + + :param encoding: See :py:meth:`bytes.decode` documentation. + :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be + ``'strict'``, ``'replace'`` or ``'ignore'``. + """ + def detect_encoding(): + """ + For the implementation of encoding definitions in Python, look at: + - http://www.python.org/dev/peps/pep-0263/ + - http://docs.python.org/2/reference/lexical_analysis.html#encoding-declarations + """ + byte_mark = literal_eval(r"b'\xef\xbb\xbf'") + if source.startswith(byte_mark): + # UTF-8 byte-order mark + return 'utf-8' + + first_two_lines = re.match(br'(?:[^\r\n]*(?:\r\n|\r|\n)){0,2}', source).group(0) + possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)", + first_two_lines) + if possible_encoding: + e = possible_encoding.group(1) + if not isinstance(e, str): + e = str(e, 'ascii', 'replace') + return e + else: + # the default if nothing else has been set -> PEP 263 + return encoding + + if isinstance(source, str): + # only cast str/bytes + return source + + encoding = detect_encoding() + try: + # Cast to unicode + return str(source, encoding, errors) + except LookupError: + if errors == 'replace': + # This is a weird case that can happen if the given encoding is not + # a valid encoding. This usually shouldn't happen with provided + # encodings, but can happen if somebody uses encoding declarations + # like `# coding: foo-8`. + return str(source, 'utf-8', errors) + raise + + +def version_info() -> Version: + """ + Returns a namedtuple of parso's version, similar to Python's + ``sys.version_info``. + """ + from parso import __version__ + tupl = re.findall(r'[a-z]+|\d+', __version__) + return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)]) + + +class _PythonVersionInfo(NamedTuple): + major: int + minor: int + + +@total_ordering +class PythonVersionInfo(_PythonVersionInfo): + def __gt__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) > other + super().__gt__(other) + + return (self.major, self.minor) + + def __eq__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) == other + super().__eq__(other) + + def __ne__(self, other): + return not self.__eq__(other) + + +def _parse_version(version) -> PythonVersionInfo: + match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version) + if match is None: + raise ValueError('The given version is not in the right format. 
' + 'Use something like "3.8" or "3".') + + major = int(match.group(1)) + minor = match.group(2) + if minor is None: + # Use the latest Python in case it's not exactly defined, because the + # grammars are typically backwards compatible? + if major == 2: + minor = "7" + elif major == 3: + minor = "6" + else: + raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.") + minor = int(minor) + return PythonVersionInfo(major, minor) + + +def parse_version_string(version: str = None) -> PythonVersionInfo: + """ + Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and + returns a corresponding version info that is always two characters long in + decimal. + """ + if version is None: + version = '%s.%s' % sys.version_info[:2] + if not isinstance(version, str): + raise TypeError('version must be a string like "3.8"') + + return _parse_version(version) diff --git a/contrib/python/parso/py3/tests/__init__.py b/contrib/python/parso/py3/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/contrib/python/parso/py3/tests/__init__.py diff --git a/contrib/python/parso/py3/tests/conftest.py b/contrib/python/parso/py3/tests/conftest.py new file mode 100644 index 0000000000..7ac062f433 --- /dev/null +++ b/contrib/python/parso/py3/tests/conftest.py @@ -0,0 +1,148 @@ +import re +import tempfile +import shutil +import logging +import os +from pathlib import Path + +import pytest +import yatest.common + +import parso +from parso import cache +from parso.utils import parse_version_string + +collect_ignore = ["setup.py"] + +_SUPPORTED_VERSIONS = '3.6', '3.7', '3.8', '3.9', '3.10' + + +@pytest.fixture(scope='session') +def clean_parso_cache(): + """ + Set the default cache directory to a temporary directory during tests. + + Note that you can't use built-in `tmpdir` and `monkeypatch` + fixture here because their scope is 'function', which is not used + in 'session' scope fixture. + + This fixture is activated in ../pytest.ini. + """ + old = cache._default_cache_path + tmp = tempfile.mkdtemp(prefix='parso-test-') + cache._default_cache_path = Path(tmp) + yield + cache._default_cache_path = old + shutil.rmtree(tmp) + + +def pytest_addoption(parser): + parser.addoption("--logging", "-L", action='store_true', + help="Enables the logging output.") + + +def pytest_generate_tests(metafunc): + if 'normalizer_issue_case' in metafunc.fixturenames: + base_dir = os.path.join(yatest.common.test_source_path(), 'normalizer_issue_files') + + cases = list(colllect_normalizer_tests(base_dir)) + metafunc.parametrize( + 'normalizer_issue_case', + cases, + ids=[c.name for c in cases] + ) + elif 'each_version' in metafunc.fixturenames: + metafunc.parametrize('each_version', _SUPPORTED_VERSIONS) + elif 'version_ge_py38' in metafunc.fixturenames: + ge38 = set(_SUPPORTED_VERSIONS) - {'3.6', '3.7'} + metafunc.parametrize('version_ge_py38', sorted(ge38)) + + +class NormalizerIssueCase: + """ + Static Analysis cases lie in the static_analysis folder. + The tests also start with `#!`, like the goto_definition tests. 
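+    File names ending in e.g. ``python3.8.py`` pin a case to that Python
+    version (see the regex in ``__init__`` below).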
+ """ + def __init__(self, path): + self.path = path + self.name = os.path.basename(path) + match = re.search(r'python([\d.]+)\.py', self.name) + self.python_version = match and match.group(1) + + +def colllect_normalizer_tests(base_dir): + for f_name in os.listdir(base_dir): + if f_name.endswith(".py"): + path = os.path.join(base_dir, f_name) + yield NormalizerIssueCase(path) + + +def pytest_configure(config): + if config.option.logging: + root = logging.getLogger() + root.setLevel(logging.DEBUG) + + #ch = logging.StreamHandler(sys.stdout) + #ch.setLevel(logging.DEBUG) + #formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + #ch.setFormatter(formatter) + + #root.addHandler(ch) + + +class Checker: + def __init__(self, version, is_passing): + self.version = version + self._is_passing = is_passing + self.grammar = parso.load_grammar(version=self.version) + + def parse(self, code): + if self._is_passing: + return parso.parse(code, version=self.version, error_recovery=False) + else: + self._invalid_syntax(code) + + def _invalid_syntax(self, code): + with pytest.raises(parso.ParserSyntaxError): + module = parso.parse(code, version=self.version, error_recovery=False) + # For debugging + print(module.children) + + def get_error(self, code): + errors = list(self.grammar.iter_errors(self.grammar.parse(code))) + assert bool(errors) != self._is_passing + if errors: + return errors[0] + + def get_error_message(self, code): + error = self.get_error(code) + if error is None: + return + return error.message + + def assert_no_error_in_passing(self, code): + if self._is_passing: + module = self.grammar.parse(code) + assert not list(self.grammar.iter_errors(module)) + + +@pytest.fixture +def works_not_in_py(each_version): + return Checker(each_version, False) + + +@pytest.fixture +def works_in_py(each_version): + return Checker(each_version, True) + + +@pytest.fixture +def works_ge_py38(each_version): + version_info = parse_version_string(each_version) + return Checker(each_version, version_info >= (3, 8)) + + +@pytest.fixture +def works_ge_py39(each_version): + version_info = parse_version_string(each_version) + return Checker(each_version, version_info >= (3, 9)) diff --git a/contrib/python/parso/py3/tests/failing_examples.py b/contrib/python/parso/py3/tests/failing_examples.py new file mode 100644 index 0000000000..09714d3902 --- /dev/null +++ b/contrib/python/parso/py3/tests/failing_examples.py @@ -0,0 +1,415 @@ +# -*- coding: utf-8 -*- +import sys +from textwrap import dedent + + +def indent(code): + lines = code.splitlines(True) + return ''.join([' ' * 2 + line for line in lines]) + + +def build_nested(code, depth, base='def f():\n'): + if depth == 0: + return code + + new_code = base + indent(code) + return build_nested(new_code, depth - 1, base=base) + + +FAILING_EXAMPLES = [ + '1 +', + '?', + 'continue', + 'break', + 'return', + 'yield', + + # SyntaxError from Python/ast.c + 'f(x for x in bar, 1)', + 'from foo import a,', + 'from __future__ import whatever', + 'from __future__ import braces', + 'from .__future__ import whatever', + 'def f(x=3, y): pass', + 'lambda x=3, y: x', + '__debug__ = 1', + 'with x() as __debug__: pass', + + '[]: int', + '[a, b]: int', + '(): int', + '(()): int', + '((())): int', + '{}: int', + 'True: int', + '(a, b): int', + '*star,: int', + 'a, b: int = 3', + 'foo(+a=3)', + 'f(lambda: 1=1)', + 'f(x=1, x=2)', + 'f(**x, y)', + 'f(x=2, y)', + 'f(**x, *y)', + 'f(**x, y=3, z)', + # augassign + 'a, b += 3', + '(a, b) += 3', + '[a, b] += 3', + 
'[a, 1] += 3', + 'f() += 1', + 'lambda x:None+=1', + '{} += 1', + '{a:b} += 1', + '{1} += 1', + '{*x} += 1', + '(x,) += 1', + '(x, y if a else q) += 1', + '[] += 1', + '[1,2] += 1', + '[] += 1', + 'None += 1', + '... += 1', + 'a > 1 += 1', + '"test" += 1', + '1 += 1', + '1.0 += 1', + '(yield) += 1', + '(yield from x) += 1', + '(x if x else y) += 1', + 'a() += 1', + 'a + b += 1', + '+a += 1', + 'a and b += 1', + '*a += 1', + 'a, b += 1', + 'f"xxx" += 1', + # All assignment tests + 'lambda a: 1 = 1', + '[x for x in y] = 1', + '{x for x in y} = 1', + '{x:x for x in y} = 1', + '(x for x in y) = 1', + 'None = 1', + '... = 1', + 'a == b = 1', + '{a, b} = 1', + '{a: b} = 1', + '1 = 1', + '"" = 1', + 'b"" = 1', + 'b"" = 1', + '"" "" = 1', + '1 | 1 = 3', + '1**1 = 3', + '~ 1 = 3', + 'not 1 = 3', + '1 and 1 = 3', + 'def foo(): (yield 1) = 3', + 'def foo(): x = yield 1 = 3', + 'async def foo(): await x = 3', + '(a if a else a) = a', + 'a, 1 = x', + 'foo() = 1', + # Cases without the equals but other assignments. + 'with x as foo(): pass', + 'del bar, 1', + 'for x, 1 in []: pass', + 'for (not 1) in []: pass', + '[x for 1 in y]', + '[x for a, 3 in y]', + '(x for 1 in y)', + '{x for 1 in y}', + '{x:x for 1 in y}', + # Unicode/Bytes issues. + r'u"\x"', + r'u"\"', + r'u"\u"', + r'u"""\U"""', + r'u"\Uffffffff"', + r"u'''\N{}'''", + r"u'\N{foo}'", + r'b"\x"', + r'b"\"', + 'b"ä"', + + '*a, *b = 3, 3', + 'async def foo(): yield from []', + 'yield from []', + '*a = 3', + 'del *a, b', + 'def x(*): pass', + '(%s *d) = x' % ('a,' * 256), + '{**{} for a in [1]}', + '(True,) = x', + '([False], a) = x', + 'def x(): from math import *', + + # invalid del statements + 'del x + y', + 'del x(y)', + 'async def foo(): del await x', + 'def foo(): del (yield x)', + 'del [x for x in range(10)]', + 'del *x', + 'del *x,', + 'del (*x,)', + 'del [*x]', + 'del x, *y', + 'del *x.y,', + 'del *x[y],', + 'del *x[y::], z', + 'del x, (y, *z)', + 'del (x, *[y, z])', + 'del [x, *(y, [*z])]', + 'del {}', + 'del {x}', + 'del {x, y}', + 'del {x, *y}', + + # invalid starred expressions + '*x', + '(*x)', + '((*x))', + '1 + (*x)', + '*x; 1', + '1; *x', + '1\n*x', + 'x = *y', + 'x: int = *y', + 'def foo(): return *x', + 'def foo(): yield *x', + 'f"{*x}"', + 'for *x in 1: pass', + '[1 for *x in 1]', + + # str/bytes combinations + '"s" b""', + '"s" b"" ""', + 'b"" "" b"" ""', + 'f"s" b""', + 'b"s" f""', + + # Parser/tokenize.c + r'"""', + r'"', + r"'''", + r"'", + r"\blub", + # IndentationError: too many levels of indentation + build_nested('pass', 100), + + # SyntaxErrors from Python/symtable.c + 'def f(x, x): pass', + 'nonlocal a', + + # IndentationError + ' foo', + 'def x():\n 1\n 2', + 'def x():\n 1\n 2', + 'if 1:\nfoo', + 'if 1: blubb\nif 1:\npass\nTrue and False', + + # f-strings + 'f"{}"', + r'f"{\}"', + 'f"{\'\\\'}"', + 'f"{#}"', + "f'{1!b}'", + "f'{1:{5:{3}}}'", + "f'{'", + "f'{'", + "f'}'", + "f'{\"}'", + "f'{\"}'", + # Now nested parsing + "f'{continue}'", + "f'{1;1}'", + "f'{a;}'", + "f'{b\"\" \"\"}'", + # f-string expression part cannot include a backslash + r'''f"{'\n'}"''', + + 'async def foo():\n yield x\n return 1', + 'async def foo():\n yield x\n return 1', + + '[*[] for a in [1]]', + 'async def bla():\n def x(): await bla()', + 'del None', + 'del True', + 'del False', + 'del ...', + + # Errors of global / nonlocal + dedent(''' + def glob(): + x = 3 + x.z + global x'''), + dedent(''' + def glob(): + x = 3 + global x'''), + dedent(''' + def glob(): + x + global x'''), + dedent(''' + def glob(): + x = 3 + x.z + nonlocal x'''), + 
dedent(''' + def glob(): + x = 3 + nonlocal x'''), + dedent(''' + def glob(): + x + nonlocal x'''), + # Annotation issues + dedent(''' + def glob(): + x[0]: foo + global x'''), + dedent(''' + def glob(): + x.a: foo + global x'''), + dedent(''' + def glob(): + x: foo + global x'''), + dedent(''' + def glob(): + x: foo = 5 + global x'''), + dedent(''' + def glob(): + x: foo = 5 + x + global x'''), + dedent(''' + def glob(): + global x + x: foo = 3 + '''), + # global/nonlocal + param + dedent(''' + def glob(x): + global x + '''), + dedent(''' + def glob(x): + nonlocal x + '''), + dedent(''' + def x(): + a =3 + def z(): + nonlocal a + a = 3 + nonlocal a + '''), + dedent(''' + def x(): + a = 4 + def y(): + global a + nonlocal a + '''), + # Missing binding of nonlocal + dedent(''' + def x(): + nonlocal a + '''), + dedent(''' + def x(): + def y(): + nonlocal a + '''), + dedent(''' + def x(): + a = 4 + def y(): + global a + print(a) + def z(): + nonlocal a + '''), + # Name is assigned before nonlocal declaration + dedent(''' + def x(a): + def y(): + a = 10 + nonlocal a + '''), +] + +if sys.version_info[:2] >= (3, 7): + # This is somehow ok in previous versions. + FAILING_EXAMPLES += [ + 'class X(base for base in bases): pass', + ] + +if sys.version_info[:2] < (3, 8): + FAILING_EXAMPLES += [ + # Python/compile.c + dedent('''\ + for a in [1]: + try: + pass + finally: + continue + '''), # 'continue' not supported inside 'finally' clause" + ] + +if sys.version_info[:2] >= (3, 8): + # assignment expressions from issue#89 + FAILING_EXAMPLES += [ + # Case 2 + '(lambda: x := 1)', + '((lambda: x) := 1)', + # Case 3 + '(a[i] := x)', + '((a[i]) := x)', + '(a(i) := x)', + # Case 4 + '(a.b := c)', + '[(i.i:= 0) for ((i), j) in range(5)]', + # Case 5 + '[i:= 0 for i, j in range(5)]', + '[(i:= 0) for ((i), j) in range(5)]', + '[(i:= 0) for ((i), j), in range(5)]', + '[(i:= 0) for ((i), j.i), in range(5)]', + '[[(i:= i) for j in range(5)] for i in range(5)]', + '[i for i, j in range(5) if True or (i:= 1)]', + '[False and (i:= 0) for i, j in range(5)]', + # Case 6 + '[i+1 for i in (i:= range(5))]', + '[i+1 for i in (j:= range(5))]', + '[i+1 for i in (lambda: (j:= range(5)))()]', + # Case 7 + 'class Example:\n [(j := i) for i in range(5)]', + # Not in that issue + '(await a := x)', + '((await a) := x)', + # new discoveries + '((a, b) := (1, 2))', + '([a, b] := [1, 2])', + '({a, b} := {1, 2})', + '({a: b} := {1: 2})', + '(a + b := 1)', + '(True := 1)', + '(False := 1)', + '(None := 1)', + '(__debug__ := 1)', + # Unparenthesized walrus not allowed in dict literals, dict comprehensions and slices + '{a:="a": b:=1}', + '{y:=1: 2 for x in range(5)}', + 'a[b:=0:1:2]', + ] + # f-string debugging syntax with invalid conversion character + FAILING_EXAMPLES += [ + "f'{1=!b}'", + ] diff --git a/contrib/python/parso/py3/tests/fuzz_diff_parser.py b/contrib/python/parso/py3/tests/fuzz_diff_parser.py new file mode 100644 index 0000000000..39b93f21d5 --- /dev/null +++ b/contrib/python/parso/py3/tests/fuzz_diff_parser.py @@ -0,0 +1,307 @@ +""" +A script to find bugs in the diff parser. + +This script is extremely useful if changes are made to the diff parser. By +running a few thousand iterations, we can assure that the diff parser is in +good shape. 
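+
+Each iteration applies a handful of random edits to a file and re-parses it
+with ``diff_cache=True``; some runs also parse the unmodified code before and
+after, asserting that the first error position stays the same.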
+ +Usage: + fuzz_diff_parser.py [--pdb|--ipdb] [-l] [-n=<nr>] [-x=<nr>] random [<path>] + fuzz_diff_parser.py [--pdb|--ipdb] [-l] redo [-o=<nr>] [-p] + fuzz_diff_parser.py -h | --help + +Options: + -h --help Show this screen + -n, --maxtries=<nr> Maximum of random tries [default: 1000] + -x, --changes=<nr> Amount of changes to be done to a file per try [default: 5] + -l, --logging Prints all the logs + -o, --only-last=<nr> Only runs the last n iterations; Defaults to running all + -p, --print-code Print all test diffs + --pdb Launch pdb when error is raised + --ipdb Launch ipdb when error is raised +""" + +from __future__ import print_function +import logging +import sys +import os +import random +import pickle + +import parso +from parso.utils import split_lines +from test.test_diff_parser import _check_error_leaves_nodes + +_latest_grammar = parso.load_grammar(version='3.8') +_python_reserved_strings = tuple( + # Keywords are ususally only interesting in combination with spaces after + # them. We don't put a space before keywords, to avoid indentation errors. + s + (' ' if s.isalpha() else '') + for s in _latest_grammar._pgen_grammar.reserved_syntax_strings.keys() +) +_random_python_fragments = _python_reserved_strings + ( + ' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'", + "'''", ';', ' some_random_word ', '\\', '#', +) + + +def find_python_files_in_tree(file_path): + if not os.path.isdir(file_path): + yield file_path + return + for root, dirnames, filenames in os.walk(file_path): + if 'chardet' in root: + # Stuff like chardet/langcyrillicmodel.py is just very slow to + # parse and machine generated, so ignore those. + continue + + for name in filenames: + if name.endswith('.py'): + yield os.path.join(root, name) + + +def _print_copyable_lines(lines): + for line in lines: + line = repr(line)[1:-1] + if line.endswith(r'\n'): + line = line[:-2] + '\n' + print(line, end='') + + +def _get_first_error_start_pos_or_none(module): + error_leaf = _check_error_leaves_nodes(module) + return None if error_leaf is None else error_leaf.start_pos + + +class LineReplacement: + def __init__(self, line_nr, new_line): + self._line_nr = line_nr + self._new_line = new_line + + def apply(self, code_lines): + # print(repr(self._new_line)) + code_lines[self._line_nr] = self._new_line + + +class LineDeletion: + def __init__(self, line_nr): + self.line_nr = line_nr + + def apply(self, code_lines): + del code_lines[self.line_nr] + + +class LineCopy: + def __init__(self, copy_line, insertion_line): + self._copy_line = copy_line + self._insertion_line = insertion_line + + def apply(self, code_lines): + code_lines.insert( + self._insertion_line, + # Use some line from the file. This doesn't feel totally + # random, but for the diff parser it will feel like it. + code_lines[self._copy_line] + ) + + +class FileModification: + @classmethod + def generate(cls, code_lines, change_count, previous_file_modification=None): + if previous_file_modification is not None and random.random() > 0.5: + # We want to keep the previous modifications in some cases to make + # more complex parser issues visible. + code_lines = previous_file_modification.apply(code_lines) + added_modifications = previous_file_modification.modification_list + else: + added_modifications = [] + return cls( + added_modifications + + list(cls._generate_line_modifications(code_lines, change_count)), + # work with changed trees more than with normal ones. 
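+            # (True in only ~20% of the generated modifications, so most runs
+            # skip the extra parses of the unmodified code.)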
+ check_original=random.random() > 0.8, + ) + + @staticmethod + def _generate_line_modifications(lines, change_count): + def random_line(include_end=False): + return random.randint(0, len(lines) - (not include_end)) + + lines = list(lines) + for _ in range(change_count): + rand = random.randint(1, 4) + if rand == 1: + if len(lines) == 1: + # We cannot delete every line, that doesn't make sense to + # fuzz and it would be annoying to rewrite everything here. + continue + ld = LineDeletion(random_line()) + elif rand == 2: + # Copy / Insertion + # Make it possible to insert into the first and the last line + ld = LineCopy(random_line(), random_line(include_end=True)) + elif rand in (3, 4): + # Modify a line in some weird random ways. + line_nr = random_line() + line = lines[line_nr] + column = random.randint(0, len(line)) + random_string = '' + for _ in range(random.randint(1, 3)): + if random.random() > 0.8: + # The lower characters cause way more issues. + unicode_range = 0x1f if random.randint(0, 1) else 0x3000 + random_string += chr(random.randint(0, unicode_range)) + else: + # These insertions let us understand how random + # keyword/operator insertions work. Theoretically this + # could also be done with unicode insertions, but the + # fuzzer is just way more effective here. + random_string += random.choice(_random_python_fragments) + if random.random() > 0.5: + # In this case we insert at a very random place that + # probably breaks syntax. + line = line[:column] + random_string + line[column:] + else: + # Here we have better chances to not break syntax, because + # we really replace the line with something that has + # indentation. + line = ' ' * random.randint(0, 12) + random_string + '\n' + ld = LineReplacement(line_nr, line) + ld.apply(lines) + yield ld + + def __init__(self, modification_list, check_original): + self.modification_list = modification_list + self._check_original = check_original + + def apply(self, code_lines): + changed_lines = list(code_lines) + for modification in self.modification_list: + modification.apply(changed_lines) + return changed_lines + + def run(self, grammar, code_lines, print_code): + code = ''.join(code_lines) + modified_lines = self.apply(code_lines) + modified_code = ''.join(modified_lines) + + if print_code: + if self._check_original: + print('Original:') + _print_copyable_lines(code_lines) + + print('\nModified:') + _print_copyable_lines(modified_lines) + print() + + if self._check_original: + m = grammar.parse(code, diff_cache=True) + start1 = _get_first_error_start_pos_or_none(m) + + grammar.parse(modified_code, diff_cache=True) + + if self._check_original: + # Also check if it's possible to "revert" the changes. 
+ m = grammar.parse(code, diff_cache=True) + start2 = _get_first_error_start_pos_or_none(m) + assert start1 == start2, (start1, start2) + + +class FileTests: + def __init__(self, file_path, test_count, change_count): + self._path = file_path + with open(file_path, errors='replace') as f: + code = f.read() + self._code_lines = split_lines(code, keepends=True) + self._test_count = test_count + self._code_lines = self._code_lines + self._change_count = change_count + self._file_modifications = [] + + def _run(self, grammar, file_modifications, debugger, print_code=False): + try: + for i, fm in enumerate(file_modifications, 1): + fm.run(grammar, self._code_lines, print_code=print_code) + print('.', end='') + sys.stdout.flush() + print() + except Exception: + print("Issue in file: %s" % self._path) + if debugger: + einfo = sys.exc_info() + pdb = __import__(debugger) + pdb.post_mortem(einfo[2]) + raise + + def redo(self, grammar, debugger, only_last, print_code): + mods = self._file_modifications + if only_last is not None: + mods = mods[-only_last:] + self._run(grammar, mods, debugger, print_code=print_code) + + def run(self, grammar, debugger): + def iterate(): + fm = None + for _ in range(self._test_count): + fm = FileModification.generate( + self._code_lines, self._change_count, + previous_file_modification=fm + ) + self._file_modifications.append(fm) + yield fm + + self._run(grammar, iterate(), debugger) + + +def main(arguments): + debugger = 'pdb' if arguments['--pdb'] else \ + 'ipdb' if arguments['--ipdb'] else None + redo_file = os.path.join(os.path.dirname(__file__), 'fuzz-redo.pickle') + + if arguments['--logging']: + root = logging.getLogger() + root.setLevel(logging.DEBUG) + + ch = logging.StreamHandler(sys.stdout) + ch.setLevel(logging.DEBUG) + root.addHandler(ch) + + grammar = parso.load_grammar() + parso.python.diff.DEBUG_DIFF_PARSER = True + if arguments['redo']: + with open(redo_file, 'rb') as f: + file_tests_obj = pickle.load(f) + only_last = arguments['--only-last'] and int(arguments['--only-last']) + file_tests_obj.redo( + grammar, + debugger, + only_last=only_last, + print_code=arguments['--print-code'] + ) + elif arguments['random']: + # A random file is used to do diff parser checks if no file is given. + # This helps us to find errors in a lot of different files. 
+ file_paths = list(find_python_files_in_tree(arguments['<path>'] or '.')) + max_tries = int(arguments['--maxtries']) + tries = 0 + try: + while tries < max_tries: + path = random.choice(file_paths) + print("Checking %s: %s tries" % (path, tries)) + now_tries = min(1000, max_tries - tries) + file_tests_obj = FileTests(path, now_tries, int(arguments['--changes'])) + file_tests_obj.run(grammar, debugger) + tries += now_tries + except Exception: + with open(redo_file, 'wb') as f: + pickle.dump(file_tests_obj, f) + raise + else: + raise NotImplementedError('Command is not implemented') + + +if __name__ == '__main__': + from docopt import docopt + + arguments = docopt(__doc__) + main(arguments) diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E10.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E10.py new file mode 100644 index 0000000000..38d7a19043 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E10.py @@ -0,0 +1,51 @@ +for a in 'abc': + for b in 'xyz': + hello(a) # indented with 8 spaces + #: E903:0 + hello(b) # indented with 1 tab +if True: + #: E101:0 + pass + +#: E122+1 +change_2_log = \ +"""Change 2 by slamb@testclient on 2006/04/13 21:46:23 + + creation +""" + +p4change = { + 2: change_2_log, +} + + +class TestP4Poller(unittest.TestCase): + def setUp(self): + self.setUpGetProcessOutput() + return self.setUpChangeSource() + + def tearDown(self): + pass + + +# +if True: + #: E101:0 E101+1:0 + foo(1, + 2) + + +def test_keys(self): + """areas.json - All regions are accounted for.""" + expected = set([ + #: E101:0 + u'Norrbotten', + #: E101:0 + u'V\xe4sterbotten', + ]) + + +if True: + hello(""" + tab at start of this line +""") diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E101.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E101.py new file mode 100644 index 0000000000..cc24719873 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E101.py @@ -0,0 +1,137 @@ +# Used to be the file for W191 + +#: E101+1 +if False: + print # indented with 1 tab + +#: E101+1 +y = x == 2 \ + or x == 3 +#: E101+5 +if ( + x == ( + 3 + ) or + y == 4): + pass +#: E101+3 +if x == 2 \ + or y > 1 \ + or x == 3: + pass +#: E101+3 +if x == 2 \ + or y > 1 \ + or x == 3: + pass + +#: E101+1 +if (foo == bar and baz == frop): + pass +#: E101+1 +if (foo == bar and baz == frop): + pass + +#: E101+2 E101+3 +if start[1] > end_col and not ( + over_indent == 4 and indent_next): + assert (0, "E121 continuation line over-" + "indented for visual indent") + + +#: E101+3 +def long_function_name( + var_one, var_two, var_three, + var_four): + hello(var_one) + + +#: E101+2 +if ((row < 0 or self.moduleCount <= row or + col < 0 or self.moduleCount <= col)): + raise Exception("%s,%s - %s" % (row, col, self.moduleCount)) +#: E101+1 E101+2 E101+3 E101+4 E101+5 E101+6 +if bar: + assert ( + start, 'E121 lines starting with a ' + 'closing bracket should be indented ' + "to match that of the opening " + "bracket's line" + ) + +# you want vertical alignment, so use a parens +#: E101+3 +if ((foo.bar("baz") and + foo.bar("frop") + )): + hello("yes") +#: E101+3 +# also ok, but starting to look like LISP +if ((foo.bar("baz") and + foo.bar("frop"))): + hello("yes") +#: E101+1 +if (a == 2 or b == "abc def ghi" "jkl mno"): + assert True +#: E101+2 +if (a == 2 or b == """abc def ghi +jkl mno"""): + assert True +#: E101+1 E101+2 +if length > options.max_line_length: + assert options.max_line_length, \ + "E501 line too long (%d characters)" % length + 
+ +#: E101+1 E101+2 +if os.path.exists(os.path.join(path, PEP8_BIN)): + cmd = ([os.path.join(path, PEP8_BIN)] + + self._pep8_options(targetfile)) +# TODO Tabs in docstrings shouldn't be there, use \t. +''' + multiline string with tab in it''' +# Same here. +'''multiline string + with tabs + and spaces +''' +# Okay +'''sometimes, you just need to go nuts in a multiline string + and allow all sorts of crap + like mixed tabs and spaces + +or trailing whitespace +or long long long long long long long long long long long long long long long long long lines +''' # noqa +# Okay +'''this one + will get no warning +even though the noqa comment is not immediately after the string +''' + foo # noqa + +#: E101+2 +if foo is None and bar is "frop" and \ + blah == 'yeah': + blah = 'yeahnah' + + +#: E101+1 E101+2 E101+3 +if True: + foo( + 1, + 2) + + +#: E101+1 E101+2 E101+3 E101+4 E101+5 +def test_keys(self): + """areas.json - All regions are accounted for.""" + expected = set([ + u'Norrbotten', + u'V\xe4sterbotten', + ]) + + +#: E101+1 +x = [ + 'abc' +] diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E11.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E11.py new file mode 100644 index 0000000000..9b97f3980c --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E11.py @@ -0,0 +1,60 @@ +if x > 2: + #: E111:2 + hello(x) +if True: + #: E111:5 + print + #: E111:6 + # + #: E111:2 + # what + # Comment is fine +# Comment is also fine + +if False: + pass +print +print +#: E903:0 + print +mimetype = 'application/x-directory' +#: E111:5 + # 'httpd/unix-directory' +create_date = False + + +def start(self): + # foo + #: E111:8 + # bar + if True: # Hello + self.master.start() # Comment + # try: + #: E111:12 + # self.master.start() + # except MasterExit: + #: E111:12 + # self.shutdown() + # finally: + #: E111:12 + # sys.exit() + # Dedent to the first level + #: E111:6 + # error +# Dedent to the base level +#: E111:2 + # Also wrongly indented. +# Indent is correct. 
+ + +def start(self): # Correct comment + if True: + #: E111:0 +# try: + #: E111:0 +# self.master.start() + #: E111:0 +# except MasterExit: + #: E111:0 +# self.shutdown() + self.master.start() # comment diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E12_first.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_first.py new file mode 100644 index 0000000000..8dc65a5a42 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_first.py @@ -0,0 +1,78 @@ +abc = "E121", ( + #: E121:2 + "dent") +abc = "E122", ( + #: E121:0 +"dent") +my_list = [ + 1, 2, 3, + 4, 5, 6, + #: E123 + ] +abc = "E124", ("visual", + "indent_two" + #: E124:14 + ) +abc = "E124", ("visual", + "indent_five" + #: E124:0 +) +a = (123, + #: E124:0 +) +#: E129+1:4 +if (row < 0 or self.moduleCount <= row or + col < 0 or self.moduleCount <= col): + raise Exception("%s,%s - %s" % (row, col, self.moduleCount)) + +abc = "E126", ( + #: E126:12 + "dent") +abc = "E126", ( + #: E126:8 + "dent") +abc = "E127", ("over-", + #: E127:18 + "over-indent") +abc = "E128", ("visual", + #: E128:4 + "hanging") +abc = "E128", ("under-", + #: E128:14 + "under-indent") + + +my_list = [ + 1, 2, 3, + 4, 5, 6, + #: E123:5 + ] +result = { + #: E121:3 + 'key1': 'value', + #: E121:3 + 'key2': 'value', +} +rv.update(dict.fromkeys(( + 'qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), + #: E128:10 + '?'), + "foo") + +abricot = 3 + \ + 4 + \ + 5 + 6 +abc = "hello", ( + + "there", + #: E126:5 + # "john", + "dude") +part = set_mimetype(( + a.get('mime_type', 'text')), + 'default') +part = set_mimetype(( + a.get('mime_type', 'text')), + #: E127:21 + 'default') diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E12_not_first.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_not_first.py new file mode 100644 index 0000000000..fc3b5f9339 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_not_first.py @@ -0,0 +1,356 @@ +# The issue numbers described in this file are part of the pycodestyle tracker +# and not of parso. +# Originally there were no issues in here, I (dave) added the ones that were +# necessary and IMO useful. +if ( + x == ( + 3 + ) or + y == 4): + pass + +y = x == 2 \ + or x == 3 + +#: E129+1:4 +if x == 2 \ + or y > 1 \ + or x == 3: + pass + +if x == 2 \ + or y > 1 \ + or x == 3: + pass + + +if (foo == bar and + baz == frop): + pass + +#: E129+1:4 E129+2:4 E123+3 +if ( + foo == bar and + baz == frop +): + pass + +if ( + foo == bar and + baz == frop + #: E129:4 + ): + pass + +a = ( +) + +a = (123, + ) + + +if start[1] > end_col and not ( + over_indent == 4 and indent_next): + assert (0, "E121 continuation line over-" + "indented for visual indent") + + +abc = "OK", ("visual", + "indent") + +abc = "Okay", ("visual", + "indent_three" + ) + +abc = "a-ok", ( + "there", + "dude", +) + +abc = "hello", ( + "there", + "dude") + +abc = "hello", ( + + "there", + # "john", + "dude") + +abc = "hello", ( + "there", "dude") + +abc = "hello", ( + "there", "dude", +) + +# Aligned with opening delimiter +foo = long_function_name(var_one, var_two, + var_three, var_four) + +# Extra indentation is not necessary. 
+foo = long_function_name( + var_one, var_two, + var_three, var_four) + + +arm = 'AAA' \ + 'BBB' \ + 'CCC' + +bbb = 'AAA' \ + 'BBB' \ + 'CCC' + +cc = ('AAA' + 'BBB' + 'CCC') + +cc = {'text': 'AAA' + 'BBB' + 'CCC'} + +cc = dict(text='AAA' + 'BBB') + +sat = 'AAA' \ + 'BBB' \ + 'iii' \ + 'CCC' + +abricot = (3 + + 4 + + 5 + 6) + +#: E122+1:4 +abricot = 3 + \ + 4 + \ + 5 + 6 + +part = [-1, 2, 3, + 4, 5, 6] + +#: E128+1:8 +part = [-1, (2, 3, + 4, 5, 6), 7, + 8, 9, 0] + +fnct(1, 2, 3, + 4, 5, 6) + +fnct(1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10, 11) + + +def long_function_name( + var_one, var_two, var_three, + var_four): + hello(var_one) + + +if ((row < 0 or self.moduleCount <= row or + col < 0 or self.moduleCount <= col)): + raise Exception("%s,%s - %s" % (row, col, self.moduleCount)) + + +result = { + 'foo': [ + 'bar', { + 'baz': 'frop', + } + ] +} + + +foo = my.func({ + "foo": "bar", +}, "baz") + + +fooff(aaaa, + cca( + vvv, + dadd + ), fff, + ggg) + +fooff(aaaa, + abbb, + cca( + vvv, + aaa, + dadd), + "visual indentation is not a multiple of four",) + +if bar: + assert ( + start, 'E121 lines starting with a ' + 'closing bracket should be indented ' + "to match that of the opening " + "bracket's line" + ) + +# you want vertical alignment, so use a parens +if ((foo.bar("baz") and + foo.bar("frop") + )): + hello("yes") + +# also ok, but starting to look like LISP +if ((foo.bar("baz") and + foo.bar("frop"))): + hello("yes") + +#: E129+1:4 E127+2:9 +if (a == 2 or + b == "abc def ghi" + "jkl mno"): + assert True + +#: E129+1:4 +if (a == 2 or + b == """abc def ghi +jkl mno"""): + assert True + +if length > options.max_line_length: + assert options.max_line_length, \ + "E501 line too long (%d characters)" % length + + +# blub + + +asd = 'l.{line}\t{pos}\t{name}\t{text}'.format( + line=token[2][0], + pos=pos, + name=tokenize.tok_name[token[0]], + text=repr(token[1]), +) + +#: E121+1:6 E121+2:6 +hello('%-7d %s per second (%d total)' % ( + options.counters[key] / elapsed, key, + options.counters[key])) + + +if os.path.exists(os.path.join(path, PEP8_BIN)): + cmd = ([os.path.join(path, PEP8_BIN)] + + self._pep8_options(targetfile)) + + +fixed = (re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] + + target[c + 1:]) + +fixed = ( + re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] + + target[c + 1:] +) + + +if foo is None and bar is "frop" and \ + blah == 'yeah': + blah = 'yeahnah' + + +"""This is a multi-line + docstring.""" + + +if blah: + # is this actually readable? :) + multiline_literal = """ +while True: + if True: + 1 +""".lstrip() + multiline_literal = ( + """ +while True: + if True: + 1 +""".lstrip() + ) + multiline_literal = ( + """ +while True: + if True: + 1 +""" + .lstrip() + ) + + +if blah: + multiline_visual = (""" +while True: + if True: + 1 +""" + .lstrip()) + + +rv = {'aaa': 42} +rv.update(dict.fromkeys(( + #: E121:4 E121+1:4 + 'qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), '?')) + +rv.update(dict.fromkeys(('qualif_nr', 'reasonComment_en', + 'reasonComment_fr', 'reasonComment_de', + 'reasonComment_it'), '?')) + +#: E128+1:10 +rv.update(dict.fromkeys(('qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), '?')) + + +rv.update(dict.fromkeys( + ('qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), '?' 
+ ), "foo", context={ + 'alpha': 4, 'beta': 53242234, 'gamma': 17, + }) + + +rv.update( + dict.fromkeys(( + 'qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), '?'), + "foo", + context={ + 'alpha': 4, 'beta': 53242234, 'gamma': 17, + }, +) + + +event_obj.write(cursor, user_id, { + 'user': user, + 'summary': text, + 'data': data, + }) + +event_obj.write(cursor, user_id, { + 'user': user, + 'summary': text, + 'data': {'aaa': 1, 'bbb': 2}, + }) + +event_obj.write(cursor, user_id, { + 'user': user, + 'summary': text, + 'data': { + 'aaa': 1, + 'bbb': 2}, + }) + +event_obj.write(cursor, user_id, { + 'user': user, + 'summary': text, + 'data': {'timestamp': now, 'content': { + 'aaa': 1, + 'bbb': 2 + }}, + }) diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E12_not_second.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_not_second.py new file mode 100644 index 0000000000..e7c18e0ec0 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_not_second.py @@ -0,0 +1,294 @@ + +def qualify_by_address( + self, cr, uid, ids, context=None, + params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)): + """ This gets called by the web server """ + + +def qualify_by_address(self, cr, uid, ids, context=None, + params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)): + """ This gets called by the web server """ + + +_ipv4_re = re.compile('^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.' + '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.' + '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.' + '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$') + + +fct(""" + AAA """ + status_2_string) + + +if context: + msg = """\ +action: GET-CONFIG +payload: + ip_address: "%(ip)s" + username: "%(username)s" +""" % context + + +if context: + msg = """\ +action: \ +GET-CONFIG +""" % context + + +if context: + #: E122+2:0 + msg = """\ +action: """\ +"""GET-CONFIG +""" % context + + +def unicode2html(s): + """Convert the characters &<>'" in string s to HTML-safe sequences. 
+ Convert newline to <br> too.""" + #: E127+1:28 + return unicode((s or '').replace('&', '&') + .replace('\n', '<br>\n')) + + +parser.add_option('--count', action='store_true', + help="print total number of errors and warnings " + "to standard error and set exit code to 1 if " + "total is not null") + +parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % + DEFAULT_EXCLUDE) + +add_option('--count', + #: E135+1 + help="print total number of errors " + "to standard error total is not null") + +add_option('--count', + #: E135+2:11 + help="print total number of errors " + "to standard error " + "total is not null") + + +help = ("print total number of errors " + + "to standard error") + +help = "print total number of errors " \ + "to standard error" + +help = u"print total number of errors " \ + u"to standard error" + +help = b"print total number of errors " \ + b"to standard error" + +#: E122+1:5 +help = br"print total number of errors " \ + br"to standard error" + +d = dict('foo', help="exclude files or directories which match these " + #: E135:9 + "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE) + +d = dict('foo', help=u"exclude files or directories which match these " + u"comma separated patterns (default: %s)" + % DEFAULT_EXCLUDE) + +#: E135+1:9 E135+2:9 +d = dict('foo', help=b"exclude files or directories which match these " + b"comma separated patterns (default: %s)" + % DEFAULT_EXCLUDE) + +d = dict('foo', help=br"exclude files or directories which match these " + br"comma separated patterns (default: %s)" % + DEFAULT_EXCLUDE) + +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % + DEFAULT_EXCLUDE) + +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s, %s)" % + (DEFAULT_EXCLUDE, DEFAULT_IGNORE) + ) + +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s, %s)" % + # who knows what might happen here? + (DEFAULT_EXCLUDE, DEFAULT_IGNORE) + ) + +# parens used to allow the indenting. +troublefree_hash = { + "hash": "value", + "long": ("the quick brown fox jumps over the lazy dog before doing a " + "somersault"), + "long key that tends to happen more when you're indented": ( + "stringwithalongtoken you don't want to break" + ), +} + +# another accepted form +troublefree_hash = { + "hash": "value", + "long": "the quick brown fox jumps over the lazy dog before doing " + "a somersault", + ("long key that tends to happen more " + "when you're indented"): "stringwithalongtoken you don't want to break", +} +# confusing but accepted... 
don't do that +troublesome_hash = { + "hash": "value", + "long": "the quick brown fox jumps over the lazy dog before doing a " + #: E135:4 + "somersault", + "longer": + "the quick brown fox jumps over the lazy dog before doing a " + "somersaulty", + "long key that tends to happen more " + "when you're indented": "stringwithalongtoken you don't want to break", +} + +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % + DEFAULT_EXCLUDE + ) +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE, + foobar="this clearly should work, because it is at " + "the right indent level", + ) + +rv.update(dict.fromkeys( + ('qualif_nr', 'reasonComment_en', 'reasonComment_fr', + 'reasonComment_de', 'reasonComment_it'), + '?'), "foo", + context={'alpha': 4, 'beta': 53242234, 'gamma': 17}) + + +def f(): + try: + if not Debug: + hello(''' +If you would like to see debugging output, +try: %s -d5 +''' % sys.argv[0]) + + +# The try statement above was not finished. +#: E901 +d = { # comment + 1: 2 +} + +# issue 138 (we won't allow this in parso) +#: E126+2:9 +[ + 12, # this is a multi-line inline + # comment +] +# issue 151 +#: E122+1:3 +if a > b and \ + c > d: + moo_like_a_cow() + +my_list = [ + 1, 2, 3, + 4, 5, 6, +] + +my_list = [1, 2, 3, + 4, 5, 6, + ] + +result = some_function_that_takes_arguments( + 'a', 'b', 'c', + 'd', 'e', 'f', +) + +result = some_function_that_takes_arguments('a', 'b', 'c', + 'd', 'e', 'f', + ) + +# issue 203 +dica = { + ('abc' + 'def'): ( + 'abc'), +} + +(abcdef[0] + [1]) = ( + 'abc') + +('abc' + 'def') == ( + 'abc') + +# issue 214 +bar( + 1).zap( + 2) + +bar( + 1).zap( + 2) + +if True: + + def example_issue254(): + return [node.copy( + ( + replacement + # First, look at all the node's current children. + for child in node.children + # Replace them. + for replacement in replace(child) + ), + dict(name=token.undefined) + )] + + +def valid_example(): + return [node.copy(properties=dict( + (key, val if val is not None else token.undefined) + for key, val in node.items() + ))] + + +foo([ + 'bug' +]) + +# issue 144, finally! +some_hash = { + "long key that tends to happen more when you're indented": + "stringwithalongtoken you don't want to break", +} + +{ + 1: + 999999 if True + else 0, +} + + +abc = dedent( + ''' + mkdir -p ./{build}/ + mv ./build/ ./{build}/%(revision)s/ + '''.format( + build='build', + # more stuff + ) +) diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E12_second.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_second.py new file mode 100644 index 0000000000..5488ea40eb --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_second.py @@ -0,0 +1,195 @@ +if True: + result = some_function_that_takes_arguments( + 'a', 'b', 'c', + 'd', 'e', 'f', + #: E123:0 +) +#: E122+1 +if some_very_very_very_long_variable_name or var \ +or another_very_long_variable_name: + raise Exception() +#: E122+1 +if some_very_very_very_long_variable_name or var[0] \ +or another_very_long_variable_name: + raise Exception() +if True: + #: E122+1 + if some_very_very_very_long_variable_name or var \ + or another_very_long_variable_name: + raise Exception() +if True: + #: E122+1 + if some_very_very_very_long_variable_name or var[0] \ + or another_very_long_variable_name: + raise Exception() + +#: E901+1:8 +dictionary = [ + "is": { + # Might be a E122:4, but is not because the code is invalid Python. 
+ "nested": yes(), + }, +] +setup('', + scripts=[''], + classifiers=[ + #: E121:6 + 'Development Status :: 4 - Beta', + 'Environment :: Console', + 'Intended Audience :: Developers', + ]) + + +#: E123+2:4 E291:15 +abc = "E123", ( + "bad", "hanging", "close" + ) + +result = { + 'foo': [ + 'bar', { + 'baz': 'frop', + #: E123 + } + #: E123 + ] + #: E123 + } +result = some_function_that_takes_arguments( + 'a', 'b', 'c', + 'd', 'e', 'f', + #: E123 + ) +my_list = [1, 2, 3, + 4, 5, 6, + #: E124:0 +] +my_list = [1, 2, 3, + 4, 5, 6, + #: E124:19 + ] +#: E124+2 +result = some_function_that_takes_arguments('a', 'b', 'c', + 'd', 'e', 'f', +) +fooff(aaaa, + cca( + vvv, + dadd + ), fff, + #: E124:0 +) +fooff(aaaa, + ccaaa( + vvv, + dadd + ), + fff, + #: E124:0 +) +d = dict('foo', + help="exclude files or directories which match these " + "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE + #: E124:14 + ) + +if line_removed: + self.event(cr, uid, + #: E128:8 + name="Removing the option for contract", + #: E128:8 + description="contract line has been removed", + #: E124:8 + ) + +#: E129+1:4 +if foo is None and bar is "frop" and \ + blah == 'yeah': + blah = 'yeahnah' + + +#: E129+1:4 E129+2:4 +def long_function_name( + var_one, var_two, var_three, + var_four): + hello(var_one) + + +def qualify_by_address( + #: E129:4 E129+1:4 + self, cr, uid, ids, context=None, + params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)): + """ This gets called by the web server """ + + +#: E129+1:4 E129+2:4 +if (a == 2 or + b == "abc def ghi" + "jkl mno"): + True + +my_list = [ + 1, 2, 3, + 4, 5, 6, + #: E123:8 + ] + +abris = 3 + \ + 4 + \ + 5 + 6 + +fixed = re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] + \ + target[c + 1:] + +rv.update(dict.fromkeys(( + 'qualif_nr', 'reasonComment_en', 'reasonComment_fr', + #: E121:12 + 'reasonComment_de', 'reasonComment_it'), + '?'), + #: E128:4 + "foo") +#: E126+1:8 +eat_a_dict_a_day({ + "foo": "bar", +}) +#: E129+1:4 +if ( + x == ( + 3 + #: E129:4 + ) or + y == 4): + pass +#: E129+1:4 E121+2:8 E129+3:4 +if ( + x == ( + 3 + ) or + x == ( + # This one has correct indentation. 
+ 3 + #: E129:4 + ) or + y == 4): + pass +troublesome_hash = { + "hash": "value", + #: E135+1:8 + "long": "the quick brown fox jumps over the lazy dog before doing a " + "somersault", +} + +# Arguments on first line forbidden when not using vertical alignment +#: E128+1:4 +foo = long_function_name(var_one, var_two, + var_three, var_four) + +#: E128+1:4 +hello('l.%s\t%s\t%s\t%r' % + (token[2][0], pos, tokenize.tok_name[token[0]], token[1])) + + +def qualify_by_address(self, cr, uid, ids, context=None, + #: E128:8 + params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)): + """ This gets called by the web server """ diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E12_third.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_third.py new file mode 100644 index 0000000000..26697fed73 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E12_third.py @@ -0,0 +1,116 @@ +#: E128+1 +foo(1, 2, 3, +4, 5, 6) +#: E128+1:1 +foo(1, 2, 3, + 4, 5, 6) +#: E128+1:2 +foo(1, 2, 3, + 4, 5, 6) +#: E128+1:3 +foo(1, 2, 3, + 4, 5, 6) +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:5 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:6 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:7 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:8 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:9 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:10 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:11 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:12 +foo(1, 2, 3, + 4, 5, 6) +#: E127+1:13 +foo(1, 2, 3, + 4, 5, 6) +if line_removed: + #: E128+1:14 E128+2:14 + self.event(cr, uid, + name="Removing the option for contract", + description="contract line has been removed", + ) + +if line_removed: + self.event(cr, uid, + #: E127:16 + name="Removing the option for contract", + #: E127:16 + description="contract line has been removed", + #: E124:16 + ) +rv.update(d=('a', 'b', 'c'), + #: E127:13 + e=42) + +#: E135+2:17 +rv.update(d=('a' + 'b', 'c'), + e=42, f=42 + + 42) +rv.update(d=('a' + 'b', 'c'), + e=42, f=42 + + 42) +#: E127+1:26 +input1 = {'a': {'calc': 1 + 2}, 'b': 1 + + 42} +#: E128+2:17 +rv.update(d=('a' + 'b', 'c'), + e=42, f=(42 + + 42)) + +if True: + def example_issue254(): + #: + return [node.copy( + ( + #: E121:16 E121+3:20 + replacement + # First, look at all the node's current children. + for child in node.children + for replacement in replace(child) + ), + dict(name=token.undefined) + )] +# TODO multiline docstring are currently not handled. E125+1:4? 
+if (""" + """): + pass + +# TODO same +for foo in """ + abc + 123 + """.strip().split(): + hello(foo) +abc = dedent( + ''' + mkdir -p ./{build}/ + mv ./build/ ./{build}/%(revision)s/ + '''.format( + #: E121:4 E121+1:4 E123+2:0 + build='build', + # more stuff +) +) +#: E701+1: E122+1 +if True:\ +hello(True) + +#: E128+1 +foobar(a +, end=' ') diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E20.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E20.py new file mode 100644 index 0000000000..44986fa963 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E20.py @@ -0,0 +1,52 @@ +#: E201:5 +spam( ham[1], {eggs: 2}) +#: E201:9 +spam(ham[ 1], {eggs: 2}) +#: E201:14 +spam(ham[1], { eggs: 2}) + +# Okay +spam(ham[1], {eggs: 2}) + + +#: E202:22 +spam(ham[1], {eggs: 2} ) +#: E202:21 +spam(ham[1], {eggs: 2 }) +#: E202:10 +spam(ham[1 ], {eggs: 2}) +# Okay +spam(ham[1], {eggs: 2}) + +result = func( + arg1='some value', + arg2='another value', +) + +result = func( + arg1='some value', + arg2='another value' +) + +result = [ + item for item in items + if item > 5 +] + +#: E203:9 +if x == 4 : + foo(x, y) + x, y = y, x +if x == 4: + #: E203:12 E702:13 + a = x, y ; x, y = y, x +if x == 4: + foo(x, y) + #: E203:12 + x, y = y , x +# Okay +if x == 4: + foo(x, y) + x, y = y, x +a[b1, :1] == 3 +b = a[:, b1] diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E21.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E21.py new file mode 100644 index 0000000000..f65616e8ab --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E21.py @@ -0,0 +1,16 @@ +#: E211:4 +spam (1) +#: E211:4 E211:19 +dict ['key'] = list [index] +#: E211:11 +dict['key'] ['subkey'] = list[index] +# Okay +spam(1) +dict['key'] = list[index] + + +# This is not prohibited by PEP8, but avoid it. +# Dave: I think this is extremely stupid. Use the same convention everywhere. +#: E211:9 +class Foo (Bar, Baz): + pass diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E22.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E22.py new file mode 100644 index 0000000000..82ff6a440a --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E22.py @@ -0,0 +1,156 @@ +a = 12 + 3 +#: E221:5 E229:8 +b = 4 + 5 +#: E221:1 +x = 1 +#: E221:1 +y = 2 +long_variable = 3 +#: E221:4 +x[0] = 1 +#: E221:4 +x[1] = 2 +long_variable = 3 +#: E221:8 E229:19 +x = f(x) + 1 +y = long_variable + 2 +#: E221:8 E229:19 +z = x[0] + 3 +#: E221+2:13 +text = """ + bar + foo %s""" % rofl +# Okay +x = 1 +y = 2 +long_variable = 3 + + +#: E221:7 +a = a + 1 +b = b + 10 +#: E221:3 +x = -1 +#: E221:3 +y = -2 +long_variable = 3 +#: E221:6 +x[0] = 1 +#: E221:6 +x[1] = 2 +long_variable = 3 + + +#: E223+1:1 +foobart = 4 +a = 3 # aligned with tab + + +#: E223:4 +a += 1 +b += 1000 + + +#: E225:12 +submitted +=1 +#: E225:9 +submitted+= 1 +#: E225:3 +c =-1 +#: E229:7 +x = x /2 - 1 +#: E229:11 +c = alpha -4 +#: E229:10 +c = alpha- 4 +#: E229:8 +z = x **y +#: E229:14 +z = (x + 1) **y +#: E229:13 +z = (x + 1)** y +#: E227:14 +_1kB = _1MB >>10 +#: E227:11 +_1kB = _1MB>> 10 +#: E225:1 E225:2 E229:4 +i=i+ 1 +#: E225:1 E225:2 E229:5 +i=i +1 +#: E225:1 E225:2 +i=i+1 +#: E225:3 +i =i+1 +#: E225:1 +i= i+1 +#: E229:8 +c = (a +b)*(a - b) +#: E229:7 +c = (a+ b)*(a - b) + +z = 2//30 +c = (a+b) * (a-b) +x = x*2 - 1 +x = x/2 - 1 +# TODO whitespace should be the other way around according to pep8. 
+x = x / 2-1 + +hypot2 = x*x + y*y +c = (a + b)*(a - b) + + +def halves(n): + return (i//2 for i in range(n)) + + +#: E227:11 E227:13 +_1kB = _1MB>>10 +#: E227:11 E227:13 +_1MB = _1kB<<10 +#: E227:5 E227:6 +a = b|c +#: E227:5 E227:6 +b = c&a +#: E227:5 E227:6 +c = b^a +#: E228:5 E228:6 +a = b%c +#: E228:9 E228:10 +msg = fmt%(errno, errmsg) +#: E228:25 E228:26 +msg = "Error %d occurred"%errno + +#: E228:7 +a = b %c +a = b % c + +# Okay +i = i + 1 +submitted += 1 +x = x * 2 - 1 +hypot2 = x * x + y * y +c = (a + b) * (a - b) +_1MiB = 2 ** 20 +_1TiB = 2**30 +foo(bar, key='word', *args, **kwargs) +baz(**kwargs) +negative = -1 +spam(-1) +-negative +func1(lambda *args, **kw: (args, kw)) +func2(lambda a, b=h[:], c=0: (a, b, c)) +if not -5 < x < +5: + #: E227:12 + print >>sys.stderr, "x is out of range." +print >> sys.stdout, "x is an integer." +x = x / 2 - 1 + + +def squares(n): + return (i**2 for i in range(n)) + + +ENG_PREFIXES = { + -6: "\u03bc", # Greek letter mu + -3: "m", +} diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E23.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E23.py new file mode 100644 index 0000000000..47f1447a23 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E23.py @@ -0,0 +1,16 @@ +#: E231:7 +a = (1,2) +#: E231:5 +a[b1,:] +#: E231:10 +a = [{'a':''}] +# Okay +a = (4,) +#: E202:7 +b = (5, ) +c = {'text': text[5:]} + +result = { + 'key1': 'value', + 'key2': 'value', +} diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E25.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E25.py new file mode 100644 index 0000000000..8cf53147f7 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E25.py @@ -0,0 +1,36 @@ +#: E251:11 E251:13 +def foo(bar = False): + '''Test function with an error in declaration''' + pass + + +#: E251:8 +foo(bar= True) +#: E251:7 +foo(bar =True) +#: E251:7 E251:9 +foo(bar = True) +#: E251:13 +y = bar(root= "sdasd") +parser.add_argument('--long-option', + #: E135+1:20 + default= + "/rather/long/filesystem/path/here/blah/blah/blah") +parser.add_argument('--long-option', + default= + "/rather/long/filesystem") +# TODO this looks so stupid. +parser.add_argument('--long-option', default + ="/rather/long/filesystem/path/here/blah/blah/blah") +#: E251+2:7 E251+2:9 +foo(True, + baz=(1, 2), + biz = 'foo' + ) +# Okay +foo(bar=(1 == 1)) +foo(bar=(1 != 1)) +foo(bar=(1 >= 1)) +foo(bar=(1 <= 1)) +(options, args) = parser.parse_args() +d[type(None)] = _deepcopy_atomic diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E26.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E26.py new file mode 100644 index 0000000000..4774852a07 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E26.py @@ -0,0 +1,78 @@ +#: E261:4 +pass # an inline comment +#: E261:4 +pass# an inline comment + +# Okay +pass # an inline comment +pass # an inline comment +#: E262:11 +x = x + 1 #Increment x +#: E262:11 +x = x + 1 # Increment x +#: E262:11 +x = y + 1 #: Increment x +#: E265 +#Block comment +a = 1 +#: E265+1 +m = 42 +#! This is important +mx = 42 - 42 + +# Comment without anything is not an issue. +# +# However if there are comments at the end without anything it obviously +# doesn't make too much sense. 
+#: E262:9 +foo = 1 # + + +#: E266+2:4 E266+5:4 +def how_it_feel(r): + + ### This is a variable ### + a = 42 + + ### Of course it is unused + return + + +#: E266 E266+1 +##if DEBUG: +## logging.error() +#: E266 +######################################### + +# Not at the beginning of a file +#: E265 +#!/usr/bin/env python + +# Okay + +pass # an inline comment +x = x + 1 # Increment x +y = y + 1 #: Increment x + +# Block comment +a = 1 + +# Block comment1 + +# Block comment2 +aaa = 1 + + +# example of docstring (not parsed) +def oof(): + """ + #foo not parsed + """ + + ########################################################################### + # A SEPARATOR # + ########################################################################### + + # ####################################################################### # + # ########################## another separator ########################## # + # ####################################################################### # diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E27.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E27.py new file mode 100644 index 0000000000..9149f0aa52 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E27.py @@ -0,0 +1,49 @@ +# Okay +from u import (a, b) +from v import c, d +#: E221:13 +from w import (e, f) +#: E275:13 +from w import(e, f) +#: E275:29 +from importable.module import(e, f) +try: + #: E275:33 + from importable.module import(e, f) +except ImportError: + pass +# Okay +True and False +#: E221:8 +True and False +#: E221:4 +True and False +#: E221:2 +if 1: + pass +# Syntax Error, no indentation +#: E903+1 +if 1: +pass +#: E223:8 +True and False +#: E223:4 E223:9 +True and False +#: E221:5 +a and b +#: E221:5 +1 and b +#: E221:5 +a and 2 +#: E221:1 E221:6 +1 and b +#: E221:1 E221:6 +a and 2 +#: E221:4 +this and False +#: E223:5 +a and b +#: E223:1 +a and b +#: E223:4 E223:9 +this and False diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E29.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E29.py new file mode 100644 index 0000000000..cebbb7bba1 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E29.py @@ -0,0 +1,15 @@ +# Okay +# 情 +#: W291:5 +print + + +#: W291+1 +class Foo(object): + + bang = 12 + + +#: W291+1:34 +'''multiline +string with trailing whitespace''' diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E30.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E30.py new file mode 100644 index 0000000000..31e241cd44 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E30.py @@ -0,0 +1,177 @@ +#: E301+4 +class X: + + def a(): + pass + def b(): + pass + + +#: E301+5 +class X: + + def a(): + pass + # comment + def b(): + pass + + +# -*- coding: utf-8 -*- +def a(): + pass + + +#: E302+1:0 +"""Main module.""" +def _main(): + pass + + +#: E302+1:0 +foo = 1 +def get_sys_path(): + return sys.path + + +#: E302+3:0 +def a(): + pass + +def b(): + pass + + +#: E302+5:0 +def a(): + pass + +# comment + +def b(): + pass + + +#: E303+3:0 +print + + + +#: E303+3:0 E303+4:0 +print + + + + +print +#: E303+3:0 +print + + + +# comment + +print + + +#: E303+3 E303+6 +def a(): + print + + + # comment + + + # another comment + + print + + +#: E302+2 +a = 3 +#: E304+1 +@decorator + +def function(): + pass + + +#: E303+3 +# something + + + +"""This class docstring comes on line 5. 
+It gives error E303: too many blank lines (3) +""" + + +#: E302+6 +def a(): + print + + # comment + + # another comment +a() + + +#: E302+7 +def a(): + print + + # comment + + # another comment + +try: + a() +except Exception: + pass + + +#: E302+4 +def a(): + print + +# Two spaces before comments, too. +if a(): + a() + + +#: E301+2 +def a(): + x = 1 + def b(): + pass + + +#: E301+2 E301+4 +def a(): + x = 2 + def b(): + x = 1 + def c(): + pass + + +#: E301+2 E301+4 E301+5 +def a(): + x = 1 + class C: + pass + x = 2 + def b(): + pass + + +#: E302+7 +# Example from https://github.com/PyCQA/pycodestyle/issues/400 +foo = 2 + + +def main(): + blah, blah + +if __name__ == '__main__': + main() diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E30not.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E30not.py new file mode 100644 index 0000000000..c0c005ccd2 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E30not.py @@ -0,0 +1,175 @@ +# Okay +class X: + pass +# Okay + + +def foo(): + pass + + +# Okay +# -*- coding: utf-8 -*- +class X: + pass + + +# Okay +# -*- coding: utf-8 -*- +def foo(): + pass + + +# Okay +class X: + + def a(): + pass + + # comment + def b(): + pass + + # This is a + # ... multi-line comment + + def c(): + pass + + +# This is a +# ... multi-line comment + +@some_decorator +class Y: + + def a(): + pass + + # comment + + def b(): + pass + + @property + def c(): + pass + + +try: + from nonexistent import Bar +except ImportError: + class Bar(object): + """This is a Bar replacement""" + + +def with_feature(f): + """Some decorator""" + wrapper = f + if has_this_feature(f): + def wrapper(*args): + call_feature(args[0]) + return f(*args) + return wrapper + + +try: + next +except NameError: + def next(iterator, default): + for item in iterator: + return item + return default + + +def a(): + pass + + +class Foo(): + """Class Foo""" + + def b(): + + pass + + +# comment +def c(): + pass + + +# comment + + +def d(): + pass + +# This is a +# ... multi-line comment + +# And this one is +# ... a second paragraph +# ... 
which spans on 3 lines + + +# Function `e` is below +# NOTE: Hey this is a testcase + +def e(): + pass + + +def a(): + print + + # comment + + print + + print + +# Comment 1 + +# Comment 2 + + +# Comment 3 + +def b(): + + pass + + +# Okay +def foo(): + pass + + +def bar(): + pass + + +class Foo(object): + pass + + +class Bar(object): + pass + + +if __name__ == '__main__': + foo() +# Okay +classification_errors = None +# Okay +defined_properly = True +# Okay +defaults = {} +defaults.update({}) + + +# Okay +def foo(x): + classification = x + definitely = not classification diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E40.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E40.py new file mode 100644 index 0000000000..93a2ccf386 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E40.py @@ -0,0 +1,39 @@ +#: E401:7 +import os, sys +# Okay +import os +import sys + +from subprocess import Popen, PIPE + +from myclass import MyClass +from foo.bar.yourclass import YourClass + +import myclass +import foo.bar.yourclass +# All Okay from here until the definition of VERSION +__all__ = ['abc'] + +import foo +__version__ = "42" + +import foo +__author__ = "Simon Gomizelj" + +import foo +try: + import foo +except ImportError: + pass +else: + hello('imported foo') +finally: + hello('made attempt to import foo') + +import bar +VERSION = '1.2.3' + +#: E402 +import foo +#: E402 +import foo diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E50.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E50.py new file mode 100644 index 0000000000..67fd55833c --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E50.py @@ -0,0 +1,126 @@ +#: E501:4 +a = '12345678901234567890123456789012345678901234567890123456789012345678901234567890' +#: E501:80 +a = '1234567890123456789012345678901234567890123456789012345678901234567890' or \ + 6 +#: E501+1:80 +a = 7 or \ + '1234567890123456789012345678901234567890123456789012345678901234567890' or \ + 6 +#: E501+1:80 E501+2:80 +a = 7 or \ + '1234567890123456789012345678901234567890123456789012345678901234567890' or \ + '1234567890123456789012345678901234567890123456789012345678901234567890' or \ + 6 +#: E501:78 +a = '1234567890123456789012345678901234567890123456789012345678901234567890' # \ +#: E502:78 +a = ('123456789012345678901234567890123456789012345678901234567890123456789' \ + '01234567890') +#: E502+1:11 +a = ('AAA \ + BBB' \ + 'CCC') +#: E502:38 +if (foo is None and bar is "e000" and \ + blah == 'yeah'): + blah = 'yeahnah' +# +# Okay +a = ('AAA' + 'BBB') + +a = ('AAA \ + BBB' + 'CCC') + +a = 'AAA' \ + 'BBB' \ + 'CCC' + +a = ('AAA\ +BBBBBBBBB\ +CCCCCCCCC\ +DDDDDDDDD') +# +# Okay +if aaa: + pass +elif bbb or \ + ccc: + pass + +ddd = \ + ccc + +('\ + ' + ' \ +') +(''' + ''' + ' \ +') +#: E501:67 E225:21 E225:22 +very_long_identifiers=and_terrible_whitespace_habits(are_no_excuse+for_long_lines) +# +# TODO Long multiline strings are not handled. E501? +'''multiline string +with a long long long long long long long long long long long long long long long long line +''' +#: E501 +'''same thing, but this time without a terminal newline in the string +long long long long long long long long long long long long long long long long line''' +# +# issue 224 (unavoidable long lines in docstrings) +# Okay +""" +I'm some great documentation. 
Because I'm some great documentation, I'm +going to give you a reference to some valuable information about some API +that I'm calling: + + http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx +""" +#: E501 +""" +longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces""" + + +# Regression test for #622 +def foo(): + """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis pulvinar vitae + """ + + +# Okay +""" +This + almost_empty_line +""" + +""" +This + almost_empty_line +""" +# A basic comment +#: E501 +# with a long long long long long long long long long long long long long long long long line + +# +# Okay +# I'm some great comment. Because I'm so great, I'm going to give you a +# reference to some valuable information about some API that I'm calling: +# +# http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx + +x = 3 + +# longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces + +# +# Okay +# This +# almost_empty_line + +# +#: E501+1 +# This +# almost_empty_line diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E70.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E70.py new file mode 100644 index 0000000000..be11fb1def --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E70.py @@ -0,0 +1,25 @@ +#: E701:6 +if a: a = False +#: E701:41 +if not header or header[:6] != 'bytes=': pass +#: E702:9 +a = False; b = True +#: E702:16 E402 +import bdist_egg; bdist_egg.write_safety_flag(cmd.egg_info, safe) +#: E703:12 E402 +import shlex; +#: E702:8 E703:22 +del a[:]; a.append(42); + + +#: E704:10 +def f(x): return 2 + + +#: E704:10 +def f(x): return 2 * x + + +while all is round: + #: E704:14 + def f(x): return 2 * x diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E71.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E71.py new file mode 100644 index 0000000000..109dcd6c77 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E71.py @@ -0,0 +1,93 @@ +#: E711:7 +if res == None: + pass +#: E711:7 +if res != None: + pass +#: E711:8 +if None == res: + pass +#: E711:8 +if None != res: + pass +#: E711:10 +if res[1] == None: + pass +#: E711:10 +if res[1] != None: + pass +#: E711:8 +if None != res[1]: + pass +#: E711:8 +if None == res[1]: + pass + +# +#: E712:7 +if res == True: + pass +#: E712:7 +if res != False: + pass +#: E712:8 +if True != res: + pass +#: E712:9 +if False == res: + pass +#: E712:10 +if res[1] == True: + pass +#: E712:10 +if res[1] != False: + pass + +if x is False: + pass + +# +#: E713:9 +if not X in Y: + pass +#: E713:11 +if not X.B in Y: + pass +#: E713:9 +if not X in Y and Z == "zero": + pass +#: E713:24 +if X == "zero" or not Y in Z: + pass + +# +#: E714:9 +if not X is Y: + pass +#: E714:11 +if not X.B is Y: + pass + +# +# Okay +if x not in y: + pass + +if not (X in Y or X is Z): + pass + +if not (X in Y): + pass + +if x is not y: + pass + +if TrueElement.get_element(True) == TrueElement.get_element(False): + pass + +if (True) == TrueElement or x == TrueElement: + pass + +assert (not foo) in bar +assert {'x': not foo} in bar +assert [42, not foo] in bar diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E72.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E72.py new file mode 100644 index 0000000000..2e9ef9151d --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E72.py @@ -0,0 +1,79 @@ +#: E721:3 +if 
type(res) == type(42): + pass +#: E721:3 +if type(res) != type(""): + pass + +import types + +if res == types.IntType: + pass + +import types + +#: E721:3 +if type(res) is not types.ListType: + pass +#: E721:7 E721:35 +assert type(res) == type(False) or type(res) == type(None) +#: E721:7 +assert type(res) == type([]) +#: E721:7 +assert type(res) == type(()) +#: E721:7 +assert type(res) == type((0,)) +#: E721:7 +assert type(res) == type((0)) +#: E721:7 +assert type(res) != type((1,)) +#: E721:7 +assert type(res) is type((1,)) +#: E721:7 +assert type(res) is not type((1,)) + +# Okay +#: E402 +import types + +if isinstance(res, int): + pass +if isinstance(res, str): + pass +if isinstance(res, types.MethodType): + pass + +#: E721:3 E721:25 +if type(a) != type(b) or type(a) == type(ccc): + pass +#: E721 +type(a) != type(b) +#: E721 +1 != type(b) +#: E721 +type(b) != 1 +1 != 1 + +try: + pass +#: E722 +except: + pass +try: + pass +except Exception: + pass +#: E722 +except: + pass +# Okay +fake_code = """" +try: + do_something() +except: + pass +""" +try: + pass +except Exception: + pass diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/E73.py b/contrib/python/parso/py3/tests/normalizer_issue_files/E73.py new file mode 100644 index 0000000000..77e2e9043a --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/E73.py @@ -0,0 +1,16 @@ +#: E731:4 +f = lambda x: 2 * x +while False: + #: E731:10 + foo = lambda y, z: 2 * x +# Okay +f = object() +f.method = lambda: 'Method' + +f = {} +f['a'] = lambda x: x ** 2 + +f = [] +f.append(lambda x: x ** 2) + +lambda: 'no-op' diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/LICENSE b/contrib/python/parso/py3/tests/normalizer_issue_files/LICENSE new file mode 100644 index 0000000000..142a508a63 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/LICENSE @@ -0,0 +1,29 @@ +Copyright © 2006-2009 Johann C. Rocholl <johann@rocholl.net> +Copyright © 2009-2014 Florent Xicluna <florent.xicluna@gmail.com> +Copyright © 2014-2016 Ian Lee <IanLee1521@gmail.com> +Copyright © 2017-???? Dave Halter <davidhalter88@gmail.com> + +Dave: The files in this folder were ported from pydocstyle and some +modifications where made. + +Licensed under the terms of the Expat License + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation files +(the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/allowed_syntax.py b/contrib/python/parso/py3/tests/normalizer_issue_files/allowed_syntax.py new file mode 100644 index 0000000000..9cccf619cb --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/allowed_syntax.py @@ -0,0 +1,163 @@ +""" +Some syntax errors are a bit complicated and need exact checking. Here we +gather some of the potentially dangerous ones. +""" + +from __future__ import division + +# With a dot it's not a future import anymore. +from .__future__ import absolute_import + +'' '' +''r''u'' +b'' BR'' + + +for x in [1]: + break + continue + +try: + pass +except ZeroDivisionError: + pass + #: E722:0 +except: + pass + +try: + pass + #: E722:0 E901:0 +except: + pass +except ZeroDivisionError: + pass + + +r'\n' +r'\x' +b'\n' + + +a = 3 + + +def x(b=a): + global a + + +def x(*args, c=2, d): + pass + + +def x(*, c=2, d): + pass + + +def x(a, b=1, *args, c=2, d): + pass + + +def x(a, b=1, *, c=2, d): + pass + + +lambda *args, c=2, d: (c, d) +lambda *, c=2, d: (c, d) +lambda a, b=1, *args, c=2, d: (c, d) +lambda a, b=1, *, c=2, d: (c, d) + + +*foo, a = (1,) +*foo[0], a = (1,) +*[], a = (1,) + + +async def foo(): + await bar() + #: E901 + yield from [] + return + #: E901 + return '' + + +# With decorator it's a different statement. +@bla +async def foo(): + await bar() + #: E901 + yield from [] + return + #: E901 + return '' + + +foo: int = 4 +(foo): int = 3 +((foo)): int = 3 +foo.bar: int +foo[3]: int + + +def glob(): + global x + y: foo = x + + +def c(): + a = 3 + + def d(): + class X(): + nonlocal a + + +def x(): + a = 3 + + def y(): + nonlocal a + + +def x(): + def y(): + nonlocal a + + a = 3 + + +def x(): + a = 3 + + def y(): + class z(): + nonlocal a + + +def x(a): + def y(): + nonlocal a + + +def x(a, b): + def y(): + nonlocal b + nonlocal a + + +def x(a): + def y(): + def z(): + nonlocal a + + +def x(): + def y(a): + def z(): + nonlocal a + + +a = *args, *args +error[(*args, *args)] = 3 +*args, *args diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/latin-1.py b/contrib/python/parso/py3/tests/normalizer_issue_files/latin-1.py new file mode 100644 index 0000000000..8328cfba9e --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/latin-1.py @@ -0,0 +1,6 @@ +# -*- coding: latin-1 -*- +# Test non-UTF8 encoding +latin1 = ('' + '') + +c = ("w") diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/python.py b/contrib/python/parso/py3/tests/normalizer_issue_files/python.py new file mode 100644 index 0000000000..566e90360a --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/python.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +from typing import ClassVar, List + +print(1, 2) + + +# Annotated function (Issue #29) +def foo(x: int) -> int: + return x + 1 + + +# Annotated variables #575 +CONST: int = 42 + + +class Class: + cls_var: ClassVar[str] + + def m(self): + xs: List[int] = [] + + +# True and False are keywords in Python 3 and therefore need a space. +#: E275:13 E275:14 +norman = True+False + + +#: E302+3:0 +def a(): + pass + +async def b(): + pass + + +# Okay +async def add(a: int = 0, b: int = 0) -> int: + return a + b + + +# Previously E251 four times +#: E221:5 +async def add(a: int = 0, b: int = 0) -> int: + return a + b + + +# Previously just E272+1:5 E272+4:5 +#: E302+3 E221:5 E221+3:5 +async def x(): + pass + +async def x(y: int = 1): + pass + + +#: E704:16 +async def f(x): return 2 + + +a[b1, :] == a[b1, ...] 
+ + +# Annotated Function Definitions +# Okay +def munge(input: AnyStr, sep: AnyStr = None, limit=1000, + extra: Union[str, dict] = None) -> AnyStr: + pass + + +#: E225:24 E225:26 +def x(b: tuple = (1, 2))->int: + return a + b + + +#: E252:11 E252:12 E231:8 +def b(a:int=1): + pass + + +if alpha[:-i]: + *a, b = (1, 2, 3) + + +# Named only arguments +def foo(*, asdf): + pass + + +def foo2(bar, *, asdf=2): + pass diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/utf-8-bom.py b/contrib/python/parso/py3/tests/normalizer_issue_files/utf-8-bom.py new file mode 100644 index 0000000000..9c065c9494 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/utf-8-bom.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +hello = 'こんにちわ' + +# EOF diff --git a/contrib/python/parso/py3/tests/normalizer_issue_files/utf-8.py b/contrib/python/parso/py3/tests/normalizer_issue_files/utf-8.py new file mode 100644 index 0000000000..73ea9a2827 --- /dev/null +++ b/contrib/python/parso/py3/tests/normalizer_issue_files/utf-8.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +# Some random text with multi-byte characters (utf-8 encoded) +# +# Εδώ μάτσο κειμένων τη, τρόπο πιθανό διευθυντές ώρα μη. Νέων απλό παράγει ροή +# κι, το επί δεδομένη καθορίζουν. Πάντως ζητήσεις περιβάλλοντος ένα με, τη +# ξέχασε αρπάζεις φαινόμενο όλη. Τρέξει εσφαλμένη χρησιμοποίησέ νέα τι. Θα όρο +# πετάνε φακέλους, άρα με διακοπής λαμβάνουν εφαμοργής. Λες κι μειώσει +# καθυστερεί. + +# 79 narrow chars +# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 [79] + +# 78 narrow chars (Na) + 1 wide char (W) +# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8情 + +# 3 narrow chars (Na) + 40 wide chars (W) +# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情 + +# 3 narrow chars (Na) + 76 wide chars (W) +# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情 + +# +# 80 narrow chars (Na) +#: E501 +# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 [80] +# +# 78 narrow chars (Na) + 2 wide char (W) +#: E501 +# 01 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8情情 +# +# 3 narrow chars (Na) + 77 wide chars (W) +#: E501 +# 情 情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情情 +# diff --git a/contrib/python/parso/py3/tests/test_cache.py b/contrib/python/parso/py3/tests/test_cache.py new file mode 100644 index 0000000000..d19a872e14 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_cache.py @@ -0,0 +1,197 @@ +""" +Test all things related to the ``jedi.cache`` module. +""" + +import os +import pytest +import time +from pathlib import Path + +from parso.cache import (_CACHED_FILE_MAXIMUM_SURVIVAL, _VERSION_TAG, + _get_cache_clear_lock_path, _get_hashed_path, + _load_from_file_system, _NodeCacheItem, + _remove_cache_and_update_lock, _save_to_file_system, + load_module, parser_cache, try_to_save_module) +from parso._compatibility import is_pypy +from parso import load_grammar +from parso import cache +from parso import file_io +from parso import parse + +skip_pypy = pytest.mark.skipif( + is_pypy, + reason="pickling in pypy is slow, since we don't pickle," + "we never go into path of auto-collecting garbage" +) + + +@pytest.fixture() +def isolated_parso_cache(monkeypatch, tmpdir): + """Set `parso.cache._default_cache_path` to a temporary directory + during the test. 
""" + cache_path = Path(str(tmpdir), "__parso_cache") + monkeypatch.setattr(cache, '_default_cache_path', cache_path) + return cache_path + + +@pytest.mark.skip("SUBBOTNIK-2721 Disable load cache from disk") +def test_modulepickling_change_cache_dir(tmpdir): + """ + ParserPickling should not save old cache when cache_directory is changed. + + See: `#168 <https://github.com/davidhalter/jedi/pull/168>`_ + """ + dir_1 = Path(str(tmpdir.mkdir('first'))) + dir_2 = Path(str(tmpdir.mkdir('second'))) + + item_1 = _NodeCacheItem('bla', []) + item_2 = _NodeCacheItem('bla', []) + path_1 = Path('fake path 1') + path_2 = Path('fake path 2') + + hashed_grammar = load_grammar()._hashed + _save_to_file_system(hashed_grammar, path_1, item_1, cache_path=dir_1) + parser_cache.clear() + cached = load_stored_item(hashed_grammar, path_1, item_1, cache_path=dir_1) + assert cached == item_1.node + + _save_to_file_system(hashed_grammar, path_2, item_2, cache_path=dir_2) + cached = load_stored_item(hashed_grammar, path_1, item_1, cache_path=dir_2) + assert cached is None + + +def load_stored_item(hashed_grammar, path, item, cache_path): + """Load `item` stored at `path` in `cache`.""" + item = _load_from_file_system(hashed_grammar, path, item.change_time - 1, cache_path) + return item + + +@pytest.mark.usefixtures("isolated_parso_cache") +def test_modulepickling_simulate_deleted_cache(tmpdir): + """ + Tests loading from a cache file after it is deleted. + According to macOS `dev docs`__, + + Note that the system may delete the Caches/ directory to free up disk + space, so your app must be able to re-create or download these files as + needed. + + It is possible that other supported platforms treat cache files the same + way. + + __ https://developer.apple.com/library/content/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html + """ # noqa + grammar = load_grammar() + module = 'fake parser' + + # Create the file + path = Path(str(tmpdir.dirname), 'some_path') + with open(path, 'w'): + pass + io = file_io.FileIO(path) + + try_to_save_module(grammar._hashed, io, module, lines=[]) + assert load_module(grammar._hashed, io) == module + + os.unlink(_get_hashed_path(grammar._hashed, path)) + parser_cache.clear() + + cached2 = load_module(grammar._hashed, io) + assert cached2 is None + + +@pytest.mark.skip +def test_cache_limit(): + def cache_size(): + return sum(len(v) for v in parser_cache.values()) + + try: + parser_cache.clear() + future_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() + 10e6) + old_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() - 10e4) + parser_cache['some_hash_old'] = { + '/path/%s' % i: old_node_cache_item for i in range(300) + } + parser_cache['some_hash_new'] = { + '/path/%s' % i: future_node_cache_item for i in range(300) + } + assert cache_size() == 600 + parse('somecode', cache=True, path='/path/somepath') + assert cache_size() == 301 + finally: + parser_cache.clear() + + +class _FixedTimeFileIO(file_io.KnownContentFileIO): + def __init__(self, path, content, last_modified): + super().__init__(path, content) + self._last_modified = last_modified + + def get_last_modified(self): + return self._last_modified + + +@pytest.mark.skip +@pytest.mark.parametrize('diff_cache', [False, True]) +@pytest.mark.parametrize('use_file_io', [False, True]) +def test_cache_last_used_update(diff_cache, use_file_io): + p = Path('/path/last-used') + parser_cache.clear() # Clear, because then it's easier to find stuff. 
+ parse('somecode', cache=True, path=p) + node_cache_item = next(iter(parser_cache.values()))[p] + now = time.time() + assert node_cache_item.last_used <= now + + if use_file_io: + f = _FixedTimeFileIO(p, 'code', node_cache_item.last_used - 10) + parse(file_io=f, cache=True, diff_cache=diff_cache) + else: + parse('somecode2', cache=True, path=p, diff_cache=diff_cache) + + node_cache_item = next(iter(parser_cache.values()))[p] + assert now <= node_cache_item.last_used <= time.time() + + +@skip_pypy +def test_inactive_cache(tmpdir, isolated_parso_cache): + parser_cache.clear() + test_subjects = "abcdef" + for path in test_subjects: + parse('somecode', cache=True, path=os.path.join(str(tmpdir), path)) + raw_cache_path = isolated_parso_cache.joinpath(_VERSION_TAG) + assert raw_cache_path.exists() + dir_names = os.listdir(raw_cache_path) + a_while_ago = time.time() - _CACHED_FILE_MAXIMUM_SURVIVAL + old_paths = set() + for dir_name in dir_names[:len(test_subjects) // 2]: # make certain number of paths old + os.utime(raw_cache_path.joinpath(dir_name), (a_while_ago, a_while_ago)) + old_paths.add(dir_name) + # nothing should be cleared while the lock is on + assert _get_cache_clear_lock_path().exists() + _remove_cache_and_update_lock() # it shouldn't clear anything + assert len(os.listdir(raw_cache_path)) == len(test_subjects) + assert old_paths.issubset(os.listdir(raw_cache_path)) + + os.utime(_get_cache_clear_lock_path(), (a_while_ago, a_while_ago)) + _remove_cache_and_update_lock() + assert len(os.listdir(raw_cache_path)) == len(test_subjects) // 2 + assert not old_paths.intersection(os.listdir(raw_cache_path)) + + +@pytest.mark.skip +@skip_pypy +def test_permission_error(monkeypatch): + def save(*args, **kwargs): + nonlocal was_called + was_called = True + raise PermissionError + + was_called = False + + monkeypatch.setattr(cache, '_save_to_file_system', save) + try: + with pytest.warns(Warning): + parse(path=__file__, cache=True, diff_cache=True) + assert was_called + finally: + parser_cache.clear() diff --git a/contrib/python/parso/py3/tests/test_diff_parser.py b/contrib/python/parso/py3/tests/test_diff_parser.py new file mode 100644 index 0000000000..222236e7e8 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_diff_parser.py @@ -0,0 +1,1746 @@ +# -*- coding: utf-8 -*- +from textwrap import dedent +import logging + +import pytest + +from parso.utils import split_lines +from parso import cache +from parso import load_grammar +from parso.python.diff import DiffParser, _assert_valid_graph, _assert_nodes_are_equal +from parso import parse + +ANY = object() + + +def test_simple(): + """ + The diff parser reuses modules. So check for that. 
+ """ + grammar = load_grammar() + module_a = grammar.parse('a', diff_cache=True) + assert grammar.parse('b', diff_cache=True) == module_a + + +def _check_error_leaves_nodes(node): + if node.type in ('error_leaf', 'error_node'): + return node + + try: + children = node.children + except AttributeError: + pass + else: + for child in children: + x_node = _check_error_leaves_nodes(child) + if x_node is not None: + return x_node + return None + + +class Differ: + grammar = load_grammar() + + def initialize(self, code): + logging.debug('differ: initialize') + try: + del cache.parser_cache[self.grammar._hashed][None] + except KeyError: + pass + + self.lines = split_lines(code, keepends=True) + self.module = parse(code, diff_cache=True, cache=True) + assert code == self.module.get_code() + _assert_valid_graph(self.module) + return self.module + + def parse(self, code, copies=0, parsers=0, expect_error_leaves=False): + logging.debug('differ: parse copies=%s parsers=%s', copies, parsers) + lines = split_lines(code, keepends=True) + diff_parser = DiffParser( + self.grammar._pgen_grammar, + self.grammar._tokenizer, + self.module, + ) + new_module = diff_parser.update(self.lines, lines) + self.lines = lines + assert code == new_module.get_code() + + _assert_valid_graph(new_module) + + without_diff_parser_module = parse(code) + _assert_nodes_are_equal(new_module, without_diff_parser_module) + + error_node = _check_error_leaves_nodes(new_module) + assert expect_error_leaves == (error_node is not None), error_node + if parsers is not ANY: + assert diff_parser._parser_count == parsers + if copies is not ANY: + assert diff_parser._copy_count == copies + return new_module + + +@pytest.fixture() +def differ(): + return Differ() + + +def test_change_and_undo(differ): + func_before = 'def func():\n pass\n' + # Parse the function and a. + differ.initialize(func_before + 'a') + # Parse just b. + differ.parse(func_before + 'b', copies=1, parsers=2) + # b has changed to a again, so parse that. + differ.parse(func_before + 'a', copies=1, parsers=2) + # Same as before parsers should not be used. Just a simple copy. + differ.parse(func_before + 'a', copies=1) + + # Now that we have a newline at the end, everything is easier in Python + # syntax, we can parse once and then get a copy. + differ.parse(func_before + 'a\n', copies=1, parsers=2) + differ.parse(func_before + 'a\n', copies=1) + + # Getting rid of an old parser: Still no parsers used. + differ.parse('a\n', copies=1) + # Now the file has completely changed and we need to parse. + differ.parse('b\n', parsers=1) + # And again. + differ.parse('a\n', parsers=1) + + +def test_positions(differ): + func_before = 'class A:\n pass\n' + m = differ.initialize(func_before + 'a') + assert m.start_pos == (1, 0) + assert m.end_pos == (3, 1) + + m = differ.parse('a', copies=1) + assert m.start_pos == (1, 0) + assert m.end_pos == (1, 1) + + m = differ.parse('a\n\n', parsers=1) + assert m.end_pos == (3, 0) + m = differ.parse('a\n\n ', copies=1, parsers=2) + assert m.end_pos == (3, 1) + m = differ.parse('a ', parsers=1) + assert m.end_pos == (1, 2) + + +def test_if_simple(differ): + src = dedent('''\ + if 1: + a = 3 + ''') + else_ = "else:\n a = ''\n" + + differ.initialize(src + 'a') + differ.parse(src + else_ + "a", copies=0, parsers=1) + + differ.parse(else_, parsers=2, expect_error_leaves=True) + differ.parse(src + else_, parsers=1) + + +def test_func_with_for_and_comment(differ): + # The first newline is important, leave it. It should not trigger another + # parser split. 
+ src = dedent("""\ + + def func(): + pass + + + for a in [1]: + # COMMENT + a""") + differ.initialize(src) + differ.parse('a\n' + src, copies=1, parsers=3) + + +def test_one_statement_func(differ): + src = dedent("""\ + first + def func(): a + """) + differ.initialize(src + 'second') + differ.parse(src + 'def second():\n a', parsers=1, copies=1) + + +def test_for_on_one_line(differ): + src = dedent("""\ + foo = 1 + for x in foo: pass + + def hi(): + pass + """) + differ.initialize(src) + + src = dedent("""\ + def hi(): + for x in foo: pass + pass + + pass + """) + differ.parse(src, parsers=2) + + src = dedent("""\ + def hi(): + for x in foo: pass + pass + + def nested(): + pass + """) + # The second parser is for parsing the `def nested()` which is an `equal` + # operation in the SequenceMatcher. + differ.parse(src, parsers=1, copies=1) + + +def test_open_parentheses(differ): + func = 'def func():\n a\n' + code = 'isinstance(\n\n' + func + new_code = 'isinstance(\n' + func + differ.initialize(code) + + differ.parse(new_code, parsers=1, expect_error_leaves=True) + + new_code = 'a = 1\n' + new_code + differ.parse(new_code, parsers=2, expect_error_leaves=True) + + func += 'def other_func():\n pass\n' + differ.initialize('isinstance(\n' + func) + # Cannot copy all, because the prefix of the function is once a newline and + # once not. + differ.parse('isinstance()\n' + func, parsers=2, copies=1) + + +def test_open_parentheses_at_end(differ): + code = "a['" + differ.initialize(code) + differ.parse(code, parsers=1, expect_error_leaves=True) + + +def test_backslash(differ): + src = dedent(r""" + a = 1\ + if 1 else 2 + def x(): + pass + """) + differ.initialize(src) + + src = dedent(r""" + def x(): + a = 1\ + if 1 else 2 + def y(): + pass + """) + differ.parse(src, parsers=1) + + src = dedent(r""" + def first(): + if foo \ + and bar \ + or baz: + pass + def second(): + pass + """) + differ.parse(src, parsers=2) + + +def test_full_copy(differ): + code = 'def foo(bar, baz):\n pass\n bar' + differ.initialize(code) + differ.parse(code, copies=1) + + +def test_wrong_whitespace(differ): + code = ''' + hello + ''' + differ.initialize(code) + differ.parse(code + 'bar\n ', parsers=2, expect_error_leaves=True) + + code += """abc(\npass\n """ + differ.parse(code, parsers=2, expect_error_leaves=True) + + +def test_issues_with_error_leaves(differ): + code = dedent(''' + def ints(): + str.. + str + ''') + code2 = dedent(''' + def ints(): + str. + str + ''') + differ.initialize(code) + differ.parse(code2, parsers=1, expect_error_leaves=True) + + +def test_unfinished_nodes(differ): + code = dedent(''' + class a(): + def __init__(self, a): + self.a = a + def p(self): + a(1) + ''') + code2 = dedent(''' + class a(): + def __init__(self, a): + self.a = a + def p(self): + self + a(1) + ''') + differ.initialize(code) + differ.parse(code2, parsers=2, copies=2) + + +def test_nested_if_and_scopes(differ): + code = dedent(''' + class a(): + if 1: + def b(): + 2 + ''') + code2 = code + ' else:\n 3' + differ.initialize(code) + differ.parse(code2, parsers=1, copies=0) + + +def test_word_before_def(differ): + code1 = 'blub def x():\n' + code2 = code1 + ' s' + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=0, expect_error_leaves=True) + + +def test_classes_with_error_leaves(differ): + code1 = dedent(''' + class X(): + def x(self): + blablabla + assert 3 + self. 
+ + class Y(): + pass + ''') + code2 = dedent(''' + class X(): + def x(self): + blablabla + assert 3 + str( + + class Y(): + pass + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + + +def test_totally_wrong_whitespace(differ): + code1 = ''' + class X(): + raise n + + class Y(): + pass + ''' + code2 = ''' + class X(): + raise n + str( + + class Y(): + pass + ''' + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=0, expect_error_leaves=True) + + +def test_node_insertion(differ): + code1 = dedent(''' + class X(): + def y(self): + a = 1 + b = 2 + + c = 3 + d = 4 + ''') + code2 = dedent(''' + class X(): + def y(self): + a = 1 + b = 2 + str + + c = 3 + d = 4 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=2) + + +def test_whitespace_at_end(differ): + code = dedent('str\n\n') + + differ.initialize(code) + differ.parse(code + '\n', parsers=1, copies=1) + + +def test_endless_while_loop(differ): + """ + This was a bug in Jedi #878. + """ + code = '#dead' + differ.initialize(code) + module = differ.parse(code, parsers=1) + assert module.end_pos == (1, 5) + + code = '#dead\n' + differ.initialize(code) + module = differ.parse(code + '\n', parsers=1) + assert module.end_pos == (3, 0) + + +def test_in_class_movements(differ): + code1 = dedent("""\ + class PlaybookExecutor: + p + b + def run(self): + 1 + try: + x + except: + pass + """) + code2 = dedent("""\ + class PlaybookExecutor: + b + def run(self): + 1 + try: + x + except: + pass + """) + + differ.initialize(code1) + differ.parse(code2, parsers=1) + + +def test_in_parentheses_newlines(differ): + code1 = dedent(""" + x = str( + True) + + a = 1 + + def foo(): + pass + + b = 2""") + + code2 = dedent(""" + x = str(True) + + a = 1 + + def foo(): + pass + + b = 2""") + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1) + + +def test_indentation_issue(differ): + code1 = dedent(""" + import module + """) + + code2 = dedent(""" + class L1: + class L2: + class L3: + def f(): pass + def f(): pass + def f(): pass + def f(): pass + """) + + differ.initialize(code1) + differ.parse(code2, parsers=2) + + +def test_endmarker_newline(differ): + code1 = dedent('''\ + docu = None + # some comment + result = codet + incomplete_dctassign = { + "module" + + if "a": + x = 3 # asdf + ''') + + code2 = code1.replace('codet', 'coded') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + + +def test_newlines_at_end(differ): + differ.initialize('a\n\n') + differ.parse('a\n', copies=1) + + +def test_end_newline_with_decorator(differ): + code = dedent('''\ + @staticmethod + def spam(): + import json + json.l''') + + differ.initialize(code) + module = differ.parse(code + '\n', copies=1, parsers=1) + decorated, endmarker = module.children + assert decorated.type == 'decorated' + decorator, func = decorated.children + suite = func.children[-1] + assert suite.type == 'suite' + newline, first_stmt, second_stmt = suite.children + assert first_stmt.get_code() == ' import json\n' + assert second_stmt.get_code() == ' json.l\n' + + +def test_invalid_to_valid_nodes(differ): + code1 = dedent('''\ + def a(): + foo = 3 + def b(): + la = 3 + else: + la + return + foo + base + ''') + code2 = dedent('''\ + def a(): + foo = 3 + def b(): + la = 3 + if foo: + latte = 3 + else: + la + return + foo + base + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=3) + + +def test_if_removal_and_reappearence(differ): + 
code1 = dedent('''\ + la = 3 + if foo: + latte = 3 + else: + la + pass + ''') + + code2 = dedent('''\ + la = 3 + latte = 3 + else: + la + pass + ''') + + code3 = dedent('''\ + la = 3 + if foo: + latte = 3 + else: + la + ''') + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=2, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + differ.parse(code3, parsers=1, copies=1) + + +def test_add_error_indentation(differ): + code = 'if x:\n 1\n' + differ.initialize(code) + differ.parse(code + ' 2\n', parsers=1, copies=0, expect_error_leaves=True) + + +def test_differing_docstrings(differ): + code1 = dedent('''\ + def foobar(x, y): + 1 + return x + + def bazbiz(): + foobar() + lala + ''') + + code2 = dedent('''\ + def foobar(x, y): + 2 + return x + y + + def bazbiz(): + z = foobar() + lala + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1) + differ.parse(code1, parsers=2, copies=1) + + +def test_one_call_in_function_change(differ): + code1 = dedent('''\ + def f(self): + mro = [self] + for a in something: + yield a + + def g(self): + return C( + a=str, + b=self, + ) + ''') + + code2 = dedent('''\ + def f(self): + mro = [self] + + def g(self): + return C( + a=str, + t + b=self, + ) + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_function_deletion(differ): + code1 = dedent('''\ + class C(list): + def f(self): + def iterate(): + for x in b: + break + + return list(iterate()) + ''') + + code2 = dedent('''\ + class C(): + def f(self): + for x in b: + break + + return list(iterate()) + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=0, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=0) + + +def test_docstring_removal(differ): + code1 = dedent('''\ + class E(Exception): + """ + 1 + 2 + 3 + """ + + class S(object): + @property + def f(self): + return cmd + def __repr__(self): + return cmd2 + ''') + + code2 = dedent('''\ + class E(Exception): + """ + 1 + 3 + """ + + class S(object): + @property + def f(self): + return cmd + return cmd2 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=2) + differ.parse(code1, parsers=3, copies=1) + + +def test_paren_in_strange_position(differ): + code1 = dedent('''\ + class C: + """ ha """ + def __init__(self, message): + self.message = message + ''') + + code2 = dedent('''\ + class C: + """ ha """ + ) + def __init__(self, message): + self.message = message + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=2, expect_error_leaves=True) + differ.parse(code1, parsers=0, copies=2) + + +def insert_line_into_code(code, index, line): + lines = split_lines(code, keepends=True) + lines.insert(index, line) + return ''.join(lines) + + +def test_paren_before_docstring(differ): + code1 = dedent('''\ + # comment + """ + The + """ + from parso import tree + from parso import python + ''') + + code2 = insert_line_into_code(code1, 1, ' ' * 16 + 'raise InternalParseError(\n') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_parentheses_before_method(differ): + code1 = dedent('''\ + class A: + def a(self): + pass + + class B: + def b(self): + if 1: + pass + ''') + + code2 = dedent('''\ + class A: + def a(self): + pass + Exception.__init__(self, "x" % + + def b(self): + if 1: + pass + ''') + + differ.initialize(code1) + 
differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_indentation_issues(differ): + code1 = dedent('''\ + class C: + def f(): + 1 + if 2: + return 3 + + def g(): + to_be_removed + pass + ''') + + code2 = dedent('''\ + class C: + def f(): + 1 + ``something``, very ``weird``). + if 2: + return 3 + + def g(): + to_be_removed + pass + ''') + + code3 = dedent('''\ + class C: + def f(): + 1 + if 2: + return 3 + + def g(): + pass + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=2) + differ.parse(code3, parsers=2, copies=1) + differ.parse(code1, parsers=2, copies=1) + + +def test_error_dedent_issues(differ): + code1 = dedent('''\ + while True: + try: + 1 + except KeyError: + if 2: + 3 + except IndexError: + 4 + + 5 + ''') + + code2 = dedent('''\ + while True: + try: + except KeyError: + 1 + except KeyError: + if 2: + 3 + except IndexError: + 4 + + something_inserted + 5 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=0) + + +def test_random_text_insertion(differ): + code1 = dedent('''\ +class C: + def f(): + return node + + def g(): + try: + 1 + except KeyError: + 2 + ''') + + code2 = dedent('''\ +class C: + def f(): + return node +Some'random text: yeah + for push in plan.dfa_pushes: + + def g(): + try: + 1 + except KeyError: + 2 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_many_nested_ifs(differ): + code1 = dedent('''\ + class C: + def f(self): + def iterate(): + if 1: + yield t + else: + yield + return + + def g(): + 3 + ''') + + code2 = dedent('''\ + def f(self): + def iterate(): + if 1: + yield t + hahahaha + if 2: + else: + yield + return + + def g(): + 3 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + + +@pytest.mark.parametrize('prefix', ['', 'async ']) +def test_with_and_funcdef_in_call(differ, prefix): + code1 = prefix + dedent('''\ + with x: + la = C( + a=1, + b=2, + c=3, + ) + ''') + + code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n') + + differ.initialize(code1) + differ.parse(code2, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1) + + +def test_wrong_backslash(differ): + code1 = dedent('''\ + def y(): + 1 + for x in y: + continue + ''') + + code2 = insert_line_into_code(code1, 3, '\\.whl$\n') + + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + + +def test_random_unicode_characters(differ): + """ + Those issues were all found with the fuzzer. 
+ """ + differ.initialize('') + differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, + expect_error_leaves=True) + differ.parse('\r\r', parsers=1) + differ.parse("˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True) + differ.parse('a\ntaǁ\rGĒōns__\n\nb', parsers=1) + s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):' + differ.parse(s, parsers=1, expect_error_leaves=True) + differ.parse('') + differ.parse(s + '\n', parsers=1, expect_error_leaves=True) + differ.parse(' result = (\r\f\x17\t\x11res)', parsers=1, expect_error_leaves=True) + differ.parse('') + differ.parse(' a( # xx\ndef', parsers=1, expect_error_leaves=True) + + +def test_dedent_end_positions(differ): + code1 = dedent('''\ + if 1: + if b: + 2 + c = { + 5} + ''') + code2 = dedent('''\ + if 1: + if ⌟ഒᜈྡྷṭb: + 2 + 'l': ''} + c = { + 5} + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1) + + +def test_special_no_newline_ending(differ): + code1 = dedent('''\ + 1 + ''') + code2 = dedent('''\ + 1 + is ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=0) + + +def test_random_character_insertion(differ): + code1 = dedent('''\ + def create(self): + 1 + if self.path is not None: + return + # 3 + # 4 + ''') + code2 = dedent('''\ + def create(self): + 1 + if 2: + x return + # 3 + # 4 + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1) + + +def test_import_opening_bracket(differ): + code1 = dedent('''\ + 1 + 2 + from bubu import (X, + ''') + code2 = dedent('''\ + 11 + 2 + from bubu import (X, + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=2, expect_error_leaves=True) + + +def test_opening_bracket_at_end(differ): + code1 = dedent('''\ + class C: + 1 + [ + ''') + code2 = dedent('''\ + 3 + class C: + 1 + [ + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True) + + +def test_all_sorts_of_indentation(differ): + code1 = dedent('''\ + class C: + 1 + def f(): + 'same' + + if foo: + a = b + end + ''') + code2 = dedent('''\ + class C: + 1 + def f(yield await %|( + 'same' + + \x02\x06\x0f\x1c\x11 + if foo: + a = b + + end + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True) + + code3 = dedent('''\ + if 1: + a + b + c + d + \x00 + ''') + differ.parse(code3, parsers=1, expect_error_leaves=True) + differ.parse('') + + +def test_dont_copy_dedents_in_beginning(differ): + code1 = dedent('''\ + a + 4 + ''') + code2 = dedent('''\ + 1 + 2 + 3 + 4 + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + + +def test_dont_copy_error_leaves(differ): + code1 = dedent('''\ + def f(n): + x + if 2: + 3 + ''') + code2 = dedent('''\ + def f(n): + def if 1: + indent + x + if 2: + 3 + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1) + + +def test_error_dedent_in_between(differ): + code1 = dedent('''\ + class C: + def f(): + a + if something: + x + z + ''') + code2 = dedent('''\ + class C: + def f(): + a 
+ dedent + if other_thing: + b + if something: + x + z + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=2) + + +def test_some_other_indentation_issues(differ): + code1 = dedent('''\ + class C: + x + def f(): + "" + copied + a + ''') + code2 = dedent('''\ + try: + de + a + b + c + d + def f(): + "" + copied + a + ''') + differ.initialize(code1) + differ.parse(code2, copies=0, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1) + + +def test_open_bracket_case1(differ): + code1 = dedent('''\ + class C: + 1 + 2 # ha + ''') + code2 = insert_line_into_code(code1, 2, ' [str\n') + code3 = insert_line_into_code(code2, 4, ' str\n') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code3, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=1, parsers=1) + + +def test_open_bracket_case2(differ): + code1 = dedent('''\ + class C: + def f(self): + ( + b + c + + def g(self): + d + ''') + code2 = dedent('''\ + class C: + def f(self): + ( + b + c + self. + + def g(self): + d + ''') + differ.initialize(code1) + differ.parse(code2, copies=0, parsers=1, expect_error_leaves=True) + differ.parse(code1, copies=0, parsers=1, expect_error_leaves=True) + + +def test_some_weird_removals(differ): + code1 = dedent('''\ + class C: + 1 + ''') + code2 = dedent('''\ + class C: + 1 + @property + A + return + # x + omega + ''') + code3 = dedent('''\ + class C: + 1 + ; + omega + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True) + differ.parse(code3, copies=1, parsers=3, expect_error_leaves=True) + differ.parse(code1, copies=1) + + +def test_async_copy(differ): + code1 = dedent('''\ + async def main(): + x = 3 + print( + ''') + code2 = dedent('''\ + async def main(): + x = 3 + print() + ''') + differ.initialize(code1) + differ.parse(code2, copies=1, parsers=1) + differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True) + + +def test_parent_on_decorator(differ): + code1 = dedent('''\ + class AClass: + @decorator() + def b_test(self): + print("Hello") + print("world") + + def a_test(self): + pass''') + code2 = dedent('''\ + class AClass: + @decorator() + def b_test(self): + print("Hello") + print("world") + + def a_test(self): + pass''') + differ.initialize(code1) + module_node = differ.parse(code2, parsers=1) + cls = module_node.children[0] + cls_suite = cls.children[-1] + assert len(cls_suite.children) == 3 + + +def test_wrong_indent_in_def(differ): + code1 = dedent('''\ + def x(): + a + b + ''') + + code2 = dedent('''\ + def x(): + // + b + c + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, expect_error_leaves=True) + differ.parse(code1, parsers=1) + + +def test_backslash_issue(differ): + code1 = dedent(''' + pre = ( + '') + after = 'instead' + ''') + code2 = dedent(''' + pre = ( + '') + \\if + ''') # noqa + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=1, copies=1) + + +def test_paren_with_indentation(differ): + code1 = dedent(''' + class C: + def f(self, fullname, path=None): + x + + def load_module(self, fullname): + a + for prefix in self.search_path: + try: + b + except ImportError: + c + else: + raise + def x(): + pass + ''') + code2 = dedent(''' + class C: + def f(self, fullname, path=None): + x + + ( + a + for prefix in self.search_path: + try: + b + except 
ImportError: + c + else: + raise + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=3, copies=1) + + +def test_error_dedent_in_function(differ): + code1 = dedent('''\ + def x(): + a + b + c + d + ''') + code2 = dedent('''\ + def x(): + a + b + c + d + e + ''') + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + + +def test_with_formfeed(differ): + code1 = dedent('''\ + @bla + async def foo(): + 1 + yield from [] + return + return '' + ''') + code2 = dedent('''\ + @bla + async def foo(): + 1 + \x0cimport + return + return '' + ''') # noqa + differ.initialize(code1) + differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True) + + +def test_repeating_invalid_indent(differ): + code1 = dedent('''\ + def foo(): + return + + @bla + a + def foo(): + a + b + c + ''') + code2 = dedent('''\ + def foo(): + return + + @bla + a + b + c + ''') + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + + +def test_another_random_indent(differ): + code1 = dedent('''\ + def foo(): + a + b + c + return + def foo(): + d + ''') + code2 = dedent('''\ + def foo(): + a + c + return + def foo(): + d + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=3) + + +def test_invalid_function(differ): + code1 = dedent('''\ + a + def foo(): + def foo(): + b + ''') + code2 = dedent('''\ + a + def foo(): + def foo(): + b + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_async_func2(differ): + code1 = dedent('''\ + async def foo(): + return '' + @bla + async def foo(): + x + ''') + code2 = dedent('''\ + async def foo(): + return '' + + { + @bla + async def foo(): + x + y + ''') + differ.initialize(code1) + differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True) + + +def test_weird_ending(differ): + code1 = dedent('''\ + def foo(): + a + return + ''') + code2 = dedent('''\ + def foo(): + a + nonlocal xF""" + y"""''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_nested_class(differ): + code1 = dedent('''\ +def c(): + a = 3 + class X: + b + ''') + code2 = dedent('''\ +def c(): + a = 3 + class X: + elif + ''') + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_class_with_paren_breaker(differ): + code1 = dedent('''\ +class Grammar: + x + def parse(): + y + parser( + ) + z + ''') + code2 = dedent('''\ +class Grammar: + x + def parse(): + y + parser( + finally ; + ) + z + ''') + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True) + + +def test_byte_order_mark(differ): + code2 = dedent('''\ + + x + \ufeff + else : + ''') + differ.initialize('\n') + differ.parse(code2, parsers=2, expect_error_leaves=True) + + code3 = dedent('''\ + \ufeff + if: + + x + ''') + differ.initialize('\n') + differ.parse(code3, parsers=2, expect_error_leaves=True) + + +def test_byte_order_mark2(differ): + code = '\ufeff# foo' + differ.initialize(code) + differ.parse(code + 'x', parsers=ANY) + + +def test_byte_order_mark3(differ): + code1 = "\ufeff#\ny\n" + code2 = 'x\n\ufeff#\n\ufeff#\ny\n' + differ.initialize(code1) + differ.parse(code2, expect_error_leaves=True, parsers=ANY, copies=ANY) + differ.parse(code1, parsers=1) + + +def test_backslash_insertion(differ): + code1 = dedent(''' + def f(): + x + def g(): + base = 
"" \\ + "" + return + ''') + code2 = dedent(''' + def f(): + x + def g(): + base = "" \\ + def h(): + "" + return + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) + differ.parse(code1, parsers=2, copies=1) + + +def test_fstring_with_error_leaf(differ): + code1 = dedent("""\ + def f(): + x + def g(): + y + """) + code2 = dedent("""\ + def f(): + x + F''' + def g(): + y + {a + \x01 + """) + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_yet_another_backslash(differ): + code1 = dedent('''\ + def f(): + x + def g(): + y + base = "" \\ + "" % to + return + ''') + code2 = dedent('''\ + def f(): + x + def g(): + y + base = "" \\ + \x0f + return + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True) + differ.parse(code1, parsers=ANY, copies=ANY) + + +def test_backslash_before_def(differ): + code1 = dedent('''\ + def f(): + x + + def g(): + y + z + ''') + code2 = dedent('''\ + def f(): + x + >\\ + def g(): + y + x + z + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=3, copies=1, expect_error_leaves=True) + + +def test_backslash_with_imports(differ): + code1 = dedent('''\ + from x import y, \\ + ''') + code2 = dedent('''\ + from x import y, \\ + z + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1) + differ.parse(code1, parsers=1) + + +def test_one_line_function_error_recovery(differ): + code1 = dedent('''\ + class X: + x + def y(): word """ + # a + # b + c(self) + ''') + code2 = dedent('''\ + class X: + x + def y(): word """ + # a + # b + c(\x01+self) + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) + + +def test_one_line_property_error_recovery(differ): + code1 = dedent('''\ + class X: + x + @property + def encoding(self): True - + return 1 + ''') + code2 = dedent('''\ + class X: + x + @property + def encoding(self): True - + return 1 + ''') + + differ.initialize(code1) + differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True) diff --git a/contrib/python/parso/py3/tests/test_dump_tree.py b/contrib/python/parso/py3/tests/test_dump_tree.py new file mode 100644 index 0000000000..d2d7259f73 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_dump_tree.py @@ -0,0 +1,182 @@ +from textwrap import dedent + +import pytest + +from parso import parse +# Using star import for easier eval testing below. 
+from parso.python.tree import * # noqa: F403 +from parso.tree import * # noqa: F403 +from parso.tree import ErrorLeaf, TypedLeaf + + +@pytest.mark.parametrize( + 'indent,expected_dump', [ + (None, "Module([" + "Lambda([" + "Keyword('lambda', (1, 0)), " + "Param([" + "Name('x', (1, 7), prefix=' '), " + "Operator(',', (1, 8)), " + "]), " + "Param([" + "Name('y', (1, 10), prefix=' '), " + "]), " + "Operator(':', (1, 11)), " + "PythonNode('arith_expr', [" + "Name('x', (1, 13), prefix=' '), " + "Operator('+', (1, 15), prefix=' '), " + "Name('y', (1, 17), prefix=' '), " + "]), " + "]), " + "EndMarker('', (1, 18)), " + "])"), + (0, dedent('''\ + Module([ + Lambda([ + Keyword('lambda', (1, 0)), + Param([ + Name('x', (1, 7), prefix=' '), + Operator(',', (1, 8)), + ]), + Param([ + Name('y', (1, 10), prefix=' '), + ]), + Operator(':', (1, 11)), + PythonNode('arith_expr', [ + Name('x', (1, 13), prefix=' '), + Operator('+', (1, 15), prefix=' '), + Name('y', (1, 17), prefix=' '), + ]), + ]), + EndMarker('', (1, 18)), + ])''')), + (4, dedent('''\ + Module([ + Lambda([ + Keyword('lambda', (1, 0)), + Param([ + Name('x', (1, 7), prefix=' '), + Operator(',', (1, 8)), + ]), + Param([ + Name('y', (1, 10), prefix=' '), + ]), + Operator(':', (1, 11)), + PythonNode('arith_expr', [ + Name('x', (1, 13), prefix=' '), + Operator('+', (1, 15), prefix=' '), + Name('y', (1, 17), prefix=' '), + ]), + ]), + EndMarker('', (1, 18)), + ])''')), + ('\t', dedent('''\ + Module([ + \tLambda([ + \t\tKeyword('lambda', (1, 0)), + \t\tParam([ + \t\t\tName('x', (1, 7), prefix=' '), + \t\t\tOperator(',', (1, 8)), + \t\t]), + \t\tParam([ + \t\t\tName('y', (1, 10), prefix=' '), + \t\t]), + \t\tOperator(':', (1, 11)), + \t\tPythonNode('arith_expr', [ + \t\t\tName('x', (1, 13), prefix=' '), + \t\t\tOperator('+', (1, 15), prefix=' '), + \t\t\tName('y', (1, 17), prefix=' '), + \t\t]), + \t]), + \tEndMarker('', (1, 18)), + ])''')), + ] +) +def test_dump_parser_tree(indent, expected_dump): + code = "lambda x, y: x + y" + module = parse(code) + assert module.dump(indent=indent) == expected_dump + + # Check that dumped tree can be eval'd to recover the parser tree and original code. 
+ recovered_code = eval(expected_dump).get_code() + assert recovered_code == code + + +@pytest.mark.parametrize( + 'node,expected_dump,expected_code', [ + ( # Dump intermediate node (not top level module) + parse("def foo(x, y): return x + y").children[0], dedent('''\ + Function([ + Keyword('def', (1, 0)), + Name('foo', (1, 4), prefix=' '), + PythonNode('parameters', [ + Operator('(', (1, 7)), + Param([ + Name('x', (1, 8)), + Operator(',', (1, 9)), + ]), + Param([ + Name('y', (1, 11), prefix=' '), + ]), + Operator(')', (1, 12)), + ]), + Operator(':', (1, 13)), + ReturnStmt([ + Keyword('return', (1, 15), prefix=' '), + PythonNode('arith_expr', [ + Name('x', (1, 22), prefix=' '), + Operator('+', (1, 24), prefix=' '), + Name('y', (1, 26), prefix=' '), + ]), + ]), + ])'''), + "def foo(x, y): return x + y", + ), + ( # Dump leaf + parse("def foo(x, y): return x + y").children[0].children[0], + "Keyword('def', (1, 0))", + 'def', + ), + ( # Dump ErrorLeaf + ErrorLeaf('error_type', 'error_code', (1, 1), prefix=' '), + "ErrorLeaf('error_type', 'error_code', (1, 1), prefix=' ')", + ' error_code', + ), + ( # Dump TypedLeaf + TypedLeaf('type', 'value', (1, 1)), + "TypedLeaf('type', 'value', (1, 1))", + 'value', + ), + ] +) +def test_dump_parser_tree_not_top_level_module(node, expected_dump, expected_code): + dump_result = node.dump() + assert dump_result == expected_dump + + # Check that dumped tree can be eval'd to recover the parser tree and original code. + recovered_code = eval(dump_result).get_code() + assert recovered_code == expected_code + + +def test_dump_parser_tree_invalid_args(): + module = parse("lambda x, y: x + y") + + with pytest.raises(TypeError): + module.dump(indent=1.1) + + +def test_eval_dump_recovers_parent(): + module = parse("lambda x, y: x + y") + module2 = eval(module.dump()) + assert module2.parent is None + lambda_node = module2.children[0] + assert lambda_node.parent is module2 + assert module2.children[1].parent is module2 + assert lambda_node.children[0].parent is lambda_node + param_node = lambda_node.children[1] + assert param_node.parent is lambda_node + assert param_node.children[0].parent is param_node + assert param_node.children[1].parent is param_node + arith_expr_node = lambda_node.children[-1] + assert arith_expr_node.parent is lambda_node + assert arith_expr_node.children[0].parent is arith_expr_node diff --git a/contrib/python/parso/py3/tests/test_error_recovery.py b/contrib/python/parso/py3/tests/test_error_recovery.py new file mode 100644 index 0000000000..87efd4784a --- /dev/null +++ b/contrib/python/parso/py3/tests/test_error_recovery.py @@ -0,0 +1,149 @@ +from textwrap import dedent + +from parso import parse, load_grammar + + +def test_with_stmt(): + module = parse('with x: f.\na') + assert module.children[0].type == 'with_stmt' + w, with_item, colon, f = module.children[0].children + assert f.type == 'error_node' + assert f.get_code(include_prefix=False) == 'f.' 
+ + assert module.children[2].type == 'name' + + +def test_one_line_function(each_version): + module = parse('def x(): f.', version=each_version) + assert module.children[0].type == 'funcdef' + def_, name, parameters, colon, f = module.children[0].children + assert f.type == 'error_node' + + module = parse('def x(a:', version=each_version) + func = module.children[0] + assert func.type == 'error_node' + if each_version.startswith('2'): + assert func.children[-1].value == 'a' + else: + assert func.children[-1] == ':' + + +def test_if_else(): + module = parse('if x:\n f.\nelse:\n g(') + if_stmt = module.children[0] + if_, test, colon, suite1, else_, colon, suite2 = if_stmt.children + f = suite1.children[1] + assert f.type == 'error_node' + assert f.children[0].value == 'f' + assert f.children[1].value == '.' + g = suite2.children[1] + assert g.children[0].value == 'g' + assert g.children[1].value == '(' + + +def test_if_stmt(): + module = parse('if x: f.\nelse: g(') + if_stmt = module.children[0] + assert if_stmt.type == 'if_stmt' + if_, test, colon, f = if_stmt.children + assert f.type == 'error_node' + assert f.children[0].value == 'f' + assert f.children[1].value == '.' + + assert module.children[1].type == 'newline' + assert module.children[1].value == '\n' + assert module.children[2].type == 'error_leaf' + assert module.children[2].value == 'else' + assert module.children[3].type == 'error_leaf' + assert module.children[3].value == ':' + + in_else_stmt = module.children[4] + assert in_else_stmt.type == 'error_node' + assert in_else_stmt.children[0].value == 'g' + assert in_else_stmt.children[1].value == '(' + + +def test_invalid_token(): + module = parse('a + ? + b') + error_node, q, plus_b, endmarker = module.children + assert error_node.get_code() == 'a +' + assert q.value == '?' + assert q.type == 'error_leaf' + assert plus_b.type == 'factor' + assert plus_b.get_code() == ' + b' + + +def test_invalid_token_in_fstr(): + module = load_grammar(version='3.9').parse('f"{a + ? + b}"') + error_node, q, plus_b, error1, error2, endmarker = module.children + assert error_node.get_code() == 'f"{a +' + assert q.value == '?' 
+ assert q.type == 'error_leaf' + assert plus_b.type == 'error_node' + assert plus_b.get_code() == ' + b' + assert error1.value == '}' + assert error1.type == 'error_leaf' + assert error2.value == '"' + assert error2.type == 'error_leaf' + + +def test_dedent_issues1(): + code = dedent('''\ + class C: + @property + f + g + end + ''') + module = load_grammar(version='3.8').parse(code) + klass, endmarker = module.children + suite = klass.children[-1] + assert suite.children[2].type == 'error_leaf' + assert suite.children[3].get_code(include_prefix=False) == 'f\n' + assert suite.children[5].get_code(include_prefix=False) == 'g\n' + assert suite.type == 'suite' + + +def test_dedent_issues2(): + code = dedent('''\ + class C: + @property + if 1: + g + else: + h + end + ''') + module = load_grammar(version='3.8').parse(code) + klass, endmarker = module.children + suite = klass.children[-1] + assert suite.children[2].type == 'error_leaf' + if_ = suite.children[3] + assert if_.children[0] == 'if' + assert if_.children[3].type == 'suite' + assert if_.children[3].get_code() == '\n g\n' + assert if_.children[4] == 'else' + assert if_.children[6].type == 'suite' + assert if_.children[6].get_code() == '\n h\n' + + assert suite.children[4].get_code(include_prefix=False) == 'end\n' + assert suite.type == 'suite' + + +def test_dedent_issues3(): + code = dedent('''\ + class C: + f + g + ''') + module = load_grammar(version='3.8').parse(code) + klass, endmarker = module.children + suite = klass.children[-1] + assert len(suite.children) == 4 + assert suite.children[1].get_code() == ' f\n' + assert suite.children[1].type == 'simple_stmt' + assert suite.children[2].get_code() == '' + assert suite.children[2].type == 'error_leaf' + assert suite.children[2].token_type == 'ERROR_DEDENT' + assert suite.children[3].get_code() == ' g\n' + assert suite.children[3].type == 'simple_stmt' diff --git a/contrib/python/parso/py3/tests/test_file_python_errors.py b/contrib/python/parso/py3/tests/test_file_python_errors.py new file mode 100644 index 0000000000..7083dfeb46 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_file_python_errors.py @@ -0,0 +1,23 @@ +import os + +import parso + + +def get_python_files(path): + for dir_path, dir_names, file_names in os.walk(path): + for file_name in file_names: + if file_name.endswith('.py'): + yield os.path.join(dir_path, file_name) + + +def test_on_itself(each_version): + """ + There are obviously no syntax erros in the Python code of parso. However + parso should output the same for all versions. 
+ """ + grammar = parso.load_grammar(version=each_version) + path = os.path.dirname(os.path.dirname(__file__)) + '/parso' + for file in get_python_files(path): + tree = grammar.parse(path=file) + errors = list(grammar.iter_errors(tree)) + assert not errors diff --git a/contrib/python/parso/py3/tests/test_fstring.py b/contrib/python/parso/py3/tests/test_fstring.py new file mode 100644 index 0000000000..c81d027a16 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_fstring.py @@ -0,0 +1,164 @@ +import pytest +from textwrap import dedent + +from parso import load_grammar, ParserSyntaxError +from parso.python.tokenize import tokenize + + +@pytest.fixture +def grammar(): + return load_grammar(version='3.8') + + +@pytest.mark.parametrize( + 'code', [ + # simple cases + 'f"{1}"', + 'f"""{1}"""', + 'f"{foo} {bar}"', + + # empty string + 'f""', + 'f""""""', + + # empty format specifier is okay + 'f"{1:}"', + + # use of conversion options + 'f"{1!a}"', + 'f"{1!a:1}"', + + # format specifiers + 'f"{1:1}"', + 'f"{1:1.{32}}"', + 'f"{1::>4}"', + 'f"{x:{y}}"', + 'f"{x:{y:}}"', + 'f"{x:{y:1}}"', + + # Escapes + 'f"{{}}"', + 'f"{{{1}}}"', + 'f"{{{1}"', + 'f"1{{2{{3"', + 'f"}}"', + + # New Python 3.8 syntax f'{a=}' + 'f"{a=}"', + 'f"{a()=}"', + + # multiline f-string + 'f"""abc\ndef"""', + 'f"""abc{\n123}def"""', + + # a line continuation inside of an fstring_string + 'f"abc\\\ndef"', + 'f"\\\n{123}\\\n"', + + # a line continuation inside of an fstring_expr + 'f"{\\\n123}"', + + # a line continuation inside of an format spec + 'f"{123:.2\\\nf}"', + + # some unparenthesized syntactic structures + 'f"{*x,}"', + 'f"{*x, *y}"', + 'f"{x, *y}"', + 'f"{*x, y}"', + 'f"{x for x in [1]}"', + + # named unicode characters + 'f"\\N{BULLET}"', + 'f"\\N{FLEUR-DE-LIS}"', + 'f"\\N{NO ENTRY}"', + 'f"Combo {expr} and \\N{NO ENTRY}"', + 'f"\\N{NO ENTRY} and {expr}"', + 'f"\\N{no entry}"', + 'f"\\N{SOYOMBO LETTER -A}"', + 'f"\\N{DOMINO TILE HORIZONTAL-00-00}"', + 'f"""\\N{NO ENTRY}"""', + ] +) +def test_valid(code, grammar): + module = grammar.parse(code, error_recovery=False) + fstring = module.children[0] + assert fstring.type == 'fstring' + assert fstring.get_code() == code + + +@pytest.mark.parametrize( + 'code', [ + # an f-string can't contain unmatched curly braces + 'f"}"', + 'f"{"', + 'f"""}"""', + 'f"""{"""', + + # invalid conversion characters + 'f"{1!{a}}"', + 'f"{1=!{a}}"', + 'f"{!{a}}"', + + # The curly braces must contain an expression + 'f"{}"', + 'f"{:}"', + 'f"{:}}}"', + 'f"{:1}"', + 'f"{!:}"', + 'f"{!}"', + 'f"{!a}"', + + # invalid (empty) format specifiers + 'f"{1:{}}"', + 'f"{1:{:}}"', + + # a newline without a line continuation inside a single-line string + 'f"abc\ndef"', + + # various named unicode escapes that aren't name-shaped + 'f"\\N{ BULLET }"', + 'f"\\N{NO ENTRY}"', + 'f"""\\N{NO\nENTRY}"""', + ] +) +def test_invalid(code, grammar): + with pytest.raises(ParserSyntaxError): + grammar.parse(code, error_recovery=False) + + # It should work with error recovery. + grammar.parse(code, error_recovery=True) + + +@pytest.mark.parametrize( + ('code', 'positions'), [ + # 2 times 2, 5 because python expr and endmarker. 
+ ('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]), + ('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9), + (1, 10), (1, 11), (1, 12), (1, 13)]), + ('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1), + (4, 2), (4, 5)]), + ('f"\\N{NO ENTRY} and {expr}"', [(1, 0), (1, 2), (1, 19), (1, 20), + (1, 24), (1, 25), (1, 26)]), + ] +) +def test_tokenize_start_pos(code, positions): + tokens = list(tokenize(code, version_info=(3, 6))) + assert positions == [p.start_pos for p in tokens] + + +@pytest.mark.parametrize( + 'code', [ + dedent("""\ + f'''s{ + str.uppe + ''' + """), + 'f"foo', + 'f"""foo', + 'f"abc\ndef"', + ] +) +def test_roundtrip(grammar, code): + tree = grammar.parse(code) + assert tree.get_code() == code diff --git a/contrib/python/parso/py3/tests/test_get_code.py b/contrib/python/parso/py3/tests/test_get_code.py new file mode 100644 index 0000000000..d99d792b93 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_get_code.py @@ -0,0 +1,133 @@ +import difflib + +import pytest + +from parso import parse + +code_basic_features = ''' +"""A mod docstring""" + +def a_function(a_argument, a_default = "default"): + """A func docstring""" + + a_result = 3 * a_argument + print(a_result) # a comment + b = """ +from +to""" + "huhu" + + + if a_default == "default": + return str(a_result) + else + return None +''' + + +def diff_code_assert(a, b, n=4): + if a != b: + diff = "\n".join(difflib.unified_diff( + a.splitlines(), + b.splitlines(), + n=n, + lineterm="" + )) + assert False, "Code does not match:\n%s\n\ncreated code:\n%s" % ( + diff, + b + ) + pass + + +def test_basic_parsing(): + """Validate the parsing features""" + + m = parse(code_basic_features) + diff_code_assert( + code_basic_features, + m.get_code() + ) + + +def test_operators(): + src = '5 * 3' + module = parse(src) + diff_code_assert(src, module.get_code()) + + +def test_get_code(): + """Use the same code that the parser also generates, to compare""" + s = '''"""a docstring""" +class SomeClass(object, mixin): + def __init__(self): + self.xy = 3.0 + """statement docstr""" + def some_method(self): + return 1 + def yield_method(self): + while hasattr(self, 'xy'): + yield True + for x in [1, 2]: + yield x + def empty(self): + pass +class Empty: + pass +class WithDocstring: + """class docstr""" + pass +def method_with_docstring(): + """class docstr""" + pass +''' + assert parse(s).get_code() == s + + +def test_end_newlines(): + """ + The Python grammar explicitly needs a newline at the end. Jedi though still + wants to be able, to return the exact same code without the additional new + line the parser needs. + """ + def test(source, end_pos): + module = parse(source) + assert module.get_code() == source + assert module.end_pos == end_pos + + test('a', (1, 1)) + test('a\n', (2, 0)) + test('a\nb', (2, 1)) + test('a\n#comment\n', (3, 0)) + test('a\n#comment', (2, 8)) + test('a#comment', (1, 9)) + test('def a():\n pass', (2, 5)) + + test('def a(', (1, 6)) + + +@pytest.mark.parametrize(('code', 'types'), [ + ('\r', ['endmarker']), + ('\n\r', ['endmarker']) +]) +def test_carriage_return_at_end(code, types): + """ + By adding an artificial newline this created weird side effects for + \r at the end of files. 
+ """ + tree = parse(code) + assert tree.get_code() == code + assert [c.type for c in tree.children] == types + assert tree.end_pos == (len(code) + 1, 0) + + +@pytest.mark.parametrize('code', [ + ' ', + ' F"""', + ' F"""\n', + ' F""" \n', + ' F""" \n3', + ' f"""\n"""', + ' f"""\n"""\n', +]) +def test_full_code_round_trip(code): + assert parse(code).get_code() == code diff --git a/contrib/python/parso/py3/tests/test_grammar.py b/contrib/python/parso/py3/tests/test_grammar.py new file mode 100644 index 0000000000..60a249b8f1 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_grammar.py @@ -0,0 +1,8 @@ +import parso + +import pytest + + +def test_non_unicode(): + with pytest.raises(UnicodeDecodeError): + parso.parse(b'\xe4') diff --git a/contrib/python/parso/py3/tests/test_load_grammar.py b/contrib/python/parso/py3/tests/test_load_grammar.py new file mode 100644 index 0000000000..0ea648eb3e --- /dev/null +++ b/contrib/python/parso/py3/tests/test_load_grammar.py @@ -0,0 +1,31 @@ +import pytest +from parso.grammar import load_grammar +from parso import utils + + +def test_load_inexisting_grammar(): + # This version shouldn't be out for a while, but if we ever do, wow! + with pytest.raises(NotImplementedError): + load_grammar(version='15.8') + # The same is true for very old grammars (even though this is probably not + # going to be an issue. + with pytest.raises(NotImplementedError): + load_grammar(version='1.5') + + +@pytest.mark.parametrize(('string', 'result'), [ + ('2', (2, 7)), ('3', (3, 6)), ('1.1', (1, 1)), ('1.1.1', (1, 1)), ('300.1.31', (300, 1)) +]) +def test_parse_version(string, result): + assert utils._parse_version(string) == result + + +@pytest.mark.parametrize('string', ['1.', 'a', '#', '1.3.4.5']) +def test_invalid_grammar_version(string): + with pytest.raises(ValueError): + load_grammar(version=string) + + +def test_grammar_int_version(): + with pytest.raises(TypeError): + load_grammar(version=3.8) diff --git a/contrib/python/parso/py3/tests/test_normalizer_issues_files.py b/contrib/python/parso/py3/tests/test_normalizer_issues_files.py new file mode 100644 index 0000000000..c6a23497e5 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_normalizer_issues_files.py @@ -0,0 +1,71 @@ +""" +To easily verify if our normalizer raises the right error codes, just use the +tests of pydocstyle. 
+""" + +import difflib +import re +from functools import total_ordering +from typing import Iterator, Tuple + +import parso +from parso.utils import python_bytes_to_unicode + + +@total_ordering +class WantedIssue: + def __init__(self, code: str, line: int, column: int) -> None: + self.code = code + self._line = line + self._column = column + + def __eq__(self, other): + return self.code == other.code and self.start_pos == other.start_pos + + def __lt__(self, other: 'WantedIssue') -> bool: + return self.start_pos < other.start_pos or self.code < other.code + + def __hash__(self) -> int: + return hash(str(self.code) + str(self._line) + str(self._column)) + + @property + def start_pos(self) -> Tuple[int, int]: + return self._line, self._column + + +def collect_errors(code: str) -> Iterator[WantedIssue]: + for line_nr, line in enumerate(code.splitlines(), 1): + match = re.match(r'(\s*)#: (.*)$', line) + if match is not None: + codes = match.group(2) + for code in codes.split(): + code, _, add_indent = code.partition(':') + column = int(add_indent or len(match.group(1))) + + code, _, add_line = code.partition('+') + ln = line_nr + 1 + int(add_line or 0) + + yield WantedIssue(code[1:], ln, column) + + +def test_normalizer_issue(normalizer_issue_case): + def sort(issues): + issues = sorted(issues, key=lambda i: (i.start_pos, i.code)) + return ["(%s, %s): %s" % (i.start_pos[0], i.start_pos[1], i.code) + for i in issues] + + with open(normalizer_issue_case.path, 'rb') as f: + code = python_bytes_to_unicode(f.read()) + + desired = sort(collect_errors(code)) + + grammar = parso.load_grammar(version=normalizer_issue_case.python_version) + module = grammar.parse(code) + issues = grammar._get_normalizer_issues(module) + actual = sort(issues) + + diff = '\n'.join(difflib.ndiff(desired, actual)) + # To make the pytest -v diff a bit prettier, stop pytest to rewrite assert + # statements by executing the comparison earlier. + _bool = desired == actual + assert _bool, '\n' + diff diff --git a/contrib/python/parso/py3/tests/test_old_fast_parser.py b/contrib/python/parso/py3/tests/test_old_fast_parser.py new file mode 100644 index 0000000000..6f332cfc54 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_old_fast_parser.py @@ -0,0 +1,209 @@ +""" +These tests test the cases that the old fast parser tested with the normal +parser. + +The old fast parser doesn't exist anymore and was replaced with a diff parser. +However the tests might still be relevant for the parser. 
+""" + +from textwrap import dedent + +from parso import parse + + +def test_carriage_return_splitting(): + source = dedent(''' + + + + "string" + + class Foo(): + pass + ''') + source = source.replace('\n', '\r\n') + module = parse(source) + assert [n.value for lst in module.get_used_names().values() for n in lst] == ['Foo'] + + +def check_p(src, number_parsers_used, number_of_splits=None, number_of_misses=0): + if number_of_splits is None: + number_of_splits = number_parsers_used + + module_node = parse(src) + + assert src == module_node.get_code() + return module_node + + +def test_for(): + src = dedent("""\ + for a in [1,2]: + a + + for a1 in 1,"": + a1 + """) + check_p(src, 1) + + +def test_class_with_class_var(): + src = dedent("""\ + class SuperClass: + class_super = 3 + def __init__(self): + self.foo = 4 + pass + """) + check_p(src, 3) + + +def test_func_with_if(): + src = dedent("""\ + def recursion(a): + if foo: + return recursion(a) + else: + if bar: + return inexistent + else: + return a + """) + check_p(src, 1) + + +def test_decorator(): + src = dedent("""\ + class Decorator(): + @memoize + def dec(self, a): + return a + """) + check_p(src, 2) + + +def test_nested_funcs(): + src = dedent("""\ + def memoize(func): + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + return wrapper + """) + check_p(src, 3) + + +def test_multi_line_params(): + src = dedent("""\ + def x(a, + b): + pass + + foo = 1 + """) + check_p(src, 2) + + +def test_class_func_if(): + src = dedent("""\ + class Class: + def func(self): + if 1: + a + else: + b + + pass + """) + check_p(src, 3) + + +def test_multi_line_for(): + src = dedent("""\ + for x in [1, + 2]: + pass + + pass + """) + check_p(src, 1) + + +def test_wrong_indentation(): + src = dedent("""\ + def func(): + a + b + a + """) + check_p(src, 1) + + src = dedent("""\ + def complex(): + def nested(): + a + b + a + + def other(): + pass + """) + check_p(src, 3) + + +def test_strange_parentheses(): + src = dedent(""" + class X(): + a = (1 + if 1 else 2) + def x(): + pass + """) + check_p(src, 2) + + +def test_fake_parentheses(): + """ + The fast parser splitting counts parentheses, but not as correct tokens. + Therefore parentheses in string tokens are included as well. This needs to + be accounted for. + """ + src = dedent(r""" + def x(): + a = (')' + if 1 else 2) + def y(): + pass + def z(): + pass + """) + check_p(src, 3, 2, 1) + + +def test_additional_indent(): + source = dedent('''\ + int( + def x(): + pass + ''') + + check_p(source, 2) + + +def test_round_trip(): + code = dedent(''' + def x(): + """hahaha""" + func''') + + assert parse(code).get_code() == code + + +def test_parentheses_in_string(): + code = dedent(''' + def x(): + '(' + + import abc + + abc.''') + check_p(code, 2, 1, 1) diff --git a/contrib/python/parso/py3/tests/test_param_splitting.py b/contrib/python/parso/py3/tests/test_param_splitting.py new file mode 100644 index 0000000000..3ea5f1653b --- /dev/null +++ b/contrib/python/parso/py3/tests/test_param_splitting.py @@ -0,0 +1,47 @@ +''' +To make the life of any analysis easier, we are generating Param objects +instead of simple parser objects. 
+''' + +from textwrap import dedent + +from parso import parse + + +def assert_params(param_string, **wanted_dct): + source = dedent(''' + def x(%s): + pass + ''') % param_string + + module = parse(source) + funcdef = next(module.iter_funcdefs()) + dct = dict((p.name.value, p.default and p.default.get_code()) + for p in funcdef.get_params()) + assert dct == wanted_dct + assert module.get_code() == source + + +def test_split_params_with_separation_star(): + assert_params('x, y=1, *, z=3', x=None, y='1', z='3') + assert_params('*, x', x=None) + assert_params('*') + + +def test_split_params_with_stars(): + assert_params('x, *args', x=None, args=None) + assert_params('**kwargs', kwargs=None) + assert_params('*args, **kwargs', args=None, kwargs=None) + + +def test_kw_only_no_kw(works_in_py): + """ + Parsing this should be working. In CPython the parser also parses this and + in a later step the AST complains. + """ + module = works_in_py.parse('def test(arg, *):\n pass') + if module is not None: + func = module.children[0] + open_, p1, asterisk, close = func._get_param_nodes() + assert p1.get_code('arg,') + assert asterisk.value == '*' diff --git a/contrib/python/parso/py3/tests/test_parser.py b/contrib/python/parso/py3/tests/test_parser.py new file mode 100644 index 0000000000..e087b0d554 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_parser.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +from textwrap import dedent + +import pytest + +from parso import parse +from parso.python import tree +from parso.utils import split_lines + + +def test_basic_parsing(each_version): + def compare(string): + """Generates the AST object and then regenerates the code.""" + assert parse(string, version=each_version).get_code() == string + + compare('\na #pass\n') + compare('wblabla* 1\t\n') + compare('def x(a, b:3): pass\n') + compare('assert foo\n') + + +def test_subscope_names(each_version): + def get_sub(source): + return parse(source, version=each_version).children[0] + + name = get_sub('class Foo: pass').name + assert name.start_pos == (1, len('class ')) + assert name.end_pos == (1, len('class Foo')) + assert name.value == 'Foo' + + name = get_sub('def foo(): pass').name + assert name.start_pos == (1, len('def ')) + assert name.end_pos == (1, len('def foo')) + assert name.value == 'foo' + + +def test_import_names(each_version): + def get_import(source): + return next(parse(source, version=each_version).iter_imports()) + + imp = get_import('import math\n') + names = imp.get_defined_names() + assert len(names) == 1 + assert names[0].value == 'math' + assert names[0].start_pos == (1, len('import ')) + assert names[0].end_pos == (1, len('import math')) + + assert imp.start_pos == (1, 0) + assert imp.end_pos == (1, len('import math')) + + +def test_end_pos(each_version): + s = dedent(''' + x = ['a', 'b', 'c'] + def func(): + y = None + ''') + parser = parse(s, version=each_version) + scope = next(parser.iter_funcdefs()) + assert scope.start_pos == (3, 0) + assert scope.end_pos == (5, 0) + + +def test_carriage_return_statements(each_version): + source = dedent(''' + foo = 'ns1!' + + # this is a namespace package + ''') + source = source.replace('\n', '\r\n') + stmt = parse(source, version=each_version).children[0] + assert '#' not in stmt.get_code() + + +def test_incomplete_list_comprehension(each_version): + """ Shouldn't raise an error, same bug as #418. """ + # With the old parser this actually returned a statement. With the new + # parser only valid statements generate one. 
+ children = parse('(1 for def', version=each_version).children + assert [c.type for c in children] == \ + ['error_node', 'error_node', 'endmarker'] + + +def test_newline_positions(each_version): + endmarker = parse('a\n', version=each_version).children[-1] + assert endmarker.end_pos == (2, 0) + new_line = endmarker.get_previous_leaf() + assert new_line.start_pos == (1, 1) + assert new_line.end_pos == (2, 0) + + +def test_end_pos_error_correction(each_version): + """ + Source code without ending newline are given one, because the Python + grammar needs it. However, they are removed again. We still want the right + end_pos, even if something breaks in the parser (error correction). + """ + s = 'def x():\n .' + m = parse(s, version=each_version) + func = m.children[0] + assert func.type == 'funcdef' + assert func.end_pos == (2, 2) + assert m.end_pos == (2, 2) + + +def test_param_splitting(each_version): + """ + Jedi splits parameters into params, this is not what the grammar does, + but Jedi does this to simplify argument parsing. + """ + def check(src, result): + m = parse(src, version=each_version) + assert not list(m.iter_funcdefs()) + + check('def x(a, (b, c)):\n pass', ['a']) + check('def x((b, c)):\n pass', []) + + +def test_unicode_string(): + s = tree.String(None, 'bö', (0, 0)) + assert repr(s) # Should not raise an Error! + + +def test_backslash_dos_style(each_version): + assert parse('\\\r\n', version=each_version) + + +def test_started_lambda_stmt(each_version): + m = parse('lambda a, b: a i', version=each_version) + assert m.children[0].type == 'error_node' + + +@pytest.mark.parametrize('code', ['foo "', 'foo """\n', 'foo """\nbar']) +def test_open_string_literal(each_version, code): + """ + Testing mostly if removing the last newline works. + """ + lines = split_lines(code, keepends=True) + end_pos = (len(lines), len(lines[-1])) + module = parse(code, version=each_version) + assert module.get_code() == code + assert module.end_pos == end_pos == module.children[1].end_pos + + +def test_too_many_params(): + with pytest.raises(TypeError): + parse('asdf', hello=3) + + +def test_dedent_at_end(each_version): + code = dedent(''' + for foobar in [1]: + foobar''') + module = parse(code, version=each_version) + assert module.get_code() == code + suite = module.children[0].children[-1] + foobar = suite.children[-1] + assert foobar.type == 'name' + + +def test_no_error_nodes(each_version): + def check(node): + assert node.type not in ('error_leaf', 'error_node') + + try: + children = node.children + except AttributeError: + pass + else: + for child in children: + check(child) + + check(parse("if foo:\n bar", version=each_version)) + + +def test_named_expression(works_ge_py38): + works_ge_py38.parse("(a := 1, a + 1)") + + +def test_extended_rhs_annassign(works_ge_py38): + works_ge_py38.parse("x: y = z,") + works_ge_py38.parse("x: Tuple[int, ...] 
= z, *q, w") + + +@pytest.mark.parametrize( + 'param_code', [ + 'a=1, /', + 'a, /', + 'a=1, /, b=3', + 'a, /, b', + 'a, /, b', + 'a, /, *, b', + 'a, /, **kwargs', + ] +) +def test_positional_only_arguments(works_ge_py38, param_code): + works_ge_py38.parse("def x(%s): pass" % param_code) + + +@pytest.mark.parametrize( + 'expression', [ + 'a + a', + 'lambda x: x', + 'a := lambda x: x' + ] +) +def test_decorator_expression(works_ge_py39, expression): + works_ge_py39.parse("@%s\ndef x(): pass" % expression) diff --git a/contrib/python/parso/py3/tests/test_parser_tree.py b/contrib/python/parso/py3/tests/test_parser_tree.py new file mode 100644 index 0000000000..b994b9bbb8 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_parser_tree.py @@ -0,0 +1,266 @@ +# -*- coding: utf-8 # This file contains Unicode characters. + +from textwrap import dedent + +import pytest + +from parso import parse +from parso.python import tree +from parso.tree import search_ancestor + + +class TestsFunctionAndLambdaParsing: + + FIXTURES = [ + ('def my_function(x, y, z) -> str:\n return x + y * z\n', { + 'name': 'my_function', + 'call_sig': 'my_function(x, y, z)', + 'params': ['x', 'y', 'z'], + 'annotation': "str", + }), + ('lambda x, y, z: x + y * z\n', { + 'name': '<lambda>', + 'call_sig': '<lambda>(x, y, z)', + 'params': ['x', 'y', 'z'], + }), + ] + + @pytest.fixture(params=FIXTURES) + def node(self, request): + parsed = parse(dedent(request.param[0]), version='3.10') + request.keywords['expected'] = request.param[1] + child = parsed.children[0] + if child.type == 'simple_stmt': + child = child.children[0] + return child + + @pytest.fixture() + def expected(self, request, node): + return request.keywords['expected'] + + def test_name(self, node, expected): + if node.type != 'lambdef': + assert isinstance(node.name, tree.Name) + assert node.name.value == expected['name'] + + def test_params(self, node, expected): + assert isinstance(node.get_params(), list) + assert all(isinstance(x, tree.Param) for x in node.get_params()) + assert [str(x.name.value) for x in node.get_params()] == [x for x in expected['params']] + + def test_is_generator(self, node, expected): + assert node.is_generator() is expected.get('is_generator', False) + + def test_yields(self, node, expected): + assert node.is_generator() == expected.get('yields', False) + + def test_annotation(self, node, expected): + expected_annotation = expected.get('annotation', None) + if expected_annotation is None: + assert node.annotation is None + else: + assert node.annotation.value == expected_annotation + + +def test_end_pos_line(each_version): + # jedi issue #150 + s = "x()\nx( )\nx( )\nx ( )\n" + + module = parse(s, version=each_version) + for i, simple_stmt in enumerate(module.children[:-1]): + expr_stmt = simple_stmt.children[0] + assert expr_stmt.end_pos == (i + 1, i + 3) + + +def test_default_param(each_version): + func = parse('def x(foo=42): pass', version=each_version).children[0] + param, = func.get_params() + assert param.default.value == '42' + assert param.annotation is None + assert not param.star_count + + +def test_annotation_param(each_version): + func = parse('def x(foo: 3): pass', version=each_version).children[0] + param, = func.get_params() + assert param.default is None + assert param.annotation.value == '3' + assert not param.star_count + + +def test_annotation_params(each_version): + func = parse('def x(foo: 3, bar: 4): pass', version=each_version).children[0] + param1, param2 = func.get_params() + + assert param1.default is None + 
assert param1.annotation.value == '3' + assert not param1.star_count + + assert param2.default is None + assert param2.annotation.value == '4' + assert not param2.star_count + + +def test_default_and_annotation_param(each_version): + func = parse('def x(foo:3=42): pass', version=each_version).children[0] + param, = func.get_params() + assert param.default.value == '42' + assert param.annotation.value == '3' + assert not param.star_count + + +def get_yield_exprs(code, version): + return list(parse(code, version=version).children[0].iter_yield_exprs()) + + +def get_return_stmts(code): + return list(parse(code).children[0].iter_return_stmts()) + + +def get_raise_stmts(code, child): + return list(parse(code).children[child].iter_raise_stmts()) + + +def test_yields(each_version): + y, = get_yield_exprs('def x(): yield', each_version) + assert y.value == 'yield' + assert y.type == 'keyword' + + y, = get_yield_exprs('def x(): (yield 1)', each_version) + assert y.type == 'yield_expr' + + y, = get_yield_exprs('def x(): [1, (yield)]', each_version) + assert y.type == 'keyword' + + +def test_yield_from(): + y, = get_yield_exprs('def x(): (yield from 1)', '3.8') + assert y.type == 'yield_expr' + + +def test_returns(): + r, = get_return_stmts('def x(): return') + assert r.value == 'return' + assert r.type == 'keyword' + + r, = get_return_stmts('def x(): return 1') + assert r.type == 'return_stmt' + + +def test_raises(): + code = """ +def single_function(): + raise Exception +def top_function(): + def inner_function(): + raise NotImplementedError() + inner_function() + raise Exception +def top_function_three(): + try: + raise NotImplementedError() + except NotImplementedError: + pass + raise Exception + """ + + r = get_raise_stmts(code, 0) # Lists in a simple Function + assert len(list(r)) == 1 + + r = get_raise_stmts(code, 1) # Doesn't Exceptions list in closures + assert len(list(r)) == 1 + + r = get_raise_stmts(code, 2) # Lists inside try-catch + assert len(list(r)) == 2 + + +@pytest.mark.parametrize( + 'code, name_index, is_definition, include_setitem', [ + ('x = 3', 0, True, False), + ('x.y = 3', 0, False, False), + ('x.y = 3', 1, True, False), + ('x.y = u.v = z', 0, False, False), + ('x.y = u.v = z', 1, True, False), + ('x.y = u.v = z', 2, False, False), + ('x.y = u.v, w = z', 3, True, False), + ('x.y = u.v, w = z', 4, True, False), + ('x.y = u.v, w = z', 5, False, False), + + ('x, y = z', 0, True, False), + ('x, y = z', 1, True, False), + ('x, y = z', 2, False, False), + ('x, y = z', 2, False, False), + ('x[0], y = z', 2, False, False), + ('x[0] = z', 0, False, False), + ('x[0], y = z', 0, False, False), + ('x[0], y = z', 2, False, True), + ('x[0] = z', 0, True, True), + ('x[0], y = z', 0, True, True), + ('x: int = z', 0, True, False), + ('x: int = z', 1, False, False), + ('x: int = z', 2, False, False), + ('x: int', 0, True, False), + ('x: int', 1, False, False), + ] +) +def test_is_definition(code, name_index, is_definition, include_setitem): + module = parse(code, version='3.8') + name = module.get_first_leaf() + while True: + if name.type == 'name': + if name_index == 0: + break + name_index -= 1 + name = name.get_next_leaf() + + assert name.is_definition(include_setitem=include_setitem) == is_definition + + +def test_iter_funcdefs(): + code = dedent(''' + def normal(): ... + async def asyn(): ... + @dec + def dec_normal(): ... + @dec1 + @dec2 + async def dec_async(): ... 
+ def broken + ''') + module = parse(code, version='3.8') + func_names = [f.name.value for f in module.iter_funcdefs()] + assert func_names == ['normal', 'asyn', 'dec_normal', 'dec_async'] + + +def test_with_stmt_get_test_node_from_name(): + code = "with A as X.Y, B as (Z), C as Q[0], D as Q['foo']: pass" + with_stmt = parse(code, version='3').children[0] + tests = [ + with_stmt.get_test_node_from_name(name).value + for name in with_stmt.get_defined_names(include_setitem=True) + ] + assert tests == ["A", "B", "C", "D"] + + +sample_module = parse('x + y') +sample_node = sample_module.children[0] +sample_leaf = sample_node.children[0] + + +@pytest.mark.parametrize( + 'node,node_types,expected_ancestor', [ + (sample_module, ('file_input',), None), + (sample_node, ('arith_expr',), None), + (sample_node, ('file_input', 'eval_input'), sample_module), + (sample_leaf, ('name',), None), + (sample_leaf, ('arith_expr',), sample_node), + (sample_leaf, ('file_input',), sample_module), + (sample_leaf, ('file_input', 'arith_expr'), sample_node), + (sample_leaf, ('shift_expr',), None), + (sample_leaf, ('name', 'shift_expr',), None), + (sample_leaf, (), None), + ] +) +def test_search_ancestor(node, node_types, expected_ancestor): + assert node.search_ancestor(*node_types) is expected_ancestor + assert search_ancestor(node, *node_types) is expected_ancestor # deprecated diff --git a/contrib/python/parso/py3/tests/test_pep8.py b/contrib/python/parso/py3/tests/test_pep8.py new file mode 100644 index 0000000000..06cffb4af9 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_pep8.py @@ -0,0 +1,42 @@ +import parso + + +def issues(code): + grammar = parso.load_grammar() + module = parso.parse(code) + return grammar._get_normalizer_issues(module) + + +def test_eof_newline(): + def assert_issue(code): + found = issues(code) + assert len(found) == 1 + issue, = found + assert issue.code == 292 + + assert not issues('asdf = 1\n') + assert not issues('asdf = 1\r\n') + assert not issues('asdf = 1\r') + assert_issue('asdf = 1') + assert_issue('asdf = 1\n# foo') + assert_issue('# foobar') + assert_issue('') + assert_issue('foo = 1 # comment') + + +def test_eof_blankline(): + def assert_issue(code): + found = issues(code) + assert len(found) == 1 + issue, = found + assert issue.code == 391 + + assert_issue('asdf = 1\n\n') + assert_issue('# foobar\n\n') + assert_issue('\n\n') + + +def test_shebang(): + assert not issues('#!\n') + assert not issues('#!/foo\n') + assert not issues('#! 
python\n') diff --git a/contrib/python/parso/py3/tests/test_pgen2.py b/contrib/python/parso/py3/tests/test_pgen2.py new file mode 100644 index 0000000000..85ccacfb47 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_pgen2.py @@ -0,0 +1,357 @@ +from textwrap import dedent + +import pytest + +from parso import load_grammar +from parso import ParserSyntaxError +from parso.pgen2 import generate_grammar +from parso.python import tokenize + + +def _parse(code, version=None): + code = dedent(code) + "\n\n" + grammar = load_grammar(version=version) + return grammar.parse(code, error_recovery=False) + + +def _invalid_syntax(code, version=None, **kwargs): + with pytest.raises(ParserSyntaxError): + module = _parse(code, version=version, **kwargs) + # For debugging + print(module.children) + + +def test_formfeed(each_version): + s = "foo\n\x0c\nfoo\n" + t = _parse(s, each_version) + assert t.children[0].children[0].type == 'name' + assert t.children[1].children[0].type == 'name' + s = "1\n\x0c\x0c\n2\n" + t = _parse(s, each_version) + + with pytest.raises(ParserSyntaxError): + s = "\n\x0c2\n" + _parse(s, each_version) + + +def test_matrix_multiplication_operator(works_in_py): + works_in_py.parse("a @ b") + works_in_py.parse("a @= b") + + +def test_yield_from(works_in_py, each_version): + works_in_py.parse("yield from x") + works_in_py.parse("(yield from x) + y") + _invalid_syntax("yield from", each_version) + + +def test_await_expr(works_in_py): + works_in_py.parse("""async def foo(): + await x + """) + + works_in_py.parse("""async def foo(): + + def foo(): pass + + def foo(): pass + + await x + """) + + works_in_py.parse("""async def foo(): return await a""") + + works_in_py.parse("""def foo(): + def foo(): pass + async def foo(): await x + """) + + +@pytest.mark.parametrize( + 'code', [ + "async = 1", + "await = 1", + "def async(): pass", + ] +) +def test_async_var(works_not_in_py, code): + works_not_in_py.parse(code) + + +def test_async_for(works_in_py): + works_in_py.parse("async def foo():\n async for a in b: pass") + + +@pytest.mark.parametrize("body", [ + """[1 async for a in b + ]""", + """[1 async + for a in b + ]""", + """[ + 1 + async for a in b + ]""", + """[ + 1 + async for a + in b + ]""", + """[ + 1 + async + for + a + in + b + ]""", + """ [ + 1 async for a in b + ]""", +]) +def test_async_for_comprehension_newline(works_in_py, body): + # Issue #139 + works_in_py.parse("""async def foo(): + {}""".format(body)) + + +def test_async_with(works_in_py): + works_in_py.parse("async def foo():\n async with a: pass") + + +def test_async_with_invalid(works_in_py): + works_in_py.parse("""def foo():\n async with a: pass""") + + +def test_raise_3x_style_1(each_version): + _parse("raise", each_version) + + +def test_raise_2x_style_2(works_not_in_py): + works_not_in_py.parse("raise E, V") + + +def test_raise_2x_style_3(works_not_in_py): + works_not_in_py.parse("raise E, V, T") + + +def test_raise_2x_style_invalid_1(each_version): + _invalid_syntax("raise E, V, T, Z", version=each_version) + + +def test_raise_3x_style(works_in_py): + works_in_py.parse("raise E1 from E2") + + +def test_raise_3x_style_invalid_1(each_version): + _invalid_syntax("raise E, V from E1", each_version) + + +def test_raise_3x_style_invalid_2(each_version): + _invalid_syntax("raise E from E1, E2", each_version) + + +def test_raise_3x_style_invalid_3(each_version): + _invalid_syntax("raise from E1, E2", each_version) + + +def test_raise_3x_style_invalid_4(each_version): + _invalid_syntax("raise E from", each_version) + + +# 
Adapted from Python 3's Lib/test/test_grammar.py:GrammarTests.testFuncdef +def test_annotation_1(works_in_py): + works_in_py.parse("""def f(x) -> list: pass""") + + +def test_annotation_2(works_in_py): + works_in_py.parse("""def f(x:int): pass""") + + +def test_annotation_3(works_in_py): + works_in_py.parse("""def f(*x:str): pass""") + + +def test_annotation_4(works_in_py): + works_in_py.parse("""def f(**x:float): pass""") + + +def test_annotation_5(works_in_py): + works_in_py.parse("""def f(x, y:1+2): pass""") + + +def test_annotation_6(each_version): + _invalid_syntax("""def f(a, (b:1, c:2, d)): pass""", each_version) + + +def test_annotation_7(each_version): + _invalid_syntax("""def f(a, (b:1, c:2, d), e:3=4, f=5, *g:6): pass""", each_version) + + +def test_annotation_8(each_version): + s = """def f(a, (b:1, c:2, d), e:3=4, f=5, + *g:6, h:7, i=8, j:9=10, **k:11) -> 12: pass""" + _invalid_syntax(s, each_version) + + +def test_except_new(each_version): + s = dedent(""" + try: + x + except E as N: + y""") + _parse(s, each_version) + + +def test_except_old(works_not_in_py): + s = dedent(""" + try: + x + except E, N: + y""") + works_not_in_py.parse(s) + + +# Adapted from Python 3's Lib/test/test_grammar.py:GrammarTests.testAtoms +def test_set_literal_1(works_in_py): + works_in_py.parse("""x = {'one'}""") + + +def test_set_literal_2(works_in_py): + works_in_py.parse("""x = {'one', 1,}""") + + +def test_set_literal_3(works_in_py): + works_in_py.parse("""x = {'one', 'two', 'three'}""") + + +def test_set_literal_4(works_in_py): + works_in_py.parse("""x = {2, 3, 4,}""") + + +def test_new_octal_notation(each_version): + _parse("""0o7777777777777""", each_version) + _invalid_syntax("""0o7324528887""", each_version) + + +def test_old_octal_notation(works_not_in_py): + works_not_in_py.parse("07") + + +def test_long_notation(works_not_in_py): + works_not_in_py.parse("0xFl") + works_not_in_py.parse("0xFL") + works_not_in_py.parse("0b1l") + works_not_in_py.parse("0B1L") + works_not_in_py.parse("0o7l") + works_not_in_py.parse("0O7L") + works_not_in_py.parse("0l") + works_not_in_py.parse("0L") + works_not_in_py.parse("10l") + works_not_in_py.parse("10L") + + +def test_new_binary_notation(each_version): + _parse("""0b101010""", each_version) + _invalid_syntax("""0b0101021""", each_version) + + +def test_class_new_syntax(works_in_py): + works_in_py.parse("class B(t=7): pass") + works_in_py.parse("class B(t, *args): pass") + works_in_py.parse("class B(t, **kwargs): pass") + works_in_py.parse("class B(t, *args, **kwargs): pass") + works_in_py.parse("class B(t, y=9, *args, **kwargs): pass") + + +def test_parser_idempotency_extended_unpacking(works_in_py): + """A cut-down version of pytree_idempotency.py.""" + works_in_py.parse("a, *b, c = x\n") + works_in_py.parse("[*a, b] = x\n") + works_in_py.parse("(z, *y, w) = m\n") + works_in_py.parse("for *z, m in d: pass\n") + + +def test_multiline_bytes_literals(each_version): + s = """ + md5test(b"\xaa" * 80, + (b"Test Using Larger Than Block-Size Key " + b"and Larger Than One Block-Size Data"), + "6f630fad67cda0ee1fb1f562db3aa53e") + """ + _parse(s, each_version) + + +def test_multiline_bytes_tripquote_literals(each_version): + s = ''' + b""" + <?xml version="1.0" encoding="UTF-8"?> + <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN"> + """ + ''' + _parse(s, each_version) + + +def test_ellipsis(works_in_py, each_version): + works_in_py.parse("...") + _parse("[0][...]", version=each_version) + + +def test_dict_unpacking(works_in_py): + 
works_in_py.parse("{**dict(a=3), foo:2}") + + +def test_multiline_str_literals(each_version): + s = """ + md5test("\xaa" * 80, + ("Test Using Larger Than Block-Size Key " + "and Larger Than One Block-Size Data"), + "6f630fad67cda0ee1fb1f562db3aa53e") + """ + _parse(s, each_version) + + +def test_py2_backticks(works_not_in_py): + works_not_in_py.parse("`1`") + + +def test_py2_string_prefixes(works_not_in_py): + works_not_in_py.parse("ur'1'") + works_not_in_py.parse("Ur'1'") + works_not_in_py.parse("UR'1'") + _invalid_syntax("ru'1'", works_not_in_py.version) + + +def py_br(each_version): + _parse('br""', each_version) + + +def test_py3_rb(works_in_py): + works_in_py.parse("rb'1'") + works_in_py.parse("RB'1'") + + +def test_left_recursion(): + with pytest.raises(ValueError, match='left recursion'): + generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes) + + +@pytest.mark.parametrize( + 'grammar, error_match', [ + ['foo: bar | baz\nbar: NAME\nbaz: NAME\n', + r"foo is ambiguous.*given a (PythonTokenTypes\.)?NAME.*bar or baz"], + ['''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', + r"foo is ambiguous.*given a ReservedString\(x\).*bar or baz"], + ['''foo: bar | 'x'\nbar: 'x'\n''', + r"foo is ambiguous.*given a ReservedString\(x\).*bar or foo"], + # An ambiguity with the second (not the first) child of a production + ['outer: "a" [inner] "b" "c"\ninner: "b" "c" [inner]\n', + r"outer is ambiguous.*given a ReservedString\(b\).*inner or outer"], + # An ambiguity hidden by a level of indirection (middle) + ['outer: "a" [middle] "b" "c"\nmiddle: inner\ninner: "b" "c" [inner]\n', + r"outer is ambiguous.*given a ReservedString\(b\).*middle or outer"], + ] +) +def test_ambiguities(grammar, error_match): + with pytest.raises(ValueError, match=error_match): + generate_grammar(grammar, tokenize.PythonTokenTypes) diff --git a/contrib/python/parso/py3/tests/test_prefix.py b/contrib/python/parso/py3/tests/test_prefix.py new file mode 100644 index 0000000000..58c1dcf98f --- /dev/null +++ b/contrib/python/parso/py3/tests/test_prefix.py @@ -0,0 +1,75 @@ +from itertools import zip_longest +from codecs import BOM_UTF8 + +import pytest + +import parso + +unicode_bom = BOM_UTF8.decode('utf-8') + + +@pytest.mark.parametrize(('string', 'tokens'), [ + ('', ['']), + ('#', ['#', '']), + (' # ', ['# ', '']), + (' # \n', ['# ', '\n', '']), + (' # \f\n', ['# ', '\f', '\n', '']), + (' \n', ['\n', '']), + (' \n ', ['\n', ' ']), + (' \f ', ['\f', ' ']), + (' \f ', ['\f', ' ']), + (' \r\n', ['\r\n', '']), + (' \r', ['\r', '']), + ('\\\n', ['\\\n', '']), + ('\\\r\n', ['\\\r\n', '']), + ('\t\t\n\t', ['\n', '\t']), +]) +def test_simple_prefix_splitting(string, tokens): + tree = parso.parse(string) + leaf = tree.children[0] + assert leaf.type == 'endmarker' + + parsed_tokens = list(leaf._split_prefix()) + start_pos = (1, 0) + for pt, expected in zip_longest(parsed_tokens, tokens): + assert pt.value == expected + + # Calculate the estimated end_pos + if expected.endswith('\n') or expected.endswith('\r'): + end_pos = start_pos[0] + 1, 0 + else: + end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing) + + # assert start_pos == pt.start_pos + assert end_pos == pt.end_pos + start_pos = end_pos + + +@pytest.mark.parametrize(('string', 'types'), [ + ('# ', ['comment', 'spacing']), + ('\r\n', ['newline', 'spacing']), + ('\f', ['formfeed', 'spacing']), + ('\\\n', ['backslash', 'spacing']), + (' \t', ['spacing']), + (' \t ', ['spacing']), + (unicode_bom + ' # ', ['bom', 'comment', 'spacing']), +]) +def 
test_prefix_splitting_types(string, types): + tree = parso.parse(string) + leaf = tree.children[0] + assert leaf.type == 'endmarker' + parsed_tokens = list(leaf._split_prefix()) + assert [t.type for t in parsed_tokens] == types + + +def test_utf8_bom(): + tree = parso.parse(unicode_bom + 'a = 1') + expr_stmt = tree.children[0] + assert expr_stmt.start_pos == (1, 0) + + tree = parso.parse(unicode_bom + '\n') + endmarker = tree.children[0] + parts = list(endmarker._split_prefix()) + assert [p.type for p in parts] == ['bom', 'newline', 'spacing'] + assert [p.start_pos for p in parts] == [(1, 0), (1, 0), (2, 0)] + assert [p.end_pos for p in parts] == [(1, 0), (2, 0), (2, 0)] diff --git a/contrib/python/parso/py3/tests/test_python_errors.py b/contrib/python/parso/py3/tests/test_python_errors.py new file mode 100644 index 0000000000..fe43a301ad --- /dev/null +++ b/contrib/python/parso/py3/tests/test_python_errors.py @@ -0,0 +1,510 @@ +""" +Testing if parso finds syntax errors and indentation errors. +""" +import sys +import warnings + +import pytest + +import parso + +from textwrap import dedent +from parso._compatibility import is_pypy +from .failing_examples import FAILING_EXAMPLES, indent, build_nested + + +if is_pypy: + # The errors in PyPy might be different. Just skip the module for now. + pytestmark = pytest.mark.skip() + + +def _get_error_list(code, version=None): + grammar = parso.load_grammar(version=version) + tree = grammar.parse(code) + return list(grammar.iter_errors(tree)) + + +def assert_comparison(code, error_code, positions): + errors = [(error.start_pos, error.code) for error in _get_error_list(code)] + assert [(pos, error_code) for pos in positions] == errors + +@pytest.mark.skipif(sys.version_info >= (3, 10), reason="parso don't support Python 3.10 yet") +@pytest.mark.parametrize('code', FAILING_EXAMPLES) +def test_python_exception_matches(code): + wanted, line_nr = _get_actual_exception(code) + + errors = _get_error_list(code) + actual = None + if errors: + error, = errors + actual = error.message + assert actual in wanted + # Somehow in Python2.7 the SyntaxError().lineno is sometimes None + assert line_nr is None or line_nr == error.start_pos[0] + + +def test_non_async_in_async(): + """ + This example doesn't work with FAILING_EXAMPLES, because the line numbers + are not always the same / incorrect in Python 3.8. + """ + # Raises multiple errors in previous versions. + code = 'async def foo():\n def nofoo():[x async for x in []]' + wanted, line_nr = _get_actual_exception(code) + + errors = _get_error_list(code) + if errors: + error, = errors + actual = error.message + assert actual in wanted + if sys.version_info[:2] not in ((3, 8), (3, 9)): + assert line_nr == error.start_pos[0] + else: + assert line_nr == 0 # For whatever reason this is zero in Python 3.8/3.9 + + +@pytest.mark.parametrize( + ('code', 'positions'), [ + ('1 +', [(1, 3)]), + ('1 +\n', [(1, 3)]), + ('1 +\n2 +', [(1, 3), (2, 3)]), + ('x + 2', []), + ('[\n', [(2, 0)]), + ('[\ndef x(): pass', [(2, 0)]), + ('[\nif 1: pass', [(2, 0)]), + ('1+?', [(1, 2)]), + ('?', [(1, 0)]), + ('??', [(1, 0)]), + ('? ?', [(1, 0)]), + ('?\n?', [(1, 0), (2, 0)]), + ('? 
* ?', [(1, 0)]), + ('1 + * * 2', [(1, 4)]), + ('?\n1\n?', [(1, 0), (3, 0)]), + ] +) +def test_syntax_errors(code, positions): + assert_comparison(code, 901, positions) + + +@pytest.mark.parametrize( + ('code', 'positions'), [ + (' 1', [(1, 0)]), + ('def x():\n 1\n 2', [(3, 0)]), + ('def x():\n 1\n 2', [(3, 0)]), + ('def x():\n1', [(2, 0)]), + ] +) +def test_indentation_errors(code, positions): + assert_comparison(code, 903, positions) + + +def _get_actual_exception(code): + with warnings.catch_warnings(): + # We don't care about warnings where locals/globals misbehave here. + # It's as simple as either an error or not. + warnings.filterwarnings('ignore', category=SyntaxWarning) + try: + compile(code, '<unknown>', 'exec') + except (SyntaxError, IndentationError) as e: + wanted = e.__class__.__name__ + ': ' + e.msg + line_nr = e.lineno + except ValueError as e: + # The ValueError comes from byte literals in Python 2 like '\x' + # that are oddly enough not SyntaxErrors. + wanted = 'SyntaxError: (value error) ' + str(e) + line_nr = None + else: + assert False, "The piece of code should raise an exception." + + # SyntaxError + if wanted == 'SyntaxError: assignment to keyword': + return [wanted, "SyntaxError: can't assign to keyword", + 'SyntaxError: cannot assign to __debug__'], line_nr + elif wanted == 'SyntaxError: f-string: unterminated string': + wanted = 'SyntaxError: EOL while scanning string literal' + elif wanted == 'SyntaxError: f-string expression part cannot include a backslash': + return [ + wanted, + "SyntaxError: EOL while scanning string literal", + "SyntaxError: unexpected character after line continuation character", + ], line_nr + elif wanted == "SyntaxError: f-string: expecting '}'": + wanted = 'SyntaxError: EOL while scanning string literal' + elif wanted == 'SyntaxError: f-string: empty expression not allowed': + wanted = 'SyntaxError: invalid syntax' + elif wanted == "SyntaxError: f-string expression part cannot include '#'": + wanted = 'SyntaxError: invalid syntax' + elif wanted == "SyntaxError: f-string: single '}' is not allowed": + wanted = 'SyntaxError: invalid syntax' + return [wanted], line_nr + + +def test_default_except_error_postition(): + # For this error the position seemed to be one line off in Python < 3.10, + # but that doesn't really matter. + code = 'try: pass\nexcept: pass\nexcept X: pass' + wanted, line_nr = _get_actual_exception(code) + error, = _get_error_list(code) + assert error.message in wanted + if sys.version_info[:2] >= (3, 10): + assert line_nr == error.start_pos[0] + else: + assert line_nr != error.start_pos[0] + # I think this is the better position. 
+ assert error.start_pos[0] == 2 + + +def test_statically_nested_blocks(): + def build(code, depth): + if depth == 0: + return code + + new_code = 'if 1:\n' + indent(code) + return build(new_code, depth - 1) + + def get_error(depth, add_func=False): + code = build('foo', depth) + if add_func: + code = 'def bar():\n' + indent(code) + errors = _get_error_list(code) + if errors: + assert errors[0].message == 'SyntaxError: too many statically nested blocks' + return errors[0] + return None + + assert get_error(19) is None + assert get_error(19, add_func=True) is None + + assert get_error(20) + assert get_error(20, add_func=True) + + +def test_future_import_first(): + def is_issue(code, *args, **kwargs): + code = code % args + return bool(_get_error_list(code, **kwargs)) + + i1 = 'from __future__ import division' + i2 = 'from __future__ import absolute_import' + i3 = 'from __future__ import annotations' + assert not is_issue(i1) + assert not is_issue(i1 + ';' + i2) + assert not is_issue(i1 + '\n' + i2) + assert not is_issue('"";' + i1) + assert not is_issue('"";' + i1) + assert not is_issue('""\n' + i1) + assert not is_issue('""\n%s\n%s', i1, i2) + assert not is_issue('""\n%s;%s', i1, i2) + assert not is_issue('"";%s;%s ', i1, i2) + assert not is_issue('"";%s\n%s ', i1, i2) + assert not is_issue(i3, version="3.7") + assert is_issue(i3, version="3.6") + assert is_issue('1;' + i1) + assert is_issue('1\n' + i1) + assert is_issue('"";1\n' + i1) + assert is_issue('""\n%s\nfrom x import a\n%s', i1, i2) + assert is_issue('%s\n""\n%s', i1, i2) + + +def test_named_argument_issues(works_not_in_py): + message = works_not_in_py.get_error_message('def foo(*, **dict): pass') + message = works_not_in_py.get_error_message('def foo(*): pass') + if works_not_in_py.version.startswith('2'): + assert message == 'SyntaxError: invalid syntax' + else: + assert message == 'SyntaxError: named arguments must follow bare *' + + works_not_in_py.assert_no_error_in_passing('def foo(*, name): pass') + works_not_in_py.assert_no_error_in_passing('def foo(bar, *, name=1): pass') + works_not_in_py.assert_no_error_in_passing('def foo(bar, *, name=1, **dct): pass') + + +def test_escape_decode_literals(each_version): + """ + We are using internal functions to assure that unicode/bytes escaping is + without syntax errors. Here we make a bit of quality assurance that this + works through versions, because the internal function might change over + time. + """ + def get_msg(end, to=1): + base = "SyntaxError: (unicode error) 'unicodeescape' " \ + "codec can't decode bytes in position 0-%s: " % to + return base + end + + def get_msgs(escape): + return (get_msg('end of string in escape sequence'), + get_msg(r"truncated %s escape" % escape)) + + error, = _get_error_list(r'u"\x"', version=each_version) + assert error.message in get_msgs(r'\xXX') + + error, = _get_error_list(r'u"\u"', version=each_version) + assert error.message in get_msgs(r'\uXXXX') + + error, = _get_error_list(r'u"\U"', version=each_version) + assert error.message in get_msgs(r'\UXXXXXXXX') + + error, = _get_error_list(r'u"\N{}"', version=each_version) + assert error.message == get_msg(r'malformed \N character escape', to=2) + + error, = _get_error_list(r'u"\N{foo}"', version=each_version) + assert error.message == get_msg(r'unknown Unicode character name', to=6) + + # Finally bytes. 
+ error, = _get_error_list(r'b"\x"', version=each_version) + wanted = r'SyntaxError: (value error) invalid \x escape at position 0' + assert error.message == wanted + + +def test_too_many_levels_of_indentation(): + assert not _get_error_list(build_nested('pass', 99)) + assert _get_error_list(build_nested('pass', 100)) + base = 'def x():\n if x:\n' + assert not _get_error_list(build_nested('pass', 49, base=base)) + assert _get_error_list(build_nested('pass', 50, base=base)) + + +def test_paren_kwarg(): + assert _get_error_list("print((sep)=seperator)", version="3.8") + assert not _get_error_list("print((sep)=seperator)", version="3.7") + + +@pytest.mark.parametrize( + 'code', [ + "f'{*args,}'", + r'f"\""', + r'f"\\\""', + r'fr"\""', + r'fr"\\\""', + r"print(f'Some {x:.2f} and some {y}')", + # Unparenthesized yield expression + 'def foo(): return f"{yield 1}"', + ] +) +def test_valid_fstrings(code): + assert not _get_error_list(code, version='3.6') + + +@pytest.mark.parametrize( + 'code', [ + 'a = (b := 1)', + '[x4 := x ** 5 for x in range(7)]', + '[total := total + v for v in range(10)]', + 'while chunk := file.read(2):\n pass', + 'numbers = [y := math.factorial(x), y**2, y**3]', + '{(a:="a"): (b:=1)}', + '{(y:=1): 2 for x in range(5)}', + 'a[(b:=0)]', + 'a[(b:=0, c:=0)]', + 'a[(b:=0):1:2]', + ] +) +def test_valid_namedexpr(code): + assert not _get_error_list(code, version='3.8') + + +@pytest.mark.parametrize( + 'code', [ + '{x := 1, 2, 3}', + '{x4 := x ** 5 for x in range(7)}', + ] +) +def test_valid_namedexpr_set(code): + assert not _get_error_list(code, version='3.9') + + +@pytest.mark.parametrize( + 'code', [ + 'a[b:=0]', + 'a[b:=0, c:=0]', + ] +) +def test_valid_namedexpr_index(code): + assert not _get_error_list(code, version='3.10') + + +@pytest.mark.parametrize( + ('code', 'message'), [ + ("f'{1+}'", ('invalid syntax')), + (r'f"\"', ('invalid syntax')), + (r'fr"\"', ('invalid syntax')), + ] +) +def test_invalid_fstrings(code, message): + """ + Some fstring errors are handled differntly in 3.6 and other versions. + Therefore check specifically for these errors here. 
+ """ + error, = _get_error_list(code, version='3.6') + assert message in error.message + + +@pytest.mark.parametrize( + 'code', [ + "from foo import (\nbar,\n rab,\n)", + "from foo import (bar, rab, )", + ] +) +def test_trailing_comma(code): + errors = _get_error_list(code) + assert not errors + + +def test_continue_in_finally(): + code = dedent('''\ + for a in [1]: + try: + pass + finally: + continue + ''') + assert not _get_error_list(code, version="3.8") + assert _get_error_list(code, version="3.7") + + +@pytest.mark.parametrize( + 'template', [ + "a, b, {target}, c = d", + "a, b, *{target}, c = d", + "(a, *{target}), c = d", + "for x, {target} in y: pass", + "for x, q, {target} in y: pass", + "for x, q, *{target} in y: pass", + "for (x, *{target}), q in y: pass", + ] +) +@pytest.mark.parametrize( + 'target', [ + "True", + "False", + "None", + "__debug__" + ] +) +def test_forbidden_name(template, target): + assert _get_error_list(template.format(target=target), version="3") + + +def test_repeated_kwarg(): + # python 3.9+ shows which argument is repeated + assert ( + _get_error_list("f(q=1, q=2)", version="3.8")[0].message + == "SyntaxError: keyword argument repeated" + ) + assert ( + _get_error_list("f(q=1, q=2)", version="3.9")[0].message + == "SyntaxError: keyword argument repeated: q" + ) + + +@pytest.mark.parametrize( + ('source', 'no_errors'), [ + ('a(a for a in b,)', False), + ('a(a for a in b, a)', False), + ('a(a, a for a in b)', False), + ('a(a, b, a for a in b, c, d)', False), + ('a(a for a in b)', True), + ('a((a for a in b), c)', True), + ('a(c, (a for a in b))', True), + ('a(a, b, (a for a in b), c, d)', True), + ] +) +def test_unparenthesized_genexp(source, no_errors): + assert bool(_get_error_list(source)) ^ no_errors + + +@pytest.mark.parametrize( + ('source', 'no_errors'), [ + ('*x = 2', False), + ('(*y) = 1', False), + ('((*z)) = 1', False), + ('*a,', True), + ('*a, = 1', True), + ('(*a,)', True), + ('(*a,) = 1', True), + ('[*a]', True), + ('[*a] = 1', True), + ('a, *b', True), + ('a, *b = 1', True), + ('a, *b, c', True), + ('a, *b, c = 1', True), + ('a, (*b, c), d', True), + ('a, (*b, c), d = 1', True), + ('*a.b,', True), + ('*a.b, = 1', True), + ('*a[b],', True), + ('*a[b], = 1', True), + ('*a[b::], c', True), + ('*a[b::], c = 1', True), + ('(a, *[b, c])', True), + ('(a, *[b, c]) = 1', True), + ('[a, *(b, [*c])]', True), + ('[a, *(b, [*c])] = 1', True), + ('[*(1,2,3)]', True), + ('{*(1,2,3)}', True), + ('[*(1,2,3),]', True), + ('[*(1,2,3), *(4,5,6)]', True), + ('[0, *(1,2,3)]', True), + ('{*(1,2,3),}', True), + ('{*(1,2,3), *(4,5,6)}', True), + ('{0, *(4,5,6)}', True) + ] +) +def test_starred_expr(source, no_errors): + assert bool(_get_error_list(source, version="3")) ^ no_errors + + +@pytest.mark.parametrize( + 'code', [ + 'a, (*b), c', + 'a, (*b), c = 1', + 'a, ((*b)), c', + 'a, ((*b)), c = 1', + ] +) +def test_parenthesized_single_starred_expr(code): + assert not _get_error_list(code, version='3.8') + assert _get_error_list(code, version='3.9') + + +@pytest.mark.parametrize( + 'code', [ + '() = ()', + '() = []', + '[] = ()', + '[] = []', + ] +) +def test_valid_empty_assignment(code): + assert not _get_error_list(code) + + +@pytest.mark.parametrize( + 'code', [ + 'del ()', + 'del []', + 'del x', + 'del x,', + 'del x, y', + 'del (x, y)', + 'del [x, y]', + 'del (x, [y, z])', + 'del x.y, x[y]', + 'del f(x)[y::]', + 'del x[[*y]]', + 'del x[[*y]::]', + ] +) +def test_valid_del(code): + assert not _get_error_list(code) + + +@pytest.mark.parametrize( + ('source', 
'version', 'no_errors'), [ + ('[x for x in range(10) if lambda: 1]', '3.8', True), + ('[x for x in range(10) if lambda: 1]', '3.9', False), + ('[x for x in range(10) if (lambda: 1)]', '3.9', True), + ] +) +def test_lambda_in_comp_if(source, version, no_errors): + assert bool(_get_error_list(source, version=version)) ^ no_errors diff --git a/contrib/python/parso/py3/tests/test_tokenize.py b/contrib/python/parso/py3/tests/test_tokenize.py new file mode 100644 index 0000000000..0029fc8a51 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_tokenize.py @@ -0,0 +1,429 @@ +# -*- coding: utf-8 # This file contains Unicode characters. + +from textwrap import dedent + +import pytest + +from parso.utils import split_lines, parse_version_string +from parso.python.token import PythonTokenTypes +from parso.python import tokenize +from parso import parse +from parso.python.tokenize import PythonToken + + +# To make it easier to access some of the token types, just put them here. +NAME = PythonTokenTypes.NAME +NEWLINE = PythonTokenTypes.NEWLINE +STRING = PythonTokenTypes.STRING +NUMBER = PythonTokenTypes.NUMBER +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT +ERRORTOKEN = PythonTokenTypes.ERRORTOKEN +OP = PythonTokenTypes.OP +ENDMARKER = PythonTokenTypes.ENDMARKER +ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT +FSTRING_START = PythonTokenTypes.FSTRING_START +FSTRING_STRING = PythonTokenTypes.FSTRING_STRING +FSTRING_END = PythonTokenTypes.FSTRING_END + + +def _get_token_list(string, version=None): + # Load the current version. + version_info = parse_version_string(version) + return list(tokenize.tokenize(string, version_info=version_info)) + + +def test_end_pos_one_line(): + parsed = parse(dedent(''' + def testit(): + a = "huhu" + ''')) + simple_stmt = next(parsed.iter_funcdefs()).get_suite().children[-1] + string = simple_stmt.children[0].get_rhs() + assert string.end_pos == (3, 14) + + +def test_end_pos_multi_line(): + parsed = parse(dedent(''' + def testit(): + a = """huhu + asdfasdf""" + "h" + ''')) + expr_stmt = next(parsed.iter_funcdefs()).get_suite().children[1].children[0] + string_leaf = expr_stmt.get_rhs().children[0] + assert string_leaf.end_pos == (4, 11) + + +def test_simple_no_whitespace(): + # Test a simple one line string, no preceding whitespace + simple_docstring = '"""simple one line docstring"""' + token_list = _get_token_list(simple_docstring) + _, value, _, prefix = token_list[0] + assert prefix == '' + assert value == '"""simple one line docstring"""' + + +def test_simple_with_whitespace(): + # Test a simple one line string with preceding whitespace and newline + simple_docstring = ' """simple one line docstring""" \r\n' + token_list = _get_token_list(simple_docstring) + assert token_list[0][0] == INDENT + typ, value, start_pos, prefix = token_list[1] + assert prefix == ' ' + assert value == '"""simple one line docstring"""' + assert typ == STRING + typ, value, start_pos, prefix = token_list[2] + assert prefix == ' ' + assert typ == NEWLINE + + +def test_function_whitespace(): + # Test function definition whitespace identification + fundef = dedent(''' + def test_whitespace(*args, **kwargs): + x = 1 + if x > 0: + print(True) + ''') + token_list = _get_token_list(fundef) + for _, value, _, prefix in token_list: + if value == 'test_whitespace': + assert prefix == ' ' + if value == '(': + assert prefix == '' + if value == '*': + assert prefix == '' + if value == '**': + assert prefix == ' ' + if value == 'print': + assert prefix == ' ' + if value == 'if': + 
assert prefix == ' ' + + +def test_tokenize_multiline_I(): + # Make sure multiline string having newlines have the end marker on the + # next line + fundef = '''""""\n''' + token_list = _get_token_list(fundef) + assert token_list == [PythonToken(ERRORTOKEN, '""""\n', (1, 0), ''), + PythonToken(ENDMARKER, '', (2, 0), '')] + + +def test_tokenize_multiline_II(): + # Make sure multiline string having no newlines have the end marker on + # same line + fundef = '''""""''' + token_list = _get_token_list(fundef) + assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''), + PythonToken(ENDMARKER, '', (1, 4), '')] + + +def test_tokenize_multiline_III(): + # Make sure multiline string having newlines have the end marker on the + # next line even if several newline + fundef = '''""""\n\n''' + token_list = _get_token_list(fundef) + assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''), + PythonToken(ENDMARKER, '', (3, 0), '')] + + +def test_identifier_contains_unicode(): + fundef = dedent(''' + def 我あφ(): + pass + ''') + token_list = _get_token_list(fundef) + unicode_token = token_list[1] + assert unicode_token[0] == NAME + + +def test_quoted_strings(): + string_tokens = [ + 'u"test"', + 'u"""test"""', + 'U"""test"""', + "u'''test'''", + "U'''test'''", + ] + + for s in string_tokens: + module = parse('''a = %s\n''' % s) + simple_stmt = module.children[0] + expr_stmt = simple_stmt.children[0] + assert len(expr_stmt.children) == 3 + string_tok = expr_stmt.children[2] + assert string_tok.type == 'string' + assert string_tok.value == s + + +def test_ur_literals(): + """ + Decided to parse `u''` literals regardless of Python version. This makes + probably sense: + + - Python 3+ doesn't support it, but it doesn't hurt + not be. While this is incorrect, it's just incorrect for one "old" and in + the future not very important version. + - All the other Python versions work very well with it. 
+ """ + def check(literal, is_literal=True): + token_list = _get_token_list(literal) + typ, result_literal, _, _ = token_list[0] + if is_literal: + if typ != FSTRING_START: + assert typ == STRING + assert result_literal == literal + else: + assert typ == NAME + + check('u""') + check('ur""', is_literal=False) + check('Ur""', is_literal=False) + check('UR""', is_literal=False) + check('bR""') + check('Rb""') + + check('fr""') + check('rF""') + check('f""') + check('F""') + + +def test_error_literal(): + error_token, newline, endmarker = _get_token_list('"\n') + assert error_token.type == ERRORTOKEN + assert error_token.string == '"' + assert newline.type == NEWLINE + assert endmarker.type == ENDMARKER + assert endmarker.prefix == '' + + bracket, error_token, endmarker = _get_token_list('( """') + assert error_token.type == ERRORTOKEN + assert error_token.prefix == ' ' + assert error_token.string == '"""' + assert endmarker.type == ENDMARKER + assert endmarker.prefix == '' + + +def test_endmarker_end_pos(): + def check(code): + tokens = _get_token_list(code) + lines = split_lines(code) + assert tokens[-1].end_pos == (len(lines), len(lines[-1])) + + check('#c') + check('#c\n') + check('a\n') + check('a') + check(r'a\\n') + check('a\\') + + +@pytest.mark.parametrize( + ('code', 'types'), [ + # Indentation + (' foo', [INDENT, NAME, DEDENT]), + (' foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]), + (' foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, + NEWLINE, NAME, DEDENT]), + (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]), + + # Name stuff + ('1foo1', [NUMBER, NAME]), + ('மெல்லினம்', [NAME]), + ('²', [ERRORTOKEN]), + ('ä²ö', [NAME, ERRORTOKEN, NAME]), + ('ää²¹öö', [NAME, ERRORTOKEN, NAME]), + (' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]), + (dedent('''\ + class BaseCache: + a + def + b + def + c + '''), [NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, + ERROR_DEDENT, NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, + NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, DEDENT]), + (' )\n foo', [INDENT, OP, NEWLINE, ERROR_DEDENT, NAME, DEDENT]), + ('a\n b\n )\n c', [NAME, NEWLINE, INDENT, NAME, NEWLINE, INDENT, OP, + NEWLINE, DEDENT, NAME, DEDENT]), + (' 1 \\\ndef', [INDENT, NUMBER, NAME, DEDENT]), + ] +) +def test_token_types(code, types): + actual_types = [t.type for t in _get_token_list(code)] + assert actual_types == types + [ENDMARKER] + + +def test_error_string(): + indent, t1, newline, token, endmarker = _get_token_list(' "\n') + assert t1.type == ERRORTOKEN + assert t1.prefix == ' ' + assert t1.string == '"' + assert newline.type == NEWLINE + assert endmarker.prefix == '' + assert endmarker.string == '' + + +def test_indent_error_recovery(): + code = dedent("""\ + str( + from x import a + def + """) + lst = _get_token_list(code) + expected = [ + # `str(` + INDENT, NAME, OP, + # `from parso` + NAME, NAME, + # `import a` on same line as the previous from parso + NAME, NAME, NEWLINE, + # Dedent happens, because there's an import now and the import + # statement "breaks" out of the opening paren on the first line. 
+ DEDENT, + # `b` + NAME, NEWLINE, ENDMARKER] + assert [t.type for t in lst] == expected + + +def test_error_token_after_dedent(): + code = dedent("""\ + class C: + pass + $foo + """) + lst = _get_token_list(code) + expected = [ + NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, + # $foo\n + ERRORTOKEN, NAME, NEWLINE, ENDMARKER + ] + assert [t.type for t in lst] == expected + + +def test_brackets_no_indentation(): + """ + There used to be an issue that the parentheses counting would go below + zero. This should not happen. + """ + code = dedent("""\ + } + { + } + """) + lst = _get_token_list(code) + assert [t.type for t in lst] == [OP, NEWLINE, OP, OP, NEWLINE, ENDMARKER] + + +def test_form_feed(): + indent, error_token, dedent_, endmarker = _get_token_list(dedent('''\ + \f"""''')) + assert error_token.prefix == '\f' + assert error_token.string == '"""' + assert endmarker.prefix == '' + assert indent.type == INDENT + assert dedent_.type == DEDENT + + +def test_carriage_return(): + lst = _get_token_list(' =\\\rclass') + assert [t.type for t in lst] == [INDENT, OP, NAME, DEDENT, ENDMARKER] + + +def test_backslash(): + code = '\\\n# 1 \n' + endmarker, = _get_token_list(code) + assert endmarker.prefix == code + + +@pytest.mark.parametrize( + ('code', 'types'), [ + # f-strings + ('f"', [FSTRING_START]), + ('f""', [FSTRING_START, FSTRING_END]), + ('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]), + ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]), + (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + + # format spec + (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP, + FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]), + + # multiline f-string + ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + ('f"""abc{\n123}def"""', [ + FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING, + FSTRING_END + ]), + + # a line continuation inside of an fstring_string + ('f"abc\\\ndef"', [ + FSTRING_START, FSTRING_STRING, FSTRING_END + ]), + ('f"\\\n{123}\\\n"', [ + FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING, + FSTRING_END + ]), + + # a line continuation inside of an fstring_expr + ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]), + + # a line continuation inside of an format spec + ('f"{123:.2\\\nf}"', [ + FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END + ]), + + # a newline without a line continuation inside a single-line string is + # wrong, and will generate an ERRORTOKEN + ('f"abc\ndef"', [ + FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN + ]), + + # a more complex example + (r'print(f"Some {x:.2f}a{y}")', [ + NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP, + FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP + ]), + # issue #86, a string-like in an f-string expression + ('f"{ ""}"', [ + FSTRING_START, OP, FSTRING_END, STRING + ]), + ('f"{ f""}"', [ + FSTRING_START, OP, NAME, FSTRING_END, STRING + ]), + ] +) +def test_fstring_token_types(code, types, each_version): + actual_types = [t.type for t in _get_token_list(code, each_version)] + assert types + [ENDMARKER] == actual_types + + +@pytest.mark.parametrize( + ('code', 'types'), [ + # issue #87, `:=` in the outest paratheses should be tokenized + # as a format spec marker and part of the format + ('f"{x:=10}"', [ + FSTRING_START, OP, NAME, OP, FSTRING_STRING, OP, FSTRING_END + ]), + ('f"{(x:=10)}"', [ + FSTRING_START, OP, OP, NAME, 
OP, NUMBER, OP, OP, FSTRING_END + ]), + ] +) +def test_fstring_assignment_expression(code, types, version_ge_py38): + actual_types = [t.type for t in _get_token_list(code, version_ge_py38)] + assert types + [ENDMARKER] == actual_types + + +def test_fstring_end_error_pos(version_ge_py38): + f_start, f_string, bracket, f_end, endmarker = \ + _get_token_list('f" { "', version_ge_py38) + assert f_start.start_pos == (1, 0) + assert f_string.start_pos == (1, 2) + assert bracket.start_pos == (1, 3) + assert f_end.start_pos == (1, 5) + assert endmarker.start_pos == (1, 6) diff --git a/contrib/python/parso/py3/tests/test_utils.py b/contrib/python/parso/py3/tests/test_utils.py new file mode 100644 index 0000000000..300a54ebc2 --- /dev/null +++ b/contrib/python/parso/py3/tests/test_utils.py @@ -0,0 +1,107 @@ +from codecs import BOM_UTF8 + +from parso.utils import ( + split_lines, + parse_version_string, + python_bytes_to_unicode, +) + +import parso + +import pytest + + +@pytest.mark.parametrize( + ('string', 'expected_result', 'keepends'), [ + ('asd\r\n', ['asd', ''], False), + ('asd\r\n', ['asd\r\n', ''], True), + ('asd\r', ['asd', ''], False), + ('asd\r', ['asd\r', ''], True), + ('asd\n', ['asd', ''], False), + ('asd\n', ['asd\n', ''], True), + + ('asd\r\n\f', ['asd', '\f'], False), + ('asd\r\n\f', ['asd\r\n', '\f'], True), + + ('\fasd\r\n', ['\fasd', ''], False), + ('\fasd\r\n', ['\fasd\r\n', ''], True), + + ('', [''], False), + ('', [''], True), + + ('\n', ['', ''], False), + ('\n', ['\n', ''], True), + + ('\r', ['', ''], False), + ('\r', ['\r', ''], True), + + # Invalid line breaks + ('a\vb', ['a\vb'], False), + ('a\vb', ['a\vb'], True), + ('\x1C', ['\x1C'], False), + ('\x1C', ['\x1C'], True), + ] +) +def test_split_lines(string, expected_result, keepends): + assert split_lines(string, keepends=keepends) == expected_result + + +def test_python_bytes_to_unicode_unicode_text(): + source = ( + b"# vim: fileencoding=utf-8\n" + b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n" + ) + actual = python_bytes_to_unicode(source) + expected = source.decode('utf-8') + assert actual == expected + + +def test_utf8_bom(): + unicode_bom = BOM_UTF8.decode('utf-8') + + module = parso.parse(unicode_bom) + endmarker = module.children[0] + assert endmarker.type == 'endmarker' + assert unicode_bom == endmarker.prefix + + module = parso.parse(unicode_bom + 'foo = 1') + expr_stmt = module.children[0] + assert expr_stmt.type == 'expr_stmt' + assert unicode_bom == expr_stmt.get_first_leaf().prefix + + +@pytest.mark.parametrize( + ('code', 'errors'), [ + (b'# coding: wtf-12\nfoo', 'strict'), + (b'# coding: wtf-12\nfoo', 'replace'), + (b'# coding: wtf-12\r\nfoo', 'strict'), + (b'# coding: wtf-12\r\nfoo', 'replace'), + (b'# coding: wtf-12\rfoo', 'strict'), + (b'# coding: wtf-12\rfoo', 'replace'), + ] +) +def test_bytes_to_unicode_failing_encoding(code, errors): + if errors == 'strict': + with pytest.raises(LookupError): + python_bytes_to_unicode(code, errors=errors) + else: + python_bytes_to_unicode(code, errors=errors) + + +@pytest.mark.parametrize( + ('version_str', 'version'), [ + ('3', (3,)), + ('3.6', (3, 6)), + ('3.6.10', (3, 6)), + ('3.10', (3, 10)), + ('3.10a9', (3, 10)), + ('3.10b9', (3, 10)), + ('3.10rc9', (3, 10)), + ] +) +def test_parse_version_string(version_str, version): + parsed_version = parse_version_string(version_str) + if len(version) == 1: + assert parsed_version[0] == version[0] + else: + assert parsed_version == version diff --git a/contrib/python/parso/py3/tests/ya.make 
b/contrib/python/parso/py3/tests/ya.make new file mode 100644 index 0000000000..468e032926 --- /dev/null +++ b/contrib/python/parso/py3/tests/ya.make @@ -0,0 +1,39 @@ +PY3TEST() + +PEERDIR( + contrib/python/parso +) + +DATA( + arcadia/contrib/python/parso/py3/tests +) + +TEST_SRCS( + __init__.py + conftest.py + failing_examples.py + test_cache.py + test_diff_parser.py + test_dump_tree.py + test_error_recovery.py + test_file_python_errors.py + test_fstring.py + test_get_code.py + test_grammar.py + test_load_grammar.py + test_normalizer_issues_files.py + test_old_fast_parser.py + test_param_splitting.py + test_parser.py + test_parser_tree.py + test_pep8.py + test_pgen2.py + test_prefix.py + test_python_errors.py + test_tokenize.py + test_utils.py +) + +NO_LINT() + +END() diff --git a/contrib/python/parso/py3/ya.make b/contrib/python/parso/py3/ya.make new file mode 100644 index 0000000000..fa4210f7c5 --- /dev/null +++ b/contrib/python/parso/py3/ya.make @@ -0,0 +1,54 @@ +# Generated by devtools/yamaker (pypi). + +PY3_LIBRARY() + +VERSION(0.8.3) + +LICENSE(PSF-2.0) + +NO_LINT() + +PY_SRCS( + TOP_LEVEL + parso/__init__.py + parso/_compatibility.py + parso/cache.py + parso/file_io.py + parso/grammar.py + parso/normalizer.py + parso/parser.py + parso/pgen2/__init__.py + parso/pgen2/generator.py + parso/pgen2/grammar_parser.py + parso/python/__init__.py + parso/python/diff.py + parso/python/errors.py + parso/python/parser.py + parso/python/pep8.py + parso/python/prefix.py + parso/python/token.py + parso/python/tokenize.py + parso/python/tree.py + parso/tree.py + parso/utils.py +) + +RESOURCE_FILES( + PREFIX contrib/python/parso/py3/ + .dist-info/METADATA + .dist-info/top_level.txt + parso/py.typed + parso/python/grammar310.txt + parso/python/grammar311.txt + parso/python/grammar312.txt + parso/python/grammar36.txt + parso/python/grammar37.txt + parso/python/grammar38.txt + parso/python/grammar39.txt +) + +END() + +RECURSE_FOR_TESTS( + tests +) diff --git a/contrib/python/parso/ya.make b/contrib/python/parso/ya.make new file mode 100644 index 0000000000..b47642bc39 --- /dev/null +++ b/contrib/python/parso/ya.make @@ -0,0 +1,18 @@ +PY23_LIBRARY() + +LICENSE(Service-Py23-Proxy) + +IF (PYTHON2) + PEERDIR(contrib/python/parso/py2) +ELSE() + PEERDIR(contrib/python/parso/py3) +ENDIF() + +NO_LINT() + +END() + +RECURSE( + py2 + py3 +) |