| author | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
|---|---|---|
| committer | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
| commit | 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch) | |
| tree | 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /contrib/python/parso/py2/tests/test_tokenize.py | |
| parent | 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff) | |
| download | ydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz | |
fix ya.make
Diffstat (limited to 'contrib/python/parso/py2/tests/test_tokenize.py')
| -rw-r--r-- | contrib/python/parso/py2/tests/test_tokenize.py | 443 |
1 file changed, 0 insertions(+), 443 deletions(-)
diff --git a/contrib/python/parso/py2/tests/test_tokenize.py b/contrib/python/parso/py2/tests/test_tokenize.py
deleted file mode 100644
index 7afa3737d2..0000000000
--- a/contrib/python/parso/py2/tests/test_tokenize.py
+++ /dev/null
@@ -1,443 +0,0 @@
-# -*- coding: utf-8 # This file contains Unicode characters.
-
-import sys
-from textwrap import dedent
-
-import pytest
-
-from parso.utils import split_lines, parse_version_string
-from parso.python.token import PythonTokenTypes
-from parso.python import tokenize
-from parso import parse
-from parso.python.tokenize import PythonToken
-
-
-# To make it easier to access some of the token types, just put them here.
-NAME = PythonTokenTypes.NAME
-NEWLINE = PythonTokenTypes.NEWLINE
-STRING = PythonTokenTypes.STRING
-NUMBER = PythonTokenTypes.NUMBER
-INDENT = PythonTokenTypes.INDENT
-DEDENT = PythonTokenTypes.DEDENT
-ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
-OP = PythonTokenTypes.OP
-ENDMARKER = PythonTokenTypes.ENDMARKER
-ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
-FSTRING_START = PythonTokenTypes.FSTRING_START
-FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
-FSTRING_END = PythonTokenTypes.FSTRING_END
-
-
-def _get_token_list(string, version=None):
-    # Load the current version.
-    version_info = parse_version_string(version)
-    return list(tokenize.tokenize(string, version_info))
-
-
-def test_end_pos_one_line():
-    parsed = parse(dedent('''
-    def testit():
-        a = "huhu"
-    '''))
-    simple_stmt = next(parsed.iter_funcdefs()).get_suite().children[-1]
-    string = simple_stmt.children[0].get_rhs()
-    assert string.end_pos == (3, 14)
-
-
-def test_end_pos_multi_line():
-    parsed = parse(dedent('''
-    def testit():
-        a = """huhu
-asdfasdf""" + "h"
-    '''))
-    expr_stmt = next(parsed.iter_funcdefs()).get_suite().children[1].children[0]
-    string_leaf = expr_stmt.get_rhs().children[0]
-    assert string_leaf.end_pos == (4, 11)
-
-
-def test_simple_no_whitespace():
-    # Test a simple one line string, no preceding whitespace
-    simple_docstring = '"""simple one line docstring"""'
-    token_list = _get_token_list(simple_docstring)
-    _, value, _, prefix = token_list[0]
-    assert prefix == ''
-    assert value == '"""simple one line docstring"""'
-
-
-def test_simple_with_whitespace():
-    # Test a simple one line string with preceding whitespace and newline
-    simple_docstring = '  """simple one line docstring""" \r\n'
-    token_list = _get_token_list(simple_docstring)
-    assert token_list[0][0] == INDENT
-    typ, value, start_pos, prefix = token_list[1]
-    assert prefix == '  '
-    assert value == '"""simple one line docstring"""'
-    assert typ == STRING
-    typ, value, start_pos, prefix = token_list[2]
-    assert prefix == ' '
-    assert typ == NEWLINE
-
-
-def test_function_whitespace():
-    # Test function definition whitespace identification
-    fundef = dedent('''
-    def test_whitespace(*args, **kwargs):
-        x = 1
-        if x > 0:
-            print(True)
-    ''')
-    token_list = _get_token_list(fundef)
-    for _, value, _, prefix in token_list:
-        if value == 'test_whitespace':
-            assert prefix == ' '
-        if value == '(':
-            assert prefix == ''
-        if value == '*':
-            assert prefix == ''
-        if value == '**':
-            assert prefix == ' '
-        if value == 'print':
-            assert prefix == '        '
-        if value == 'if':
-            assert prefix == '    '
-
-
-def test_tokenize_multiline_I():
-    # Make sure multiline string having newlines have the end marker on the
-    # next line
-    fundef = '''""""\n'''
-    token_list = _get_token_list(fundef)
-    assert token_list == [PythonToken(ERRORTOKEN, '""""\n', (1, 0), ''),
-                          PythonToken(ENDMARKER, '', (2, 0), '')]
-
-
-def test_tokenize_multiline_II():
-    # Make sure multiline string having no newlines have the end marker on
-    # same line
-    fundef = '''""""'''
-    token_list = _get_token_list(fundef)
-    assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''),
-                          PythonToken(ENDMARKER, '', (1, 4), '')]
-
-
-def test_tokenize_multiline_III():
-    # Make sure multiline string having newlines have the end marker on the
-    # next line even if several newline
-    fundef = '''""""\n\n'''
-    token_list = _get_token_list(fundef)
-    assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''),
-                          PythonToken(ENDMARKER, '', (3, 0), '')]
-
-
-def test_identifier_contains_unicode():
-    fundef = dedent('''
-    def 我あφ():
-        pass
-    ''')
-    token_list = _get_token_list(fundef)
-    unicode_token = token_list[1]
-    if sys.version_info.major >= 3:
-        assert unicode_token[0] == NAME
-    else:
-        # Unicode tokens in Python 2 seem to be identified as operators.
-        # They will be ignored in the parser, that's ok.
-        assert unicode_token[0] == ERRORTOKEN
-
-
-def test_quoted_strings():
-    string_tokens = [
-        'u"test"',
-        'u"""test"""',
-        'U"""test"""',
-        "u'''test'''",
-        "U'''test'''",
-    ]
-
-    for s in string_tokens:
-        module = parse('''a = %s\n''' % s)
-        simple_stmt = module.children[0]
-        expr_stmt = simple_stmt.children[0]
-        assert len(expr_stmt.children) == 3
-        string_tok = expr_stmt.children[2]
-        assert string_tok.type == 'string'
-        assert string_tok.value == s
-
-
-def test_ur_literals():
-    """
-    Decided to parse `u''` literals regardless of Python version. This makes
-    probably sense:
-
-    - Python 3+ doesn't support it, but it doesn't hurt
-      not be. While this is incorrect, it's just incorrect for one "old" and in
-      the future not very important version.
-    - All the other Python versions work very well with it.
-    """
-    def check(literal, is_literal=True):
-        token_list = _get_token_list(literal)
-        typ, result_literal, _, _ = token_list[0]
-        if is_literal:
-            if typ != FSTRING_START:
-                assert typ == STRING
-                assert result_literal == literal
-        else:
-            assert typ == NAME
-
-    check('u""')
-    check('ur""', is_literal=not sys.version_info.major >= 3)
-    check('Ur""', is_literal=not sys.version_info.major >= 3)
-    check('UR""', is_literal=not sys.version_info.major >= 3)
-    check('bR""')
-    # Starting with Python 3.3 this ordering is also possible.
-    if sys.version_info.major >= 3:
-        check('Rb""')
-
-    # Starting with Python 3.6 format strings where introduced.
-    check('fr""', is_literal=sys.version_info >= (3, 6))
-    check('rF""', is_literal=sys.version_info >= (3, 6))
-    check('f""', is_literal=sys.version_info >= (3, 6))
-    check('F""', is_literal=sys.version_info >= (3, 6))
-
-
-def test_error_literal():
-    error_token, newline, endmarker = _get_token_list('"\n')
-    assert error_token.type == ERRORTOKEN
-    assert error_token.string == '"'
-    assert newline.type == NEWLINE
-    assert endmarker.type == ENDMARKER
-    assert endmarker.prefix == ''
-
-    bracket, error_token, endmarker = _get_token_list('( """')
-    assert error_token.type == ERRORTOKEN
-    assert error_token.prefix == ' '
-    assert error_token.string == '"""'
-    assert endmarker.type == ENDMARKER
-    assert endmarker.prefix == ''
-
-
-def test_endmarker_end_pos():
-    def check(code):
-        tokens = _get_token_list(code)
-        lines = split_lines(code)
-        assert tokens[-1].end_pos == (len(lines), len(lines[-1]))
-
-    check('#c')
-    check('#c\n')
-    check('a\n')
-    check('a')
-    check(r'a\\n')
-    check('a\\')
-
-
-xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Python 2')])
-
-
-@pytest.mark.parametrize(
-    ('code', 'types'), [
-        # Indentation
-        (' foo', [INDENT, NAME, DEDENT]),
-        ('  foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
-        ('  foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME,
-                                NEWLINE, NAME, DEDENT]),
-        (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),
-
-        # Name stuff
-        ('1foo1', [NUMBER, NAME]),
-        pytest.param(
-            u'மெல்லினம்', [NAME],
-            **xfail_py2),
-        pytest.param(u'²', [ERRORTOKEN], **xfail_py2),
-        pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
-        pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
-        (' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]),
-        (dedent('''\
-            class BaseCache:
-                    a
-                def
-                    b
-                def
-                    c
-            '''), [NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE,
-                   ERROR_DEDENT, NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
-                   NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, DEDENT]),
-        ('  )\n foo', [INDENT, OP, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
-        ('a\n b\n  )\n c', [NAME, NEWLINE, INDENT, NAME, NEWLINE, INDENT, OP,
-                            NEWLINE, DEDENT, NAME, DEDENT]),
-        (' 1 \\\ndef', [INDENT, NUMBER, NAME, DEDENT]),
-    ]
-)
-def test_token_types(code, types):
-    actual_types = [t.type for t in _get_token_list(code)]
-    assert actual_types == types + [ENDMARKER]
-
-
-def test_error_string():
-    indent, t1, newline, token, endmarker = _get_token_list(' "\n')
-    assert t1.type == ERRORTOKEN
-    assert t1.prefix == ' '
-    assert t1.string == '"'
-    assert newline.type == NEWLINE
-    assert endmarker.prefix == ''
-    assert endmarker.string == ''
-
-
-def test_indent_error_recovery():
-    code = dedent("""\
-                        str(
-        from x import a
-        def
-        """)
-    lst = _get_token_list(code)
-    expected = [
-        # `str(`
-        INDENT, NAME, OP,
-        # `from parso`
-        NAME, NAME,
-        # `import a` on same line as the previous from parso
-        NAME, NAME, NEWLINE,
-        # Dedent happens, because there's an import now and the import
-        # statement "breaks" out of the opening paren on the first line.
-        DEDENT,
-        # `b`
-        NAME, NEWLINE, ENDMARKER]
-    assert [t.type for t in lst] == expected
-
-
-def test_error_token_after_dedent():
-    code = dedent("""\
-        class C:
-            pass
-        $foo
-        """)
-    lst = _get_token_list(code)
-    expected = [
-        NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
-        # $foo\n
-        ERRORTOKEN, NAME, NEWLINE, ENDMARKER
-    ]
-    assert [t.type for t in lst] == expected
-
-
-def test_brackets_no_indentation():
-    """
-    There used to be an issue that the parentheses counting would go below
-    zero. This should not happen.
-    """
-    code = dedent("""\
-        }
-        {
-          }
-        """)
-    lst = _get_token_list(code)
-    assert [t.type for t in lst] == [OP, NEWLINE, OP, OP, NEWLINE, ENDMARKER]
-
-
-def test_form_feed():
-    indent, error_token, dedent_, endmarker = _get_token_list(dedent('''\
-        \f"""'''))
-    assert error_token.prefix == '\f'
-    assert error_token.string == '"""'
-    assert endmarker.prefix == ''
-    assert indent.type == INDENT
-    assert dedent_.type == DEDENT
-
-
-def test_carriage_return():
-    lst = _get_token_list(' =\\\rclass')
-    assert [t.type for t in lst] == [INDENT, OP, NAME, DEDENT, ENDMARKER]
-
-
-def test_backslash():
-    code = '\\\n# 1 \n'
-    endmarker, = _get_token_list(code)
-    assert endmarker.prefix == code
-
-
-@pytest.mark.parametrize(
-    ('code', 'types'), [
-        # f-strings
-        ('f"', [FSTRING_START]),
-        ('f""', [FSTRING_START, FSTRING_END]),
-        ('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]),
-        ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
-        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
-        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
-
-        # format spec
-        (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
-                                 FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
-
-        # multiline f-string
-        ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
-        ('f"""abc{\n123}def"""', [
-            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
-            FSTRING_END
-        ]),
-
-        # a line continuation inside of an fstring_string
-        ('f"abc\\\ndef"', [
-            FSTRING_START, FSTRING_STRING, FSTRING_END
-        ]),
-        ('f"\\\n{123}\\\n"', [
-            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
-            FSTRING_END
-        ]),
-
-        # a line continuation inside of an fstring_expr
-        ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]),
-
-        # a line continuation inside of an format spec
-        ('f"{123:.2\\\nf}"', [
-            FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END
-        ]),
-
-        # a newline without a line continuation inside a single-line string is
-        # wrong, and will generate an ERRORTOKEN
-        ('f"abc\ndef"', [
-            FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN
-        ]),
-
-        # a more complex example
-        (r'print(f"Some {x:.2f}a{y}")', [
-            NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
-            FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
-        ]),
-        # issue #86, a string-like in an f-string expression
-        ('f"{ ""}"', [
-            FSTRING_START, OP, FSTRING_END, STRING
-        ]),
-        ('f"{ f""}"', [
-            FSTRING_START, OP, NAME, FSTRING_END, STRING
-        ]),
-    ]
-)
-def test_fstring_token_types(code, types, version_ge_py36):
-    actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
-    assert types + [ENDMARKER] == actual_types
-
-
-@pytest.mark.parametrize(
-    ('code', 'types'), [
-        # issue #87, `:=` in the outest paratheses should be tokenized
-        # as a format spec marker and part of the format
-        ('f"{x:=10}"', [
-            FSTRING_START, OP, NAME, OP, FSTRING_STRING, OP, FSTRING_END
-        ]),
-        ('f"{(x:=10)}"', [
-            FSTRING_START, OP, OP, NAME, OP, NUMBER, OP, OP, FSTRING_END
-        ]),
-    ]
-)
-def test_fstring_assignment_expression(code, types, version_ge_py38):
-    actual_types = [t.type for t in _get_token_list(code, version_ge_py38)]
-    assert types + [ENDMARKER] == actual_types
-
-
-def test_fstring_end_error_pos(version_ge_py38):
-    f_start, f_string, bracket, f_end, endmarker = \
-        _get_token_list('f" { "', version_ge_py38)
-    assert f_start.start_pos == (1, 0)
-    assert f_string.start_pos == (1, 2)
-    assert bracket.start_pos == (1, 3)
-    assert f_end.start_pos == (1, 5)
-    assert endmarker.start_pos == (1, 6)
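
For reference, every test in the deleted file drives the tokenizer through the `_get_token_list` helper defined at its top. Below is a minimal sketch of that usage, assuming the vendored parso 0.7.x API (where `tokenize.tokenize` accepts `version_info` positionally and yields `PythonToken` namedtuples with the `type`, `string`, `start_pos`, and `prefix` fields asserted throughout the tests):

```python
# Sketch of the tokenizer API exercised by the deleted tests; the helper
# mirrors the file's _get_token_list. Assumes parso 0.7.x, where
# tokenize.tokenize() takes version_info as a positional argument.
from parso.utils import parse_version_string
from parso.python import tokenize


def get_token_list(string, version=None):
    # parse_version_string(None) falls back to the running Python version.
    version_info = parse_version_string(version)
    return list(tokenize.tokenize(string, version_info))


# Each PythonToken is a namedtuple of (type, string, start_pos, prefix).
for token in get_token_list('a = "huhu"\n'):
    print(token.type, repr(token.string), token.start_pos, repr(token.prefix))

# An explicit version string enables version-dependent tokens, e.g.
# FSTRING_START/FSTRING_STRING/FSTRING_END for 3.6 and later.
print([t.type for t in get_token_list('f"{x}"', '3.6')])
```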