author    monster <monster@ydb.tech> 2022-07-07 14:41:37 +0300
committer monster <monster@ydb.tech> 2022-07-07 14:41:37 +0300
commit    06e5c21a835c0e923506c4ff27929f34e00761c2 (patch)
tree      75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /contrib/python/parso/py2/tests/test_tokenize.py
parent    03f024c4412e3aa613bb543cf1660176320ba8f4 (diff)
download  ydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz
fix ya.make
Diffstat (limited to 'contrib/python/parso/py2/tests/test_tokenize.py')
-rw-r--r--  contrib/python/parso/py2/tests/test_tokenize.py  443
1 file changed, 0 insertions(+), 443 deletions(-)
diff --git a/contrib/python/parso/py2/tests/test_tokenize.py b/contrib/python/parso/py2/tests/test_tokenize.py
deleted file mode 100644
index 7afa3737d2..0000000000
--- a/contrib/python/parso/py2/tests/test_tokenize.py
+++ /dev/null
@@ -1,443 +0,0 @@
-# -*- coding: utf-8 -*-  # This file contains Unicode characters.
-
-import sys
-from textwrap import dedent
-
-import pytest
-
-from parso.utils import split_lines, parse_version_string
-from parso.python.token import PythonTokenTypes
-from parso.python import tokenize
-from parso import parse
-from parso.python.tokenize import PythonToken
-
-
-# To make it easier to access some of the token types, just put them here.
-NAME = PythonTokenTypes.NAME
-NEWLINE = PythonTokenTypes.NEWLINE
-STRING = PythonTokenTypes.STRING
-NUMBER = PythonTokenTypes.NUMBER
-INDENT = PythonTokenTypes.INDENT
-DEDENT = PythonTokenTypes.DEDENT
-ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
-OP = PythonTokenTypes.OP
-ENDMARKER = PythonTokenTypes.ENDMARKER
-ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
-FSTRING_START = PythonTokenTypes.FSTRING_START
-FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
-FSTRING_END = PythonTokenTypes.FSTRING_END
-
-
-def _get_token_list(string, version=None):
-    # Use the given version, falling back to the currently running one.
-    version_info = parse_version_string(version)
-    return list(tokenize.tokenize(string, version_info))
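-
-
-# An illustrative sketch (not part of the original test suite): each token
-# yielded above is a `PythonToken` namedtuple of
-# (type, string, start_pos, prefix), with a (1-based row, 0-based column)
-# start position.
-def _example_token_shape():
-    typ, string, start_pos, prefix = _get_token_list('x = 1\n')[0]
-    assert typ == NAME
-    assert string == 'x'
-    assert start_pos == (1, 0)
-    assert prefix == ''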
-
-
-def test_end_pos_one_line():
-    parsed = parse(dedent('''
-    def testit():
-        a = "huhu"
-    '''))
-    simple_stmt = next(parsed.iter_funcdefs()).get_suite().children[-1]
-    string = simple_stmt.children[0].get_rhs()
-    assert string.end_pos == (3, 14)
-
-
-def test_end_pos_multi_line():
-    parsed = parse(dedent('''
-    def testit():
-        a = """huhu
-    asdfasdf""" + "h"
-    '''))
-    expr_stmt = next(parsed.iter_funcdefs()).get_suite().children[1].children[0]
-    string_leaf = expr_stmt.get_rhs().children[0]
-    assert string_leaf.end_pos == (4, 11)
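-
-
-# An illustrative sketch (not in the original suite): `end_pos` uses the same
-# (row, column) convention as `start_pos` and points just past the last
-# character of the leaf.
-def _example_end_pos():
-    string_leaf = parse('x = "ab"\n').children[0].children[0].get_rhs()
-    assert string_leaf.start_pos == (1, 4)
-    assert string_leaf.end_pos == (1, 8)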
-
-
-def test_simple_no_whitespace():
-    # Test a simple one line string, no preceding whitespace
-    simple_docstring = '"""simple one line docstring"""'
-    token_list = _get_token_list(simple_docstring)
-    _, value, _, prefix = token_list[0]
-    assert prefix == ''
-    assert value == '"""simple one line docstring"""'
-
-
-def test_simple_with_whitespace():
-    # Test a simple one line string with preceding whitespace and newline
-    simple_docstring = ' """simple one line docstring""" \r\n'
-    token_list = _get_token_list(simple_docstring)
-    assert token_list[0][0] == INDENT
-    typ, value, start_pos, prefix = token_list[1]
-    assert prefix == ' '
-    assert value == '"""simple one line docstring"""'
-    assert typ == STRING
-    typ, value, start_pos, prefix = token_list[2]
-    assert prefix == ' '
-    assert typ == NEWLINE
-
-
-def test_function_whitespace():
-    # Test function definition whitespace identification
-    fundef = dedent('''
-    def test_whitespace(*args, **kwargs):
-        x = 1
-        if x > 0:
-            print(True)
-    ''')
-    token_list = _get_token_list(fundef)
-    for _, value, _, prefix in token_list:
-        if value == 'test_whitespace':
-            assert prefix == ' '
-        if value == '(':
-            assert prefix == ''
-        if value == '*':
-            assert prefix == ''
-        if value == '**':
-            assert prefix == ' '
-        if value == 'print':
-            assert prefix == '        '
-        if value == 'if':
-            assert prefix == '    '
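-
-
-# An illustrative sketch (not in the original suite): a token's `prefix`
-# holds everything between the previous token and the token itself,
-# including comments.
-def _example_prefix():
-    token_list = _get_token_list('1 +  2  # comment\n')
-    _, value, _, prefix = token_list[1]
-    assert value == '+' and prefix == ' '
-    _, value, _, prefix = token_list[2]
-    assert value == '2' and prefix == '  '
-    typ, _, _, prefix = token_list[3]
-    assert typ == NEWLINE and prefix == '  # comment'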
-
-
-def test_tokenize_multiline_I():
-    # Make sure a multiline string containing a newline has the end marker on
-    # the following line.
-    fundef = '''""""\n'''
-    token_list = _get_token_list(fundef)
-    assert token_list == [PythonToken(ERRORTOKEN, '""""\n', (1, 0), ''),
-                          PythonToken(ENDMARKER, '', (2, 0), '')]
-
-
-def test_tokenize_multiline_II():
-    # Make sure a multiline string without newlines has the end marker on the
-    # same line.
-    fundef = '''""""'''
-    token_list = _get_token_list(fundef)
-    assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''),
-                          PythonToken(ENDMARKER, '', (1, 4), '')]
-
-
-def test_tokenize_multiline_III():
-    # Make sure a multiline string containing newlines has the end marker on
-    # the next line, even after several newlines.
-    fundef = '''""""\n\n'''
-    token_list = _get_token_list(fundef)
-    assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''),
-                          PythonToken(ENDMARKER, '', (3, 0), '')]
-
-
-def test_identifier_contains_unicode():
-    fundef = dedent('''
-    def 我あφ():
-        pass
-    ''')
-    token_list = _get_token_list(fundef)
-    unicode_token = token_list[1]
-    if sys.version_info.major >= 3:
-        assert unicode_token[0] == NAME
-    else:
-        # Unicode tokens in Python 2 seem to be identified as operators.
-        # They will be ignored in the parser, that's ok.
-        assert unicode_token[0] == ERRORTOKEN
-
-
-def test_quoted_strings():
-    string_tokens = [
-        'u"test"',
-        'u"""test"""',
-        'U"""test"""',
-        "u'''test'''",
-        "U'''test'''",
-    ]
-
-    for s in string_tokens:
-        module = parse('''a = %s\n''' % s)
-        simple_stmt = module.children[0]
-        expr_stmt = simple_stmt.children[0]
-        assert len(expr_stmt.children) == 3
-        string_tok = expr_stmt.children[2]
-        assert string_tok.type == 'string'
-        assert string_tok.value == s
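-
-
-# An illustrative sketch (not in the original suite): at the tokenizer level
-# a prefixed string literal is one single STRING token, prefix characters
-# included.
-def _example_prefixed_string():
-    typ, value, _, _ = _get_token_list("b'bytes'")[0]
-    assert typ == STRING
-    assert value == "b'bytes'"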
-
-
-def test_ur_literals():
-    """
-    We decided to parse `u''` literals regardless of the Python version. This
-    probably makes sense:
-
-    - Early Python 3 versions don't support it, but accepting it anyway
-      doesn't hurt. While this is incorrect, it is only incorrect for an
-      "old" and, in the future, not very important version.
-    - All the other Python versions work very well with it.
-    """
-    def check(literal, is_literal=True):
-        token_list = _get_token_list(literal)
-        typ, result_literal, _, _ = token_list[0]
-        if is_literal:
-            if typ != FSTRING_START:
-                assert typ == STRING
-                assert result_literal == literal
-        else:
-            assert typ == NAME
-
-    check('u""')
-    check('ur""', is_literal=not sys.version_info.major >= 3)
-    check('Ur""', is_literal=not sys.version_info.major >= 3)
-    check('UR""', is_literal=not sys.version_info.major >= 3)
-    check('bR""')
-    # Starting with Python 3.3 this ordering is also possible.
-    if sys.version_info.major >= 3:
-        check('Rb""')
-
-    # Starting with Python 3.6, f-strings were introduced.
-    check('fr""', is_literal=sys.version_info >= (3, 6))
-    check('rF""', is_literal=sys.version_info >= (3, 6))
-    check('f""', is_literal=sys.version_info >= (3, 6))
-    check('F""', is_literal=sys.version_info >= (3, 6))
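-
-
-# An illustrative sketch (not in the original suite): the tokenizer is
-# version-aware, so the same source can tokenize differently depending on
-# the grammar version string passed in.
-def _example_version_dependence():
-    typ, _, _, _ = _get_token_list('f""', version='3.6')[0]
-    assert typ == FSTRING_START
-    typ, _, _, _ = _get_token_list('f""', version='3.5')[0]
-    assert typ == NAME  # before 3.6, `f` is just a name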
-
-
-def test_error_literal():
-    error_token, newline, endmarker = _get_token_list('"\n')
-    assert error_token.type == ERRORTOKEN
-    assert error_token.string == '"'
-    assert newline.type == NEWLINE
-    assert endmarker.type == ENDMARKER
-    assert endmarker.prefix == ''
-
-    bracket, error_token, endmarker = _get_token_list('( """')
-    assert error_token.type == ERRORTOKEN
-    assert error_token.prefix == ' '
-    assert error_token.string == '"""'
-    assert endmarker.type == ENDMARKER
-    assert endmarker.prefix == ''
-
-
-def test_endmarker_end_pos():
-    def check(code):
-        tokens = _get_token_list(code)
-        lines = split_lines(code)
-        assert tokens[-1].end_pos == (len(lines), len(lines[-1]))
-
-    check('#c')
-    check('#c\n')
-    check('a\n')
-    check('a')
-    check(r'a\\n')
-    check('a\\')
-
-
-xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Python 2')])
-
-
-@pytest.mark.parametrize(
-    ('code', 'types'), [
-        # Indentation
-        (' foo', [INDENT, NAME, DEDENT]),
-        ('  foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
-        ('  foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME,
-                                NEWLINE, NAME, DEDENT]),
-        (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),
-
-        # Name stuff
-        ('1foo1', [NUMBER, NAME]),
-        pytest.param(
-            u'மெல்லினம்', [NAME],
-            **xfail_py2),
-        pytest.param(u'²', [ERRORTOKEN], **xfail_py2),
-        pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
-        pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
-        (' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]),
-        (dedent('''\
-            class BaseCache:
-                a
-              def
-                b
-              def
-                c
-            '''), [NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE,
-                   ERROR_DEDENT, NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
-                   NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, DEDENT]),
-        ('   )\n foo', [INDENT, OP, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
-        ('a\n b\n  )\n c', [NAME, NEWLINE, INDENT, NAME, NEWLINE, INDENT, OP,
-                            NEWLINE, DEDENT, NAME, DEDENT]),
-        (' 1 \\\ndef', [INDENT, NUMBER, NAME, DEDENT]),
-    ]
-)
-def test_token_types(code, types):
-    actual_types = [t.type for t in _get_token_list(code)]
-    assert actual_types == types + [ENDMARKER]
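-
-
-# An illustrative sketch (not in the original suite), mirroring the second
-# indentation case above: ERROR_DEDENT marks a dedent to a level that was
-# never opened, while the final DEDENT closes the level that was.
-def _example_error_dedent():
-    types = [t.type for t in _get_token_list('  a\n b\n')]
-    assert types == [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, NEWLINE,
-                     DEDENT, ENDMARKER]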
-
-
-def test_error_string():
-    indent, t1, newline, token, endmarker = _get_token_list(' "\n')
-    assert t1.type == ERRORTOKEN
-    assert t1.prefix == ' '
-    assert t1.string == '"'
-    assert newline.type == NEWLINE
-    assert endmarker.prefix == ''
-    assert endmarker.string == ''
-
-
-def test_indent_error_recovery():
-    code = dedent("""\
-            str(
-        from x import a
-        def
-        """)
-    lst = _get_token_list(code)
-    expected = [
-        # `    str(`
-        INDENT, NAME, OP,
-        # `from x`
-        NAME, NAME,
-        # `import a` on the same line as the previous `from x`
-        NAME, NAME, NEWLINE,
-        # A dedent happens, because there's an import now and the import
-        # statement "breaks" out of the opening paren on the first line.
-        DEDENT,
-        # `def`
-        NAME, NEWLINE, ENDMARKER]
-    assert [t.type for t in lst] == expected
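-
-
-# An illustrative sketch (not in the original suite): inside balanced
-# brackets, line breaks are implicit continuations and end up in the next
-# token's prefix instead of producing NEWLINE tokens.
-def _example_newline_in_brackets():
-    types = [t.type for t in _get_token_list('(1,\n2)')]
-    assert types == [OP, NUMBER, OP, NUMBER, OP, ENDMARKER]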
-
-
-def test_error_token_after_dedent():
-    code = dedent("""\
-        class C:
-            pass
-        $foo
-        """)
-    lst = _get_token_list(code)
-    expected = [
-        NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
-        # $foo\n
-        ERRORTOKEN, NAME, NEWLINE, ENDMARKER
-    ]
-    assert [t.type for t in lst] == expected
-
-
-def test_brackets_no_indentation():
-    """
-    There used to be an issue where the parenthesis counting could go below
-    zero. This should not happen.
-    """
-    code = dedent("""\
-        }
-        {
-        }
-        """)
-    lst = _get_token_list(code)
-    assert [t.type for t in lst] == [OP, NEWLINE, OP, OP, NEWLINE, ENDMARKER]
-
-
-def test_form_feed():
-    indent, error_token, dedent_, endmarker = _get_token_list(dedent('''\
-        \f"""'''))
-    assert error_token.prefix == '\f'
-    assert error_token.string == '"""'
-    assert endmarker.prefix == ''
-    assert indent.type == INDENT
-    assert dedent_.type == DEDENT
-
-
-def test_carriage_return():
-    lst = _get_token_list(' =\\\rclass')
-    assert [t.type for t in lst] == [INDENT, OP, NAME, DEDENT, ENDMARKER]
-
-
-def test_backslash():
-    code = '\\\n# 1 \n'
-    endmarker, = _get_token_list(code)
-    assert endmarker.prefix == code
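-
-
-# An illustrative sketch (not in the original suite): input that never forms
-# a token (comments, backslash continuations, trailing whitespace) is
-# accumulated into the ENDMARKER's prefix, as test_backslash shows above.
-def _example_prefix_only_input():
-    endmarker, = _get_token_list('# just a comment')
-    assert endmarker.type == ENDMARKER
-    assert endmarker.prefix == '# just a comment'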
-
-
-@pytest.mark.parametrize(
-    ('code', 'types'), [
-        # f-strings
-        ('f"', [FSTRING_START]),
-        ('f""', [FSTRING_START, FSTRING_END]),
-        ('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]),
-        ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
-        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
-        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
-
-        # format spec
-        (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
-                                 FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
-
-        # multiline f-string
-        ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
-        ('f"""abc{\n123}def"""', [
-            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
-            FSTRING_END
-        ]),
-
-        # a line continuation inside an fstring_string
-        ('f"abc\\\ndef"', [
-            FSTRING_START, FSTRING_STRING, FSTRING_END
-        ]),
-        ('f"\\\n{123}\\\n"', [
-            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
-            FSTRING_END
-        ]),
-
-        # a line continuation inside an fstring_expr
-        ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]),
-
-        # a line continuation inside a format spec
-        ('f"{123:.2\\\nf}"', [
-            FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END
-        ]),
-
-        # a newline without a line continuation inside a single-line string
-        # is wrong and will generate an ERRORTOKEN
-        ('f"abc\ndef"', [
-            FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN
-        ]),
-
-        # a more complex example
-        (r'print(f"Some {x:.2f}a{y}")', [
-            NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
-            FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
-        ]),
-        # issue #86, a string-like in an f-string expression
-        ('f"{ ""}"', [
-            FSTRING_START, OP, FSTRING_END, STRING
-        ]),
-        ('f"{ f""}"', [
-            FSTRING_START, OP, NAME, FSTRING_END, STRING
-        ]),
-    ]
-)
-def test_fstring_token_types(code, types, version_ge_py36):
-    actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
-    assert types + [ENDMARKER] == actual_types
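-
-
-# An illustrative sketch (not in the original suite) of how an f-string is
-# split up: the quotes become FSTRING_START/FSTRING_END and the literal text
-# around the braces becomes FSTRING_STRING.
-def _example_fstring_pieces():
-    tokens = _get_token_list('f"a{1}b"', version='3.6')
-    assert [t.string for t in tokens] == ['f"', 'a', '{', '1', '}', 'b', '"', '']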
-
-
-@pytest.mark.parametrize(
-    ('code', 'types'), [
-        # issue #87: `:=` in the outermost braces should be tokenized as a
-        # format spec marker, with what follows becoming part of the format
-        # spec
-        ('f"{x:=10}"', [
-            FSTRING_START, OP, NAME, OP, FSTRING_STRING, OP, FSTRING_END
-        ]),
-        ('f"{(x:=10)}"', [
-            FSTRING_START, OP, OP, NAME, OP, NUMBER, OP, OP, FSTRING_END
-        ]),
-    ]
-)
-def test_fstring_assignment_expression(code, types, version_ge_py38):
-    actual_types = [t.type for t in _get_token_list(code, version_ge_py38)]
-    assert types + [ENDMARKER] == actual_types
-
-
-def test_fstring_end_error_pos(version_ge_py38):
-    f_start, f_string, bracket, f_end, endmarker = \
-        _get_token_list('f" { "', version_ge_py38)
-    assert f_start.start_pos == (1, 0)
-    assert f_string.start_pos == (1, 2)
-    assert bracket.start_pos == (1, 3)
-    assert f_end.start_pos == (1, 5)
-    assert endmarker.start_pos == (1, 6)