1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
from itertools import zip_longest
from codecs import BOM_UTF8
import pytest
import parso
# Text form of the UTF-8 byte order mark, obtained by decoding the raw
# BOM bytes; used below to build inputs that start with a BOM.
unicode_bom = str(BOM_UTF8, 'utf-8')
@pytest.mark.parametrize(('string', 'tokens'), [
    ('', ['']),
    ('#', ['#', '']),
    (' # ', ['# ', '']),
    (' # \n', ['# ', '\n', '']),
    (' # \f\n', ['# ', '\f', '\n', '']),
    (' \n', ['\n', '']),
    (' \n ', ['\n', ' ']),
    (' \f ', ['\f', ' ']),
    (' \f ', ['\f', ' ']),
    (' \r\n', ['\r\n', '']),
    (' \r', ['\r', '']),
    ('\\\n', ['\\\n', '']),
    ('\\\r\n', ['\\\r\n', '']),
    ('\t\t\n\t', ['\n', '\t']),
])
def test_simple_prefix_splitting(string, tokens):
    """Check the values and end positions of the split prefix parts.

    ``string`` is parsed as a whole; its first child must be the
    endmarker, whose prefix is split into parts.  Every part's ``value``
    has to equal the matching entry of ``tokens`` and its ``end_pos`` has
    to equal the position computed while walking through the parts.
    """
    endmarker = parso.parse(string).children[0]
    assert endmarker.type == 'endmarker'

    prefix_parts = list(endmarker._split_prefix())

    # Running (line, column) position; the first part is expected to
    # start at line 1, column 0.
    line, column = 1, 0

    # zip_longest pads the shorter sequence with None, so a differing
    # number of parts and expected tokens makes the test fail as well.
    for part, expected in zip_longest(prefix_parts, tokens):
        assert part.value == expected

        # Estimate where this part ends: a value ending in a line break
        # moves to the start of the next line, anything else advances
        # the column by the value length plus the length of the part's
        # `spacing`.
        if expected.endswith(('\n', '\r')):
            line, column = line + 1, 0
        else:
            column += len(expected) + len(part.spacing)

        # NOTE: only end positions are verified here; the part's
        # start_pos is deliberately not asserted.
        assert (line, column) == part.end_pos
@pytest.mark.parametrize(('string', 'types'), [
    ('# ', ['comment', 'spacing']),
    ('\r\n', ['newline', 'spacing']),
    ('\f', ['formfeed', 'spacing']),
    ('\\\n', ['backslash', 'spacing']),
    (' \t', ['spacing']),
    (' \t ', ['spacing']),
    (unicode_bom + ' # ', ['bom', 'comment', 'spacing']),
])
def test_prefix_splitting_types(string, types):
    """Check the ``type`` of every part the endmarker prefix splits into.

    ``string`` is parsed as a whole; its first child must be the
    endmarker.  The sequence of part types produced by splitting that
    leaf's prefix has to equal ``types`` exactly, in order.
    """
    endmarker = parso.parse(string).children[0]
    assert endmarker.type == 'endmarker'

    prefix_parts = list(endmarker._split_prefix())
    observed_types = [part.type for part in prefix_parts]
    assert observed_types == types
def test_utf8_bom():
    """Check positions reported for sources that begin with a UTF-8 BOM.

    First, the first child of a tree parsed from BOM-prefixed code must
    still start at line 1, column 0.  Second, for a source made of just
    the BOM and a newline, the prefix of the first child must split into
    a 'bom', a 'newline' and a 'spacing' part with the start and end
    positions listed below.
    """
    first_child = parso.parse(unicode_bom + 'a = 1').children[0]
    assert first_child.start_pos == (1, 0)

    endmarker = parso.parse(unicode_bom + '\n').children[0]
    prefix_parts = list(endmarker._split_prefix())

    expected_types = ['bom', 'newline', 'spacing']
    # The BOM part starts and ends at (1, 0); the newline part starts at
    # the same position and ends at the beginning of line 2.
    expected_starts = [(1, 0), (1, 0), (2, 0)]
    expected_ends = [(1, 0), (2, 0), (2, 0)]

    assert [part.type for part in prefix_parts] == expected_types
    assert [part.start_pos for part in prefix_parts] == expected_starts
    assert [part.end_pos for part in prefix_parts] == expected_ends
|