summaryrefslogtreecommitdiffstats
path: root/contrib/python/jmespath/py3/tests/test_lexer.py
blob: 1649283ba4b271d4a645ef5b40ef3397037d54de (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from . import unittest

from jmespath import lexer
from jmespath.exceptions import LexerError, EmptyExpressionError


class TestRegexLexer(unittest.TestCase):

    def setUp(self):
        self.lexer = lexer.Lexer()

    def assert_tokens(self, actual, expected):
        # The expected tokens only need to specify the
        # type and value.  The line/column numbers are not
        # checked, and we use assertEqual for the tests
        # that check those line numbers.
        stripped = []
        for item in actual:
            stripped.append({'type': item['type'], 'value': item['value']})
        # Every tokenization should end in eof, so we automatically
        # check that value, strip it off the end, and then
        # verify the remaining tokens against the expected.
        # That way the tests don't need to add eof to every
        # assert_tokens call.
        self.assertEqual(stripped[-1]['type'], 'eof')
        stripped.pop()
        self.assertEqual(stripped, expected)

    def test_empty_string(self):
        with self.assertRaises(EmptyExpressionError):
            list(self.lexer.tokenize(''))

    def test_field(self):
        tokens = list(self.lexer.tokenize('foo'))
        self.assert_tokens(tokens, [{'type': 'unquoted_identifier',
                                     'value': 'foo'}])

    def test_number(self):
        tokens = list(self.lexer.tokenize('24'))
        self.assert_tokens(tokens, [{'type': 'number',
                                     'value': 24}])

    def test_negative_number(self):
        tokens = list(self.lexer.tokenize('-24'))
        self.assert_tokens(tokens, [{'type': 'number',
                                     'value': -24}])

    def test_quoted_identifier(self):
        tokens = list(self.lexer.tokenize('"foobar"'))
        self.assert_tokens(tokens, [{'type': 'quoted_identifier',
                                     'value': "foobar"}])

    def test_json_escaped_value(self):
        tokens = list(self.lexer.tokenize('"\u2713"'))
        self.assert_tokens(tokens, [{'type': 'quoted_identifier',
                                     'value': u"\u2713"}])

    def test_number_expressions(self):
        tokens = list(self.lexer.tokenize('foo.bar.baz'))
        self.assert_tokens(tokens, [
            {'type': 'unquoted_identifier', 'value': 'foo'},
            {'type': 'dot', 'value': '.'},
            {'type': 'unquoted_identifier', 'value': 'bar'},
            {'type': 'dot', 'value': '.'},
            {'type': 'unquoted_identifier', 'value': 'baz'},
        ])

    def test_space_separated(self):
        tokens = list(self.lexer.tokenize('foo.bar[*].baz | a || b'))
        self.assert_tokens(tokens, [
            {'type': 'unquoted_identifier', 'value': 'foo'},
            {'type': 'dot', 'value': '.'},
            {'type': 'unquoted_identifier', 'value': 'bar'},
            {'type': 'lbracket', 'value': '['},
            {'type': 'star', 'value': '*'},
            {'type': 'rbracket', 'value': ']'},
            {'type': 'dot', 'value': '.'},
            {'type': 'unquoted_identifier', 'value': 'baz'},
            {'type': 'pipe', 'value': '|'},
            {'type': 'unquoted_identifier', 'value': 'a'},
            {'type': 'or', 'value': '||'},
            {'type': 'unquoted_identifier', 'value': 'b'},
        ])

    def test_literal(self):
        tokens = list(self.lexer.tokenize('`[0, 1]`'))
        self.assert_tokens(tokens, [
            {'type': 'literal', 'value': [0, 1]},
        ])

    def test_literal_string(self):
        tokens = list(self.lexer.tokenize('`foobar`'))
        self.assert_tokens(tokens, [
            {'type': 'literal', 'value': "foobar"},
        ])

    def test_literal_number(self):
        tokens = list(self.lexer.tokenize('`2`'))
        self.assert_tokens(tokens, [
            {'type': 'literal', 'value': 2},
        ])

    def test_literal_with_invalid_json(self):
        with self.assertRaises(LexerError):
            list(self.lexer.tokenize('`foo"bar`'))

    def test_literal_with_empty_string(self):
        tokens = list(self.lexer.tokenize('``'))
        self.assert_tokens(tokens, [{'type': 'literal', 'value': ''}])

    def test_position_information(self):
        tokens = list(self.lexer.tokenize('foo'))
        self.assertEqual(
            tokens,
            [{'type': 'unquoted_identifier', 'value': 'foo',
              'start': 0, 'end': 3},
              {'type': 'eof', 'value': '', 'start': 3, 'end': 3}]
        )

    def test_position_multiple_tokens(self):
        tokens = list(self.lexer.tokenize('foo.bar'))
        self.assertEqual(
            tokens,
            [{'type': 'unquoted_identifier', 'value': 'foo',
              'start': 0, 'end': 3},
             {'type': 'dot', 'value': '.',
              'start': 3, 'end': 4},
             {'type': 'unquoted_identifier', 'value': 'bar',
              'start': 4, 'end': 7},
             {'type': 'eof', 'value': '',
              'start': 7, 'end': 7},
             ]
        )

    def test_adds_quotes_when_invalid_json(self):
        tokens = list(self.lexer.tokenize('`{{}`'))
        self.assertEqual(
            tokens,
            [{'type': 'literal', 'value': '{{}',
              'start': 0, 'end': 4},
             {'type': 'eof', 'value': '',
              'start': 5, 'end': 5}
             ]
        )

    def test_unknown_character(self):
        with self.assertRaises(LexerError) as e:
            tokens = list(self.lexer.tokenize('foo[0^]'))

    def test_bad_first_character(self):
        with self.assertRaises(LexerError):
            tokens = list(self.lexer.tokenize('^foo[0]'))

    def test_unknown_character_with_identifier(self):
        with self.assertRaisesRegex(LexerError, "Unknown token"):
            list(self.lexer.tokenize('foo-bar'))


if __name__ == '__main__':
    unittest.main()