contrib/python/pyre2/py3/tests/test_namedgroups.txt


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

Testing some aspects of named groups
=================================================

    >>> import re2
    >>> re2.set_fallback_notification(re2.FALLBACK_EXCEPTION)

    >>> m = re2.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Malcolm Reynolds")
    >>> m.start("first_name")
    0
    >>> m.start("last_name")
    8

    >>> m.span("last_name")
    (8, 16)
    >>> m.regs
    ((0, 16), (0, 7), (8, 16))

    >>> m = re2.match(u"(?P<first_name>\\w+) (?P<last_name>\\w+)", u"Malcolm Reynolds")
    >>> m.start(u"first_name")
    0
    >>> m.start(u"last_name")
    8

    >>> m.span(u"last_name")
    (8, 16)
    >>> m.regs
    ((0, 16), (0, 7), (8, 16))

Compare the dumped pattern for a bytes pattern and for a Unicode pattern compiled with the UNICODE flag:

    >>> pattern = re2.compile(br"(?P<first_name>\w+) (?P<last_name>\w+)")
    >>> print(pattern._dump_pattern().decode('utf8'))
    (?P<first_name>\w+) (?P<last_name>\w+)
    >>> pattern = re2.compile(u"(?P<first_name>\\w+) (?P<last_name>\\w+)",
    ... re2.UNICODE)
    >>> print(pattern._dump_pattern())
    (?P<first_name>[_\p{L}\p{Nd}]+) (?P<last_name>[_\p{L}\p{Nd}]+)

Make sure match positions are reported as character offsets (not UTF-8 byte offsets) for Unicode input:

    >>> m = pattern.match(
    ... u'\u05d9\u05e9\u05e8\u05d0\u05dc \u05e6\u05d3\u05d5\u05e7')
    >>> m.start(u"first_name")
    0
    >>> m.start(u"last_name")
    6
    >>> m.end(u"last_name")
    10
    >>> m.regs
    ((0, 10), (0, 5), (6, 10))
    >>> m.span(2)
    (6, 10)
    >>> m.span(u"last_name")
    (6, 10)

    >>> re2.set_fallback_notification(re2.FALLBACK_QUIETLY)