aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/pyre2/py2/tests/test_unicode.txt
blob: 71d497b80dbfee42375695a0dc1f6f107aad89d3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
Here are some tests to make sure that UTF-8 works
=================================================

    >>> import sys
    >>> import re2 as re
    >>> re.set_fallback_notification(re.FALLBACK_EXCEPTION)
    >>> a = u'\u6211\u5f88\u597d'
    >>> c = re.compile(a[0])
    >>> c.search(a).group() == u'\u6211'
    True

Test unicode stickiness

    >>> re.sub(u'x', u'y', u'x') == u'y'
    True
    >>> re.sub(r'x', 'y', 'x') == 'y'
    True
    >>> re.findall('.', 'x') == ['x']
    True
    >>> re.findall(u'.', u'x') == [u'x']
    True
    >>> re.split(',', '1,2,3') == ['1', '2', '3']
    True
    >>> re.split(u',', u'1,2,3') == [u'1', u'2', u'3']
    True
    >>> re.search('(\\d)', '1').group(1) == '1'
    True
    >>> re.search(u'(\\d)', u'1').group(1) == u'1'
    True

Test unicode character classes (\d, \w, \s and their negations)

    >>> re.search(u'\\d', u'\u0661', re.UNICODE).group(0) == u'\u0661'
    True
    >>> int(re.search(u'\\d', u'\u0661', re.UNICODE).group(0)) == 1
    True
    >>> (re.search(u'\\w', u'\u0401') is None) == (sys.version_info[0] == 2)
    True
    >>> re.search(u'\\w', u'\u0401', re.UNICODE).group(0) == u'\u0401'
    True
    >>> re.search(u'\\s', u'\u1680', re.UNICODE).group(0) == u'\u1680'
    True
    >>> re.findall(r'[\s\d\w]', 'hey 123', re.UNICODE) == ['h', 'e', 'y', ' ', '1', '2', '3']
    True
    >>> re.search(u'\\D', u'\u0661x', re.UNICODE).group(0) == u'x'
    True
    >>> re.search(u'\\W', u'\u0401!', re.UNICODE).group(0) == u'!'
    True
    >>> re.search(u'\\S', u'\u1680x', re.UNICODE).group(0) == u'x'
    True
    >>> re.set_fallback_notification(re.FALLBACK_QUIETLY)
    >>> re.search(u'[\\W]', u'\u0401!', re.UNICODE).group(0) == u'!'
    True
    >>> re.search(u'[\\S]', u'\u1680x', re.UNICODE).group(0) == u'x'
    True
    >>> re.set_fallback_notification(re.FALLBACK_EXCEPTION)


Positions are translated transparently between unicode and UTF-8

    >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x').span(1)
    (6, 7)
    >>> re.search(b' (.)', u'\U0001d200xxx\u1234 x'.encode('utf-8')).span(1)
    (11, 12)
    >>> re.compile(u'x').findall(u'\u1234x', 1, 2) == [u'x']
    True
    >>> data = u'\U0001d200xxx\u1234 x'
    >>> re.search(u' (.)', data).string == data
    True

    >>> re.set_fallback_notification(re.FALLBACK_QUIETLY)