diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 13:26:22 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 15:44:45 +0300 |
commit | 0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch) | |
tree | 291d72dbd7e9865399f668c84d11ed86fb190bbf /contrib/python/pyre2/py3/tests/test_unicode.txt | |
parent | cb2c8d75065e5b3c47094067cb4aa407d4813298 (diff) | |
download | ydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz |
YQ Connector: Use docker-compose in integration tests
Diffstat (limited to 'contrib/python/pyre2/py3/tests/test_unicode.txt')
-rw-r--r-- | contrib/python/pyre2/py3/tests/test_unicode.txt | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/contrib/python/pyre2/py3/tests/test_unicode.txt b/contrib/python/pyre2/py3/tests/test_unicode.txt new file mode 100644 index 0000000000..71d497b80d --- /dev/null +++ b/contrib/python/pyre2/py3/tests/test_unicode.txt @@ -0,0 +1,71 @@ +Here are some tests to make sure that utf-8 works +================================================= + + >>> import sys + >>> import re2 as re + >>> re.set_fallback_notification(re.FALLBACK_EXCEPTION) + >>> a = u'\u6211\u5f88\u597d' + >>> c = re.compile(a[0]) + >>> c.search(a).group() == u'\u6211' + True + +Test unicode stickyness + + >>> re.sub(u'x', u'y', u'x') == u'y' + True + >>> re.sub(r'x', 'y', 'x') == 'y' + True + >>> re.findall('.', 'x') == ['x'] + True + >>> re.findall(u'.', u'x') == [u'x'] + True + >>> re.split(',', '1,2,3') == ['1', '2', '3'] + True + >>> re.split(u',', u'1,2,3') == [u'1', u'2', u'3'] + True + >>> re.search('(\\d)', '1').group(1) == '1' + True + >>> re.search(u'(\\d)', u'1').group(1) == u'1' + True + +Test unicode character groups + + >>> re.search(u'\\d', u'\u0661', re.UNICODE).group(0) == u'\u0661' + True + >>> int(re.search(u'\\d', u'\u0661', re.UNICODE).group(0)) == 1 + True + >>> (re.search(u'\\w', u'\u0401') is None) == (sys.version_info[0] == 2) + True + >>> re.search(u'\\w', u'\u0401', re.UNICODE).group(0) == u'\u0401' + True + >>> re.search(u'\\s', u'\u1680', re.UNICODE).group(0) == u'\u1680' + True + >>> re.findall(r'[\s\d\w]', 'hey 123', re.UNICODE) == ['h', 'e', 'y', ' ', '1', '2', '3'] + True + >>> re.search(u'\\D', u'\u0661x', re.UNICODE).group(0) == u'x' + True + >>> re.search(u'\\W', u'\u0401!', re.UNICODE).group(0) == u'!' + True + >>> re.search(u'\\S', u'\u1680x', re.UNICODE).group(0) == u'x' + True + >>> re.set_fallback_notification(re.FALLBACK_QUIETLY) + >>> re.search(u'[\\W]', u'\u0401!', re.UNICODE).group(0) == u'!' 
+ True + >>> re.search(u'[\\S]', u'\u1680x', re.UNICODE).group(0) == u'x' + True + >>> re.set_fallback_notification(re.FALLBACK_EXCEPTION) + + +Positions are translated transparently between unicode and UTF-8 + + >>> re.search(u' (.)', u'\U0001d200xxx\u1234 x').span(1) + (6, 7) + >>> re.search(b' (.)', u'\U0001d200xxx\u1234 x'.encode('utf-8')).span(1) + (11, 12) + >>> re.compile(u'x').findall(u'\u1234x', 1, 2) == [u'x'] + True + >>> data = u'\U0001d200xxx\u1234 x' + >>> re.search(u' (.)', data).string == data + True + + >>> re.set_fallback_notification(re.FALLBACK_QUIETLY) |