diff options
author | AlexSm <alex@ydb.tech> | 2023-12-27 23:31:58 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-27 23:31:58 +0100 |
commit | d67bfb4b4b7549081543e87a31bc6cb5c46ac973 (patch) | |
tree | 8674f2f1570877cb653e7ddcff37ba00288de15a /contrib/python/wcwidth/py2/tests/test_core.py | |
parent | 1f6bef05ed441c3aa2d565ac792b26cded704ac7 (diff) | |
download | ydb-d67bfb4b4b7549081543e87a31bc6cb5c46ac973.tar.gz |
Import libs 4 (#758)
Diffstat (limited to 'contrib/python/wcwidth/py2/tests/test_core.py')
-rw-r--r-- | contrib/python/wcwidth/py2/tests/test_core.py | 225 |
1 files changed, 212 insertions, 13 deletions
diff --git a/contrib/python/wcwidth/py2/tests/test_core.py b/contrib/python/wcwidth/py2/tests/test_core.py index c8f791c016..d2776cd992 100644 --- a/contrib/python/wcwidth/py2/tests/test_core.py +++ b/contrib/python/wcwidth/py2/tests/test_core.py @@ -10,6 +10,13 @@ except ImportError: # local import wcwidth +try: + # python 2 + _ = unichr +except NameError: + # python 3 + unichr = chr + def test_package_version(): """wcwidth.__version__ is expected value.""" @@ -23,6 +30,45 @@ def test_package_version(): assert result == expected +def test_empty_string(): + """ + Test empty string is OK. + + https://github.com/jquast/wcwidth/issues/24 + """ + phrase = "" + expect_length_each = 0 + expect_length_phrase = 0 + + # exercise, + length_each = wcwidth.wcwidth(phrase) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def basic_string_type(): + """ + This is a python 2-specific test of the basic "string type" + + Such strings cannot contain anything but ascii in python2. + """ + # given, + phrase = 'hello\x00world' + expect_length_each = (1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) + expect_length_phrase = sum(expect_length_each) + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + def test_hello_jp(): u""" Width of Japanese phrase: コンニチハ, セカイ! @@ -59,9 +105,11 @@ def test_wcswidth_substr(): expect_length_phrase = sum(expect_length_each) # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase))[:end] length_phrase = wcwidth.wcswidth(phrase, end) # verify. 
+ assert length_each == expect_length_each assert length_phrase == expect_length_phrase @@ -82,7 +130,15 @@ def test_null_width_0(): def test_control_c0_width_negative_1(): - """CSI (Control sequence initiate) reports width -1 for ESC.""" + """How the API reacts to CSI (Control Sequence Introducer). + + An example of bad fortune, this terminal sequence is a width of 0 + on all terminals, but wcwidth doesn't parse Control Sequence Introducer + (CSI) sequences. + + Also the "legacy" posix functions wcwidth and wcswidth return -1 for + any string containing the C0 control character \x1b (ESC). + """ # given, phrase = u'\x1b[0m' expect_length_each = (-1, 1, 1, 1) @@ -90,9 +146,9 @@ def test_control_c0_width_negative_1(): # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) - # verify. + # verify, though this is actually *0* width for a terminal emulator assert length_each == expect_length_each assert length_phrase == expect_length_phrase @@ -106,7 +162,7 @@ def test_combining_width(): # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) # verify. assert length_each == expect_length_each @@ -121,7 +177,7 @@ def test_combining_cafe(): # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) # verify. 
assert length_each == expect_length_each @@ -129,29 +185,172 @@ def test_combining_cafe(): def test_combining_enclosing(): - u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is А҈ of length 1.""" + u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is of length 1.""" phrase = u"\u0410\u0488" expect_length_each = (1, 0) expect_length_phrase = 1 # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) # verify. assert length_each == expect_length_each assert length_phrase == expect_length_phrase -def test_combining_spacing(): - u"""Balinese kapal (ship) is ᬓᬨᬮ᭄ of length 4.""" - phrase = u"\u1B13\u1B28\u1B2E\u1B44" - expect_length_each = (1, 1, 1, 1) - expect_length_phrase = 4 +def test_balinese_script(): + u""" + Balinese kapal (ship) is length 3. + + This may be an example that is not yet correctly rendered by any terminal so + far, like devanagari. + """ + phrase = (u"\u1B13" # Category 'Lo', EAW 'N' -- BALINESE LETTER KA + u"\u1B28" # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL + u"\u1B2E" # Category 'Lo', EAW 'N' -- BALINESE LETTER LA + u"\u1B44") # Category 'Mc', EAW 'N' -- BALINESE ADEG ADEG + expect_length_each = (1, 1, 1, 0) + expect_length_phrase = 3 # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_kr_jamo_filler(): + u""" + Jamo filler is 0 width. + + According to https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf this character and others + like it, ``\uffa0``, ``\u1160``, ``\u115f``, ``\u1160``, are not commonly viewed with a terminal, + seems it doesn't matter whether it is implemented or not, they are not typically used ! 
+ """ + phrase = u"\u1100\u1160" + expect_length_each = (2, 1) + expect_length_phrase = 3 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_devanagari_script(): + """ + Attempt to test the measurement width of Devanagari script. + + I believe this 'phrase' should be length 3. + + This is a difficult problem, and this library does not yet get it right, + because we interpret the unicode data files programmatically, but they do + not correctly describe how their terminal width is measured. + + There are very few Terminals that do! + + As of 2023, + + - iTerm2: correct length but individual characters are out of order and + horizontally misplaced as to be unreadable in its language when + using 'Noto Sans' font. + - mlterm: mixed results, it offers several options in the configuration + dialog, "Xft", "Cairo", and "Variable Column Width" have some + effect, but with neither 'Noto Sans' nor 'unifont', it is not + recognizable as the Devanagari script it is meant to display. + + Previous testing with Devanagari documented at address https://benizi.com/vim/devanagari/ + + See also, https://askubuntu.com/questions/8437/is-there-a-good-mono-spaced-font-for-devanagari-script-in-the-terminal + """ + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + # please note that document correctly points out that the final width cannot be determined + # as a sum of each individual width, as this library currently performs with exception of + # ZWJ, but I think it incorrectly gestures what a stateless call to wcwidth.wcwidth of + # each codepoint *should* return. 
+ phrase = (u"\u0915" # Akhand, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER KA + u"\u094D" # Joiner, Category 'Mn', East Asian Width property 'N' -- DEVANAGARI SIGN VIRAMA + u"\u0937" # Fused, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER SSA + u"\u093F") # MatraL, Category 'Mc', East Asian Width property 'N' -- DEVANAGARI VOWEL SIGN I + # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1) + expect_length_each = (1, 0, 1, 0) + # I believe the final width *should* be 3. + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_tamil_script(): + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + phrase = (u"\u0b95" # Akhand, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER KA + u"\u0bcd" # Joiner, Category 'Mn', East Asian Width property 'N' -- TAMIL SIGN VIRAMA + u"\u0bb7" # Fused, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER SSA + u"\u0bcc") # MatraLR, Category 'Mc', East Asian Width property 'N' -- TAMIL VOWEL SIGN AU + # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (3, 0, 0, 4) + expect_length_each = (1, 0, 1, 0) + + # I believe the final width should be about 5 or 6. + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. 
+ assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_kannada_script(): + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + # |ರ್ಝೈ| + # |123| + phrase = (u"\u0cb0" # Repha, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA + u"\u0ccd" # Joiner, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN VIRAMA + u"\u0c9d" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER JHA + u"\u0cc8") # MatraUR, Category 'Mc', East Asian Width property 'N' -- KANNADA VOWEL SIGN AI + # 23107-terminal-suppt.pdf suggests should be (2, 0, 3, 1) + expect_length_each = (1, 0, 1, 0) + # I believe the correct final width *should* be 3 or 4. + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_kannada_script_2(): + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + # |ರ಼್ಚ| + # |12| + phrase = (u"\u0cb0" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA + u"\u0cbc" # Nukta, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN NUKTA + u"\u0ccd" # Joiner, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN VIRAMA + u"\u0c9a") # Subjoin, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER CA + # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1) + expect_length_each = (1, 0, 0, 1) + # I believe the final width is correct, but maybe for the wrong reasons! + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) # verify. assert length_each == expect_length_each |