Import libs 4 (#758)

author: AlexSm <alex@ydb.tech> 2023-12-27 23:31:58 +0100
committer: GitHub <noreply@github.com> 2023-12-27 23:31:58 +0100
commit: d67bfb4b4b7549081543e87a31bc6cb5c46ac973 (patch)
tree: 8674f2f1570877cb653e7ddcff37ba00288de15a /contrib/python/wcwidth/py2/tests/test_core.py
parent: 1f6bef05ed441c3aa2d565ac792b26cded704ac7 (diff)
download: ydb-d67bfb4b4b7549081543e87a31bc6cb5c46ac973.tar.gz
1 files changed, 212 insertions, 13 deletions
diff --git a/contrib/python/wcwidth/py2/tests/test_core.py b/contrib/python/wcwidth/py2/tests/test_core.py
index c8f791c016..d2776cd992 100644
--- a/contrib/python/wcwidth/py2/tests/test_core.py
+++ b/contrib/python/wcwidth/py2/tests/test_core.py
@@ -10,6 +10,13 @@ except ImportError:
 # local
 import wcwidth
 
+try:
+    # python 2
+    _ = unichr
+except NameError:
+    # python 3
+    unichr = chr
+
 
 def test_package_version():
     """wcwidth.__version__ is expected value."""
@@ -23,6 +30,45 @@ def test_package_version():
     assert result == expected
 
 
+def test_empty_string():
+    """
+    Test empty string is OK.
+
+    https://github.com/jquast/wcwidth/issues/24
+    """
+    phrase = ""
+    expect_length_each = 0
+    expect_length_phrase = 0
+
+    # exercise,
+    length_each = wcwidth.wcwidth(phrase)
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+def basic_string_type():
+    """
+    This is a python 2-specific test of the basic "string type"
+
+    Such strings cannot contain anything but ascii in python2.
+    """
+    # given,
+    phrase = 'hello\x00world'
+    expect_length_each = (1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1)
+    expect_length_phrase = sum(expect_length_each)
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
 def test_hello_jp():
     u"""
     Width of Japanese phrase: コンニチハ, セカイ!
@@ -59,9 +105,11 @@ def test_wcswidth_substr():
     expect_length_phrase = sum(expect_length_each)
 
     # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))[:end]
     length_phrase = wcwidth.wcswidth(phrase, end)
 
     # verify.
+    assert length_each == expect_length_each
     assert length_phrase == expect_length_phrase
 
 
@@ -82,7 +130,15 @@ def test_null_width_0():
 
 
 def test_control_c0_width_negative_1():
-    """CSI (Control sequence initiate) reports width -1 for ESC."""
+    """How the API reacts to CSI (Control Sequence Introducer).
+
+    An example of bad fortune, this terminal sequence is a width of 0
+    on all terminals, but wcwidth doesn't parse Control Sequence Introducer
+    (CSI) sequences.
+
+    Also the "legacy" posix functions wcwidth and wcswidth return -1 for
+    any string containing the C0 control character \x1b (ESC).
+    """
     # given,
     phrase = u'\x1b[0m'
     expect_length_each = (-1, 1, 1, 1)
@@ -90,9 +146,9 @@ def test_control_c0_width_negative_1():
 
     # exercise,
     length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase, len(phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
 
-    # verify.
+    # verify, though this is actually *0* width for a terminal emulator
     assert length_each == expect_length_each
     assert length_phrase == expect_length_phrase
 
@@ -106,7 +162,7 @@ def test_combining_width():
 
     # exercise,
     length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase, len(phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
 
     # verify.
     assert length_each == expect_length_each
@@ -121,7 +177,7 @@ def test_combining_cafe():
 
     # exercise,
     length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase, len(phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
 
     # verify.
     assert length_each == expect_length_each
@@ -129,29 +185,172 @@ def test_combining_cafe():
 
 
 def test_combining_enclosing():
-    u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is А҈ of length 1."""
+    u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is of length 1."""
     phrase = u"\u0410\u0488"
     expect_length_each = (1, 0)
     expect_length_phrase = 1
 
     # exercise,
     length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase, len(phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
 
     # verify.
     assert length_each == expect_length_each
     assert length_phrase == expect_length_phrase
 
 
-def test_combining_spacing():
-    u"""Balinese kapal (ship) is ᬓᬨᬮ᭄ of length 4."""
-    phrase = u"\u1B13\u1B28\u1B2E\u1B44"
-    expect_length_each = (1, 1, 1, 1)
-    expect_length_phrase = 4
+def test_balinese_script():
+    u"""
+    Balinese kapal (ship) is length 3.
+
+    This may be an example that is not yet correctly rendered by any terminal so
+    far, like devanagari.
+    """
+    phrase = (u"\u1B13"    # Category 'Lo', EAW 'N' -- BALINESE LETTER KA
+              u"\u1B28"    # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL
+              u"\u1B2E"    # Category 'Lo', EAW 'N' -- BALINESE LETTER LA
+              u"\u1B44")   # Category 'Mc', EAW 'N' -- BALINESE ADEG ADEG
+    expect_length_each = (1, 1, 1, 0)
+    expect_length_phrase = 3
 
     # exercise,
     length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase, len(phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+def test_kr_jamo_filler():
+    u"""
+    Jamo filler is 0 width.
+
+    According to https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf this character and others
+    like it, ``\uffa0``, ``\u1160``, ``\u115f``, are not commonly viewed with a terminal, so it
+    seems not to matter whether it is implemented or not: they are not typically used!
+    """
+    phrase = u"\u1100\u1160"
+    expect_length_each = (2, 1)
+    expect_length_phrase = 3
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+def test_devanagari_script():
+    """
+    Attempt to test the measurement width of Devanagari script.
+
+    I believe this 'phrase' should be length 3.
+
+    This is a difficult problem, and this library does not yet get it right,
+    because we interpret the unicode data files programmatically, but they do
+    not correctly describe how their terminal width is measured.
+
+    There are very few Terminals that do!
+
+    As of 2023,
+
+    - iTerm2: correct length but individual characters are out of order and
+              horizontally misplaced as to be unreadable in its language when
+              using 'Noto Sans' font.
+    - mlterm: mixed results, it offers several options in the configuration
+              dialog, "Xft", "Cairo", and "Variable Column Width" have some
+              effect, but with neither 'Noto Sans' nor 'unifont', it is not
+              recognizable as the Devanagari script it is meant to display.
+
+    Previous testing with Devanagari documented at address https://benizi.com/vim/devanagari/
+
+    See also, https://askubuntu.com/questions/8437/is-there-a-good-mono-spaced-font-for-devanagari-script-in-the-terminal
+    """
+    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+    # Please note that the document correctly points out that the final width cannot be
+    # determined as a sum of each individual width, which is what this library currently
+    # does (with the exception of ZWJ), but I think it incorrectly suggests what a
+    # stateless call to wcwidth.wcwidth of each codepoint *should* return.
+    phrase = (u"\u0915"    # Akhand, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER KA
+              u"\u094D"    # Joiner, Category 'Mn', East Asian Width property 'N' -- DEVANAGARI SIGN VIRAMA
+              u"\u0937"    # Fused, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER SSA
+              u"\u093F")   # MatraL, Category 'Mc', East Asian Width property 'N' -- DEVANAGARI VOWEL SIGN I
+    # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1)
+    expect_length_each = (1, 0, 1, 0)
+    # I believe the final width *should* be 3.
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+def test_tamil_script():
+    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+    phrase = (u"\u0b95"    # Akhand, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER KA
+              u"\u0bcd"    # Joiner, Category 'Mn', East Asian Width property 'N' -- TAMIL SIGN VIRAMA
+              u"\u0bb7"    # Fused, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER SSA
+              u"\u0bcc")   # MatraLR, Category 'Mc', East Asian Width property 'N' -- TAMIL VOWEL SIGN AU
+    # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (3, 0, 0, 4)
+    expect_length_each = (1, 0, 1, 0)
+
+    # I believe the final width should be about 5 or 6.
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+def test_kannada_script():
+    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+    # |ರ್ಝೈ|
+    # |123|
+    phrase = (u"\u0cb0"    # Repha, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA
+              u"\u0ccd"    # Joiner, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN VIRAMA
+              u"\u0c9d"    # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER JHA
+              u"\u0cc8")   # MatraUR, Category 'Mc', East Asian Width property 'N' -- KANNADA VOWEL SIGN AI
+    # 23107-terminal-suppt.pdf suggests should be (2, 0, 3, 1)
+    expect_length_each = (1, 0, 1, 0)
+    # I believe the correct final width *should* be 3 or 4.
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+def test_kannada_script_2():
+    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+    # |ರ಼್ಚ|
+    # |12|
+    phrase = (u"\u0cb0"    # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA
+              u"\u0cbc"    # Nukta, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN NUKTA
+              u"\u0ccd"    # Joiner, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN VIRAMA
+              u"\u0c9a")   # Subjoin, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER CA
+    # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1)
+    expect_length_each = (1, 0, 0, 1)
+    # I believe the final width is correct, but maybe for the wrong reasons!
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
 
     # verify.
     assert length_each == expect_length_each
author	AlexSm <alex@ydb.tech>	2023-12-27 23:31:58 +0100
committer	GitHub <noreply@github.com>	2023-12-27 23:31:58 +0100
commit	d67bfb4b4b7549081543e87a31bc6cb5c46ac973 (patch)
tree	8674f2f1570877cb653e7ddcff37ba00288de15a /contrib/python/wcwidth/py2/tests/test_core.py
parent	1f6bef05ed441c3aa2d565ac792b26cded704ac7 (diff)
download	ydb-d67bfb4b4b7549081543e87a31bc6cb5c46ac973.tar.gz