Intermediate changes

author: robot-piglet <robot-piglet@yandex-team.com> 2024-02-04 02:13:03 +0300
committer: Alexander Smirnov <alex@ydb.tech> 2024-02-09 19:17:36 +0300
commit: 055fb4247521918239057d343e52f835d818e3e1 (patch)
tree: a97c8832f272c15125ea3a27446fe74d720e450a /contrib/python/wcwidth/py2/tests/test_core.py
parent: 0b8ccf7ff449ecfad252a58d14cd20c832deecaa (diff)
download: ydb-055fb4247521918239057d343e52f835d818e3e1.tar.gz
1 files changed, 51 insertions, 6 deletions
diff --git a/contrib/python/wcwidth/py2/tests/test_core.py b/contrib/python/wcwidth/py2/tests/test_core.py
index d2776cd992..60ed6b1cde 100644
--- a/contrib/python/wcwidth/py2/tests/test_core.py
+++ b/contrib/python/wcwidth/py2/tests/test_core.py
@@ -222,17 +222,48 @@ def test_balinese_script():
     assert length_phrase == expect_length_phrase
 
 
+def test_kr_jamo():
+    """
+    Test basic combining of HANGUL CHOSEONG and JUNGSEONG
+
+    Example from Raymond Chen's blog post,
+    https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
+    """
+    # This is an example where both characters are "wide" when displayed alone.
+    #
+    # But JUNGSEONG (vowel) is designed for combination with a CHOSEONG (consonant).
+    #
+    # This wcwidth library understands their width only in combination,
+    # and not when displayed independently, like other zero-width characters
+    # that may only combine with an appropriate preceding character.
+    phrase = (
+        u"\u1100"  # ᄀ HANGUL CHOSEONG KIYEOK (consonant)
+        u"\u1161"  # ᅡ HANGUL JUNGSEONG A (vowel)
+    )
+    expect_length_each = (2, 0)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
 def test_kr_jamo_filler():
     u"""
     Jamo filler is 0 width.
 
-    According to https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf this character and others
-    like it, ``\uffa0``, ``\u1160``, ``\u115f``, ``\u1160``, are not commonly viewed with a terminal,
-    seems it doesn't matter whether it is implemented or not, they are not typically used !
+    Example from https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf
     """
-    phrase = u"\u1100\u1160"
-    expect_length_each = (2, 1)
-    expect_length_phrase = 3
+    phrase = (
+        u"\u1100"  # HANGUL CHOSEONG KIYEOK (consonant)
+        u"\u1160"  # HANGUL JUNGSEONG FILLER (vowel)
+    )
+    expect_length_each = (2, 0)
+    expect_length_phrase = 2
 
     # exercise,
     length_each = tuple(map(wcwidth.wcwidth, phrase))
@@ -355,3 +386,17 @@ def test_kannada_script_2():
     # verify.
     assert length_each == expect_length_each
     assert length_phrase == expect_length_phrase
+
+
+def test_zero_wide_conflict():
+    # Test characters considered both "wide" and "zero" width
+    # -  (0x03000, 0x0303e,),  # Ideographic Space       ..Ideographic Variation In
+    # +  (0x03000, 0x03029,),  # Ideographic Space       ..Hangzhou Numeral Nine
+    assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2
+    assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0
+
+    # - (0x03099, 0x030ff,),  # Combining Katakana-hirag..Katakana Digraph Koto
+    # + (0x0309b, 0x030ff,),  # Katakana-hiragana Voiced..Katakana Digraph Koto
+    assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0
+    assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0
+    assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2
author	robot-piglet <robot-piglet@yandex-team.com>	2024-02-04 02:13:03 +0300
committer	Alexander Smirnov <alex@ydb.tech>	2024-02-09 19:17:36 +0300
commit	055fb4247521918239057d343e52f835d818e3e1 (patch)
tree	a97c8832f272c15125ea3a27446fe74d720e450a /contrib/python/wcwidth/py2/tests/test_core.py
parent	0b8ccf7ff449ecfad252a58d14cd20c832deecaa (diff)
download	ydb-055fb4247521918239057d343e52f835d818e3e1.tar.gz