Intermediate changes

author: robot-piglet <robot-piglet@yandex-team.com> 2024-02-04 02:13:03 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2024-02-04 02:31:01 +0300
commit: 9b729c17b913155dd17d52abe558cf647f8838d2 (patch)
tree: 38b8b5874c869cba5cd7e5fd276c85a12d39bc80 /contrib/python/wcwidth/py2/tests/test_core.py
parent: f8298c4cf5f642353e0c5e7cd058721a7210cb51 (diff)
download: ydb-9b729c17b913155dd17d52abe558cf647f8838d2.tar.gz
1 files changed, 51 insertions, 6 deletions
diff --git a/contrib/python/wcwidth/py2/tests/test_core.py b/contrib/python/wcwidth/py2/tests/test_core.py
index d2776cd992..60ed6b1cde 100644
--- a/contrib/python/wcwidth/py2/tests/test_core.py
+++ b/contrib/python/wcwidth/py2/tests/test_core.py
@@ -222,17 +222,48 @@ def test_balinese_script():
     assert length_phrase == expect_length_phrase
 
 
+def test_kr_jamo():
+    """
+    Test basic combining of HANGUL CHOSEONG and JUNGSEONG
+
+    Example from Raymond Chen's blog post,
+    https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
+    """
+    # This is an example where both characters are "wide" when displayed alone.
+    #
+    # But JUNGSEONG (vowel) is designed for combination with a CHOSEONG (consonant).
+    #
+    # This wcwidth library understands their width only when in combination,
+    # and not by independent display, like other zero-width characters that may
+    # only combine with an appropriate preceding character.
+    phrase = (
+        u"\u1100"  # ᄀ HANGUL CHOSEONG KIYEOK (consonant)
+        u"\u1161"  # ᅡ HANGUL JUNGSEONG A (vowel)
+    )
+    expect_length_each = (2, 0)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
 def test_kr_jamo_filler():
     u"""
     Jamo filler is 0 width.
 
-    According to https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf this character and others
-    like it, ``\uffa0``, ``\u1160``, ``\u115f``, ``\u1160``, are not commonly viewed with a terminal,
-    seems it doesn't matter whether it is implemented or not, they are not typically used !
+    Example from https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf
     """
-    phrase = u"\u1100\u1160"
-    expect_length_each = (2, 1)
-    expect_length_phrase = 3
+    phrase = (
+        u"\u1100"  # HANGUL CHOSEONG KIYEOK (consonant)
+        u"\u1160"  # HANGUL JUNGSEONG FILLER (vowel)
+    )
+    expect_length_each = (2, 0)
+    expect_length_phrase = 2
 
     # exercise,
     length_each = tuple(map(wcwidth.wcwidth, phrase))
@@ -355,3 +386,17 @@ def test_kannada_script_2():
     # verify.
     assert length_each == expect_length_each
     assert length_phrase == expect_length_phrase
+
+
+def test_zero_wide_conflict():
+    # Test characters considered both "wide" and "zero" width
+    # -  (0x03000, 0x0303e,),  # Ideographic Space       ..Ideographic Variation In
+    # +  (0x03000, 0x03029,),  # Ideographic Space       ..Hangzhou Numeral Nine
+    assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2
+    assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0
+
+    # - (0x03099, 0x030ff,),  # Combining Katakana-hirag..Katakana Digraph Koto
+    # + (0x0309b, 0x030ff,),  # Katakana-hiragana Voiced..Katakana Digraph Koto
+    assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0
+    assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0
+    assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2
author	robot-piglet <robot-piglet@yandex-team.com>	2024-02-04 02:13:03 +0300
committer	robot-piglet <robot-piglet@yandex-team.com>	2024-02-04 02:31:01 +0300
commit	9b729c17b913155dd17d52abe558cf647f8838d2 (patch)
tree	38b8b5874c869cba5cd7e5fd276c85a12d39bc80 /contrib/python/wcwidth/py2/tests/test_core.py
parent	f8298c4cf5f642353e0c5e7cd058721a7210cb51 (diff)
download	ydb-9b729c17b913155dd17d52abe558cf647f8838d2.tar.gz