diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-02-04 02:13:03 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-02-04 02:31:01 +0300 |
commit | 9b729c17b913155dd17d52abe558cf647f8838d2 (patch) | |
tree | 38b8b5874c869cba5cd7e5fd276c85a12d39bc80 /contrib/python/wcwidth/py2/tests/test_core.py | |
parent | f8298c4cf5f642353e0c5e7cd058721a7210cb51 (diff) | |
download | ydb-9b729c17b913155dd17d52abe558cf647f8838d2.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/python/wcwidth/py2/tests/test_core.py')
-rw-r--r-- | contrib/python/wcwidth/py2/tests/test_core.py | 57 |
1 files changed, 51 insertions, 6 deletions
```diff
diff --git a/contrib/python/wcwidth/py2/tests/test_core.py b/contrib/python/wcwidth/py2/tests/test_core.py
index d2776cd992..60ed6b1cde 100644
--- a/contrib/python/wcwidth/py2/tests/test_core.py
+++ b/contrib/python/wcwidth/py2/tests/test_core.py
@@ -222,17 +222,48 @@ def test_balinese_script():
     assert length_phrase == expect_length_phrase
 
 
+def test_kr_jamo():
+    """
+    Test basic combining of HANGUL CHOSEONG and JUNGSEONG
+
+    Example and from Raymond Chen's blog post,
+    https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
+    """
+    # This is an example where both characters are "wide" when displayed alone.
+    #
+    # But JUNGSEONG (vowel) is designed for combination with a CHOSEONG (consonant).
+    #
+    # This wcwidth library understands their width only when combination,
+    # and not by independent display, like other zero-width characters that may
+    # only combine with an appropriate preceding character.
+    phrase = (
+        u"\u1100" # ᄀ HANGUL CHOSEONG KIYEOK (consonant)
+        u"\u1161" # ᅡ HANGUL JUNGSEONG A (vowel)
+    )
+    expect_length_each = (2, 0)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
 def test_kr_jamo_filler():
     u"""
     Jamo filler is 0 width.
 
-    According to https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf this character and others
-    like it, ``\uffa0``, ``\u1160``, ``\u115f``, ``\u1160``, are not commonly viewed with a terminal,
-    seems it doesn't matter whether it is implemented or not, they are not typically used !
+    Example from https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf
     """
-    phrase = u"\u1100\u1160"
-    expect_length_each = (2, 1)
-    expect_length_phrase = 3
+    phrase = (
+        u"\u1100" # HANGUL CHOSEONG KIYEOK (consonant)
+        u"\u1160" # HANGUL JUNGSEONG FILLER (vowel)
+    )
+    expect_length_each = (2, 0)
+    expect_length_phrase = 2
 
     # exercise,
     length_each = tuple(map(wcwidth.wcwidth, phrase))
@@ -355,3 +386,17 @@ def test_kannada_script_2():
     # verify.
     assert length_each == expect_length_each
     assert length_phrase == expect_length_phrase
+
+
+def test_zero_wide_conflict():
+    # Test characters considered both "wide" and "zero" width
+    # - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In
+    # + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine
+    assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2
+    assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0
+
+    # - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto
+    # + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
+    assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0
+    assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0
+    assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2
```