diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-02-04 02:13:03 +0300 |
---|---|---|
committer | Alexander Smirnov <alex@ydb.tech> | 2024-02-09 19:17:36 +0300 |
commit | 055fb4247521918239057d343e52f835d818e3e1 (patch) | |
tree | a97c8832f272c15125ea3a27446fe74d720e450a /contrib/python/wcwidth/py2/tests/test_core.py | |
parent | 0b8ccf7ff449ecfad252a58d14cd20c832deecaa (diff) | |
download | ydb-055fb4247521918239057d343e52f835d818e3e1.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/python/wcwidth/py2/tests/test_core.py')
-rw-r--r-- | contrib/python/wcwidth/py2/tests/test_core.py | 57 |
1 files changed, 51 insertions, 6 deletions
diff --git a/contrib/python/wcwidth/py2/tests/test_core.py b/contrib/python/wcwidth/py2/tests/test_core.py
index d2776cd992..60ed6b1cde 100644
--- a/contrib/python/wcwidth/py2/tests/test_core.py
+++ b/contrib/python/wcwidth/py2/tests/test_core.py
@@ -222,17 +222,48 @@ def test_balinese_script():
     assert length_phrase == expect_length_phrase
 
 
+def test_kr_jamo():
+    """
+    Test basic combining of HANGUL CHOSEONG and JUNGSEONG
+
+    Example and from Raymond Chen's blog post,
+    https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
+    """
+    # This is an example where both characters are "wide" when displayed alone.
+    #
+    # But JUNGSEONG (vowel) is designed for combination with a CHOSEONG (consonant).
+    #
+    # This wcwidth library understands their width only when combination,
+    # and not by independent display, like other zero-width characters that may
+    # only combine with an appropriate preceding character.
+    phrase = (
+        u"\u1100" # ᄀ HANGUL CHOSEONG KIYEOK (consonant)
+        u"\u1161" # ᅡ HANGUL JUNGSEONG A (vowel)
+    )
+    expect_length_each = (2, 0)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
 def test_kr_jamo_filler():
     u"""
     Jamo filler is 0 width.
 
-    According to https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf this character and others
-    like it, ``\uffa0``, ``\u1160``, ``\u115f``, ``\u1160``, are not commonly viewed with a terminal,
-    seems it doesn't matter whether it is implemented or not, they are not typically used !
+    Example from https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf
     """
-    phrase = u"\u1100\u1160"
-    expect_length_each = (2, 1)
-    expect_length_phrase = 3
+    phrase = (
+        u"\u1100" # HANGUL CHOSEONG KIYEOK (consonant)
+        u"\u1160" # HANGUL JUNGSEONG FILLER (vowel)
+    )
+    expect_length_each = (2, 0)
+    expect_length_phrase = 2
 
     # exercise,
     length_each = tuple(map(wcwidth.wcwidth, phrase))
@@ -355,3 +386,17 @@ def test_kannada_script_2():
     # verify.
     assert length_each == expect_length_each
     assert length_phrase == expect_length_phrase
+
+
+def test_zero_wide_conflict():
+    # Test characters considered both "wide" and "zero" width
+    # - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In
+    # + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine
+    assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2
+    assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0
+
+    # - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto
+    # + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
+    assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0
+    assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0
+    assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2