aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/wcwidth/py2/tests/test_core.py
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2024-02-04 02:13:03 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2024-02-04 02:31:01 +0300
commit9b729c17b913155dd17d52abe558cf647f8838d2 (patch)
tree38b8b5874c869cba5cd7e5fd276c85a12d39bc80 /contrib/python/wcwidth/py2/tests/test_core.py
parentf8298c4cf5f642353e0c5e7cd058721a7210cb51 (diff)
downloadydb-9b729c17b913155dd17d52abe558cf647f8838d2.tar.gz
Intermediate changes
Diffstat (limited to 'contrib/python/wcwidth/py2/tests/test_core.py')
-rw-r--r--contrib/python/wcwidth/py2/tests/test_core.py57
1 files changed, 51 insertions, 6 deletions
diff --git a/contrib/python/wcwidth/py2/tests/test_core.py b/contrib/python/wcwidth/py2/tests/test_core.py
index d2776cd992..60ed6b1cde 100644
--- a/contrib/python/wcwidth/py2/tests/test_core.py
+++ b/contrib/python/wcwidth/py2/tests/test_core.py
@@ -222,17 +222,48 @@ def test_balinese_script():
assert length_phrase == expect_length_phrase
+def test_kr_jamo():
+ """
+ Test basic combining of HANGUL CHOSEONG and JUNGSEONG.
+
+ Example from Raymond Chen's blog post,
+ https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
+ """
+ # This is an example where both characters are "wide" when displayed alone.
+ #
+ # But a JUNGSEONG (vowel) is designed for combination with a CHOSEONG (consonant).
+ #
+ # This wcwidth library measures their width only as a combination,
+ # and not as displayed independently, like other zero-width characters that
+ # may only combine with an appropriate preceding character.
+ phrase = (
+ u"\u1100" # ᄀ HANGUL CHOSEONG KIYEOK (consonant)
+ u"\u1161" # ᅡ HANGUL JUNGSEONG A (vowel)
+ )
+ expect_length_each = (2, 0) # consonant is wide; the vowel is measured as zero-width
+ expect_length_phrase = 2 # the pair occupies the cells of the consonant alone
+
+ # exercise: measure each character individually, then the phrase as a whole,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
def test_kr_jamo_filler():
u"""
Jamo filler is 0 width.
- According to https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf this character and others
- like it, ``\uffa0``, ``\u1160``, ``\u115f``, ``\u1160``, are not commonly viewed with a terminal,
- seems it doesn't matter whether it is implemented or not, they are not typically used !
+ Example from https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf
"""
- phrase = u"\u1100\u1160"
- expect_length_each = (2, 1)
- expect_length_phrase = 3
+ phrase = (
+ u"\u1100" # HANGUL CHOSEONG KIYEOK (consonant)
+ u"\u1160" # HANGUL JUNGSEONG FILLER (vowel)
+ )
+ expect_length_each = (2, 0)
+ expect_length_phrase = 2
# exercise,
length_each = tuple(map(wcwidth.wcwidth, phrase))
@@ -355,3 +386,17 @@ def test_kannada_script_2():
# verify.
assert length_each == expect_length_each
assert length_phrase == expect_length_phrase
+
+
+def test_zero_wide_conflict():
+ # Test characters considered both "wide" and "zero" width. Each "- / +" comment pair quotes a range edit (presumably in the wide table -- confirm).
+ # - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In
+ # + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine
+ assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2 # upper bound of the narrowed range
+ assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0 # first code point past the new upper bound
+
+ # - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto
+ # + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
+ assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0 # old lower bound, now outside the range
+ assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0 # also dropped from the range
+ assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2 # new lower bound of the range