Update contrib/python/wcwidth/py3 to 0.2.10

author: robot-contrib <robot-contrib@yandex-team.com> 2023-11-28 10:22:06 +0300
committer: robot-contrib <robot-contrib@yandex-team.com> 2023-11-28 11:32:21 +0300
commit: e47e2d7f764c4433fc3ccb00c5875361175988f7 (patch)
tree: 7fbd42979a8996707562949ec1a98eab234a090d /contrib/python/wcwidth/py3/tests
parent: 13de5f3d2da60e60f030c6cde3e389b07a9192d3 (diff)
download: ydb-e47e2d7f764c4433fc3ccb00c5875361175988f7.tar.gz
2 files changed, 243 insertions, 103 deletions
diff --git a/contrib/python/wcwidth/py3/tests/test_core.py b/contrib/python/wcwidth/py3/tests/test_core.py
index f0396ea968b..d2776cd992b 100644
--- a/contrib/python/wcwidth/py3/tests/test_core.py
+++ b/contrib/python/wcwidth/py3/tests/test_core.py
@@ -10,23 +10,12 @@ except ImportError:
 # local
 import wcwidth
 
-# 3rd party
-import pytest
-
-# some tests cannot be done on some builds of python, where the internal
-# unicode structure is limited to 0x10000 for memory conservation,
-# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
 try:
     # python 2
     _ = unichr
 except NameError:
     # python 3
     unichr = chr
-try:
-    unichr(0x2fffe)
-    NARROW_ONLY = False
-except ValueError:
-    NARROW_ONLY = True
 
 
 def test_package_version():
@@ -254,98 +243,6 @@ def test_kr_jamo_filler():
     assert length_phrase == expect_length_phrase
 
 
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def emoji_zwj_sequence():
-    u"""
-    Emoji zwj sequence of four codepoints is just 2 cells.
-    """
-    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
-              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
-              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
-              u"\U0001f4bb")  # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER
-    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
-    expect_length_each = (2, 0, 0, 2)
-    expect_length_phrase = 2
-
-    # exercise,
-    length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase)
-
-    # verify.
-    assert length_each == expect_length_each
-    assert length_phrase == expect_length_phrase
-
-
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def test_unfinished_zwj_sequence():
-    u"""
-    Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
-    """
-    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
-              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
-              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
-    expect_length_each = (2, 0, 0)
-    expect_length_phrase = 2
-
-    # exercise,
-    length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase)
-
-    # verify.
-    assert length_each == expect_length_each
-    assert length_phrase == expect_length_phrase
-
-
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def test_non_recommended_zwj_sequence():
-    """
-    Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
-    """
-    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
-              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
-              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
-    expect_length_each = (2, 0, 0)
-    expect_length_phrase = 2
-
-    # exercise,
-    length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase)
-
-    # verify.
-    assert length_each == expect_length_each
-    assert length_phrase == expect_length_phrase
-
-
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def test_longer_emoji_zwj_sequence():
-    """
-    A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
-    """
-    # 'Category Code', 'East Asian Width property' -- 'description'
-    phrase = (u"\U0001F9D1"   # 'So', 'W' -- ADULT
-              u"\U0001F3FB"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
-              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
-              u"\u2764"       # 'So', 'N' -- HEAVY BLACK HEART
-              u"\uFE0F"       # 'Mn', 'A' -- VARIATION SELECTOR-16
-              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
-              u"\U0001F48B"   # 'So', 'W' -- KISS MARK
-              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
-              u"\U0001F9D1"   # 'So', 'W' -- ADULT
-              u"\U0001F3FD")  # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
-
-    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
-    expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0)
-    expect_length_phrase = 2
-
-    # exercise,
-    length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase)
-
-    # verify.
-    assert length_each == expect_length_each
-    assert length_phrase == expect_length_phrase
-
-
 def test_devanagari_script():
     """
     Attempt to test the measurement width of Devanagari script.
diff --git a/contrib/python/wcwidth/py3/tests/test_emojis.py b/contrib/python/wcwidth/py3/tests/test_emojis.py
new file mode 100644
index 00000000000..4f88e2330e5
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_emojis.py
@@ -0,0 +1,243 @@
+# std imports
+import os
+import codecs
+
+# 3rd party
+import pytest
+
+try:
+    # python 2
+    _ = unichr
+except NameError:
+    # python 3
+    unichr = chr
+
+# some tests cannot be done on some builds of python, where the internal
+# unicode structure is limited to 0x10000 for memory conservation,
+# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
+try:
+    unichr(0x2fffe)
+    NARROW_ONLY = False
+except ValueError:
+    NARROW_ONLY = True
+
+# local
+import wcwidth
+
+
+def make_sequence_from_line(line):
+    # convert '002A FE0F  ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
+    return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split())
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def emoji_zwj_sequence():
+    u"""
+    Emoji zwj sequence of four codepoints is just 2 cells.
+    """
+    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
+              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
+              u"\U0001f4bb")  # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER
+    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+    expect_length_each = (2, 0, 0, 2)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_unfinished_zwj_sequence():
+    u"""
+    Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
+    """
+    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
+              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
+    expect_length_each = (2, 0, 0)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_non_recommended_zwj_sequence():
+    """
+    Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
+    """
+    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
+              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
+    expect_length_each = (2, 0, 0)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_another_emoji_zwj_sequence():
+    phrase = (
+        u"\u26F9"        # PERSON WITH BALL
+        u"\U0001F3FB"    # EMOJI MODIFIER FITZPATRICK TYPE-1-2
+        u"\u200D"        # ZERO WIDTH JOINER
+        u"\u2640"        # FEMALE SIGN
+        u"\uFE0F")       # VARIATION SELECTOR-16
+    expect_length_each = (1, 0, 0, 1, 0)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_longer_emoji_zwj_sequence():
+    """
+    A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
+
+    Also test the same sequence in duplicate, verifying multiple VS-16 sequences
+    in a single function call.
+    """
+    # 'Category Code', 'East Asian Width property' -- 'description'
+    phrase = (u"\U0001F9D1"   # 'So', 'W' -- ADULT
+              u"\U0001F3FB"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
+              u"\u2764"       # 'So', 'N' -- HEAVY BLACK HEART
+              u"\uFE0F"       # 'Mn', 'A' -- VARIATION SELECTOR-16
+              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
+              u"\U0001F48B"   # 'So', 'W' -- KISS MARK
+              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
+              u"\U0001F9D1"   # 'So', 'W' -- ADULT
+              u"\U0001F3FD"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
+    ) * 2
+    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+    expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2
+    expect_length_phrase = 4
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+def read_sequences_from_file(filename):
+    fp = codecs.open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8')
+    lines = [line.strip()
+                for line in fp.readlines()
+                if not line.startswith('#') and line.strip()]
+    fp.close()
+    sequences = [make_sequence_from_line(line) for line in lines]
+    return lines, sequences
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
+def test_recommended_emoji_zwj_sequences():
+    """
+    Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt
+    """
+    # given,
+    lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')
+
+    errors = []
+    # Exercise, track by zipping with original text file line, a debugging aide
+    num = 0
+    for sequence, line in zip(sequences, lines):
+        num += 1
+        measured_width = wcwidth.wcswidth(sequence)
+        if measured_width != 2:
+            errors.append({
+                'expected_width': 2,
+                'line': line,
+                'measured_width': measured_width,
+                'sequence': sequence,
+            })
+
+    # verify
+    assert errors == []
+    assert num >= 1468
+
+
+def test_recommended_variation_16_sequences():
+    """
+    Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt
+    """
+    # given,
+    lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
+
+    errors = []
+    num = 0
+    for sequence, line in zip(sequences, lines):
+        num += 1
+        if '\ufe0f' not in sequence:
+            # filter for only \uFE0F (VS-16)
+            continue
+        measured_width = wcwidth.wcswidth(sequence)
+        if measured_width != 2:
+            errors.append({
+                'expected_width': 2,
+                'line': line,
+                'measured_width': wcwidth.wcswidth(sequence),
+                'sequence': sequence,
+            })
+
+    # verify
+    assert errors == []
+    assert num >= 742
+
+
+def test_unicode_9_vs16():
+    """Verify effect of VS-16 on unicode_version 9.0 and later"""
+    phrase = (u"\u2640"        # FEMALE SIGN
+              u"\uFE0F")       # VARIATION SELECTOR-16
+
+    expect_length_each = (1, 0)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase)
+    length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+def test_unicode_8_vs16():
+    """Verify that VS-16 has no effect on unicode_version 8.0 and earler"""
+    phrase = (u"\u2640"        # FEMALE SIGN
+              u"\uFE0F")       # VARIATION SELECTOR-16
+
+    expect_length_each = (1, 0)
+    expect_length_phrase = 1
+
+    # exercise,
+    length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase)
+    length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+\ No newline at end of file
author	robot-contrib <robot-contrib@yandex-team.com>	2023-11-28 10:22:06 +0300
committer	robot-contrib <robot-contrib@yandex-team.com>	2023-11-28 11:32:21 +0300
commit	e47e2d7f764c4433fc3ccb00c5875361175988f7 (patch)
tree	7fbd42979a8996707562949ec1a98eab234a090d /contrib/python/wcwidth/py3/tests
parent	13de5f3d2da60e60f030c6cde3e389b07a9192d3 (diff)
download	ydb-e47e2d7f764c4433fc3ccb00c5875361175988f7.tar.gz