aboutsummaryrefslogtreecommitdiffstats
path: root/contrib
diff options
context:
space:
mode:
author robot-contrib <robot-contrib@yandex-team.com> 2023-11-28 10:22:06 +0300
committer robot-contrib <robot-contrib@yandex-team.com> 2023-11-28 11:32:21 +0300
commit e47e2d7f764c4433fc3ccb00c5875361175988f7 (patch)
tree 7fbd42979a8996707562949ec1a98eab234a090d /contrib
parent 13de5f3d2da60e60f030c6cde3e389b07a9192d3 (diff)
download ydb-e47e2d7f764c4433fc3ccb00c5875361175988f7.tar.gz
Update contrib/python/wcwidth/py3 to 0.2.10
Diffstat (limited to 'contrib')
-rw-r--r-- contrib/python/wcwidth/py3/.dist-info/METADATA 10
-rw-r--r-- contrib/python/wcwidth/py3/README.rst 8
-rw-r--r-- contrib/python/wcwidth/py3/tests/test_core.py 103
-rw-r--r-- contrib/python/wcwidth/py3/tests/test_emojis.py 243
-rw-r--r-- contrib/python/wcwidth/py3/wcwidth/__init__.py 3
-rw-r--r-- contrib/python/wcwidth/py3/wcwidth/table_vs16.py 125
-rw-r--r-- contrib/python/wcwidth/py3/wcwidth/wcwidth.py 18
-rw-r--r-- contrib/python/wcwidth/py3/ya.make 3
8 files changed, 405 insertions, 108 deletions
diff --git a/contrib/python/wcwidth/py3/.dist-info/METADATA b/contrib/python/wcwidth/py3/.dist-info/METADATA
index 0a4dd22135..7c34843f75 100644
--- a/contrib/python/wcwidth/py3/.dist-info/METADATA
+++ b/contrib/python/wcwidth/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: wcwidth
-Version: 0.2.9
+Version: 0.2.10
Summary: Measures the displayed width of unicode strings in a terminal
Home-page: https://github.com/jquast/wcwidth
Author: Jeff Quast
@@ -247,10 +247,15 @@ Other Languages
=======
History
=======
+0.2.10 *2023-11-08*
+ * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F
+ Variation Selector 16 (`PR #97`_).
+ * **Updated** `Specification <Specification_from_pypi_>`_.
+
0.2.9 *2023-10-30*
* **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese,
Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_).
- * **Updated** to include `Specification <Specification_from_pypi>`_ of
+ * **Updated** to include `Specification <Specification_from_pypi_>`_ of
character measurements.
0.2.8 *2023-09-30*
@@ -350,6 +355,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`PR #35`: https://github.com/jquast/wcwidth/pull/35
.. _`PR #82`: https://github.com/jquast/wcwidth/pull/82
.. _`PR #91`: https://github.com/jquast/wcwidth/pull/91
+.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97
.. _`jquast/blessed`: https://github.com/jquast/blessed
.. _`selectel/pyte`: https://github.com/selectel/pyte
.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies
diff --git a/contrib/python/wcwidth/py3/README.rst b/contrib/python/wcwidth/py3/README.rst
index 2d9722d527..749d89aa03 100644
--- a/contrib/python/wcwidth/py3/README.rst
+++ b/contrib/python/wcwidth/py3/README.rst
@@ -216,10 +216,15 @@ Other Languages
=======
History
=======
+0.2.10 *2023-11-08*
+ * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F
+ Variation Selector 16 (`PR #97`_).
+ * **Updated** `Specification <Specification_from_pypi_>`_.
+
0.2.9 *2023-10-30*
* **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese,
Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_).
- * **Updated** to include `Specification <Specification_from_pypi>`_ of
+ * **Updated** to include `Specification <Specification_from_pypi_>`_ of
character measurements.
0.2.8 *2023-09-30*
@@ -319,6 +324,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`PR #35`: https://github.com/jquast/wcwidth/pull/35
.. _`PR #82`: https://github.com/jquast/wcwidth/pull/82
.. _`PR #91`: https://github.com/jquast/wcwidth/pull/91
+.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97
.. _`jquast/blessed`: https://github.com/jquast/blessed
.. _`selectel/pyte`: https://github.com/selectel/pyte
.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies
diff --git a/contrib/python/wcwidth/py3/tests/test_core.py b/contrib/python/wcwidth/py3/tests/test_core.py
index f0396ea968..d2776cd992 100644
--- a/contrib/python/wcwidth/py3/tests/test_core.py
+++ b/contrib/python/wcwidth/py3/tests/test_core.py
@@ -10,23 +10,12 @@ except ImportError:
# local
import wcwidth
-# 3rd party
-import pytest
-
-# some tests cannot be done on some builds of python, where the internal
-# unicode structure is limited to 0x10000 for memory conservation,
-# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
try:
# python 2
_ = unichr
except NameError:
# python 3
unichr = chr
-try:
- unichr(0x2fffe)
- NARROW_ONLY = False
-except ValueError:
- NARROW_ONLY = True
def test_package_version():
@@ -254,98 +243,6 @@ def test_kr_jamo_filler():
assert length_phrase == expect_length_phrase
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def emoji_zwj_sequence():
- u"""
- Emoji zwj sequence of four codepoints is just 2 cells.
- """
- phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
- u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
- u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
- u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER
- # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
- expect_length_each = (2, 0, 0, 2)
- expect_length_phrase = 2
-
- # exercise,
- length_each = tuple(map(wcwidth.wcwidth, phrase))
- length_phrase = wcwidth.wcswidth(phrase)
-
- # verify.
- assert length_each == expect_length_each
- assert length_phrase == expect_length_phrase
-
-
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def test_unfinished_zwj_sequence():
- u"""
- Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
- """
- phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
- u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
- u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
- expect_length_each = (2, 0, 0)
- expect_length_phrase = 2
-
- # exercise,
- length_each = tuple(map(wcwidth.wcwidth, phrase))
- length_phrase = wcwidth.wcswidth(phrase)
-
- # verify.
- assert length_each == expect_length_each
- assert length_phrase == expect_length_phrase
-
-
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def test_non_recommended_zwj_sequence():
- """
- Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
- """
- phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
- u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
- u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
- expect_length_each = (2, 0, 0)
- expect_length_phrase = 2
-
- # exercise,
- length_each = tuple(map(wcwidth.wcwidth, phrase))
- length_phrase = wcwidth.wcswidth(phrase)
-
- # verify.
- assert length_each == expect_length_each
- assert length_phrase == expect_length_phrase
-
-
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def test_longer_emoji_zwj_sequence():
- """
- A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
- """
- # 'Category Code', 'East Asian Width property' -- 'description'
- phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT
- u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
- u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
- u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART
- u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16
- u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
- u"\U0001F48B" # 'So', 'W' -- KISS MARK
- u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
- u"\U0001F9D1" # 'So', 'W' -- ADULT
- u"\U0001F3FD") # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
-
- # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
- expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0)
- expect_length_phrase = 2
-
- # exercise,
- length_each = tuple(map(wcwidth.wcwidth, phrase))
- length_phrase = wcwidth.wcswidth(phrase)
-
- # verify.
- assert length_each == expect_length_each
- assert length_phrase == expect_length_phrase
-
-
def test_devanagari_script():
"""
Attempt to test the measurement width of Devanagari script.
diff --git a/contrib/python/wcwidth/py3/tests/test_emojis.py b/contrib/python/wcwidth/py3/tests/test_emojis.py
new file mode 100644
index 0000000000..4f88e2330e
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_emojis.py
@@ -0,0 +1,243 @@
+# std imports
+import os
+import codecs
+
+# 3rd party
+import pytest
+
+try:
+ # python 2
+ _ = unichr
+except NameError:
+ # python 3
+ unichr = chr
+
+# some tests cannot be done on some builds of python, where the internal
+# unicode structure is limited to 0x10000 for memory conservation,
+# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
+try:
+ unichr(0x2fffe)
+ NARROW_ONLY = False
+except ValueError:
+ NARROW_ONLY = True
+
+# local
+import wcwidth
+
+
+def make_sequence_from_line(line):
+ # convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
+ return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split())
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def emoji_zwj_sequence():
+ u"""
+ Emoji zwj sequence of four codepoints is just 2 cells.
+ """
+ phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
+ u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
+ u"\U0001f4bb") # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
+ # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+ expect_length_each = (2, 0, 0, 2)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_unfinished_zwj_sequence():
+ u"""
+ Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
+ """
+ phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
+ u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
+ expect_length_each = (2, 0, 0)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_non_recommended_zwj_sequence():
+ """
+ Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
+ """
+ phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
+ u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
+ expect_length_each = (2, 0, 0)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_another_emoji_zwj_sequence():
+ phrase = (
+ u"\u26F9" # PERSON WITH BALL
+ u"\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200D" # ZERO WIDTH JOINER
+ u"\u2640" # FEMALE SIGN
+ u"\uFE0F") # VARIATION SELECTOR-16
+ expect_length_each = (1, 0, 0, 1, 0)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_longer_emoji_zwj_sequence():
+ """
+ A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
+
+ Also test the same sequence in duplicate, verifying multiple VS-16 sequences
+ in a single function call.
+ """
+ # 'Category Code', 'East Asian Width property' -- 'description'
+ phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT
+ u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
+ u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART
+ u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16
+ u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
+ u"\U0001F48B" # 'So', 'W' -- KISS MARK
+ u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
+ u"\U0001F9D1" # 'So', 'W' -- ADULT
+ u"\U0001F3FD" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
+ ) * 2
+ # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+ expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2
+ expect_length_phrase = 4
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+def read_sequences_from_file(filename):
+ fp = codecs.open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8')
+ lines = [line.strip()
+ for line in fp.readlines()
+ if not line.startswith('#') and line.strip()]
+ fp.close()
+ sequences = [make_sequence_from_line(line) for line in lines]
+ return lines, sequences
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
+def test_recommended_emoji_zwj_sequences():
+ """
+ Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt
+ """
+ # given,
+ lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')
+
+ errors = []
+ # Exercise, track by zipping with original text file line, a debugging aid
+ num = 0
+ for sequence, line in zip(sequences, lines):
+ num += 1
+ measured_width = wcwidth.wcswidth(sequence)
+ if measured_width != 2:
+ errors.append({
+ 'expected_width': 2,
+ 'line': line,
+ 'measured_width': measured_width,
+ 'sequence': sequence,
+ })
+
+ # verify
+ assert errors == []
+ assert num >= 1468
+
+
+def test_recommended_variation_16_sequences():
+ """
+ Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt
+ """
+ # given,
+ lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
+
+ errors = []
+ num = 0
+ for sequence, line in zip(sequences, lines):
+ num += 1
+ if '\ufe0f' not in sequence:
+ # filter for only \uFE0F (VS-16)
+ continue
+ measured_width = wcwidth.wcswidth(sequence)
+ if measured_width != 2:
+ errors.append({
+ 'expected_width': 2,
+ 'line': line,
+ 'measured_width': wcwidth.wcswidth(sequence),
+ 'sequence': sequence,
+ })
+
+ # verify
+ assert errors == []
+ assert num >= 742
+
+
+def test_unicode_9_vs16():
+ """Verify effect of VS-16 on unicode_version 9.0 and later"""
+ phrase = (u"\u2640" # FEMALE SIGN
+ u"\uFE0F") # VARIATION SELECTOR-16
+
+ expect_length_each = (1, 0)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase)
+ length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+def test_unicode_8_vs16():
+ """Verify that VS-16 has no effect on unicode_version 8.0 and earlier"""
+ phrase = (u"\u2640" # FEMALE SIGN
+ u"\uFE0F") # VARIATION SELECTOR-16
+
+ expect_length_each = (1, 0)
+ expect_length_phrase = 1
+
+ # exercise,
+ length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase)
+ length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase \ No newline at end of file
diff --git a/contrib/python/wcwidth/py3/wcwidth/__init__.py b/contrib/python/wcwidth/py3/wcwidth/__init__.py
index 212e72d033..91e18dbe90 100644
--- a/contrib/python/wcwidth/py3/wcwidth/__init__.py
+++ b/contrib/python/wcwidth/py3/wcwidth/__init__.py
@@ -11,6 +11,7 @@ https://github.com/jquast/wcwidth
# local
from .wcwidth import ZERO_WIDTH # noqa
from .wcwidth import (WIDE_EASTASIAN,
+ VS16_NARROW_TO_WIDE,
wcwidth,
wcswidth,
_bisearch,
@@ -25,4 +26,4 @@ __all__ = ('wcwidth', 'wcswidth', 'list_versions')
# We also used pkg_resources to load unicode version tables from version.json,
# generated by bin/update-tables.py, but some environments are unable to
# import pkg_resources for one reason or another, yikes!
-__version__ = '0.2.9'
+__version__ = '0.2.10'
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_vs16.py b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py
new file mode 100644
index 0000000000..3249262d98
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py
@@ -0,0 +1,125 @@
+"""
+Exports VS16_NARROW_TO_WIDE table keyed by supporting unicode version level.
+
+This code generated by wcwidth/bin/update-tables.py on 2023-11-07 16:43:49 UTC.
+"""
+VS16_NARROW_TO_WIDE = {
+ '9.0.0': (
+ # Source: 9.0.0
+ # Date: 2023-02-01, 02:22:54 GMT
+ #
+ (0x00023, 0x00023,), # Number Sign
+ (0x0002a, 0x0002a,), # Asterisk
+ (0x00030, 0x00039,), # Digit Zero ..Digit Nine
+ (0x000a9, 0x000a9,), # Copyright Sign
+ (0x000ae, 0x000ae,), # Registered Sign
+ (0x0203c, 0x0203c,), # Double Exclamation Mark
+ (0x02049, 0x02049,), # Exclamation Question Mark
+ (0x02122, 0x02122,), # Trade Mark Sign
+ (0x02139, 0x02139,), # Information Source
+ (0x02194, 0x02199,), # Left Right Arrow ..South West Arrow
+ (0x021a9, 0x021aa,), # Leftwards Arrow With Hoo..Rightwards Arrow With Ho
+ (0x02328, 0x02328,), # Keyboard
+ (0x023cf, 0x023cf,), # Eject Symbol
+ (0x023ed, 0x023ef,), # Black Right-pointing Dou..Black Right-pointing Tri
+ (0x023f1, 0x023f2,), # Stopwatch ..Timer Clock
+ (0x023f8, 0x023fa,), # Double Vertical Bar ..Black Circle For Record
+ (0x024c2, 0x024c2,), # Circled Latin Capital Letter M
+ (0x025aa, 0x025ab,), # Black Small Square ..White Small Square
+ (0x025b6, 0x025b6,), # Black Right-pointing Triangle
+ (0x025c0, 0x025c0,), # Black Left-pointing Triangle
+ (0x025fb, 0x025fc,), # White Medium Square ..Black Medium Square
+ (0x02600, 0x02604,), # Black Sun With Rays ..Comet
+ (0x0260e, 0x0260e,), # Black Telephone
+ (0x02611, 0x02611,), # Ballot Box With Check
+ (0x02618, 0x02618,), # Shamrock
+ (0x0261d, 0x0261d,), # White Up Pointing Index
+ (0x02620, 0x02620,), # Skull And Crossbones
+ (0x02622, 0x02623,), # Radioactive Sign ..Biohazard Sign
+ (0x02626, 0x02626,), # Orthodox Cross
+ (0x0262a, 0x0262a,), # Star And Crescent
+ (0x0262e, 0x0262f,), # Peace Symbol ..Yin Yang
+ (0x02638, 0x0263a,), # Wheel Of Dharma ..White Smiling Face
+ (0x02640, 0x02640,), # Female Sign
+ (0x02642, 0x02642,), # Male Sign
+ (0x0265f, 0x02660,), # Black Chess Pawn ..Black Spade Suit
+ (0x02663, 0x02663,), # Black Club Suit
+ (0x02665, 0x02666,), # Black Heart Suit ..Black Diamond Suit
+ (0x02668, 0x02668,), # Hot Springs
+ (0x0267b, 0x0267b,), # Black Universal Recycling Symbol
+ (0x0267e, 0x0267e,), # Permanent Paper Sign
+ (0x02692, 0x02692,), # Hammer And Pick
+ (0x02694, 0x02697,), # Crossed Swords ..Alembic
+ (0x02699, 0x02699,), # Gear
+ (0x0269b, 0x0269c,), # Atom Symbol ..Fleur-de-lis
+ (0x026a0, 0x026a0,), # Warning Sign
+ (0x026a7, 0x026a7,), # Male With Stroke And Male And Female Sign
+ (0x026b0, 0x026b1,), # Coffin ..Funeral Urn
+ (0x026c8, 0x026c8,), # Thunder Cloud And Rain
+ (0x026cf, 0x026cf,), # Pick
+ (0x026d1, 0x026d1,), # Helmet With White Cross
+ (0x026d3, 0x026d3,), # Chains
+ (0x026e9, 0x026e9,), # Shinto Shrine
+ (0x026f0, 0x026f1,), # Mountain ..Umbrella On Ground
+ (0x026f4, 0x026f4,), # Ferry
+ (0x026f7, 0x026f9,), # Skier ..Person With Ball
+ (0x02702, 0x02702,), # Black Scissors
+ (0x02708, 0x02709,), # Airplane ..Envelope
+ (0x0270c, 0x0270d,), # Victory Hand ..Writing Hand
+ (0x0270f, 0x0270f,), # Pencil
+ (0x02712, 0x02712,), # Black Nib
+ (0x02714, 0x02714,), # Heavy Check Mark
+ (0x02716, 0x02716,), # Heavy Multiplication X
+ (0x0271d, 0x0271d,), # Latin Cross
+ (0x02721, 0x02721,), # Star Of David
+ (0x02733, 0x02734,), # Eight Spoked Asterisk ..Eight Pointed Black Star
+ (0x02744, 0x02744,), # Snowflake
+ (0x02747, 0x02747,), # Sparkle
+ (0x02763, 0x02764,), # Heavy Heart Exclamation ..Heavy Black Heart
+ (0x027a1, 0x027a1,), # Black Rightwards Arrow
+ (0x02934, 0x02935,), # Arrow Pointing Rightward..Arrow Pointing Rightward
+ (0x02b05, 0x02b07,), # Leftwards Black Arrow ..Downwards Black Arrow
+ (0x1f170, 0x1f171,), # Negative Squared Latin C..Negative Squared Latin C
+ (0x1f17e, 0x1f17f,), # Negative Squared Latin C..Negative Squared Latin C
+ (0x1f321, 0x1f321,), # Thermometer
+ (0x1f324, 0x1f32c,), # White Sun With Small Clo..Wind Blowing Face
+ (0x1f336, 0x1f336,), # Hot Pepper
+ (0x1f37d, 0x1f37d,), # Fork And Knife With Plate
+ (0x1f396, 0x1f397,), # Military Medal ..Reminder Ribbon
+ (0x1f399, 0x1f39b,), # Studio Microphone ..Control Knobs
+ (0x1f39e, 0x1f39f,), # Film Frames ..Admission Tickets
+ (0x1f3cb, 0x1f3ce,), # Weight Lifter ..Racing Car
+ (0x1f3d4, 0x1f3df,), # Snow Capped Mountain ..Stadium
+ (0x1f3f3, 0x1f3f3,), # Waving White Flag
+ (0x1f3f5, 0x1f3f5,), # Rosette
+ (0x1f3f7, 0x1f3f7,), # Label
+ (0x1f43f, 0x1f43f,), # Chipmunk
+ (0x1f441, 0x1f441,), # Eye
+ (0x1f4fd, 0x1f4fd,), # Film Projector
+ (0x1f549, 0x1f54a,), # Om Symbol ..Dove Of Peace
+ (0x1f56f, 0x1f570,), # Candle ..Mantelpiece Clock
+ (0x1f573, 0x1f579,), # Hole ..Joystick
+ (0x1f587, 0x1f587,), # Linked Paperclips
+ (0x1f58a, 0x1f58d,), # Lower Left Ballpoint Pen..Lower Left Crayon
+ (0x1f590, 0x1f590,), # Raised Hand With Fingers Splayed
+ (0x1f5a5, 0x1f5a5,), # Desktop Computer
+ (0x1f5a8, 0x1f5a8,), # Printer
+ (0x1f5b1, 0x1f5b2,), # Three Button Mouse ..Trackball
+ (0x1f5bc, 0x1f5bc,), # Frame With Picture
+ (0x1f5c2, 0x1f5c4,), # Card Index Dividers ..File Cabinet
+ (0x1f5d1, 0x1f5d3,), # Wastebasket ..Spiral Calendar Pad
+ (0x1f5dc, 0x1f5de,), # Compression ..Rolled-up Newspaper
+ (0x1f5e1, 0x1f5e1,), # Dagger Knife
+ (0x1f5e3, 0x1f5e3,), # Speaking Head In Silhouette
+ (0x1f5e8, 0x1f5e8,), # Left Speech Bubble
+ (0x1f5ef, 0x1f5ef,), # Right Anger Bubble
+ (0x1f5f3, 0x1f5f3,), # Ballot Box With Ballot
+ (0x1f5fa, 0x1f5fa,), # World Map
+ (0x1f6cb, 0x1f6cb,), # Couch And Lamp
+ (0x1f6cd, 0x1f6cf,), # Shopping Bags ..Bed
+ (0x1f6e0, 0x1f6e5,), # Hammer And Wrench ..Motor Boat
+ (0x1f6e9, 0x1f6e9,), # Small Airplane
+ (0x1f6f0, 0x1f6f0,), # Satellite
+ (0x1f6f3, 0x1f6f3,), # Passenger Ship
+ ),
+}
diff --git a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py
index 3ded9d5736..59eb5c0806 100644
--- a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py
+++ b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py
@@ -68,6 +68,7 @@ import sys
import warnings
# local
+from .table_vs16 import VS16_NARROW_TO_WIDE
from .table_wide import WIDE_EASTASIAN
from .table_zero import ZERO_WIDTH
from .unicode_versions import list_versions
@@ -175,20 +176,37 @@ def wcswidth(pwcs, n=None, unicode_version='auto'):
See :ref:`Specification` for details of cell measurement.
"""
# this 'n' argument is a holdover for POSIX function
+ _unicode_version = None
end = len(pwcs) if n is None else n
width = 0
idx = 0
+ last_measured_char = None
while idx < end:
char = pwcs[idx]
if char == u'\u200D':
# Zero Width Joiner, do not measure this or next character
idx += 2
continue
+ if char == u'\uFE0F' and last_measured_char:
+ # on variation selector 16 (VS16) following another character,
+ # conditionally add '1' to the measured width if that character is
+ # known to be converted from narrow to wide by the VS16 character.
+ if _unicode_version is None:
+ _unicode_version = _wcversion_value(_wcmatch_version(unicode_version))
+ if _unicode_version >= (9, 0, 0):
+ width += _bisearch(ord(last_measured_char), VS16_NARROW_TO_WIDE["9.0.0"])
+ last_measured_char = None
+ idx += 1
+ continue
# measure character at current index
wcw = wcwidth(char, unicode_version)
if wcw < 0:
# early return -1 on C0 and C1 control characters
return wcw
+ if wcw > 0:
+ # track last character measured to contain a cell, so that
+ # subsequent VS-16 modifiers may be understood
+ last_measured_char = char
width += wcw
idx += 1
return width
diff --git a/contrib/python/wcwidth/py3/ya.make b/contrib/python/wcwidth/py3/ya.make
index e00e47f4c2..ec1107cd74 100644
--- a/contrib/python/wcwidth/py3/ya.make
+++ b/contrib/python/wcwidth/py3/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(0.2.9)
+VERSION(0.2.10)
LICENSE(MIT)
@@ -12,6 +12,7 @@ PY_SRCS(
TOP_LEVEL
wcwidth/__init__.py
wcwidth/emoji_zwj_sequences.py
+ wcwidth/table_vs16.py
wcwidth/table_wide.py
wcwidth/table_zero.py
wcwidth/unicode_versions.py