diff options
author | robot-contrib <robot-contrib@yandex-team.com> | 2023-11-28 10:22:06 +0300 |
---|---|---|
committer | robot-contrib <robot-contrib@yandex-team.com> | 2023-11-28 11:32:21 +0300 |
commit | e47e2d7f764c4433fc3ccb00c5875361175988f7 (patch) | |
tree | 7fbd42979a8996707562949ec1a98eab234a090d /contrib/python/wcwidth/py3 | |
parent | 13de5f3d2da60e60f030c6cde3e389b07a9192d3 (diff) | |
download | ydb-e47e2d7f764c4433fc3ccb00c5875361175988f7.tar.gz |
Update contrib/python/wcwidth/py3 to 0.2.10
Diffstat (limited to 'contrib/python/wcwidth/py3')
-rw-r--r-- | contrib/python/wcwidth/py3/.dist-info/METADATA | 10 | ||||
-rw-r--r-- | contrib/python/wcwidth/py3/README.rst | 8 | ||||
-rw-r--r-- | contrib/python/wcwidth/py3/tests/test_core.py | 103 | ||||
-rw-r--r-- | contrib/python/wcwidth/py3/tests/test_emojis.py | 243 | ||||
-rw-r--r-- | contrib/python/wcwidth/py3/wcwidth/__init__.py | 3 | ||||
-rw-r--r-- | contrib/python/wcwidth/py3/wcwidth/table_vs16.py | 125 | ||||
-rw-r--r-- | contrib/python/wcwidth/py3/wcwidth/wcwidth.py | 18 | ||||
-rw-r--r-- | contrib/python/wcwidth/py3/ya.make | 3 |
8 files changed, 405 insertions, 108 deletions
diff --git a/contrib/python/wcwidth/py3/.dist-info/METADATA b/contrib/python/wcwidth/py3/.dist-info/METADATA index 0a4dd22135..7c34843f75 100644 --- a/contrib/python/wcwidth/py3/.dist-info/METADATA +++ b/contrib/python/wcwidth/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: wcwidth -Version: 0.2.9 +Version: 0.2.10 Summary: Measures the displayed width of unicode strings in a terminal Home-page: https://github.com/jquast/wcwidth Author: Jeff Quast @@ -247,10 +247,15 @@ Other Languages ======= History ======= +0.2.10 *2023-11-08* + * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F + Variation Selector 16 (`PR #97`_). + * **Updated** `Specification <Specification_from_pypi_>`_. + 0.2.9 *2023-10-30* * **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese, Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_). - * **Updated** to include `Specification <Specification_from_pypi>`_ of + * **Updated** to include `Specification <Specification_from_pypi_>`_ of character measurements. 0.2.8 *2023-09-30* @@ -350,6 +355,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`PR #35`: https://github.com/jquast/wcwidth/pull/35 .. _`PR #82`: https://github.com/jquast/wcwidth/pull/82 .. _`PR #91`: https://github.com/jquast/wcwidth/pull/91 +.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97 .. _`jquast/blessed`: https://github.com/jquast/blessed .. _`selectel/pyte`: https://github.com/selectel/pyte .. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies diff --git a/contrib/python/wcwidth/py3/README.rst b/contrib/python/wcwidth/py3/README.rst index 2d9722d527..749d89aa03 100644 --- a/contrib/python/wcwidth/py3/README.rst +++ b/contrib/python/wcwidth/py3/README.rst @@ -216,10 +216,15 @@ Other Languages ======= History ======= +0.2.10 *2023-11-08* + * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F + Variation Selector 16 (`PR #97`_). + * **Updated** `Specification <Specification_from_pypi_>`_. + 0.2.9 *2023-10-30* * **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese, Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_). - * **Updated** to include `Specification <Specification_from_pypi>`_ of + * **Updated** to include `Specification <Specification_from_pypi_>`_ of character measurements. 0.2.8 *2023-09-30* @@ -319,6 +324,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`PR #35`: https://github.com/jquast/wcwidth/pull/35 .. _`PR #82`: https://github.com/jquast/wcwidth/pull/82 .. _`PR #91`: https://github.com/jquast/wcwidth/pull/91 +.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97 .. _`jquast/blessed`: https://github.com/jquast/blessed .. _`selectel/pyte`: https://github.com/selectel/pyte .. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies diff --git a/contrib/python/wcwidth/py3/tests/test_core.py b/contrib/python/wcwidth/py3/tests/test_core.py index f0396ea968..d2776cd992 100644 --- a/contrib/python/wcwidth/py3/tests/test_core.py +++ b/contrib/python/wcwidth/py3/tests/test_core.py @@ -10,23 +10,12 @@ except ImportError: # local import wcwidth -# 3rd party -import pytest - -# some tests cannot be done on some builds of python, where the internal -# unicode structure is limited to 0x10000 for memory conservation, -# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)" try: # python 2 _ = unichr except NameError: # python 3 unichr = chr -try: - unichr(0x2fffe) - NARROW_ONLY = False -except ValueError: - NARROW_ONLY = True def test_package_version(): @@ -254,98 +243,6 @@ def test_kr_jamo_filler(): assert length_phrase == expect_length_phrase -@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") -def emoji_zwj_sequence(): - u""" - Emoji zwj sequence of four codepoints is just 2 cells. - """ - phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN - u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER - u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER - # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf - expect_length_each = (2, 0, 0, 2) - expect_length_phrase = 2 - - # exercise, - length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase) - - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - -@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") -def test_unfinished_zwj_sequence(): - u""" - Ensure index-out-of-bounds does not occur for zero-width joiner without any following character - """ - phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN - u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER - expect_length_each = (2, 0, 0) - expect_length_phrase = 2 - - # exercise, - length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase) - - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - -@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") -def test_non_recommended_zwj_sequence(): - """ - Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify - """ - phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN - u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER - expect_length_each = (2, 0, 0) - expect_length_phrase = 2 - - # exercise, - length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase) - - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - -@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") -def test_longer_emoji_zwj_sequence(): - """ - A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells! - """ - # 'Category Code', 'East Asian Width property' -- 'description' - phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT - u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER - u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART - u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16 - u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER - u"\U0001F48B" # 'So', 'W' -- KISS MARK - u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER - u"\U0001F9D1" # 'So', 'W' -- ADULT - u"\U0001F3FD") # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4 - - # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf - expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) - expect_length_phrase = 2 - - # exercise, - length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase) - - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - def test_devanagari_script(): """ Attempt to test the measurement width of Devanagari script. diff --git a/contrib/python/wcwidth/py3/tests/test_emojis.py b/contrib/python/wcwidth/py3/tests/test_emojis.py new file mode 100644 index 0000000000..4f88e2330e --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_emojis.py @@ -0,0 +1,243 @@ +# std imports +import os +import codecs + +# 3rd party +import pytest + +try: + # python 2 + _ = unichr +except NameError: + # python 3 + unichr = chr + +# some tests cannot be done on some builds of python, where the internal +# unicode structure is limited to 0x10000 for memory conservation, +# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)" +try: + unichr(0x2fffe) + NARROW_ONLY = False +except ValueError: + NARROW_ONLY = True + +# local +import wcwidth + + +def make_sequence_from_line(line): + # convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f) + return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split()) + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def emoji_zwj_sequence(): + u""" + Emoji zwj sequence of four codepoints is just 2 cells. + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + expect_length_each = (2, 0, 0, 2) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_unfinished_zwj_sequence(): + u""" + Ensure index-out-of-bounds does not occur for zero-width joiner without any following character + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + expect_length_each = (2, 0, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_non_recommended_zwj_sequence(): + """ + Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + expect_length_each = (2, 0, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_another_emoji_zwj_sequence(): + phrase = ( + u"\u26F9" # PERSON WITH BALL + u"\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200D" # ZERO WIDTH JOINER + u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + expect_length_each = (1, 0, 0, 1, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_longer_emoji_zwj_sequence(): + """ + A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells! + + Also test the same sequence in duplicate, verifying multiple VS-16 sequences + in a single function call. + """ + # 'Category Code', 'East Asian Width property' -- 'description' + phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT + u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART + u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16 + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\U0001F48B" # 'So', 'W' -- KISS MARK + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\U0001F9D1" # 'So', 'W' -- ADULT + u"\U0001F3FD" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4 + ) * 2 + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2 + expect_length_phrase = 4 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def read_sequences_from_file(filename): + fp = codecs.open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8') + lines = [line.strip() + for line in fp.readlines() + if not line.startswith('#') and line.strip()] + fp.close() + sequences = [make_sequence_from_line(line) for line in lines] + return lines, sequences + + +@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds") +def test_recommended_emoji_zwj_sequences(): + """ + Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt + """ + # given, + lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt') + + errors = [] + # Exercise, track by zipping with original text file line, a debugging aide + num = 0 + for sequence, line in zip(sequences, lines): + num += 1 + measured_width = wcwidth.wcswidth(sequence) + if measured_width != 2: + errors.append({ + 'expected_width': 2, + 'line': line, + 'measured_width': measured_width, + 'sequence': sequence, + }) + + # verify + assert errors == [] + assert num >= 1468 + + +def test_recommended_variation_16_sequences(): + """ + Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt + """ + # given, + lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') + + errors = [] + num = 0 + for sequence, line in zip(sequences, lines): + num += 1 + if '\ufe0f' not in sequence: + # filter for only \uFE0F (VS-16) + continue + measured_width = wcwidth.wcswidth(sequence) + if measured_width != 2: + errors.append({ + 'expected_width': 2, + 'line': line, + 'measured_width': wcwidth.wcswidth(sequence), + 'sequence': sequence, + }) + + # verify + assert errors == [] + assert num >= 742 + + +def test_unicode_9_vs16(): + """Verify effect of VS-16 on unicode_version 9.0 and later""" + phrase = (u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + + expect_length_each = (1, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0') + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + +def test_unicode_8_vs16(): + """Verify that VS-16 has no effect on unicode_version 8.0 and earler""" + phrase = (u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + + expect_length_each = (1, 0) + expect_length_phrase = 1 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0') + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase
\ No newline at end of file diff --git a/contrib/python/wcwidth/py3/wcwidth/__init__.py b/contrib/python/wcwidth/py3/wcwidth/__init__.py index 212e72d033..91e18dbe90 100644 --- a/contrib/python/wcwidth/py3/wcwidth/__init__.py +++ b/contrib/python/wcwidth/py3/wcwidth/__init__.py @@ -11,6 +11,7 @@ https://github.com/jquast/wcwidth # local from .wcwidth import ZERO_WIDTH # noqa from .wcwidth import (WIDE_EASTASIAN, + VS16_NARROW_TO_WIDE, wcwidth, wcswidth, _bisearch, @@ -25,4 +26,4 @@ __all__ = ('wcwidth', 'wcswidth', 'list_versions') # We also used pkg_resources to load unicode version tables from version.json, # generated by bin/update-tables.py, but some environments are unable to # import pkg_resources for one reason or another, yikes! -__version__ = '0.2.9' +__version__ = '0.2.10' diff --git a/contrib/python/wcwidth/py3/wcwidth/table_vs16.py b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py new file mode 100644 index 0000000000..3249262d98 --- /dev/null +++ b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py @@ -0,0 +1,125 @@ +""" +Exports VS16_NARROW_TO_WIDE table keyed by supporting unicode version level. + +This code generated by wcwidth/bin/update-tables.py on 2023-11-07 16:43:49 UTC. +""" +VS16_NARROW_TO_WIDE = { + '9.0.0': ( + # Source: 9.0.0 + # Date: 2023-02-01, 02:22:54 GMT + # + (0x00023, 0x00023,), # Number Sign + (0x0002a, 0x0002a,), # Asterisk + (0x00030, 0x00039,), # Digit Zero ..Digit Nine + (0x000a9, 0x000a9,), # Copyright Sign + (0x000ae, 0x000ae,), # Registered Sign + (0x0203c, 0x0203c,), # Double Exclamation Mark + (0x02049, 0x02049,), # Exclamation Question Mark + (0x02122, 0x02122,), # Trade Mark Sign + (0x02139, 0x02139,), # Information Source + (0x02194, 0x02199,), # Left Right Arrow ..South West Arrow + (0x021a9, 0x021aa,), # Leftwards Arrow With Hoo..Rightwards Arrow With Ho + (0x02328, 0x02328,), # Keyboard + (0x023cf, 0x023cf,), # Eject Symbol + (0x023ed, 0x023ef,), # Black Right-pointing Dou..Black Right-pointing Tri + (0x023f1, 0x023f2,), # Stopwatch ..Timer Clock + (0x023f8, 0x023fa,), # Double Vertical Bar ..Black Circle For Record + (0x024c2, 0x024c2,), # Circled Latin Capital Letter M + (0x025aa, 0x025ab,), # Black Small Square ..White Small Square + (0x025b6, 0x025b6,), # Black Right-pointing Triangle + (0x025c0, 0x025c0,), # Black Left-pointing Triangle + (0x025fb, 0x025fc,), # White Medium Square ..Black Medium Square + (0x02600, 0x02604,), # Black Sun With Rays ..Comet + (0x0260e, 0x0260e,), # Black Telephone + (0x02611, 0x02611,), # Ballot Box With Check + (0x02618, 0x02618,), # Shamrock + (0x0261d, 0x0261d,), # White Up Pointing Index + (0x02620, 0x02620,), # Skull And Crossbones + (0x02622, 0x02623,), # Radioactive Sign ..Biohazard Sign + (0x02626, 0x02626,), # Orthodox Cross + (0x0262a, 0x0262a,), # Star And Crescent + (0x0262e, 0x0262f,), # Peace Symbol ..Yin Yang + (0x02638, 0x0263a,), # Wheel Of Dharma ..White Smiling Face + (0x02640, 0x02640,), # Female Sign + (0x02642, 0x02642,), # Male Sign + (0x0265f, 0x02660,), # Black Chess Pawn ..Black Spade Suit + (0x02663, 0x02663,), # Black Club Suit + (0x02665, 0x02666,), # Black Heart Suit ..Black Diamond Suit + (0x02668, 0x02668,), # Hot Springs + (0x0267b, 0x0267b,), # Black Universal Recycling Symbol + (0x0267e, 0x0267e,), # Permanent Paper Sign + (0x02692, 0x02692,), # Hammer And Pick + (0x02694, 0x02697,), # Crossed Swords ..Alembic + (0x02699, 0x02699,), # Gear + (0x0269b, 0x0269c,), # Atom Symbol ..Fleur-de-lis + (0x026a0, 0x026a0,), # Warning Sign + (0x026a7, 0x026a7,), # Male With Stroke And Male And Female Sign + (0x026b0, 0x026b1,), # Coffin ..Funeral Urn + (0x026c8, 0x026c8,), # Thunder Cloud And Rain + (0x026cf, 0x026cf,), # Pick + (0x026d1, 0x026d1,), # Helmet With White Cross + (0x026d3, 0x026d3,), # Chains + (0x026e9, 0x026e9,), # Shinto Shrine + (0x026f0, 0x026f1,), # Mountain ..Umbrella On Ground + (0x026f4, 0x026f4,), # Ferry + (0x026f7, 0x026f9,), # Skier ..Person With Ball + (0x02702, 0x02702,), # Black Scissors + (0x02708, 0x02709,), # Airplane ..Envelope + (0x0270c, 0x0270d,), # Victory Hand ..Writing Hand + (0x0270f, 0x0270f,), # Pencil + (0x02712, 0x02712,), # Black Nib + (0x02714, 0x02714,), # Heavy Check Mark + (0x02716, 0x02716,), # Heavy Multiplication X + (0x0271d, 0x0271d,), # Latin Cross + (0x02721, 0x02721,), # Star Of David + (0x02733, 0x02734,), # Eight Spoked Asterisk ..Eight Pointed Black Star + (0x02744, 0x02744,), # Snowflake + (0x02747, 0x02747,), # Sparkle + (0x02763, 0x02764,), # Heavy Heart Exclamation ..Heavy Black Heart + (0x027a1, 0x027a1,), # Black Rightwards Arrow + (0x02934, 0x02935,), # Arrow Pointing Rightward..Arrow Pointing Rightward + (0x02b05, 0x02b07,), # Leftwards Black Arrow ..Downwards Black Arrow + (0x1f170, 0x1f171,), # Negative Squared Latin C..Negative Squared Latin C + (0x1f17e, 0x1f17f,), # Negative Squared Latin C..Negative Squared Latin C + (0x1f321, 0x1f321,), # Thermometer + (0x1f324, 0x1f32c,), # White Sun With Small Clo..Wind Blowing Face + (0x1f336, 0x1f336,), # Hot Pepper + (0x1f37d, 0x1f37d,), # Fork And Knife With Plate + (0x1f396, 0x1f397,), # Military Medal ..Reminder Ribbon + (0x1f399, 0x1f39b,), # Studio Microphone ..Control Knobs + (0x1f39e, 0x1f39f,), # Film Frames ..Admission Tickets + (0x1f3cb, 0x1f3ce,), # Weight Lifter ..Racing Car + (0x1f3d4, 0x1f3df,), # Snow Capped Mountain ..Stadium + (0x1f3f3, 0x1f3f3,), # Waving White Flag + (0x1f3f5, 0x1f3f5,), # Rosette + (0x1f3f7, 0x1f3f7,), # Label + (0x1f43f, 0x1f43f,), # Chipmunk + (0x1f441, 0x1f441,), # Eye + (0x1f4fd, 0x1f4fd,), # Film Projector + (0x1f549, 0x1f54a,), # Om Symbol ..Dove Of Peace + (0x1f56f, 0x1f570,), # Candle ..Mantelpiece Clock + (0x1f573, 0x1f579,), # Hole ..Joystick + (0x1f587, 0x1f587,), # Linked Paperclips + (0x1f58a, 0x1f58d,), # Lower Left Ballpoint Pen..Lower Left Crayon + (0x1f590, 0x1f590,), # Raised Hand With Fingers Splayed + (0x1f5a5, 0x1f5a5,), # Desktop Computer + (0x1f5a8, 0x1f5a8,), # Printer + (0x1f5b1, 0x1f5b2,), # Three Button Mouse ..Trackball + (0x1f5bc, 0x1f5bc,), # Frame With Picture + (0x1f5c2, 0x1f5c4,), # Card Index Dividers ..File Cabinet + (0x1f5d1, 0x1f5d3,), # Wastebasket ..Spiral Calendar Pad + (0x1f5dc, 0x1f5de,), # Compression ..Rolled-up Newspaper + (0x1f5e1, 0x1f5e1,), # Dagger Knife + (0x1f5e3, 0x1f5e3,), # Speaking Head In Silhouette + (0x1f5e8, 0x1f5e8,), # Left Speech Bubble + (0x1f5ef, 0x1f5ef,), # Right Anger Bubble + (0x1f5f3, 0x1f5f3,), # Ballot Box With Ballot + (0x1f5fa, 0x1f5fa,), # World Map + (0x1f6cb, 0x1f6cb,), # Couch And Lamp + (0x1f6cd, 0x1f6cf,), # Shopping Bags ..Bed + (0x1f6e0, 0x1f6e5,), # Hammer And Wrench ..Motor Boat + (0x1f6e9, 0x1f6e9,), # Small Airplane + (0x1f6f0, 0x1f6f0,), # Satellite + (0x1f6f3, 0x1f6f3,), # Passenger Ship + ), +} diff --git a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py index 3ded9d5736..59eb5c0806 100644 --- a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py +++ b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py @@ -68,6 +68,7 @@ import sys import warnings # local +from .table_vs16 import VS16_NARROW_TO_WIDE from .table_wide import WIDE_EASTASIAN from .table_zero import ZERO_WIDTH from .unicode_versions import list_versions @@ -175,20 +176,37 @@ def wcswidth(pwcs, n=None, unicode_version='auto'): See :ref:`Specification` for details of cell measurement. """ # this 'n' argument is a holdover for POSIX function + _unicode_version = None end = len(pwcs) if n is None else n width = 0 idx = 0 + last_measured_char = None while idx < end: char = pwcs[idx] if char == u'\u200D': # Zero Width Joiner, do not measure this or next character idx += 2 continue + if char == u'\uFE0F' and last_measured_char: + # on variation selector 16 (VS16) following another character, + # conditionally add '1' to the measured width if that character is + # known to be converted from narrow to wide by the VS16 character. + if _unicode_version is None: + _unicode_version = _wcversion_value(_wcmatch_version(unicode_version)) + if _unicode_version >= (9, 0, 0): + width += _bisearch(ord(last_measured_char), VS16_NARROW_TO_WIDE["9.0.0"]) + last_measured_char = None + idx += 1 + continue # measure character at current index wcw = wcwidth(char, unicode_version) if wcw < 0: # early return -1 on C0 and C1 control characters return wcw + if wcw > 0: + # track last character measured to contain a cell, so that + # subsequent VS-16 modifiers may be understood + last_measured_char = char width += wcw idx += 1 return width diff --git a/contrib/python/wcwidth/py3/ya.make b/contrib/python/wcwidth/py3/ya.make index e00e47f4c2..ec1107cd74 100644 --- a/contrib/python/wcwidth/py3/ya.make +++ b/contrib/python/wcwidth/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(0.2.9) +VERSION(0.2.10) LICENSE(MIT) @@ -12,6 +12,7 @@ PY_SRCS( TOP_LEVEL wcwidth/__init__.py wcwidth/emoji_zwj_sequences.py + wcwidth/table_vs16.py wcwidth/table_wide.py wcwidth/table_zero.py wcwidth/unicode_versions.py |