diff options
author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:10 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:10 +0300 |
commit | 1aeb9a455974457866f78722ad98114bafc84e8a (patch) | |
tree | e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/python/wcwidth | |
parent | bd5ef432f5cfb1e18851381329d94665a4c22470 (diff) | |
download | ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/python/wcwidth')
-rw-r--r-- | contrib/python/wcwidth/LICENSE | 42 | ||||
-rw-r--r-- | contrib/python/wcwidth/wcwidth/__init__.py | 2 | ||||
-rw-r--r-- | contrib/python/wcwidth/wcwidth/wcwidth.py | 288 | ||||
-rw-r--r-- | contrib/python/wcwidth/ya.make | 22 |
4 files changed, 177 insertions, 177 deletions
diff --git a/contrib/python/wcwidth/LICENSE b/contrib/python/wcwidth/LICENSE index a44c075724..4a1bec17f0 100644 --- a/contrib/python/wcwidth/LICENSE +++ b/contrib/python/wcwidth/LICENSE @@ -1,24 +1,24 @@ -The MIT License (MIT) - -Copyright (c) 2014 Jeff Quast <contact@jeffquast.com> - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +The MIT License (MIT) + +Copyright (c) 2014 Jeff Quast <contact@jeffquast.com> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. Markus Kuhn -- 2007-05-26 (Unicode 5.0) diff --git a/contrib/python/wcwidth/wcwidth/__init__.py b/contrib/python/wcwidth/wcwidth/__init__.py index a9008f8235..317236272d 100644 --- a/contrib/python/wcwidth/wcwidth/__init__.py +++ b/contrib/python/wcwidth/wcwidth/__init__.py @@ -1,6 +1,6 @@ """ wcwidth module. - + https://github.com/jquast/wcwidth """ # re-export all functions & definitions, even private ones, from top-level diff --git a/contrib/python/wcwidth/wcwidth/wcwidth.py b/contrib/python/wcwidth/wcwidth/wcwidth.py index 931bd0b1b3..54a18c5e12 100644 --- a/contrib/python/wcwidth/wcwidth/wcwidth.py +++ b/contrib/python/wcwidth/wcwidth/wcwidth.py @@ -1,83 +1,83 @@ -""" +""" This is a python implementation of wcwidth() and wcswidth(). - -https://github.com/jquast/wcwidth - + +https://github.com/jquast/wcwidth + from Markus Kuhn's C code, retrieved from: - - http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - -This is an implementation of wcwidth() and wcswidth() (defined in -IEEE Std 1002.1-2001) for Unicode. - -http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html -http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html - -In fixed-width output devices, Latin characters all occupy a single -"cell" position of equal width, whereas ideographic CJK characters -occupy two such cells. Interoperability between terminal-line -applications and (teletype-style) character terminals using the -UTF-8 encoding requires agreement on which character should advance -the cursor by how many cell positions. No established formal -standards exist at present on which Unicode character shall occupy -how many cell positions on character terminals. These routines are -a first attempt of defining such behavior based on simple rules -applied to data provided by the Unicode Consortium. - -For some graphical characters, the Unicode standard explicitly -defines a character-cell width via the definition of the East Asian -FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. -In all these cases, there is no ambiguity about which width a -terminal shall use. For characters in the East Asian Ambiguous (A) -class, the width choice depends purely on a preference of backward -compatibility with either historic CJK or Western practice. -Choosing single-width for these characters is easy to justify as -the appropriate long-term solution, as the CJK practice of -displaying these characters as double-width comes from historic -implementation simplicity (8-bit encoded characters were displayed -single-width and 16-bit ones double-width, even for Greek, -Cyrillic, etc.) and not any typographic considerations. - -Much less clear is the choice of width for the Not East Asian -(Neutral) class. Existing practice does not dictate a width for any -of these characters. It would nevertheless make sense -typographically to allocate two character cells to characters such -as for instance EM SPACE or VOLUME INTEGRAL, which cannot be -represented adequately with a single-width glyph. The following -routines at present merely assign a single-cell width to all -neutral characters, in the interest of simplicity. This is not -entirely satisfactory and should be reconsidered before -establishing a formal standard in this area. At the moment, the -decision which Not East Asian (Neutral) characters should be -represented by double-width glyphs cannot yet be answered by -applying a simple rule from the Unicode database content. Setting -up a proper standard for the behavior of UTF-8 character terminals -will require a careful analysis not only of each Unicode character, -but also of each presentation form, something the author of these -routines has avoided to do so far. - -http://www.unicode.org/unicode/reports/tr11/ - -Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c -""" + + http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + +This is an implementation of wcwidth() and wcswidth() (defined in +IEEE Std 1002.1-2001) for Unicode. + +http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html +http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html + +In fixed-width output devices, Latin characters all occupy a single +"cell" position of equal width, whereas ideographic CJK characters +occupy two such cells. Interoperability between terminal-line +applications and (teletype-style) character terminals using the +UTF-8 encoding requires agreement on which character should advance +the cursor by how many cell positions. No established formal +standards exist at present on which Unicode character shall occupy +how many cell positions on character terminals. These routines are +a first attempt of defining such behavior based on simple rules +applied to data provided by the Unicode Consortium. + +For some graphical characters, the Unicode standard explicitly +defines a character-cell width via the definition of the East Asian +FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. +In all these cases, there is no ambiguity about which width a +terminal shall use. For characters in the East Asian Ambiguous (A) +class, the width choice depends purely on a preference of backward +compatibility with either historic CJK or Western practice. +Choosing single-width for these characters is easy to justify as +the appropriate long-term solution, as the CJK practice of +displaying these characters as double-width comes from historic +implementation simplicity (8-bit encoded characters were displayed +single-width and 16-bit ones double-width, even for Greek, +Cyrillic, etc.) and not any typographic considerations. + +Much less clear is the choice of width for the Not East Asian +(Neutral) class. Existing practice does not dictate a width for any +of these characters. It would nevertheless make sense +typographically to allocate two character cells to characters such +as for instance EM SPACE or VOLUME INTEGRAL, which cannot be +represented adequately with a single-width glyph. The following +routines at present merely assign a single-cell width to all +neutral characters, in the interest of simplicity. This is not +entirely satisfactory and should be reconsidered before +establishing a formal standard in this area. At the moment, the +decision which Not East Asian (Neutral) characters should be +represented by double-width glyphs cannot yet be answered by +applying a simple rule from the Unicode database content. Setting +up a proper standard for the behavior of UTF-8 character terminals +will require a careful analysis not only of each Unicode character, +but also of each presentation form, something the author of these +routines has avoided to do so far. + +http://www.unicode.org/unicode/reports/tr11/ + +Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c +""" from __future__ import division - + # std imports import os import sys import warnings # local -from .table_wide import WIDE_EASTASIAN -from .table_zero import ZERO_WIDTH +from .table_wide import WIDE_EASTASIAN +from .table_zero import ZERO_WIDTH from .unicode_versions import list_versions - + try: from functools import lru_cache except ImportError: # lru_cache was added in Python 3.2 from backports.functools_lru_cache import lru_cache - + # global cache _UNICODE_CMPTABLE = None _PY3 = (sys.version_info[0] >= 3) @@ -110,42 +110,42 @@ ZERO_WIDTH_CF = set([ def _bisearch(ucs, table): - """ - Auxiliary function for binary search in interval table. - - :arg int ucs: Ordinal value of unicode character. - :arg list table: List of starting and ending ranges of ordinal values, - in form of ``[(start, end), ...]``. - :rtype: int - :returns: 1 if ordinal value ucs is found within lookup table, else 0. - """ - lbound = 0 + """ + Auxiliary function for binary search in interval table. + + :arg int ucs: Ordinal value of unicode character. + :arg list table: List of starting and ending ranges of ordinal values, + in form of ``[(start, end), ...]``. + :rtype: int + :returns: 1 if ordinal value ucs is found within lookup table, else 0. + """ + lbound = 0 ubound = len(table) - 1 - - if ucs < table[0][0] or ucs > table[ubound][1]: - return 0 - while ubound >= lbound: - mid = (lbound + ubound) // 2 - if ucs > table[mid][1]: - lbound = mid + 1 - elif ucs < table[mid][0]: - ubound = mid - 1 - else: - return 1 - - return 0 - - + + if ucs < table[0][0] or ucs > table[ubound][1]: + return 0 + while ubound >= lbound: + mid = (lbound + ubound) // 2 + if ucs > table[mid][1]: + lbound = mid + 1 + elif ucs < table[mid][0]: + ubound = mid - 1 + else: + return 1 + + return 0 + + @lru_cache(maxsize=1000) def wcwidth(wc, unicode_version='auto'): - r""" + r""" Given one Unicode character, return its printable length on a terminal. - + :param str wc: A single Unicode character. :param str unicode_version: A Unicode version number, such as ``'6.0.0'``, the list of available version levels may be listed by pairing function :func:`list_versions`. - + Any version string may be specified without error -- the nearest matching version is selected. When ``latest`` (default), the highest Unicode version level is used. @@ -157,72 +157,72 @@ def wcwidth(wc, unicode_version='auto'): character occupies on a graphic terminal (1 or 2) is returned. :rtype: int - The following have a column width of -1: - - - C0 control characters (U+001 through U+01F). - - - C1 control characters and DEL (U+07F through U+0A0). - - The following have a column width of 0: - + The following have a column width of -1: + + - C0 control characters (U+001 through U+01F). + + - C1 control characters and DEL (U+07F through U+0A0). + + The following have a column width of 0: + - Non-spacing and enclosing combining characters (general category code Mn or Me in the Unicode database). - + - NULL (``U+0000``). - + - COMBINING GRAPHEME JOINER (``U+034F``). - + - ZERO WIDTH SPACE (``U+200B``) *through* RIGHT-TO-LEFT MARK (``U+200F``). - + - LINE SEPARATOR (``U+2028``) *and* PARAGRAPH SEPARATOR (``U+2029``). - + - LEFT-TO-RIGHT EMBEDDING (``U+202A``) *through* RIGHT-TO-LEFT OVERRIDE (``U+202E``). - + - WORD JOINER (``U+2060``) *through* INVISIBLE SEPARATOR (``U+2063``). - - The following have a column width of 1: - + + The following have a column width of 1: + - SOFT HYPHEN (``U+00AD``). - + - All remaining characters, including all printable ISO 8859-1 and WGL4 characters, Unicode control characters, etc. - - The following have a column width of 2: - - - Spacing characters in the East Asian Wide (W) or East Asian - Full-width (F) category as defined in Unicode Technical - Report #11 have a column width of 2. + + The following have a column width of 2: + + - Spacing characters in the East Asian Wide (W) or East Asian + Full-width (F) category as defined in Unicode Technical + Report #11 have a column width of 2. - Some kinds of Emoji or symbols. - """ + """ # NOTE: created by hand, there isn't anything identifiable other than # general Cf category code to identify these, and some characters in Cf # category code are of non-zero width. - ucs = ord(wc) + ucs = ord(wc) if ucs in ZERO_WIDTH_CF: - return 0 - - # C0/C1 control characters - if ucs < 32 or 0x07F <= ucs < 0x0A0: - return -1 - + return 0 + + # C0/C1 control characters + if ucs < 32 or 0x07F <= ucs < 0x0A0: + return -1 + _unicode_version = _wcmatch_version(unicode_version) - # combining characters with zero width + # combining characters with zero width if _bisearch(ucs, ZERO_WIDTH[_unicode_version]): - return 0 - + return 0 + return 1 + _bisearch(ucs, WIDE_EASTASIAN[_unicode_version]) - - + + def wcswidth(pwcs, n=None, unicode_version='auto'): - """ - Given a unicode string, return its printable length on a terminal. - + """ + Given a unicode string, return its printable length on a terminal. + :param str pwcs: Measure width of given unicode string. :param int n: When ``n`` is None (default), return the length of the entire string, otherwise width the first ``n`` characters specified. @@ -234,19 +234,19 @@ def wcswidth(pwcs, n=None, unicode_version='auto'): :returns: The width, in cells, necessary to display the first ``n`` characters of the unicode string ``pwcs``. Returns ``-1`` if a non-printable character is encountered. - """ - # pylint: disable=C0103 - # Invalid argument name "n" - - end = len(pwcs) if n is None else n - idx = slice(0, end) - width = 0 - for char in pwcs[idx]: + """ + # pylint: disable=C0103 + # Invalid argument name "n" + + end = len(pwcs) if n is None else n + idx = slice(0, end) + width = 0 + for char in pwcs[idx]: wcw = wcwidth(char, unicode_version) - if wcw < 0: - return -1 + if wcw < 0: + return -1 width += wcw - return width + return width @lru_cache(maxsize=128) diff --git a/contrib/python/wcwidth/ya.make b/contrib/python/wcwidth/ya.make index f1aeefaa1c..64ebbbdc2b 100644 --- a/contrib/python/wcwidth/ya.make +++ b/contrib/python/wcwidth/ya.make @@ -1,9 +1,9 @@ PY23_LIBRARY() - + LICENSE(MIT) OWNER(g:python-contrib blinkov) - + VERSION(0.2.5) PEERDIR( @@ -16,15 +16,15 @@ IF (PYTHON2) ) ENDIF() -PY_SRCS( - TOP_LEVEL - wcwidth/__init__.py - wcwidth/table_wide.py - wcwidth/table_zero.py +PY_SRCS( + TOP_LEVEL + wcwidth/__init__.py + wcwidth/table_wide.py + wcwidth/table_zero.py wcwidth/unicode_versions.py - wcwidth/wcwidth.py -) - + wcwidth/wcwidth.py +) + RESOURCE_FILES( PREFIX contrib/python/wcwidth/ .dist-info/METADATA @@ -33,7 +33,7 @@ RESOURCE_FILES( NO_LINT() -END() +END() RECURSE_FOR_TESTS( tests |