aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/wcwidth
diff options
context:
space:
mode:
authorIvan Blinkov <ivan@blinkov.ru>2022-02-10 16:47:10 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:10 +0300
commit1aeb9a455974457866f78722ad98114bafc84e8a (patch)
treee4340eaf1668684d83a0a58c36947c5def5350ad /contrib/python/wcwidth
parentbd5ef432f5cfb1e18851381329d94665a4c22470 (diff)
downloadydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/python/wcwidth')
-rw-r--r--contrib/python/wcwidth/LICENSE42
-rw-r--r--contrib/python/wcwidth/wcwidth/__init__.py2
-rw-r--r--contrib/python/wcwidth/wcwidth/wcwidth.py288
-rw-r--r--contrib/python/wcwidth/ya.make22
4 files changed, 177 insertions, 177 deletions
diff --git a/contrib/python/wcwidth/LICENSE b/contrib/python/wcwidth/LICENSE
index a44c075724..4a1bec17f0 100644
--- a/contrib/python/wcwidth/LICENSE
+++ b/contrib/python/wcwidth/LICENSE
@@ -1,24 +1,24 @@
-The MIT License (MIT)
-
-Copyright (c) 2014 Jeff Quast <contact@jeffquast.com>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+The MIT License (MIT)
+
+Copyright (c) 2014 Jeff Quast <contact@jeffquast.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
Markus Kuhn -- 2007-05-26 (Unicode 5.0)
diff --git a/contrib/python/wcwidth/wcwidth/__init__.py b/contrib/python/wcwidth/wcwidth/__init__.py
index a9008f8235..317236272d 100644
--- a/contrib/python/wcwidth/wcwidth/__init__.py
+++ b/contrib/python/wcwidth/wcwidth/__init__.py
@@ -1,6 +1,6 @@
"""
wcwidth module.
-
+
https://github.com/jquast/wcwidth
"""
# re-export all functions & definitions, even private ones, from top-level
diff --git a/contrib/python/wcwidth/wcwidth/wcwidth.py b/contrib/python/wcwidth/wcwidth/wcwidth.py
index 931bd0b1b3..54a18c5e12 100644
--- a/contrib/python/wcwidth/wcwidth/wcwidth.py
+++ b/contrib/python/wcwidth/wcwidth/wcwidth.py
@@ -1,83 +1,83 @@
-"""
+"""
This is a python implementation of wcwidth() and wcswidth().
-
-https://github.com/jquast/wcwidth
-
+
+https://github.com/jquast/wcwidth
+
from Markus Kuhn's C code, retrieved from:
-
- http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
-
-This is an implementation of wcwidth() and wcswidth() (defined in
-IEEE Std 1002.1-2001) for Unicode.
-
-http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
-http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
-
-In fixed-width output devices, Latin characters all occupy a single
-"cell" position of equal width, whereas ideographic CJK characters
-occupy two such cells. Interoperability between terminal-line
-applications and (teletype-style) character terminals using the
-UTF-8 encoding requires agreement on which character should advance
-the cursor by how many cell positions. No established formal
-standards exist at present on which Unicode character shall occupy
-how many cell positions on character terminals. These routines are
-a first attempt of defining such behavior based on simple rules
-applied to data provided by the Unicode Consortium.
-
-For some graphical characters, the Unicode standard explicitly
-defines a character-cell width via the definition of the East Asian
-FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
-In all these cases, there is no ambiguity about which width a
-terminal shall use. For characters in the East Asian Ambiguous (A)
-class, the width choice depends purely on a preference of backward
-compatibility with either historic CJK or Western practice.
-Choosing single-width for these characters is easy to justify as
-the appropriate long-term solution, as the CJK practice of
-displaying these characters as double-width comes from historic
-implementation simplicity (8-bit encoded characters were displayed
-single-width and 16-bit ones double-width, even for Greek,
-Cyrillic, etc.) and not any typographic considerations.
-
-Much less clear is the choice of width for the Not East Asian
-(Neutral) class. Existing practice does not dictate a width for any
-of these characters. It would nevertheless make sense
-typographically to allocate two character cells to characters such
-as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
-represented adequately with a single-width glyph. The following
-routines at present merely assign a single-cell width to all
-neutral characters, in the interest of simplicity. This is not
-entirely satisfactory and should be reconsidered before
-establishing a formal standard in this area. At the moment, the
-decision which Not East Asian (Neutral) characters should be
-represented by double-width glyphs cannot yet be answered by
-applying a simple rule from the Unicode database content. Setting
-up a proper standard for the behavior of UTF-8 character terminals
-will require a careful analysis not only of each Unicode character,
-but also of each presentation form, something the author of these
-routines has avoided to do so far.
-
-http://www.unicode.org/unicode/reports/tr11/
-
-Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
-"""
+
+ http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+
+This is an implementation of wcwidth() and wcswidth() (defined in
+IEEE Std 1002.1-2001) for Unicode.
+
+http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
+http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
+
+In fixed-width output devices, Latin characters all occupy a single
+"cell" position of equal width, whereas ideographic CJK characters
+occupy two such cells. Interoperability between terminal-line
+applications and (teletype-style) character terminals using the
+UTF-8 encoding requires agreement on which character should advance
+the cursor by how many cell positions. No established formal
+standards exist at present on which Unicode character shall occupy
+how many cell positions on character terminals. These routines are
+a first attempt of defining such behavior based on simple rules
+applied to data provided by the Unicode Consortium.
+
+For some graphical characters, the Unicode standard explicitly
+defines a character-cell width via the definition of the East Asian
+FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
+In all these cases, there is no ambiguity about which width a
+terminal shall use. For characters in the East Asian Ambiguous (A)
+class, the width choice depends purely on a preference of backward
+compatibility with either historic CJK or Western practice.
+Choosing single-width for these characters is easy to justify as
+the appropriate long-term solution, as the CJK practice of
+displaying these characters as double-width comes from historic
+implementation simplicity (8-bit encoded characters were displayed
+single-width and 16-bit ones double-width, even for Greek,
+Cyrillic, etc.) and not any typographic considerations.
+
+Much less clear is the choice of width for the Not East Asian
+(Neutral) class. Existing practice does not dictate a width for any
+of these characters. It would nevertheless make sense
+typographically to allocate two character cells to characters such
+as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
+represented adequately with a single-width glyph. The following
+routines at present merely assign a single-cell width to all
+neutral characters, in the interest of simplicity. This is not
+entirely satisfactory and should be reconsidered before
+establishing a formal standard in this area. At the moment, the
+decision which Not East Asian (Neutral) characters should be
+represented by double-width glyphs cannot yet be answered by
+applying a simple rule from the Unicode database content. Setting
+up a proper standard for the behavior of UTF-8 character terminals
+will require a careful analysis not only of each Unicode character,
+but also of each presentation form, something the author of these
+routines has avoided to do so far.
+
+http://www.unicode.org/unicode/reports/tr11/
+
+Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+"""
from __future__ import division
-
+
# std imports
import os
import sys
import warnings
# local
-from .table_wide import WIDE_EASTASIAN
-from .table_zero import ZERO_WIDTH
+from .table_wide import WIDE_EASTASIAN
+from .table_zero import ZERO_WIDTH
from .unicode_versions import list_versions
-
+
try:
from functools import lru_cache
except ImportError:
# lru_cache was added in Python 3.2
from backports.functools_lru_cache import lru_cache
-
+
# global cache
_UNICODE_CMPTABLE = None
_PY3 = (sys.version_info[0] >= 3)
@@ -110,42 +110,42 @@ ZERO_WIDTH_CF = set([
def _bisearch(ucs, table):
- """
- Auxiliary function for binary search in interval table.
-
- :arg int ucs: Ordinal value of unicode character.
- :arg list table: List of starting and ending ranges of ordinal values,
- in form of ``[(start, end), ...]``.
- :rtype: int
- :returns: 1 if ordinal value ucs is found within lookup table, else 0.
- """
- lbound = 0
+ """
+ Auxiliary function for binary search in interval table.
+
+ :arg int ucs: Ordinal value of unicode character.
+ :arg list table: List of starting and ending ranges of ordinal values,
+ in form of ``[(start, end), ...]``.
+ :rtype: int
+ :returns: 1 if ordinal value ucs is found within lookup table, else 0.
+ """
+ lbound = 0
ubound = len(table) - 1
-
- if ucs < table[0][0] or ucs > table[ubound][1]:
- return 0
- while ubound >= lbound:
- mid = (lbound + ubound) // 2
- if ucs > table[mid][1]:
- lbound = mid + 1
- elif ucs < table[mid][0]:
- ubound = mid - 1
- else:
- return 1
-
- return 0
-
-
+
+ if ucs < table[0][0] or ucs > table[ubound][1]:
+ return 0
+ while ubound >= lbound:
+ mid = (lbound + ubound) // 2
+ if ucs > table[mid][1]:
+ lbound = mid + 1
+ elif ucs < table[mid][0]:
+ ubound = mid - 1
+ else:
+ return 1
+
+ return 0
+
+
@lru_cache(maxsize=1000)
def wcwidth(wc, unicode_version='auto'):
- r"""
+ r"""
Given one Unicode character, return its printable length on a terminal.
-
+
:param str wc: A single Unicode character.
:param str unicode_version: A Unicode version number, such as
``'6.0.0'``, the list of available version levels may be
listed by pairing function :func:`list_versions`.
-
+
Any version string may be specified without error -- the nearest
matching version is selected. When ``latest`` (default), the
highest Unicode version level is used.
@@ -157,72 +157,72 @@ def wcwidth(wc, unicode_version='auto'):
character occupies on a graphic terminal (1 or 2) is returned.
:rtype: int
- The following have a column width of -1:
-
- - C0 control characters (U+001 through U+01F).
-
- - C1 control characters and DEL (U+07F through U+0A0).
-
- The following have a column width of 0:
-
+ The following have a column width of -1:
+
+ - C0 control characters (U+001 through U+01F).
+
+ - C1 control characters and DEL (U+07F through U+0A0).
+
+ The following have a column width of 0:
+
- Non-spacing and enclosing combining characters (general
category code Mn or Me in the Unicode database).
-
+
- NULL (``U+0000``).
-
+
- COMBINING GRAPHEME JOINER (``U+034F``).
-
+
- ZERO WIDTH SPACE (``U+200B``) *through*
RIGHT-TO-LEFT MARK (``U+200F``).
-
+
- LINE SEPARATOR (``U+2028``) *and*
PARAGRAPH SEPARATOR (``U+2029``).
-
+
- LEFT-TO-RIGHT EMBEDDING (``U+202A``) *through*
RIGHT-TO-LEFT OVERRIDE (``U+202E``).
-
+
- WORD JOINER (``U+2060``) *through*
INVISIBLE SEPARATOR (``U+2063``).
-
- The following have a column width of 1:
-
+
+ The following have a column width of 1:
+
- SOFT HYPHEN (``U+00AD``).
-
+
- All remaining characters, including all printable ISO 8859-1
and WGL4 characters, Unicode control characters, etc.
-
- The following have a column width of 2:
-
- - Spacing characters in the East Asian Wide (W) or East Asian
- Full-width (F) category as defined in Unicode Technical
- Report #11 have a column width of 2.
+
+ The following have a column width of 2:
+
+ - Spacing characters in the East Asian Wide (W) or East Asian
+ Full-width (F) category as defined in Unicode Technical
+ Report #11 have a column width of 2.
- Some kinds of Emoji or symbols.
- """
+ """
# NOTE: created by hand, there isn't anything identifiable other than
# general Cf category code to identify these, and some characters in Cf
# category code are of non-zero width.
- ucs = ord(wc)
+ ucs = ord(wc)
if ucs in ZERO_WIDTH_CF:
- return 0
-
- # C0/C1 control characters
- if ucs < 32 or 0x07F <= ucs < 0x0A0:
- return -1
-
+ return 0
+
+ # C0/C1 control characters
+ if ucs < 32 or 0x07F <= ucs < 0x0A0:
+ return -1
+
_unicode_version = _wcmatch_version(unicode_version)
- # combining characters with zero width
+ # combining characters with zero width
if _bisearch(ucs, ZERO_WIDTH[_unicode_version]):
- return 0
-
+ return 0
+
return 1 + _bisearch(ucs, WIDE_EASTASIAN[_unicode_version])
-
-
+
+
def wcswidth(pwcs, n=None, unicode_version='auto'):
- """
- Given a unicode string, return its printable length on a terminal.
-
+ """
+ Given a unicode string, return its printable length on a terminal.
+
:param str pwcs: Measure width of given unicode string.
:param int n: When ``n`` is None (default), return the length of the
entire string, otherwise the width of the first ``n`` characters specified.
@@ -234,19 +234,19 @@ def wcswidth(pwcs, n=None, unicode_version='auto'):
:returns: The width, in cells, necessary to display the first ``n``
characters of the unicode string ``pwcs``. Returns ``-1`` if
a non-printable character is encountered.
- """
- # pylint: disable=C0103
- # Invalid argument name "n"
-
- end = len(pwcs) if n is None else n
- idx = slice(0, end)
- width = 0
- for char in pwcs[idx]:
+ """
+ # pylint: disable=C0103
+ # Invalid argument name "n"
+
+ end = len(pwcs) if n is None else n
+ idx = slice(0, end)
+ width = 0
+ for char in pwcs[idx]:
wcw = wcwidth(char, unicode_version)
- if wcw < 0:
- return -1
+ if wcw < 0:
+ return -1
width += wcw
- return width
+ return width
@lru_cache(maxsize=128)
diff --git a/contrib/python/wcwidth/ya.make b/contrib/python/wcwidth/ya.make
index f1aeefaa1c..64ebbbdc2b 100644
--- a/contrib/python/wcwidth/ya.make
+++ b/contrib/python/wcwidth/ya.make
@@ -1,9 +1,9 @@
PY23_LIBRARY()
-
+
LICENSE(MIT)
OWNER(g:python-contrib blinkov)
-
+
VERSION(0.2.5)
PEERDIR(
@@ -16,15 +16,15 @@ IF (PYTHON2)
)
ENDIF()
-PY_SRCS(
- TOP_LEVEL
- wcwidth/__init__.py
- wcwidth/table_wide.py
- wcwidth/table_zero.py
+PY_SRCS(
+ TOP_LEVEL
+ wcwidth/__init__.py
+ wcwidth/table_wide.py
+ wcwidth/table_zero.py
wcwidth/unicode_versions.py
- wcwidth/wcwidth.py
-)
-
+ wcwidth/wcwidth.py
+)
+
RESOURCE_FILES(
PREFIX contrib/python/wcwidth/
.dist-info/METADATA
@@ -33,7 +33,7 @@ RESOURCE_FILES(
NO_LINT()
-END()
+END()
RECURSE_FOR_TESTS(
tests