diff options
| author | robot-piglet <[email protected]> | 2026-02-07 12:14:07 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2026-02-07 12:40:00 +0300 |
| commit | 0e21c41b9d3a3a49c95448b2eea3fdf2b8e87d01 (patch) | |
| tree | 41c06cabe5246cfb625ab8c56782fed748c7ad35 /contrib/python/wcwidth/py3 | |
| parent | 52112738aa5a3063a86695c6f99e6dd21350488c (diff) | |
Intermediate changes
commit_hash:3418900cda4febf1105de037f8c1ce1f8325c5aa
Diffstat (limited to 'contrib/python/wcwidth/py3')
27 files changed, 6300 insertions, 923 deletions
diff --git a/contrib/python/wcwidth/py3/.dist-info/METADATA b/contrib/python/wcwidth/py3/.dist-info/METADATA index 95adb5715ec..d147ef939e4 100644 --- a/contrib/python/wcwidth/py3/.dist-info/METADATA +++ b/contrib/python/wcwidth/py3/.dist-info/METADATA @@ -1,20 +1,18 @@ Metadata-Version: 2.4 Name: wcwidth -Version: 0.2.14 +Version: 0.3.0 Summary: Measures the displayed width of unicode strings in a terminal -Home-page: https://github.com/jquast/wcwidth -Author: Jeff Quast -Author-email: [email protected] -License: MIT +Project-URL: Homepage, https://github.com/jquast/wcwidth +Author-email: Jeff Quast <[email protected]> +License-Expression: MIT +License-File: LICENSE Keywords: cjk,combining,console,eastasian,emoji,emulator,terminal,unicode,wcswidth,wcwidth,xterm -Classifier: Intended Audience :: Developers -Classifier: Natural Language :: English Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Console -Classifier: License :: OSI Approved :: MIT License +Classifier: Intended Audience :: Developers +Classifier: Natural Language :: English Classifier: Operating System :: POSIX -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3 :: Only Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 @@ -22,22 +20,12 @@ Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 Classifier: Programming Language :: Python :: 3.14 +Classifier: Topic :: Software Development :: Internationalization Classifier: Topic :: Software Development :: Libraries Classifier: Topic :: Software Development :: Localization -Classifier: Topic :: Software Development :: Internationalization Classifier: Topic :: Terminals -Requires-Python: >=3.6 -License-File: LICENSE -Dynamic: 
author -Dynamic: author-email -Dynamic: classifier -Dynamic: description -Dynamic: home-page -Dynamic: keywords -Dynamic: license -Dynamic: license-file -Dynamic: requires-python -Dynamic: summary +Requires-Python: >=3.8 +Description-Content-Type: text/x-rst |pypi_downloads| |codecov| |license| @@ -45,98 +33,313 @@ Dynamic: summary Introduction ============ -This library is mainly for CLI programs that carefully produce output for -Terminals, or make pretend to be an emulator. - -**Problem Statement**: The printable length of *most* strings are equal to the -number of cells they occupy on the screen ``1 character : 1 cell``. However, -there are categories of characters that *occupy 2 cells* (full-wide), and -others that *occupy 0* cells (zero-width). - -**Solution**: POSIX.1-2001 and POSIX.1-2008 conforming systems provide -`wcwidth(3)`_ and `wcswidth(3)`_ C functions of which this python module's -functions precisely copy. *These functions return the number of cells a -unicode string is expected to occupy.* +This library is mainly for CLI/TUI programs that carefully produce output for Terminals. Installation ------------ -The stable version of this package is maintained on pypi, install using pip:: +The stable version of this package is maintained on pypi, install or upgrade, using pip:: - pip install wcwidth + pip install --upgrade wcwidth -Example +Problem ------- -**Problem**: given the following phrase (Japanese), +All Python string-formatting functions, `textwrap.wrap()`_, `str.ljust()`_, `str.rjust()`_, and +`str.center()`_ **incorrectly** measure the displayed width of a string as equal to the number of +their codepoints. - >>> text = u'コンニチハ' +Some examples of **incorrect results**: -Python **incorrectly** uses the *string length* of 5 codepoints rather than the -*printable length* of 10 cells, so that when using the `rjust` function, the -output length is wrong:: +.. 
code-block:: python - >>> print(len('コンニチハ')) - 5 + >>> # result consumes 16 total cells, 11 expected, + >>> 'コンニチハ'.rjust(11, 'X') + 'XXXXXXコンニチハ' - >>> print('コンニチハ'.rjust(20, '_')) - _______________コンニチハ + >>> # result consumes 5 total cells, 6 expected, + >>> 'café'.center(6, 'X') + 'caféX' -By defining our own "rjust" function that uses wcwidth, we can correct this:: +Solution +-------- - >>> def wc_rjust(text, length, padding=' '): - ... from wcwidth import wcswidth - ... return padding * max(0, (length - wcswidth(text))) + text - ... +The lowest-level functions in this library are the POSIX.1-2001 and POSIX.1-2008 `wcwidth(3)`_ and +`wcswidth(3)`_, which this library precisely copies by interface as `wcwidth()`_ and `wcswidth()`_. +These functions return -1 when C0 and C1 control codes are present. -Our **Solution** uses wcswidth to determine the string length correctly:: +An easy-to-use `width()`_ function is provided as a wrapper of `wcswidth()`_ that is also capable of +measuring most terminal control codes and sequences, like colors, bold, tabstops, and horizontal +cursor movement. - >>> from wcwidth import wcswidth - >>> print(wcswidth('コンニチハ')) - 10 +Text-justification is solved by the grapheme and sequence-aware functions `ljust()`_, +`rjust()`_, `center()`_, and `wrap()`_, serving as drop-in replacements to python standard functions +of the same names. - >>> print(wc_rjust('コンニチハ', 20, '_')) - __________コンニチハ +The iterator functions `iter_graphemes()`_ and `iter_sequences()`_ allow for careful navigation of +grapheme and terminal control sequence boundaries. The `clip()`_ function extracts substrings by +display column positions, and `strip_sequences()`_ removes terminal escape sequences from text. +Discrepancies +------------- -Choosing a Version ------------------- +You may find that support *varies* for complex unicode sequences or codepoints. -Export an environment variable, ``UNICODE_VERSION``. 
This should be done by -*terminal emulators* or those developers experimenting with authoring one of -their own, from shell:: +A companion utility, `jquast/ucs-detect`_ was authored to gather and publish the results of Wide +character support and version level, language support, zero-width joiner, and variation-16 support +as a `General Tabulated Summary`_ by terminal emulator software and version. - $ export UNICODE_VERSION=13.0 +======== +Overview +======== -If unspecified, the latest version is used. If your Terminal Emulator does not -export this variable, you can use the `jquast/ucs-detect`_ utility to -automatically detect and export it to your shell. +wcwidth() +--------- -wcwidth, wcswidth ------------------ Use function ``wcwidth()`` to determine the length of a *single unicode -character*, and ``wcswidth()`` to determine the length of many, a *string -of unicode characters*. +codepoint*. + +A brief overview, through examples, for all of the public API functions. + +Full API Documentation at https://wcwidth.readthedocs.io/en/latest/api.html + +wcwidth() +--------- + +Measures width of a single codepoint, + +.. code-block:: python + + >>> # '♀' narrow emoji + >>> wcwidth.wcwidth('\u2640') + 1 + +Use function `wcwidth()`_ to determine the length of a *single unicode character*. + +See `Specification <Specification_from_pypi_>`_ of character measurements. Note that ``-1`` is +returned for control codes. + +wcswidth() +---------- -Briefly, return values of function ``wcwidth()`` are: +Measures width of a string, returns -1 for control codes. -``-1`` - Indeterminate (not printable). +.. code-block:: python -``0`` - Does not advance the cursor, such as NULL or Combining. + >>> # '♀️' emoji w/vs-16 + >>> wcwidth.wcswidth('♀️') + 2 -``2`` - Characters of category East Asian Wide (W) or East Asian - Full-width (F) which are displayed using two terminal cells. +Use function `wcswidth()`_ to determine the length of many, a *string of unicode characters*. 
+ +See `Specification <Specification_from_pypi_>`_ of character measurements. Note that +``-1`` is returned if control codes occurs anywhere in the string. + +width() +------- + +Measures width of a string, with improved handling of ``control_codes`` + +.. code-block:: python + + >>> # same support as wcswidth(), eg. regional indicator flag: + >>> wcwidth.width('\U0001F1FF\U0001F1FC') + 2 + >>> # but also supports SGR colored text, 'WARN', followed by SGR reset + >>> wcwidth.width('\x1b[38;2;255;150;100mWARN\x1b[0m') + 4 + >>> # tabs, + >>> wcwidth.width('\t', tabsize=4) + 4 + >>> # or, tab and all other control characters can be ignored + >>> wcwidth.width('\t', control_codes='ignore') + 0 + >>> # "vertical" control characters are ignored + >>> wcwidth.width('\n') + 0 + >>> # as well as sequences with "indeterminate" effects like Home + Clear + >>> wcwidth.width('\x1b[H\x1b[2J') + 0 + >>> # or, raise ValueError for "indeterminate" effects using control_codes='strict' + >>> wcwidth.width('\n', control_codes='strict') + Traceback (most recent call last): + ... + ValueError: Vertical movement character 0xa at position 0 + +Use ``control_codes='ignore'`` when the input is known not to contain any control characters or +terminal sequences for slightly improved performance. Note that TAB (``'\t'``) is a control +character and is also ignored, you may want to use `str.expandtabs()`_, first. + +iter_sequences() +---------------- + +Iterates through text, segmented by terminal sequence, + +.. code-block:: python + + >>> list(wcwidth.iter_sequences('hello')) + [('hello', False)] + >>> list(wcwidth.iter_sequences('\x1b[31mred\x1b[0m')) + [('\x1b[31m', True), ('red', False), ('\x1b[0m', True)] + +Use `iter_sequences()`_ to split text into segments of plain text and escape sequences. Each tuple +contains the segment string and a boolean indicating whether it is an escape sequence (``True``) or +text (``False``). 
+ +iter_graphemes() +---------------- + +Use `iter_graphemes()`_ to iterate over *grapheme clusters* of a string. + +.. code-block:: python + + >>> from wcwidth import iter_graphemes + >>> # ok + Regional Indicator 'Z', 'W' (Zimbabwe) + >>> list(wcwidth.iter_graphemes('ok\U0001F1FF\U0001F1FC')) + ['o', 'k', '🇿🇼'] + + >>> # cafe + combining acute accent + >>> list(wcwidth.iter_graphemes('cafe\u0301')) + ['c', 'a', 'f', 'é'] + + >>> # ok + Emoji Man + ZWJ + Woman + ZWJ + Girl + >>> list(wcwidth.iter_graphemes('ok\U0001F468\u200D\U0001F469\u200D\U0001F467')) + ['o', 'k', '👨\u200d👩\u200d👧'] + +A grapheme cluster is what a user perceives as a single character, even if it is composed of +multiple Unicode codepoints. This function implements `Unicode Standard Annex #29`_ grapheme cluster +boundary rules. + +ljust() +------- -``1`` - All others. +Use `ljust()`_ as replacement of `str.ljust()`_: -Function ``wcswidth()`` simply returns the sum of all values for each character -along a string, or ``-1`` when it occurs anywhere along a string. +.. code-block:: python -Full API Documentation at https://wcwidth.readthedocs.io + >>> 'コンニチハ'.ljust(11, '*') # don't do this + 'コンニチハ******' + >>> wcwidth.ljust('コンニチハ', 11, '*') # do this! + 'コンニチハ*' + +rjust() +------- + +Use `rjust()`_ as replacement of `str.rjust()`_: + +.. code-block:: python + + >>> 'コンニチハ'.rjust(11, '*') # don't do this + '******コンニチハ' + >>> wcwidth.rjust('コンニチハ', 11, '*') # do this! + '*コンニチハ' + +center() +-------- + +Use `center()`_ as replacement of `str.center()`_: + +.. code-block:: python + + >>> 'cafe\u0301'.center(6, '*') # don't do this + 'café*' + >>> wcwidth.center('cafe\u0301', 6, '*') + '*café*' # do this! + +wrap() +------ + +Use function ``wrap()`` to wrap text containing terminal sequences, Unicode grapheme +clusters, and wide characters to a given display width. + +.. 
code-block:: python + + >>> from wcwidth import wrap + >>> # Basic wrapping + >>> wrap('hello world', 5) + ['hello', 'world'] + + >>> # Wrapping CJK text (each character is 2 cells wide) + >>> wrap('コンニチハ', 4) + ['コン', 'ニチ', 'ハ'] + + >>> # Text with ANSI color sequences + >>> wrap('\x1b[31mhello world\x1b[0m', 5) + ['\x1b[31mhello', 'world\x1b[0m'] + +clip() +------ + +Use `clip()`_ to extract a substring by column positions, preserving terminal sequences. + +.. code-block:: python + + >>> from wcwidth import clip + >>> # Wide characters split to Narrow boundaries using fillchar=' ' + >>> clip('中文字', 0, 3) + '中 ' + >>> clip('中文字', 1, 5, fillchar='.') + '.文.' + + >>> # *ALL* Terminal sequences are preserved + >>> clip('\x1b[31m中文\x1b[0m', 0, 3) + '\x1b[31m中 \x1b[0m' + +strip_sequences() +----------------- + +Use `strip_sequences()`_ to remove all terminal escape sequences from text. + +.. code-block:: python + + >>> from wcwidth import strip_sequences + >>> strip_sequences('\x1b[31mred\x1b[0m') + 'red' + +.. _ambiguous_width: + +ambiguous_width +--------------- + +Some Unicode characters have "East Asian Ambiguous" (A) width. These characters display as 1 cell by +default, matching Western terminal contexts, but many CJK (Chinese, Japanese, Korean) environments +may have a preference for 2 cells. This is often found as boolean option, "Ambiguous width as wide" +in Terminal Emulator software preferences. + +By default, wcwidth treats ambiguous characters as narrow (width 1). For CJK environments where your +terminal is configured to display ambiguous characters as double-width, pass ``ambiguous_width=2``: + +.. code-block:: python + + >>> # CIRCLED DIGIT ONE - ambiguous width + >>> wcwidth.width('\u2460') + 1 + >>> wcwidth.width('\u2460', ambiguous_width=2) + 2 + +The ``ambiguous_width`` parameter is available on all width-measuring functions: `wcwidth()`_, +`wcswidth()`_, `width()`_, `ljust()`_, `rjust()`_, `center()`_, `wrap()`_, and `clip()`_. 
+ +**Terminal Detection** + +The most reliable method to detect whether a terminal profile is set for "Ambiguous width as wide" +mode is to display an ambiguous character surrounded by a pair of Cursor Position Report (CPR) +queries with a terminal in cooked or raw mode, and to parse the responses for their ``(y, x)`` +locations, and measure the difference of the ``x`` positions. This code should also check whether +it is attached to a terminal and timeout, and then fallback to the preferred locale. + +`jquast/blessed`_ library provides a `Terminal.detect_ambiguous_width()`_ method: + +.. code-block:: python + + >>> import blessed, functools + >>> # Detect terminal ambiguous width as wide (2) or narrow (1) + >>> ambiguous_width = blessed.Terminal().detect_ambiguous_width() + >>> # Define a new 'width' function with this argument + >>> awidth = functools.partial(wcwidth.width, ambiguous_width=ambiguous_width) + >>> # result depends on attached terminal mode + >>> awidth('\u2460') + 1 ========== Developing @@ -146,9 +349,13 @@ Install wcwidth in editable mode:: pip install -e . -Execute unit tests using tox_ for all supported Python versions:: +Execute all code generation, autoformatters, linters and unit tests using tox:: - tox -e py36,py37,py38,py39,py310,py311,py312,py313,py314 + tox + +Or execute individual tasks, see ``tox -lv`` for all available targets:: + + tox -e pylint,py36,py314 Updating Unicode Version ------------------------ @@ -182,7 +389,7 @@ To upgrade requirements for updating unicode version, run:: To upgrade requirements for testing, run:: - tox -e update_requirements37,update_requirements39 + tox -e update_requirements38,update_requirements39 To upgrade requirements for building documentation, run:: @@ -238,10 +445,15 @@ This library is used in: - `saulpw/visidata`_: Terminal spreadsheet multitool for discovering and arranging data +- `jquast/ucs-detect`_: Utility for unicode support detection. 
+ =============== Other Languages =============== +There are similar implementations of the `wcwidth()`_ and `wcswidth()`_ functions in other +languages. + - `timoxley/wcwidth`_: JavaScript - `janlelis/unicode-display_width`_: Ruby - `alecrabbit/php-wcwidth`_: PHP @@ -253,11 +465,25 @@ Other Languages - `joachimschmidt557/zig-wcwidth`_: Zig - `fumiyas/wcwidth-cjk`_: `LD_PRELOAD` override - `joshuarubin/wcwidth9`_: Unicode version 9 in C +- `spectreconsole/wcwidth`_: C# ======= History ======= +0.3.0 *2026-01-21* + * **New** Function `iter_graphemes()`_. `PR #165`_. + * **New** Functions `width()`_ and `iter_sequences()`_. `PR #166`_. + * **New** Functions `ljust()`_, `rjust()`_, `center()`_. `PR #168`_. + * **New** Function `wrap()`_. `PR #169`_. + * **Performance** improvement in `wcswidth()`_. `PR #171`_. + * **New** argument ``ambiguous_width`` to all functions. `PR #172`_. + * **New** Functions `clip()`_ and `strip_sequences()`_. `PR #173`_. + * **Bugfix** Characters with ``Default_Ignorable_Code_Point`` property now + return width 0. `PR #174`_. + * **Bugfix** Characters with ``Prepended_Concatenation_Mark`` property now + return width 1. `PR #175`_. + 0.2.14 *2025-09-22* * **Drop Support** for Python 2.7 and 3.5. `PR #117`_. * **Update** tables to include Unicode Specifications 16.0.0 and 17.0.0. @@ -269,10 +495,10 @@ History * **Bugfix** zero-width support for Hangul Jamo (Korean) 0.2.12 *2023-11-21* - * re-release to remove .pyi file misplaced in wheel files `Issue #101`_. + * **Bugfix** Re-release to remove `.pyi` files misplaced in wheel `Issue #101`_. 0.2.11 *2023-11-20* - * Include tests files in the source distribution (`PR #98`_, `PR #100`_). + * **Updated** Include tests files in the source distribution (`PR #98`_, `PR #100`_). 0.2.10 *2023-11-13* * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F @@ -388,6 +614,15 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`PR #117`: https://github.com/jquast/wcwidth/pull/117 .. 
_`PR #146`: https://github.com/jquast/wcwidth/pull/146 .. _`PR #149`: https://github.com/jquast/wcwidth/pull/149 +.. _`PR #165`: https://github.com/jquast/wcwidth/pull/165 +.. _`PR #166`: https://github.com/jquast/wcwidth/pull/166 +.. _`PR #168`: https://github.com/jquast/wcwidth/pull/168 +.. _`PR #169`: https://github.com/jquast/wcwidth/pull/169 +.. _`PR #171`: https://github.com/jquast/wcwidth/pull/171 +.. _`PR #172`: https://github.com/jquast/wcwidth/pull/172 +.. _`PR #173`: https://github.com/jquast/wcwidth/pull/173 +.. _`PR #174`: https://github.com/jquast/wcwidth/pull/174 +.. _`PR #175`: https://github.com/jquast/wcwidth/pull/175 .. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101 .. _`jquast/blessed`: https://github.com/jquast/blessed .. _`selectel/pyte`: https://github.com/selectel/pyte @@ -413,12 +648,32 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`joachimschmidt557/zig-wcwidth`: https://github.com/joachimschmidt557/zig-wcwidth .. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk .. _`joshuarubin/wcwidth9`: https://github.com/joshuarubin/wcwidth9 +.. _`spectreconsole/wcwidth`: https://github.com/spectreconsole/wcwidth .. _`python-cmd2/cmd2`: https://github.com/python-cmd2/cmd2 .. _`stratis-storage/stratis-cli`: https://github.com/stratis-storage/stratis-cli .. _`ihabunek/toot`: https://github.com/ihabunek/toot .. _`saulpw/visidata`: https://github.com/saulpw/visidata .. _`pip-tools`: https://pip-tools.readthedocs.io/ .. _`sphinx`: https://www.sphinx-doc.org/ +.. _`textwrap.wrap()`: https://docs.python.org/3/library/textwrap.html#textwrap.wrap +.. _`str.ljust()`: https://docs.python.org/3/library/stdtypes.html#str.ljust +.. _`str.rjust()`: https://docs.python.org/3/library/stdtypes.html#str.rjust +.. _`str.center()`: https://docs.python.org/3/library/stdtypes.html#str.center +.. _`str.expandtabs()`: https://docs.python.org/3/library/stdtypes.html#str.expandtabs +.. 
_`General Tabulated Summary`: https://ucs-detect.readthedocs.io/results.html#tabulated-results +.. _`wcwidth()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.wcwidth +.. _`wcswidth()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.wcswidth +.. _`width()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.width +.. _`iter_graphemes()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_graphemes +.. _`ljust()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.ljust +.. _`rjust()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.rjust +.. _`center()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.center +.. _`wrap()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.wrap +.. _`clip()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.clip +.. _`strip_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.strip_sequences +.. _`iter_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_sequences +.. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/ +.. _`Terminal.detect_ambiguous_width()`: https://blessed.readthedocs.io/en/latest/api/terminal.html#blessed.terminal.Terminal.detect_ambiguous_width .. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi :alt: Downloads :target: https://pypi.org/project/wcwidth/ diff --git a/contrib/python/wcwidth/py3/README.rst b/contrib/python/wcwidth/py3/README.rst deleted file mode 100644 index 1b92ef4ca03..00000000000 --- a/contrib/python/wcwidth/py3/README.rst +++ /dev/null @@ -1,389 +0,0 @@ -|pypi_downloads| |codecov| |license| - -============ -Introduction -============ - -This library is mainly for CLI programs that carefully produce output for -Terminals, or make pretend to be an emulator. - -**Problem Statement**: The printable length of *most* strings are equal to the -number of cells they occupy on the screen ``1 character : 1 cell``. 
However, -there are categories of characters that *occupy 2 cells* (full-wide), and -others that *occupy 0* cells (zero-width). - -**Solution**: POSIX.1-2001 and POSIX.1-2008 conforming systems provide -`wcwidth(3)`_ and `wcswidth(3)`_ C functions of which this python module's -functions precisely copy. *These functions return the number of cells a -unicode string is expected to occupy.* - -Installation ------------- - -The stable version of this package is maintained on pypi, install using pip:: - - pip install wcwidth - -Example -------- - -**Problem**: given the following phrase (Japanese), - - >>> text = u'コンニチハ' - -Python **incorrectly** uses the *string length* of 5 codepoints rather than the -*printable length* of 10 cells, so that when using the `rjust` function, the -output length is wrong:: - - >>> print(len('コンニチハ')) - 5 - - >>> print('コンニチハ'.rjust(20, '_')) - _______________コンニチハ - -By defining our own "rjust" function that uses wcwidth, we can correct this:: - - >>> def wc_rjust(text, length, padding=' '): - ... from wcwidth import wcswidth - ... return padding * max(0, (length - wcswidth(text))) + text - ... - -Our **Solution** uses wcswidth to determine the string length correctly:: - - >>> from wcwidth import wcswidth - >>> print(wcswidth('コンニチハ')) - 10 - - >>> print(wc_rjust('コンニチハ', 20, '_')) - __________コンニチハ - - -Choosing a Version ------------------- - -Export an environment variable, ``UNICODE_VERSION``. This should be done by -*terminal emulators* or those developers experimenting with authoring one of -their own, from shell:: - - $ export UNICODE_VERSION=13.0 - -If unspecified, the latest version is used. If your Terminal Emulator does not -export this variable, you can use the `jquast/ucs-detect`_ utility to -automatically detect and export it to your shell. 
- -wcwidth, wcswidth ------------------ -Use function ``wcwidth()`` to determine the length of a *single unicode -character*, and ``wcswidth()`` to determine the length of many, a *string -of unicode characters*. - -Briefly, return values of function ``wcwidth()`` are: - -``-1`` - Indeterminate (not printable). - -``0`` - Does not advance the cursor, such as NULL or Combining. - -``2`` - Characters of category East Asian Wide (W) or East Asian - Full-width (F) which are displayed using two terminal cells. - -``1`` - All others. - -Function ``wcswidth()`` simply returns the sum of all values for each character -along a string, or ``-1`` when it occurs anywhere along a string. - -Full API Documentation at https://wcwidth.readthedocs.io - -========== -Developing -========== - -Install wcwidth in editable mode:: - - pip install -e . - -Execute unit tests using tox_ for all supported Python versions:: - - tox -e py36,py37,py38,py39,py310,py311,py312,py313,py314 - -Updating Unicode Version ------------------------- - -Regenerate python code tables from latest Unicode Specification data files:: - - tox -e update - -The script is located at ``bin/update-tables.py``, requires Python 3.9 or -later. It is recommended but not necessary to run this script with the newest -Python, because the newest Python has the latest ``unicodedata`` for generating -comments. - -Building Documentation ----------------------- - -This project is using `sphinx`_ 4.5 to build documentation:: - - tox -e sphinx - -The output will be in ``docs/_build/html/``. - -Updating Requirements ---------------------- - -This project is using `pip-tools`_ to manage requirements. 
- -To upgrade requirements for updating unicode version, run:: - - tox -e update_requirements_update - -To upgrade requirements for testing, run:: - - tox -e update_requirements37,update_requirements39 - -To upgrade requirements for building documentation, run:: - - tox -e update_requirements_docs - -Utilities ---------- - -Supplementary tools for browsing and testing terminals for wide unicode -characters are found in the `bin/`_ of this project's source code. Just ensure -to first ``pip install -r requirements-develop.txt`` from this projects main -folder. For example, an interactive browser for testing:: - - python ./bin/wcwidth-browser.py - -==== -Uses -==== - -This library is used in: - -- `jquast/blessed`_: a thin, practical wrapper around terminal capabilities in - Python. - -- `prompt-toolkit/python-prompt-toolkit`_: a Library for building powerful - interactive command lines in Python. - -- `dbcli/pgcli`_: Postgres CLI with autocompletion and syntax highlighting. - -- `thomasballinger/curtsies`_: a Curses-like terminal wrapper with a display - based on compositing 2d arrays of text. - -- `selectel/pyte`_: Simple VTXXX-compatible linux terminal emulator. - -- `astanin/python-tabulate`_: Pretty-print tabular data in Python, a library - and a command-line utility. - -- `rspeer/python-ftfy`_: Fixes mojibake and other glitches in Unicode - text. - -- `nbedos/termtosvg`_: Terminal recorder that renders sessions as SVG - animations. - -- `peterbrittain/asciimatics`_: Package to help people create full-screen text - UIs. 
- -- `python-cmd2/cmd2`_: A tool for building interactive command line apps - -- `stratis-storage/stratis-cli`_: CLI for the Stratis project - -- `ihabunek/toot`_: A Mastodon CLI/TUI client - -- `saulpw/visidata`_: Terminal spreadsheet multitool for discovering and - arranging data - -=============== -Other Languages -=============== - -- `timoxley/wcwidth`_: JavaScript -- `janlelis/unicode-display_width`_: Ruby -- `alecrabbit/php-wcwidth`_: PHP -- `Text::CharWidth`_: Perl -- `bluebear94/Terminal-WCWidth`_: Perl 6 -- `mattn/go-runewidth`_: Go -- `grepsuzette/wcwidth`_: Haxe -- `aperezdc/lua-wcwidth`_: Lua -- `joachimschmidt557/zig-wcwidth`_: Zig -- `fumiyas/wcwidth-cjk`_: `LD_PRELOAD` override -- `joshuarubin/wcwidth9`_: Unicode version 9 in C - -======= -History -======= - -0.2.14 *2025-09-22* - * **Drop Support** for Python 2.7 and 3.5. `PR #117`_. - * **Update** tables to include Unicode Specifications 16.0.0 and 17.0.0. - `PR #146`_. - * **Bugfix** U+00AD SOFT HYPHEN should measure as 1, versions 0.2.9 through - 0.2.13 measured as 0. `PR #149`_. - -0.2.13 *2024-01-06* - * **Bugfix** zero-width support for Hangul Jamo (Korean) - -0.2.12 *2023-11-21* - * re-release to remove .pyi file misplaced in wheel files `Issue #101`_. - -0.2.11 *2023-11-20* - * Include tests files in the source distribution (`PR #98`_, `PR #100`_). - -0.2.10 *2023-11-13* - * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F - Variation Selector 16 (`PR #97`_). - * **Updated** `Specification <Specification_from_pypi_>`_. - -0.2.9 *2023-10-30* - * **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese, - Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_). - * **Updated** to include `Specification <Specification_from_pypi_>`_ of - character measurements. - -0.2.8 *2023-09-30* - * Include requirements files in the source distribution (`PR #82`_). - -0.2.7 *2023-09-28* - * **Updated** tables to include Unicode Specification 15.1.0. 
- * Include ``bin``, ``docs``, and ``tox.ini`` in the source distribution - -0.2.6 *2023-01-14* - * **Updated** tables to include Unicode Specification 14.0.0 and 15.0.0. - * **Changed** developer tools to use pip-compile, and to use jinja2 templates - for code generation in `bin/update-tables.py` to prepare for possible - compiler optimization release. - -0.2.1 .. 0.2.5 *2020-06-23* - * **Repository** changes to update tests and packaging issues, and - begin tagging repository with matching release versions. - -0.2.0 *2020-06-01* - * **Enhancement**: Unicode version may be selected by exporting the - Environment variable ``UNICODE_VERSION``, such as ``13.0``, or ``6.3.0``. - See the `jquast/ucs-detect`_ CLI utility for automatic detection. - * **Enhancement**: - API Documentation is published to readthedocs.io. - * **Updated** tables for *all* Unicode Specifications with files - published in a programmatically consumable format, versions 4.1.0 - through 13.0 - -0.1.9 *2020-03-22* - * **Performance** optimization by `Avram Lubkin`_, `PR #35`_. - * **Updated** tables to Unicode Specification 13.0.0. - -0.1.8 *2020-01-01* - * **Updated** tables to Unicode Specification 12.0.0. (`PR #30`_). - -0.1.7 *2016-07-01* - * **Updated** tables to Unicode Specification 9.0.0. (`PR #18`_). - -0.1.6 *2016-01-08 Production/Stable* - * ``LICENSE`` file now included with distribution. - -0.1.5 *2015-09-13 Alpha* - * **Bugfix**: - Resolution of "combining_ character width" issue, most especially - those that previously returned -1 now often (correctly) return 0. - resolved by `Philip Craig`_ via `PR #11`_. - * **Deprecated**: - The module path ``wcwidth.table_comb`` is no longer available, - it has been superseded by module path ``wcwidth.table_zero``. - -0.1.4 *2014-11-20 Pre-Alpha* - * **Feature**: ``wcswidth()`` now determines printable length - for (most) combining_ characters. 
The developer's tool - `bin/wcwidth-browser.py`_ is improved to display combining_ - characters when provided the ``--combining`` option - (`Thomas Ballinger`_ and `Leta Montopoli`_ `PR #5`_). - * **Feature**: added static analysis (prospector_) to testing - framework. - -0.1.3 *2014-10-29 Pre-Alpha* - * **Bugfix**: 2nd parameter of wcswidth was not honored. - (`Thomas Ballinger`_, `PR #4`_). - -0.1.2 *2014-10-28 Pre-Alpha* - * **Updated** tables to Unicode Specification 7.0.0. - (`Thomas Ballinger`_, `PR #3`_). - -0.1.1 *2014-05-14 Pre-Alpha* - * Initial release to pypi, Based on Unicode Specification 6.3.0 - -This code was originally derived directly from C code of the same name, -whose latest version is available at -https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: - - * Markus Kuhn -- 2007-05-26 (Unicode 5.0) - * - * Permission to use, copy, modify, and distribute this software - * for any purpose and without fee is hereby granted. The author - * disclaims all warranties with regard to this software. - -.. _`Specification_from_pypi`: https://wcwidth.readthedocs.io/en/latest/specs.html -.. _`tox`: https://tox.wiki/en/latest/ -.. _`prospector`: https://github.com/landscapeio/prospector -.. _`combining`: https://en.wikipedia.org/wiki/Combining_character -.. _`bin/`: https://github.com/jquast/wcwidth/tree/master/bin -.. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/blob/master/bin/wcwidth-browser.py -.. _`Thomas Ballinger`: https://github.com/thomasballinger -.. _`Leta Montopoli`: https://github.com/lmontopo -.. _`Philip Craig`: https://github.com/philipc -.. _`PR #3`: https://github.com/jquast/wcwidth/pull/3 -.. _`PR #4`: https://github.com/jquast/wcwidth/pull/4 -.. _`PR #5`: https://github.com/jquast/wcwidth/pull/5 -.. _`PR #11`: https://github.com/jquast/wcwidth/pull/11 -.. _`PR #18`: https://github.com/jquast/wcwidth/pull/18 -.. _`PR #30`: https://github.com/jquast/wcwidth/pull/30 -.. _`PR #35`: https://github.com/jquast/wcwidth/pull/35 -.. 
_`PR #82`: https://github.com/jquast/wcwidth/pull/82 -.. _`PR #91`: https://github.com/jquast/wcwidth/pull/91 -.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97 -.. _`PR #98`: https://github.com/jquast/wcwidth/pull/98 -.. _`PR #100`: https://github.com/jquast/wcwidth/pull/100 -.. _`PR #117`: https://github.com/jquast/wcwidth/pull/117 -.. _`PR #146`: https://github.com/jquast/wcwidth/pull/146 -.. _`PR #149`: https://github.com/jquast/wcwidth/pull/149 -.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101 -.. _`jquast/blessed`: https://github.com/jquast/blessed -.. _`selectel/pyte`: https://github.com/selectel/pyte -.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies -.. _`dbcli/pgcli`: https://github.com/dbcli/pgcli -.. _`prompt-toolkit/python-prompt-toolkit`: https://github.com/prompt-toolkit/python-prompt-toolkit -.. _`timoxley/wcwidth`: https://github.com/timoxley/wcwidth -.. _`wcwidth(3)`: https://man7.org/linux/man-pages/man3/wcwidth.3.html -.. _`wcswidth(3)`: https://man7.org/linux/man-pages/man3/wcswidth.3.html -.. _`astanin/python-tabulate`: https://github.com/astanin/python-tabulate -.. _`janlelis/unicode-display_width`: https://github.com/janlelis/unicode-display_width -.. _`rspeer/python-ftfy`: https://github.com/rspeer/python-ftfy -.. _`alecrabbit/php-wcwidth`: https://github.com/alecrabbit/php-wcwidth -.. _`Text::CharWidth`: https://metacpan.org/pod/Text::CharWidth -.. _`bluebear94/Terminal-WCWidth`: https://github.com/bluebear94/Terminal-WCWidth -.. _`mattn/go-runewidth`: https://github.com/mattn/go-runewidth -.. _`grepsuzette/wcwidth`: https://github.com/grepsuzette/wcwidth -.. _`jquast/ucs-detect`: https://github.com/jquast/ucs-detect -.. _`Avram Lubkin`: https://github.com/avylove -.. _`nbedos/termtosvg`: https://github.com/nbedos/termtosvg -.. _`peterbrittain/asciimatics`: https://github.com/peterbrittain/asciimatics -.. _`aperezdc/lua-wcwidth`: https://github.com/aperezdc/lua-wcwidth -.. 
_`joachimschmidt557/zig-wcwidth`: https://github.com/joachimschmidt557/zig-wcwidth -.. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk -.. _`joshuarubin/wcwidth9`: https://github.com/joshuarubin/wcwidth9 -.. _`python-cmd2/cmd2`: https://github.com/python-cmd2/cmd2 -.. _`stratis-storage/stratis-cli`: https://github.com/stratis-storage/stratis-cli -.. _`ihabunek/toot`: https://github.com/ihabunek/toot -.. _`saulpw/visidata`: https://github.com/saulpw/visidata -.. _`pip-tools`: https://pip-tools.readthedocs.io/ -.. _`sphinx`: https://www.sphinx-doc.org/ -.. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi - :alt: Downloads - :target: https://pypi.org/project/wcwidth/ -.. |codecov| image:: https://codecov.io/gh/jquast/wcwidth/branch/master/graph/badge.svg - :alt: codecov.io Code Coverage - :target: https://app.codecov.io/gh/jquast/wcwidth/ -.. |license| image:: https://img.shields.io/pypi/l/wcwidth.svg - :target: https://pypi.org/project/wcwidth/ - :alt: MIT License diff --git a/contrib/python/wcwidth/py3/tests/GraphemeBreakTest.txt b/contrib/python/wcwidth/py3/tests/GraphemeBreakTest.txt new file mode 100644 index 00000000000..e1215547c58 --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/GraphemeBreakTest.txt @@ -0,0 +1,796 @@ +# GraphemeBreakTest-17.0.0.txt +# Date: 2025-03-24, 14:45:55 GMT +# © 2025 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# Default Grapheme_Cluster_Break Test +# +# Format: +# <string> (# <comment>)? +# <string> contains hex Unicode code points, with +# ÷ wherever there is a break opportunity, and +# × wherever there is not. 
+# <comment> the format can change, but currently it shows: +# - the sample character name +# - (x) the Grapheme_Cluster_Break property value for the sample character and +# any other properties relevant to the algorithm, as described in +# GraphemeBreakTest.html +# - [x] the rule that determines whether there is a break or not, +# as listed in the Rules section of GraphemeBreakTest.html +# +# These samples may be extended or changed in the future. +# +÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000D ÷ 0000 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <NULL> (Control) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0000 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 000D ÷ 094D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 000D ÷ 0308 × 094D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 000D ÷ 200C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH NON-JOINER 
(ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 000D ÷ 0308 × 200C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000D ÷ 06DD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 06DD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> 
(CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 0915 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 000D ÷ 00A9 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 00A9 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] 
<CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 000A ÷ 0000 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <NULL> (Control) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0000 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 000A ÷ 094D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 000A ÷ 0308 × 094D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 000A ÷ 200C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 000A ÷ 0308 × 200C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL 
LETTER A (RI) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 000A ÷ 06DD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 06DD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS 
(Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 0915 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 000A ÷ 00A9 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 00A9 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0000 ÷ 000D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0000 ÷ 000A ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0000 ÷ 0000 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] <NULL> (Control) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 0000 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0000 ÷ 094D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] DEVANAGARI 
SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0000 ÷ 0308 × 094D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0000 ÷ 0300 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0000 ÷ 0308 × 0300 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0000 ÷ 200C ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0000 ÷ 0308 × 200C ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0000 ÷ 200D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0000 ÷ 0308 × 200D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0000 ÷ 1F1E6 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0000 ÷ 06DD ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 06DD ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0000 ÷ 0903 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0000 ÷ 0308 × 0903 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0000 ÷ 1100 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK 
(L) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0000 ÷ 1160 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0000 ÷ 11A8 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0000 ÷ AC00 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0000 ÷ AC01 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0000 ÷ 0915 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0000 ÷ 00A9 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 00A9 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0000 ÷ 0020 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0000 ÷ 0378 ÷ # ÷ 
[0.2] <NULL> (Control) ÷ [4.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0000 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 094D ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 094D × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 094D ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 094D × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 094D ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 094D × 0308 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 094D × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 094D × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 094D × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 094D × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 094D × 200C ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] ZERO WIDTH 
NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 094D × 0308 × 200C ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 094D × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 094D × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 094D ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 094D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 094D ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 094D × 0308 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 094D × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 094D × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 094D ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 094D × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 094D ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG 
FILLER (V) ÷ [0.3] +÷ 094D × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 094D ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 094D × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 094D ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 094D × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 094D ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 094D × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 094D ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 094D × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 094D ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 094D × 0308 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 094D ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 094D × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA 
(Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 094D ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 094D × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0300 ÷ 0000 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0300 × 0308 ÷ 0000 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0300 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0300 × 0308 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING 
GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0300 × 200C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0300 × 0308 × 200C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0300 ÷ 06DD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0300 × 0308 ÷ 06DD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT 
(Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ 
[999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0300 × 0308 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0300 ÷ 00A9 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0300 × 0308 ÷ 00A9 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 200C ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 200C × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 200C ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 200C × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS 
(Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 200C ÷ 0000 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 200C × 0308 ÷ 0000 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 200C × 094D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 200C × 0308 × 094D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 200C × 0300 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 200C × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 200C × 200C ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 200C × 0308 × 200C ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 200C × 200D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 200C × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 200C ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER 
(ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200C ÷ 06DD ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 200C × 0308 ÷ 06DD ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 200C × 0903 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200C × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200C ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 200C × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 200C ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 200C × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 200C ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 200C × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS 
(Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 200C ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 200C × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 200C ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 200C × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 200C ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 200C × 0308 ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 200C ÷ 00A9 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 200C × 0308 ÷ 00A9 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 200C ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 200C × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 200C ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ 
[0.3] +÷ 200C × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 200D ÷ 0000 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 200D × 0308 ÷ 0000 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 200D × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 200D × 0308 × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 200D × 200C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 200D × 0308 × 200C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 200D × 200D ÷ # ÷ 
[0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 200D ÷ 06DD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 200D × 0308 ÷ 06DD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 200D 
× 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 200D ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 200D × 0308 ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 200D ÷ 00A9 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 200D × 0308 ÷ 00A9 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1F1E6 × 0308 
÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1F1E6 ÷ 0000 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0000 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 1F1E6 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 1F1E6 × 0308 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1F1E6 × 200C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 1F1E6 × 0308 × 200C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR 
SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1F1E6 ÷ 06DD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 06DD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1F1E6 ÷ AC01 ÷ # 
÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1F1E6 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 1F1E6 ÷ 00A9 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 00A9 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 06DD ÷ 000D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 06DD × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 06DD ÷ 000A ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 06DD × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) 
× [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 06DD ÷ 0000 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 06DD × 0308 ÷ 0000 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 06DD × 094D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 06DD × 0308 × 094D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 06DD × 0300 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 06DD × 0308 × 0300 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 06DD × 200C ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 06DD × 0308 × 200C ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 06DD × 200D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 06DD × 0308 × 200D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 06DD × 1F1E6 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 06DD × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 06DD × 06DD ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × 
[9.2] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 06DD × 0308 ÷ 06DD ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 06DD × 0903 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 06DD × 0308 × 0903 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 06DD × 1100 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 06DD × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 06DD × 1160 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 06DD × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 06DD × 11A8 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 06DD × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 06DD × AC00 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 06DD × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 06DD × AC01 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 06DD × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 06DD × 0915 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ 
[0.3] +÷ 06DD × 0308 ÷ 0915 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 06DD × 00A9 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 06DD × 0308 ÷ 00A9 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 06DD × 0020 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 06DD × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 06DD × 0378 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 06DD × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0903 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0903 × 0308 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0903 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] 
DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0903 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0903 × 200C ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0903 × 0308 × 200C ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0903 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0903 × 0308 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA 
(SpacingMark) ÷ [0.3] +÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0903 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA 
(LinkingConsonant) ÷ [0.3] +÷ 0903 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0903 × 0308 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1100 ÷ 0000 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 1100 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 1100 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 1100 × 0308 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × 
[9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1100 × 200C ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 1100 × 0308 × 200C ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1100 ÷ 06DD ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 1100 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1100 × 0308 ÷ 
1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 1100 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 1100 ÷ 00A9 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 1100 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] 
COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 1160 ÷ 0000 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 1160 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 1160 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 1160 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1160 × 200C ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) 
÷ [0.3] +÷ 1160 × 0308 × 200C ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 1160 ÷ 06DD ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 1160 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG 
KIYEOK (T) ÷ [0.3] +÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 1160 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 1160 ÷ 00A9 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 1160 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE 
RETURN (CR)> (CR) ÷ [0.3] +÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 11A8 ÷ 0000 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 11A8 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 11A8 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 11A8 × 200C ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 11A8 × 0308 × 200C ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 11A8 ÷ 1F1E6 
÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 11A8 ÷ 06DD ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 11A8 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL 
JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 11A8 ÷ 00A9 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 11A8 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ AC00 ÷ 0000 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ 
[5.0] <NULL> (Control) ÷ [0.3] +÷ AC00 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ AC00 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ AC00 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ AC00 × 200C ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ AC00 × 0308 × 200C ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC00 ÷ 06DD ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ AC00 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 
AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ AC00 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ AC00 ÷ 00A9 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ 
[0.3] +÷ AC00 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ AC01 ÷ 0000 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ AC01 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ AC01 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ AC01 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ AC01 × 0308 
× 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ AC01 × 200C ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ AC01 × 0308 × 200C ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ AC01 ÷ 06DD ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ AC01 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] 
HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC01 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ AC01 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ AC01 ÷ 00A9 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ AC01 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ AC01 
× 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0915 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0915 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0915 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0915 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0915 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0915 × 0308 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0915 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0915 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0915 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0915 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0915 × 200C ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0915 × 0308 × 200C ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS 
(Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0915 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0915 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0915 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0915 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0915 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0915 × 0308 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0915 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0915 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0915 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0915 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0915 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0915 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0915 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI 
LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0915 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0915 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0915 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0915 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0915 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0915 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0915 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0915 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0915 × 0308 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0915 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0915 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0915 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0915 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA 
(LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 00A9 ÷ 000D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 00A9 × 0308 ÷ 000D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 00A9 ÷ 000A ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 00A9 × 0308 ÷ 000A ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 00A9 ÷ 0000 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 00A9 × 0308 ÷ 0000 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 00A9 × 094D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 00A9 × 0308 × 094D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 00A9 × 0300 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 00A9 × 0308 × 0300 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 00A9 × 200C ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 00A9 × 0308 × 200C ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 00A9 × 200D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] 
+÷ 00A9 × 0308 × 200D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 00A9 ÷ 1F1E6 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 00A9 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 00A9 ÷ 06DD ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 00A9 × 0308 ÷ 06DD ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 00A9 × 0903 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 00A9 × 0308 × 0903 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 00A9 ÷ 1100 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 00A9 × 0308 ÷ 1100 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 00A9 ÷ 1160 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 00A9 × 0308 ÷ 1160 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 00A9 ÷ 11A8 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 00A9 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 00A9 ÷ AC00 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 00A9 × 0308 ÷ AC00 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × 
[9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 00A9 ÷ AC01 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 00A9 × 0308 ÷ AC01 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 00A9 ÷ 0915 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 00A9 × 0308 ÷ 0915 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 00A9 ÷ 00A9 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 00A9 × 0308 ÷ 00A9 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 00A9 ÷ 0020 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 00A9 × 0308 ÷ 0020 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 00A9 ÷ 0378 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 00A9 × 0308 ÷ 0378 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × 
[9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0020 ÷ 0000 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0020 × 0308 ÷ 0000 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0020 × 094D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0020 × 0308 × 094D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0020 × 200C ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0020 × 0308 × 200C ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR 
SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0020 ÷ 06DD ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0020 × 0308 ÷ 06DD ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING 
DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 ÷ 0915 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0020 × 0308 ÷ 0915 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0020 ÷ 00A9 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0020 × 0308 ÷ 00A9 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0378 ÷ 0000 ÷ # ÷ [0.2] <reserved-0378> 
(XXmLinkingConsonantmExtPict) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0378 × 0308 ÷ 0000 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3] +÷ 0378 × 094D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0378 × 0308 × 094D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3] +÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0378 × 200C ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0378 × 0308 × 200C ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3] +÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 
0378 ÷ 06DD ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0378 × 0308 ÷ 06DD ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3] +÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ 
[0.3] +÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 ÷ 0915 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0378 × 0308 ÷ 0915 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 0378 ÷ 00A9 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0378 × 0308 ÷ 00A9 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3] +÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE 
(XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] ARABIC LETTER NOON (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 
1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3] +÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] BABY (ExtPict) ÷ [0.3] +÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] BABY (ExtPict) ÷ [0.3] +÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) × [11.0] 
BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] +÷ 2701 × 200D ÷ 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] UPPER BLADE SCISSORS (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] UPPER BLADE SCISSORS (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0915 ÷ 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3] +÷ 0915 × 093C × 200D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3] +÷ 0915 × 093C × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA 
(Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 0924 × 094D × 092F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER YA (LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D ÷ 0061 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [0.3] +÷ 0061 × 094D ÷ 0924 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3] +÷ 003F × 094D ÷ 0924 ÷ # ÷ [0.2] QUESTION MARK (XXmLinkingConsonantmExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3] +÷ 0AB8 × 0AFB × 0ACD × 0AB8 × 0AFB ÷ # ÷ [0.2] GUJARATI LETTER SA (LinkingConsonant) × [9.0] GUJARATI SIGN SHADDA (Extend_ConjunctExtendermConjunctLinker) × [9.0] GUJARATI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] GUJARATI LETTER SA (LinkingConsonant) × [9.0] GUJARATI SIGN SHADDA (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1019 × 1039 × 1018 ÷ 102C × 1037 ÷ # ÷ [0.2] MYANMAR LETTER MA (LinkingConsonant) × [9.0] MYANMAR SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] MYANMAR LETTER BHA (LinkingConsonant) ÷ [999.0] MYANMAR VOWEL SIGN AA (XXmLinkingConsonantmExtPict) × [9.0] MYANMAR SIGN DOT BELOW (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1004 × 103A × 1039 
× 1011 × 1039 × 1011 ÷ # ÷ [0.2] MYANMAR LETTER NGA (LinkingConsonant) × [9.0] MYANMAR SIGN ASAT (Extend_ConjunctExtendermConjunctLinker) × [9.0] MYANMAR SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] MYANMAR LETTER THA (LinkingConsonant) × [9.0] MYANMAR SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] MYANMAR LETTER THA (LinkingConsonant) ÷ [0.3] +÷ 1B12 × 1B01 ÷ 1B32 × 1B44 × 1B2F ÷ 1B32 × 1B44 × 1B22 × 1B44 × 1B2C ÷ 1B32 × 1B44 × 1B22 × 1B38 ÷ # ÷ [0.2] BALINESE LETTER OKARA TEDUNG (XXmLinkingConsonantmExtPict) × [9.0] BALINESE SIGN ULU CANDRA (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] BALINESE LETTER SA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER WA (LinkingConsonant) ÷ [999.0] BALINESE LETTER SA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER TA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER YA (LinkingConsonant) ÷ [999.0] BALINESE LETTER SA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER TA (LinkingConsonant) × [9.0] BALINESE VOWEL SIGN SUKU (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 179F × 17D2 × 178F × 17D2 × 179A × 17B8 ÷ # ÷ [0.2] KHMER LETTER SA (LinkingConsonant) × [9.0] KHMER SIGN COENG (Extend_ConjunctLinker) × [9.3] KHMER LETTER TA (LinkingConsonant) × [9.0] KHMER SIGN COENG (Extend_ConjunctLinker) × [9.3] KHMER LETTER RO (LinkingConsonant) × [9.0] KHMER VOWEL SIGN II (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3] +÷ 1B26 ÷ 1B17 × 1B44 × 1B13 ÷ # ÷ [0.2] BALINESE LETTER NA (LinkingConsonant) ÷ [999.0] BALINESE LETTER NGA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER KA (LinkingConsonant) ÷ [0.3] +÷ 1B27 ÷ 1B13 × 1B44 × 1B0B ÷ 1B0B × 1B04 ÷ # ÷ [0.2] BALINESE LETTER PA (LinkingConsonant) ÷ [999.0] BALINESE LETTER KA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER 
RA REPA (LinkingConsonant) ÷ [999.0] BALINESE LETTER RA REPA (LinkingConsonant) × [9.1] BALINESE SIGN BISAH (SpacingMark) ÷ [0.3] +÷ 1795 × 17D2 × 17AF ÷ 1798 ÷ # ÷ [0.2] KHMER LETTER PHA (LinkingConsonant) × [9.0] KHMER SIGN COENG (Extend_ConjunctLinker) × [9.3] KHMER INDEPENDENT VOWEL QE (LinkingConsonant) ÷ [999.0] KHMER LETTER MO (LinkingConsonant) ÷ [0.3] +÷ 17A0 × 17D2 × 17AB ÷ 1791 × 17D0 ÷ 1799 ÷ # ÷ [0.2] KHMER LETTER HA (LinkingConsonant) × [9.0] KHMER SIGN COENG (Extend_ConjunctLinker) × [9.3] KHMER INDEPENDENT VOWEL RY (LinkingConsonant) ÷ [999.0] KHMER LETTER TO (LinkingConsonant) × [9.0] KHMER SIGN SAMYOK SANNYA (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] KHMER LETTER YO (LinkingConsonant) ÷ [0.3] +# +# Lines: 766 +# +# EOF diff --git a/contrib/python/wcwidth/py3/tests/test_ambiguous.py b/contrib/python/wcwidth/py3/tests/test_ambiguous.py new file mode 100644 index 00000000000..0c61cdacf9c --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_ambiguous.py @@ -0,0 +1,80 @@ +"""Tests for ambiguous_width parameter.""" +# 3rd party +import pytest + +# local +import wcwidth + +AMBIGUOUS_CHARS = [ + ('\u00a1', 'INVERTED_EXCLAMATION'), + ('\u00a7', 'SECTION_SIGN'), + ('\u00b0', 'DEGREE_SIGN'), + ('\u00b1', 'PLUS_MINUS'), + ('\u00d7', 'MULTIPLICATION'), + ('\u00f7', 'DIVISION'), + ('\u2460', 'CIRCLED_ONE'), + ('\u2500', 'BOX_HORIZONTAL'), + ('\u25a0', 'BLACK_SQUARE'), + ('\u2605', 'BLACK_STAR'), +] + + +@pytest.mark.parametrize('char,name', AMBIGUOUS_CHARS) +def test_wcwidth_ambiguous_default(char, name): + """Ambiguous characters return width 1 by default.""" + assert wcwidth.wcwidth(char) == 1 + + +@pytest.mark.parametrize('char,name', AMBIGUOUS_CHARS) +def test_wcwidth_ambiguous_wide(char, name): + """Ambiguous characters return width 2 when ambiguous_width=2.""" + assert wcwidth.wcwidth(char, ambiguous_width=2) == 2 + + +def test_wcswidth_mixed_ambiguous_and_wide(): + """Mixed CJK and ambiguous characters.""" + text = '\u4e2d\u00b1' # CJK (wide=2) + PLUS-MINUS
(ambiguous) + assert wcwidth.wcswidth(text) == 3 + assert wcwidth.wcswidth(text, ambiguous_width=2) == 4 + + +def test_width_ambiguous(): + """Width() respects ambiguous_width parameter.""" + assert wcwidth.width('\u00b1') == 1 + assert wcwidth.width('\u00b1', ambiguous_width=2) == 2 + + +def test_ljust_ambiguous(): + """Ljust respects ambiguous_width parameter.""" + text = '\u00b1' + assert wcwidth.ljust(text, 4) == '\u00b1 ' + assert wcwidth.ljust(text, 4, ambiguous_width=2) == '\u00b1 ' + + +def test_rjust_ambiguous(): + """Rjust respects ambiguous_width parameter.""" + text = '\u00b1' + assert wcwidth.rjust(text, 4) == ' \u00b1' + assert wcwidth.rjust(text, 4, ambiguous_width=2) == ' \u00b1' + + +def test_center_ambiguous(): + """Center respects ambiguous_width parameter.""" + text = '\u00b1' + assert wcwidth.center(text, 5) == ' \u00b1 ' + assert wcwidth.center(text, 6, ambiguous_width=2) == ' \u00b1 ' + + +def test_wrap_ambiguous(): + """Wrap respects ambiguous_width parameter.""" + text = '\u00b1' * 5 # 5 ambiguous characters + assert wcwidth.wrap(text, 4) == ['\u00b1\u00b1\u00b1\u00b1', '\u00b1'] + assert wcwidth.wrap(text, 4, ambiguous_width=2) == ['\u00b1\u00b1', '\u00b1\u00b1', '\u00b1'] + + +def test_wide_not_affected_by_ambiguous(): + """Wide characters remain wide regardless of ambiguous_width.""" + cjk = '\u4e2d' # CJK character (always wide) + assert wcwidth.wcwidth(cjk) == 2 + assert wcwidth.wcwidth(cjk, ambiguous_width=2) == 2 + assert wcwidth.wcwidth(cjk, ambiguous_width=1) == 2 diff --git a/contrib/python/wcwidth/py3/tests/test_clip.py b/contrib/python/wcwidth/py3/tests/test_clip.py new file mode 100644 index 00000000000..8a98c14c6f2 --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_clip.py @@ -0,0 +1,223 @@ +"""Tests for clip() and strip_sequences() functions.""" +# 3rd party +import pytest + +# local +from wcwidth import clip, width, strip_sequences + +STRIP_SEQUENCES_CASES = [ + ('', ''), + ('hello', 'hello'), + ('hello world', 'hello 
world'), + ('\x1b[31m', ''), + ('\x1b[0m', ''), + ('\x1b[m', ''), + ('\x1b[31mred\x1b[0m', 'red'), + ('\x1b[1m\x1b[31mbold red\x1b[0m', 'bold red'), + ('\x1b[1m\x1b[31m\x1b[4m', ''), + ('\x1b[1mbold\x1b[0m \x1b[3mitalic\x1b[0m', 'bold italic'), + ('\x1b]0;title\x07', ''), + ('\x1b]0;title\x07text', 'text'), + ('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 'link'), + ('\x1b[31m中文\x1b[0m', '中文'), + ('\x1b[1m\U0001F468\u200D\U0001F469\u200D\U0001F467\x1b[0m', + '\U0001F468\u200D\U0001F469\u200D\U0001F467'), + ('\x1b', '\x1b'), + ('a\x1bb', 'a\x1bb'), + ('\x1b[', ''), + ('text\x1b[mmore', 'textmore'), +] + + +@pytest.mark.parametrize('text,expected', STRIP_SEQUENCES_CASES) +def test_strip_sequences(text, expected): + assert strip_sequences(text) == expected + + +CLIP_BASIC_CASES = [ + ('', 0, 5, ''), + ('', 0, 0, ''), + ('hello', 0, 0, ''), + ('hello', 5, 5, ''), + ('hello', 5, 3, ''), + ('hello', -5, 3, 'hel'), + ('hello', 0, 5, 'hello'), + ('hello', 0, 3, 'hel'), + ('hello', 2, 5, 'llo'), + ('hello', 1, 4, 'ell'), + ('hello world', 0, 5, 'hello'), + ('hello world', 6, 11, 'world'), + ('hello world', 0, 11, 'hello world'), + ('hi', 0, 100, 'hi'), + ('hi', 100, 200, ''), +] + + +@pytest.mark.parametrize('text,start,end,expected', CLIP_BASIC_CASES) +def test_clip_basic(text, start, end, expected): + assert clip(text, start, end) == expected + + +CLIP_CJK_CASES = [ + ('中文字', 0, 6, '中文字'), + ('中文字', 0, 4, '中文'), + ('中文字', 0, 2, '中'), + ('中文字', 2, 4, '文'), + ('中文字', 0, 3, '中 '), + ('中文字', 1, 6, ' 文字'), + ('中文字', 1, 5, ' 文 '), + ('A中B', 0, 4, 'A中B'), + ('A中B', 0, 3, 'A中'), + ('A中B', 1, 4, '中B'), + ('A中B', 1, 3, '中'), + ('A中B', 2, 4, ' B'), + ('中', 0, 2, '中'), + ('中', 0, 1, ' '), + ('中', 1, 2, ' '), +] + + +@pytest.mark.parametrize('text,start,end,expected', CLIP_CJK_CASES) +def test_clip_cjk(text, start, end, expected): + assert clip(text, start, end) == expected + + +def test_clip_cjk_custom_fillchar(): + assert clip('中文字', 1, 5, fillchar='.') == '.文.'
+ assert clip('中文', 1, 3, fillchar='\u00b7') == '\u00b7\u00b7' + + +CLIP_CJK_WIDTH_CASES = [ + ('中文字', 0, 6, 6), + ('中文字', 0, 3, 3), + ('中文字', 1, 6, 5), + ('中文字', 1, 5, 4), +] + + +@pytest.mark.parametrize('text,start,end,expected_width', CLIP_CJK_WIDTH_CASES) +def test_clip_cjk_width_consistency(text, start, end, expected_width): + assert width(clip(text, start, end)) == expected_width + + +def test_clip_sequences_preserve_sgr(): + result = clip('\x1b[31mred\x1b[0m', 0, 3) + assert result == '\x1b[31mred\x1b[0m' + assert strip_sequences(result) == 'red' + + +def test_clip_sequences_before_start(): + assert clip('\x1b[31mred text\x1b[0m', 4, 8) == '\x1b[31mtext\x1b[0m' + + +def test_clip_sequences_after_end(): + assert clip('hello\x1b[31m world\x1b[0m', 0, 5) == 'hello\x1b[31m\x1b[0m' + + +def test_clip_sequences_multiple(): + assert clip('\x1b[1m\x1b[31mbold red\x1b[0m', 0, 4) == '\x1b[1m\x1b[31mbold\x1b[0m' + + +def test_clip_sequences_only(): + assert clip('\x1b[31m\x1b[0m', 0, 10) == '\x1b[31m\x1b[0m' + + +def test_clip_sequences_osc_hyperlink(): + assert clip('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 0, 4) == \ + '\x1b]8;;https://example.com\x07link\x1b]8;;\x07' + + +def test_clip_sequences_cjk_with_sequences(): + assert clip('\x1b[31m中文\x1b[0m', 0, 3) == '\x1b[31m中 \x1b[0m' + + +def test_clip_sequences_between_chars(): + assert clip('a\x1b[31mb\x1b[0mc', 1, 2) == '\x1b[31mb\x1b[0m' + + +def test_clip_sequences_lone_esc(): + assert clip('a\x1bb', 0, 2) == 'a\x1bb' + + +CLIP_EMOJI_CASES = [ + ('\U0001F600', 2), + ('\U0001F468\u200D\U0001F469\u200D\U0001F467', 2), + ('\u2764\uFE0F', 2), + ('\U0001F1FA\U0001F1F8', 2), +] + + +@pytest.mark.parametrize('emoji,full_width', CLIP_EMOJI_CASES) +def test_clip_emoji(emoji, full_width): + assert clip(emoji, 0, full_width) == emoji + assert clip(emoji, 0, 1) == ' ' + assert width(emoji) == full_width + + +def test_clip_emoji_with_sequences(): + assert clip('\x1b[1m\U0001F600\x1b[0m', 0, 2) == '\x1b[1m\U0001F600\x1b[0m' + + +def
test_clip_combining_accent(): + assert clip('cafe\u0301', 0, 4) == 'cafe\u0301' + assert clip('cafe\u0301', 0, 3) == 'caf' + + +def test_clip_combining_multiple(): + assert clip('e\u0301\u0327', 0, 1) == 'e\u0301\u0327' + + +def test_clip_ambiguous_width_1(): + assert clip('\u00b1test', 0, 3, ambiguous_width=1) == '\u00b1te' + + +def test_clip_ambiguous_width_2(): + assert clip('\u00b1test', 0, 3, ambiguous_width=2) == '\u00b1t' + + +CLIP_TAB_CASES = [ + ('a\tb', 0, 10, 8, 'a b'), + ('a\tb', 0, 4, 8, 'a '), + ('a\tb', 0, 10, 4, 'a b'), + ('a\tb', 4, 10, 8, ' b'), + ('a\tb\tc', 0, 20, 4, 'a b c'), + ('中\tb', 0, 10, 4, '中 b'), + ('a\tb', 0, 5, 0, 'a\tb'), +] + + +@pytest.mark.parametrize('text,start,end,tabsize,expected', CLIP_TAB_CASES) +def test_clip_tab_expansion(text, start, end, tabsize, expected): + assert clip(text, start, end, tabsize=tabsize) == expected + + +def test_clip_tab_with_sequences(): + assert clip('\x1b[31mab\tc\x1b[0m', 0, 12, tabsize=4) == '\x1b[31mab c\x1b[0m' + + +CLIP_CONTROL_CHAR_CASES = [ + ('abc\bde', 0, 5, 'abc\bde'), + ('ab\acd', 0, 4, 'ab\acd'), + ('ab\x00cd', 0, 4, 'ab\x00cd'), + ('abc\rde', 0, 5, 'abc\rde'), + ('\a\b\rHello', 0, 5, '\a\b\rHello'), + ('ab\x01\x02cd', 0, 4, 'ab\x01\x02cd'), +] + + +@pytest.mark.parametrize('text,start,end,expected', CLIP_CONTROL_CHAR_CASES) +def test_clip_control_chars_zero_width(text, start, end, expected): + assert clip(text, start, end) == expected + + +CLIP_CURSOR_SEQUENCE_CASES = [ + ('ab\x1b[5Ccd', 0, 4, 'ab\x1b[5Ccd'), + ('abcde\x1b[2Df', 0, 6, 'abcde\x1b[2Df'), + ('ab\x1b[10Ccd', 0, 4, 'ab\x1b[10Ccd'), + ('ab\x1b[Ccd', 0, 4, 'ab\x1b[Ccd'), +] + + +@pytest.mark.parametrize('text,start,end,expected', CLIP_CURSOR_SEQUENCE_CASES) +def test_clip_cursor_sequences_zero_width(text, start, end, expected): + assert clip(text, start, end) == expected diff --git a/contrib/python/wcwidth/py3/tests/test_core.py b/contrib/python/wcwidth/py3/tests/test_core.py index 206bbdcab38..b8ae3611430 100644 ---
a/contrib/python/wcwidth/py3/tests/test_core.py +++ b/contrib/python/wcwidth/py3/tests/test_core.py @@ -1,10 +1,9 @@ -"""Core tests for wcwidth module. isort:skip_file""" -try: - # std import - import importlib.metadata as importmeta -except ImportError: - # 3rd party for python3.7 and earlier - import importlib_metadata as importmeta +"""Core tests for wcwidth module.""" +# std imports +import importlib.metadata + +# 3rd party +import pytest # local import wcwidth @@ -13,7 +12,7 @@ import wcwidth def test_package_version(): """wcwidth.__version__ is expected value.""" # given, - expected = importmeta.version('wcwidth') + expected = importlib.metadata.version('wcwidth') # exercise, result = wcwidth.__version__ @@ -43,7 +42,7 @@ def test_empty_string(): def basic_string_type(): """ - This is a python 2-specific test of the basic "string type" + This is a python 2-specific test of the basic "string type". Such strings cannot contain anything but ascii in python2. """ @@ -122,14 +121,14 @@ def test_null_width_0(): def test_control_c0_width_negative_1(): - """How the API reacts to CSI (Control sequence initiate). + """ + How the API reacts to CSI (Control sequence initiate). - An example of bad fortune, this terminal sequence is a width of 0 - on all terminals, but wcwidth doesn't parse Control-Sequence-Inducer - (CSI) sequences. + An example of bad fortune, this terminal sequence is a width of 0 on all terminals, but wcwidth + doesn't parse Control-Sequence-Inducer (CSI) sequences. - Also the "legacy" posix functions wcwidth and wcswidth return -1 for - any string containing the C1 control character \x1b (ESC). + Also the "legacy" posix functions wcwidth and wcswidth return -1 for any string containing the + C1 control character \x1b (ESC). """ # given, phrase = '\x1b[0m' @@ -195,8 +194,8 @@ def test_balinese_script(): """ Balinese kapal (ship) is length 3. - This may be an example that is not yet correctly rendered by any terminal so - far, like devanagari. 
+ This may be an example that is not yet correctly rendered by any terminal so far, like + devanagari. """ phrase = ("\u1B13" # Category 'Lo', EAW 'N' -- BALINESE LETTER KA "\u1B28" # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL @@ -216,7 +215,7 @@ def test_balinese_script(): def test_kr_jamo(): """ - Test basic combining of HANGUL CHOSEONG and JUNGSEONG + Test basic combining of HANGUL CHOSEONG and JUNGSEONG. Example and from Raymond Chen's blog post, https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351 @@ -393,8 +392,33 @@ def test_zero_wide_conflict(): assert wcwidth.wcwidth(chr(0x0309a), unicode_version='4.1.0') == 0 assert wcwidth.wcwidth(chr(0x0309b), unicode_version='4.1.0') == 2 + def test_soft_hyphen(): # Test SOFT HYPHEN, category 'Cf' usually are zero-width, but most # implementations agree to draw it was '1' cell, visually # indistinguishable from a space, ' ' in Konsole, for example. assert wcwidth.wcwidth(chr(0x000ad)) == 1 + + +PREPENDED_CONCATENATION_MARKS = [ + (0x0600, 'ARABIC NUMBER SIGN'), + (0x0601, 'ARABIC SIGN SANAH'), + (0x0602, 'ARABIC FOOTNOTE MARKER'), + (0x0603, 'ARABIC SIGN SAFHA'), + (0x0604, 'ARABIC SIGN SAMVAT'), + (0x0605, 'ARABIC NUMBER MARK ABOVE'), + (0x06DD, 'ARABIC END OF AYAH'), + (0x070F, 'SYRIAC ABBREVIATION MARK'), + (0x0890, 'ARABIC POUND MARK ABOVE'), + (0x0891, 'ARABIC PIASTRE MARK ABOVE'), + (0x08E2, 'ARABIC DISPUTED END OF AYAH'), + (0x110BD, 'KAITHI NUMBER SIGN'), + (0x110CD, 'KAITHI NUMBER SIGN ABOVE'), +] + + [email protected]('codepoint,name', PREPENDED_CONCATENATION_MARKS) +def test_prepended_concatenation_mark_width(codepoint, name): + """Prepended Concatenation Marks have width 1, not 0.""" + # https://github.com/jquast/wcwidth/issues/119 + assert wcwidth.wcwidth(chr(codepoint)) == 1 diff --git a/contrib/python/wcwidth/py3/tests/test_emojis.py b/contrib/python/wcwidth/py3/tests/test_emojis.py index 310d0c3d44e..20245049ef9 100644 --- a/contrib/python/wcwidth/py3/tests/test_emojis.py +++ 
b/contrib/python/wcwidth/py3/tests/test_emojis.py @@ -1,9 +1,13 @@ +"""Tests for emoji width measurement and ZWJ sequences.""" # std imports import os # 3rd party import pytest +# local +import wcwidth + # some tests cannot be done on some builds of python, where the internal # unicode structure is limited to 0x10000 for memory conservation, # "ValueError: unichr() arg not in range(0x10000) (narrow Python build)" @@ -13,9 +17,6 @@ try: except ValueError: NARROW_ONLY = True -# local -import wcwidth - def make_sequence_from_line(line): # convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f) @@ -24,13 +25,11 @@ def make_sequence_from_line(line): @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") def emoji_zwj_sequence(): - """ - Emoji zwj sequence of four codepoints is just 2 cells. - """ + """Emoji zwj sequence of four codepoints is just 2 cells.""" phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN "\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 "\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER - "\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER + "\U0001f4bb") # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf expect_length_each = (2, 0, 0, 2) expect_length_phrase = 2 @@ -46,9 +45,7 @@ def emoji_zwj_sequence(): @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") def test_unfinished_zwj_sequence(): - """ - Ensure index-out-of-bounds does not occur for zero-width joiner without any following character - """ + """Ensure index-out-of-bounds does not occur for ZWJ without any following character.""" phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN "\U0001f3fb" # Modifier, Category Sk, 
East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 "\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER @@ -66,9 +63,7 @@ def test_unfinished_zwj_sequence(): @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") def test_non_recommended_zwj_sequence(): - """ - Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify - """ + """Verify ZWJ with characters that cannot be joined, wcwidth does not verify.""" phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN "\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 "\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER @@ -109,8 +104,8 @@ def test_longer_emoji_zwj_sequence(): """ A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells! - Also test the same sequence in duplicate, verifying multiple VS-16 sequences - in a single function call. + Also test the same sequence in duplicate, verifying multiple VS-16 sequences in a single + function call. 
""" # 'Category Code', 'East Asian Width property' -- 'description' phrase = ("\U0001F9D1" # 'So', 'W' -- ADULT @@ -138,20 +133,17 @@ def test_longer_emoji_zwj_sequence(): def read_sequences_from_file(filename): - fp = open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8') - lines = [line.strip() - for line in fp.readlines() - if not line.startswith('#') and line.strip()] - fp.close() + with open(os.path.join(os.path.dirname(__file__), filename), encoding='utf-8') as fp: + lines = [line.strip() + for line in fp.readlines() + if not line.startswith('#') and line.strip()] sequences = [make_sequence_from_line(line) for line in lines] return lines, sequences @pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds") def test_recommended_emoji_zwj_sequences(): - """ - Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt - """ + """Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt.""" # given, lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt') @@ -170,14 +162,12 @@ def test_recommended_emoji_zwj_sequences(): }) # verify - assert errors == [] + assert not errors assert num >= 1468 def test_recommended_variation_16_sequences(): - """ - Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt - """ + """Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt.""" # given, lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') @@ -198,12 +188,12 @@ def test_recommended_variation_16_sequences(): }) # verify - assert errors == [] + assert not errors assert num >= 742 def test_unicode_9_vs16(): - """Verify effect of VS-16 on unicode_version 9.0 and later""" + """Verify effect of VS-16 on unicode_version 9.0 and later.""" phrase = ("\u2640" # FEMALE SIGN "\uFE0F") # VARIATION SELECTOR-16 @@ -220,7 +210,7 @@ def test_unicode_9_vs16(): def test_unicode_8_vs16(): - """Verify 
that VS-16 has no effect on unicode_version 8.0 and earler""" + """Verify that VS-16 has no effect on unicode_version 8.0 and earlier.""" phrase = ("\u2640" # FEMALE SIGN "\uFE0F") # VARIATION SELECTOR-16 diff --git a/contrib/python/wcwidth/py3/tests/test_grapheme.py b/contrib/python/wcwidth/py3/tests/test_grapheme.py new file mode 100644 index 00000000000..5be6f58de09 --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_grapheme.py @@ -0,0 +1,146 @@ +"""Tests for grapheme cluster segmentation.""" +# std imports +import os + +# 3rd party +import pytest + +# local +from wcwidth import iter_graphemes + +try: + chr(0x2fffe) + NARROW_ONLY = False +except ValueError: + NARROW_ONLY = True + + +def parse_grapheme_break_test_line(line): + """Parse a line from GraphemeBreakTest.txt.""" + data, _, _ = line.partition('#') + data = data.strip() + if not data: + return None, None + + parts = [] + current_cluster = [] + + for token in data.split(): + if token == '÷': + if current_cluster: + parts.append(current_cluster) + current_cluster = [] + elif token == '×': + pass + else: + try: + current_cluster.append(int(token, 16)) + except ValueError: + continue + + if current_cluster: + parts.append(current_cluster) + + all_codepoints = [] + expected_clusters = [] + for cluster in parts: + cluster_str = ''.join(chr(cp) for cp in cluster) + expected_clusters.append(cluster_str) + all_codepoints.extend(cluster) + + if not all_codepoints: + return None, None + + input_str = ''.join(chr(cp) for cp in all_codepoints) + return input_str, expected_clusters + + +def read_grapheme_break_test(): + """Read and parse GraphemeBreakTest.txt.""" + test_file = os.path.join(os.path.dirname(__file__), 'GraphemeBreakTest.txt') + if not os.path.exists(test_file): + return [] + + test_cases = [] + with open(test_file, encoding='utf-8') as f: + for line_num, line in enumerate(f, 1): + line = line.strip() + if not line or line.startswith('#'): + continue + input_str, expected = 
parse_grapheme_break_test_line(line) + if input_str is not None: + test_cases.append(pytest.param(input_str, expected, id=f"line{line_num}")) + + return test_cases + + [email protected](("input_str", "expected"), [ + ('', []), + ('a', ['a']), + ('abc', ['a', 'b', 'c']), + ('cafe\u0301', ['c', 'a', 'f', 'e\u0301']), + ('\r\n', ['\r\n']), + ('ok\r\nok', ['o', 'k', '\r\n', 'o', 'k']), + ('\r', ['\r']), + ('ok\rok', ['o', 'k', '\r', 'o', 'k']), + ('\n', ['\n']), + ('ok\nok', ['o', 'k', '\n', 'o', 'k']), + ('\r\r', ['\r', '\r']), + ('ok\r\rok', ['o', 'k', '\r', '\r', 'o', 'k']), +]) +def test_core_grapheme(input_str, expected): + """Basic grapheme cluster segmentation.""" + assert list(iter_graphemes(input_str)) == expected + + [email protected](("input_str", "start", "end", "expected"), [ + ('abcdef', 2, None, ['c', 'd', 'e', 'f']), + ('abcdef', 0, 4, ['a', 'b', 'c', 'd']), + ('abcdef', 1, 4, ['b', 'c', 'd']), + ('abc', 10, None, []), + ('abc', 0, 10, ['a', 'b', 'c']), +]) +def test_iter_graphemes_slice(input_str, start, end, expected): + """Grapheme iteration with start/end parameters.""" + assert list(iter_graphemes(input_str, start=start, end=end)) == expected + + +HANGUL_LV = '\u1100\u1161' +HANGUL_LVT = '\uAC00\u11A8' +FLAG_US = '\U0001F1FA\U0001F1F8' +FLAG_AU = '\U0001F1E6\U0001F1FA' +RI_A = '\U0001F1E6' +FAMILY = '\U0001F468\u200D\U0001F469\u200D\U0001F467' +WAVE_SKIN = '\U0001F44B\U0001F3FB' +HEART_EMOJI = '\u2764\uFE0F' + + [email protected](NARROW_ONLY, reason="requires wide Unicode") [email protected](("input_str", "expected"), [ + (HANGUL_LV, [HANGUL_LV]), + ('ok' + HANGUL_LV + 'ok', ['o', 'k', HANGUL_LV, 'o', 'k']), + (HANGUL_LVT, [HANGUL_LVT]), + ('ok' + HANGUL_LVT + 'ok', ['o', 'k', HANGUL_LVT, 'o', 'k']), + (FLAG_US, [FLAG_US]), + ('ok' + FLAG_US + 'ok', ['o', 'k', FLAG_US, 'o', 'k']), + (FLAG_US + RI_A, [FLAG_US, RI_A]), + ('ok' + FLAG_US + RI_A + 'ok', ['o', 'k', FLAG_US, RI_A, 'o', 'k']), + (FLAG_US + FLAG_AU, [FLAG_US, FLAG_AU]), + ('ok' + FLAG_US + 
FLAG_AU + 'ok', ['o', 'k', FLAG_US, FLAG_AU, 'o', 'k']), + (FAMILY, [FAMILY]), + ('ok' + FAMILY + 'ok', ['o', 'k', FAMILY, 'o', 'k']), + (WAVE_SKIN, [WAVE_SKIN]), + ('ok' + WAVE_SKIN + 'ok', ['o', 'k', WAVE_SKIN, 'o', 'k']), + (HEART_EMOJI, [HEART_EMOJI]), + ('ok' + HEART_EMOJI + 'ok', ['o', 'k', HEART_EMOJI, 'o', 'k']), +]) +def test_wide_unicode_graphemes(input_str, expected): + """Grapheme segmentation for wide Unicode characters.""" + assert list(iter_graphemes(input_str)) == expected + + [email protected](NARROW_ONLY, reason="requires wide Unicode") [email protected](("input_str", "expected"), read_grapheme_break_test()) +def test_unicode_grapheme_break_test(input_str, expected): + """Validate against official Unicode GraphemeBreakTest.txt.""" + assert list(iter_graphemes(input_str)) == expected diff --git a/contrib/python/wcwidth/py3/tests/test_justify.py b/contrib/python/wcwidth/py3/tests/test_justify.py new file mode 100644 index 00000000000..55864bb23a5 --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_justify.py @@ -0,0 +1,56 @@ +"""Tests for text justification functions.""" +# local +from wcwidth import ljust, rjust, width, center + +SGR_RED = '\x1b[31m' +SGR_RESET = '\x1b[0m' +CJK_WORD = '\u4e2d\u6587' +CAFE_COMBINING = 'cafe\u0301' +EMOJI_FAMILY = '\U0001F468\u200D\U0001F469\u200D\U0001F467' + + +def test_ljust(): + assert ljust('hi', 5) == 'hi ' + assert ljust('', 5) == ' ' + assert ljust('hello', 3) == 'hello' + assert ljust('hello', 5) == 'hello' + assert ljust('\x1b[31mhi\x1b[0m', 5) == '\x1b[31mhi\x1b[0m ' + assert ljust('\u4e2d', 4) == '\u4e2d ' + assert ljust('hi', 5, fillchar='-') == 'hi---' + assert ljust('hi', 5, fillchar='\u00b7') == 'hi\u00b7\u00b7\u00b7' + assert ljust(CJK_WORD, 8) == CJK_WORD + ' ' + assert width(ljust(CJK_WORD, 8)) == 8 + assert width(ljust(CAFE_COMBINING, 8)) == 8 + assert width(ljust(EMOJI_FAMILY, 6)) == 6 + text = f'{SGR_RED}hi{SGR_RESET}' + assert len(ljust(text, 6, control_codes='ignore')) - len(SGR_RED) - 
len(SGR_RESET) == 6 + + +def test_rjust(): + assert rjust('hi', 5) == ' hi' + assert rjust('', 5) == ' ' + assert rjust('hello', 3) == 'hello' + assert rjust('hello', 5) == 'hello' + assert rjust('\x1b[31mhi\x1b[0m', 5) == ' \x1b[31mhi\x1b[0m' + assert rjust('\u4e2d', 4) == ' \u4e2d' + assert rjust('hi', 5, fillchar='-') == '---hi' + assert rjust('hi', 5, fillchar='\u00b7') == '\u00b7\u00b7\u00b7hi' + assert rjust(CJK_WORD, 8) == ' ' + CJK_WORD + assert width(rjust(CAFE_COMBINING, 8)) == 8 + assert width(rjust(EMOJI_FAMILY, 6)) == 6 + + +def test_center(): + assert center('hi', 6) == ' hi ' + assert center('hi', 5) == ' hi ' + assert center('', 4) == ' ' + assert center('hello', 3) == 'hello' + assert center('hello', 5) == 'hello' + assert center('\x1b[31mhi\x1b[0m', 6) == ' \x1b[31mhi\x1b[0m ' + assert center('\u4e2d', 6) == ' \u4e2d ' + assert center('hi', 6, fillchar='-') == '--hi--' + assert center('hi', 6, fillchar='\u00b7') == '\u00b7\u00b7hi\u00b7\u00b7' + assert center('x', 4) == ' x ' + assert width(center(CJK_WORD, 8)) == 8 + assert width(center(CAFE_COMBINING, 8)) == 8 + assert width(center(EMOJI_FAMILY, 6)) == 6 diff --git a/contrib/python/wcwidth/py3/tests/test_table_integrity.py b/contrib/python/wcwidth/py3/tests/test_table_integrity.py index e680498162a..8dbfe169a00 100644 --- a/contrib/python/wcwidth/py3/tests/test_table_integrity.py +++ b/contrib/python/wcwidth/py3/tests/test_table_integrity.py @@ -1,6 +1,4 @@ -""" -Executes verify-table-integrity.py as a unit test. 
-""" +"""Executes verify-table-integrity.py as a unit test.""" # std imports import os import sys diff --git a/contrib/python/wcwidth/py3/tests/test_textwrap.py b/contrib/python/wcwidth/py3/tests/test_textwrap.py new file mode 100644 index 00000000000..1f62e29ad1c --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_textwrap.py @@ -0,0 +1,262 @@ +"""Tests for sequence-aware text wrapping functions.""" +# std imports +import sys +import platform +import textwrap + +# 3rd party +import pytest + +# local +from wcwidth import iter_sequences +from wcwidth.textwrap import SequenceTextWrapper, wrap + +SGR_RED = '\x1b[31m' +SGR_BOLD = '\x1b[1m' +SGR_RESET = '\x1b[0m' +ATTRS = ('\x1b[31m', '\x1b[34m', '\x1b[4m', '\x1b[7m', '\x1b[41m', '\x1b[37m', '\x1b[107m') + +OSC_HYPERLINK = '\x1b]8;;https://example.com\x07link\x1b]8;;\x07' +CSI_CURSOR = '\x1b[5C' +CTRL_BEL = '\x07' + +ZWJ = '\u200d' +WOMAN = '\U0001F469' +GIRL = '\U0001F467' +FAMILY_ZWJ = f'{WOMAN}{ZWJ}{WOMAN}{ZWJ}{GIRL}' +SMILEY_VS16 = '\u263a\ufe0f' +ZWJ_FAMILY = '\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466' +CAFE_COMBINING = 'cafe\u0301' +HANGUL_GA = '\u1100\u1161' + + +def _strip(text): + return ''.join(seg for seg, is_seq in iter_sequences(text) if not is_seq) + + +def _adjust_stdlib_result(expected, kwargs): + """ + Adjust stdlib textwrap result for known bugs in older Python versions. + + CPython #140627: Older versions leave trailing whitespace and preceding all-whitespace lines + when drop_whitespace=True. Fixed in 3.13.11+, 3.14.2+, and 3.15+. We always strip to normalize + across versions. 
+ """ + if not expected: + return expected + if kwargs.get('drop_whitespace'): + # Strip trailing whitespace from each line (old Python bug) + expected = [line.rstrip() for line in expected] + # Remove leading all-whitespace lines (old Python bug) + if expected and not expected[0].strip(): + expected = expected[1:] + if expected and kwargs.get('subsequent_indent'): + expected[0] = expected[0][len(kwargs['subsequent_indent']):] + return expected + + +def _colorize(text): + return ''.join( + ATTRS[idx % len(ATTRS)] + char + SGR_RESET if char not in ' -\t' else char + for idx, char in enumerate(text) + ) + + +# Edge cases not covered by stdlib comparison +BASIC_EDGE_CASES = [ + ('', 10, []), + (' ', 10, []), + ('\u5973', 0, ['\u5973']), +] + + [email protected]('text,w,expected', BASIC_EDGE_CASES) +def test_wrap_edge_cases(text, w, expected): + assert wrap(text, w) == expected + + +def test_wrap_initial_indent(): + assert wrap('hello world', 10, initial_indent='> ') == ['> hello', 'world'] + + +def test_wrap_drops_trailing_whitespace(): + """Trailing whitespace stripped when drop_whitespace=True (CPython #140627).""" + result = wrap(' Z! 
a bc defghij', 3) + assert result[:3] == [' Z!', 'a', 'bc'] + + +LONG_WORD_CASES = [ + ('abcdefghij', 3, True, ['abc', 'def', 'ghi', 'j']), + ('abcdefghij', 3, False, ['abcdefghij']), +] + + [email protected]('text,w,break_long,expected', LONG_WORD_CASES) +def test_wrap_long_words(text, w, break_long, expected): + assert wrap(text, w, break_long_words=break_long) == expected + + +# Hyphen edge cases for long word breaking +HYPHEN_LONG_WORD_CASES = [ + ('a-b-c-d', 3, True, ['a-', 'b-', 'c-d']), + ('a-b-c-d', 3, False, ['a-b', '-c-', 'd']), + ('---', 2, True, ['--', '-']), + ('a---b', 2, True, ['a-', '--', 'b']), + ('a-\x1b[31mb', 2, True, ['a-\x1b[31m', 'b']), +] + + [email protected]('text,w,break_hyphens,expected', HYPHEN_LONG_WORD_CASES) +def test_wrap_hyphen_long_words(text, w, break_hyphens, expected): + assert wrap(text, w, break_on_hyphens=break_hyphens) == expected + + +# Comprehensive stdlib compatibility +TEXTWRAP_KWARGS = [ + {'break_long_words': False, 'drop_whitespace': False}, + {'break_long_words': False, 'drop_whitespace': True}, + {'break_long_words': True, 'drop_whitespace': False}, + {'break_long_words': True, 'drop_whitespace': True}, + {'break_long_words': True, 'drop_whitespace': False, 'subsequent_indent': ' '}, + {'break_long_words': True, 'drop_whitespace': True, 'subsequent_indent': ' '}, + {'break_long_words': True, 'drop_whitespace': True, 'break_on_hyphens': True}, + {'break_long_words': True, 'drop_whitespace': True, 'break_on_hyphens': False}, +] + + [email protected]('kwargs', TEXTWRAP_KWARGS) [email protected]('width', [3, 7, 8, 9, 10, 16, 20, 40]) +def test_wrap_matches_stdlib(kwargs, width): + pgraph = ' Z! 
a bc defghij klmnopqrstuvw<<>>xyz012345678900 ' * 2 + pgraph_colored = _colorize(pgraph) + expected = _adjust_stdlib_result( + textwrap.wrap(pgraph, width=width, **kwargs), kwargs + ) + wrapper = SequenceTextWrapper(width=width, **kwargs) + assert wrapper.wrap(pgraph) == expected + # For colored text, strip sequences + colored_result = [_strip(line) for line in wrapper.wrap(pgraph_colored)] + if kwargs.get('drop_whitespace'): + # normalize trailing whitespace, rstrip when drop_whitespace is True + # matches CPython #140627 fix + colored_result = [line.rstrip() for line in colored_result] + assert colored_result == expected + + [email protected]('kwargs', TEXTWRAP_KWARGS) [email protected]('width', [8, 10, 16, 20, 40]) [email protected]('tabsize', [4, 5, 8]) +def test_wrap_tabsize_matches_stdlib(kwargs, width, tabsize): + tabsize = min(tabsize, width) + pgraph = ' Z! a bc\t defghij\t kl mnopqrs\ttuvw<<>>xyz012345678900 ' * 2 + expected = _adjust_stdlib_result( + textwrap.wrap(pgraph, width=width, tabsize=tabsize, **kwargs), kwargs + ) + wrapper = SequenceTextWrapper(width=width, tabsize=tabsize, **kwargs) + assert wrapper.wrap(pgraph) == expected + + +def test_wrap_multiline_matches_stdlib(): + given = '\n' + 32 * 'A' + '\n' + 32 * 'B' + '\n' + 32 * 'C' + '\n\n' + assert wrap(given, 30) == textwrap.wrap(given, 30) + + +# Wide characters that exceed width=1 (tests force-grapheme logic) +WIDE_CHAR_WIDTH_1_CASES = [ + ('\u5973', 1, ['\u5973']), + (ZWJ_FAMILY, 1, [ZWJ_FAMILY]), + (HANGUL_GA, 1, [HANGUL_GA]), +] + + [email protected]('text,w,expected', WIDE_CHAR_WIDTH_1_CASES) +def test_wrap_wide_char_width_1(text, w, expected): + assert wrap(text, w) == expected + + +# Unicode width-aware wrapping +UNICODE_CASES = [ + # CJK (2 cells each) + ('\u4e2d\u6587\u5b57\u7b26', 4, ['\u4e2d\u6587', '\u5b57\u7b26']), + ('\u4e2d\u6587\u5b57', 5, ['\u4e2d\u6587', '\u5b57']), + # Combining characters + (CAFE_COMBINING + '-latte', 4, ['cafe\u0301', '-lat', 'te']), + # Emoji (ZWJ, 
VS16) + (f'{FAMILY_ZWJ} ab', 4, [FAMILY_ZWJ, 'ab']), + (f'{SMILEY_VS16} ab', 3, [SMILEY_VS16, 'ab']), + ('\U0001F469\U0001F467\U0001F466', 4, ['\U0001F469\U0001F467', '\U0001F466']), +] + + [email protected]('text,w,expected', UNICODE_CASES) +def test_wrap_unicode(text, w, expected): + kwargs = {'break_on_hyphens': False} if '-' in text else {} + assert wrap(text, w, **kwargs) == expected + + +# Escape sequence preservation +SEQUENCE_CASES = [ + # SGR sequences preserved at word boundaries + (f'{SGR_RED}red{SGR_RESET} blue', 4, [f'{SGR_RED}red{SGR_RESET}', 'blue']), + (f'hello{SGR_RED} world', 6, [f'hello{SGR_RED}', 'world']), + # Empty/adjacent sequences + (f'{SGR_RED}{SGR_RESET}', 10, [f'{SGR_RED}{SGR_RESET}']), + (f'hello {SGR_RED}{SGR_RESET}world', 6, ['hello', f'{SGR_RED}{SGR_RESET}world']), + # OSC hyperlinks + (f'{OSC_HYPERLINK} text', 5, [OSC_HYPERLINK, 'text']), + # CSI cursor sequences + (f'{CSI_CURSOR}text here', 10, [f'{CSI_CURSOR}text', 'here']), + # Control characters + (f'{CTRL_BEL}alert text', 6, [f'{CTRL_BEL}alert', 'text']), + # Sequences in long word breaking + ('x\x1b[31mabcdefghij\x1b[0m', 3, ['xab', 'cde', 'fgh', 'ij']), + # Lone ESC + ('abc\x1bdefghij', 3, ['abc', 'def', 'ghi', 'j']), +] + + [email protected]('text,w,expected', SEQUENCE_CASES) +def test_wrap_sequences(text, w, expected): + result = wrap(text, w) + if any('\x1b' in e or '\x00' <= e[0] < '\x20' for e in expected if e): + assert result == expected + else: + assert [_strip(line) for line in result] == expected + + +# Mixed: sequences + unicode +MIXED_CASES = [ + (f'{SGR_RED}\u4e2d\u6587{SGR_RESET} ab', 5, [f'{SGR_RED}\u4e2d\u6587{SGR_RESET}', 'ab']), + (f'{SGR_RED}{FAMILY_ZWJ}{SGR_RESET} ab', 4, [f'{SGR_RED}{FAMILY_ZWJ}{SGR_RESET}', 'ab']), + (f'{SGR_BOLD}\u4e2d{SGR_RESET}y z', 4, [f'{SGR_BOLD}\u4e2d{SGR_RESET}y', 'z']), +] + + [email protected]('text,w,expected', MIXED_CASES) +def test_wrap_mixed(text, w, expected): + assert wrap(text, w) == expected + + +# Tabsize with wide 
characters - tests column alignment with different cell widths +TABSIZE_WIDE_CASES = [ + # CJK (2 cells) + tab: tabsize=4, '\u4e2d' is 2 cols, tab expands to col 4 + ('\u4e2d\ta b', 6, 4, ['\u4e2d a', 'b']), + # CJK + tab with tabsize=8: '\u4e2d' is 2 cols, tab expands to col 8 + ('\u4e2d\ta b', 10, 8, ['\u4e2d a', 'b']), + # Emoji + tab (emoji width=2): similar column alignment + (f'{SMILEY_VS16}\ta b', 6, 4, [f'{SMILEY_VS16} a', 'b']), + # Multiple CJK + tab: 4 cols, tab to 4 adds 0, but expand_tabs adds min 1 + ('\u4e2d\u6587\ta', 8, 4, ['\u4e2d\u6587 a']), + # ASCII + tab + CJK: 'a' is 1 col, tab to 4 (3 spaces), CJK is 2 cols + ('a\t\u4e2d b', 8, 4, ['a \u4e2d b']), +] + + [email protected]('text,w,tabsize,expected', TABSIZE_WIDE_CASES) + platform.python_implementation() == 'PyPy' and sys.version_info < (3, 9), + reason='PyPy 3.8 str.expandtabs() counts UTF-8 bytes instead of characters' +) +def test_wrap_tabsize_wide_chars(text, w, tabsize, expected): + """Verify tabsize respects wide character column positions.""" + assert wrap(text, w, tabsize=tabsize) == expected diff --git a/contrib/python/wcwidth/py3/tests/test_ucslevel.py b/contrib/python/wcwidth/py3/tests/test_ucslevel.py index b15fb5f5a71..a907db2c44a 100644 --- a/contrib/python/wcwidth/py3/tests/test_ucslevel.py +++ b/contrib/python/wcwidth/py3/tests/test_ucslevel.py @@ -46,7 +46,11 @@ def test_exact_410_unicode(): def test_nearest_505_str(): - """wcwidth._wcmatch_version('5.0.5') returns nearest '5.0.0'. (str)""" + """ + wcwidth._wcmatch_version('5.0.5') returns nearest '5.0.0'. + + (str) + """ # given given, expected = '5.0.5', '5.0.0' @@ -58,7 +62,11 @@ def test_nearest_505_str(): def test_nearest_505_unicode(): - """wcwidth._wcmatch_version(u'5.0.5') returns nearest u'5.0.0'. (unicode)""" + """ + wcwidth._wcmatch_version(u'5.0.5') returns nearest u'5.0.0'. 
+ + (unicode) + """ # given given, expected = '5.0.5', '5.0.0' diff --git a/contrib/python/wcwidth/py3/tests/test_width.py b/contrib/python/wcwidth/py3/tests/test_width.py new file mode 100644 index 00000000000..722f68447da --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_width.py @@ -0,0 +1,385 @@ +"""Tests for width() function.""" +# 3rd party +import pytest + +# local +import wcwidth +from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN + +BASIC_WIDTH_CASES = [ + ('', 0, 'empty'), + ('hello', 5, 'ASCII'), + ('コンニチハ', 10, 'CJK'), + ('cafe\u0301', 4, 'combining'), + ('\U0001F468\u200d\U0001F469\u200d\U0001F467', 2, 'ZWJ'), +] + + [email protected]('text,expected,name', BASIC_WIDTH_CASES) +def test_width_basic(text, expected, name): + """Basic width measurement tests.""" + assert wcwidth.width(text) == expected + + +IGNORE_MODE_CASES = [ + ('hello\x01world', 10, 'C0_control'), + ('hello\x00world', 10, 'NUL'), + ('abc\bd', 4, 'backspace'), + ('abc\nxy', 5, 'LF'), + ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'), + ('hello\x80world', 10, 'C1_control'), + ('\x1b', 0, 'lone_ESC'), + ('a\x1bb', 2, 'lone_ESC_between'), +] + + [email protected]('text,expected,name', IGNORE_MODE_CASES) +def test_width_control_codes_ignore(text, expected, name): + """Ignore mode strips control codes from width calculation.""" + assert wcwidth.width(text, control_codes="ignore") == expected + + +STRICT_RAISES_CASES = [ + ('hello\x01world', 'C0_control'), + ('hello\x1aworld', 'ctrl_z'), + ('hello\x7fworld', 'DEL'), + ('hello\x80world', 'C1_control'), + ('hello\nworld', 'LF'), + ('hello\x1b[Hworld', 'cursor_home'), + ('hello\x1b[Aworld', 'cursor_up'), +] + + [email protected]('text,name', STRICT_RAISES_CASES) +def test_width_control_codes_strict_raises(text, name): + """Strict mode raises ValueError for illegal control codes.""" + with pytest.raises(ValueError): + wcwidth.width(text, control_codes="strict") + + +STRICT_ALLOWED_CASES = [ + ('hello\x07world', 10, 'BEL'), + 
('hello\x00world', 10, 'NUL'), + ('abc\bd', 3, 'backspace'), + ('abc\rxy', 3, 'CR'), + ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'), + ('a\x1b[2Cb', 4, 'cursor_right'), + ('\x1b', 0, 'lone_ESC'), + ('a\x1bb', 2, 'lone_ESC_between'), + ('\x1b!', 1, 'ESC_unrecognized'), +] + + [email protected]('text,expected,name', STRICT_ALLOWED_CASES) +def test_width_control_codes_strict_allowed(text, expected, name): + """Strict mode allows certain control codes.""" + assert wcwidth.width(text, control_codes="strict") == expected + + +STRICT_INDETERMINATE_SEQUENCES = [ + ('\x1b[?1049h', 'enter_fullscreen'), + ('\x1b[?1049l', 'exit_fullscreen'), + ('\x1bD', 'scroll_forward'), + ('\x1bM', 'scroll_reverse'), + ('\x1b8', 'restore_cursor'), + ('\x1b[1P', 'parm_dch'), + ('\x1b[1M', 'parm_delete_line'), + ('\x1b[1L', 'parm_insert_line'), + ('\x1b[1X', 'erase_chars'), + ('\x1b[1S', 'parm_index'), + ('\x1b[1T', 'parm_rindex'), +] + + [email protected]('seq,cap_name', STRICT_INDETERMINATE_SEQUENCES) +def test_width_strict_indeterminate_raises(seq, cap_name): + with pytest.raises(ValueError): + wcwidth.width(f"hello{seq}world", control_codes="strict") + + +PARSE_MODE_CASES = [ + ('hello\x01world', 10, 'C0_control'), + ('abc\bd', 3, 'backspace'), + ('abc\rxy', 3, 'CR'), + ('abc\nxy', 5, 'LF_vertical'), + ('a\x1b[2Cb', 4, 'cursor_right'), + ('abcd\x1b[2De', 4, 'cursor_left'), + ('\x1b[31mred\x1b[0m', 3, 'SGR'), + ('ab\x1b[Hcd', 4, 'indeterminate'), +] + + [email protected]('text,expected,name', PARSE_MODE_CASES) +def test_width_control_codes_parse(text, expected, name): + """Parse mode (default) handles control codes.""" + assert wcwidth.width(text) == expected + + +TABSIZE_CASES = [ + ('\t', 8, 8, 'default'), + ('abc\t', 8, 8, 'after_text'), + ('ab\t', 4, 4, 'tabsize_4'), +] + + [email protected]('text,expected,tabsize,name', TABSIZE_CASES) +def test_width_tabsize(text, expected, tabsize, name): + """Tabsize parameter controls tab width calculation.""" + assert wcwidth.width(text, 
tabsize=tabsize) == expected + + +def test_width_tabsize_zero(): + """Tabs are zero-width with control_codes='ignore'.""" + assert wcwidth.width('\t', control_codes='ignore') == 0 + + +def test_width_tabsize_zero_parse(): + """Tab with tabsize=0 in parse mode is zero-width.""" + assert wcwidth.width('ab\tc', tabsize=0) == 3 + + +ESCAPE_SEQUENCE_CASES = [ + ('\x1b[m', 0, 'basic_SGR'), + ('\x1b[38;2;255;0;0m', 0, 'RGB_SGR'), + ('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 4, 'OSC_hyperlink'), + ('\x1b]0;title\x07text', 4, 'OSC_title'), + ('\x1b(B', 0, 'charset'), + ('\x1b[', 0, 'Fe_CSI'), +] + + [email protected]('text,expected,name', ESCAPE_SEQUENCE_CASES) +def test_width_escape_sequences(text, expected, name): + """Escape sequences are parsed correctly.""" + assert wcwidth.width(text) == expected + + +EDGE_CASES = [ + ('\x1b[31m\x1b[0m', 0, 'only_escapes'), + ('\x1b[31mhello\x1b[0m world', 11, 'mixed_content'), + ('\x1b[31mコ\x1b[0m', 2, 'wide_with_escape'), + ('\x1b', 0, 'lone_ESC'), + ('\x1b!', 1, 'ESC_unrecognized'), + ('*\x1b*', 2, 'lone_ESC_between_text'), +] + + [email protected]('text,expected,name', EDGE_CASES) +def test_width_edge_cases(text, expected, name): + """Edge cases are handled correctly.""" + assert wcwidth.width(text) == expected + + +def test_width_unknown_control_codes(): + """Unknown control_codes defaults to parse mode.""" + assert wcwidth.width("hello", control_codes="invalid") == 5 + assert wcwidth.width("abc\bd", control_codes="unknown") == 3 + + +def test_vs16_selector(): + """VS16 converts narrow character to wide (width 2).""" + # Smiley face with VS16 should be width 2 (same as wcswidth) + assert wcwidth.width("\u263A\uFE0F") == 2 + assert wcwidth.width("\u263A\uFE0F") == wcwidth.wcswidth("\u263A\uFE0F") + # Heart with VS16 + assert wcwidth.width("\u2764\uFE0F") == 2 + # VS16 without valid preceding char is zero-width + assert wcwidth.width("\uFE0F") == 0 + # Character not in VS16 table followed by VS16 stays narrow + assert 
wcwidth.width("A\uFE0F") == 1 + + +def test_vs16_after_control_chars(): + """VS16 after control characters should not add width.""" + # Emoji, then control char, then VS16 - VS16 should NOT apply to emoji + # width() returns max extent, so BS/CR don't reduce it + assert wcwidth.width("\u263A\x07\uFE0F") == 1 # smiley(1) + BEL(0) + VS16(0) + assert wcwidth.width("\u263A\x08\uFE0F") == 1 # smiley(1) + BS(back) + VS16(0), extent=1 + assert wcwidth.width("\u263A\x0d\uFE0F") == 1 # smiley(1) + CR(reset) + VS16(0), extent=1 + assert wcwidth.width("\u263A\x1b[m\uFE0F") == 1 # smiley(1) + SGR(0) + VS16(0) + assert wcwidth.width("\u263A\u200Da\uFE0F") == 1 # smiley(1) + ZWJ+a(0) + VS16(0) + + +def test_backspace_at_column_zero(): + """Backspace at column 0 does not go negative.""" + assert wcwidth.width('\b') == 0 + assert wcwidth.width('\ba') == 1 + + +def test_carriage_return_resets_column(): + """CR resets column, max extent is preserved.""" + assert wcwidth.width('abc\rd') == 3 + assert wcwidth.width('abc\rde') == 3 + + +def test_iter_sequences_lone_esc(): + """Lone ESC is yielded as a sequence.""" + assert list(wcwidth.iter_sequences('\x1b')) == [('\x1b', True)] + assert list(wcwidth.iter_sequences('*\x1b*')) == [('*', False), ('\x1b', True), ('*', False)] + + +def test_tab_ignore_with_tabsize(): + """Tabs are zero-width with control_codes='ignore', tabsize has no effect.""" + assert wcwidth.width("abc\t", control_codes="ignore", tabsize=8) == 3 + + +def test_cursor_right_unparameterized(): + """Test unparameterized cursor_right sequence is handled correctly.""" + seq = '\x1b[C' + # sequence is recognized as a sequence + segments = list(wcwidth.iter_sequences(seq)) + assert segments == [(seq, True)] + # sequence alone moves cursor right by 1 (default), extent is 1 + assert wcwidth.width(seq) == 1 + # cursor moves right by 1: 'a'(1) + right(1) + 'b'(1) = 3 + assert wcwidth.width('a' + seq + 'b') == 3 + # strict mode allows cursor_right + assert wcwidth.width('a' + seq + 
'b', control_codes='strict') == 3 + + +INDETERMINATE_CAP_SAMPLES = [ + ('\x1b[1;1r', 'change_scroll_region'), + ('\x1b[H\x1b[2J', 'clear_screen'), + ('\x1b[K', 'clr_eol'), + ('\x1b[1;1H', 'cursor_address'), + ('\x1b[A', 'cursor_up'), + ('\x1b[M', 'delete_line'), + ('\x1b[?1049h', 'enter_fullscreen'), + ('\x1b[1X', 'erase_chars'), + ('\x1b[L', 'insert_line'), + ('\x1b[1S', 'parm_index'), + ('\x1b[1A', 'parm_up_cursor'), + ('\x1b8', 'restore_cursor'), + ('\x1b[1d', 'row_address'), + ('\x1bD', 'scroll_forward'), +] + + [email protected]('seq,cap_name', INDETERMINATE_CAP_SAMPLES) +def test_indeterminate_caps_covered_by_term_seq_pattern(seq, cap_name): + """Verify all INDETERMINATE_CAPS sequences are matched by ZERO_WIDTH_PATTERN.""" + # local + assert ZERO_WIDTH_PATTERN.match(seq) + assert wcwidth.width(seq) == 0 + + +ZERO_WIDTH_CAP_SAMPLES = [ + ('\x1b[3g', 'clear_all_tabs'), + ('\x1b[?25l', 'cursor_invisible'), + ('\x1b[?25h', 'cursor_normal'), + ('\x1b[?12;25h', 'cursor_visible'), + ('\x1b(0', 'enter_alt_charset_mode'), + ('\x1b[5m', 'enter_blink_mode'), + ('\x1b[1m', 'enter_bold_mode'), + ('\x1b[2m', 'enter_dim_mode'), + ('\x1b[3m', 'enter_italics_mode'), + ('\x1b[7m', 'enter_reverse_mode'), + ('\x1b[3m', 'enter_standout_mode'), + ('\x1b[4m', 'enter_underline_mode'), + ('\x1b(B', 'exit_alt_charset_mode'), + ('\x1b[m', 'exit_attribute_mode'), + ('\x1b[4l', 'exit_insert_mode'), + ('\x1b[23m', 'exit_italics_mode'), + ('\x1b[27m', 'exit_standout_mode'), + ('\x1b[24m', 'exit_underline_mode'), + ('\x1b[?5h\x1b[?5l', 'flash_screen_csi'), + ('\x1bg', 'flash_screen_visual_bell'), + ('\x1b>', 'keypad_local'), + ('\x1b=', 'keypad_xmit'), + ('\x1b[39;49m', 'orig_pair'), + ('\x1b7', 'save_cursor'), + ('\x1bH', 'set_tab'), +] + + [email protected]('seq,cap_name', ZERO_WIDTH_CAP_SAMPLES) +def test_zero_width_sequences_matched_by_pattern(seq, cap_name): + """Verify zero-width terminfo sequences are matched by ZERO_WIDTH_PATTERN.""" + for part, is_seq in 
wcwidth.iter_sequences(seq): + assert is_seq, f"{cap_name}: {repr(part)} not matched as sequence" + assert wcwidth.width(seq) == 0 + + +MODERN_TERMINAL_SEQUENCES = [ + ('\x1b_Gf=100,i=1;base64data\x1b\\hello', 5, 'kitty_graphics_with_text'), + ('\x1b_Ga=d\x07', 0, 'kitty_graphics_delete'), + ('\x1bP0;1;0q#0~-\x1b\\test', 4, 'sixel_graphics_with_text'), + ('\x1bP$q"p\x1b\\', 0, 'decrqss_query'), + ('\x1b^private\x1b\\text', 4, 'pm_with_text'), + ('\x1b]1337;SetMark\x07test', 4, 'iterm2_setmark'), + ('\x1b]1337;File=inline=1:base64\x07img', 3, 'iterm2_inline_image'), + ('\x1b]1337;CursorShape=1\x07', 0, 'iterm2_cursor_shape'), + ('\x1b]1337;CurrentDir=/home\x07', 0, 'iterm2_currentdir'), + ('\x1b]133;A\x07$ ', 2, 'shell_prompt_start'), + ('\x1b]133;B\x07ls', 2, 'shell_command_start'), + ('\x1b]133;C\x07', 0, 'shell_command_executed'), + ('\x1b]133;D;0\x07', 0, 'shell_command_finished'), + ('\x1b]99;i=1:d=0;Hello\x1b\\', 0, 'kitty_notification'), + ('\x1b]5522;type=read\x07', 0, 'kitty_clipboard_read'), + ('\x1b]22;pointer\x07', 0, 'kitty_pointer_shape'), + ('\x1b]21;fg=?\x07', 0, 'kitty_color_query'), + ('\x1b]30001\x1b\\', 0, 'kitty_color_push'), + ('\x1b]30101\x1b\\', 0, 'kitty_color_pop'), +] + + [email protected]('seq,expected_width,name', MODERN_TERMINAL_SEQUENCES) +def test_modern_sequences(seq, expected_width, name): + """Modern terminal sequences are recognized as zero-width.""" + assert wcwidth.width(seq) == expected_width + assert wcwidth.width(seq, control_codes='strict') == expected_width + + [email protected]('codepoint,expected_width', [ + (0x3164, 0), + (0xFFA0, 0), + (0x2065, 0), + (0xFFF0, 0), + (0xFFF1, 0), + (0xFFF8, 0), + (0xE0000, 0), + (0xE0002, 0), + (0xE001F, 0), + (0xE0080, 0), + (0xE00FF, 0), + (0xE01F0, 0), + (0xE0FFF, 0), +]) +def test_default_ignorable_zero_width(codepoint, expected_width): + """Default_Ignorable_Code_Point characters return width 0.""" + result = wcwidth.wcwidth(chr(codepoint)) + assert result == expected_width + + 
[email protected]('codepoint,expected_width', [ + (0x00AD, 1), + (0x115F, 2), +]) +def test_default_ignorable_exceptions(codepoint, expected_width): + """Exceptions to Default_Ignorable_Code_Point zero-width rule.""" + result = wcwidth.wcwidth(chr(codepoint)) + assert result == expected_width + + +def test_hangul_filler_zero_width(): + """U+3164 HANGUL FILLER is width 0.""" + result = wcwidth.wcwidth('\u3164') + assert result == 0 + + +def test_halfwidth_hangul_filler_zero_width(): + """U+FFA0 HALFWIDTH HANGUL FILLER is width 0.""" + result = wcwidth.wcwidth('\uFFA0') + assert result == 0 + + +def test_hangul_choseong_filler_exception(): + """U+115F HANGUL CHOSEONG FILLER remains width 2 for jamo composition.""" + result = wcwidth.wcwidth('\u115F') + assert result == 2 + + +def test_soft_hyphen_exception(): + """U+00AD SOFT HYPHEN remains width 1 for ISO-8859-1 compatibility.""" + result = wcwidth.wcwidth('\u00AD') + assert result == 1 diff --git a/contrib/python/wcwidth/py3/wcwidth/__init__.py b/contrib/python/wcwidth/py3/wcwidth/__init__.py index e4e81380913..106816aa6d2 100644 --- a/contrib/python/wcwidth/py3/wcwidth/__init__.py +++ b/contrib/python/wcwidth/py3/wcwidth/__init__.py @@ -5,25 +5,37 @@ https://github.com/jquast/wcwidth """ # re-export all functions & definitions, even private ones, from top-level # module path, to allow for 'from wcwidth import _private_func'. Of course, -# user beware that any _private function may disappear or change signature at -# any future version. +# user beware that any _private functions or variables not exported by __all__ +# may disappear or change signature at any future version. 
# local from .wcwidth import ZERO_WIDTH # noqa from .wcwidth import (WIDE_EASTASIAN, + AMBIGUOUS_EASTASIAN, VS16_NARROW_TO_WIDE, + clip, + ljust, + rjust, + width, + center, wcwidth, wcswidth, - _bisearch, list_versions, + iter_sequences, + strip_sequences, _wcmatch_version, _wcversion_value) +from .bisearch import bisearch as _bisearch +from .grapheme import iter_graphemes # noqa +from .textwrap import SequenceTextWrapper, wrap # The __all__ attribute defines the items exported from statement, # 'from wcwidth import *', but also to say, "This is the public API". -__all__ = ('wcwidth', 'wcswidth', 'list_versions') +__all__ = ('wcwidth', 'wcswidth', 'width', 'iter_sequences', 'iter_graphemes', + 'ljust', 'rjust', 'center', 'wrap', 'clip', 'strip_sequences', + 'list_versions') # We also used pkg_resources to load unicode version tables from version.json, # generated by bin/update-tables.py, but some environments are unable to # import pkg_resources for one reason or another, yikes! -__version__ = '0.2.14' +__version__ = '0.3.0' diff --git a/contrib/python/wcwidth/py3/wcwidth/bisearch.py b/contrib/python/wcwidth/py3/wcwidth/bisearch.py new file mode 100644 index 00000000000..bd0b4c13c02 --- /dev/null +++ b/contrib/python/wcwidth/py3/wcwidth/bisearch.py @@ -0,0 +1,29 @@ +"""Binary search function for Unicode interval tables.""" + + +def bisearch(ucs, table): + # type: (int, tuple) -> int + """ + Binary search in interval table. + + :param ucs: Ordinal value of unicode character. + :param table: Tuple of starting and ending ranges of ordinal values, + in form of ``((start, end), ...)``. + :returns: 1 if ordinal value ucs is found within lookup table, else 0. 
+ """ + lbound = 0 + ubound = len(table) - 1 + + if ucs < table[0][0] or ucs > table[ubound][1]: + return 0 + + while ubound >= lbound: + mid = (lbound + ubound) // 2 + if ucs > table[mid][1]: + lbound = mid + 1 + elif ucs < table[mid][0]: + ubound = mid - 1 + else: + return 1 + + return 0 diff --git a/contrib/python/wcwidth/py3/wcwidth/control_codes.py b/contrib/python/wcwidth/py3/wcwidth/control_codes.py new file mode 100644 index 00000000000..3a6fff76386 --- /dev/null +++ b/contrib/python/wcwidth/py3/wcwidth/control_codes.py @@ -0,0 +1,46 @@ +""" +Control character sets for terminal handling. + +This module provides the control character sets used by the width() function to handle terminal +control characters. +""" + +# Illegal C0/C1 control characters. +# These raise ValueError in 'strict' mode. +ILLEGAL_CTRL = frozenset( + chr(c) for c in ( + list(range(0x01, 0x07)) + # SOH, STX, ETX (^C), EOT (^D), ENQ, ACK + list(range(0x10, 0x1b)) + # DLE through SUB (^Z) + list(range(0x1c, 0x20)) + # FS, GS, RS, US + [0x7f] + # DEL + list(range(0x80, 0xa0)) # C1 control characters + ) +) + +# Vertical movement control characters. +# These raise ValueError in 'strict' mode (indeterminate horizontal position). +VERTICAL_CTRL = frozenset({ + '\x0a', # LF (line feed) + '\x0b', # VT (vertical tab) + '\x0c', # FF (form feed) +}) + +# Horizontal movement control characters. +# These affect cursor position and are tracked in 'strict' and 'parse' modes. +HORIZONTAL_CTRL = frozenset({ + '\x08', # BS (backspace) - cursor left 1 + '\x09', # HT (horizontal tab) - advance to next tab stop + '\x0d', # CR (carriage return) - cursor to column 0 +}) + +# Terminal-valid zero-width control characters. +# These are allowed in all modes (zero-width, no movement). +ZERO_WIDTH_CTRL = frozenset({ + '\x00', # NUL + '\x07', # BEL (bell) + '\x0e', # SO (shift out) + '\x0f', # SI (shift in) +}) + +# All control characters that need special handling (not regular printable). 
+ALL_CTRL = ILLEGAL_CTRL | VERTICAL_CTRL | HORIZONTAL_CTRL | ZERO_WIDTH_CTRL | {'\x1b'} diff --git a/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py b/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py new file mode 100644 index 00000000000..ec51bd3b5bb --- /dev/null +++ b/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py @@ -0,0 +1,69 @@ +r""" +Terminal escape sequence patterns. + +This module provides regex patterns for matching terminal escape sequences. All patterns match +sequences that begin with ESC (\x1b). Before calling re.match with these patterns, callers should +first check that the character at the current position is ESC for optimal performance. +""" +# std imports +import re + +# Zero-width escape sequences (SGR, OSC, CSI, etc.). This table, like INDETERMINATE_EFFECT_SEQUENCE, +# originated from the 'blessed' library. +ZERO_WIDTH_PATTERN = re.compile( + # CSI sequences + r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]|' + # OSC sequences + r'\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)|' + # APC sequences + r'\x1b_[^\x1b\x07]*(?:\x07|\x1b\\)|' + # DCS sequences + r'\x1bP[^\x1b\x07]*(?:\x07|\x1b\\)|' + # PM sequences + r'\x1b\^[^\x1b\x07]*(?:\x07|\x1b\\)|' + # Character set designation + r'\x1b[()].|' + # Fe sequences + r'\x1b[\x40-\x5f]|' + # Fp sequences + r'\x1b[78=>g]' +) + +# Cursor right movement: CSI [n] C, parameter may be parsed by width() +CURSOR_RIGHT_SEQUENCE = re.compile(r'\x1b\[(\d*)C') + +# Cursor left movement: CSI [n] D, parameter may be parsed by width() +CURSOR_LEFT_SEQUENCE = re.compile(r'\x1b\[(\d*)D') + +# Indeterminate effect sequences - raise ValueError in 'strict' mode. The effects of these sequences +# are likely to be undesirable, moving the cursor vertically or to any unknown position, and +# otherwise not managed by the 'width' method of this library.
+# +# This table was created initially with code generation by extraction of termcap library with +# techniques used at 'blessed' library runtime for 'xterm', 'alacritty', 'kitty', 'ghostty', +# 'screen', 'tmux', and others. Then, these common capabilities were merged into the list below. +INDETERMINATE_EFFECT_SEQUENCE = re.compile( + '|'.join(f'(?:{_pattern})' for _pattern in ( + r'\x1b\[\d+;\d+r', # change_scroll_region + r'\x1b\[\d*K', # erase_in_line (clr_eol, clr_bol) + r'\x1b\[\d*J', # erase_in_display (clr_eos, erase_display) + r'\x1b\[\d*G', # column_address + r'\x1b\[\d+;\d+H', # cursor_address + r'\x1b\[\d*H', # cursor_home + r'\x1b\[\d*A', # cursor_up + r'\x1b\[\d*B', # cursor_down + r'\x1b\[\d*P', # delete_character + r'\x1b\[\d*M', # delete_line + r'\x1b\[\d*L', # insert_line + r'\x1b\[\d*@', # insert_character + r'\x1b\[\d+X', # erase_chars + r'\x1b\[\d*S', # scroll_up (parm_index) + r'\x1b\[\d*T', # scroll_down (parm_rindex) + r'\x1b\[\d*d', # row_address + r'\x1b\[\?1049[hl]', # alternate screen buffer + r'\x1b\[\?47[hl]', # alternate screen (legacy) + r'\x1b8', # restore_cursor + r'\x1bD', # scroll_forward (index) + r'\x1bM', # scroll_reverse (reverse index) + )) +) diff --git a/contrib/python/wcwidth/py3/wcwidth/grapheme.py b/contrib/python/wcwidth/py3/wcwidth/grapheme.py new file mode 100644 index 00000000000..fed1b0184e6 --- /dev/null +++ b/contrib/python/wcwidth/py3/wcwidth/grapheme.py @@ -0,0 +1,299 @@ +""" +Grapheme cluster segmentation following Unicode Standard Annex #29. + +This module provides a pure-Python implementation of the grapheme cluster boundary algorithm as +defined in UAX #29: Unicode Text Segmentation.
+ +https://www.unicode.org/reports/tr29/ +""" +# std imports +from enum import IntEnum +from functools import lru_cache + +from typing import Iterator, Optional, NamedTuple + +# local +from .bisearch import bisearch as _bisearch +from .table_grapheme import (GRAPHEME_L, + GRAPHEME_T, + GRAPHEME_V, + GRAPHEME_LV, + INCB_EXTEND, + INCB_LINKER, + GRAPHEME_LVT, + INCB_CONSONANT, + GRAPHEME_EXTEND, + GRAPHEME_CONTROL, + GRAPHEME_PREPEND, + GRAPHEME_SPACINGMARK, + EXTENDED_PICTOGRAPHIC, + GRAPHEME_REGIONAL_INDICATOR) + + +class GCB(IntEnum): + """Grapheme Cluster Break property values.""" + + OTHER = 0 + CR = 1 + LF = 2 + CONTROL = 3 + EXTEND = 4 + ZWJ = 5 + REGIONAL_INDICATOR = 6 + PREPEND = 7 + SPACING_MARK = 8 + L = 9 + V = 10 + T = 11 + LV = 12 + LVT = 13 + + +@lru_cache(maxsize=1000) +def _grapheme_cluster_break(ucs: int) -> GCB: + # pylint: disable=too-many-branches,too-complex + """Return the Grapheme_Cluster_Break property for a codepoint.""" + # Single codepoint matches + if ucs == 0x000d: + return GCB.CR + if ucs == 0x000a: + return GCB.LF + if ucs == 0x200d: + return GCB.ZWJ + # Matching by codepoint ranges, requiring binary search + if _bisearch(ucs, GRAPHEME_CONTROL): + return GCB.CONTROL + if _bisearch(ucs, GRAPHEME_EXTEND): + return GCB.EXTEND + if _bisearch(ucs, GRAPHEME_REGIONAL_INDICATOR): + return GCB.REGIONAL_INDICATOR + if _bisearch(ucs, GRAPHEME_PREPEND): + return GCB.PREPEND + if _bisearch(ucs, GRAPHEME_SPACINGMARK): + return GCB.SPACING_MARK + if _bisearch(ucs, GRAPHEME_L): + return GCB.L + if _bisearch(ucs, GRAPHEME_V): + return GCB.V + if _bisearch(ucs, GRAPHEME_T): + return GCB.T + if _bisearch(ucs, GRAPHEME_LV): + return GCB.LV + if _bisearch(ucs, GRAPHEME_LVT): + return GCB.LVT + return GCB.OTHER + + +@lru_cache(maxsize=512) +def _is_extended_pictographic(ucs: int) -> bool: + """Check if codepoint has Extended_Pictographic property.""" + return _bisearch(ucs, EXTENDED_PICTOGRAPHIC) + + +@lru_cache(maxsize=128) +def _is_incb_linker(ucs: int) 
-> bool: + """Check if codepoint has InCB=Linker property.""" + return _bisearch(ucs, INCB_LINKER) + + +@lru_cache(maxsize=256) +def _is_incb_consonant(ucs: int) -> bool: + """Check if codepoint has InCB=Consonant property.""" + return _bisearch(ucs, INCB_CONSONANT) + + +@lru_cache(maxsize=256) +def _is_incb_extend(ucs: int) -> bool: + """Check if codepoint has InCB=Extend property.""" + return _bisearch(ucs, INCB_EXTEND) + + +class BreakResult(NamedTuple): + """Result of grapheme cluster break decision.""" + + should_break: bool + ri_count: int + + +@lru_cache(maxsize=196) # 14 GCB values × 14 = 196 max combinations +def _simple_break_check(prev_gcb: GCB, curr_gcb: GCB) -> Optional[BreakResult]: + """ + Check simple GCB-pair-based break rules (cacheable). + + Returns BreakResult for rules that can be determined from GCB properties alone, or None if + complex lookback rules (GB9c, GB11) need to be checked. + """ + # GB3: CR x LF + if prev_gcb == GCB.CR and curr_gcb == GCB.LF: + return BreakResult(should_break=False, ri_count=0) + + # GB4: (Control|CR|LF) ÷ + if prev_gcb in (GCB.CONTROL, GCB.CR, GCB.LF): + return BreakResult(should_break=True, ri_count=0) + + # GB5: ÷ (Control|CR|LF) + if curr_gcb in (GCB.CONTROL, GCB.CR, GCB.LF): + return BreakResult(should_break=True, ri_count=0) + + # GB6: L x (L|V|LV|LVT) + if prev_gcb == GCB.L and curr_gcb in (GCB.L, GCB.V, GCB.LV, GCB.LVT): + return BreakResult(should_break=False, ri_count=0) + + # GB7: (LV|V) x (V|T) + if prev_gcb in (GCB.LV, GCB.V) and curr_gcb in (GCB.V, GCB.T): + return BreakResult(should_break=False, ri_count=0) + + # GB8: (LVT|T) x T + if prev_gcb in (GCB.LVT, GCB.T) and curr_gcb == GCB.T: + return BreakResult(should_break=False, ri_count=0) + + # GB9: x (Extend|ZWJ) - but ZWJ needs GB11 check, so only handle Extend here + if curr_gcb == GCB.EXTEND: + return BreakResult(should_break=False, ri_count=0) + + # GB9a: x SpacingMark + if curr_gcb == GCB.SPACING_MARK: + return BreakResult(should_break=False, 
ri_count=0) + + # GB9b: Prepend x + if prev_gcb == GCB.PREPEND: + return BreakResult(should_break=False, ri_count=0) + + # GB9c and GB11 need lookback - return None to signal complex check needed + # GB12/13 (RI pairs) need ri_count state - also handled in main function + return None + + +def _should_break( + prev_gcb: GCB, + curr_gcb: GCB, + text: str, + curr_idx: int, + ri_count: int, +) -> BreakResult: + # pylint: disable=too-many-branches,too-complex + """ + Determine if there should be a grapheme cluster break between prev and curr. + + Implements UAX #29 grapheme cluster boundary rules. + """ + # Try cached simple rules first + result = _simple_break_check(prev_gcb, curr_gcb) + if result is not None: + return result + + # GB9: x ZWJ (not cached because GB11 needs lookback when prev is ZWJ) + if curr_gcb == GCB.ZWJ: + return BreakResult(should_break=False, ri_count=0) + + # GB9c: Indic conjunct cluster + # \p{InCB=Consonant} [\p{InCB=Extend}\p{InCB=Linker}]* \p{InCB=Linker} + # [\p{InCB=Extend}\p{InCB=Linker}]* x \p{InCB=Consonant} + curr_ucs = ord(text[curr_idx]) + if _is_incb_consonant(curr_ucs): + has_linker = False + i = curr_idx - 1 + while i >= 0: + prev_ucs = ord(text[i]) + if _is_incb_linker(prev_ucs): + has_linker = True + i -= 1 + elif _is_incb_extend(prev_ucs): + i -= 1 + elif _is_incb_consonant(prev_ucs): + if has_linker: + return BreakResult(should_break=False, ri_count=0) + break + else: + break + + # GB11: ExtPict Extend* ZWJ x ExtPict + if prev_gcb == GCB.ZWJ and _is_extended_pictographic(curr_ucs): + i = curr_idx - 2 # Skip the ZWJ at curr_idx - 1 + while i >= 0: + prev_ucs = ord(text[i]) + prev_prop = _grapheme_cluster_break(prev_ucs) + if prev_prop == GCB.EXTEND: + i -= 1 + elif _is_extended_pictographic(prev_ucs): + return BreakResult(should_break=False, ri_count=0) + else: + break + + # GB12/GB13: RI x RI (pair matching) + if prev_gcb == GCB.REGIONAL_INDICATOR and curr_gcb == GCB.REGIONAL_INDICATOR: + if ri_count % 2 == 1: + return 
BreakResult(should_break=False, ri_count=ri_count + 1) + return BreakResult(should_break=True, ri_count=1) + + # GB999: Any ÷ Any + ri_count = 1 if curr_gcb == GCB.REGIONAL_INDICATOR else 0 + return BreakResult(should_break=True, ri_count=ri_count) + + +def iter_graphemes( + unistr: str, + start: int = 0, + end: Optional[int] = None, +) -> Iterator[str]: + r""" + Iterate over grapheme clusters in a Unicode string. + + Grapheme clusters are "user-perceived characters" - what a user would + consider a single character, which may consist of multiple Unicode + codepoints (e.g., a base character with combining marks, emoji sequences). + + :param unistr: The Unicode string to segment. + :param start: Starting index (default 0). + :param end: Ending index (default len(unistr)). + :yields: Grapheme cluster substrings. + + Example:: + + >>> list(iter_graphemes('cafe\u0301')) + ['c', 'a', 'f', 'e\u0301'] + >>> list(iter_graphemes('ok\U0001F468\u200D\U0001F469\u200D\U0001F467')) + ['o', 'k', '\U0001F468\u200D\U0001F469\u200D\U0001F467'] + >>> list(iter_graphemes('ok\U0001F1FA\U0001F1F8')) + ['o', 'k', '\U0001F1FA\U0001F1F8'] + + ..
versionadded:: 0.3.0 + """ + if not unistr: + return + + length = len(unistr) + + if end is None: + end = length + + if start >= end or start >= length: + return + + end = min(end, length) + + # Track state for grapheme cluster boundaries + cluster_start = start + ri_count = 0 + + # Get GCB for first character + prev_gcb = _grapheme_cluster_break(ord(unistr[start])) + + # Handle Regional Indicator count initialization + if prev_gcb == GCB.REGIONAL_INDICATOR: + ri_count = 1 + + for idx in range(start + 1, end): + curr_gcb = _grapheme_cluster_break(ord(unistr[idx])) + + result = _should_break(prev_gcb, curr_gcb, unistr, idx, ri_count) + ri_count = result.ri_count + + if result.should_break: + yield unistr[cluster_start:idx] + cluster_start = idx + + prev_gcb = curr_gcb + + # Yield the final cluster + yield unistr[cluster_start:end] diff --git a/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py b/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py new file mode 100644 index 00000000000..e3dc0b1c3de --- /dev/null +++ b/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py @@ -0,0 +1,189 @@ +""" +Exports AMBIGUOUS_EASTASIAN table keyed by supporting unicode version level. + +This code generated by wcwidth/bin/update-tables.py on 2026-01-18 23:27:15 UTC. 
+""" +# pylint: disable=duplicate-code +AMBIGUOUS_EASTASIAN = { + '17.0.0': ( + # Source: EastAsianWidth-17.0.0.txt + # Date: 2025-07-24, 00:12:54 GMT + # + (0x000a1, 0x000a1,), # Inverted Exclamation Mark + (0x000a4, 0x000a4,), # Currency Sign + (0x000a7, 0x000a8,), # Section Sign ..Diaeresis + (0x000aa, 0x000aa,), # Feminine Ordinal Indicator + (0x000ad, 0x000ae,), # Soft Hyphen ..Registered Sign + (0x000b0, 0x000b4,), # Degree Sign ..Acute Accent + (0x000b6, 0x000ba,), # Pilcrow Sign ..Masculine Ordinal Indica + (0x000bc, 0x000bf,), # Vulgar Fraction One Quar..Inverted Question Mark + (0x000c6, 0x000c6,), # Latin Capital Letter Ae + (0x000d0, 0x000d0,), # Latin Capital Letter Eth + (0x000d7, 0x000d8,), # Multiplication Sign ..Latin Capital Letter O W + (0x000de, 0x000e1,), # Latin Capital Letter Tho..Latin Small Letter A Wit + (0x000e6, 0x000e6,), # Latin Small Letter Ae + (0x000e8, 0x000ea,), # Latin Small Letter E Wit..Latin Small Letter E Wit + (0x000ec, 0x000ed,), # Latin Small Letter I Wit..Latin Small Letter I Wit + (0x000f0, 0x000f0,), # Latin Small Letter Eth + (0x000f2, 0x000f3,), # Latin Small Letter O Wit..Latin Small Letter O Wit + (0x000f7, 0x000fa,), # Division Sign ..Latin Small Letter U Wit + (0x000fc, 0x000fc,), # Latin Small Letter U With Diaeresis + (0x000fe, 0x000fe,), # Latin Small Letter Thorn + (0x00101, 0x00101,), # Latin Small Letter A With Macron + (0x00111, 0x00111,), # Latin Small Letter D With Stroke + (0x00113, 0x00113,), # Latin Small Letter E With Macron + (0x0011b, 0x0011b,), # Latin Small Letter E With Caron + (0x00126, 0x00127,), # Latin Capital Letter H W..Latin Small Letter H Wit + (0x0012b, 0x0012b,), # Latin Small Letter I With Macron + (0x00131, 0x00133,), # Latin Small Letter Dotle..Latin Small Ligature Ij + (0x00138, 0x00138,), # Latin Small Letter Kra + (0x0013f, 0x00142,), # Latin Capital Letter L W..Latin Small Letter L Wit + (0x00144, 0x00144,), # Latin Small Letter N With Acute + (0x00148, 0x0014b,), # Latin Small 
Letter N Wit..Latin Small Letter Eng + (0x0014d, 0x0014d,), # Latin Small Letter O With Macron + (0x00152, 0x00153,), # Latin Capital Ligature O..Latin Small Ligature Oe + (0x00166, 0x00167,), # Latin Capital Letter T W..Latin Small Letter T Wit + (0x0016b, 0x0016b,), # Latin Small Letter U With Macron + (0x001ce, 0x001ce,), # Latin Small Letter A With Caron + (0x001d0, 0x001d0,), # Latin Small Letter I With Caron + (0x001d2, 0x001d2,), # Latin Small Letter O With Caron + (0x001d4, 0x001d4,), # Latin Small Letter U With Caron + (0x001d6, 0x001d6,), # Latin Small Letter U With Diaeresis And Macron + (0x001d8, 0x001d8,), # Latin Small Letter U With Diaeresis And Acute + (0x001da, 0x001da,), # Latin Small Letter U With Diaeresis And Caron + (0x001dc, 0x001dc,), # Latin Small Letter U With Diaeresis And Grave + (0x00251, 0x00251,), # Latin Small Letter Alpha + (0x00261, 0x00261,), # Latin Small Letter Script G + (0x002c4, 0x002c4,), # Modifier Letter Up Arrowhead + (0x002c7, 0x002c7,), # Caron + (0x002c9, 0x002cb,), # Modifier Letter Macron ..Modifier Letter Grave Ac + (0x002cd, 0x002cd,), # Modifier Letter Low Macron + (0x002d0, 0x002d0,), # Modifier Letter Triangular Colon + (0x002d8, 0x002db,), # Breve ..Ogonek + (0x002dd, 0x002dd,), # Double Acute Accent + (0x002df, 0x002df,), # Modifier Letter Cross Accent + (0x00391, 0x003a1,), # Greek Capital Letter Alp..Greek Capital Letter Rho + (0x003a3, 0x003a9,), # Greek Capital Letter Sig..Greek Capital Letter Ome + (0x003b1, 0x003c1,), # Greek Small Letter Alpha..Greek Small Letter Rho + (0x003c3, 0x003c9,), # Greek Small Letter Sigma..Greek Small Letter Omega + (0x00401, 0x00401,), # Cyrillic Capital Letter Io + (0x00410, 0x0044f,), # Cyrillic Capital Letter ..Cyrillic Small Letter Ya + (0x00451, 0x00451,), # Cyrillic Small Letter Io + (0x02010, 0x02010,), # Hyphen + (0x02013, 0x02016,), # En Dash ..Double Vertical Line + (0x02018, 0x02019,), # Left Single Quotation Ma..Right Single Quotation M + (0x0201c, 0x0201d,), # 
Left Double Quotation Ma..Right Double Quotation M + (0x02020, 0x02022,), # Dagger ..Bullet + (0x02024, 0x02027,), # One Dot Leader ..Hyphenation Point + (0x02030, 0x02030,), # Per Mille Sign + (0x02032, 0x02033,), # Prime ..Double Prime + (0x02035, 0x02035,), # Reversed Prime + (0x0203b, 0x0203b,), # Reference Mark + (0x0203e, 0x0203e,), # Overline + (0x02074, 0x02074,), # Superscript Four + (0x0207f, 0x0207f,), # Superscript Latin Small Letter N + (0x02081, 0x02084,), # Subscript One ..Subscript Four + (0x020ac, 0x020ac,), # Euro Sign + (0x02103, 0x02103,), # Degree Celsius + (0x02105, 0x02105,), # Care Of + (0x02109, 0x02109,), # Degree Fahrenheit + (0x02113, 0x02113,), # Script Small L + (0x02116, 0x02116,), # Numero Sign + (0x02121, 0x02122,), # Telephone Sign ..Trade Mark Sign + (0x02126, 0x02126,), # Ohm Sign + (0x0212b, 0x0212b,), # Angstrom Sign + (0x02153, 0x02154,), # Vulgar Fraction One Thir..Vulgar Fraction Two Thir + (0x0215b, 0x0215e,), # Vulgar Fraction One Eigh..Vulgar Fraction Seven Ei + (0x02160, 0x0216b,), # Roman Numeral One ..Roman Numeral Twelve + (0x02170, 0x02179,), # Small Roman Numeral One ..Small Roman Numeral Ten + (0x02189, 0x02189,), # Vulgar Fraction Zero Thirds + (0x02190, 0x02199,), # Leftwards Arrow ..South West Arrow + (0x021b8, 0x021b9,), # North West Arrow To Long..Leftwards Arrow To Bar O + (0x021d2, 0x021d2,), # Rightwards Double Arrow + (0x021d4, 0x021d4,), # Left Right Double Arrow + (0x021e7, 0x021e7,), # Upwards White Arrow + (0x02200, 0x02200,), # For All + (0x02202, 0x02203,), # Partial Differential ..There Exists + (0x02207, 0x02208,), # Nabla ..Element Of + (0x0220b, 0x0220b,), # Contains As Member + (0x0220f, 0x0220f,), # N-ary Product + (0x02211, 0x02211,), # N-ary Summation + (0x02215, 0x02215,), # Division Slash + (0x0221a, 0x0221a,), # Square Root + (0x0221d, 0x02220,), # Proportional To ..Angle + (0x02223, 0x02223,), # Divides + (0x02225, 0x02225,), # Parallel To + (0x02227, 0x0222c,), # Logical And ..Double 
Integral + (0x0222e, 0x0222e,), # Contour Integral + (0x02234, 0x02237,), # Therefore ..Proportion + (0x0223c, 0x0223d,), # Tilde Operator ..Reversed Tilde + (0x02248, 0x02248,), # Almost Equal To + (0x0224c, 0x0224c,), # All Equal To + (0x02252, 0x02252,), # Approximately Equal To Or The Image Of + (0x02260, 0x02261,), # Not Equal To ..Identical To + (0x02264, 0x02267,), # Less-than Or Equal To ..Greater-than Over Equal + (0x0226a, 0x0226b,), # Much Less-than ..Much Greater-than + (0x0226e, 0x0226f,), # Not Less-than ..Not Greater-than + (0x02282, 0x02283,), # Subset Of ..Superset Of + (0x02286, 0x02287,), # Subset Of Or Equal To ..Superset Of Or Equal To + (0x02295, 0x02295,), # Circled Plus + (0x02299, 0x02299,), # Circled Dot Operator + (0x022a5, 0x022a5,), # Up Tack + (0x022bf, 0x022bf,), # Right Triangle + (0x02312, 0x02312,), # Arc + (0x02460, 0x024e9,), # Circled Digit One ..Circled Latin Small Lett + (0x024eb, 0x0254b,), # Negative Circled Number ..Box Drawings Heavy Verti + (0x02550, 0x02573,), # Box Drawings Double Hori..Box Drawings Light Diago + (0x02580, 0x0258f,), # Upper Half Block ..Left One Eighth Block + (0x02592, 0x02595,), # Medium Shade ..Right One Eighth Block + (0x025a0, 0x025a1,), # Black Square ..White Square + (0x025a3, 0x025a9,), # White Square Containing ..Square With Diagonal Cro + (0x025b2, 0x025b3,), # Black Up-pointing Triang..White Up-pointing Triang + (0x025b6, 0x025b7,), # Black Right-pointing Tri..White Right-pointing Tri + (0x025bc, 0x025bd,), # Black Down-pointing Tria..White Down-pointing Tria + (0x025c0, 0x025c1,), # Black Left-pointing Tria..White Left-pointing Tria + (0x025c6, 0x025c8,), # Black Diamond ..White Diamond Containing + (0x025cb, 0x025cb,), # White Circle + (0x025ce, 0x025d1,), # Bullseye ..Circle With Right Half B + (0x025e2, 0x025e5,), # Black Lower Right Triang..Black Upper Right Triang + (0x025ef, 0x025ef,), # Large Circle + (0x02605, 0x02606,), # Black Star ..White Star + (0x02609, 0x02609,), # Sun + 
(0x0260e, 0x0260f,), # Black Telephone ..White Telephone + (0x0261c, 0x0261c,), # White Left Pointing Index + (0x0261e, 0x0261e,), # White Right Pointing Index + (0x02640, 0x02640,), # Female Sign + (0x02642, 0x02642,), # Male Sign + (0x02660, 0x02661,), # Black Spade Suit ..White Heart Suit + (0x02663, 0x02665,), # Black Club Suit ..Black Heart Suit + (0x02667, 0x0266a,), # White Club Suit ..Eighth Note + (0x0266c, 0x0266d,), # Beamed Sixteenth Notes ..Music Flat Sign + (0x0266f, 0x0266f,), # Music Sharp Sign + (0x0269e, 0x0269f,), # Three Lines Converging R..Three Lines Converging L + (0x026bf, 0x026bf,), # Squared Key + (0x026c6, 0x026cd,), # Rain ..Disabled Car + (0x026cf, 0x026d3,), # Pick ..Chains + (0x026d5, 0x026e1,), # Alternate One-way Left W..Restricted Left Entry-2 + (0x026e3, 0x026e3,), # Heavy Circle With Stroke And Two Dots Above + (0x026e8, 0x026e9,), # Black Cross On Shield ..Shinto Shrine + (0x026eb, 0x026f1,), # Castle ..Umbrella On Ground + (0x026f4, 0x026f4,), # Ferry + (0x026f6, 0x026f9,), # Square Four Corners ..Person With Ball + (0x026fb, 0x026fc,), # Japanese Bank Symbol ..Headstone Graveyard Symb + (0x026fe, 0x026ff,), # Cup On Black Square ..White Flag With Horizont + (0x0273d, 0x0273d,), # Heavy Teardrop-spoked Asterisk + (0x02776, 0x0277f,), # Dingbat Negative Circled..Dingbat Negative Circled + (0x02b56, 0x02b59,), # Heavy Oval With Oval Ins..Heavy Circled Saltire + (0x03248, 0x0324f,), # Circled Number Ten On Bl..Circled Number Eighty On + (0x0e000, 0x0f8ff,), # (nil) + (0x0fffd, 0x0fffd,), # Replacement Character + (0x1f100, 0x1f10a,), # Digit Zero Full Stop ..Digit Nine Comma + (0x1f110, 0x1f12d,), # Parenthesized Latin Capi..Circled Cd + (0x1f130, 0x1f169,), # Squared Latin Capital Le..Negative Circled Latin C + (0x1f170, 0x1f18d,), # Negative Squared Latin C..Negative Squared Sa + (0x1f18f, 0x1f190,), # Negative Squared Wc ..Square Dj + (0x1f19b, 0x1f1ac,), # Squared Three D ..Squared Vod + (0xf0000, 0xffffd,), # (nil) + 
(0x100000, 0x10fffd,), # (nil) + ), +} diff --git a/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py b/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py new file mode 100644 index 00000000000..7fe0d157309 --- /dev/null +++ b/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py @@ -0,0 +1,2126 @@ +""" +Exports grapheme cluster break property tables for Unicode version 17.0.0. + +This module provides lookup tables for Unicode grapheme cluster break properties as defined in UAX +#29: Unicode Text Segmentation. + +This code generated by wcwidth/bin/update-tables.py on 2026-01-20 16:47:43 UTC. +""" +# pylint: disable=duplicate-code + +GRAPHEME_CR = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x0000d, 0x0000d,), # (nil) +) + +GRAPHEME_LF = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x0000a, 0x0000a,), # (nil) +) + +GRAPHEME_CONTROL = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x00000, 0x00009,), # (nil) + (0x0000b, 0x0000c,), # (nil) + (0x0000e, 0x0001f,), # (nil) + (0x0007f, 0x0009f,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen + (0x0061c, 0x0061c,), # Arabic Letter Mark + (0x0180e, 0x0180e,), # Mongolian Vowel Separator + (0x0200b, 0x0200b,), # Zero Width Space + (0x0200e, 0x0200f,), # Left-to-right Mark ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T + (0x13430, 0x1343f,), # Egyptian Hieroglyph Vert..Egyptian Hieroglyph End + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1d173, 0x1d17a,), # Musical Symbol Begin Bea..Musical Symbol End Phras + (0xe0000, 0xe001f,), # (nil) + (0xe0080, 0xe00ff,), # (nil) + (0xe01f0, 0xe0fff,), # (nil) +) + +GRAPHEME_EXTEND = ( + # Source: 
GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le + (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli + (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg + (0x005bf, 0x005bf,), # Hebrew Point Rafe + (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot + (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot + (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below + (0x00670, 0x00670,), # Arabic Letter Superscript Alef + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda + (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon + (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x00711, 0x00711,), # Syriac Letter Superscript Alaph + (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh + (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun + (0x007eb, 0x007f3,), # Nko Combining Short High..Nko Combining Double Dot + (0x007fd, 0x007fd,), # Nko Dantayalan + (0x00816, 0x00819,), # Samaritan Mark In ..Samaritan Mark Dagesh + (0x0081b, 0x00823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A + (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark + (0x00897, 0x0089f,), # (nil) ..Arabic Half Madda Over M + (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S + (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara + (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe + (0x0093c, 0x0093c,), # Devanagari Sign Nukta + (0x00941, 0x00948,), # 
Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai + (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu + (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x009bc, 0x009bc,), # Bengali Sign Nukta + (0x009be, 0x009be,), # Bengali Vowel Sign Aa + (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal + (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark + (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + (0x009fe, 0x009fe,), # Bengali Sandhi Mark + (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta + (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat + (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak + (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash + (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00abc, 0x00abc,), # Gujarati Sign Nukta + (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai + (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle + (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b3c, 0x00b3c,), # Oriya Sign Nukta + (0x00b3e, 0x00b3f,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign I + (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic + (0x00b4d, 0x00b4d,), # Oriya Sign Virama + (0x00b55, 0x00b57,), # Oriya Sign Overline ..Oriya Au Length Mark + (0x00b62, 0x00b63,), # Oriya Vowel Sign 
Vocalic..Oriya Vowel Sign Vocalic + (0x00b82, 0x00b82,), # Tamil Sign Anusvara + (0x00bbe, 0x00bbe,), # Tamil Vowel Sign Aa + (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii + (0x00bcd, 0x00bcd,), # Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above + (0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above + (0x00c3c, 0x00c3c,), # Telugu Sign Nukta + (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai + (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama + (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00cbc, 0x00cbc,), # Kannada Sign Nukta + (0x00cbf, 0x00cc0,), # Kannada Vowel Sign I ..Kannada Vowel Sign Ii + (0x00cc2, 0x00cc2,), # Kannada Vowel Sign Uu + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark + (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin + (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular + (0x00d3e, 0x00d3e,), # Malayalam Vowel Sign Aa + (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc + (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark + (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d81, 0x00d81,), # Sinhala Sign Candrabindu + (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna + (0x00dcf, 0x00dcf,), # Sinhala Vowel Sign Aela-pilla + (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga 
Paa-pilla + (0x00ddf, 0x00ddf,), # Sinhala Vowel Sign Gayanukitta + (0x00e31, 0x00e31,), # Thai Character Mai Han-akat + (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu + (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan + (0x00eb1, 0x00eb1,), # Lao Vowel Sign Mai Kan + (0x00eb4, 0x00ebc,), # Lao Vowel Sign I ..Lao Semivowel Sign Lo + (0x00ec8, 0x00ece,), # Lao Tone Mai Ek ..Lao Yamakkan + (0x00f18, 0x00f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig + (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla + (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags + (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru + (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga + (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter + (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter + (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan + (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu + (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below + (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat + (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa + (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan + (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone + (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin + (0x01712, 0x01715,), # Tagalog Vowel Sign I ..Tagalog 
Sign Pamudpod + (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U + (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa + (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua + (0x017c6, 0x017c6,), # Khmer Sign Nikahit + (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017dd, 0x017dd,), # Khmer Sign Atthacan + (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180f, 0x0180f,), # Mongolian Free Variation Selector Four + (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga + (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U + (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O + (0x01932, 0x01932,), # Limbu Small Letter Anusvara + (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i + (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U + (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae + (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La + (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign + (0x01a60, 0x01a60,), # Tai Tham Sign Sakot + (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat + (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B + (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot + (0x01ab0, 0x01add,), # Combining Doubled Circum..(nil) + (0x01ae0, 0x01aeb,), # (nil) + (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang + (0x01b34, 0x01b3d,), # Balinese Sign Rerekan ..Balinese Vowel Sign La L + (0x01b42, 0x01b44,), # Balinese Vowel Sign Pepe..Balinese Adeg Adeg + (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol + (0x01b80, 
0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar + (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan + (0x01ba8, 0x01bad,), # Sundanese Vowel Sign Pam..Sundanese Consonant Sign + (0x01be6, 0x01be6,), # Batak Sign Tompi + (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee + (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O + (0x01bef, 0x01bf3,), # Batak Vowel Sign U For S..Batak Panongonan + (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T + (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha + (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash + (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01ced, 0x01ced,), # Vedic Sign Tiryak + (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above + (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea + (0x0200c, 0x0200c,), # Zero Width Non-joiner + (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above + (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu + (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner + (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M + (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous + (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer + (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0x0a6f0, 0x0a6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk + (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara + (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta + (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara + (0x0a825, 0x0a826,), # 
Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a82c, 0x0a82c,), # Syloti Nagri Sign Alternate Hasanta + (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig + (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay + (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R + (0x0a953, 0x0a953,), # Rejang Virama + (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar + (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu + (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku + (0x0a9bc, 0x0a9bd,), # Javanese Vowel Sign Pepe..Javanese Consonant Sign + (0x0a9c0, 0x0a9c0,), # Javanese Pangkon + (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw + (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe + (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue + (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng + (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M + (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang + (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U + (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia + (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho + (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama + (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap + (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap + (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika + (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 + (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0ff9e, 
0x0ff9f,), # Halfwidth Katakana Voice..Halfwidth Katakana Semi- + (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke + (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark + (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let + (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + (0x10a3f, 0x10a3f,), # Kharoshthi Virama + (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation + (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas + (0x10d69, 0x10d6d,), # (nil) + (0x10eab, 0x10eac,), # Yezidi Combining Hamza M..Yezidi Combining Madda M + (0x10efa, 0x10eff,), # (nil) ..Arabic Small Low Word Ma + (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke + (0x10f82, 0x10f85,), # Old Uyghur Combining Dot..Old Uyghur Combining Two + (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama + (0x11070, 0x11070,), # Brahmi Sign Old Tamil Virama + (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta + (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara + (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai + (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R + (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga + (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu + (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11173, 0x11173,), # Mahajani Sign Nukta + (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara + (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x111c0, 0x111c0,), # Sharada Sign 
Virama + (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe + (0x111cf, 0x111cf,), # Sharada Sign Inverted Candrabindu + (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai + (0x11234, 0x11237,), # Khojki Sign Anusvara ..Khojki Sign Shadda + (0x1123e, 0x1123e,), # Khojki Sign Sukun + (0x11241, 0x11241,), # Khojki Vowel Sign Vocalic R + (0x112df, 0x112df,), # Khudawadi Sign Anusvara + (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama + (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta + (0x1133e, 0x1133e,), # Grantha Vowel Sign Aa + (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1134d, 0x1134d,), # Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit + (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter + (0x113b8, 0x113b8,), # (nil) + (0x113bb, 0x113c0,), # (nil) + (0x113c2, 0x113c2,), # (nil) + (0x113c5, 0x113c5,), # (nil) + (0x113c7, 0x113c9,), # (nil) + (0x113ce, 0x113d0,), # (nil) + (0x113d2, 0x113d2,), # (nil) + (0x113e1, 0x113e2,), # (nil) + (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai + (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara + (0x11446, 0x11446,), # Newa Sign Nukta + (0x1145e, 0x1145e,), # Newa Sandhi Mark + (0x114b0, 0x114b0,), # Tirhuta Vowel Sign Aa + (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal + (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E + (0x114bd, 0x114bd,), # Tirhuta Vowel Sign Short O + (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara + (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta + (0x115af, 0x115af,), # Siddham Vowel Sign Aa + (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal + (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara + (0x115bf, 
0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter + (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai + (0x1163d, 0x1163d,), # Modi Sign Anusvara + (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra + (0x116ab, 0x116ab,), # Takri Sign Anusvara + (0x116ad, 0x116ad,), # Takri Vowel Sign Aa + (0x116b0, 0x116b7,), # Takri Vowel Sign U ..Takri Sign Nukta + (0x1171d, 0x1171d,), # Ahom Consonant Sign Medial La + (0x1171f, 0x1171f,), # Ahom Consonant Sign Medial Ligating Ra + (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu + (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer + (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign Anusvara + (0x11839, 0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta + (0x11930, 0x11930,), # Dives Akuru Vowel Sign Aa + (0x1193b, 0x1193e,), # Dives Akuru Sign Anusvar..Dives Akuru Virama + (0x11943, 0x11943,), # Dives Akuru Sign Nukta + (0x119d4, 0x119d7,), # Nandinagari Vowel Sign U..Nandinagari Vowel Sign V + (0x119da, 0x119db,), # Nandinagari Vowel Sign E..Nandinagari Vowel Sign A + (0x119e0, 0x119e0,), # Nandinagari Sign Virama + (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L + (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An + (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster + (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner + (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe + (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar + (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara + (0x11a98, 0x11a99,), # Soyombo Gemination Mark ..Soyombo Subjoiner + (0x11b60, 0x11b60,), # (nil) + (0x11b62, 0x11b64,), # (nil) + (0x11b66, 0x11b66,), # (nil) + (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki 
Sign Anusvara + (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter + (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa + (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E + (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign + (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E + (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign + (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama + (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara + (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign + (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara + (0x11d97, 0x11d97,), # Gunjala Gondi Virama + (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U + (0x11f00, 0x11f01,), # Kawi Sign Candrabindu ..Kawi Sign Anusvara + (0x11f36, 0x11f3a,), # Kawi Vowel Sign I ..Kawi Vowel Sign Vocalic + (0x11f40, 0x11f42,), # Kawi Vowel Sign Eu ..Kawi Conjoiner + (0x11f5a, 0x11f5a,), # (nil) + (0x13440, 0x13440,), # Egyptian Hieroglyph Mirror Horizontally + (0x13447, 0x13455,), # Egyptian Hieroglyph Modi..Egyptian Hieroglyph Modi + (0x1611e, 0x16129,), # (nil) + (0x1612d, 0x1612f,), # (nil) + (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High + (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta + (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar + (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below + (0x16fe4, 0x16fe4,), # Khitan Small Script Filler + (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea + (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark + (0x1cf00, 0x1cf2d,), # Znamenny Combining Mark ..Znamenny Combining Mark + (0x1cf30, 0x1cf46,), # Znamenny Combining Tonal..Znamenny Priznak Modifie + (0x1d165, 0x1d169,), # 
Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0x1da00, 0x1da36,), # Signwriting Head Rim ..Signwriting Air Sucking + (0x1da3b, 0x1da6c,), # Signwriting Mouth Closed..Signwriting Excitement + (0x1da75, 0x1da75,), # Signwriting Upper Body Tilting From Hip Joints + (0x1da84, 0x1da84,), # Signwriting Location Head Neck + (0x1da9b, 0x1da9f,), # Signwriting Fill Modifie..Signwriting Fill Modifie + (0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting Rotation Mod + (0x1e000, 0x1e006,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e008, 0x1e018,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e01b, 0x1e021,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e023, 0x1e024,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e08f, 0x1e08f,), # Combining Cyrillic Small Letter Byelorussian-ukr + (0x1e130, 0x1e136,), # Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T + (0x1e2ae, 0x1e2ae,), # Toto Sign Rising Tone + (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini + (0x1e4ec, 0x1e4ef,), # Nag Mundari Sign Muhor ..Nag Mundari Sign Sutuh + (0x1e5ee, 0x1e5ef,), # (nil) + (0x1e6e3, 0x1e6e3,), # (nil) + (0x1e6e6, 0x1e6e6,), # (nil) + (0x1e6ee, 0x1e6ef,), # (nil) + (0x1e6f5, 0x1e6f5,), # (nil) + (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining + (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag + (0xe0100, 
0xe01ef,), # Variation Selector-17 ..Variation Selector-256 +) + +GRAPHEME_ZWJ = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x0200d, 0x0200d,), # Zero Width Joiner +) + +GRAPHEME_REGIONAL_INDICATOR = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x1f1e6, 0x1f1ff,), # Regional Indicator Symbo..Regional Indicator Symbo +) + +GRAPHEME_PREPEND = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above + (0x006dd, 0x006dd,), # Arabic End Of Ayah + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark + (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov + (0x008e2, 0x008e2,), # Arabic Disputed End Of Ayah + (0x00d4e, 0x00d4e,), # Malayalam Letter Dot Reph + (0x110bd, 0x110bd,), # Kaithi Number Sign + (0x110cd, 0x110cd,), # Kaithi Number Sign Above + (0x111c2, 0x111c3,), # Sharada Sign Jihvamuliya..Sharada Sign Upadhmaniya + (0x113d1, 0x113d1,), # (nil) + (0x1193f, 0x1193f,), # Dives Akuru Prefixed Nasal Sign + (0x11941, 0x11941,), # Dives Akuru Initial Ra + (0x11a84, 0x11a89,), # Soyombo Sign Jihvamuliya..Soyombo Cluster-initial + (0x11d46, 0x11d46,), # Masaram Gondi Repha + (0x11f02, 0x11f02,), # Kawi Sign Repha +) + +GRAPHEME_SPACINGMARK = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x00903, 0x00903,), # Devanagari Sign Visarga + (0x0093b, 0x0093b,), # Devanagari Vowel Sign Ooe + (0x0093e, 0x00940,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Ii + (0x00949, 0x0094c,), # Devanagari Vowel Sign Ca..Devanagari Vowel Sign Au + (0x0094e, 0x0094f,), # Devanagari Vowel Sign Pr..Devanagari Vowel Sign Aw + (0x00982, 0x00983,), # Bengali Sign Anusvara ..Bengali Sign Visarga + (0x009bf, 0x009c0,), # Bengali Vowel Sign I ..Bengali Vowel Sign Ii + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 
0x009cc,), # Bengali Vowel Sign O ..Bengali Vowel Sign Au + (0x00a03, 0x00a03,), # Gurmukhi Sign Visarga + (0x00a3e, 0x00a40,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Ii + (0x00a83, 0x00a83,), # Gujarati Sign Visarga + (0x00abe, 0x00ac0,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Ii + (0x00ac9, 0x00ac9,), # Gujarati Vowel Sign Candra O + (0x00acb, 0x00acc,), # Gujarati Vowel Sign O ..Gujarati Vowel Sign Au + (0x00b02, 0x00b03,), # Oriya Sign Anusvara ..Oriya Sign Visarga + (0x00b40, 0x00b40,), # Oriya Vowel Sign Ii + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4c,), # Oriya Vowel Sign O ..Oriya Vowel Sign Au + (0x00bbf, 0x00bbf,), # Tamil Vowel Sign I + (0x00bc1, 0x00bc2,), # Tamil Vowel Sign U ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcc,), # Tamil Vowel Sign O ..Tamil Vowel Sign Au + (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga + (0x00c41, 0x00c44,), # Telugu Vowel Sign U ..Telugu Vowel Sign Vocali + (0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga + (0x00cbe, 0x00cbe,), # Kannada Vowel Sign Aa + (0x00cc1, 0x00cc1,), # Kannada Vowel Sign U + (0x00cc3, 0x00cc4,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + (0x00cf3, 0x00cf3,), # Kannada Sign Combining Anusvara Above Right + (0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga + (0x00d3f, 0x00d40,), # Malayalam Vowel Sign I ..Malayalam Vowel Sign Ii + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4c,), # Malayalam Vowel Sign O ..Malayalam Vowel Sign Au + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya + (0x00dd0, 0x00dd1,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Diga + (0x00dd8, 0x00dde,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Kombu + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + (0x00e33, 0x00e33,), # Thai Character 
Sara Am + (0x00eb3, 0x00eb3,), # Lao Vowel Sign Am + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f7f, 0x00f7f,), # Tibetan Sign Rnam Bcad + (0x01031, 0x01031,), # Myanmar Vowel Sign E + (0x0103b, 0x0103c,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01056, 0x01057,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x01084, 0x01084,), # Myanmar Vowel Sign Shan E + (0x017b6, 0x017b6,), # Khmer Vowel Sign Aa + (0x017be, 0x017c5,), # Khmer Vowel Sign Oe ..Khmer Vowel Sign Au + (0x017c7, 0x017c8,), # Khmer Sign Reahmuk ..Khmer Sign Yuukaleapintu + (0x01923, 0x01926,), # Limbu Vowel Sign Ee ..Limbu Vowel Sign Au + (0x01929, 0x0192b,), # Limbu Subjoined Letter Y..Limbu Subjoined Letter W + (0x01930, 0x01931,), # Limbu Small Letter Ka ..Limbu Small Letter Nga + (0x01933, 0x01938,), # Limbu Small Letter Ta ..Limbu Small Letter La + (0x01a19, 0x01a1a,), # Buginese Vowel Sign E ..Buginese Vowel Sign O + (0x01a55, 0x01a55,), # Tai Tham Consonant Sign Medial Ra + (0x01a57, 0x01a57,), # Tai Tham Consonant Sign La Tang Lai + (0x01a6d, 0x01a72,), # Tai Tham Vowel Sign Oy ..Tai Tham Vowel Sign Tham + (0x01b04, 0x01b04,), # Balinese Sign Bisah + (0x01b3e, 0x01b41,), # Balinese Vowel Sign Tali..Balinese Vowel Sign Tali + (0x01b82, 0x01b82,), # Sundanese Sign Pangwisad + (0x01ba1, 0x01ba1,), # Sundanese Consonant Sign Pamingkal + (0x01ba6, 0x01ba7,), # Sundanese Vowel Sign Pan..Sundanese Vowel Sign Pan + (0x01be7, 0x01be7,), # Batak Vowel Sign E + (0x01bea, 0x01bec,), # Batak Vowel Sign I ..Batak Vowel Sign O + (0x01bee, 0x01bee,), # Batak Vowel Sign U + (0x01c24, 0x01c2b,), # Lepcha Subjoined Letter ..Lepcha Vowel Sign Uu + (0x01c34, 0x01c35,), # Lepcha Consonant Sign Ny..Lepcha Consonant Sign Ka + (0x01ce1, 0x01ce1,), # Vedic Tone Atharvavedic Independent Svarita + (0x01cf7, 0x01cf7,), # Vedic Sign Atikrama + (0x0a823, 0x0a824,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a827, 0x0a827,), # Syloti Nagri Vowel 
Sign Oo + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c3,), # Saurashtra Consonant Sig..Saurashtra Vowel Sign Au + (0x0a952, 0x0a952,), # Rejang Consonant Sign H + (0x0a983, 0x0a983,), # Javanese Sign Wignyan + (0x0a9b4, 0x0a9b5,), # Javanese Vowel Sign Taru..Javanese Vowel Sign Tolo + (0x0a9ba, 0x0a9bb,), # Javanese Vowel Sign Tali..Javanese Vowel Sign Dirg + (0x0a9be, 0x0a9bf,), # Javanese Consonant Sign ..Javanese Consonant Sign + (0x0aa2f, 0x0aa30,), # Cham Vowel Sign O ..Cham Vowel Sign Ai + (0x0aa33, 0x0aa34,), # Cham Consonant Sign Ya ..Cham Consonant Sign Ra + (0x0aa4d, 0x0aa4d,), # Cham Consonant Sign Final H + (0x0aaeb, 0x0aaeb,), # Meetei Mayek Vowel Sign Ii + (0x0aaee, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf5,), # Meetei Mayek Vowel Sign Visarga + (0x0abe3, 0x0abe4,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abe6, 0x0abe7,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abe9, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abec,), # Meetei Mayek Lum Iyek + (0x11000, 0x11000,), # Brahmi Sign Candrabindu + (0x11002, 0x11002,), # Brahmi Sign Visarga + (0x11082, 0x11082,), # Kaithi Sign Visarga + (0x110b0, 0x110b2,), # Kaithi Vowel Sign Aa ..Kaithi Vowel Sign Ii + (0x110b7, 0x110b8,), # Kaithi Vowel Sign O ..Kaithi Vowel Sign Au + (0x1112c, 0x1112c,), # Chakma Vowel Sign E + (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei + (0x11182, 0x11182,), # Sharada Sign Visarga + (0x111b3, 0x111b5,), # Sharada Vowel Sign Aa ..Sharada Vowel Sign Ii + (0x111bf, 0x111bf,), # Sharada Vowel Sign Au + (0x111ce, 0x111ce,), # Sharada Vowel Sign Prishthamatra E + (0x1122c, 0x1122e,), # Khojki Vowel Sign Aa ..Khojki Vowel Sign Ii + (0x11232, 0x11233,), # Khojki Vowel Sign O ..Khojki Vowel Sign Au + (0x112e0, 0x112e2,), # Khudawadi Vowel Sign Aa ..Khudawadi Vowel Sign Ii + (0x11302, 0x11303,), # Grantha Sign 
Anusvara ..Grantha Sign Visarga + (0x1133f, 0x1133f,), # Grantha Vowel Sign I + (0x11341, 0x11344,), # Grantha Vowel Sign U ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134c,), # Grantha Vowel Sign Oo ..Grantha Vowel Sign Au + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal + (0x113b9, 0x113ba,), # (nil) + (0x113ca, 0x113ca,), # (nil) + (0x113cc, 0x113cd,), # (nil) + (0x11435, 0x11437,), # Newa Vowel Sign Aa ..Newa Vowel Sign Ii + (0x11440, 0x11441,), # Newa Vowel Sign O ..Newa Vowel Sign Au + (0x11445, 0x11445,), # Newa Sign Visarga + (0x114b1, 0x114b2,), # Tirhuta Vowel Sign I ..Tirhuta Vowel Sign Ii + (0x114b9, 0x114b9,), # Tirhuta Vowel Sign E + (0x114bb, 0x114bc,), # Tirhuta Vowel Sign Ai ..Tirhuta Vowel Sign O + (0x114be, 0x114be,), # Tirhuta Vowel Sign Au + (0x114c1, 0x114c1,), # Tirhuta Sign Visarga + (0x115b0, 0x115b1,), # Siddham Vowel Sign I ..Siddham Vowel Sign Ii + (0x115b8, 0x115bb,), # Siddham Vowel Sign E ..Siddham Vowel Sign Au + (0x115be, 0x115be,), # Siddham Sign Visarga + (0x11630, 0x11632,), # Modi Vowel Sign Aa ..Modi Vowel Sign Ii + (0x1163b, 0x1163c,), # Modi Vowel Sign O ..Modi Vowel Sign Au + (0x1163e, 0x1163e,), # Modi Sign Visarga + (0x116ac, 0x116ac,), # Takri Sign Visarga + (0x116ae, 0x116af,), # Takri Vowel Sign I ..Takri Vowel Sign Ii + (0x1171e, 0x1171e,), # Ahom Consonant Sign Medial Ra + (0x11726, 0x11726,), # Ahom Vowel Sign E + (0x1182c, 0x1182e,), # Dogra Vowel Sign Aa ..Dogra Vowel Sign Ii + (0x11838, 0x11838,), # Dogra Sign Visarga + (0x11931, 0x11935,), # Dives Akuru Vowel Sign I..Dives Akuru Vowel Sign E + (0x11937, 0x11938,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O + (0x11940, 0x11940,), # Dives Akuru Medial Ya + (0x11942, 0x11942,), # Dives Akuru Medial Ra + (0x119d1, 0x119d3,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign I + (0x119dc, 0x119df,), # Nandinagari Vowel Sign O..Nandinagari Sign Visarga + (0x119e4, 
0x119e4,), # Nandinagari Vowel Sign Prishthamatra E + (0x11a39, 0x11a39,), # Zanabazar Square Sign Visarga + (0x11a57, 0x11a58,), # Soyombo Vowel Sign Ai ..Soyombo Vowel Sign Au + (0x11a97, 0x11a97,), # Soyombo Sign Visarga + (0x11b61, 0x11b61,), # (nil) + (0x11b65, 0x11b65,), # (nil) + (0x11b67, 0x11b67,), # (nil) + (0x11c2f, 0x11c2f,), # Bhaiksuki Vowel Sign Aa + (0x11c3e, 0x11c3e,), # Bhaiksuki Sign Visarga + (0x11ca9, 0x11ca9,), # Marchen Subjoined Letter Ya + (0x11cb1, 0x11cb1,), # Marchen Vowel Sign I + (0x11cb4, 0x11cb4,), # Marchen Vowel Sign O + (0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign + (0x11d93, 0x11d94,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign + (0x11d96, 0x11d96,), # Gunjala Gondi Sign Visarga + (0x11ef5, 0x11ef6,), # Makasar Vowel Sign E ..Makasar Vowel Sign O + (0x11f03, 0x11f03,), # Kawi Sign Visarga + (0x11f34, 0x11f35,), # Kawi Vowel Sign Aa ..Kawi Vowel Sign Alternat + (0x11f3e, 0x11f3f,), # Kawi Vowel Sign E ..Kawi Vowel Sign Ai + (0x1612a, 0x1612c,), # (nil) + (0x16f51, 0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui +) + +GRAPHEME_L = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x01100, 0x0115f,), # Hangul Choseong Kiyeok ..Hangul Choseong Filler + (0x0a960, 0x0a97c,), # Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo +) + +GRAPHEME_V = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x01160, 0x011a7,), # Hangul Jungseong Filler ..Hangul Jungseong O-yae + (0x0d7b0, 0x0d7c6,), # Hangul Jungseong O-yeo ..Hangul Jungseong Araea-e + (0x16d63, 0x16d63,), # (nil) + (0x16d67, 0x16d6a,), # (nil) +) + +GRAPHEME_T = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x011a8, 0x011ff,), # Hangul Jongseong Kiyeok ..Hangul Jongseong Ssangni + (0x0d7cb, 0x0d7fb,), # Hangul Jongseong Nieun-r..Hangul Jongseong Phieuph +) + +GRAPHEME_LV = ( + # Source: 
GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x0ac00, 0x0ac00,), # Hangul Syllable Ga + (0x0ac1c, 0x0ac1c,), # Hangul Syllable Gae + (0x0ac38, 0x0ac38,), # Hangul Syllable Gya + (0x0ac54, 0x0ac54,), # Hangul Syllable Gyae + (0x0ac70, 0x0ac70,), # Hangul Syllable Geo + (0x0ac8c, 0x0ac8c,), # Hangul Syllable Ge + (0x0aca8, 0x0aca8,), # Hangul Syllable Gyeo + (0x0acc4, 0x0acc4,), # Hangul Syllable Gye + (0x0ace0, 0x0ace0,), # Hangul Syllable Go + (0x0acfc, 0x0acfc,), # Hangul Syllable Gwa + (0x0ad18, 0x0ad18,), # Hangul Syllable Gwae + (0x0ad34, 0x0ad34,), # Hangul Syllable Goe + (0x0ad50, 0x0ad50,), # Hangul Syllable Gyo + (0x0ad6c, 0x0ad6c,), # Hangul Syllable Gu + (0x0ad88, 0x0ad88,), # Hangul Syllable Gweo + (0x0ada4, 0x0ada4,), # Hangul Syllable Gwe + (0x0adc0, 0x0adc0,), # Hangul Syllable Gwi + (0x0addc, 0x0addc,), # Hangul Syllable Gyu + (0x0adf8, 0x0adf8,), # Hangul Syllable Geu + (0x0ae14, 0x0ae14,), # Hangul Syllable Gyi + (0x0ae30, 0x0ae30,), # Hangul Syllable Gi + (0x0ae4c, 0x0ae4c,), # Hangul Syllable Gga + (0x0ae68, 0x0ae68,), # Hangul Syllable Ggae + (0x0ae84, 0x0ae84,), # Hangul Syllable Ggya + (0x0aea0, 0x0aea0,), # Hangul Syllable Ggyae + (0x0aebc, 0x0aebc,), # Hangul Syllable Ggeo + (0x0aed8, 0x0aed8,), # Hangul Syllable Gge + (0x0aef4, 0x0aef4,), # Hangul Syllable Ggyeo + (0x0af10, 0x0af10,), # Hangul Syllable Ggye + (0x0af2c, 0x0af2c,), # Hangul Syllable Ggo + (0x0af48, 0x0af48,), # Hangul Syllable Ggwa + (0x0af64, 0x0af64,), # Hangul Syllable Ggwae + (0x0af80, 0x0af80,), # Hangul Syllable Ggoe + (0x0af9c, 0x0af9c,), # Hangul Syllable Ggyo + (0x0afb8, 0x0afb8,), # Hangul Syllable Ggu + (0x0afd4, 0x0afd4,), # Hangul Syllable Ggweo + (0x0aff0, 0x0aff0,), # Hangul Syllable Ggwe + (0x0b00c, 0x0b00c,), # Hangul Syllable Ggwi + (0x0b028, 0x0b028,), # Hangul Syllable Ggyu + (0x0b044, 0x0b044,), # Hangul Syllable Ggeu + (0x0b060, 0x0b060,), # Hangul Syllable Ggyi + (0x0b07c, 0x0b07c,), # Hangul Syllable Ggi + (0x0b098, 
0x0b098,), # Hangul Syllable Na + (0x0b0b4, 0x0b0b4,), # Hangul Syllable Nae + (0x0b0d0, 0x0b0d0,), # Hangul Syllable Nya + (0x0b0ec, 0x0b0ec,), # Hangul Syllable Nyae + (0x0b108, 0x0b108,), # Hangul Syllable Neo + (0x0b124, 0x0b124,), # Hangul Syllable Ne + (0x0b140, 0x0b140,), # Hangul Syllable Nyeo + (0x0b15c, 0x0b15c,), # Hangul Syllable Nye + (0x0b178, 0x0b178,), # Hangul Syllable No + (0x0b194, 0x0b194,), # Hangul Syllable Nwa + (0x0b1b0, 0x0b1b0,), # Hangul Syllable Nwae + (0x0b1cc, 0x0b1cc,), # Hangul Syllable Noe + (0x0b1e8, 0x0b1e8,), # Hangul Syllable Nyo + (0x0b204, 0x0b204,), # Hangul Syllable Nu + (0x0b220, 0x0b220,), # Hangul Syllable Nweo + (0x0b23c, 0x0b23c,), # Hangul Syllable Nwe + (0x0b258, 0x0b258,), # Hangul Syllable Nwi + (0x0b274, 0x0b274,), # Hangul Syllable Nyu + (0x0b290, 0x0b290,), # Hangul Syllable Neu + (0x0b2ac, 0x0b2ac,), # Hangul Syllable Nyi + (0x0b2c8, 0x0b2c8,), # Hangul Syllable Ni + (0x0b2e4, 0x0b2e4,), # Hangul Syllable Da + (0x0b300, 0x0b300,), # Hangul Syllable Dae + (0x0b31c, 0x0b31c,), # Hangul Syllable Dya + (0x0b338, 0x0b338,), # Hangul Syllable Dyae + (0x0b354, 0x0b354,), # Hangul Syllable Deo + (0x0b370, 0x0b370,), # Hangul Syllable De + (0x0b38c, 0x0b38c,), # Hangul Syllable Dyeo + (0x0b3a8, 0x0b3a8,), # Hangul Syllable Dye + (0x0b3c4, 0x0b3c4,), # Hangul Syllable Do + (0x0b3e0, 0x0b3e0,), # Hangul Syllable Dwa + (0x0b3fc, 0x0b3fc,), # Hangul Syllable Dwae + (0x0b418, 0x0b418,), # Hangul Syllable Doe + (0x0b434, 0x0b434,), # Hangul Syllable Dyo + (0x0b450, 0x0b450,), # Hangul Syllable Du + (0x0b46c, 0x0b46c,), # Hangul Syllable Dweo + (0x0b488, 0x0b488,), # Hangul Syllable Dwe + (0x0b4a4, 0x0b4a4,), # Hangul Syllable Dwi + (0x0b4c0, 0x0b4c0,), # Hangul Syllable Dyu + (0x0b4dc, 0x0b4dc,), # Hangul Syllable Deu + (0x0b4f8, 0x0b4f8,), # Hangul Syllable Dyi + (0x0b514, 0x0b514,), # Hangul Syllable Di + (0x0b530, 0x0b530,), # Hangul Syllable Dda + (0x0b54c, 0x0b54c,), # Hangul Syllable Ddae + (0x0b568, 0x0b568,), # Hangul 
Syllable Ddya + (0x0b584, 0x0b584,), # Hangul Syllable Ddyae + (0x0b5a0, 0x0b5a0,), # Hangul Syllable Ddeo + (0x0b5bc, 0x0b5bc,), # Hangul Syllable Dde + (0x0b5d8, 0x0b5d8,), # Hangul Syllable Ddyeo + (0x0b5f4, 0x0b5f4,), # Hangul Syllable Ddye + (0x0b610, 0x0b610,), # Hangul Syllable Ddo + (0x0b62c, 0x0b62c,), # Hangul Syllable Ddwa + (0x0b648, 0x0b648,), # Hangul Syllable Ddwae + (0x0b664, 0x0b664,), # Hangul Syllable Ddoe + (0x0b680, 0x0b680,), # Hangul Syllable Ddyo + (0x0b69c, 0x0b69c,), # Hangul Syllable Ddu + (0x0b6b8, 0x0b6b8,), # Hangul Syllable Ddweo + (0x0b6d4, 0x0b6d4,), # Hangul Syllable Ddwe + (0x0b6f0, 0x0b6f0,), # Hangul Syllable Ddwi + (0x0b70c, 0x0b70c,), # Hangul Syllable Ddyu + (0x0b728, 0x0b728,), # Hangul Syllable Ddeu + (0x0b744, 0x0b744,), # Hangul Syllable Ddyi + (0x0b760, 0x0b760,), # Hangul Syllable Ddi + (0x0b77c, 0x0b77c,), # Hangul Syllable Ra + (0x0b798, 0x0b798,), # Hangul Syllable Rae + (0x0b7b4, 0x0b7b4,), # Hangul Syllable Rya + (0x0b7d0, 0x0b7d0,), # Hangul Syllable Ryae + (0x0b7ec, 0x0b7ec,), # Hangul Syllable Reo + (0x0b808, 0x0b808,), # Hangul Syllable Re + (0x0b824, 0x0b824,), # Hangul Syllable Ryeo + (0x0b840, 0x0b840,), # Hangul Syllable Rye + (0x0b85c, 0x0b85c,), # Hangul Syllable Ro + (0x0b878, 0x0b878,), # Hangul Syllable Rwa + (0x0b894, 0x0b894,), # Hangul Syllable Rwae + (0x0b8b0, 0x0b8b0,), # Hangul Syllable Roe + (0x0b8cc, 0x0b8cc,), # Hangul Syllable Ryo + (0x0b8e8, 0x0b8e8,), # Hangul Syllable Ru + (0x0b904, 0x0b904,), # Hangul Syllable Rweo + (0x0b920, 0x0b920,), # Hangul Syllable Rwe + (0x0b93c, 0x0b93c,), # Hangul Syllable Rwi + (0x0b958, 0x0b958,), # Hangul Syllable Ryu + (0x0b974, 0x0b974,), # Hangul Syllable Reu + (0x0b990, 0x0b990,), # Hangul Syllable Ryi + (0x0b9ac, 0x0b9ac,), # Hangul Syllable Ri + (0x0b9c8, 0x0b9c8,), # Hangul Syllable Ma + (0x0b9e4, 0x0b9e4,), # Hangul Syllable Mae + (0x0ba00, 0x0ba00,), # Hangul Syllable Mya + (0x0ba1c, 0x0ba1c,), # Hangul Syllable Myae + (0x0ba38, 0x0ba38,), # Hangul 
Syllable Meo + (0x0ba54, 0x0ba54,), # Hangul Syllable Me + (0x0ba70, 0x0ba70,), # Hangul Syllable Myeo + (0x0ba8c, 0x0ba8c,), # Hangul Syllable Mye + (0x0baa8, 0x0baa8,), # Hangul Syllable Mo + (0x0bac4, 0x0bac4,), # Hangul Syllable Mwa + (0x0bae0, 0x0bae0,), # Hangul Syllable Mwae + (0x0bafc, 0x0bafc,), # Hangul Syllable Moe + (0x0bb18, 0x0bb18,), # Hangul Syllable Myo + (0x0bb34, 0x0bb34,), # Hangul Syllable Mu + (0x0bb50, 0x0bb50,), # Hangul Syllable Mweo + (0x0bb6c, 0x0bb6c,), # Hangul Syllable Mwe + (0x0bb88, 0x0bb88,), # Hangul Syllable Mwi + (0x0bba4, 0x0bba4,), # Hangul Syllable Myu + (0x0bbc0, 0x0bbc0,), # Hangul Syllable Meu + (0x0bbdc, 0x0bbdc,), # Hangul Syllable Myi + (0x0bbf8, 0x0bbf8,), # Hangul Syllable Mi + (0x0bc14, 0x0bc14,), # Hangul Syllable Ba + (0x0bc30, 0x0bc30,), # Hangul Syllable Bae + (0x0bc4c, 0x0bc4c,), # Hangul Syllable Bya + (0x0bc68, 0x0bc68,), # Hangul Syllable Byae + (0x0bc84, 0x0bc84,), # Hangul Syllable Beo + (0x0bca0, 0x0bca0,), # Hangul Syllable Be + (0x0bcbc, 0x0bcbc,), # Hangul Syllable Byeo + (0x0bcd8, 0x0bcd8,), # Hangul Syllable Bye + (0x0bcf4, 0x0bcf4,), # Hangul Syllable Bo + (0x0bd10, 0x0bd10,), # Hangul Syllable Bwa + (0x0bd2c, 0x0bd2c,), # Hangul Syllable Bwae + (0x0bd48, 0x0bd48,), # Hangul Syllable Boe + (0x0bd64, 0x0bd64,), # Hangul Syllable Byo + (0x0bd80, 0x0bd80,), # Hangul Syllable Bu + (0x0bd9c, 0x0bd9c,), # Hangul Syllable Bweo + (0x0bdb8, 0x0bdb8,), # Hangul Syllable Bwe + (0x0bdd4, 0x0bdd4,), # Hangul Syllable Bwi + (0x0bdf0, 0x0bdf0,), # Hangul Syllable Byu + (0x0be0c, 0x0be0c,), # Hangul Syllable Beu + (0x0be28, 0x0be28,), # Hangul Syllable Byi + (0x0be44, 0x0be44,), # Hangul Syllable Bi + (0x0be60, 0x0be60,), # Hangul Syllable Bba + (0x0be7c, 0x0be7c,), # Hangul Syllable Bbae + (0x0be98, 0x0be98,), # Hangul Syllable Bbya + (0x0beb4, 0x0beb4,), # Hangul Syllable Bbyae + (0x0bed0, 0x0bed0,), # Hangul Syllable Bbeo + (0x0beec, 0x0beec,), # Hangul Syllable Bbe + (0x0bf08, 0x0bf08,), # Hangul Syllable Bbyeo + 
(0x0bf24, 0x0bf24,), # Hangul Syllable Bbye + (0x0bf40, 0x0bf40,), # Hangul Syllable Bbo + (0x0bf5c, 0x0bf5c,), # Hangul Syllable Bbwa + (0x0bf78, 0x0bf78,), # Hangul Syllable Bbwae + (0x0bf94, 0x0bf94,), # Hangul Syllable Bboe + (0x0bfb0, 0x0bfb0,), # Hangul Syllable Bbyo + (0x0bfcc, 0x0bfcc,), # Hangul Syllable Bbu + (0x0bfe8, 0x0bfe8,), # Hangul Syllable Bbweo + (0x0c004, 0x0c004,), # Hangul Syllable Bbwe + (0x0c020, 0x0c020,), # Hangul Syllable Bbwi + (0x0c03c, 0x0c03c,), # Hangul Syllable Bbyu + (0x0c058, 0x0c058,), # Hangul Syllable Bbeu + (0x0c074, 0x0c074,), # Hangul Syllable Bbyi + (0x0c090, 0x0c090,), # Hangul Syllable Bbi + (0x0c0ac, 0x0c0ac,), # Hangul Syllable Sa + (0x0c0c8, 0x0c0c8,), # Hangul Syllable Sae + (0x0c0e4, 0x0c0e4,), # Hangul Syllable Sya + (0x0c100, 0x0c100,), # Hangul Syllable Syae + (0x0c11c, 0x0c11c,), # Hangul Syllable Seo + (0x0c138, 0x0c138,), # Hangul Syllable Se + (0x0c154, 0x0c154,), # Hangul Syllable Syeo + (0x0c170, 0x0c170,), # Hangul Syllable Sye + (0x0c18c, 0x0c18c,), # Hangul Syllable So + (0x0c1a8, 0x0c1a8,), # Hangul Syllable Swa + (0x0c1c4, 0x0c1c4,), # Hangul Syllable Swae + (0x0c1e0, 0x0c1e0,), # Hangul Syllable Soe + (0x0c1fc, 0x0c1fc,), # Hangul Syllable Syo + (0x0c218, 0x0c218,), # Hangul Syllable Su + (0x0c234, 0x0c234,), # Hangul Syllable Sweo + (0x0c250, 0x0c250,), # Hangul Syllable Swe + (0x0c26c, 0x0c26c,), # Hangul Syllable Swi + (0x0c288, 0x0c288,), # Hangul Syllable Syu + (0x0c2a4, 0x0c2a4,), # Hangul Syllable Seu + (0x0c2c0, 0x0c2c0,), # Hangul Syllable Syi + (0x0c2dc, 0x0c2dc,), # Hangul Syllable Si + (0x0c2f8, 0x0c2f8,), # Hangul Syllable Ssa + (0x0c314, 0x0c314,), # Hangul Syllable Ssae + (0x0c330, 0x0c330,), # Hangul Syllable Ssya + (0x0c34c, 0x0c34c,), # Hangul Syllable Ssyae + (0x0c368, 0x0c368,), # Hangul Syllable Sseo + (0x0c384, 0x0c384,), # Hangul Syllable Sse + (0x0c3a0, 0x0c3a0,), # Hangul Syllable Ssyeo + (0x0c3bc, 0x0c3bc,), # Hangul Syllable Ssye + (0x0c3d8, 0x0c3d8,), # Hangul Syllable Sso + 
(0x0c3f4, 0x0c3f4,), # Hangul Syllable Sswa + (0x0c410, 0x0c410,), # Hangul Syllable Sswae + (0x0c42c, 0x0c42c,), # Hangul Syllable Ssoe + (0x0c448, 0x0c448,), # Hangul Syllable Ssyo + (0x0c464, 0x0c464,), # Hangul Syllable Ssu + (0x0c480, 0x0c480,), # Hangul Syllable Ssweo + (0x0c49c, 0x0c49c,), # Hangul Syllable Sswe + (0x0c4b8, 0x0c4b8,), # Hangul Syllable Sswi + (0x0c4d4, 0x0c4d4,), # Hangul Syllable Ssyu + (0x0c4f0, 0x0c4f0,), # Hangul Syllable Sseu + (0x0c50c, 0x0c50c,), # Hangul Syllable Ssyi + (0x0c528, 0x0c528,), # Hangul Syllable Ssi + (0x0c544, 0x0c544,), # Hangul Syllable A + (0x0c560, 0x0c560,), # Hangul Syllable Ae + (0x0c57c, 0x0c57c,), # Hangul Syllable Ya + (0x0c598, 0x0c598,), # Hangul Syllable Yae + (0x0c5b4, 0x0c5b4,), # Hangul Syllable Eo + (0x0c5d0, 0x0c5d0,), # Hangul Syllable E + (0x0c5ec, 0x0c5ec,), # Hangul Syllable Yeo + (0x0c608, 0x0c608,), # Hangul Syllable Ye + (0x0c624, 0x0c624,), # Hangul Syllable O + (0x0c640, 0x0c640,), # Hangul Syllable Wa + (0x0c65c, 0x0c65c,), # Hangul Syllable Wae + (0x0c678, 0x0c678,), # Hangul Syllable Oe + (0x0c694, 0x0c694,), # Hangul Syllable Yo + (0x0c6b0, 0x0c6b0,), # Hangul Syllable U + (0x0c6cc, 0x0c6cc,), # Hangul Syllable Weo + (0x0c6e8, 0x0c6e8,), # Hangul Syllable We + (0x0c704, 0x0c704,), # Hangul Syllable Wi + (0x0c720, 0x0c720,), # Hangul Syllable Yu + (0x0c73c, 0x0c73c,), # Hangul Syllable Eu + (0x0c758, 0x0c758,), # Hangul Syllable Yi + (0x0c774, 0x0c774,), # Hangul Syllable I + (0x0c790, 0x0c790,), # Hangul Syllable Ja + (0x0c7ac, 0x0c7ac,), # Hangul Syllable Jae + (0x0c7c8, 0x0c7c8,), # Hangul Syllable Jya + (0x0c7e4, 0x0c7e4,), # Hangul Syllable Jyae + (0x0c800, 0x0c800,), # Hangul Syllable Jeo + (0x0c81c, 0x0c81c,), # Hangul Syllable Je + (0x0c838, 0x0c838,), # Hangul Syllable Jyeo + (0x0c854, 0x0c854,), # Hangul Syllable Jye + (0x0c870, 0x0c870,), # Hangul Syllable Jo + (0x0c88c, 0x0c88c,), # Hangul Syllable Jwa + (0x0c8a8, 0x0c8a8,), # Hangul Syllable Jwae + (0x0c8c4, 0x0c8c4,), # Hangul 
Syllable Joe + (0x0c8e0, 0x0c8e0,), # Hangul Syllable Jyo + (0x0c8fc, 0x0c8fc,), # Hangul Syllable Ju + (0x0c918, 0x0c918,), # Hangul Syllable Jweo + (0x0c934, 0x0c934,), # Hangul Syllable Jwe + (0x0c950, 0x0c950,), # Hangul Syllable Jwi + (0x0c96c, 0x0c96c,), # Hangul Syllable Jyu + (0x0c988, 0x0c988,), # Hangul Syllable Jeu + (0x0c9a4, 0x0c9a4,), # Hangul Syllable Jyi + (0x0c9c0, 0x0c9c0,), # Hangul Syllable Ji + (0x0c9dc, 0x0c9dc,), # Hangul Syllable Jja + (0x0c9f8, 0x0c9f8,), # Hangul Syllable Jjae + (0x0ca14, 0x0ca14,), # Hangul Syllable Jjya + (0x0ca30, 0x0ca30,), # Hangul Syllable Jjyae + (0x0ca4c, 0x0ca4c,), # Hangul Syllable Jjeo + (0x0ca68, 0x0ca68,), # Hangul Syllable Jje + (0x0ca84, 0x0ca84,), # Hangul Syllable Jjyeo + (0x0caa0, 0x0caa0,), # Hangul Syllable Jjye + (0x0cabc, 0x0cabc,), # Hangul Syllable Jjo + (0x0cad8, 0x0cad8,), # Hangul Syllable Jjwa + (0x0caf4, 0x0caf4,), # Hangul Syllable Jjwae + (0x0cb10, 0x0cb10,), # Hangul Syllable Jjoe + (0x0cb2c, 0x0cb2c,), # Hangul Syllable Jjyo + (0x0cb48, 0x0cb48,), # Hangul Syllable Jju + (0x0cb64, 0x0cb64,), # Hangul Syllable Jjweo + (0x0cb80, 0x0cb80,), # Hangul Syllable Jjwe + (0x0cb9c, 0x0cb9c,), # Hangul Syllable Jjwi + (0x0cbb8, 0x0cbb8,), # Hangul Syllable Jjyu + (0x0cbd4, 0x0cbd4,), # Hangul Syllable Jjeu + (0x0cbf0, 0x0cbf0,), # Hangul Syllable Jjyi + (0x0cc0c, 0x0cc0c,), # Hangul Syllable Jji + (0x0cc28, 0x0cc28,), # Hangul Syllable Ca + (0x0cc44, 0x0cc44,), # Hangul Syllable Cae + (0x0cc60, 0x0cc60,), # Hangul Syllable Cya + (0x0cc7c, 0x0cc7c,), # Hangul Syllable Cyae + (0x0cc98, 0x0cc98,), # Hangul Syllable Ceo + (0x0ccb4, 0x0ccb4,), # Hangul Syllable Ce + (0x0ccd0, 0x0ccd0,), # Hangul Syllable Cyeo + (0x0ccec, 0x0ccec,), # Hangul Syllable Cye + (0x0cd08, 0x0cd08,), # Hangul Syllable Co + (0x0cd24, 0x0cd24,), # Hangul Syllable Cwa + (0x0cd40, 0x0cd40,), # Hangul Syllable Cwae + (0x0cd5c, 0x0cd5c,), # Hangul Syllable Coe + (0x0cd78, 0x0cd78,), # Hangul Syllable Cyo + (0x0cd94, 0x0cd94,), # Hangul 
Syllable Cu + (0x0cdb0, 0x0cdb0,), # Hangul Syllable Cweo + (0x0cdcc, 0x0cdcc,), # Hangul Syllable Cwe + (0x0cde8, 0x0cde8,), # Hangul Syllable Cwi + (0x0ce04, 0x0ce04,), # Hangul Syllable Cyu + (0x0ce20, 0x0ce20,), # Hangul Syllable Ceu + (0x0ce3c, 0x0ce3c,), # Hangul Syllable Cyi + (0x0ce58, 0x0ce58,), # Hangul Syllable Ci + (0x0ce74, 0x0ce74,), # Hangul Syllable Ka + (0x0ce90, 0x0ce90,), # Hangul Syllable Kae + (0x0ceac, 0x0ceac,), # Hangul Syllable Kya + (0x0cec8, 0x0cec8,), # Hangul Syllable Kyae + (0x0cee4, 0x0cee4,), # Hangul Syllable Keo + (0x0cf00, 0x0cf00,), # Hangul Syllable Ke + (0x0cf1c, 0x0cf1c,), # Hangul Syllable Kyeo + (0x0cf38, 0x0cf38,), # Hangul Syllable Kye + (0x0cf54, 0x0cf54,), # Hangul Syllable Ko + (0x0cf70, 0x0cf70,), # Hangul Syllable Kwa + (0x0cf8c, 0x0cf8c,), # Hangul Syllable Kwae + (0x0cfa8, 0x0cfa8,), # Hangul Syllable Koe + (0x0cfc4, 0x0cfc4,), # Hangul Syllable Kyo + (0x0cfe0, 0x0cfe0,), # Hangul Syllable Ku + (0x0cffc, 0x0cffc,), # Hangul Syllable Kweo + (0x0d018, 0x0d018,), # Hangul Syllable Kwe + (0x0d034, 0x0d034,), # Hangul Syllable Kwi + (0x0d050, 0x0d050,), # Hangul Syllable Kyu + (0x0d06c, 0x0d06c,), # Hangul Syllable Keu + (0x0d088, 0x0d088,), # Hangul Syllable Kyi + (0x0d0a4, 0x0d0a4,), # Hangul Syllable Ki + (0x0d0c0, 0x0d0c0,), # Hangul Syllable Ta + (0x0d0dc, 0x0d0dc,), # Hangul Syllable Tae + (0x0d0f8, 0x0d0f8,), # Hangul Syllable Tya + (0x0d114, 0x0d114,), # Hangul Syllable Tyae + (0x0d130, 0x0d130,), # Hangul Syllable Teo + (0x0d14c, 0x0d14c,), # Hangul Syllable Te + (0x0d168, 0x0d168,), # Hangul Syllable Tyeo + (0x0d184, 0x0d184,), # Hangul Syllable Tye + (0x0d1a0, 0x0d1a0,), # Hangul Syllable To + (0x0d1bc, 0x0d1bc,), # Hangul Syllable Twa + (0x0d1d8, 0x0d1d8,), # Hangul Syllable Twae + (0x0d1f4, 0x0d1f4,), # Hangul Syllable Toe + (0x0d210, 0x0d210,), # Hangul Syllable Tyo + (0x0d22c, 0x0d22c,), # Hangul Syllable Tu + (0x0d248, 0x0d248,), # Hangul Syllable Tweo + (0x0d264, 0x0d264,), # Hangul Syllable Twe + 
(0x0d280, 0x0d280,), # Hangul Syllable Twi + (0x0d29c, 0x0d29c,), # Hangul Syllable Tyu + (0x0d2b8, 0x0d2b8,), # Hangul Syllable Teu + (0x0d2d4, 0x0d2d4,), # Hangul Syllable Tyi + (0x0d2f0, 0x0d2f0,), # Hangul Syllable Ti + (0x0d30c, 0x0d30c,), # Hangul Syllable Pa + (0x0d328, 0x0d328,), # Hangul Syllable Pae + (0x0d344, 0x0d344,), # Hangul Syllable Pya + (0x0d360, 0x0d360,), # Hangul Syllable Pyae + (0x0d37c, 0x0d37c,), # Hangul Syllable Peo + (0x0d398, 0x0d398,), # Hangul Syllable Pe + (0x0d3b4, 0x0d3b4,), # Hangul Syllable Pyeo + (0x0d3d0, 0x0d3d0,), # Hangul Syllable Pye + (0x0d3ec, 0x0d3ec,), # Hangul Syllable Po + (0x0d408, 0x0d408,), # Hangul Syllable Pwa + (0x0d424, 0x0d424,), # Hangul Syllable Pwae + (0x0d440, 0x0d440,), # Hangul Syllable Poe + (0x0d45c, 0x0d45c,), # Hangul Syllable Pyo + (0x0d478, 0x0d478,), # Hangul Syllable Pu + (0x0d494, 0x0d494,), # Hangul Syllable Pweo + (0x0d4b0, 0x0d4b0,), # Hangul Syllable Pwe + (0x0d4cc, 0x0d4cc,), # Hangul Syllable Pwi + (0x0d4e8, 0x0d4e8,), # Hangul Syllable Pyu + (0x0d504, 0x0d504,), # Hangul Syllable Peu + (0x0d520, 0x0d520,), # Hangul Syllable Pyi + (0x0d53c, 0x0d53c,), # Hangul Syllable Pi + (0x0d558, 0x0d558,), # Hangul Syllable Ha + (0x0d574, 0x0d574,), # Hangul Syllable Hae + (0x0d590, 0x0d590,), # Hangul Syllable Hya + (0x0d5ac, 0x0d5ac,), # Hangul Syllable Hyae + (0x0d5c8, 0x0d5c8,), # Hangul Syllable Heo + (0x0d5e4, 0x0d5e4,), # Hangul Syllable He + (0x0d600, 0x0d600,), # Hangul Syllable Hyeo + (0x0d61c, 0x0d61c,), # Hangul Syllable Hye + (0x0d638, 0x0d638,), # Hangul Syllable Ho + (0x0d654, 0x0d654,), # Hangul Syllable Hwa + (0x0d670, 0x0d670,), # Hangul Syllable Hwae + (0x0d68c, 0x0d68c,), # Hangul Syllable Hoe + (0x0d6a8, 0x0d6a8,), # Hangul Syllable Hyo + (0x0d6c4, 0x0d6c4,), # Hangul Syllable Hu + (0x0d6e0, 0x0d6e0,), # Hangul Syllable Hweo + (0x0d6fc, 0x0d6fc,), # Hangul Syllable Hwe + (0x0d718, 0x0d718,), # Hangul Syllable Hwi + (0x0d734, 0x0d734,), # Hangul Syllable Hyu + (0x0d750, 0x0d750,), 
# Hangul Syllable Heu + (0x0d76c, 0x0d76c,), # Hangul Syllable Hyi + (0x0d788, 0x0d788,), # Hangul Syllable Hi +) + +GRAPHEME_LVT = ( + # Source: GraphemeBreakProperty-17.0.0.txt + # Date: 2025-06-30, 06:20:23 GMT + # + (0x0ac01, 0x0ac1b,), # Hangul Syllable Gag ..Hangul Syllable Gah + (0x0ac1d, 0x0ac37,), # Hangul Syllable Gaeg ..Hangul Syllable Gaeh + (0x0ac39, 0x0ac53,), # Hangul Syllable Gyag ..Hangul Syllable Gyah + (0x0ac55, 0x0ac6f,), # Hangul Syllable Gyaeg ..Hangul Syllable Gyaeh + (0x0ac71, 0x0ac8b,), # Hangul Syllable Geog ..Hangul Syllable Geoh + (0x0ac8d, 0x0aca7,), # Hangul Syllable Geg ..Hangul Syllable Geh + (0x0aca9, 0x0acc3,), # Hangul Syllable Gyeog ..Hangul Syllable Gyeoh + (0x0acc5, 0x0acdf,), # Hangul Syllable Gyeg ..Hangul Syllable Gyeh + (0x0ace1, 0x0acfb,), # Hangul Syllable Gog ..Hangul Syllable Goh + (0x0acfd, 0x0ad17,), # Hangul Syllable Gwag ..Hangul Syllable Gwah + (0x0ad19, 0x0ad33,), # Hangul Syllable Gwaeg ..Hangul Syllable Gwaeh + (0x0ad35, 0x0ad4f,), # Hangul Syllable Goeg ..Hangul Syllable Goeh + (0x0ad51, 0x0ad6b,), # Hangul Syllable Gyog ..Hangul Syllable Gyoh + (0x0ad6d, 0x0ad87,), # Hangul Syllable Gug ..Hangul Syllable Guh + (0x0ad89, 0x0ada3,), # Hangul Syllable Gweog ..Hangul Syllable Gweoh + (0x0ada5, 0x0adbf,), # Hangul Syllable Gweg ..Hangul Syllable Gweh + (0x0adc1, 0x0addb,), # Hangul Syllable Gwig ..Hangul Syllable Gwih + (0x0addd, 0x0adf7,), # Hangul Syllable Gyug ..Hangul Syllable Gyuh + (0x0adf9, 0x0ae13,), # Hangul Syllable Geug ..Hangul Syllable Geuh + (0x0ae15, 0x0ae2f,), # Hangul Syllable Gyig ..Hangul Syllable Gyih + (0x0ae31, 0x0ae4b,), # Hangul Syllable Gig ..Hangul Syllable Gih + (0x0ae4d, 0x0ae67,), # Hangul Syllable Ggag ..Hangul Syllable Ggah + (0x0ae69, 0x0ae83,), # Hangul Syllable Ggaeg ..Hangul Syllable Ggaeh + (0x0ae85, 0x0ae9f,), # Hangul Syllable Ggyag ..Hangul Syllable Ggyah + (0x0aea1, 0x0aebb,), # Hangul Syllable Ggyaeg ..Hangul Syllable Ggyaeh + (0x0aebd, 0x0aed7,), # Hangul Syllable Ggeog 
..Hangul Syllable Ggeoh + (0x0aed9, 0x0aef3,), # Hangul Syllable Ggeg ..Hangul Syllable Ggeh + (0x0aef5, 0x0af0f,), # Hangul Syllable Ggyeog ..Hangul Syllable Ggyeoh + (0x0af11, 0x0af2b,), # Hangul Syllable Ggyeg ..Hangul Syllable Ggyeh + (0x0af2d, 0x0af47,), # Hangul Syllable Ggog ..Hangul Syllable Ggoh + (0x0af49, 0x0af63,), # Hangul Syllable Ggwag ..Hangul Syllable Ggwah + (0x0af65, 0x0af7f,), # Hangul Syllable Ggwaeg ..Hangul Syllable Ggwaeh + (0x0af81, 0x0af9b,), # Hangul Syllable Ggoeg ..Hangul Syllable Ggoeh + (0x0af9d, 0x0afb7,), # Hangul Syllable Ggyog ..Hangul Syllable Ggyoh + (0x0afb9, 0x0afd3,), # Hangul Syllable Ggug ..Hangul Syllable Gguh + (0x0afd5, 0x0afef,), # Hangul Syllable Ggweog ..Hangul Syllable Ggweoh + (0x0aff1, 0x0b00b,), # Hangul Syllable Ggweg ..Hangul Syllable Ggweh + (0x0b00d, 0x0b027,), # Hangul Syllable Ggwig ..Hangul Syllable Ggwih + (0x0b029, 0x0b043,), # Hangul Syllable Ggyug ..Hangul Syllable Ggyuh + (0x0b045, 0x0b05f,), # Hangul Syllable Ggeug ..Hangul Syllable Ggeuh + (0x0b061, 0x0b07b,), # Hangul Syllable Ggyig ..Hangul Syllable Ggyih + (0x0b07d, 0x0b097,), # Hangul Syllable Ggig ..Hangul Syllable Ggih + (0x0b099, 0x0b0b3,), # Hangul Syllable Nag ..Hangul Syllable Nah + (0x0b0b5, 0x0b0cf,), # Hangul Syllable Naeg ..Hangul Syllable Naeh + (0x0b0d1, 0x0b0eb,), # Hangul Syllable Nyag ..Hangul Syllable Nyah + (0x0b0ed, 0x0b107,), # Hangul Syllable Nyaeg ..Hangul Syllable Nyaeh + (0x0b109, 0x0b123,), # Hangul Syllable Neog ..Hangul Syllable Neoh + (0x0b125, 0x0b13f,), # Hangul Syllable Neg ..Hangul Syllable Neh + (0x0b141, 0x0b15b,), # Hangul Syllable Nyeog ..Hangul Syllable Nyeoh + (0x0b15d, 0x0b177,), # Hangul Syllable Nyeg ..Hangul Syllable Nyeh + (0x0b179, 0x0b193,), # Hangul Syllable Nog ..Hangul Syllable Noh + (0x0b195, 0x0b1af,), # Hangul Syllable Nwag ..Hangul Syllable Nwah + (0x0b1b1, 0x0b1cb,), # Hangul Syllable Nwaeg ..Hangul Syllable Nwaeh + (0x0b1cd, 0x0b1e7,), # Hangul Syllable Noeg ..Hangul Syllable Noeh + (0x0b1e9, 
0x0b203,), # Hangul Syllable Nyog ..Hangul Syllable Nyoh + (0x0b205, 0x0b21f,), # Hangul Syllable Nug ..Hangul Syllable Nuh + (0x0b221, 0x0b23b,), # Hangul Syllable Nweog ..Hangul Syllable Nweoh + (0x0b23d, 0x0b257,), # Hangul Syllable Nweg ..Hangul Syllable Nweh + (0x0b259, 0x0b273,), # Hangul Syllable Nwig ..Hangul Syllable Nwih + (0x0b275, 0x0b28f,), # Hangul Syllable Nyug ..Hangul Syllable Nyuh + (0x0b291, 0x0b2ab,), # Hangul Syllable Neug ..Hangul Syllable Neuh + (0x0b2ad, 0x0b2c7,), # Hangul Syllable Nyig ..Hangul Syllable Nyih + (0x0b2c9, 0x0b2e3,), # Hangul Syllable Nig ..Hangul Syllable Nih + (0x0b2e5, 0x0b2ff,), # Hangul Syllable Dag ..Hangul Syllable Dah + (0x0b301, 0x0b31b,), # Hangul Syllable Daeg ..Hangul Syllable Daeh + (0x0b31d, 0x0b337,), # Hangul Syllable Dyag ..Hangul Syllable Dyah + (0x0b339, 0x0b353,), # Hangul Syllable Dyaeg ..Hangul Syllable Dyaeh + (0x0b355, 0x0b36f,), # Hangul Syllable Deog ..Hangul Syllable Deoh + (0x0b371, 0x0b38b,), # Hangul Syllable Deg ..Hangul Syllable Deh + (0x0b38d, 0x0b3a7,), # Hangul Syllable Dyeog ..Hangul Syllable Dyeoh + (0x0b3a9, 0x0b3c3,), # Hangul Syllable Dyeg ..Hangul Syllable Dyeh + (0x0b3c5, 0x0b3df,), # Hangul Syllable Dog ..Hangul Syllable Doh + (0x0b3e1, 0x0b3fb,), # Hangul Syllable Dwag ..Hangul Syllable Dwah + (0x0b3fd, 0x0b417,), # Hangul Syllable Dwaeg ..Hangul Syllable Dwaeh + (0x0b419, 0x0b433,), # Hangul Syllable Doeg ..Hangul Syllable Doeh + (0x0b435, 0x0b44f,), # Hangul Syllable Dyog ..Hangul Syllable Dyoh + (0x0b451, 0x0b46b,), # Hangul Syllable Dug ..Hangul Syllable Duh + (0x0b46d, 0x0b487,), # Hangul Syllable Dweog ..Hangul Syllable Dweoh + (0x0b489, 0x0b4a3,), # Hangul Syllable Dweg ..Hangul Syllable Dweh + (0x0b4a5, 0x0b4bf,), # Hangul Syllable Dwig ..Hangul Syllable Dwih + (0x0b4c1, 0x0b4db,), # Hangul Syllable Dyug ..Hangul Syllable Dyuh + (0x0b4dd, 0x0b4f7,), # Hangul Syllable Deug ..Hangul Syllable Deuh + (0x0b4f9, 0x0b513,), # Hangul Syllable Dyig ..Hangul Syllable Dyih + (0x0b515, 
0x0b52f,), # Hangul Syllable Dig ..Hangul Syllable Dih + (0x0b531, 0x0b54b,), # Hangul Syllable Ddag ..Hangul Syllable Ddah + (0x0b54d, 0x0b567,), # Hangul Syllable Ddaeg ..Hangul Syllable Ddaeh + (0x0b569, 0x0b583,), # Hangul Syllable Ddyag ..Hangul Syllable Ddyah + (0x0b585, 0x0b59f,), # Hangul Syllable Ddyaeg ..Hangul Syllable Ddyaeh + (0x0b5a1, 0x0b5bb,), # Hangul Syllable Ddeog ..Hangul Syllable Ddeoh + (0x0b5bd, 0x0b5d7,), # Hangul Syllable Ddeg ..Hangul Syllable Ddeh + (0x0b5d9, 0x0b5f3,), # Hangul Syllable Ddyeog ..Hangul Syllable Ddyeoh + (0x0b5f5, 0x0b60f,), # Hangul Syllable Ddyeg ..Hangul Syllable Ddyeh + (0x0b611, 0x0b62b,), # Hangul Syllable Ddog ..Hangul Syllable Ddoh + (0x0b62d, 0x0b647,), # Hangul Syllable Ddwag ..Hangul Syllable Ddwah + (0x0b649, 0x0b663,), # Hangul Syllable Ddwaeg ..Hangul Syllable Ddwaeh + (0x0b665, 0x0b67f,), # Hangul Syllable Ddoeg ..Hangul Syllable Ddoeh + (0x0b681, 0x0b69b,), # Hangul Syllable Ddyog ..Hangul Syllable Ddyoh + (0x0b69d, 0x0b6b7,), # Hangul Syllable Ddug ..Hangul Syllable Dduh + (0x0b6b9, 0x0b6d3,), # Hangul Syllable Ddweog ..Hangul Syllable Ddweoh + (0x0b6d5, 0x0b6ef,), # Hangul Syllable Ddweg ..Hangul Syllable Ddweh + (0x0b6f1, 0x0b70b,), # Hangul Syllable Ddwig ..Hangul Syllable Ddwih + (0x0b70d, 0x0b727,), # Hangul Syllable Ddyug ..Hangul Syllable Ddyuh + (0x0b729, 0x0b743,), # Hangul Syllable Ddeug ..Hangul Syllable Ddeuh + (0x0b745, 0x0b75f,), # Hangul Syllable Ddyig ..Hangul Syllable Ddyih + (0x0b761, 0x0b77b,), # Hangul Syllable Ddig ..Hangul Syllable Ddih + (0x0b77d, 0x0b797,), # Hangul Syllable Rag ..Hangul Syllable Rah + (0x0b799, 0x0b7b3,), # Hangul Syllable Raeg ..Hangul Syllable Raeh + (0x0b7b5, 0x0b7cf,), # Hangul Syllable Ryag ..Hangul Syllable Ryah + (0x0b7d1, 0x0b7eb,), # Hangul Syllable Ryaeg ..Hangul Syllable Ryaeh + (0x0b7ed, 0x0b807,), # Hangul Syllable Reog ..Hangul Syllable Reoh + (0x0b809, 0x0b823,), # Hangul Syllable Reg ..Hangul Syllable Reh + (0x0b825, 0x0b83f,), # Hangul Syllable 
Ryeog ..Hangul Syllable Ryeoh + (0x0b841, 0x0b85b,), # Hangul Syllable Ryeg ..Hangul Syllable Ryeh + (0x0b85d, 0x0b877,), # Hangul Syllable Rog ..Hangul Syllable Roh + (0x0b879, 0x0b893,), # Hangul Syllable Rwag ..Hangul Syllable Rwah + (0x0b895, 0x0b8af,), # Hangul Syllable Rwaeg ..Hangul Syllable Rwaeh + (0x0b8b1, 0x0b8cb,), # Hangul Syllable Roeg ..Hangul Syllable Roeh + (0x0b8cd, 0x0b8e7,), # Hangul Syllable Ryog ..Hangul Syllable Ryoh + (0x0b8e9, 0x0b903,), # Hangul Syllable Rug ..Hangul Syllable Ruh + (0x0b905, 0x0b91f,), # Hangul Syllable Rweog ..Hangul Syllable Rweoh + (0x0b921, 0x0b93b,), # Hangul Syllable Rweg ..Hangul Syllable Rweh + (0x0b93d, 0x0b957,), # Hangul Syllable Rwig ..Hangul Syllable Rwih + (0x0b959, 0x0b973,), # Hangul Syllable Ryug ..Hangul Syllable Ryuh + (0x0b975, 0x0b98f,), # Hangul Syllable Reug ..Hangul Syllable Reuh + (0x0b991, 0x0b9ab,), # Hangul Syllable Ryig ..Hangul Syllable Ryih + (0x0b9ad, 0x0b9c7,), # Hangul Syllable Rig ..Hangul Syllable Rih + (0x0b9c9, 0x0b9e3,), # Hangul Syllable Mag ..Hangul Syllable Mah + (0x0b9e5, 0x0b9ff,), # Hangul Syllable Maeg ..Hangul Syllable Maeh + (0x0ba01, 0x0ba1b,), # Hangul Syllable Myag ..Hangul Syllable Myah + (0x0ba1d, 0x0ba37,), # Hangul Syllable Myaeg ..Hangul Syllable Myaeh + (0x0ba39, 0x0ba53,), # Hangul Syllable Meog ..Hangul Syllable Meoh + (0x0ba55, 0x0ba6f,), # Hangul Syllable Meg ..Hangul Syllable Meh + (0x0ba71, 0x0ba8b,), # Hangul Syllable Myeog ..Hangul Syllable Myeoh + (0x0ba8d, 0x0baa7,), # Hangul Syllable Myeg ..Hangul Syllable Myeh + (0x0baa9, 0x0bac3,), # Hangul Syllable Mog ..Hangul Syllable Moh + (0x0bac5, 0x0badf,), # Hangul Syllable Mwag ..Hangul Syllable Mwah + (0x0bae1, 0x0bafb,), # Hangul Syllable Mwaeg ..Hangul Syllable Mwaeh + (0x0bafd, 0x0bb17,), # Hangul Syllable Moeg ..Hangul Syllable Moeh + (0x0bb19, 0x0bb33,), # Hangul Syllable Myog ..Hangul Syllable Myoh + (0x0bb35, 0x0bb4f,), # Hangul Syllable Mug ..Hangul Syllable Muh + (0x0bb51, 0x0bb6b,), # Hangul Syllable 
Mweog ..Hangul Syllable Mweoh + (0x0bb6d, 0x0bb87,), # Hangul Syllable Mweg ..Hangul Syllable Mweh + (0x0bb89, 0x0bba3,), # Hangul Syllable Mwig ..Hangul Syllable Mwih + (0x0bba5, 0x0bbbf,), # Hangul Syllable Myug ..Hangul Syllable Myuh + (0x0bbc1, 0x0bbdb,), # Hangul Syllable Meug ..Hangul Syllable Meuh + (0x0bbdd, 0x0bbf7,), # Hangul Syllable Myig ..Hangul Syllable Myih + (0x0bbf9, 0x0bc13,), # Hangul Syllable Mig ..Hangul Syllable Mih + (0x0bc15, 0x0bc2f,), # Hangul Syllable Bag ..Hangul Syllable Bah + (0x0bc31, 0x0bc4b,), # Hangul Syllable Baeg ..Hangul Syllable Baeh + (0x0bc4d, 0x0bc67,), # Hangul Syllable Byag ..Hangul Syllable Byah + (0x0bc69, 0x0bc83,), # Hangul Syllable Byaeg ..Hangul Syllable Byaeh + (0x0bc85, 0x0bc9f,), # Hangul Syllable Beog ..Hangul Syllable Beoh + (0x0bca1, 0x0bcbb,), # Hangul Syllable Beg ..Hangul Syllable Beh + (0x0bcbd, 0x0bcd7,), # Hangul Syllable Byeog ..Hangul Syllable Byeoh + (0x0bcd9, 0x0bcf3,), # Hangul Syllable Byeg ..Hangul Syllable Byeh + (0x0bcf5, 0x0bd0f,), # Hangul Syllable Bog ..Hangul Syllable Boh + (0x0bd11, 0x0bd2b,), # Hangul Syllable Bwag ..Hangul Syllable Bwah + (0x0bd2d, 0x0bd47,), # Hangul Syllable Bwaeg ..Hangul Syllable Bwaeh + (0x0bd49, 0x0bd63,), # Hangul Syllable Boeg ..Hangul Syllable Boeh + (0x0bd65, 0x0bd7f,), # Hangul Syllable Byog ..Hangul Syllable Byoh + (0x0bd81, 0x0bd9b,), # Hangul Syllable Bug ..Hangul Syllable Buh + (0x0bd9d, 0x0bdb7,), # Hangul Syllable Bweog ..Hangul Syllable Bweoh + (0x0bdb9, 0x0bdd3,), # Hangul Syllable Bweg ..Hangul Syllable Bweh + (0x0bdd5, 0x0bdef,), # Hangul Syllable Bwig ..Hangul Syllable Bwih + (0x0bdf1, 0x0be0b,), # Hangul Syllable Byug ..Hangul Syllable Byuh + (0x0be0d, 0x0be27,), # Hangul Syllable Beug ..Hangul Syllable Beuh + (0x0be29, 0x0be43,), # Hangul Syllable Byig ..Hangul Syllable Byih + (0x0be45, 0x0be5f,), # Hangul Syllable Big ..Hangul Syllable Bih + (0x0be61, 0x0be7b,), # Hangul Syllable Bbag ..Hangul Syllable Bbah + (0x0be7d, 0x0be97,), # Hangul Syllable 
Bbaeg ..Hangul Syllable Bbaeh + (0x0be99, 0x0beb3,), # Hangul Syllable Bbyag ..Hangul Syllable Bbyah + (0x0beb5, 0x0becf,), # Hangul Syllable Bbyaeg ..Hangul Syllable Bbyaeh + (0x0bed1, 0x0beeb,), # Hangul Syllable Bbeog ..Hangul Syllable Bbeoh + (0x0beed, 0x0bf07,), # Hangul Syllable Bbeg ..Hangul Syllable Bbeh + (0x0bf09, 0x0bf23,), # Hangul Syllable Bbyeog ..Hangul Syllable Bbyeoh + (0x0bf25, 0x0bf3f,), # Hangul Syllable Bbyeg ..Hangul Syllable Bbyeh + (0x0bf41, 0x0bf5b,), # Hangul Syllable Bbog ..Hangul Syllable Bboh + (0x0bf5d, 0x0bf77,), # Hangul Syllable Bbwag ..Hangul Syllable Bbwah + (0x0bf79, 0x0bf93,), # Hangul Syllable Bbwaeg ..Hangul Syllable Bbwaeh + (0x0bf95, 0x0bfaf,), # Hangul Syllable Bboeg ..Hangul Syllable Bboeh + (0x0bfb1, 0x0bfcb,), # Hangul Syllable Bbyog ..Hangul Syllable Bbyoh + (0x0bfcd, 0x0bfe7,), # Hangul Syllable Bbug ..Hangul Syllable Bbuh + (0x0bfe9, 0x0c003,), # Hangul Syllable Bbweog ..Hangul Syllable Bbweoh + (0x0c005, 0x0c01f,), # Hangul Syllable Bbweg ..Hangul Syllable Bbweh + (0x0c021, 0x0c03b,), # Hangul Syllable Bbwig ..Hangul Syllable Bbwih + (0x0c03d, 0x0c057,), # Hangul Syllable Bbyug ..Hangul Syllable Bbyuh + (0x0c059, 0x0c073,), # Hangul Syllable Bbeug ..Hangul Syllable Bbeuh + (0x0c075, 0x0c08f,), # Hangul Syllable Bbyig ..Hangul Syllable Bbyih + (0x0c091, 0x0c0ab,), # Hangul Syllable Bbig ..Hangul Syllable Bbih + (0x0c0ad, 0x0c0c7,), # Hangul Syllable Sag ..Hangul Syllable Sah + (0x0c0c9, 0x0c0e3,), # Hangul Syllable Saeg ..Hangul Syllable Saeh + (0x0c0e5, 0x0c0ff,), # Hangul Syllable Syag ..Hangul Syllable Syah + (0x0c101, 0x0c11b,), # Hangul Syllable Syaeg ..Hangul Syllable Syaeh + (0x0c11d, 0x0c137,), # Hangul Syllable Seog ..Hangul Syllable Seoh + (0x0c139, 0x0c153,), # Hangul Syllable Seg ..Hangul Syllable Seh + (0x0c155, 0x0c16f,), # Hangul Syllable Syeog ..Hangul Syllable Syeoh + (0x0c171, 0x0c18b,), # Hangul Syllable Syeg ..Hangul Syllable Syeh + (0x0c18d, 0x0c1a7,), # Hangul Syllable Sog ..Hangul Syllable Soh + 
(0x0c1a9, 0x0c1c3,), # Hangul Syllable Swag ..Hangul Syllable Swah + (0x0c1c5, 0x0c1df,), # Hangul Syllable Swaeg ..Hangul Syllable Swaeh + (0x0c1e1, 0x0c1fb,), # Hangul Syllable Soeg ..Hangul Syllable Soeh + (0x0c1fd, 0x0c217,), # Hangul Syllable Syog ..Hangul Syllable Syoh + (0x0c219, 0x0c233,), # Hangul Syllable Sug ..Hangul Syllable Suh + (0x0c235, 0x0c24f,), # Hangul Syllable Sweog ..Hangul Syllable Sweoh + (0x0c251, 0x0c26b,), # Hangul Syllable Sweg ..Hangul Syllable Sweh + (0x0c26d, 0x0c287,), # Hangul Syllable Swig ..Hangul Syllable Swih + (0x0c289, 0x0c2a3,), # Hangul Syllable Syug ..Hangul Syllable Syuh + (0x0c2a5, 0x0c2bf,), # Hangul Syllable Seug ..Hangul Syllable Seuh + (0x0c2c1, 0x0c2db,), # Hangul Syllable Syig ..Hangul Syllable Syih + (0x0c2dd, 0x0c2f7,), # Hangul Syllable Sig ..Hangul Syllable Sih + (0x0c2f9, 0x0c313,), # Hangul Syllable Ssag ..Hangul Syllable Ssah + (0x0c315, 0x0c32f,), # Hangul Syllable Ssaeg ..Hangul Syllable Ssaeh + (0x0c331, 0x0c34b,), # Hangul Syllable Ssyag ..Hangul Syllable Ssyah + (0x0c34d, 0x0c367,), # Hangul Syllable Ssyaeg ..Hangul Syllable Ssyaeh + (0x0c369, 0x0c383,), # Hangul Syllable Sseog ..Hangul Syllable Sseoh + (0x0c385, 0x0c39f,), # Hangul Syllable Sseg ..Hangul Syllable Sseh + (0x0c3a1, 0x0c3bb,), # Hangul Syllable Ssyeog ..Hangul Syllable Ssyeoh + (0x0c3bd, 0x0c3d7,), # Hangul Syllable Ssyeg ..Hangul Syllable Ssyeh + (0x0c3d9, 0x0c3f3,), # Hangul Syllable Ssog ..Hangul Syllable Ssoh + (0x0c3f5, 0x0c40f,), # Hangul Syllable Sswag ..Hangul Syllable Sswah + (0x0c411, 0x0c42b,), # Hangul Syllable Sswaeg ..Hangul Syllable Sswaeh + (0x0c42d, 0x0c447,), # Hangul Syllable Ssoeg ..Hangul Syllable Ssoeh + (0x0c449, 0x0c463,), # Hangul Syllable Ssyog ..Hangul Syllable Ssyoh + (0x0c465, 0x0c47f,), # Hangul Syllable Ssug ..Hangul Syllable Ssuh + (0x0c481, 0x0c49b,), # Hangul Syllable Ssweog ..Hangul Syllable Ssweoh + (0x0c49d, 0x0c4b7,), # Hangul Syllable Ssweg ..Hangul Syllable Ssweh + (0x0c4b9, 0x0c4d3,), # Hangul 
Syllable Sswig ..Hangul Syllable Sswih + (0x0c4d5, 0x0c4ef,), # Hangul Syllable Ssyug ..Hangul Syllable Ssyuh + (0x0c4f1, 0x0c50b,), # Hangul Syllable Sseug ..Hangul Syllable Sseuh + (0x0c50d, 0x0c527,), # Hangul Syllable Ssyig ..Hangul Syllable Ssyih + (0x0c529, 0x0c543,), # Hangul Syllable Ssig ..Hangul Syllable Ssih + (0x0c545, 0x0c55f,), # Hangul Syllable Ag ..Hangul Syllable Ah + (0x0c561, 0x0c57b,), # Hangul Syllable Aeg ..Hangul Syllable Aeh + (0x0c57d, 0x0c597,), # Hangul Syllable Yag ..Hangul Syllable Yah + (0x0c599, 0x0c5b3,), # Hangul Syllable Yaeg ..Hangul Syllable Yaeh + (0x0c5b5, 0x0c5cf,), # Hangul Syllable Eog ..Hangul Syllable Eoh + (0x0c5d1, 0x0c5eb,), # Hangul Syllable Eg ..Hangul Syllable Eh + (0x0c5ed, 0x0c607,), # Hangul Syllable Yeog ..Hangul Syllable Yeoh + (0x0c609, 0x0c623,), # Hangul Syllable Yeg ..Hangul Syllable Yeh + (0x0c625, 0x0c63f,), # Hangul Syllable Og ..Hangul Syllable Oh + (0x0c641, 0x0c65b,), # Hangul Syllable Wag ..Hangul Syllable Wah + (0x0c65d, 0x0c677,), # Hangul Syllable Waeg ..Hangul Syllable Waeh + (0x0c679, 0x0c693,), # Hangul Syllable Oeg ..Hangul Syllable Oeh + (0x0c695, 0x0c6af,), # Hangul Syllable Yog ..Hangul Syllable Yoh + (0x0c6b1, 0x0c6cb,), # Hangul Syllable Ug ..Hangul Syllable Uh + (0x0c6cd, 0x0c6e7,), # Hangul Syllable Weog ..Hangul Syllable Weoh + (0x0c6e9, 0x0c703,), # Hangul Syllable Weg ..Hangul Syllable Weh + (0x0c705, 0x0c71f,), # Hangul Syllable Wig ..Hangul Syllable Wih + (0x0c721, 0x0c73b,), # Hangul Syllable Yug ..Hangul Syllable Yuh + (0x0c73d, 0x0c757,), # Hangul Syllable Eug ..Hangul Syllable Euh + (0x0c759, 0x0c773,), # Hangul Syllable Yig ..Hangul Syllable Yih + (0x0c775, 0x0c78f,), # Hangul Syllable Ig ..Hangul Syllable Ih + (0x0c791, 0x0c7ab,), # Hangul Syllable Jag ..Hangul Syllable Jah + (0x0c7ad, 0x0c7c7,), # Hangul Syllable Jaeg ..Hangul Syllable Jaeh + (0x0c7c9, 0x0c7e3,), # Hangul Syllable Jyag ..Hangul Syllable Jyah + (0x0c7e5, 0x0c7ff,), # Hangul Syllable Jyaeg ..Hangul Syllable 
Jyaeh + (0x0c801, 0x0c81b,), # Hangul Syllable Jeog ..Hangul Syllable Jeoh + (0x0c81d, 0x0c837,), # Hangul Syllable Jeg ..Hangul Syllable Jeh + (0x0c839, 0x0c853,), # Hangul Syllable Jyeog ..Hangul Syllable Jyeoh + (0x0c855, 0x0c86f,), # Hangul Syllable Jyeg ..Hangul Syllable Jyeh + (0x0c871, 0x0c88b,), # Hangul Syllable Jog ..Hangul Syllable Joh + (0x0c88d, 0x0c8a7,), # Hangul Syllable Jwag ..Hangul Syllable Jwah + (0x0c8a9, 0x0c8c3,), # Hangul Syllable Jwaeg ..Hangul Syllable Jwaeh + (0x0c8c5, 0x0c8df,), # Hangul Syllable Joeg ..Hangul Syllable Joeh + (0x0c8e1, 0x0c8fb,), # Hangul Syllable Jyog ..Hangul Syllable Jyoh + (0x0c8fd, 0x0c917,), # Hangul Syllable Jug ..Hangul Syllable Juh + (0x0c919, 0x0c933,), # Hangul Syllable Jweog ..Hangul Syllable Jweoh + (0x0c935, 0x0c94f,), # Hangul Syllable Jweg ..Hangul Syllable Jweh + (0x0c951, 0x0c96b,), # Hangul Syllable Jwig ..Hangul Syllable Jwih + (0x0c96d, 0x0c987,), # Hangul Syllable Jyug ..Hangul Syllable Jyuh + (0x0c989, 0x0c9a3,), # Hangul Syllable Jeug ..Hangul Syllable Jeuh + (0x0c9a5, 0x0c9bf,), # Hangul Syllable Jyig ..Hangul Syllable Jyih + (0x0c9c1, 0x0c9db,), # Hangul Syllable Jig ..Hangul Syllable Jih + (0x0c9dd, 0x0c9f7,), # Hangul Syllable Jjag ..Hangul Syllable Jjah + (0x0c9f9, 0x0ca13,), # Hangul Syllable Jjaeg ..Hangul Syllable Jjaeh + (0x0ca15, 0x0ca2f,), # Hangul Syllable Jjyag ..Hangul Syllable Jjyah + (0x0ca31, 0x0ca4b,), # Hangul Syllable Jjyaeg ..Hangul Syllable Jjyaeh + (0x0ca4d, 0x0ca67,), # Hangul Syllable Jjeog ..Hangul Syllable Jjeoh + (0x0ca69, 0x0ca83,), # Hangul Syllable Jjeg ..Hangul Syllable Jjeh + (0x0ca85, 0x0ca9f,), # Hangul Syllable Jjyeog ..Hangul Syllable Jjyeoh + (0x0caa1, 0x0cabb,), # Hangul Syllable Jjyeg ..Hangul Syllable Jjyeh + (0x0cabd, 0x0cad7,), # Hangul Syllable Jjog ..Hangul Syllable Jjoh + (0x0cad9, 0x0caf3,), # Hangul Syllable Jjwag ..Hangul Syllable Jjwah + (0x0caf5, 0x0cb0f,), # Hangul Syllable Jjwaeg ..Hangul Syllable Jjwaeh + (0x0cb11, 0x0cb2b,), # Hangul Syllable 
Jjoeg ..Hangul Syllable Jjoeh + (0x0cb2d, 0x0cb47,), # Hangul Syllable Jjyog ..Hangul Syllable Jjyoh + (0x0cb49, 0x0cb63,), # Hangul Syllable Jjug ..Hangul Syllable Jjuh + (0x0cb65, 0x0cb7f,), # Hangul Syllable Jjweog ..Hangul Syllable Jjweoh + (0x0cb81, 0x0cb9b,), # Hangul Syllable Jjweg ..Hangul Syllable Jjweh + (0x0cb9d, 0x0cbb7,), # Hangul Syllable Jjwig ..Hangul Syllable Jjwih + (0x0cbb9, 0x0cbd3,), # Hangul Syllable Jjyug ..Hangul Syllable Jjyuh + (0x0cbd5, 0x0cbef,), # Hangul Syllable Jjeug ..Hangul Syllable Jjeuh + (0x0cbf1, 0x0cc0b,), # Hangul Syllable Jjyig ..Hangul Syllable Jjyih + (0x0cc0d, 0x0cc27,), # Hangul Syllable Jjig ..Hangul Syllable Jjih + (0x0cc29, 0x0cc43,), # Hangul Syllable Cag ..Hangul Syllable Cah + (0x0cc45, 0x0cc5f,), # Hangul Syllable Caeg ..Hangul Syllable Caeh + (0x0cc61, 0x0cc7b,), # Hangul Syllable Cyag ..Hangul Syllable Cyah + (0x0cc7d, 0x0cc97,), # Hangul Syllable Cyaeg ..Hangul Syllable Cyaeh + (0x0cc99, 0x0ccb3,), # Hangul Syllable Ceog ..Hangul Syllable Ceoh + (0x0ccb5, 0x0cccf,), # Hangul Syllable Ceg ..Hangul Syllable Ceh + (0x0ccd1, 0x0cceb,), # Hangul Syllable Cyeog ..Hangul Syllable Cyeoh + (0x0cced, 0x0cd07,), # Hangul Syllable Cyeg ..Hangul Syllable Cyeh + (0x0cd09, 0x0cd23,), # Hangul Syllable Cog ..Hangul Syllable Coh + (0x0cd25, 0x0cd3f,), # Hangul Syllable Cwag ..Hangul Syllable Cwah + (0x0cd41, 0x0cd5b,), # Hangul Syllable Cwaeg ..Hangul Syllable Cwaeh + (0x0cd5d, 0x0cd77,), # Hangul Syllable Coeg ..Hangul Syllable Coeh + (0x0cd79, 0x0cd93,), # Hangul Syllable Cyog ..Hangul Syllable Cyoh + (0x0cd95, 0x0cdaf,), # Hangul Syllable Cug ..Hangul Syllable Cuh + (0x0cdb1, 0x0cdcb,), # Hangul Syllable Cweog ..Hangul Syllable Cweoh + (0x0cdcd, 0x0cde7,), # Hangul Syllable Cweg ..Hangul Syllable Cweh + (0x0cde9, 0x0ce03,), # Hangul Syllable Cwig ..Hangul Syllable Cwih + (0x0ce05, 0x0ce1f,), # Hangul Syllable Cyug ..Hangul Syllable Cyuh + (0x0ce21, 0x0ce3b,), # Hangul Syllable Ceug ..Hangul Syllable Ceuh + (0x0ce3d, 
0x0ce57,), # Hangul Syllable Cyig ..Hangul Syllable Cyih + (0x0ce59, 0x0ce73,), # Hangul Syllable Cig ..Hangul Syllable Cih + (0x0ce75, 0x0ce8f,), # Hangul Syllable Kag ..Hangul Syllable Kah + (0x0ce91, 0x0ceab,), # Hangul Syllable Kaeg ..Hangul Syllable Kaeh + (0x0cead, 0x0cec7,), # Hangul Syllable Kyag ..Hangul Syllable Kyah + (0x0cec9, 0x0cee3,), # Hangul Syllable Kyaeg ..Hangul Syllable Kyaeh + (0x0cee5, 0x0ceff,), # Hangul Syllable Keog ..Hangul Syllable Keoh + (0x0cf01, 0x0cf1b,), # Hangul Syllable Keg ..Hangul Syllable Keh + (0x0cf1d, 0x0cf37,), # Hangul Syllable Kyeog ..Hangul Syllable Kyeoh + (0x0cf39, 0x0cf53,), # Hangul Syllable Kyeg ..Hangul Syllable Kyeh + (0x0cf55, 0x0cf6f,), # Hangul Syllable Kog ..Hangul Syllable Koh + (0x0cf71, 0x0cf8b,), # Hangul Syllable Kwag ..Hangul Syllable Kwah + (0x0cf8d, 0x0cfa7,), # Hangul Syllable Kwaeg ..Hangul Syllable Kwaeh + (0x0cfa9, 0x0cfc3,), # Hangul Syllable Koeg ..Hangul Syllable Koeh + (0x0cfc5, 0x0cfdf,), # Hangul Syllable Kyog ..Hangul Syllable Kyoh + (0x0cfe1, 0x0cffb,), # Hangul Syllable Kug ..Hangul Syllable Kuh + (0x0cffd, 0x0d017,), # Hangul Syllable Kweog ..Hangul Syllable Kweoh + (0x0d019, 0x0d033,), # Hangul Syllable Kweg ..Hangul Syllable Kweh + (0x0d035, 0x0d04f,), # Hangul Syllable Kwig ..Hangul Syllable Kwih + (0x0d051, 0x0d06b,), # Hangul Syllable Kyug ..Hangul Syllable Kyuh + (0x0d06d, 0x0d087,), # Hangul Syllable Keug ..Hangul Syllable Keuh + (0x0d089, 0x0d0a3,), # Hangul Syllable Kyig ..Hangul Syllable Kyih + (0x0d0a5, 0x0d0bf,), # Hangul Syllable Kig ..Hangul Syllable Kih + (0x0d0c1, 0x0d0db,), # Hangul Syllable Tag ..Hangul Syllable Tah + (0x0d0dd, 0x0d0f7,), # Hangul Syllable Taeg ..Hangul Syllable Taeh + (0x0d0f9, 0x0d113,), # Hangul Syllable Tyag ..Hangul Syllable Tyah + (0x0d115, 0x0d12f,), # Hangul Syllable Tyaeg ..Hangul Syllable Tyaeh + (0x0d131, 0x0d14b,), # Hangul Syllable Teog ..Hangul Syllable Teoh + (0x0d14d, 0x0d167,), # Hangul Syllable Teg ..Hangul Syllable Teh + (0x0d169, 
0x0d183,), # Hangul Syllable Tyeog ..Hangul Syllable Tyeoh + (0x0d185, 0x0d19f,), # Hangul Syllable Tyeg ..Hangul Syllable Tyeh + (0x0d1a1, 0x0d1bb,), # Hangul Syllable Tog ..Hangul Syllable Toh + (0x0d1bd, 0x0d1d7,), # Hangul Syllable Twag ..Hangul Syllable Twah + (0x0d1d9, 0x0d1f3,), # Hangul Syllable Twaeg ..Hangul Syllable Twaeh + (0x0d1f5, 0x0d20f,), # Hangul Syllable Toeg ..Hangul Syllable Toeh + (0x0d211, 0x0d22b,), # Hangul Syllable Tyog ..Hangul Syllable Tyoh + (0x0d22d, 0x0d247,), # Hangul Syllable Tug ..Hangul Syllable Tuh + (0x0d249, 0x0d263,), # Hangul Syllable Tweog ..Hangul Syllable Tweoh + (0x0d265, 0x0d27f,), # Hangul Syllable Tweg ..Hangul Syllable Tweh + (0x0d281, 0x0d29b,), # Hangul Syllable Twig ..Hangul Syllable Twih + (0x0d29d, 0x0d2b7,), # Hangul Syllable Tyug ..Hangul Syllable Tyuh + (0x0d2b9, 0x0d2d3,), # Hangul Syllable Teug ..Hangul Syllable Teuh + (0x0d2d5, 0x0d2ef,), # Hangul Syllable Tyig ..Hangul Syllable Tyih + (0x0d2f1, 0x0d30b,), # Hangul Syllable Tig ..Hangul Syllable Tih + (0x0d30d, 0x0d327,), # Hangul Syllable Pag ..Hangul Syllable Pah + (0x0d329, 0x0d343,), # Hangul Syllable Paeg ..Hangul Syllable Paeh + (0x0d345, 0x0d35f,), # Hangul Syllable Pyag ..Hangul Syllable Pyah + (0x0d361, 0x0d37b,), # Hangul Syllable Pyaeg ..Hangul Syllable Pyaeh + (0x0d37d, 0x0d397,), # Hangul Syllable Peog ..Hangul Syllable Peoh + (0x0d399, 0x0d3b3,), # Hangul Syllable Peg ..Hangul Syllable Peh + (0x0d3b5, 0x0d3cf,), # Hangul Syllable Pyeog ..Hangul Syllable Pyeoh + (0x0d3d1, 0x0d3eb,), # Hangul Syllable Pyeg ..Hangul Syllable Pyeh + (0x0d3ed, 0x0d407,), # Hangul Syllable Pog ..Hangul Syllable Poh + (0x0d409, 0x0d423,), # Hangul Syllable Pwag ..Hangul Syllable Pwah + (0x0d425, 0x0d43f,), # Hangul Syllable Pwaeg ..Hangul Syllable Pwaeh + (0x0d441, 0x0d45b,), # Hangul Syllable Poeg ..Hangul Syllable Poeh + (0x0d45d, 0x0d477,), # Hangul Syllable Pyog ..Hangul Syllable Pyoh + (0x0d479, 0x0d493,), # Hangul Syllable Pug ..Hangul Syllable Puh + (0x0d495, 
0x0d4af,), # Hangul Syllable Pweog ..Hangul Syllable Pweoh + (0x0d4b1, 0x0d4cb,), # Hangul Syllable Pweg ..Hangul Syllable Pweh + (0x0d4cd, 0x0d4e7,), # Hangul Syllable Pwig ..Hangul Syllable Pwih + (0x0d4e9, 0x0d503,), # Hangul Syllable Pyug ..Hangul Syllable Pyuh + (0x0d505, 0x0d51f,), # Hangul Syllable Peug ..Hangul Syllable Peuh + (0x0d521, 0x0d53b,), # Hangul Syllable Pyig ..Hangul Syllable Pyih + (0x0d53d, 0x0d557,), # Hangul Syllable Pig ..Hangul Syllable Pih + (0x0d559, 0x0d573,), # Hangul Syllable Hag ..Hangul Syllable Hah + (0x0d575, 0x0d58f,), # Hangul Syllable Haeg ..Hangul Syllable Haeh + (0x0d591, 0x0d5ab,), # Hangul Syllable Hyag ..Hangul Syllable Hyah + (0x0d5ad, 0x0d5c7,), # Hangul Syllable Hyaeg ..Hangul Syllable Hyaeh + (0x0d5c9, 0x0d5e3,), # Hangul Syllable Heog ..Hangul Syllable Heoh + (0x0d5e5, 0x0d5ff,), # Hangul Syllable Heg ..Hangul Syllable Heh + (0x0d601, 0x0d61b,), # Hangul Syllable Hyeog ..Hangul Syllable Hyeoh + (0x0d61d, 0x0d637,), # Hangul Syllable Hyeg ..Hangul Syllable Hyeh + (0x0d639, 0x0d653,), # Hangul Syllable Hog ..Hangul Syllable Hoh + (0x0d655, 0x0d66f,), # Hangul Syllable Hwag ..Hangul Syllable Hwah + (0x0d671, 0x0d68b,), # Hangul Syllable Hwaeg ..Hangul Syllable Hwaeh + (0x0d68d, 0x0d6a7,), # Hangul Syllable Hoeg ..Hangul Syllable Hoeh + (0x0d6a9, 0x0d6c3,), # Hangul Syllable Hyog ..Hangul Syllable Hyoh + (0x0d6c5, 0x0d6df,), # Hangul Syllable Hug ..Hangul Syllable Huh + (0x0d6e1, 0x0d6fb,), # Hangul Syllable Hweog ..Hangul Syllable Hweoh + (0x0d6fd, 0x0d717,), # Hangul Syllable Hweg ..Hangul Syllable Hweh + (0x0d719, 0x0d733,), # Hangul Syllable Hwig ..Hangul Syllable Hwih + (0x0d735, 0x0d74f,), # Hangul Syllable Hyug ..Hangul Syllable Hyuh + (0x0d751, 0x0d76b,), # Hangul Syllable Heug ..Hangul Syllable Heuh + (0x0d76d, 0x0d787,), # Hangul Syllable Hyig ..Hangul Syllable Hyih + (0x0d789, 0x0d7a3,), # Hangul Syllable Hig ..Hangul Syllable Hih +) + +EXTENDED_PICTOGRAPHIC = ( + # Source: emoji-data.txt + # Date: 2025-07-25, 
17:54:31 GMT + # + (0x000a9, 0x000a9,), # Copyright Sign + (0x000ae, 0x000ae,), # Registered Sign + (0x0203c, 0x0203c,), # Double Exclamation Mark + (0x02049, 0x02049,), # Exclamation Question Mark + (0x02122, 0x02122,), # Trade Mark Sign + (0x02139, 0x02139,), # Information Source + (0x02194, 0x02199,), # Left Right Arrow ..South West Arrow + (0x021a9, 0x021aa,), # Leftwards Arrow With Hoo..Rightwards Arrow With Ho + (0x0231a, 0x0231b,), # Watch ..Hourglass + (0x02328, 0x02328,), # Keyboard + (0x023cf, 0x023cf,), # Eject Symbol + (0x023e9, 0x023f3,), # Black Right-pointing Dou..Hourglass With Flowing S + (0x023f8, 0x023fa,), # Double Vertical Bar ..Black Circle For Record + (0x024c2, 0x024c2,), # Circled Latin Capital Letter M + (0x025aa, 0x025ab,), # Black Small Square ..White Small Square + (0x025b6, 0x025b6,), # Black Right-pointing Triangle + (0x025c0, 0x025c0,), # Black Left-pointing Triangle + (0x025fb, 0x025fe,), # White Medium Square ..Black Medium Small Squar + (0x02600, 0x02604,), # Black Sun With Rays ..Comet + (0x0260e, 0x0260e,), # Black Telephone + (0x02611, 0x02611,), # Ballot Box With Check + (0x02614, 0x02615,), # Umbrella With Rain Drops..Hot Beverage + (0x02618, 0x02618,), # Shamrock + (0x0261d, 0x0261d,), # White Up Pointing Index + (0x02620, 0x02620,), # Skull And Crossbones + (0x02622, 0x02623,), # Radioactive Sign ..Biohazard Sign + (0x02626, 0x02626,), # Orthodox Cross + (0x0262a, 0x0262a,), # Star And Crescent + (0x0262e, 0x0262f,), # Peace Symbol ..Yin Yang + (0x02638, 0x0263a,), # Wheel Of Dharma ..White Smiling Face + (0x02640, 0x02640,), # Female Sign + (0x02642, 0x02642,), # Male Sign + (0x02648, 0x02653,), # Aries ..Pisces + (0x0265f, 0x02660,), # Black Chess Pawn ..Black Spade Suit + (0x02663, 0x02663,), # Black Club Suit + (0x02665, 0x02666,), # Black Heart Suit ..Black Diamond Suit + (0x02668, 0x02668,), # Hot Springs + (0x0267b, 0x0267b,), # Black Universal Recycling Symbol + (0x0267e, 0x0267f,), # Permanent Paper Sign 
..Wheelchair Symbol + (0x02692, 0x02697,), # Hammer And Pick ..Alembic + (0x02699, 0x02699,), # Gear + (0x0269b, 0x0269c,), # Atom Symbol ..Fleur-de-lis + (0x026a0, 0x026a1,), # Warning Sign ..High Voltage Sign + (0x026a7, 0x026a7,), # Male With Stroke And Male And Female Sign + (0x026aa, 0x026ab,), # Medium White Circle ..Medium Black Circle + (0x026b0, 0x026b1,), # Coffin ..Funeral Urn + (0x026bd, 0x026be,), # Soccer Ball ..Baseball + (0x026c4, 0x026c5,), # Snowman Without Snow ..Sun Behind Cloud + (0x026c8, 0x026c8,), # Thunder Cloud And Rain + (0x026ce, 0x026cf,), # Ophiuchus ..Pick + (0x026d1, 0x026d1,), # Helmet With White Cross + (0x026d3, 0x026d4,), # Chains ..No Entry + (0x026e9, 0x026ea,), # Shinto Shrine ..Church + (0x026f0, 0x026f5,), # Mountain ..Sailboat + (0x026f7, 0x026fa,), # Skier ..Tent + (0x026fd, 0x026fd,), # Fuel Pump + (0x02702, 0x02702,), # Black Scissors + (0x02705, 0x02705,), # White Heavy Check Mark + (0x02708, 0x0270d,), # Airplane ..Writing Hand + (0x0270f, 0x0270f,), # Pencil + (0x02712, 0x02712,), # Black Nib + (0x02714, 0x02714,), # Heavy Check Mark + (0x02716, 0x02716,), # Heavy Multiplication X + (0x0271d, 0x0271d,), # Latin Cross + (0x02721, 0x02721,), # Star Of David + (0x02728, 0x02728,), # Sparkles + (0x02733, 0x02734,), # Eight Spoked Asterisk ..Eight Pointed Black Star + (0x02744, 0x02744,), # Snowflake + (0x02747, 0x02747,), # Sparkle + (0x0274c, 0x0274c,), # Cross Mark + (0x0274e, 0x0274e,), # Negative Squared Cross Mark + (0x02753, 0x02755,), # Black Question Mark Orna..White Exclamation Mark O + (0x02757, 0x02757,), # Heavy Exclamation Mark Symbol + (0x02763, 0x02764,), # Heavy Heart Exclamation ..Heavy Black Heart + (0x02795, 0x02797,), # Heavy Plus Sign ..Heavy Division Sign + (0x027a1, 0x027a1,), # Black Rightwards Arrow + (0x027b0, 0x027b0,), # Curly Loop + (0x027bf, 0x027bf,), # Double Curly Loop + (0x02934, 0x02935,), # Arrow Pointing Rightward..Arrow Pointing Rightward + (0x02b05, 0x02b07,), # Leftwards Black Arrow 
..Downwards Black Arrow + (0x02b1b, 0x02b1c,), # Black Large Square ..White Large Square + (0x02b50, 0x02b50,), # White Medium Star + (0x02b55, 0x02b55,), # Heavy Large Circle + (0x03030, 0x03030,), # Wavy Dash + (0x0303d, 0x0303d,), # Part Alternation Mark + (0x03297, 0x03297,), # Circled Ideograph Congratulation + (0x03299, 0x03299,), # Circled Ideograph Secret + (0x1f004, 0x1f004,), # Mahjong Tile Red Dragon + (0x1f02c, 0x1f02f,), # (nil) + (0x1f094, 0x1f09f,), # (nil) + (0x1f0af, 0x1f0b0,), # (nil) + (0x1f0c0, 0x1f0c0,), # (nil) + (0x1f0cf, 0x1f0d0,), # Playing Card Black Joker..(nil) + (0x1f0f6, 0x1f0ff,), # (nil) + (0x1f170, 0x1f171,), # Negative Squared Latin C..Negative Squared Latin C + (0x1f17e, 0x1f17f,), # Negative Squared Latin C..Negative Squared Latin C + (0x1f18e, 0x1f18e,), # Negative Squared Ab + (0x1f191, 0x1f19a,), # Squared Cl ..Squared Vs + (0x1f1ae, 0x1f1e5,), # (nil) + (0x1f201, 0x1f20f,), # Squared Katakana Koko ..(nil) + (0x1f21a, 0x1f21a,), # Squared Cjk Unified Ideograph-7121 + (0x1f22f, 0x1f22f,), # Squared Cjk Unified Ideograph-6307 + (0x1f232, 0x1f23a,), # Squared Cjk Unified Ideo..Squared Cjk Unified Ideo + (0x1f23c, 0x1f23f,), # (nil) + (0x1f249, 0x1f25f,), # (nil) + (0x1f266, 0x1f321,), # (nil) ..Thermometer + (0x1f324, 0x1f393,), # White Sun With Small Clo..Graduation Cap + (0x1f396, 0x1f397,), # Military Medal ..Reminder Ribbon + (0x1f399, 0x1f39b,), # Studio Microphone ..Control Knobs + (0x1f39e, 0x1f3f0,), # Film Frames ..European Castle + (0x1f3f3, 0x1f3f5,), # Waving White Flag ..Rosette + (0x1f3f7, 0x1f3fa,), # Label ..Amphora + (0x1f400, 0x1f4fd,), # Rat ..Film Projector + (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red + (0x1f549, 0x1f54e,), # Om Symbol ..Menorah With Nine Branch + (0x1f550, 0x1f567,), # Clock Face One Oclock ..Clock Face Twelve-thirty + (0x1f56f, 0x1f570,), # Candle ..Mantelpiece Clock + (0x1f573, 0x1f57a,), # Hole ..Man Dancing + (0x1f587, 0x1f587,), # Linked Paperclips + (0x1f58a, 
0x1f58d,), # Lower Left Ballpoint Pen..Lower Left Crayon + (0x1f590, 0x1f590,), # Raised Hand With Fingers Splayed + (0x1f595, 0x1f596,), # Reversed Hand With Middl..Raised Hand With Part Be + (0x1f5a4, 0x1f5a5,), # Black Heart ..Desktop Computer + (0x1f5a8, 0x1f5a8,), # Printer + (0x1f5b1, 0x1f5b2,), # Three Button Mouse ..Trackball + (0x1f5bc, 0x1f5bc,), # Frame With Picture + (0x1f5c2, 0x1f5c4,), # Card Index Dividers ..File Cabinet + (0x1f5d1, 0x1f5d3,), # Wastebasket ..Spiral Calendar Pad + (0x1f5dc, 0x1f5de,), # Compression ..Rolled-up Newspaper + (0x1f5e1, 0x1f5e1,), # Dagger Knife + (0x1f5e3, 0x1f5e3,), # Speaking Head In Silhouette + (0x1f5e8, 0x1f5e8,), # Left Speech Bubble + (0x1f5ef, 0x1f5ef,), # Right Anger Bubble + (0x1f5f3, 0x1f5f3,), # Ballot Box With Ballot + (0x1f5fa, 0x1f64f,), # World Map ..Person With Folded Hands + (0x1f680, 0x1f6c5,), # Rocket ..Left Luggage + (0x1f6cb, 0x1f6d2,), # Couch And Lamp ..Shopping Trolley + (0x1f6d5, 0x1f6e5,), # Hindu Temple ..Motor Boat + (0x1f6e9, 0x1f6e9,), # Small Airplane + (0x1f6eb, 0x1f6f0,), # Airplane Departure ..Satellite + (0x1f6f3, 0x1f6ff,), # Passenger Ship ..(nil) + (0x1f7da, 0x1f7ff,), # (nil) + (0x1f80c, 0x1f80f,), # (nil) + (0x1f848, 0x1f84f,), # (nil) + (0x1f85a, 0x1f85f,), # (nil) + (0x1f888, 0x1f88f,), # (nil) + (0x1f8ae, 0x1f8af,), # (nil) + (0x1f8bc, 0x1f8bf,), # (nil) + (0x1f8c2, 0x1f8cf,), # (nil) + (0x1f8d9, 0x1f8ff,), # (nil) + (0x1f90c, 0x1f93a,), # Pinched Fingers ..Fencer + (0x1f93c, 0x1f945,), # Wrestlers ..Goal Net + (0x1f947, 0x1f9ff,), # First Place Medal ..Nazar Amulet + (0x1fa58, 0x1fa5f,), # (nil) + (0x1fa6e, 0x1faff,), # (nil) + (0x1fc00, 0x1fffd,), # (nil) +) + +INCB_LINKER = ( + # Source: DerivedCoreProperties + # Date: see file + # + (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00b4d, 0x00b4d,), # Oriya Sign Virama + (0x00c4d, 0x00c4d,), # Telugu Sign Virama + (0x00d4d, 
0x00d4d,), # Malayalam Sign Virama + (0x01039, 0x01039,), # Myanmar Sign Virama + (0x017d2, 0x017d2,), # Khmer Sign Coeng + (0x01a60, 0x01a60,), # Tai Tham Sign Sakot + (0x01b44, 0x01b44,), # Balinese Adeg Adeg + (0x01bab, 0x01bab,), # Sundanese Sign Virama + (0x0a9c0, 0x0a9c0,), # Javanese Pangkon + (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama + (0x10a3f, 0x10a3f,), # Kharoshthi Virama + (0x11133, 0x11133,), # Chakma Virama + (0x113d0, 0x113d0,), # (nil) + (0x1193e, 0x1193e,), # Dives Akuru Virama + (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner + (0x11a99, 0x11a99,), # Soyombo Subjoiner + (0x11f42, 0x11f42,), # Kawi Conjoiner +) + +INCB_CONSONANT = ( + # Source: DerivedCoreProperties + # Date: see file + # + (0x00915, 0x00939,), # Devanagari Letter Ka ..Devanagari Letter Ha + (0x00958, 0x0095f,), # Devanagari Letter Qa ..Devanagari Letter Yya + (0x00978, 0x0097f,), # Devanagari Letter Marwar..Devanagari Letter Bba + (0x00995, 0x009a8,), # Bengali Letter Ka ..Bengali Letter Na + (0x009aa, 0x009b0,), # Bengali Letter Pa ..Bengali Letter Ra + (0x009b2, 0x009b2,), # Bengali Letter La + (0x009b6, 0x009b9,), # Bengali Letter Sha ..Bengali Letter Ha + (0x009dc, 0x009dd,), # Bengali Letter Rra ..Bengali Letter Rha + (0x009df, 0x009df,), # Bengali Letter Yya + (0x009f0, 0x009f1,), # Bengali Letter Ra With M..Bengali Letter Ra With L + (0x00a95, 0x00aa8,), # Gujarati Letter Ka ..Gujarati Letter Na + (0x00aaa, 0x00ab0,), # Gujarati Letter Pa ..Gujarati Letter Ra + (0x00ab2, 0x00ab3,), # Gujarati Letter La ..Gujarati Letter Lla + (0x00ab5, 0x00ab9,), # Gujarati Letter Va ..Gujarati Letter Ha + (0x00af9, 0x00af9,), # Gujarati Letter Zha + (0x00b15, 0x00b28,), # Oriya Letter Ka ..Oriya Letter Na + (0x00b2a, 0x00b30,), # Oriya Letter Pa ..Oriya Letter Ra + (0x00b32, 0x00b33,), # Oriya Letter La ..Oriya Letter Lla + (0x00b35, 0x00b39,), # Oriya Letter Va ..Oriya Letter Ha + (0x00b5c, 0x00b5d,), # Oriya Letter Rra ..Oriya Letter Rha + (0x00b5f, 0x00b5f,), # Oriya Letter Yya + 
(0x00b71, 0x00b71,), # Oriya Letter Wa + (0x00c15, 0x00c28,), # Telugu Letter Ka ..Telugu Letter Na + (0x00c2a, 0x00c39,), # Telugu Letter Pa ..Telugu Letter Ha + (0x00c58, 0x00c5a,), # Telugu Letter Tsa ..Telugu Letter Rrra + (0x00d15, 0x00d3a,), # Malayalam Letter Ka ..Malayalam Letter Ttta + (0x01000, 0x0102a,), # Myanmar Letter Ka ..Myanmar Letter Au + (0x0103f, 0x0103f,), # Myanmar Letter Great Sa + (0x01050, 0x01055,), # Myanmar Letter Sha ..Myanmar Letter Vocalic L + (0x0105a, 0x0105d,), # Myanmar Letter Mon Nga ..Myanmar Letter Mon Bbe + (0x01061, 0x01061,), # Myanmar Letter Sgaw Karen Sha + (0x01065, 0x01066,), # Myanmar Letter Western P..Myanmar Letter Western P + (0x0106e, 0x01070,), # Myanmar Letter Eastern P..Myanmar Letter Eastern P + (0x01075, 0x01081,), # Myanmar Letter Shan Ka ..Myanmar Letter Shan Ha + (0x0108e, 0x0108e,), # Myanmar Letter Rumai Palaung Fa + (0x01780, 0x017b3,), # Khmer Letter Ka ..Khmer Independent Vowel + (0x01a20, 0x01a54,), # Tai Tham Letter High Ka ..Tai Tham Letter Great Sa + (0x01b0b, 0x01b0c,), # Balinese Letter Ra Repa ..Balinese Letter Ra Repa + (0x01b13, 0x01b33,), # Balinese Letter Ka ..Balinese Letter Ha + (0x01b45, 0x01b4c,), # Balinese Letter Kaf Sasa..Balinese Letter Archaic + (0x01b83, 0x01ba0,), # Sundanese Letter A ..Sundanese Letter Ha + (0x01bae, 0x01baf,), # Sundanese Letter Kha ..Sundanese Letter Sya + (0x01bbb, 0x01bbd,), # Sundanese Letter Reu ..Sundanese Letter Bha + (0x0a989, 0x0a98b,), # Javanese Letter Pa Cerek..Javanese Letter Nga Lele + (0x0a98f, 0x0a9b2,), # Javanese Letter Ka ..Javanese Letter Ha + (0x0a9e0, 0x0a9e4,), # Myanmar Letter Shan Gha ..Myanmar Letter Shan Bha + (0x0a9e7, 0x0a9ef,), # Myanmar Letter Tai Laing..Myanmar Letter Tai Laing + (0x0a9fa, 0x0a9fe,), # Myanmar Letter Tai Laing..Myanmar Letter Tai Laing + (0x0aa60, 0x0aa6f,), # Myanmar Letter Khamti Ga..Myanmar Letter Khamti Fa + (0x0aa71, 0x0aa73,), # Myanmar Letter Khamti Xa..Myanmar Letter Khamti Ra + (0x0aa7a, 0x0aa7a,), # 
Myanmar Letter Aiton Ra + (0x0aa7e, 0x0aa7f,), # Myanmar Letter Shwe Pala..Myanmar Letter Shwe Pala + (0x0aae0, 0x0aaea,), # Meetei Mayek Letter E ..Meetei Mayek Letter Ssa + (0x0abc0, 0x0abda,), # Meetei Mayek Letter Kok ..Meetei Mayek Letter Bham + (0x10a00, 0x10a00,), # Kharoshthi Letter A + (0x10a10, 0x10a13,), # Kharoshthi Letter Ka ..Kharoshthi Letter Gha + (0x10a15, 0x10a17,), # Kharoshthi Letter Ca ..Kharoshthi Letter Ja + (0x10a19, 0x10a35,), # Kharoshthi Letter Nya ..Kharoshthi Letter Vha + (0x11103, 0x11126,), # Chakma Letter Aa ..Chakma Letter Haa + (0x11144, 0x11144,), # Chakma Letter Lhaa + (0x11147, 0x11147,), # Chakma Letter Vaa + (0x11380, 0x11389,), # (nil) + (0x1138b, 0x1138b,), # (nil) + (0x1138e, 0x1138e,), # (nil) + (0x11390, 0x113b5,), # (nil) + (0x11900, 0x11906,), # Dives Akuru Letter A ..Dives Akuru Letter E + (0x11909, 0x11909,), # Dives Akuru Letter O + (0x1190c, 0x11913,), # Dives Akuru Letter Ka ..Dives Akuru Letter Ja + (0x11915, 0x11916,), # Dives Akuru Letter Nya ..Dives Akuru Letter Tta + (0x11918, 0x1192f,), # Dives Akuru Letter Dda ..Dives Akuru Letter Za + (0x11a00, 0x11a00,), # Zanabazar Square Letter A + (0x11a0b, 0x11a32,), # Zanabazar Square Letter ..Zanabazar Square Letter + (0x11a50, 0x11a50,), # Soyombo Letter A + (0x11a5c, 0x11a83,), # Soyombo Letter Ka ..Soyombo Letter Kssa + (0x11f04, 0x11f10,), # Kawi Letter A ..Kawi Letter O + (0x11f12, 0x11f33,), # Kawi Letter Ka ..Kawi Letter Jnya +) + +INCB_EXTEND = ( + # Source: DerivedCoreProperties + # Date: see file + # + (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le + (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli + (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg + (0x005bf, 0x005bf,), # Hebrew Point Rafe + (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot + (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot + (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00610, 
0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below + (0x00670, 0x00670,), # Arabic Letter Superscript Alef + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda + (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon + (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x00711, 0x00711,), # Syriac Letter Superscript Alaph + (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh + (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun + (0x007eb, 0x007f3,), # Nko Combining Short High..Nko Combining Double Dot + (0x007fd, 0x007fd,), # Nko Dantayalan + (0x00816, 0x00819,), # Samaritan Mark In ..Samaritan Mark Dagesh + (0x0081b, 0x00823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A + (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark + (0x00897, 0x0089f,), # (nil) ..Arabic Half Madda Over M + (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S + (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara + (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe + (0x0093c, 0x0093c,), # Devanagari Sign Nukta + (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai + (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu + (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x009bc, 0x009bc,), # Bengali Sign Nukta + (0x009be, 0x009be,), # Bengali Vowel Sign Aa + (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal + (0x009d7, 0x009d7,), # Bengali Au Length Mark + (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign 
Vocal + (0x009fe, 0x009fe,), # Bengali Sandhi Mark + (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta + (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat + (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak + (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash + (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00abc, 0x00abc,), # Gujarati Sign Nukta + (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai + (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle + (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b3c, 0x00b3c,), # Oriya Sign Nukta + (0x00b3e, 0x00b3f,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign I + (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic + (0x00b55, 0x00b57,), # Oriya Sign Overline ..Oriya Au Length Mark + (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + (0x00b82, 0x00b82,), # Tamil Sign Anusvara + (0x00bbe, 0x00bbe,), # Tamil Vowel Sign Aa + (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii + (0x00bcd, 0x00bcd,), # Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above + (0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above + (0x00c3c, 0x00c3c,), # Telugu Sign Nukta + (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai + (0x00c4a, 0x00c4c,), # Telugu Vowel Sign O ..Telugu Vowel Sign Au + (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x00c62, 
0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00cbc, 0x00cbc,), # Kannada Sign Nukta + (0x00cbf, 0x00cc0,), # Kannada Vowel Sign I ..Kannada Vowel Sign Ii + (0x00cc2, 0x00cc2,), # Kannada Vowel Sign Uu + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark + (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin + (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular + (0x00d3e, 0x00d3e,), # Malayalam Vowel Sign Aa + (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc + (0x00d57, 0x00d57,), # Malayalam Au Length Mark + (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d81, 0x00d81,), # Sinhala Sign Candrabindu + (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna + (0x00dcf, 0x00dcf,), # Sinhala Vowel Sign Aela-pilla + (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00ddf, 0x00ddf,), # Sinhala Vowel Sign Gayanukitta + (0x00e31, 0x00e31,), # Thai Character Mai Han-akat + (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu + (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan + (0x00eb1, 0x00eb1,), # Lao Vowel Sign Mai Kan + (0x00eb4, 0x00ebc,), # Lao Vowel Sign I ..Lao Semivowel Sign Lo + (0x00ec8, 0x00ece,), # Lao Tone Mai Ek ..Lao Yamakkan + (0x00f18, 0x00f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig + (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla + (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags + (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru + (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga + 
(0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter + (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter + (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan + (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu + (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below + (0x0103a, 0x0103a,), # Myanmar Sign Asat + (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa + (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan + (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone + (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin + (0x01712, 0x01715,), # Tagalog Vowel Sign I ..Tagalog Sign Pamudpod + (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U + (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa + (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua + (0x017c6, 0x017c6,), # Khmer Sign Nikahit + (0x017c9, 0x017d1,), # Khmer Sign Muusikatoan ..Khmer Sign Viriam + (0x017d3, 0x017d3,), # Khmer Sign Bathamasat + (0x017dd, 0x017dd,), # Khmer Sign Atthacan + (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180f, 0x0180f,), # Mongolian Free Variation Selector Four + (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter 
Ali Gal + (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga + (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U + (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O + (0x01932, 0x01932,), # Limbu Small Letter Anusvara + (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i + (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U + (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae + (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La + (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign + (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat + (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B + (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot + (0x01ab0, 0x01add,), # Combining Doubled Circum..(nil) + (0x01ae0, 0x01aeb,), # (nil) + (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang + (0x01b34, 0x01b3d,), # Balinese Sign Rerekan ..Balinese Vowel Sign La L + (0x01b42, 0x01b43,), # Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe + (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol + (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar + (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan + (0x01ba8, 0x01baa,), # Sundanese Vowel Sign Pam..Sundanese Sign Pamaaeh + (0x01bac, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01be6,), # Batak Sign Tompi + (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee + (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O + (0x01bef, 0x01bf3,), # Batak Vowel Sign U For S..Batak Panongonan + (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T + (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha + (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic 
Mi..Vedic Tone Rigvedic Kash + (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01ced, 0x01ced,), # Vedic Sign Tiryak + (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above + (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea + (0x0200d, 0x0200d,), # Zero Width Joiner + (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above + (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu + (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner + (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M + (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous + (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer + (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0x0a6f0, 0x0a6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk + (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara + (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta + (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara + (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a82c, 0x0a82c,), # Syloti Nagri Sign Alternate Hasanta + (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig + (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay + (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R + (0x0a953, 0x0a953,), # Rejang Virama + (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar + (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu + (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku + 
(0x0a9bc, 0x0a9bd,), # Javanese Vowel Sign Pepe..Javanese Consonant Sign + (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw + (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe + (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue + (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng + (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M + (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang + (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U + (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia + (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho + (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap + (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap + (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika + (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 + (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0ff9e, 0x0ff9f,), # Halfwidth Katakana Voice..Halfwidth Katakana Semi- + (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke + (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark + (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let + (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation + (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas + (0x10d69, 0x10d6d,), # (nil) + (0x10eab, 0x10eac,), # Yezidi 
Combining Hamza M..Yezidi Combining Madda M + (0x10efa, 0x10eff,), # (nil) ..Arabic Small Low Word Ma + (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke + (0x10f82, 0x10f85,), # Old Uyghur Combining Dot..Old Uyghur Combining Two + (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama + (0x11070, 0x11070,), # Brahmi Sign Old Tamil Virama + (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta + (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara + (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai + (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R + (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga + (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu + (0x1112d, 0x11132,), # Chakma Vowel Sign Ai ..Chakma Au Mark + (0x11134, 0x11134,), # Chakma Maayyaa + (0x11173, 0x11173,), # Mahajani Sign Nukta + (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara + (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x111c0, 0x111c0,), # Sharada Sign Virama + (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe + (0x111cf, 0x111cf,), # Sharada Sign Inverted Candrabindu + (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai + (0x11234, 0x11237,), # Khojki Sign Anusvara ..Khojki Sign Shadda + (0x1123e, 0x1123e,), # Khojki Sign Sukun + (0x11241, 0x11241,), # Khojki Vowel Sign Vocalic R + (0x112df, 0x112df,), # Khudawadi Sign Anusvara + (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama + (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta + (0x1133e, 0x1133e,), # Grantha Vowel Sign Aa + (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1134d, 0x1134d,), # Grantha Sign Virama + (0x11357, 
0x11357,), # Grantha Au Length Mark + (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit + (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter + (0x113b8, 0x113b8,), # (nil) + (0x113bb, 0x113c0,), # (nil) + (0x113c2, 0x113c2,), # (nil) + (0x113c5, 0x113c5,), # (nil) + (0x113c7, 0x113c9,), # (nil) + (0x113ce, 0x113cf,), # (nil) + (0x113d2, 0x113d2,), # (nil) + (0x113e1, 0x113e2,), # (nil) + (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai + (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara + (0x11446, 0x11446,), # Newa Sign Nukta + (0x1145e, 0x1145e,), # Newa Sandhi Mark + (0x114b0, 0x114b0,), # Tirhuta Vowel Sign Aa + (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal + (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E + (0x114bd, 0x114bd,), # Tirhuta Vowel Sign Short O + (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara + (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta + (0x115af, 0x115af,), # Siddham Vowel Sign Aa + (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal + (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara + (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter + (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai + (0x1163d, 0x1163d,), # Modi Sign Anusvara + (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra + (0x116ab, 0x116ab,), # Takri Sign Anusvara + (0x116ad, 0x116ad,), # Takri Vowel Sign Aa + (0x116b0, 0x116b7,), # Takri Vowel Sign U ..Takri Sign Nukta + (0x1171d, 0x1171d,), # Ahom Consonant Sign Medial La + (0x1171f, 0x1171f,), # Ahom Consonant Sign Medial Ligating Ra + (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu + (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer + (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign Anusvara + (0x11839, 
0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta + (0x11930, 0x11930,), # Dives Akuru Vowel Sign Aa + (0x1193b, 0x1193d,), # Dives Akuru Sign Anusvar..Dives Akuru Sign Halanta + (0x11943, 0x11943,), # Dives Akuru Sign Nukta + (0x119d4, 0x119d7,), # Nandinagari Vowel Sign U..Nandinagari Vowel Sign V + (0x119da, 0x119db,), # Nandinagari Vowel Sign E..Nandinagari Vowel Sign A + (0x119e0, 0x119e0,), # Nandinagari Sign Virama + (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L + (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An + (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster + (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe + (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar + (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara + (0x11a98, 0x11a98,), # Soyombo Gemination Mark + (0x11b60, 0x11b60,), # (nil) + (0x11b62, 0x11b64,), # (nil) + (0x11b66, 0x11b66,), # (nil) + (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara + (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter + (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa + (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E + (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign + (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E + (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign + (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama + (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara + (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign + (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara + (0x11d97, 0x11d97,), # Gunjala Gondi 
Virama + (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U + (0x11f00, 0x11f01,), # Kawi Sign Candrabindu ..Kawi Sign Anusvara + (0x11f36, 0x11f3a,), # Kawi Vowel Sign I ..Kawi Vowel Sign Vocalic + (0x11f40, 0x11f41,), # Kawi Vowel Sign Eu ..Kawi Sign Killer + (0x11f5a, 0x11f5a,), # (nil) + (0x13440, 0x13440,), # Egyptian Hieroglyph Mirror Horizontally + (0x13447, 0x13455,), # Egyptian Hieroglyph Modi..Egyptian Hieroglyph Modi + (0x1611e, 0x16129,), # (nil) + (0x1612d, 0x1612f,), # (nil) + (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High + (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta + (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar + (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below + (0x16fe4, 0x16fe4,), # Khitan Small Script Filler + (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea + (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark + (0x1cf00, 0x1cf2d,), # Znamenny Combining Mark ..Znamenny Combining Mark + (0x1cf30, 0x1cf46,), # Znamenny Combining Tonal..Znamenny Priznak Modifie + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0x1da00, 0x1da36,), # Signwriting Head Rim ..Signwriting Air Sucking + (0x1da3b, 0x1da6c,), # Signwriting Mouth Closed..Signwriting Excitement + (0x1da75, 0x1da75,), # Signwriting Upper Body Tilting From Hip Joints + (0x1da84, 0x1da84,), # Signwriting Location Head Neck + (0x1da9b, 0x1da9f,), # Signwriting Fill Modifie..Signwriting Fill Modifie + (0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting 
Rotation Mod + (0x1e000, 0x1e006,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e008, 0x1e018,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e01b, 0x1e021,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e023, 0x1e024,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e08f, 0x1e08f,), # Combining Cyrillic Small Letter Byelorussian-ukr + (0x1e130, 0x1e136,), # Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T + (0x1e2ae, 0x1e2ae,), # Toto Sign Rising Tone + (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini + (0x1e4ec, 0x1e4ef,), # Nag Mundari Sign Muhor ..Nag Mundari Sign Sutuh + (0x1e5ee, 0x1e5ef,), # (nil) + (0x1e6e3, 0x1e6e3,), # (nil) + (0x1e6e6, 0x1e6e6,), # (nil) + (0x1e6ee, 0x1e6ef,), # (nil) + (0x1e6f5, 0x1e6f5,), # (nil) + (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining + (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag + (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 +) diff --git a/contrib/python/wcwidth/py3/wcwidth/table_vs15.py b/contrib/python/wcwidth/py3/wcwidth/table_vs15.py deleted file mode 100644 index a5ede6f961e..00000000000 --- a/contrib/python/wcwidth/py3/wcwidth/table_vs15.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -Exports VS15_WIDE_TO_NARROW table keyed by supporting unicode version level. - -This code generated by wcwidth/bin/update-tables.py on 2024-02-14 19:59:22 UTC. 
-""" -VS15_WIDE_TO_NARROW = { - '9.0.0': ( - # Source: 9.0.0 - # Date: 2023-02-01, 02:22:54 GMT - # - (0x0231a, 0x0231b,), # Watch ..Hourglass - (0x023e9, 0x023ec,), # Black Right-pointing Dou..Black Down-pointing Doub - (0x023f0, 0x023f0,), # Alarm Clock - (0x023f3, 0x023f3,), # Hourglass With Flowing Sand - (0x025fd, 0x025fe,), # White Medium Small Squar..Black Medium Small Squar - (0x02614, 0x02615,), # Umbrella With Rain Drops..Hot Beverage - (0x02648, 0x02653,), # Aries ..Pisces - (0x0267f, 0x0267f,), # Wheelchair Symbol - (0x02693, 0x02693,), # Anchor - (0x026a1, 0x026a1,), # High Voltage Sign - (0x026aa, 0x026ab,), # Medium White Circle ..Medium Black Circle - (0x026bd, 0x026be,), # Soccer Ball ..Baseball - (0x026c4, 0x026c5,), # Snowman Without Snow ..Sun Behind Cloud - (0x026ce, 0x026ce,), # Ophiuchus - (0x026d4, 0x026d4,), # No Entry - (0x026ea, 0x026ea,), # Church - (0x026f2, 0x026f3,), # Fountain ..Flag In Hole - (0x026f5, 0x026f5,), # Sailboat - (0x026fa, 0x026fa,), # Tent - (0x026fd, 0x026fd,), # Fuel Pump - (0x02705, 0x02705,), # White Heavy Check Mark - (0x0270a, 0x0270b,), # Raised Fist ..Raised Hand - (0x02728, 0x02728,), # Sparkles - (0x0274c, 0x0274c,), # Cross Mark - (0x0274e, 0x0274e,), # Negative Squared Cross Mark - (0x02753, 0x02755,), # Black Question Mark Orna..White Exclamation Mark O - (0x02757, 0x02757,), # Heavy Exclamation Mark Symbol - (0x02795, 0x02797,), # Heavy Plus Sign ..Heavy Division Sign - (0x027b0, 0x027b0,), # Curly Loop - (0x027bf, 0x027bf,), # Double Curly Loop - (0x02b1b, 0x02b1c,), # Black Large Square ..White Large Square - (0x02b50, 0x02b50,), # White Medium Star - (0x02b55, 0x02b55,), # Heavy Large Circle - (0x03030, 0x03030,), # Wavy Dash - (0x0303d, 0x0303d,), # Part Alternation Mark - (0x03297, 0x03297,), # Circled Ideograph Congratulation - (0x03299, 0x03299,), # Circled Ideograph Secret - (0x1f004, 0x1f004,), # Mahjong Tile Red Dragon - (0x1f202, 0x1f202,), # Squared Katakana Sa - (0x1f21a, 0x1f21a,), # Squared 
Cjk Unified Ideograph-7121 - (0x1f22f, 0x1f22f,), # Squared Cjk Unified Ideograph-6307 - (0x1f237, 0x1f237,), # Squared Cjk Unified Ideograph-6708 - (0x1f30d, 0x1f30f,), # Earth Globe Europe-afric..Earth Globe Asia-austral - (0x1f315, 0x1f315,), # Full Moon Symbol - (0x1f31c, 0x1f31c,), # Last Quarter Moon With Face - (0x1f378, 0x1f378,), # Cocktail Glass - (0x1f393, 0x1f393,), # Graduation Cap - (0x1f3a7, 0x1f3a7,), # Headphone - (0x1f3ac, 0x1f3ae,), # Clapper Board ..Video Game - (0x1f3c2, 0x1f3c2,), # Snowboarder - (0x1f3c4, 0x1f3c4,), # Surfer - (0x1f3c6, 0x1f3c6,), # Trophy - (0x1f3ca, 0x1f3ca,), # Swimmer - (0x1f3e0, 0x1f3e0,), # House Building - (0x1f3ed, 0x1f3ed,), # Factory - (0x1f408, 0x1f408,), # Cat - (0x1f415, 0x1f415,), # Dog - (0x1f41f, 0x1f41f,), # Fish - (0x1f426, 0x1f426,), # Bird - (0x1f442, 0x1f442,), # Ear - (0x1f446, 0x1f449,), # White Up Pointing Backha..White Right Pointing Bac - (0x1f44d, 0x1f44e,), # Thumbs Up Sign ..Thumbs Down Sign - (0x1f453, 0x1f453,), # Eyeglasses - (0x1f46a, 0x1f46a,), # Family - (0x1f47d, 0x1f47d,), # Extraterrestrial Alien - (0x1f4a3, 0x1f4a3,), # Bomb - (0x1f4b0, 0x1f4b0,), # Money Bag - (0x1f4b3, 0x1f4b3,), # Credit Card - (0x1f4bb, 0x1f4bb,), # Personal Computer - (0x1f4bf, 0x1f4bf,), # Optical Disc - (0x1f4cb, 0x1f4cb,), # Clipboard - (0x1f4da, 0x1f4da,), # Books - (0x1f4df, 0x1f4df,), # Pager - (0x1f4e4, 0x1f4e6,), # Outbox Tray ..Package - (0x1f4ea, 0x1f4ed,), # Closed Mailbox With Lowe..Open Mailbox With Lowere - (0x1f4f7, 0x1f4f7,), # Camera - (0x1f4f9, 0x1f4fb,), # Video Camera ..Radio - (0x1f508, 0x1f508,), # Speaker - (0x1f50d, 0x1f50d,), # Left-pointing Magnifying Glass - (0x1f512, 0x1f513,), # Lock ..Open Lock - (0x1f550, 0x1f567,), # Clock Face One Oclock ..Clock Face Twelve-thirty - (0x1f610, 0x1f610,), # Neutral Face - (0x1f687, 0x1f687,), # Metro - (0x1f68d, 0x1f68d,), # Oncoming Bus - (0x1f691, 0x1f691,), # Ambulance - (0x1f694, 0x1f694,), # Oncoming Police Car - (0x1f698, 0x1f698,), # Oncoming 
Automobile - (0x1f6ad, 0x1f6ad,), # No Smoking Symbol - (0x1f6b2, 0x1f6b2,), # Bicycle - (0x1f6b9, 0x1f6ba,), # Mens Symbol ..Womens Symbol - (0x1f6bc, 0x1f6bc,), # Baby Symbol - ), -} diff --git a/contrib/python/wcwidth/py3/wcwidth/table_vs16.py b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py index 1ca25c66368..70e4a7373ff 100644 --- a/contrib/python/wcwidth/py3/wcwidth/table_vs16.py +++ b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py @@ -3,6 +3,7 @@ Exports VS16_NARROW_TO_WIDE table keyed by supporting unicode version level. This code generated by wcwidth/bin/update-tables.py on 2025-09-15 16:57:50 UTC. """ +# pylint: disable=duplicate-code VS16_NARROW_TO_WIDE = { '9.0.0': ( # Source: 9.0.0 diff --git a/contrib/python/wcwidth/py3/wcwidth/table_wide.py b/contrib/python/wcwidth/py3/wcwidth/table_wide.py index 5139b1f0d3b..3f422d48d33 100644 --- a/contrib/python/wcwidth/py3/wcwidth/table_wide.py +++ b/contrib/python/wcwidth/py3/wcwidth/table_wide.py @@ -1,8 +1,9 @@ """ Exports WIDE_EASTASIAN table keyed by supporting unicode version level. -This code generated by wcwidth/bin/update-tables.py on 2025-09-19 15:55:08 UTC. +This code generated by wcwidth/bin/update-tables.py on 2026-01-19 22:42:32 UTC. 
""" +# pylint: disable=duplicate-code WIDE_EASTASIAN = { '4.1.0': ( # Source: EastAsianWidth-4.1.0.txt @@ -20,7 +21,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312c,), # Bopomofo Letter B ..Bopomofo Letter Gn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H (0x031c0, 0x031cf,), # Cjk Stroke T ..Cjk Stroke N (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -59,7 +61,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312c,), # Bopomofo Letter B ..Bopomofo Letter Gn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H (0x031c0, 0x031cf,), # Cjk Stroke T ..Cjk Stroke N (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -98,7 +101,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # 
Katakana Letter Small Ku..Parenthesized Korean Cha @@ -136,7 +140,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -175,7 +180,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -216,7 +222,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -257,7 
+264,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -298,7 +306,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -339,7 +348,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -380,7 +390,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter 
Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -454,7 +465,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -567,7 +579,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312e,), # Bopomofo Letter B ..Bopomofo Letter O With D - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -680,7 +693,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # 
Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -795,7 +809,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -916,7 +931,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy (0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha @@ -1036,7 +1052,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 
0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha (0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto @@ -1158,7 +1175,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha (0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto @@ -1285,7 +1303,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q (0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha (0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto @@ -1411,7 +1430,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # 
Bopomofo Letter B ..Bopomofo Letter Nn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q (0x031ef, 0x0321e,), # Ideographic Description ..Parenthesized Korean Cha (0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto @@ -1539,7 +1559,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031e5,), # Ideographic Annotation L..(nil) (0x031ef, 0x0321e,), # Ideographic Description ..Parenthesized Korean Cha (0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto @@ -1667,7 +1688,8 @@ WIDE_EASTASIAN = { (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn - (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I + (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae (0x03190, 0x031e5,), # Ideographic Annotation L..(nil) (0x031ef, 0x0321e,), # Ideographic Description ..Parenthesized Korean Cha (0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto diff --git a/contrib/python/wcwidth/py3/wcwidth/table_zero.py b/contrib/python/wcwidth/py3/wcwidth/table_zero.py index ef99d09e9f4..50ac7fbfd09 100644 --- a/contrib/python/wcwidth/py3/wcwidth/table_zero.py +++ 
b/contrib/python/wcwidth/py3/wcwidth/table_zero.py @@ -1,14 +1,18 @@ """ Exports ZERO_WIDTH table keyed by supporting unicode version level. -This code generated by wcwidth/bin/update-tables.py on 2025-09-18 07:49:05 UTC. +This code generated by wcwidth/bin/update-tables.py on 2026-01-20 16:49:11 UTC. """ +# pylint: disable=duplicate-code ZERO_WIDTH = { '4.1.0': ( # Source: DerivedGeneralCategory-4.1.0.txt # Date: 2005-02-26, 02:35:50 GMT [MD] # - (0x00000, 0x00000,), # (nil) + (0x00000, 0x00008,), # (nil) + (0x0000e, 0x0001f,), # (nil) + (0x0007f, 0x00084,), # (nil) + (0x00086, 0x0009f,), # (nil) (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili (0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli @@ -124,21 +128,24 @@ ZERO_WIDTH = { (0x01dc0, 0x01dc3,), # Combining Dotted Grave A..Combining Suspension Mar (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02063,), # Word Joiner ..Invisible Separator - (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020eb,), # Combining Left Harpoon A..Combining Long Double So (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0d7b0, 0x0d7ff,), # Hangul Jungseong O-yeo ..(nil) + (0x0d7b0, 0x0dfff,), # Hangul Jungseong O-yeo ..(nil) (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika + (0x0fdd0, 0x0fdef,), # (nil) (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation 
Selector-16 (0x0fe20, 0x0fe23,), # Combining Ligature Left ..Combining Double Tilde R (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T + (0x0fffe, 0x0ffff,), # (nil) (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga @@ -149,15 +156,31 @@ ZERO_WIDTH = { (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0x1fffe, 0x1ffff,), # (nil) + (0x2fffe, 0x2ffff,), # (nil) + (0x3fffe, 0x3ffff,), # (nil) + (0x4fffe, 0x4ffff,), # (nil) + (0x5fffe, 0x5ffff,), # (nil) + (0x6fffe, 0x6ffff,), # (nil) + (0x7fffe, 0x7ffff,), # (nil) + (0x8fffe, 0x8ffff,), # (nil) + (0x9fffe, 0x9ffff,), # (nil) + (0xafffe, 0xaffff,), # (nil) + (0xbfffe, 0xbffff,), # (nil) + (0xcfffe, 0xcffff,), # (nil) + (0xdfffe, 0xe0fff,), # (nil) + (0xefffe, 0xeffff,), # (nil) + (0xffffe, 0xfffff,), # (nil) + (0x10fffe, 0x10ffff,), # (nil) ), '5.0.0': ( # Source: DerivedGeneralCategory-5.0.0.txt # Date: 2006-02-27, 23:41:27 GMT [MD] # - (0x00000, 0x00000,), # (nil) + (0x00000, 0x00008,), # (nil) + (0x0000e, 0x0001f,), # (nil) + (0x0007f, 0x00084,), # (nil) + (0x00086, 0x0009f,), # (nil) (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili (0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli @@ -278,21 +301,24 @@ ZERO_WIDTH = { 
(0x01dfe, 0x01dff,), # Combining Left Arrowhead..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02063,), # Word Joiner ..Invisible Separator - (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020ef,), # Combining Left Harpoon A..Combining Right Arrow Be (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0d7b0, 0x0d7ff,), # Hangul Jungseong O-yeo ..(nil) + (0x0d7b0, 0x0dfff,), # Hangul Jungseong O-yeo ..(nil) (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika + (0x0fdd0, 0x0fdef,), # (nil) (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe23,), # Combining Ligature Left ..Combining Double Tilde R (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T + (0x0fffe, 0x0ffff,), # (nil) (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga @@ -303,9 +329,22 @@ ZERO_WIDTH = { (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0xe0001, 0xe0001,), # Language Tag - 
(0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0x1fffe, 0x1ffff,), # (nil) + (0x2fffe, 0x2ffff,), # (nil) + (0x3fffe, 0x3ffff,), # (nil) + (0x4fffe, 0x4ffff,), # (nil) + (0x5fffe, 0x5ffff,), # (nil) + (0x6fffe, 0x6ffff,), # (nil) + (0x7fffe, 0x7ffff,), # (nil) + (0x8fffe, 0x8ffff,), # (nil) + (0x9fffe, 0x9ffff,), # (nil) + (0xafffe, 0xaffff,), # (nil) + (0xbfffe, 0xbffff,), # (nil) + (0xcfffe, 0xcffff,), # (nil) + (0xdfffe, 0xe0fff,), # (nil) + (0xefffe, 0xeffff,), # (nil) + (0xffffe, 0xfffff,), # (nil) + (0x10fffe, 0x10ffff,), # (nil) ), '5.1.0': ( # Source: DerivedGeneralCategory-5.1.0.txt @@ -444,12 +483,12 @@ ZERO_WIDTH = { (0x01dfe, 0x01dff,), # Combining Left Arrowhead..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a67c, 0x0a67d,), # Combining Cyrillic Kavyk..Combining Cyrillic Payer (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara @@ -468,7 +507,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 
0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O @@ -480,9 +520,7 @@ ZERO_WIDTH = { (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '5.2.0': ( # Source: DerivedGeneralCategory-5.2.0.txt @@ -633,13 +671,13 @@ ZERO_WIDTH = { (0x01dfd, 0x01dff,), # Combining Almost Equal T..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a67c, 0x0a67d,), # Combining Cyrillic Kavyk..Combining Cyrillic Payer (0x0a6f0, 0x0a6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk @@ -670,7 +708,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac 
(0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O @@ -685,9 +724,7 @@ ZERO_WIDTH = { (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '6.0.0': ( # Source: DerivedGeneralCategory-6.0.0.txt @@ -841,14 +878,14 @@ ZERO_WIDTH = { (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a67c, 0x0a67d,), # Combining Cyrillic Kavyk..Combining Cyrillic Payer (0x0a6f0, 0x0a6f1,), # 
Bamum Combining Mark Koq..Bamum Combining Mark Tuk @@ -879,7 +916,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O @@ -896,9 +934,7 @@ ZERO_WIDTH = { (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '6.1.0': ( # Source: DerivedGeneralCategory-6.1.0.txt @@ -1053,14 +1089,14 @@ ZERO_WIDTH = { (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining 
Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69f, 0x0a69f,), # Combining Cyrillic Letter Iotified E @@ -1094,7 +1130,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O @@ -1118,9 +1155,7 @@ ZERO_WIDTH = { (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '6.2.0': ( # Source: DerivedGeneralCategory-6.2.0.txt @@ -1275,14 +1310,14 @@ ZERO_WIDTH = { (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 
0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69f, 0x0a69f,), # Combining Cyrillic Letter Iotified E @@ -1316,7 +1351,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O @@ -1340,9 +1376,7 @@ ZERO_WIDTH = { (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '6.3.0': ( # Source: DerivedGeneralCategory-6.3.0.txt @@ -1498,14 +1532,14 @@ ZERO_WIDTH = { (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + 
(0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69f, 0x0a69f,), # Combining Cyrillic Letter Iotified E @@ -1539,7 +1573,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O @@ -1563,9 +1598,7 @@ ZERO_WIDTH = { (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '7.0.0': ( # Source: DerivedGeneralCategory-7.0.0.txt @@ -1722,14 +1755,14 @@ ZERO_WIDTH = { (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width 
Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69f, 0x0a69f,), # Combining Cyrillic Letter Iotified E @@ -1764,7 +1797,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2d,), # Combining Ligature Left ..Combining Conjoining Mac (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -1812,9 +1846,7 @@ ZERO_WIDTH = { (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), 
'8.0.0': ( # Source: DerivedGeneralCategory-8.0.0.txt @@ -1969,14 +2001,14 @@ ZERO_WIDTH = { (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -2011,7 +2043,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -2069,9 +2102,7 @@ ZERO_WIDTH = { (0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting Rotation Mod (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji 
Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '9.0.0': ( # Source: DerivedGeneralCategory-9.0.0.txt @@ -2085,16 +2116,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -2104,7 +2133,8 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d4, 0x00903,), # Arabic Small High Word A..Devanagari Sign Visarga + (0x008d4, 0x008e1,), # Arabic Small High Word A..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw 
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -2227,14 +2257,14 @@ ZERO_WIDTH = { (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -2269,7 +2299,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -2283,7 +2314,6 @@ ZERO_WIDTH = { (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign 
Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11173, 0x11173,), # Mahajani Sign Nukta @@ -2339,9 +2369,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '10.0.0': ( # Source: DerivedGeneralCategory-10.0.0.txt @@ -2355,16 +2383,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -2374,7 +2400,8 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa 
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d4, 0x00903,), # Arabic Small High Word A..Devanagari Sign Visarga + (0x008d4, 0x008e1,), # Arabic Small High Word A..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -2499,14 +2526,14 @@ ZERO_WIDTH = { (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -2541,7 +2568,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # 
(nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -2555,7 +2583,6 @@ ZERO_WIDTH = { (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11173, 0x11173,), # Mahajani Sign Nukta @@ -2622,9 +2649,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '11.0.0': ( # Source: DerivedGeneralCategory-11.0.0.txt @@ -2638,16 +2663,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # 
Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -2658,7 +2681,8 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga + (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -2784,14 +2808,14 @@ ZERO_WIDTH = { (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 
0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -2827,7 +2851,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -2843,8 +2868,6 @@ ZERO_WIDTH = { (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign - (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei @@ -2918,9 +2941,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '12.0.0': ( # Source: DerivedGeneralCategory-12.0.0.txt @@ -2934,16 +2955,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # 
Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -2954,7 +2973,8 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga + (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -3079,14 +3099,14 @@ ZERO_WIDTH = { (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic 
Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -3122,7 +3142,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -3138,8 +3159,6 @@ ZERO_WIDTH = { (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign - (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei @@ -3220,9 +3239,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), 
# Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '12.1.0': ( # Source: DerivedGeneralCategory-12.1.0.txt @@ -3236,16 +3253,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -3256,7 +3271,8 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga + (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -3381,14 
+3397,14 @@ ZERO_WIDTH = { (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -3424,7 +3440,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -3440,8 +3457,6 @@ ZERO_WIDTH = { (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign - (0x110cd, 0x110cd,), # Kaithi 
Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei @@ -3522,9 +3537,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '13.0.0': ( # Source: DerivedGeneralCategory-13.0.0.txt @@ -3538,16 +3551,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -3558,7 +3569,8 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 
0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga + (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -3683,14 +3695,14 @@ ZERO_WIDTH = { (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -3727,7 +3739,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) 
..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -3744,8 +3757,6 @@ ZERO_WIDTH = { (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign - (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei @@ -3834,9 +3845,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '14.0.0': ( # Source: DerivedGeneralCategory-14.0.0.txt @@ -3850,16 +3859,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic 
Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -3870,9 +3877,9 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov (0x00898, 0x0089f,), # Arabic Small High Word A..Arabic Half Madda Over M - (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga + (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -3997,14 +4004,14 @@ ZERO_WIDTH = { (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining 
Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -4041,7 +4048,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -4061,9 +4069,7 @@ ZERO_WIDTH = { (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R - (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei @@ -4155,9 +4161,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '15.0.0': ( # Source: DerivedGeneralCategory-15.0.0.txt @@ -4171,16 +4175,14 
@@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -4191,9 +4193,9 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov (0x00898, 0x0089f,), # Arabic Small High Word A..Arabic Half Madda Over M - (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga + (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -4319,14 +4321,14 @@ ZERO_WIDTH = { (0x01dc0, 0x01dff,), # 
Combining Dotted Grave A..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -4363,7 +4365,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -4384,9 +4387,7 @@ ZERO_WIDTH = { (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R - (0x110cd, 
0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei @@ -4486,9 +4487,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '15.1.0': ( # Source: DerivedGeneralCategory-15.1.0.txt @@ -4502,16 +4501,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -4522,9 +4519,9 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark 
Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov (0x00898, 0x0089f,), # Arabic Small High Word A..Arabic Half Madda Over M - (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga + (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -4650,14 +4647,14 @@ ZERO_WIDTH = { (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -4694,7 +4691,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break 
Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -4715,9 +4713,7 @@ ZERO_WIDTH = { (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R - (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei @@ -4817,9 +4813,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '16.0.0': ( # Source: DerivedGeneralCategory-16.0.0.txt @@ -4833,16 +4827,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 
0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -4853,9 +4845,9 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov (0x00897, 0x0089f,), # (nil) ..Arabic Half Madda Over M - (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga + (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -4981,14 +4973,14 @@ ZERO_WIDTH = { (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh 
Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -5025,7 +5017,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -5047,9 +5040,7 @@ ZERO_WIDTH = { (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R - (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei @@ -5159,9 +5150,7 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag 
Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), '17.0.0': ( # Source: DerivedGeneralCategory-17.0.0.txt @@ -5175,16 +5164,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan - (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem - (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -5195,9 +5182,9 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov (0x00897, 0x0089f,), # (nil) ..Arabic Half Madda Over M - (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga + (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta (0x0093e, 0x0094f,), # Devanagari 
Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu @@ -5324,14 +5311,14 @@ ZERO_WIDTH = { (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override - (0x02060, 0x02064,), # Word Joiner ..Invisible Plus - (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x03164, 0x03164,), # Hangul Filler (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -5368,7 +5355,8 @@ ZERO_WIDTH = { (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo (0x0feff, 0x0feff,), # Zero Width No-break Space - (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler + (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -5390,9 +5378,7 @@ ZERO_WIDTH = { (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga 
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta - (0x110bd, 0x110bd,), # Kaithi Number Sign (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R - (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei @@ -5507,8 +5493,6 @@ ZERO_WIDTH = { (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri - (0xe0001, 0xe0001,), # Language Tag - (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag - (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + (0xe0000, 0xe0fff,), # (nil) ), } diff --git a/contrib/python/wcwidth/py3/wcwidth/textwrap.py b/contrib/python/wcwidth/py3/wcwidth/textwrap.py new file mode 100644 index 00000000000..1b45213b5e1 --- /dev/null +++ b/contrib/python/wcwidth/py3/wcwidth/textwrap.py @@ -0,0 +1,387 @@ +""" +Sequence-aware text wrapping functions. + +This module provides functions for wrapping text that may contain terminal escape sequences, with +proper handling of Unicode grapheme clusters and character display widths. +""" +# std imports +import textwrap + +from typing import List + +# local +from .wcwidth import width as _width +from .wcwidth import iter_sequences +from .grapheme import iter_graphemes +from .escape_sequences import ZERO_WIDTH_PATTERN + + +class SequenceTextWrapper(textwrap.TextWrapper): + """ + Sequence-aware text wrapper extending :class:`textwrap.TextWrapper`. + + This wrapper properly handles terminal escape sequences and Unicode grapheme clusters when + calculating text width for wrapping. + + This implementation is based on the SequenceTextWrapper from the 'blessed' library, with + contributions from Avram Lubkin and grayjk. 
+ + The key difference from the blessed implementation is the addition of grapheme cluster support + via :func:`~.iter_graphemes`, providing width calculation for ZWJ emoji sequences, VS-16 emojis + and variations, regional indicator flags, and combining characters. + """ + + def __init__(self, width: int = 70, *, + control_codes: str = 'parse', + tabsize: int = 8, + ambiguous_width: int = 1, + **kwargs): + """ + Initialize the wrapper. + + :param width: Maximum line width in display cells. + :param control_codes: How to handle control sequences (see :func:`~.width`). + :param tabsize: Tab stop width for tab expansion. + :param ambiguous_width: Width to use for East Asian Ambiguous (A) characters. + :param kwargs: Additional arguments passed to :class:`textwrap.TextWrapper`. + """ + super().__init__(width=width, **kwargs) + self.control_codes = control_codes + self.tabsize = tabsize + self.ambiguous_width = ambiguous_width + + def _width(self, text: str) -> int: + """Measure text width accounting for sequences.""" + return _width(text, control_codes=self.control_codes, tabsize=self.tabsize, + ambiguous_width=self.ambiguous_width) + + def _strip_sequences(self, text: str) -> str: + """Strip all terminal sequences from text.""" + result = [] + for segment, is_seq in iter_sequences(text): + if not is_seq: + result.append(segment) + return ''.join(result) + + def _extract_sequences(self, text: str) -> str: + """Extract only terminal sequences from text.""" + result = [] + for segment, is_seq in iter_sequences(text): + if is_seq: + result.append(segment) + return ''.join(result) + + def _split(self, text: str) -> List[str]: # pylint: disable=too-many-locals + """ + Sequence-aware variant of :meth:`textwrap.TextWrapper._split`. + + This method ensures that terminal escape sequences don't interfere with the text splitting + logic, particularly for hyphen-based word breaking. 
It builds a position mapping from + stripped text to original text, calls the parent's _split on stripped text, then maps chunks + back. + """ + # pylint: disable=too-many-locals,too-many-branches + # Build a mapping from stripped text positions to original text positions. + # We track where each character ENDS so that sequences between characters + # attach to the following text (not preceding text). This ensures sequences + # aren't lost when whitespace is dropped. + # + # char_end[i] = position in original text right after the i-th stripped char + char_end: List[int] = [] + stripped_text = '' + original_pos = 0 + + for segment, is_seq in iter_sequences(text): + if not is_seq: + for char in segment: + original_pos += 1 + char_end.append(original_pos) + stripped_text += char + else: + # Escape sequences advance position but don't add to stripped text + original_pos += len(segment) + + # Add sentinel for final position + char_end.append(original_pos) + + # Use parent's _split on the stripped text + # pylint: disable-next=protected-access + stripped_chunks = textwrap.TextWrapper._split(self, stripped_text) + + # Handle text that contains only sequences (no visible characters). + # Return the sequences as a single chunk to preserve them. + if not stripped_chunks and text: + return [text] + + # Map the chunks back to the original text with sequences + result: List[str] = [] + stripped_pos = 0 + num_chunks = len(stripped_chunks) + + for idx, chunk in enumerate(stripped_chunks): + chunk_len = len(chunk) + + # Start is where previous character ended (or 0 for first chunk) + start_orig = 0 if stripped_pos == 0 else char_end[stripped_pos - 1] + + # End is where next character starts. For last chunk, use sentinel + # to include any trailing sequences. 
+ if idx == num_chunks - 1: + end_orig = char_end[-1] # sentinel includes trailing sequences + else: + end_orig = char_end[stripped_pos + chunk_len - 1] + + # Extract the corresponding portion from the original text + result.append(text[start_orig:end_orig]) + stripped_pos += chunk_len + + return result + + def _wrap_chunks(self, chunks: List[str]) -> List[str]: # pylint: disable=too-many-branches + """ + Wrap chunks into lines using sequence-aware width. + + Override TextWrapper._wrap_chunks to use _width instead of len. Follows stdlib's algorithm: + greedily fill lines, handle long words. + """ + # pylint: disable=too-many-branches + if not chunks: + return [] + + lines = [] + is_first_line = True + + # Arrange in reverse order so items can be efficiently popped + chunks = list(reversed(chunks)) + + while chunks: + current_line: List[str] = [] + current_width = 0 + + # Get the indent and available width for current line + indent = self.initial_indent if is_first_line else self.subsequent_indent + line_width = self.width - self._width(indent) + + # Drop leading whitespace (except at very start) + # When dropping, transfer any sequences to the next chunk. + # Only drop if there's actual whitespace text, not if it's only sequences. 
+ stripped = self._strip_sequences(chunks[-1]) + if self.drop_whitespace and lines and stripped and not stripped.strip(): + sequences = self._extract_sequences(chunks[-1]) + del chunks[-1] + if sequences and chunks: + chunks[-1] = sequences + chunks[-1] + + # Greedily add chunks that fit + while chunks: + chunk = chunks[-1] + chunk_width = self._width(chunk) + + if current_width + chunk_width <= line_width: + current_line.append(chunks.pop()) + current_width += chunk_width + else: + break + + # Handle chunk that's too long for any line + if chunks and self._width(chunks[-1]) > line_width: + self._handle_long_word( + chunks, current_line, current_width, line_width + ) + current_width = self._width(''.join(current_line)) + # Remove any empty chunks left by _handle_long_word + while chunks and not chunks[-1]: + del chunks[-1] + + # Drop trailing whitespace + # When dropping, transfer any sequences to the previous chunk. + # Only drop if there's actual whitespace text, not if it's only sequences. + stripped_last = self._strip_sequences(current_line[-1]) if current_line else '' + if (self.drop_whitespace and current_line and + stripped_last and not stripped_last.strip()): + sequences = self._extract_sequences(current_line[-1]) + current_width -= self._width(current_line[-1]) + del current_line[-1] + if sequences and current_line: + current_line[-1] = current_line[-1] + sequences + + if current_line: + line_content = ''.join(current_line) + # Strip trailing whitespace when drop_whitespace is enabled + # (matches CPython #140627 fix behavior) + if self.drop_whitespace: + line_content = line_content.rstrip() + lines.append(indent + line_content) + is_first_line = False + + return lines + + def _handle_long_word(self, reversed_chunks: List[str], + cur_line: List[str], cur_len: int, + width: int) -> None: + """ + Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`. 
+ + This method ensures that word boundaries are not broken mid-sequence, and respects grapheme + cluster boundaries when breaking long words. + """ + if width < 1: + space_left = 1 + else: + space_left = width - cur_len + + if self.break_long_words: + chunk = reversed_chunks[-1] + break_at_hyphen = False + hyphen_end = 0 + + # Handle break_on_hyphens: find last hyphen within space_left + if self.break_on_hyphens: + # Strip sequences to find hyphen in logical text + stripped = self._strip_sequences(chunk) + if len(stripped) > space_left: + # Find last hyphen in the portion that fits + hyphen_pos = stripped.rfind('-', 0, space_left) + if hyphen_pos > 0 and any(c != '-' for c in stripped[:hyphen_pos]): + # Map back to original position including sequences + hyphen_end = self._map_stripped_pos_to_original(chunk, hyphen_pos + 1) + break_at_hyphen = True + + # Break at grapheme boundaries to avoid splitting multi-codepoint characters + if break_at_hyphen: + actual_end = hyphen_end + else: + actual_end = self._find_break_position(chunk, space_left) + # If no progress possible (e.g., wide char exceeds line width), + # force at least one grapheme to avoid infinite loop. + # Only force when cur_line is empty; if line has content, + # appending nothing is safe and the line will be committed. 
+ if actual_end == 0 and not cur_line: + actual_end = self._find_first_grapheme_end(chunk) + cur_line.append(chunk[:actual_end]) + reversed_chunks[-1] = chunk[actual_end:] + + elif not cur_line: + cur_line.append(reversed_chunks.pop()) + + def _map_stripped_pos_to_original(self, text: str, stripped_pos: int) -> int: + """Map a position in stripped text back to original text position.""" + stripped_idx = 0 + original_idx = 0 + + for segment, is_seq in iter_sequences(text): + if is_seq: + original_idx += len(segment) + elif stripped_idx + len(segment) > stripped_pos: + # Position is within this segment + return original_idx + (stripped_pos - stripped_idx) + else: + stripped_idx += len(segment) + original_idx += len(segment) + + # Caller guarantees stripped_pos < total stripped chars, so we always + # return from within the loop. This line satisfies the type checker. + return original_idx # pragma: no cover + + def _find_break_position(self, text: str, max_width: int) -> int: + """Find string index in text that fits within max_width cells.""" + idx = 0 + width_so_far = 0 + + while idx < len(text): + char = text[idx] + + # Skip escape sequences (they don't add width) + if char == '\x1b': + match = ZERO_WIDTH_PATTERN.match(text, idx) + if match: + idx = match.end() + continue + + # Get grapheme + grapheme = next(iter_graphemes(text[idx:])) + + grapheme_width = self._width(grapheme) + if width_so_far + grapheme_width > max_width: + return idx # Found break point + + width_so_far += grapheme_width + idx += len(grapheme) + + # Caller guarantees chunk_width > max_width, so a grapheme always + # exceeds and we return from within the loop. Type checker requires this. 
+ return idx # pragma: no cover + + def _find_first_grapheme_end(self, text: str) -> int: + """Find the end position of the first grapheme.""" + return len(next(iter_graphemes(text))) + + +def wrap(text: str, width: int = 70, *, + control_codes: str = 'parse', + tabsize: int = 8, + ambiguous_width: int = 1, + initial_indent: str = '', + subsequent_indent: str = '', + break_long_words: bool = True, + break_on_hyphens: bool = True) -> List[str]: + r""" + Wrap text to fit within given width, returning a list of wrapped lines. + + Like :func:`textwrap.wrap`, but measures width in display cells rather than + characters, correctly handling wide characters, combining marks, and terminal + escape sequences. + + :param str text: Text to wrap, may contain terminal sequences. + :param int width: Maximum line width in display cells. + :param str control_codes: How to handle terminal sequences (see :func:`~.width`). + :param int tabsize: Tab stop width for tab expansion. + :param int ambiguous_width: Width to use for East Asian Ambiguous (A) + characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts. + :param str initial_indent: String prepended to first line. + :param str subsequent_indent: String prepended to subsequent lines. + :param bool break_long_words: If True, break words longer than width. + :param bool break_on_hyphens: If True, allow breaking at hyphens. + :returns: List of wrapped lines without trailing newlines. + :rtype: list[str] + + Like :func:`textwrap.wrap`, newlines in the input text are treated as + whitespace and collapsed. To preserve paragraph breaks, wrap each + paragraph separately:: + + >>> text = 'First line.\\nSecond line.' + >>> wrap(text, 40) # newline collapsed to space + ['First line. Second line.'] + >>> [line for para in text.split('\\n') + ... for line in (wrap(para, 40) if para else [''])] + ['First line.', 'Second line.'] + + .. 
seealso:: + + :func:`textwrap.wrap`, :class:`textwrap.TextWrapper` + Standard library text wrapping (character-based). + + :class:`.SequenceTextWrapper` + Class interface for advanced wrapping options. + + .. versionadded:: 0.3.0 + + Example:: + + >>> from wcwidth import wrap + >>> wrap('hello world', 5) + ['hello', 'world'] + >>> wrap('中文字符', 4) # CJK characters (2 cells each) + ['中文', '字符'] + """ + wrapper = SequenceTextWrapper( + width=width, + control_codes=control_codes, + tabsize=tabsize, + ambiguous_width=ambiguous_width, + initial_indent=initial_indent, + subsequent_indent=subsequent_indent, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, + ) + return wrapper.wrap(text) diff --git a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py index 92ca14afba1..09938dbea2c 100644 --- a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py +++ b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py @@ -67,52 +67,56 @@ import warnings from functools import lru_cache # local +from .bisearch import bisearch as _bisearch +from .grapheme import iter_graphemes from .table_vs16 import VS16_NARROW_TO_WIDE from .table_wide import WIDE_EASTASIAN from .table_zero import ZERO_WIDTH +from .control_codes import ILLEGAL_CTRL, VERTICAL_CTRL, HORIZONTAL_CTRL, ZERO_WIDTH_CTRL +from .table_ambiguous import AMBIGUOUS_EASTASIAN +from .escape_sequences import (ZERO_WIDTH_PATTERN, + CURSOR_LEFT_SEQUENCE, + CURSOR_RIGHT_SEQUENCE, + INDETERMINATE_EFFECT_SEQUENCE) from .unicode_versions import list_versions +_AMBIGUOUS_TABLE = AMBIGUOUS_EASTASIAN[next(iter(AMBIGUOUS_EASTASIAN))] -def _bisearch(ucs, table): - """ - Auxiliary function for binary search in interval table. - - :arg int ucs: Ordinal value of unicode character. - :arg list table: List of starting and ending ranges of ordinal values, - in form of ``[(start, end), ...]``. - :rtype: int - :returns: 1 if ordinal value ucs is found within lookup table, else 0. 
- """ - lbound = 0 - ubound = len(table) - 1 - - if ucs < table[0][0] or ucs > table[ubound][1]: - return 0 - while ubound >= lbound: - mid = (lbound + ubound) // 2 - if ucs > table[mid][1]: - lbound = mid + 1 - elif ucs < table[mid][0]: - ubound = mid - 1 - else: - return 1 - - return 0 +# Translation table to strip C0/C1 control characters for fast 'ignore' mode. +_CONTROL_CHAR_TABLE = str.maketrans('', '', ( + ''.join(chr(c) for c in range(0x00, 0x20)) + # C0: NUL through US (including tab) + '\x7f' + # DEL + ''.join(chr(c) for c in range(0x80, 0xa0)) # C1: U+0080-U+009F +)) -@lru_cache(maxsize=1000) -def wcwidth(wc, unicode_version='auto'): +@lru_cache(maxsize=2000) +def wcwidth(wc, unicode_version='auto', ambiguous_width=1): r""" - Given one Unicode character, return its printable length on a terminal. + Given one Unicode codepoint, return its printable length on a terminal. :param str wc: A single Unicode character. :param str unicode_version: A Unicode version number, such as - ``'6.0.0'``. A list of version levels suported by wcwidth + ``'6.0.0'``. A list of version levels supported by wcwidth is returned by :func:`list_versions`. Any version string may be specified without error -- the nearest - matching version is selected. When ``latest`` (default), the - highest Unicode version level is used. + matching version is selected. When ``'auto'`` (default), the + ``UNICODE_VERSION`` environment variable is used if set, otherwise + the highest Unicode version level is used. + + .. deprecated:: 0.3.0 + + This parameter is deprecated. Empirical data shows that Unicode + support in terminals varies not only by unicode version, but + by capabilities, Emojis, and specific language support. + + The default ``'auto'`` behavior is recommended for all use cases. + + :param int ambiguous_width: Width to use for East Asian Ambiguous (A) + characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts + where ambiguous characters display as double-width. 
See + :ref:`ambiguous_width` for details. :return: The width, in cells, necessary to display the character of Unicode string character, ``wc``. Returns 0 if the ``wc`` argument has no printable effect on a terminal (such as NUL '\0'), -1 if ``wc`` is @@ -141,11 +145,18 @@ def wcwidth(wc, unicode_version='auto'): if _bisearch(ucs, ZERO_WIDTH[_unicode_version]): return 0 - # 1 or 2 width - return 1 + _bisearch(ucs, WIDE_EASTASIAN[_unicode_version]) + # Wide (F/W categories) + if _bisearch(ucs, WIDE_EASTASIAN[_unicode_version]): + return 2 + + # Ambiguous width (A category) - only when ambiguous_width=2 + if ambiguous_width == 2 and _bisearch(ucs, _AMBIGUOUS_TABLE): + return 2 + return 1 -def wcswidth(pwcs, n=None, unicode_version='auto'): + +def wcswidth(pwcs, n=None, unicode_version='auto', ambiguous_width=1): """ Given a unicode string, return its printable length on a terminal. @@ -155,10 +166,21 @@ def wcswidth(pwcs, n=None, unicode_version='auto'): argument exists only for compatibility with the C POSIX function signature. It is suggested instead to use python's string slicing capability, ``wcswidth(pwcs[:n])`` - :param str unicode_version: An explicit definition of the unicode version - level to use for determination, may be ``auto`` (default), which uses - the Environment Variable, ``UNICODE_VERSION`` if defined, or the latest - available unicode version, otherwise. + :param str unicode_version: A Unicode version number, such as + ``'6.0.0'``, or ``'auto'`` (default) which uses the + ``UNICODE_VERSION`` environment variable if defined, or the latest + available unicode version otherwise. + + .. deprecated:: 0.3.0 + + This parameter is deprecated. Empirical data shows that Unicode + support in terminals varies not only by unicode version, but + by capabilities, Emojis, and specific language support. + + The default ``'auto'`` behavior is recommended for all use cases. + + :param int ambiguous_width: Width to use for East Asian Ambiguous (A) + characters. 
Default is ``1`` (narrow). Set to ``2`` for CJK contexts. :rtype: int :returns: The width, in cells, needed to display the first ``n`` characters of the unicode string ``pwcs``. Returns ``-1`` for C0 and C1 control @@ -167,40 +189,43 @@ def wcswidth(pwcs, n=None, unicode_version='auto'): See :ref:`Specification` for details of cell measurement. """ # this 'n' argument is a holdover for POSIX function + + # Fast path: pure ASCII printable strings are always width == length + if n is None and pwcs.isascii() and pwcs.isprintable(): + return len(pwcs) + _unicode_version = None end = len(pwcs) if n is None else n - width = 0 + total_width = 0 idx = 0 - last_measured_char = None + last_measured_idx = -2 # Track index of last measured char for VS16 while idx < end: char = pwcs[idx] if char == '\u200D': # Zero Width Joiner, do not measure this or next character idx += 2 continue - if char == '\uFE0F' and last_measured_char: - # on variation selector 16 (VS16) following another character, - # conditionally add '1' to the measured width if that character is - # known to be converted from narrow to wide by the VS16 character. + if char == '\uFE0F' and last_measured_idx >= 0: + # VS16 following a measured character: add 1 if that character is + # known to be converted from narrow to wide by VS16. 
if _unicode_version is None: _unicode_version = _wcversion_value(_wcmatch_version(unicode_version)) if _unicode_version >= (9, 0, 0): - width += _bisearch(ord(last_measured_char), VS16_NARROW_TO_WIDE["9.0.0"]) - last_measured_char = None + total_width += _bisearch(ord(pwcs[last_measured_idx]), + VS16_NARROW_TO_WIDE["9.0.0"]) + last_measured_idx = -2 # Prevent double application idx += 1 continue # measure character at current index - wcw = wcwidth(char, unicode_version) + wcw = wcwidth(char, unicode_version, ambiguous_width) if wcw < 0: # early return -1 on C0 and C1 control characters return wcw if wcw > 0: - # track last character measured to contain a cell, so that - # subsequent VS-16 modifiers may be understood - last_measured_char = char - width += wcw + last_measured_idx = idx + total_width += wcw idx += 1 - return width + return total_width @lru_cache(maxsize=128) @@ -274,11 +299,10 @@ def _wcmatch_version(given_version): except ValueError: # submitted value raises ValueError in int(), warn and use latest. - warnings.warn("UNICODE_VERSION value, {given_version!r}, is invalid. " + warnings.warn(f"UNICODE_VERSION value, {given_version!r}, is invalid. " "Value should be in form of `integer[.]+', the latest " - "supported unicode version {latest_version!r} has been " - "inferred.".format(given_version=given_version, - latest_version=latest_version)) + f"supported unicode version {latest_version!r} has been " + "inferred.") return latest_version # given version is less than any available version, return earliest @@ -290,11 +314,9 @@ def _wcmatch_version(given_version): # this probably isn't what you wanted, the oldest wcwidth.c you will # find in the wild is likely version 5 or 6, which we both support, # but it's better than not saying anything at all. - warnings.warn("UNICODE_VERSION value, {given_version!r}, is lower " + warnings.warn(f"UNICODE_VERSION value, {given_version!r}, is lower " "than any available unicode version. 
Returning lowest " - "version level, {earliest_version!r}".format( - given_version=given_version, - earliest_version=earliest_version)) + f"version level, {earliest_version!r}") return earliest_version # create list of versions which are less than our equal to given version, @@ -324,3 +346,457 @@ def _wcmatch_version(given_version): if cmp_next_version > cmp_given: return unicode_version assert False, ("Code path unreachable", given_version, unicode_versions) # pragma: no cover + + +def iter_sequences(text): + r""" + Iterate through text, yielding segments with sequence identification. + + This generator yields tuples of ``(segment, is_sequence)`` for each part + of the input text, where ``is_sequence`` is ``True`` if the segment is + a recognized terminal escape sequence. + + :param str text: String to iterate through. + :rtype: Iterator[tuple[str, bool]] + :returns: Iterator of (segment, is_sequence) tuples. + + .. versionadded:: 0.3.0 + + Example:: + + >>> list(iter_sequences('hello')) + [('hello', False)] + >>> list(iter_sequences('\\x1b[31mred')) + [('\\x1b[31m', True), ('red', False)] + >>> list(iter_sequences('\\x1b[1m\\x1b[31m')) + [('\\x1b[1m', True), ('\\x1b[31m', True)] + """ + idx = 0 + text_len = len(text) + segment_start = 0 + + while idx < text_len: + char = text[idx] + + if char == '\x1b': + # Yield any accumulated non-sequence text + if idx > segment_start: + yield (text[segment_start:idx], False) + + # Try to match an escape sequence + match = ZERO_WIDTH_PATTERN.match(text, idx) + if match: + yield (match.group(), True) + idx = match.end() + else: + # Lone ESC or unrecognized - yield as sequence anyway + yield (char, True) + idx += 1 + segment_start = idx + else: + idx += 1 + + # Yield any remaining text + if segment_start < text_len: + yield (text[segment_start:], False) + + +def _width_ignored_codes(text, ambiguous_width=1): + """ + Fast path for width() with control_codes='ignore'. 
+ + Strips escape sequences and control characters, then measures remaining text. + """ + return wcswidth( + strip_sequences(text).translate(_CONTROL_CHAR_TABLE), + ambiguous_width=ambiguous_width + ) + + +def width(text, *, control_codes='parse', tabsize=8, ambiguous_width=1): + r""" + Return printable width of text containing many kinds of control codes and sequences. + + Unlike :func:`wcswidth`, this function handles most control characters and many popular terminal + output sequences. Never returns -1. + + :param str text: String to measure. + :param str control_codes: How to handle control characters and sequences: + + - ``'parse'`` (default): Track horizontal cursor movement from BS ``\\b``, CR ``\\r``, TAB + ``\\t``, and cursor left and right movement sequences. Vertical movement (LF, VT, FF) and + indeterminate sequences are zero-width. Never raises. + - ``'strict'``: Like parse, but raises :exc:`ValueError` on control characters with + indeterminate results of the screen or cursor, like clear or vertical movement. Generally, + these should be handled with a virtual terminal emulator (like 'pyte'). + - ``'ignore'``: All C0 and C1 control characters and escape sequences are measured as + width 0. This is the fastest measurement for text already filtered or known not to contain + any kinds of control codes or sequences. TAB ``\\t`` is zero-width; for tab expansion, + pre-process: ``text.replace('\\t', ' ' * 8)``. + + :param int tabsize: Tab stop width for ``'parse'`` and ``'strict'`` modes. Default is 8. + Must be positive. Has no effect when ``control_codes='ignore'``. + :param int ambiguous_width: Width to use for East Asian Ambiguous (A) + characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts. + :rtype: int + :returns: Maximum cursor position reached, "extent", accounting for cursor movement sequences + present in ``text`` according to given parameters. This represents the rightmost column the + cursor reaches. Always a non-negative integer. 
+ + :raises ValueError: If ``control_codes='strict'`` and control characters with indeterminate + effects, such as vertical movement or clear sequences are encountered, or on unexpected + C0 or C1 control code. Also raised when ``control_codes`` is not one of the valid values. + + .. versionadded:: 0.3.0 + + Examples:: + + >>> width('hello') + 5 + >>> width('コンニチハ') + 10 + >>> width('\\x1b[31mred\\x1b[0m') + 3 + >>> width('\\x1b[31mred\\x1b[0m', control_codes='ignore') # same result (ignored) + 3 + >>> width('123\\b4') # backspace overwrites previous cell (outputs '124') + 3 + >>> width('abc\\t') # tab caused cursor to move to column 8 + 8 + >>> width('1\\x1b[10C') # '1' + cursor right 10, cursor ends on column 11 + 11 + >>> width('1\\x1b[10C', control_codes='ignore') # faster but wrong in this case + 1 + """ + # pylint: disable=too-complex,too-many-branches,too-many-statements + # This could be broken into sub-functions (#1, #3, and 6 especially), but for reduced overhead + # considering this function is a likely "hot path", they are inlined, breaking many of our + # complexity rules. + + # Fast path for ignore mode -- this is useful if you know the text is already "clean" + if control_codes == 'ignore': + return _width_ignored_codes(text, ambiguous_width) + + strict = control_codes == 'strict' + # Track absolute positions: tab stops need modulo on absolute column, CR resets to 0. + # Initialize max_extent to 0 so backward movement (CR, BS) won't yield negative width. + current_col = 0 + max_extent = 0 + idx = 0 + last_measured_idx = -2 # Track index of last measured char for VS16; -2 can never match idx-1 + + while idx < len(text): + char = text[idx] + + # 1. 
Handle ESC sequences + if char == '\x1b': + match = ZERO_WIDTH_PATTERN.match(text, idx) + if match: + seq = match.group() + if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq): + raise ValueError(f"Indeterminate cursor sequence at position {idx}") + # Apply cursor movement + right = CURSOR_RIGHT_SEQUENCE.match(seq) + if right: + current_col += int(right.group(1) or 1) + else: + left = CURSOR_LEFT_SEQUENCE.match(seq) + if left: + current_col = max(0, current_col - int(left.group(1) or 1)) + idx = match.end() + else: + idx += 1 + max_extent = max(max_extent, current_col) + continue + + # 2. Handle illegal and vertical control characters (zero width, error in strict) + if char in ILLEGAL_CTRL: + if strict: + raise ValueError(f"Illegal control character {ord(char):#x} at position {idx}") + idx += 1 + continue + + if char in VERTICAL_CTRL: + if strict: + raise ValueError(f"Vertical movement character {ord(char):#x} at position {idx}") + idx += 1 + continue + + # 3. Handle horizontal movement characters + if char in HORIZONTAL_CTRL: + if char == '\x09' and tabsize > 0: # Tab + current_col += tabsize - (current_col % tabsize) + elif char == '\x08': # Backspace + if current_col > 0: + current_col -= 1 + elif char == '\x0d': # Carriage return + current_col = 0 + max_extent = max(max_extent, current_col) + idx += 1 + continue + + # 4. Handle ZWJ (skip this and next character) + if char == '\u200D': + idx += 2 + continue + + # 5. Handle other zero-width characters (control chars) + if char in ZERO_WIDTH_CTRL: + idx += 1 + continue + + # 6. Handle VS16: converts preceding narrow character to wide + if char == '\uFE0F': + if last_measured_idx == idx - 1: + if _bisearch(ord(text[last_measured_idx]), VS16_NARROW_TO_WIDE["9.0.0"]): + current_col += 1 + max_extent = max(max_extent, current_col) + idx += 1 + continue + + # 7. 
Normal characters: measure with wcwidth + w = wcwidth(char, 'auto', ambiguous_width) + if w > 0: + current_col += w + max_extent = max(max_extent, current_col) + last_measured_idx = idx + idx += 1 + + return max_extent + + +def ljust(text, dest_width, fillchar=' ', *, control_codes='parse', ambiguous_width=1): + r""" + Return text left-justified in a string of given display width. + + :param str text: String to justify, may contain terminal sequences. + :param int dest_width: Total display width of result in terminal cells. + :param str fillchar: Single character for padding (default space). Must have + display width of 1 (not wide, not zero-width, not combining). Unicode + characters like ``'·'`` are acceptable. The width is not validated. + :param str control_codes: How to handle control sequences when measuring. + Passed to :func:`width` for measurement. + :param int ambiguous_width: Width to use for East Asian Ambiguous (A) + characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts. + :returns: Text padded on the right to reach ``dest_width``. + :rtype: str + + .. versionadded:: 0.3.0 + + Example:: + + >>> wcwidth.ljust('hi', 5) + 'hi ' + >>> wcwidth.ljust('\\x1b[31mhi\\x1b[0m', 5) + '\\x1b[31mhi\\x1b[0m ' + >>> wcwidth.ljust('\\U0001F468\\u200D\\U0001F469\\u200D\\U0001F467', 6) + '👨👩👧 ' + """ + if text.isascii() and text.isprintable(): + text_width = len(text) + else: + text_width = width(text, control_codes=control_codes, ambiguous_width=ambiguous_width) + padding_cells = max(0, dest_width - text_width) + return text + fillchar * padding_cells + + +def rjust(text, dest_width, fillchar=' ', *, control_codes='parse', ambiguous_width=1): + r""" + Return text right-justified in a string of given display width. + + :param str text: String to justify, may contain terminal sequences. + :param int dest_width: Total display width of result in terminal cells. + :param str fillchar: Single character for padding (default space). 
Must have + display width of 1 (not wide, not zero-width, not combining). Unicode + characters like ``'·'`` are acceptable. The width is not validated. + :param str control_codes: How to handle control sequences when measuring. + Passed to :func:`width` for measurement. + :param int ambiguous_width: Width to use for East Asian Ambiguous (A) + characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts. + :returns: Text padded on the left to reach ``dest_width``. + :rtype: str + + .. versionadded:: 0.3.0 + + Example:: + + >>> wcwidth.rjust('hi', 5) + ' hi' + >>> wcwidth.rjust('\\x1b[31mhi\\x1b[0m', 5) + ' \\x1b[31mhi\\x1b[0m' + >>> wcwidth.rjust('\\U0001F468\\u200D\\U0001F469\\u200D\\U0001F467', 6) + ' 👨👩👧' + """ + if text.isascii() and text.isprintable(): + text_width = len(text) + else: + text_width = width(text, control_codes=control_codes, ambiguous_width=ambiguous_width) + padding_cells = max(0, dest_width - text_width) + return fillchar * padding_cells + text + + +def center(text, dest_width, fillchar=' ', *, control_codes='parse', ambiguous_width=1): + r""" + Return text centered in a string of given display width. + + :param str text: String to center, may contain terminal sequences. + :param int dest_width: Total display width of result in terminal cells. + :param str fillchar: Single character for padding (default space). Must have + display width of 1 (not wide, not zero-width, not combining). Unicode + characters like ``'·'`` are acceptable. The width is not validated. + :param str control_codes: How to handle control sequences when measuring. + Passed to :func:`width` for measurement. + :param int ambiguous_width: Width to use for East Asian Ambiguous (A) + characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts. + :returns: Text padded on both sides to reach ``dest_width``. + :rtype: str + + For odd-width padding, the extra cell goes on the right (matching + Python's :meth:`str.center` behavior). + + .. 
versionadded:: 0.3.0 + + Example:: + + >>> wcwidth.center('hi', 6) + ' hi ' + >>> wcwidth.center('\\x1b[31mhi\\x1b[0m', 6) + ' \\x1b[31mhi\\x1b[0m ' + >>> wcwidth.center('\\U0001F468\\u200D\\U0001F469\\u200D\\U0001F467', 6) + ' 👨👩👧 ' + """ + if text.isascii() and text.isprintable(): + text_width = len(text) + else: + text_width = width(text, control_codes=control_codes, ambiguous_width=ambiguous_width) + total_padding = max(0, dest_width - text_width) + left_pad = total_padding // 2 + right_pad = total_padding - left_pad + return fillchar * left_pad + text + fillchar * right_pad + + +def strip_sequences(text): + r""" + Return text with all terminal escape sequences removed. + + Unknown or incomplete ESC sequences are preserved. + + :param str text: String that may contain terminal escape sequences. + :rtype: str + :returns: The input text with all escape sequences stripped. + + .. versionadded:: 0.3.0 + + Example:: + + >>> strip_sequences('\\x1b[31mred\\x1b[0m') + 'red' + >>> strip_sequences('hello') + 'hello' + >>> strip_sequences('\\x1b[1m\\x1b[31mbold red\\x1b[0m text') + 'bold red text' + """ + return ZERO_WIDTH_PATTERN.sub('', text) + + +def clip(text, start, end, *, fillchar=' ', tabsize=8, ambiguous_width=1): + r""" + Clip text to display columns ``(start, end)`` while preserving all terminal sequences. + + This function extracts a substring based on visible column positions rather than + character indices. Terminal escape sequences are preserved in the output since + they have zero display width. If a wide character (width 2) would be split at + either boundary, it is replaced with ``fillchar``. + + TAB characters (``\\t``) are expanded to spaces up to the next tab stop, + controlled by the ``tabsize`` parameter. + + Other cursor movement characters (backspace, carriage return) and cursor + movement sequences are passed through unchanged as zero-width. + + :param str text: String to clip, may contain terminal escape sequences. 
+ :param int start: Absolute starting column (inclusive, 0-indexed). + :param int end: Absolute ending column (exclusive). + :param str fillchar: Character to use when a wide character must be split at + a boundary (default space). Must have display width of 1. + :param int tabsize: Tab stop width (default 8). Set to 0 to pass tabs through + as zero-width (preserved in output but don't advance column position). + :param int ambiguous_width: Width to use for East Asian Ambiguous (A) + characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts. + :rtype: str + :returns: Substring of ``text`` spanning display columns ``(start, end)``, + with all terminal sequences preserved and wide characters at boundaries + replaced with ``fillchar``. + + .. versionadded:: 0.3.0 + + Example:: + + >>> clip('hello world', 0, 5) + 'hello' + >>> clip('中文字', 0, 3) # Wide char split at column 3 + '中 ' + >>> clip('a\\tb', 0, 10) # Tab expanded to spaces + 'a b' + """ + # pylint: disable=too-complex,too-many-locals,too-many-branches + start = max(start, 0) + if end <= start: + return '' + + # Fast path: printable ASCII only (no tabs, escapes, or wide chars) + if text.isascii() and text.isprintable(): + return text[start:end] + + output = [] + col = 0 + idx = 0 + text_len = len(text) + + while idx < text_len: + char = text[idx] + + # Escape sequences: always include (zero-width) + if char == '\x1b': + match = ZERO_WIDTH_PATTERN.match(text, idx) + if match: + output.append(match.group()) + idx = match.end() + else: + output.append(char) + idx += 1 + continue + + # TAB: expand to spaces (or pass through if tabsize=0) + if char == '\t': + if tabsize > 0: + next_tab = col + (tabsize - (col % tabsize)) + while col < next_tab: + if start <= col < end: + output.append(' ') + col += 1 + else: + output.append(char) + idx += 1 + continue + + # Grapheme clustering handles everything else (including control chars) + grapheme = next(iter_graphemes(text[idx:])) + w = width(grapheme, 
ambiguous_width=ambiguous_width) + + if w == 0: + # Zero-width (combining marks, etc): always include, doesn't advance column + output.append(grapheme) + else: + if col >= start and col + w <= end: + # Fully visible: include the grapheme + output.append(grapheme) + elif col < end and col + w > start: + # Partially visible: wide char spans boundary, replace with fillchar + output.append(fillchar * (min(end, col + w) - max(start, col))) + # Else: fully outside (start, end), omit entirely + col += w + + idx += len(grapheme) + + return ''.join(output) diff --git a/contrib/python/wcwidth/py3/ya.make b/contrib/python/wcwidth/py3/ya.make index 7f889617c63..798281c2205 100644 --- a/contrib/python/wcwidth/py3/ya.make +++ b/contrib/python/wcwidth/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(0.2.14) +VERSION(0.3.0) LICENSE(MIT) @@ -11,10 +11,16 @@ NO_LINT() PY_SRCS( TOP_LEVEL wcwidth/__init__.py - wcwidth/table_vs15.py + wcwidth/bisearch.py + wcwidth/control_codes.py + wcwidth/escape_sequences.py + wcwidth/grapheme.py + wcwidth/table_ambiguous.py + wcwidth/table_grapheme.py wcwidth/table_vs16.py wcwidth/table_wide.py wcwidth/table_zero.py + wcwidth/textwrap.py wcwidth/unicode_versions.py wcwidth/wcwidth.py ) |
