summaryrefslogtreecommitdiffstats
path: root/contrib/python/wcwidth/py3
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2026-02-07 12:14:07 +0300
committerrobot-piglet <[email protected]>2026-02-07 12:40:00 +0300
commit0e21c41b9d3a3a49c95448b2eea3fdf2b8e87d01 (patch)
tree41c06cabe5246cfb625ab8c56782fed748c7ad35 /contrib/python/wcwidth/py3
parent52112738aa5a3063a86695c6f99e6dd21350488c (diff)
Intermediate changes
commit_hash:3418900cda4febf1105de037f8c1ce1f8325c5aa
Diffstat (limited to 'contrib/python/wcwidth/py3')
-rw-r--r--contrib/python/wcwidth/py3/.dist-info/METADATA433
-rw-r--r--contrib/python/wcwidth/py3/README.rst389
-rw-r--r--contrib/python/wcwidth/py3/tests/GraphemeBreakTest.txt796
-rw-r--r--contrib/python/wcwidth/py3/tests/test_ambiguous.py80
-rw-r--r--contrib/python/wcwidth/py3/tests/test_clip.py223
-rw-r--r--contrib/python/wcwidth/py3/tests/test_core.py60
-rw-r--r--contrib/python/wcwidth/py3/tests/test_emojis.py50
-rw-r--r--contrib/python/wcwidth/py3/tests/test_grapheme.py146
-rw-r--r--contrib/python/wcwidth/py3/tests/test_justify.py56
-rw-r--r--contrib/python/wcwidth/py3/tests/test_table_integrity.py4
-rw-r--r--contrib/python/wcwidth/py3/tests/test_textwrap.py262
-rw-r--r--contrib/python/wcwidth/py3/tests/test_ucslevel.py12
-rw-r--r--contrib/python/wcwidth/py3/tests/test_width.py385
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/__init__.py22
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/bisearch.py29
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/control_codes.py46
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/escape_sequences.py69
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/grapheme.py299
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py189
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_grapheme.py2126
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_vs15.py103
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_vs16.py1
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_wide.py66
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_zero.py384
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/textwrap.py387
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/wcwidth.py596
-rw-r--r--contrib/python/wcwidth/py3/ya.make10
27 files changed, 6300 insertions, 923 deletions
diff --git a/contrib/python/wcwidth/py3/.dist-info/METADATA b/contrib/python/wcwidth/py3/.dist-info/METADATA
index 95adb5715ec..d147ef939e4 100644
--- a/contrib/python/wcwidth/py3/.dist-info/METADATA
+++ b/contrib/python/wcwidth/py3/.dist-info/METADATA
@@ -1,20 +1,18 @@
Metadata-Version: 2.4
Name: wcwidth
-Version: 0.2.14
+Version: 0.3.0
Summary: Measures the displayed width of unicode strings in a terminal
-Home-page: https://github.com/jquast/wcwidth
-Author: Jeff Quast
-Author-email: [email protected]
-License: MIT
+Project-URL: Homepage, https://github.com/jquast/wcwidth
+Author-email: Jeff Quast <[email protected]>
+License-Expression: MIT
+License-File: LICENSE
Keywords: cjk,combining,console,eastasian,emoji,emulator,terminal,unicode,wcswidth,wcwidth,xterm
-Classifier: Intended Audience :: Developers
-Classifier: Natural Language :: English
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
-Classifier: License :: OSI Approved :: MIT License
+Classifier: Intended Audience :: Developers
+Classifier: Natural Language :: English
Classifier: Operating System :: POSIX
-Classifier: Programming Language :: Python :: 3.6
-Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
@@ -22,22 +20,12 @@ Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
+Classifier: Topic :: Software Development :: Internationalization
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Software Development :: Localization
-Classifier: Topic :: Software Development :: Internationalization
Classifier: Topic :: Terminals
-Requires-Python: >=3.6
-License-File: LICENSE
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: home-page
-Dynamic: keywords
-Dynamic: license
-Dynamic: license-file
-Dynamic: requires-python
-Dynamic: summary
+Requires-Python: >=3.8
+Description-Content-Type: text/x-rst
|pypi_downloads| |codecov| |license|
@@ -45,98 +33,313 @@ Dynamic: summary
Introduction
============
-This library is mainly for CLI programs that carefully produce output for
-Terminals, or make pretend to be an emulator.
-
-**Problem Statement**: The printable length of *most* strings are equal to the
-number of cells they occupy on the screen ``1 character : 1 cell``. However,
-there are categories of characters that *occupy 2 cells* (full-wide), and
-others that *occupy 0* cells (zero-width).
-
-**Solution**: POSIX.1-2001 and POSIX.1-2008 conforming systems provide
-`wcwidth(3)`_ and `wcswidth(3)`_ C functions of which this python module's
-functions precisely copy. *These functions return the number of cells a
-unicode string is expected to occupy.*
+This library is mainly for CLI/TUI programs that carefully produce output for Terminals.
Installation
------------
-The stable version of this package is maintained on pypi, install using pip::
+The stable version of this package is maintained on pypi, install or upgrade, using pip::
- pip install wcwidth
+ pip install --upgrade wcwidth
-Example
+Problem
-------
-**Problem**: given the following phrase (Japanese),
+All Python string-formatting functions, `textwrap.wrap()`_, `str.ljust()`_, `str.rjust()`_, and
+`str.center()`_ **incorrectly** measure the displayed width of a string as equal to the number of
+its codepoints.
- >>> text = u'コンニチハ'
+Some examples of **incorrect results**:
-Python **incorrectly** uses the *string length* of 5 codepoints rather than the
-*printable length* of 10 cells, so that when using the `rjust` function, the
-output length is wrong::
+.. code-block:: python
- >>> print(len('コンニチハ'))
- 5
+ >>> # result consumes 16 total cells, 11 expected,
+ >>> 'コンニチハ'.rjust(11, 'X')
+ 'XXXXXXコンニチハ'
- >>> print('コンニチハ'.rjust(20, '_'))
- _______________コンニチハ
+ >>> # result consumes 5 total cells, 6 expected,
+ >>> 'café'.center(6, 'X')
+ 'caféX'
-By defining our own "rjust" function that uses wcwidth, we can correct this::
+Solution
+--------
- >>> def wc_rjust(text, length, padding=' '):
- ... from wcwidth import wcswidth
- ... return padding * max(0, (length - wcswidth(text))) + text
- ...
+The lowest-level functions in this library are the POSIX.1-2001 and POSIX.1-2008 `wcwidth(3)`_ and
+`wcswidth(3)`_, which this library precisely copies by interface as `wcwidth()`_ and `wcswidth()`_.
+These functions return -1 when C0 and C1 control codes are present.
-Our **Solution** uses wcswidth to determine the string length correctly::
+An easy-to-use `width()`_ function is provided as a wrapper of `wcswidth()`_ that is also capable of
+measuring most terminal control codes and sequences, like colors, bold, tabstops, and horizontal
+cursor movement.
- >>> from wcwidth import wcswidth
- >>> print(wcswidth('コンニチハ'))
- 10
+Text-justification is solved by the grapheme and sequence-aware functions `ljust()`_,
+`rjust()`_, `center()`_, and `wrap()`_, serving as drop-in replacements to python standard functions
+of the same names.
- >>> print(wc_rjust('コンニチハ', 20, '_'))
- __________コンニチハ
+The iterator functions `iter_graphemes()`_ and `iter_sequences()`_ allow for careful navigation of
+grapheme and terminal control sequence boundaries. The `clip()`_ function extracts substrings by
+display column positions, and `strip_sequences()`_ removes terminal escape sequences from text.
+Discrepancies
+-------------
-Choosing a Version
-------------------
+You may find that support *varies* for complex unicode sequences or codepoints.
-Export an environment variable, ``UNICODE_VERSION``. This should be done by
-*terminal emulators* or those developers experimenting with authoring one of
-their own, from shell::
+A companion utility, `jquast/ucs-detect`_ was authored to gather and publish the results of Wide
+character support and version level, language support, zero-width joiner, and variation-16 support
+as a `General Tabulated Summary`_ by terminal emulator software and version.
- $ export UNICODE_VERSION=13.0
+========
+Overview
+========
-If unspecified, the latest version is used. If your Terminal Emulator does not
-export this variable, you can use the `jquast/ucs-detect`_ utility to
-automatically detect and export it to your shell.
+wcwidth()
+---------
-wcwidth, wcswidth
------------------
Use function ``wcwidth()`` to determine the length of a *single unicode
-character*, and ``wcswidth()`` to determine the length of many, a *string
-of unicode characters*.
+codepoint*.
+
+A brief overview, through examples, for all of the public API functions.
+
+Full API Documentation at https://wcwidth.readthedocs.io/en/latest/api.html
+
+wcwidth()
+---------
+
+Measures width of a single codepoint,
+
+.. code-block:: python
+
+ >>> # '♀' narrow emoji
+ >>> wcwidth.wcwidth('\u2640')
+ 1
+
+Use function `wcwidth()`_ to determine the length of a *single unicode character*.
+
+See `Specification <Specification_from_pypi_>`_ of character measurements. Note that ``-1`` is
+returned for control codes.
+
+wcswidth()
+----------
-Briefly, return values of function ``wcwidth()`` are:
+Measures width of a string, returns -1 for control codes.
-``-1``
- Indeterminate (not printable).
+.. code-block:: python
-``0``
- Does not advance the cursor, such as NULL or Combining.
+ >>> # '♀️' emoji w/vs-16
+ >>> wcwidth.wcswidth('♀️')
+ 2
-``2``
- Characters of category East Asian Wide (W) or East Asian
- Full-width (F) which are displayed using two terminal cells.
+Use function `wcswidth()`_ to determine the length of many, a *string of unicode characters*.
+
+See `Specification <Specification_from_pypi_>`_ of character measurements. Note that
+``-1`` is returned if a control code occurs anywhere in the string.
+
+width()
+-------
+
+Measures width of a string, with improved handling of ``control_codes``
+
+.. code-block:: python
+
+ >>> # same support as wcswidth(), eg. regional indicator flag:
+ >>> wcwidth.width('\U0001F1FF\U0001F1FC')
+ 2
+ >>> # but also supports SGR colored text, 'WARN', followed by SGR reset
+ >>> wcwidth.width('\x1b[38;2;255;150;100mWARN\x1b[0m')
+ 4
+ >>> # tabs,
+ >>> wcwidth.width('\t', tabsize=4)
+ 4
+ >>> # or, tab and all other control characters can be ignored
+ >>> wcwidth.width('\t', control_codes='ignore')
+ 0
+ >>> # "vertical" control characters are ignored
+ >>> wcwidth.width('\n')
+ 0
+ >>> # as well as sequences with "indeterminate" effects like Home + Clear
+ >>> wcwidth.width('\x1b[H\x1b[2J')
+ 0
+ >>> # or, raise ValueError for "indeterminate" effects using control_codes='strict'
+ >>> wcwidth.width('\n', control_codes='strict')
+ Traceback (most recent call last):
+ ...
+ ValueError: Vertical movement character 0xa at position 0
+
+Use ``control_codes='ignore'`` when the input is known not to contain any control characters or
+terminal sequences for slightly improved performance. Note that TAB (``'\t'``) is a control
+character and is also ignored, you may want to use `str.expandtabs()`_, first.
+
+iter_sequences()
+----------------
+
+Iterates through text, segmented by terminal sequence,
+
+.. code-block:: python
+
+ >>> list(wcwidth.iter_sequences('hello'))
+ [('hello', False)]
+ >>> list(wcwidth.iter_sequences('\x1b[31mred\x1b[0m'))
+ [('\x1b[31m', True), ('red', False), ('\x1b[0m', True)]
+
+Use `iter_sequences()`_ to split text into segments of plain text and escape sequences. Each tuple
+contains the segment string and a boolean indicating whether it is an escape sequence (``True``) or
+text (``False``).
+
+iter_graphemes()
+----------------
+
+Use `iter_graphemes()`_ to iterate over *grapheme clusters* of a string.
+
+.. code-block:: python
+
+ >>> from wcwidth import iter_graphemes
+ >>> # ok + Regional Indicator 'Z', 'W' (Zimbabwe)
+ >>> list(wcwidth.iter_graphemes('ok\U0001F1FF\U0001F1FC'))
+ ['o', 'k', '🇿🇼']
+
+ >>> # cafe + combining acute accent
+ >>> list(wcwidth.iter_graphemes('cafe\u0301'))
+ ['c', 'a', 'f', 'é']
+
+ >>> # ok + Emoji Man + ZWJ + Woman + ZWJ + Girl
+ >>> list(wcwidth.iter_graphemes('ok\U0001F468\u200D\U0001F469\u200D\U0001F467'))
+ ['o', 'k', '👨\u200d👩\u200d👧']
+
+A grapheme cluster is what a user perceives as a single character, even if it is composed of
+multiple Unicode codepoints. This function implements `Unicode Standard Annex #29`_ grapheme cluster
+boundary rules.
+
+ljust()
+-------
-``1``
- All others.
+Use `ljust()`_ as replacement of `str.ljust()`_:
-Function ``wcswidth()`` simply returns the sum of all values for each character
-along a string, or ``-1`` when it occurs anywhere along a string.
+.. code-block:: python
-Full API Documentation at https://wcwidth.readthedocs.io
+ >>> 'コンニチハ'.ljust(11, '*') # don't do this
+ 'コンニチハ******'
+ >>> wcwidth.ljust('コンニチハ', 11, '*') # do this!
+ 'コンニチハ*'
+
+rjust()
+-------
+
+Use `rjust()`_ as replacement of `str.rjust()`_:
+
+.. code-block:: python
+
+ >>> 'コンニチハ'.rjust(11, '*') # don't do this
+ '******コンニチハ'
+ >>> wcwidth.rjust('コンニチハ', 11, '*') # do this!
+ '*コンニチハ'
+
+center()
+--------
+
+Use `center()`_ as replacement of `str.center()`_:
+
+.. code-block:: python
+
+ >>> 'cafe\u0301'.center(6, '*') # don't do this
+ 'café*'
+ >>> wcwidth.center('cafe\u0301', 6, '*') # do this!
+ '*café*'
+
+wrap()
+------
+
+Use function ``wrap()`` to wrap text containing terminal sequences, Unicode grapheme
+clusters, and wide characters to a given display width.
+
+.. code-block:: python
+
+ >>> from wcwidth import wrap
+ >>> # Basic wrapping
+ >>> wrap('hello world', 5)
+ ['hello', 'world']
+
+ >>> # Wrapping CJK text (each character is 2 cells wide)
+ >>> wrap('コンニチハ', 4)
+ ['コン', 'ニチ', 'ハ']
+
+ >>> # Text with ANSI color sequences
+ >>> wrap('\x1b[31mhello world\x1b[0m', 5)
+ ['\x1b[31mhello', 'world\x1b[0m']
+
+clip()
+------
+
+Use `clip()`_ to extract a substring by column positions, preserving terminal sequences.
+
+.. code-block:: python
+
+ >>> from wcwidth import clip
+ >>> # Wide characters split to Narrow boundaries using fillchar=' '
+ >>> clip('中文字', 0, 3)
+ '中 '
+ >>> clip('中文字', 1, 5, fillchar='.')
+ '.文.'
+
+ >>> # *ALL* Terminal sequences are preserved
+ >>> clip('\x1b[31m中文\x1b[0m', 0, 3)
+ '\x1b[31m中 \x1b[0m'
+
+strip_sequences()
+-----------------
+
+Use `strip_sequences()`_ to remove all terminal escape sequences from text.
+
+.. code-block:: python
+
+ >>> from wcwidth import strip_sequences
+ >>> strip_sequences('\x1b[31mred\x1b[0m')
+ 'red'
+
+.. _ambiguous_width:
+
+ambiguous_width
+---------------
+
+Some Unicode characters have "East Asian Ambiguous" (A) width. These characters display as 1 cell by
+default, matching Western terminal contexts, but many CJK (Chinese, Japanese, Korean) environments
+may have a preference for 2 cells. This is often found as a boolean option, "Ambiguous width as wide"
+in Terminal Emulator software preferences.
+
+By default, wcwidth treats ambiguous characters as narrow (width 1). For CJK environments where your
+terminal is configured to display ambiguous characters as double-width, pass ``ambiguous_width=2``:
+
+.. code-block:: python
+
+ >>> # CIRCLED DIGIT ONE - ambiguous width
+ >>> wcwidth.width('\u2460')
+ 1
+ >>> wcwidth.width('\u2460', ambiguous_width=2)
+ 2
+
+The ``ambiguous_width`` parameter is available on all width-measuring functions: `wcwidth()`_,
+`wcswidth()`_, `width()`_, `ljust()`_, `rjust()`_, `center()`_, `wrap()`_, and `clip()`_.
+
+**Terminal Detection**
+
+The most reliable method to detect whether a terminal profile is set for "Ambiguous width as wide"
+mode is to display an ambiguous character surrounded by a pair of Cursor Position Report (CPR)
+queries with a terminal in cooked or raw mode, and to parse the responses for their ``(y, x)``
+locations, and measure the difference of the ``x`` positions. This code should also check whether
+it is attached to a terminal, apply a timeout, and then fall back to the preferred locale.
+
+`jquast/blessed`_ library provides a `Terminal.detect_ambiguous_width()`_ method:
+
+.. code-block:: python
+
+ >>> import blessed, functools
+ >>> # Detect terminal ambiguous width as wide (2) or narrow (1)
+ >>> ambiguous_width = blessed.Terminal().detect_ambiguous_width()
+ >>> # Define a new 'width' function with this argument
+ >>> awidth = functools.partial(wcwidth.width, ambiguous_width=ambiguous_width)
+ >>> # result depends on attached terminal mode
+ >>> awidth('\u2460')
+ 1
==========
Developing
@@ -146,9 +349,13 @@ Install wcwidth in editable mode::
pip install -e .
-Execute unit tests using tox_ for all supported Python versions::
+Execute all code generation, autoformatters, linters and unit tests using tox::
- tox -e py36,py37,py38,py39,py310,py311,py312,py313,py314
+ tox
+
+Or execute individual tasks, see ``tox -lv`` for all available targets::
+
+ tox -e pylint,py38,py314
Updating Unicode Version
------------------------
@@ -182,7 +389,7 @@ To upgrade requirements for updating unicode version, run::
To upgrade requirements for testing, run::
- tox -e update_requirements37,update_requirements39
+ tox -e update_requirements38,update_requirements39
To upgrade requirements for building documentation, run::
@@ -238,10 +445,15 @@ This library is used in:
- `saulpw/visidata`_: Terminal spreadsheet multitool for discovering and
arranging data
+- `jquast/ucs-detect`_: Utility for unicode support detection.
+
===============
Other Languages
===============
+There are similar implementations of the `wcwidth()`_ and `wcswidth()`_ functions in other
+languages.
+
- `timoxley/wcwidth`_: JavaScript
- `janlelis/unicode-display_width`_: Ruby
- `alecrabbit/php-wcwidth`_: PHP
@@ -253,11 +465,25 @@ Other Languages
- `joachimschmidt557/zig-wcwidth`_: Zig
- `fumiyas/wcwidth-cjk`_: `LD_PRELOAD` override
- `joshuarubin/wcwidth9`_: Unicode version 9 in C
+- `spectreconsole/wcwidth`_: C#
=======
History
=======
+0.3.0 *2026-01-21*
+ * **New** Function `iter_graphemes()`_. `PR #165`_.
+ * **New** Functions `width()`_ and `iter_sequences()`_. `PR #166`_.
+ * **New** Functions `ljust()`_, `rjust()`_, `center()`_. `PR #168`_.
+ * **New** Function `wrap()`_. `PR #169`_.
+ * **Performance** improvement in `wcswidth()`_. `PR #171`_.
+ * **New** argument ``ambiguous_width`` to all functions. `PR #172`_.
+ * **New** Functions `clip()`_ and `strip_sequences()`_. `PR #173`_.
+ * **Bugfix** Characters with ``Default_Ignorable_Code_Point`` property now
+ return width 0. `PR #174`_.
+ * **Bugfix** Characters with ``Prepended_Concatenation_Mark`` property now
+ return width 1. `PR #175`_.
+
0.2.14 *2025-09-22*
* **Drop Support** for Python 2.7 and 3.5. `PR #117`_.
* **Update** tables to include Unicode Specifications 16.0.0 and 17.0.0.
@@ -269,10 +495,10 @@ History
* **Bugfix** zero-width support for Hangul Jamo (Korean)
0.2.12 *2023-11-21*
- * re-release to remove .pyi file misplaced in wheel files `Issue #101`_.
+ * **Bugfix** Re-release to remove `.pyi` files misplaced in wheel `Issue #101`_.
0.2.11 *2023-11-20*
- * Include tests files in the source distribution (`PR #98`_, `PR #100`_).
+ * **Updated** Include tests files in the source distribution (`PR #98`_, `PR #100`_).
0.2.10 *2023-11-13*
* **Bugfix** accounting of some kinds of emoji sequences using U+FE0F
@@ -388,6 +614,15 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`PR #117`: https://github.com/jquast/wcwidth/pull/117
.. _`PR #146`: https://github.com/jquast/wcwidth/pull/146
.. _`PR #149`: https://github.com/jquast/wcwidth/pull/149
+.. _`PR #165`: https://github.com/jquast/wcwidth/pull/165
+.. _`PR #166`: https://github.com/jquast/wcwidth/pull/166
+.. _`PR #168`: https://github.com/jquast/wcwidth/pull/168
+.. _`PR #169`: https://github.com/jquast/wcwidth/pull/169
+.. _`PR #171`: https://github.com/jquast/wcwidth/pull/171
+.. _`PR #172`: https://github.com/jquast/wcwidth/pull/172
+.. _`PR #173`: https://github.com/jquast/wcwidth/pull/173
+.. _`PR #174`: https://github.com/jquast/wcwidth/pull/174
+.. _`PR #175`: https://github.com/jquast/wcwidth/pull/175
.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101
.. _`jquast/blessed`: https://github.com/jquast/blessed
.. _`selectel/pyte`: https://github.com/selectel/pyte
@@ -413,12 +648,32 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`joachimschmidt557/zig-wcwidth`: https://github.com/joachimschmidt557/zig-wcwidth
.. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk
.. _`joshuarubin/wcwidth9`: https://github.com/joshuarubin/wcwidth9
+.. _`spectreconsole/wcwidth`: https://github.com/spectreconsole/wcwidth
.. _`python-cmd2/cmd2`: https://github.com/python-cmd2/cmd2
.. _`stratis-storage/stratis-cli`: https://github.com/stratis-storage/stratis-cli
.. _`ihabunek/toot`: https://github.com/ihabunek/toot
.. _`saulpw/visidata`: https://github.com/saulpw/visidata
.. _`pip-tools`: https://pip-tools.readthedocs.io/
.. _`sphinx`: https://www.sphinx-doc.org/
+.. _`textwrap.wrap()`: https://docs.python.org/3/library/textwrap.html#textwrap.wrap
+.. _`str.ljust()`: https://docs.python.org/3/library/stdtypes.html#str.ljust
+.. _`str.rjust()`: https://docs.python.org/3/library/stdtypes.html#str.rjust
+.. _`str.center()`: https://docs.python.org/3/library/stdtypes.html#str.center
+.. _`str.expandtabs()`: https://docs.python.org/3/library/stdtypes.html#str.expandtabs
+.. _`General Tabulated Summary`: https://ucs-detect.readthedocs.io/results.html#tabulated-results
+.. _`wcwidth()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.wcwidth
+.. _`wcswidth()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.wcswidth
+.. _`width()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.width
+.. _`iter_graphemes()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_graphemes
+.. _`ljust()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.ljust
+.. _`rjust()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.rjust
+.. _`center()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.center
+.. _`wrap()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.wrap
+.. _`clip()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.clip
+.. _`strip_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.strip_sequences
+.. _`iter_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_sequences
+.. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/
+.. _`Terminal.detect_ambiguous_width()`: https://blessed.readthedocs.io/en/latest/api/terminal.html#blessed.terminal.Terminal.detect_ambiguous_width
.. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi
:alt: Downloads
:target: https://pypi.org/project/wcwidth/
diff --git a/contrib/python/wcwidth/py3/README.rst b/contrib/python/wcwidth/py3/README.rst
deleted file mode 100644
index 1b92ef4ca03..00000000000
--- a/contrib/python/wcwidth/py3/README.rst
+++ /dev/null
@@ -1,389 +0,0 @@
-|pypi_downloads| |codecov| |license|
-
-============
-Introduction
-============
-
-This library is mainly for CLI programs that carefully produce output for
-Terminals, or make pretend to be an emulator.
-
-**Problem Statement**: The printable length of *most* strings are equal to the
-number of cells they occupy on the screen ``1 character : 1 cell``. However,
-there are categories of characters that *occupy 2 cells* (full-wide), and
-others that *occupy 0* cells (zero-width).
-
-**Solution**: POSIX.1-2001 and POSIX.1-2008 conforming systems provide
-`wcwidth(3)`_ and `wcswidth(3)`_ C functions of which this python module's
-functions precisely copy. *These functions return the number of cells a
-unicode string is expected to occupy.*
-
-Installation
-------------
-
-The stable version of this package is maintained on pypi, install using pip::
-
- pip install wcwidth
-
-Example
--------
-
-**Problem**: given the following phrase (Japanese),
-
- >>> text = u'コンニチハ'
-
-Python **incorrectly** uses the *string length* of 5 codepoints rather than the
-*printable length* of 10 cells, so that when using the `rjust` function, the
-output length is wrong::
-
- >>> print(len('コンニチハ'))
- 5
-
- >>> print('コンニチハ'.rjust(20, '_'))
- _______________コンニチハ
-
-By defining our own "rjust" function that uses wcwidth, we can correct this::
-
- >>> def wc_rjust(text, length, padding=' '):
- ... from wcwidth import wcswidth
- ... return padding * max(0, (length - wcswidth(text))) + text
- ...
-
-Our **Solution** uses wcswidth to determine the string length correctly::
-
- >>> from wcwidth import wcswidth
- >>> print(wcswidth('コンニチハ'))
- 10
-
- >>> print(wc_rjust('コンニチハ', 20, '_'))
- __________コンニチハ
-
-
-Choosing a Version
-------------------
-
-Export an environment variable, ``UNICODE_VERSION``. This should be done by
-*terminal emulators* or those developers experimenting with authoring one of
-their own, from shell::
-
- $ export UNICODE_VERSION=13.0
-
-If unspecified, the latest version is used. If your Terminal Emulator does not
-export this variable, you can use the `jquast/ucs-detect`_ utility to
-automatically detect and export it to your shell.
-
-wcwidth, wcswidth
------------------
-Use function ``wcwidth()`` to determine the length of a *single unicode
-character*, and ``wcswidth()`` to determine the length of many, a *string
-of unicode characters*.
-
-Briefly, return values of function ``wcwidth()`` are:
-
-``-1``
- Indeterminate (not printable).
-
-``0``
- Does not advance the cursor, such as NULL or Combining.
-
-``2``
- Characters of category East Asian Wide (W) or East Asian
- Full-width (F) which are displayed using two terminal cells.
-
-``1``
- All others.
-
-Function ``wcswidth()`` simply returns the sum of all values for each character
-along a string, or ``-1`` when it occurs anywhere along a string.
-
-Full API Documentation at https://wcwidth.readthedocs.io
-
-==========
-Developing
-==========
-
-Install wcwidth in editable mode::
-
- pip install -e .
-
-Execute unit tests using tox_ for all supported Python versions::
-
- tox -e py36,py37,py38,py39,py310,py311,py312,py313,py314
-
-Updating Unicode Version
-------------------------
-
-Regenerate python code tables from latest Unicode Specification data files::
-
- tox -e update
-
-The script is located at ``bin/update-tables.py``, requires Python 3.9 or
-later. It is recommended but not necessary to run this script with the newest
-Python, because the newest Python has the latest ``unicodedata`` for generating
-comments.
-
-Building Documentation
-----------------------
-
-This project is using `sphinx`_ 4.5 to build documentation::
-
- tox -e sphinx
-
-The output will be in ``docs/_build/html/``.
-
-Updating Requirements
----------------------
-
-This project is using `pip-tools`_ to manage requirements.
-
-To upgrade requirements for updating unicode version, run::
-
- tox -e update_requirements_update
-
-To upgrade requirements for testing, run::
-
- tox -e update_requirements37,update_requirements39
-
-To upgrade requirements for building documentation, run::
-
- tox -e update_requirements_docs
-
-Utilities
----------
-
-Supplementary tools for browsing and testing terminals for wide unicode
-characters are found in the `bin/`_ of this project's source code. Just ensure
-to first ``pip install -r requirements-develop.txt`` from this projects main
-folder. For example, an interactive browser for testing::
-
- python ./bin/wcwidth-browser.py
-
-====
-Uses
-====
-
-This library is used in:
-
-- `jquast/blessed`_: a thin, practical wrapper around terminal capabilities in
- Python.
-
-- `prompt-toolkit/python-prompt-toolkit`_: a Library for building powerful
- interactive command lines in Python.
-
-- `dbcli/pgcli`_: Postgres CLI with autocompletion and syntax highlighting.
-
-- `thomasballinger/curtsies`_: a Curses-like terminal wrapper with a display
- based on compositing 2d arrays of text.
-
-- `selectel/pyte`_: Simple VTXXX-compatible linux terminal emulator.
-
-- `astanin/python-tabulate`_: Pretty-print tabular data in Python, a library
- and a command-line utility.
-
-- `rspeer/python-ftfy`_: Fixes mojibake and other glitches in Unicode
- text.
-
-- `nbedos/termtosvg`_: Terminal recorder that renders sessions as SVG
- animations.
-
-- `peterbrittain/asciimatics`_: Package to help people create full-screen text
- UIs.
-
-- `python-cmd2/cmd2`_: A tool for building interactive command line apps
-
-- `stratis-storage/stratis-cli`_: CLI for the Stratis project
-
-- `ihabunek/toot`_: A Mastodon CLI/TUI client
-
-- `saulpw/visidata`_: Terminal spreadsheet multitool for discovering and
- arranging data
-
-===============
-Other Languages
-===============
-
-- `timoxley/wcwidth`_: JavaScript
-- `janlelis/unicode-display_width`_: Ruby
-- `alecrabbit/php-wcwidth`_: PHP
-- `Text::CharWidth`_: Perl
-- `bluebear94/Terminal-WCWidth`_: Perl 6
-- `mattn/go-runewidth`_: Go
-- `grepsuzette/wcwidth`_: Haxe
-- `aperezdc/lua-wcwidth`_: Lua
-- `joachimschmidt557/zig-wcwidth`_: Zig
-- `fumiyas/wcwidth-cjk`_: `LD_PRELOAD` override
-- `joshuarubin/wcwidth9`_: Unicode version 9 in C
-
-=======
-History
-=======
-
-0.2.14 *2025-09-22*
- * **Drop Support** for Python 2.7 and 3.5. `PR #117`_.
- * **Update** tables to include Unicode Specifications 16.0.0 and 17.0.0.
- `PR #146`_.
- * **Bugfix** U+00AD SOFT HYPHEN should measure as 1, versions 0.2.9 through
- 0.2.13 measured as 0. `PR #149`_.
-
-0.2.13 *2024-01-06*
- * **Bugfix** zero-width support for Hangul Jamo (Korean)
-
-0.2.12 *2023-11-21*
- * re-release to remove .pyi file misplaced in wheel files `Issue #101`_.
-
-0.2.11 *2023-11-20*
- * Include tests files in the source distribution (`PR #98`_, `PR #100`_).
-
-0.2.10 *2023-11-13*
- * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F
- Variation Selector 16 (`PR #97`_).
- * **Updated** `Specification <Specification_from_pypi_>`_.
-
-0.2.9 *2023-10-30*
- * **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese,
- Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_).
- * **Updated** to include `Specification <Specification_from_pypi_>`_ of
- character measurements.
-
-0.2.8 *2023-09-30*
- * Include requirements files in the source distribution (`PR #82`_).
-
-0.2.7 *2023-09-28*
- * **Updated** tables to include Unicode Specification 15.1.0.
- * Include ``bin``, ``docs``, and ``tox.ini`` in the source distribution
-
-0.2.6 *2023-01-14*
- * **Updated** tables to include Unicode Specification 14.0.0 and 15.0.0.
- * **Changed** developer tools to use pip-compile, and to use jinja2 templates
- for code generation in `bin/update-tables.py` to prepare for possible
- compiler optimization release.
-
-0.2.1 .. 0.2.5 *2020-06-23*
- * **Repository** changes to update tests and packaging issues, and
- begin tagging repository with matching release versions.
-
-0.2.0 *2020-06-01*
- * **Enhancement**: Unicode version may be selected by exporting the
- Environment variable ``UNICODE_VERSION``, such as ``13.0``, or ``6.3.0``.
- See the `jquast/ucs-detect`_ CLI utility for automatic detection.
- * **Enhancement**:
- API Documentation is published to readthedocs.io.
- * **Updated** tables for *all* Unicode Specifications with files
- published in a programmatically consumable format, versions 4.1.0
- through 13.0
-
-0.1.9 *2020-03-22*
- * **Performance** optimization by `Avram Lubkin`_, `PR #35`_.
- * **Updated** tables to Unicode Specification 13.0.0.
-
-0.1.8 *2020-01-01*
- * **Updated** tables to Unicode Specification 12.0.0. (`PR #30`_).
-
-0.1.7 *2016-07-01*
- * **Updated** tables to Unicode Specification 9.0.0. (`PR #18`_).
-
-0.1.6 *2016-01-08 Production/Stable*
- * ``LICENSE`` file now included with distribution.
-
-0.1.5 *2015-09-13 Alpha*
- * **Bugfix**:
- Resolution of "combining_ character width" issue, most especially
- those that previously returned -1 now often (correctly) return 0.
- Resolved by `Philip Craig`_ via `PR #11`_.
- * **Deprecated**:
- The module path ``wcwidth.table_comb`` is no longer available,
- it has been superseded by module path ``wcwidth.table_zero``.
-
-0.1.4 *2014-11-20 Pre-Alpha*
- * **Feature**: ``wcswidth()`` now determines printable length
- for (most) combining_ characters. The developer's tool
- `bin/wcwidth-browser.py`_ is improved to display combining_
- characters when provided the ``--combining`` option
- (`Thomas Ballinger`_ and `Leta Montopoli`_ `PR #5`_).
- * **Feature**: added static analysis (prospector_) to testing
- framework.
-
-0.1.3 *2014-10-29 Pre-Alpha*
- * **Bugfix**: 2nd parameter of wcswidth was not honored.
- (`Thomas Ballinger`_, `PR #4`_).
-
-0.1.2 *2014-10-28 Pre-Alpha*
- * **Updated** tables to Unicode Specification 7.0.0.
- (`Thomas Ballinger`_, `PR #3`_).
-
-0.1.1 *2014-05-14 Pre-Alpha*
- * Initial release to PyPI, based on Unicode Specification 6.3.0.
-
-This code was originally derived directly from C code of the same name,
-whose latest version is available at
-https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
-
- * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
- *
- * Permission to use, copy, modify, and distribute this software
- * for any purpose and without fee is hereby granted. The author
- * disclaims all warranties with regard to this software.
-
-.. _`Specification_from_pypi`: https://wcwidth.readthedocs.io/en/latest/specs.html
-.. _`tox`: https://tox.wiki/en/latest/
-.. _`prospector`: https://github.com/landscapeio/prospector
-.. _`combining`: https://en.wikipedia.org/wiki/Combining_character
-.. _`bin/`: https://github.com/jquast/wcwidth/tree/master/bin
-.. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/blob/master/bin/wcwidth-browser.py
-.. _`Thomas Ballinger`: https://github.com/thomasballinger
-.. _`Leta Montopoli`: https://github.com/lmontopo
-.. _`Philip Craig`: https://github.com/philipc
-.. _`PR #3`: https://github.com/jquast/wcwidth/pull/3
-.. _`PR #4`: https://github.com/jquast/wcwidth/pull/4
-.. _`PR #5`: https://github.com/jquast/wcwidth/pull/5
-.. _`PR #11`: https://github.com/jquast/wcwidth/pull/11
-.. _`PR #18`: https://github.com/jquast/wcwidth/pull/18
-.. _`PR #30`: https://github.com/jquast/wcwidth/pull/30
-.. _`PR #35`: https://github.com/jquast/wcwidth/pull/35
-.. _`PR #82`: https://github.com/jquast/wcwidth/pull/82
-.. _`PR #91`: https://github.com/jquast/wcwidth/pull/91
-.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97
-.. _`PR #98`: https://github.com/jquast/wcwidth/pull/98
-.. _`PR #100`: https://github.com/jquast/wcwidth/pull/100
-.. _`PR #117`: https://github.com/jquast/wcwidth/pull/117
-.. _`PR #146`: https://github.com/jquast/wcwidth/pull/146
-.. _`PR #149`: https://github.com/jquast/wcwidth/pull/149
-.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101
-.. _`jquast/blessed`: https://github.com/jquast/blessed
-.. _`selectel/pyte`: https://github.com/selectel/pyte
-.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies
-.. _`dbcli/pgcli`: https://github.com/dbcli/pgcli
-.. _`prompt-toolkit/python-prompt-toolkit`: https://github.com/prompt-toolkit/python-prompt-toolkit
-.. _`timoxley/wcwidth`: https://github.com/timoxley/wcwidth
-.. _`wcwidth(3)`: https://man7.org/linux/man-pages/man3/wcwidth.3.html
-.. _`wcswidth(3)`: https://man7.org/linux/man-pages/man3/wcswidth.3.html
-.. _`astanin/python-tabulate`: https://github.com/astanin/python-tabulate
-.. _`janlelis/unicode-display_width`: https://github.com/janlelis/unicode-display_width
-.. _`rspeer/python-ftfy`: https://github.com/rspeer/python-ftfy
-.. _`alecrabbit/php-wcwidth`: https://github.com/alecrabbit/php-wcwidth
-.. _`Text::CharWidth`: https://metacpan.org/pod/Text::CharWidth
-.. _`bluebear94/Terminal-WCWidth`: https://github.com/bluebear94/Terminal-WCWidth
-.. _`mattn/go-runewidth`: https://github.com/mattn/go-runewidth
-.. _`grepsuzette/wcwidth`: https://github.com/grepsuzette/wcwidth
-.. _`jquast/ucs-detect`: https://github.com/jquast/ucs-detect
-.. _`Avram Lubkin`: https://github.com/avylove
-.. _`nbedos/termtosvg`: https://github.com/nbedos/termtosvg
-.. _`peterbrittain/asciimatics`: https://github.com/peterbrittain/asciimatics
-.. _`aperezdc/lua-wcwidth`: https://github.com/aperezdc/lua-wcwidth
-.. _`joachimschmidt557/zig-wcwidth`: https://github.com/joachimschmidt557/zig-wcwidth
-.. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk
-.. _`joshuarubin/wcwidth9`: https://github.com/joshuarubin/wcwidth9
-.. _`python-cmd2/cmd2`: https://github.com/python-cmd2/cmd2
-.. _`stratis-storage/stratis-cli`: https://github.com/stratis-storage/stratis-cli
-.. _`ihabunek/toot`: https://github.com/ihabunek/toot
-.. _`saulpw/visidata`: https://github.com/saulpw/visidata
-.. _`pip-tools`: https://pip-tools.readthedocs.io/
-.. _`sphinx`: https://www.sphinx-doc.org/
-.. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi
- :alt: Downloads
- :target: https://pypi.org/project/wcwidth/
-.. |codecov| image:: https://codecov.io/gh/jquast/wcwidth/branch/master/graph/badge.svg
- :alt: codecov.io Code Coverage
- :target: https://app.codecov.io/gh/jquast/wcwidth/
-.. |license| image:: https://img.shields.io/pypi/l/wcwidth.svg
- :target: https://pypi.org/project/wcwidth/
- :alt: MIT License
diff --git a/contrib/python/wcwidth/py3/tests/GraphemeBreakTest.txt b/contrib/python/wcwidth/py3/tests/GraphemeBreakTest.txt
new file mode 100644
index 00000000000..e1215547c58
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/GraphemeBreakTest.txt
@@ -0,0 +1,796 @@
+# GraphemeBreakTest-17.0.0.txt
+# Date: 2025-03-24, 14:45:55 GMT
+# © 2025 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see https://www.unicode.org/reports/tr44/
+#
+# Default Grapheme_Cluster_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Grapheme_Cluster_Break property value for the sample character and
+# any other properties relevant to the algorithm, as described in
+# GraphemeBreakTest.html
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of GraphemeBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0000 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <NULL> (Control) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0000 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 000D ÷ 094D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 000D ÷ 0308 × 094D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 000D ÷ 200C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 000D ÷ 0308 × 200C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 06DD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 06DD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000D ÷ 0915 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 000D ÷ 00A9 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 00A9 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0000 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <NULL> (Control) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0000 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 000A ÷ 094D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 000A ÷ 0308 × 094D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 000A ÷ 200C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 000A ÷ 0308 × 200C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 06DD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 06DD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000A ÷ 0915 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 000A ÷ 00A9 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 00A9 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0000 ÷ 000D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0000 ÷ 000A ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0000 ÷ 0000 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] <NULL> (Control) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 0000 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0000 ÷ 094D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0000 ÷ 0308 × 094D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0000 ÷ 0300 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0000 ÷ 0308 × 0300 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0000 ÷ 200C ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0000 ÷ 0308 × 200C ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0000 ÷ 200D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0000 ÷ 0308 × 200D ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0000 ÷ 1F1E6 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0000 ÷ 06DD ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 06DD ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0000 ÷ 0903 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0000 ÷ 0308 × 0903 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0000 ÷ 1100 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0000 ÷ 1160 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0000 ÷ 11A8 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0000 ÷ AC00 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0000 ÷ AC01 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0000 ÷ 0915 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0000 ÷ 00A9 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 00A9 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0000 ÷ 0020 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0000 ÷ 0378 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0000 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <NULL> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 094D ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 094D × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 094D ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 094D × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 094D ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 094D × 0308 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 094D × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 094D × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 094D × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 094D × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 094D × 200C ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 094D × 0308 × 200C ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 094D × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 094D × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 094D ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 094D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 094D ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 094D × 0308 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 094D × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 094D × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 094D ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 094D × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 094D ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 094D × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 094D ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 094D × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 094D ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 094D × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 094D ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 094D × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 094D ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 094D × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 094D ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 094D × 0308 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 094D ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 094D × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 094D ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 094D × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 ÷ 0000 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0000 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0300 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0300 × 0308 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0300 × 200C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0300 × 0308 × 200C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 ÷ 06DD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0300 × 0308 ÷ 06DD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0300 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0300 ÷ 00A9 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0300 × 0308 ÷ 00A9 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 200C ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200C × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200C ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200C × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200C ÷ 0000 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 200C × 0308 ÷ 0000 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 200C × 094D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 200C × 0308 × 094D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 200C × 0300 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 200C × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 200C × 200C ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 200C × 0308 × 200C ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 200C × 200D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 200C × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 200C ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200C ÷ 06DD ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 200C × 0308 ÷ 06DD ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 200C × 0903 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200C × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200C ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200C × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200C ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200C × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200C ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200C × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200C ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200C × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200C ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200C × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200C ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 200C × 0308 ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 200C ÷ 00A9 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 200C × 0308 ÷ 00A9 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 200C ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 200C × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 200C ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 200C × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D ÷ 0000 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 200D × 0308 ÷ 0000 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 200D × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 200D × 0308 × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 200D × 200C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 200D × 0308 × 200C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D ÷ 06DD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 200D × 0308 ÷ 06DD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200D ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 200D × 0308 ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 200D ÷ 00A9 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 200D × 0308 ÷ 00A9 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 ÷ 0000 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0000 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 1F1E6 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 1F1E6 × 0308 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1F1E6 × 200C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 1F1E6 × 0308 × 200C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 ÷ 06DD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 06DD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1F1E6 ÷ 00A9 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 00A9 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 06DD ÷ 000D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 06DD × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 06DD ÷ 000A ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 06DD × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 06DD ÷ 0000 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 06DD × 0308 ÷ 0000 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 06DD × 094D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 06DD × 0308 × 094D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 06DD × 0300 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 06DD × 0308 × 0300 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 06DD × 200C ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 06DD × 0308 × 200C ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 06DD × 200D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 06DD × 0308 × 200D ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 06DD × 1F1E6 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 06DD × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 06DD × 06DD ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 06DD × 0308 ÷ 06DD ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 06DD × 0903 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 06DD × 0308 × 0903 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 06DD × 1100 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 06DD × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 06DD × 1160 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 06DD × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 06DD × 11A8 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 06DD × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 06DD × AC00 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 06DD × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 06DD × AC01 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 06DD × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 06DD × 0915 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 06DD × 0308 ÷ 0915 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 06DD × 00A9 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 06DD × 0308 ÷ 00A9 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 06DD × 0020 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 06DD × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 06DD × 0378 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 06DD × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC END OF AYAH (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0903 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0903 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0903 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0903 × 200C ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0903 × 0308 × 200C ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0903 × 0308 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0903 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0903 × 0308 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1100 ÷ 0000 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 1100 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 1100 × 0308 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1100 × 200C ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 1100 × 0308 × 200C ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1100 ÷ 06DD ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 1100 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1100 ÷ 00A9 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 1100 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1160 ÷ 0000 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 1160 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 1160 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1160 × 200C ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 1160 × 0308 × 200C ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1160 ÷ 06DD ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 1160 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1160 ÷ 00A9 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 1160 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 11A8 ÷ 0000 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 11A8 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 11A8 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 11A8 × 200C ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 11A8 × 0308 × 200C ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 11A8 ÷ 06DD ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 11A8 ÷ 00A9 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC00 ÷ 0000 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ AC00 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ AC00 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ AC00 × 200C ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ AC00 × 0308 × 200C ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC00 ÷ 06DD ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ AC00 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ AC00 ÷ 00A9 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ AC00 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC01 ÷ 0000 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0000 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ AC01 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ AC01 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ AC01 × 200C ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ AC01 × 0308 × 200C ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC01 ÷ 06DD ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ AC01 × 0308 ÷ 06DD ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ AC01 ÷ 00A9 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ AC01 × 0308 ÷ 00A9 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0915 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0915 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0915 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0915 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0915 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0915 × 0308 ÷ 0000 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0915 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0915 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0915 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0915 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0915 × 200C ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0915 × 0308 × 200C ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0915 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0915 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0915 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0915 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0915 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0915 × 0308 ÷ 06DD ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0915 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0915 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0915 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0915 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0915 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0915 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0915 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0915 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0915 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0915 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0915 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0915 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0915 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0915 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0915 × 0308 ÷ 00A9 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0915 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0915 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0915 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0915 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 00A9 ÷ 000D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 000D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00A9 ÷ 000A ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 000A ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00A9 ÷ 0000 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 0000 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 00A9 × 094D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 00A9 × 0308 × 094D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 00A9 × 0300 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 00A9 × 0308 × 0300 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 00A9 × 200C ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 00A9 × 0308 × 200C ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 00A9 × 200D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 00A9 × 0308 × 200D ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 00A9 ÷ 1F1E6 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 00A9 ÷ 06DD ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 06DD ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 00A9 × 0903 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 00A9 × 0308 × 0903 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 00A9 ÷ 1100 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 1100 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 00A9 ÷ 1160 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 1160 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 00A9 ÷ 11A8 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 00A9 ÷ AC00 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 00A9 × 0308 ÷ AC00 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 00A9 ÷ AC01 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 00A9 × 0308 ÷ AC01 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 00A9 ÷ 0915 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 0915 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 00A9 ÷ 00A9 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 00A9 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 00A9 ÷ 0020 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 0020 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 00A9 ÷ 0378 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 00A9 × 0308 ÷ 0378 ÷ # ÷ [0.2] COPYRIGHT SIGN (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 ÷ 0000 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0000 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0020 × 094D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0020 × 0308 × 094D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0020 × 200C ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0020 × 0308 × 200C ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 ÷ 06DD ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0020 × 0308 ÷ 06DD ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0020 ÷ 0915 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0915 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0020 ÷ 00A9 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0020 × 0308 ÷ 00A9 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0378 ÷ 0000 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0000 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [5.0] <NULL> (Control) ÷ [0.3]
+÷ 0378 × 094D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0378 × 0308 × 094D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [0.3]
+÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING GRAVE ACCENT (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0378 × 200C ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0378 × 0308 × 200C ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH NON-JOINER (ExtendmConjunctLinkermConjunctExtender) ÷ [0.3]
+÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0378 ÷ 06DD ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0378 × 0308 ÷ 06DD ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] ARABIC END OF AYAH (Prepend) ÷ [0.3]
+÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0378 ÷ 0915 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0915 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 0378 ÷ 00A9 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0378 × 0308 ÷ 00A9 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] COPYRIGHT SIGN (ExtPict) ÷ [0.3]
+÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] <reserved-0378> (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] ARABIC LETTER NOON (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] SPACE (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [0.3]
+÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) × [9.0] COMBINING DIAERESIS (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 2701 × 200D ÷ 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] UPPER BLADE SCISSORS (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ) ÷ [999.0] UPPER BLADE SCISSORS (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0915 ÷ 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 093C × 200D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctExtendermConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 093C × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctExtendermConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] ZERO WIDTH JOINER (ZWJ) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D × 0924 × 094D × 092F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER YA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D ÷ 0061 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) ÷ [0.3]
+÷ 0061 × 094D ÷ 0924 ÷ # ÷ [0.2] LATIN SMALL LETTER A (XXmLinkingConsonantmExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 003F × 094D ÷ 0924 ÷ # ÷ [0.2] QUESTION MARK (XXmLinkingConsonantmExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) ÷ [999.0] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0915 × 094D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] DEVANAGARI LETTER TA (LinkingConsonant) ÷ [0.3]
+÷ 0AB8 × 0AFB × 0ACD × 0AB8 × 0AFB ÷ # ÷ [0.2] GUJARATI LETTER SA (LinkingConsonant) × [9.0] GUJARATI SIGN SHADDA (Extend_ConjunctExtendermConjunctLinker) × [9.0] GUJARATI SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] GUJARATI LETTER SA (LinkingConsonant) × [9.0] GUJARATI SIGN SHADDA (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1019 × 1039 × 1018 ÷ 102C × 1037 ÷ # ÷ [0.2] MYANMAR LETTER MA (LinkingConsonant) × [9.0] MYANMAR SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] MYANMAR LETTER BHA (LinkingConsonant) ÷ [999.0] MYANMAR VOWEL SIGN AA (XXmLinkingConsonantmExtPict) × [9.0] MYANMAR SIGN DOT BELOW (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1004 × 103A × 1039 × 1011 × 1039 × 1011 ÷ # ÷ [0.2] MYANMAR LETTER NGA (LinkingConsonant) × [9.0] MYANMAR SIGN ASAT (Extend_ConjunctExtendermConjunctLinker) × [9.0] MYANMAR SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] MYANMAR LETTER THA (LinkingConsonant) × [9.0] MYANMAR SIGN VIRAMA (Extend_ConjunctLinker) × [9.3] MYANMAR LETTER THA (LinkingConsonant) ÷ [0.3]
+÷ 1B12 × 1B01 ÷ 1B32 × 1B44 × 1B2F ÷ 1B32 × 1B44 × 1B22 × 1B44 × 1B2C ÷ 1B32 × 1B44 × 1B22 × 1B38 ÷ # ÷ [0.2] BALINESE LETTER OKARA TEDUNG (XXmLinkingConsonantmExtPict) × [9.0] BALINESE SIGN ULU CANDRA (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] BALINESE LETTER SA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER WA (LinkingConsonant) ÷ [999.0] BALINESE LETTER SA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER TA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER YA (LinkingConsonant) ÷ [999.0] BALINESE LETTER SA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER TA (LinkingConsonant) × [9.0] BALINESE VOWEL SIGN SUKU (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 179F × 17D2 × 178F × 17D2 × 179A × 17B8 ÷ # ÷ [0.2] KHMER LETTER SA (LinkingConsonant) × [9.0] KHMER SIGN COENG (Extend_ConjunctLinker) × [9.3] KHMER LETTER TA (LinkingConsonant) × [9.0] KHMER SIGN COENG (Extend_ConjunctLinker) × [9.3] KHMER LETTER RO (LinkingConsonant) × [9.0] KHMER VOWEL SIGN II (Extend_ConjunctExtendermConjunctLinker) ÷ [0.3]
+÷ 1B26 ÷ 1B17 × 1B44 × 1B13 ÷ # ÷ [0.2] BALINESE LETTER NA (LinkingConsonant) ÷ [999.0] BALINESE LETTER NGA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER KA (LinkingConsonant) ÷ [0.3]
+÷ 1B27 ÷ 1B13 × 1B44 × 1B0B ÷ 1B0B × 1B04 ÷ # ÷ [0.2] BALINESE LETTER PA (LinkingConsonant) ÷ [999.0] BALINESE LETTER KA (LinkingConsonant) × [9.0] BALINESE ADEG ADEG (Extend_ConjunctLinker) × [9.3] BALINESE LETTER RA REPA (LinkingConsonant) ÷ [999.0] BALINESE LETTER RA REPA (LinkingConsonant) × [9.1] BALINESE SIGN BISAH (SpacingMark) ÷ [0.3]
+÷ 1795 × 17D2 × 17AF ÷ 1798 ÷ # ÷ [0.2] KHMER LETTER PHA (LinkingConsonant) × [9.0] KHMER SIGN COENG (Extend_ConjunctLinker) × [9.3] KHMER INDEPENDENT VOWEL QE (LinkingConsonant) ÷ [999.0] KHMER LETTER MO (LinkingConsonant) ÷ [0.3]
+÷ 17A0 × 17D2 × 17AB ÷ 1791 × 17D0 ÷ 1799 ÷ # ÷ [0.2] KHMER LETTER HA (LinkingConsonant) × [9.0] KHMER SIGN COENG (Extend_ConjunctLinker) × [9.3] KHMER INDEPENDENT VOWEL RY (LinkingConsonant) ÷ [999.0] KHMER LETTER TO (LinkingConsonant) × [9.0] KHMER SIGN SAMYOK SANNYA (Extend_ConjunctExtendermConjunctLinker) ÷ [999.0] KHMER LETTER YO (LinkingConsonant) ÷ [0.3]
+#
+# Lines: 766
+#
+# EOF
diff --git a/contrib/python/wcwidth/py3/tests/test_ambiguous.py b/contrib/python/wcwidth/py3/tests/test_ambiguous.py
new file mode 100644
index 00000000000..0c61cdacf9c
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_ambiguous.py
@@ -0,0 +1,80 @@
+"""Tests for ambiguous_width parameter."""
+# 3rd party
+import pytest
+
+# local
+import wcwidth
+
+AMBIGUOUS_CHARS = [
+ ('\u00a1', 'INVERTED_EXCLAMATION'),
+ ('\u00a7', 'SECTION_SIGN'),
+ ('\u00b0', 'DEGREE_SIGN'),
+ ('\u00b1', 'PLUS_MINUS'),
+ ('\u00d7', 'MULTIPLICATION'),
+ ('\u00f7', 'DIVISION'),
+ ('\u2460', 'CIRCLED_ONE'),
+ ('\u2500', 'BOX_HORIZONTAL'),
+ ('\u25a0', 'BLACK_SQUARE'),
+ ('\u2605', 'BLACK_STAR'),
+]
+
+
[email protected]('char,name', AMBIGUOUS_CHARS)
+def test_wcwidth_ambiguous_default(char, name):
+ """Ambiguous characters return width 1 by default."""
+ assert wcwidth.wcwidth(char) == 1
+
+
[email protected]('char,name', AMBIGUOUS_CHARS)
+def test_wcwidth_ambiguous_wide(char, name):
+ """Ambiguous characters return width 2 when ambiguous_width=2."""
+ assert wcwidth.wcwidth(char, ambiguous_width=2) == 2
+
+
+def test_wcswidth_mixed_ambiguous_and_wide():
+ """Mixed CJK and ambiguous characters."""
+ text = '\u4e2d\u00b1' # CJK (wide=2) + PLUS-MINUS (ambiguous)
+ assert wcwidth.wcswidth(text) == 3
+ assert wcwidth.wcswidth(text, ambiguous_width=2) == 4
+
+
+def test_width_ambiguous():
+ """Width() respects ambiguous_width parameter."""
+ assert wcwidth.width('\u00b1') == 1
+ assert wcwidth.width('\u00b1', ambiguous_width=2) == 2
+
+
+def test_ljust_ambiguous():
+ """Ljust respects ambiguous_width parameter."""
+ text = '\u00b1'
+ assert wcwidth.ljust(text, 4) == '\u00b1 '
+ assert wcwidth.ljust(text, 4, ambiguous_width=2) == '\u00b1 '
+
+
+def test_rjust_ambiguous():
+ """Rjust respects ambiguous_width parameter."""
+ text = '\u00b1'
+ assert wcwidth.rjust(text, 4) == ' \u00b1'
+ assert wcwidth.rjust(text, 4, ambiguous_width=2) == ' \u00b1'
+
+
+def test_center_ambiguous():
+ """Center respects ambiguous_width parameter."""
+ text = '\u00b1'
+ assert wcwidth.center(text, 5) == ' \u00b1 '
+ assert wcwidth.center(text, 6, ambiguous_width=2) == ' \u00b1 '
+
+
+def test_wrap_ambiguous():
+ """Wrap respects ambiguous_width parameter."""
+ text = '\u00b1' * 5 # 5 ambiguous characters
+ assert wcwidth.wrap(text, 4) == ['\u00b1\u00b1\u00b1\u00b1', '\u00b1']
+ assert wcwidth.wrap(text, 4, ambiguous_width=2) == ['\u00b1\u00b1', '\u00b1\u00b1', '\u00b1']
+
+
+def test_wide_not_affected_by_ambiguous():
+ """Wide characters remain wide regardless of ambiguous_width."""
+ cjk = '\u4e2d' # CJK character (always wide)
+ assert wcwidth.wcwidth(cjk) == 2
+ assert wcwidth.wcwidth(cjk, ambiguous_width=2) == 2
+ assert wcwidth.wcwidth(cjk, ambiguous_width=1) == 2
diff --git a/contrib/python/wcwidth/py3/tests/test_clip.py b/contrib/python/wcwidth/py3/tests/test_clip.py
new file mode 100644
index 00000000000..8a98c14c6f2
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_clip.py
@@ -0,0 +1,223 @@
+"""Tests for clip() and strip_sequences() functions."""
+# 3rd party
+import pytest
+
+# local
+from wcwidth import clip, width, strip_sequences
+
+STRIP_SEQUENCES_CASES = [
+ ('', ''),
+ ('hello', 'hello'),
+ ('hello world', 'hello world'),
+ ('\x1b[31m', ''),
+ ('\x1b[0m', ''),
+ ('\x1b[m', ''),
+ ('\x1b[31mred\x1b[0m', 'red'),
+ ('\x1b[1m\x1b[31mbold red\x1b[0m', 'bold red'),
+ ('\x1b[1m\x1b[31m\x1b[4m', ''),
+ ('\x1b[1mbold\x1b[0m \x1b[3mitalic\x1b[0m', 'bold italic'),
+ ('\x1b]0;title\x07', ''),
+ ('\x1b]0;title\x07text', 'text'),
+ ('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 'link'),
+ ('\x1b[31m中文\x1b[0m', '中文'),
+ ('\x1b[1m\U0001F468\u200D\U0001F469\u200D\U0001F467\x1b[0m',
+ '\U0001F468\u200D\U0001F469\u200D\U0001F467'),
+ ('\x1b', '\x1b'),
+ ('a\x1bb', 'a\x1bb'),
+ ('\x1b[', ''),
+ ('text\x1b[mmore', 'textmore'),
+]
+
+
[email protected]('text,expected', STRIP_SEQUENCES_CASES)
+def test_strip_sequences(text, expected):
+ assert strip_sequences(text) == expected
+
+
+CLIP_BASIC_CASES = [
+ ('', 0, 5, ''),
+ ('', 0, 0, ''),
+ ('hello', 0, 0, ''),
+ ('hello', 5, 5, ''),
+ ('hello', 5, 3, ''),
+ ('hello', -5, 3, 'hel'),
+ ('hello', 0, 5, 'hello'),
+ ('hello', 0, 3, 'hel'),
+ ('hello', 2, 5, 'llo'),
+ ('hello', 1, 4, 'ell'),
+ ('hello world', 0, 5, 'hello'),
+ ('hello world', 6, 11, 'world'),
+ ('hello world', 0, 11, 'hello world'),
+ ('hi', 0, 100, 'hi'),
+ ('hi', 100, 200, ''),
+]
+
+
[email protected]('text,start,end,expected', CLIP_BASIC_CASES)
+def test_clip_basic(text, start, end, expected):
+ assert clip(text, start, end) == expected
+
+
+CLIP_CJK_CASES = [
+ ('中文字', 0, 6, '中文字'),
+ ('中文字', 0, 4, '中文'),
+ ('中文字', 0, 2, '中'),
+ ('中文字', 2, 4, '文'),
+ ('中文字', 0, 3, '中 '),
+ ('中文字', 1, 6, ' 文字'),
+ ('中文字', 1, 5, ' 文 '),
+ ('A中B', 0, 4, 'A中B'),
+ ('A中B', 0, 3, 'A中'),
+ ('A中B', 1, 4, '中B'),
+ ('A中B', 1, 3, '中'),
+ ('A中B', 2, 4, ' B'),
+ ('中', 0, 2, '中'),
+ ('中', 0, 1, ' '),
+ ('中', 1, 2, ' '),
+]
+
+
[email protected]('text,start,end,expected', CLIP_CJK_CASES)
+def test_clip_cjk(text, start, end, expected):
+ assert clip(text, start, end) == expected
+
+
+def test_clip_cjk_custom_fillchar():
+ assert clip('中文字', 1, 5, fillchar='.') == '.文.'
+ assert clip('中文', 1, 3, fillchar='\u00b7') == '\u00b7\u00b7'
+
+
+CLIP_CJK_WIDTH_CASES = [
+ ('中文字', 0, 6, 6),
+ ('中文字', 0, 3, 3),
+ ('中文字', 1, 6, 5),
+ ('中文字', 1, 5, 4),
+]
+
+
[email protected]('text,start,end,expected_width', CLIP_CJK_WIDTH_CASES)
+def test_clip_cjk_width_consistency(text, start, end, expected_width):
+ assert width(clip(text, start, end)) == expected_width
+
+
+def test_clip_sequences_preserve_sgr():
+ result = clip('\x1b[31mred\x1b[0m', 0, 3)
+ assert result == '\x1b[31mred\x1b[0m'
+ assert strip_sequences(result) == 'red'
+
+
+def test_clip_sequences_before_start():
+ assert clip('\x1b[31mred text\x1b[0m', 4, 8) == '\x1b[31mtext\x1b[0m'
+
+
+def test_clip_sequences_after_end():
+ assert clip('hello\x1b[31m world\x1b[0m', 0, 5) == 'hello\x1b[31m\x1b[0m'
+
+
+def test_clip_sequences_multiple():
+ assert clip('\x1b[1m\x1b[31mbold red\x1b[0m', 0, 4) == '\x1b[1m\x1b[31mbold\x1b[0m'
+
+
+def test_clip_sequences_only():
+ assert clip('\x1b[31m\x1b[0m', 0, 10) == '\x1b[31m\x1b[0m'
+
+
+def test_clip_sequences_osc_hyperlink():
+ assert clip('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 0, 4) == \
+ '\x1b]8;;https://example.com\x07link\x1b]8;;\x07'
+
+
+def test_clip_sequences_cjk_with_sequences():
+ assert clip('\x1b[31m中文\x1b[0m', 0, 3) == '\x1b[31m中 \x1b[0m'
+
+
+def test_clip_sequences_between_chars():
+ assert clip('a\x1b[31mb\x1b[0mc', 1, 2) == '\x1b[31mb\x1b[0m'
+
+
+def test_clip_sequences_lone_esc():
+ assert clip('a\x1bb', 0, 2) == 'a\x1bb'
+
+
+CLIP_EMOJI_CASES = [
+ ('\U0001F600', 2),
+ ('\U0001F468\u200D\U0001F469\u200D\U0001F467', 2),
+ ('\u2764\uFE0F', 2),
+ ('\U0001F1FA\U0001F1F8', 2),
+]
+
+
[email protected]('emoji,full_width', CLIP_EMOJI_CASES)
+def test_clip_emoji(emoji, full_width):
+ assert clip(emoji, 0, full_width) == emoji
+ assert clip(emoji, 0, 1) == ' '
+ assert width(emoji) == full_width
+
+
+def test_clip_emoji_with_sequences():
+ assert clip('\x1b[1m\U0001F600\x1b[0m', 0, 2) == '\x1b[1m\U0001F600\x1b[0m'
+
+
+def test_clip_combining_accent():
+ assert clip('cafe\u0301', 0, 4) == 'cafe\u0301'
+ assert clip('cafe\u0301', 0, 3) == 'caf'
+
+
+def test_clip_combining_multiple():
+ assert clip('e\u0301\u0327', 0, 1) == 'e\u0301\u0327'
+
+
+def test_clip_ambiguous_width_1():
+ assert clip('\u00b1test', 0, 3, ambiguous_width=1) == '\u00b1te'
+
+
+def test_clip_ambiguous_width_2():
+ assert clip('\u00b1test', 0, 3, ambiguous_width=2) == '\u00b1t'
+
+
+CLIP_TAB_CASES = [
+ ('a\tb', 0, 10, 8, 'a b'),
+ ('a\tb', 0, 4, 8, 'a '),
+ ('a\tb', 0, 10, 4, 'a b'),
+ ('a\tb', 4, 10, 8, ' b'),
+ ('a\tb\tc', 0, 20, 4, 'a b c'),
+ ('中\tb', 0, 10, 4, '中 b'),
+ ('a\tb', 0, 5, 0, 'a\tb'),
+]
+
+
[email protected]('text,start,end,tabsize,expected', CLIP_TAB_CASES)
+def test_clip_tab_expansion(text, start, end, tabsize, expected):
+ assert clip(text, start, end, tabsize=tabsize) == expected
+
+
+def test_clip_tab_with_sequences():
+ assert clip('\x1b[31mab\tc\x1b[0m', 0, 12, tabsize=4) == '\x1b[31mab c\x1b[0m'
+
+
+CLIP_CONTROL_CHAR_CASES = [
+ ('abc\bde', 0, 5, 'abc\bde'),
+ ('ab\acd', 0, 4, 'ab\acd'),
+ ('ab\x00cd', 0, 4, 'ab\x00cd'),
+ ('abc\rde', 0, 5, 'abc\rde'),
+ ('\a\b\rHello', 0, 5, '\a\b\rHello'),
+ ('ab\x01\x02cd', 0, 4, 'ab\x01\x02cd'),
+]
+
+
[email protected]('text,start,end,expected', CLIP_CONTROL_CHAR_CASES)
+def test_clip_control_chars_zero_width(text, start, end, expected):
+ assert clip(text, start, end) == expected
+
+
+CLIP_CURSOR_SEQUENCE_CASES = [
+ ('ab\x1b[5Ccd', 0, 4, 'ab\x1b[5Ccd'),
+ ('abcde\x1b[2Df', 0, 6, 'abcde\x1b[2Df'),
+ ('ab\x1b[10Ccd', 0, 4, 'ab\x1b[10Ccd'),
+ ('ab\x1b[Ccd', 0, 4, 'ab\x1b[Ccd'),
+]
+
+
[email protected]('text,start,end,expected', CLIP_CURSOR_SEQUENCE_CASES)
+def test_clip_cursor_sequences_zero_width(text, start, end, expected):
+ assert clip(text, start, end) == expected
diff --git a/contrib/python/wcwidth/py3/tests/test_core.py b/contrib/python/wcwidth/py3/tests/test_core.py
index 206bbdcab38..b8ae3611430 100644
--- a/contrib/python/wcwidth/py3/tests/test_core.py
+++ b/contrib/python/wcwidth/py3/tests/test_core.py
@@ -1,10 +1,9 @@
-"""Core tests for wcwidth module. isort:skip_file"""
-try:
- # std import
- import importlib.metadata as importmeta
-except ImportError:
- # 3rd party for python3.7 and earlier
- import importlib_metadata as importmeta
+"""Core tests for wcwidth module."""
+# std imports
+import importlib.metadata
+
+# 3rd party
+import pytest
# local
import wcwidth
@@ -13,7 +12,7 @@ import wcwidth
def test_package_version():
"""wcwidth.__version__ is expected value."""
# given,
- expected = importmeta.version('wcwidth')
+ expected = importlib.metadata.version('wcwidth')
# exercise,
result = wcwidth.__version__
@@ -43,7 +42,7 @@ def test_empty_string():
def basic_string_type():
"""
- This is a python 2-specific test of the basic "string type"
+ This is a python 2-specific test of the basic "string type".
Such strings cannot contain anything but ascii in python2.
"""
@@ -122,14 +121,14 @@ def test_null_width_0():
def test_control_c0_width_negative_1():
- """How the API reacts to CSI (Control sequence initiate).
+ """
+ How the API reacts to CSI (Control sequence initiate).
- An example of bad fortune, this terminal sequence is a width of 0
- on all terminals, but wcwidth doesn't parse Control-Sequence-Inducer
- (CSI) sequences.
+ An example of bad fortune, this terminal sequence is a width of 0 on all terminals, but wcwidth
+ doesn't parse Control-Sequence-Inducer (CSI) sequences.
- Also the "legacy" posix functions wcwidth and wcswidth return -1 for
- any string containing the C1 control character \x1b (ESC).
+ Also the "legacy" posix functions wcwidth and wcswidth return -1 for any string containing the
+ C1 control character \x1b (ESC).
"""
# given,
phrase = '\x1b[0m'
@@ -195,8 +194,8 @@ def test_balinese_script():
"""
Balinese kapal (ship) is length 3.
- This may be an example that is not yet correctly rendered by any terminal so
- far, like devanagari.
+ This may be an example that is not yet correctly rendered by any terminal so far, like
+ devanagari.
"""
phrase = ("\u1B13" # Category 'Lo', EAW 'N' -- BALINESE LETTER KA
"\u1B28" # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL
@@ -216,7 +215,7 @@ def test_balinese_script():
def test_kr_jamo():
"""
- Test basic combining of HANGUL CHOSEONG and JUNGSEONG
+ Test basic combining of HANGUL CHOSEONG and JUNGSEONG.
Example and from Raymond Chen's blog post,
https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
@@ -393,8 +392,33 @@ def test_zero_wide_conflict():
assert wcwidth.wcwidth(chr(0x0309a), unicode_version='4.1.0') == 0
assert wcwidth.wcwidth(chr(0x0309b), unicode_version='4.1.0') == 2
+
def test_soft_hyphen():
# Test SOFT HYPHEN, category 'Cf' usually are zero-width, but most
# implementations agree to draw it was '1' cell, visually
# indistinguishable from a space, ' ' in Konsole, for example.
assert wcwidth.wcwidth(chr(0x000ad)) == 1
+
+
+PREPENDED_CONCATENATION_MARKS = [
+ (0x0600, 'ARABIC NUMBER SIGN'),
+ (0x0601, 'ARABIC SIGN SANAH'),
+ (0x0602, 'ARABIC FOOTNOTE MARKER'),
+ (0x0603, 'ARABIC SIGN SAFHA'),
+ (0x0604, 'ARABIC SIGN SAMVAT'),
+ (0x0605, 'ARABIC NUMBER MARK ABOVE'),
+ (0x06DD, 'ARABIC END OF AYAH'),
+ (0x070F, 'SYRIAC ABBREVIATION MARK'),
+ (0x0890, 'ARABIC POUND MARK ABOVE'),
+ (0x0891, 'ARABIC PIASTRE MARK ABOVE'),
+ (0x08E2, 'ARABIC DISPUTED END OF AYAH'),
+ (0x110BD, 'KAITHI NUMBER SIGN'),
+ (0x110CD, 'KAITHI NUMBER SIGN ABOVE'),
+]
+
+
[email protected]('codepoint,name', PREPENDED_CONCATENATION_MARKS)
+def test_prepended_concatenation_mark_width(codepoint, name):
+ """Prepended Concatenation Marks have width 1, not 0."""
+ # https://github.com/jquast/wcwidth/issues/119
+ assert wcwidth.wcwidth(chr(codepoint)) == 1
diff --git a/contrib/python/wcwidth/py3/tests/test_emojis.py b/contrib/python/wcwidth/py3/tests/test_emojis.py
index 310d0c3d44e..20245049ef9 100644
--- a/contrib/python/wcwidth/py3/tests/test_emojis.py
+++ b/contrib/python/wcwidth/py3/tests/test_emojis.py
@@ -1,9 +1,13 @@
+"""Tests for emoji width measurement and ZWJ sequences."""
# std imports
import os
# 3rd party
import pytest
+# local
+import wcwidth
+
# some tests cannot be done on some builds of python, where the internal
# unicode structure is limited to 0x10000 for memory conservation,
# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
@@ -13,9 +17,6 @@ try:
except ValueError:
NARROW_ONLY = True
-# local
-import wcwidth
-
def make_sequence_from_line(line):
# convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
@@ -24,13 +25,11 @@ def make_sequence_from_line(line):
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def emoji_zwj_sequence():
- """
- Emoji zwj sequence of four codepoints is just 2 cells.
- """
+ """Emoji zwj sequence of four codepoints is just 2 cells."""
phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
- "\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER
+ "\U0001f4bb") # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
# This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
expect_length_each = (2, 0, 0, 2)
expect_length_phrase = 2
@@ -46,9 +45,7 @@ def emoji_zwj_sequence():
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_unfinished_zwj_sequence():
- """
- Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
- """
+ """Ensure index-out-of-bounds does not occur for ZWJ without any following character."""
phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
@@ -66,9 +63,7 @@ def test_unfinished_zwj_sequence():
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_non_recommended_zwj_sequence():
- """
- Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
- """
+ """Verify ZWJ with characters that cannot be joined, wcwidth does not verify."""
phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
@@ -109,8 +104,8 @@ def test_longer_emoji_zwj_sequence():
"""
A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
- Also test the same sequence in duplicate, verifying multiple VS-16 sequences
- in a single function call.
+ Also test the same sequence in duplicate, verifying multiple VS-16 sequences in a single
+ function call.
"""
# 'Category Code', 'East Asian Width property' -- 'description'
phrase = ("\U0001F9D1" # 'So', 'W' -- ADULT
@@ -138,20 +133,17 @@ def test_longer_emoji_zwj_sequence():
def read_sequences_from_file(filename):
- fp = open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8')
- lines = [line.strip()
- for line in fp.readlines()
- if not line.startswith('#') and line.strip()]
- fp.close()
+ with open(os.path.join(os.path.dirname(__file__), filename), encoding='utf-8') as fp:
+ lines = [line.strip()
+ for line in fp.readlines()
+ if not line.startswith('#') and line.strip()]
sequences = [make_sequence_from_line(line) for line in lines]
return lines, sequences
@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
def test_recommended_emoji_zwj_sequences():
- """
- Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt
- """
+ """Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt."""
# given,
lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')
@@ -170,14 +162,12 @@ def test_recommended_emoji_zwj_sequences():
})
# verify
- assert errors == []
+ assert not errors
assert num >= 1468
def test_recommended_variation_16_sequences():
- """
- Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt
- """
+ """Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt."""
# given,
lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
@@ -198,12 +188,12 @@ def test_recommended_variation_16_sequences():
})
# verify
- assert errors == []
+ assert not errors
assert num >= 742
def test_unicode_9_vs16():
- """Verify effect of VS-16 on unicode_version 9.0 and later"""
+ """Verify effect of VS-16 on unicode_version 9.0 and later."""
phrase = ("\u2640" # FEMALE SIGN
"\uFE0F") # VARIATION SELECTOR-16
@@ -220,7 +210,7 @@ def test_unicode_9_vs16():
def test_unicode_8_vs16():
- """Verify that VS-16 has no effect on unicode_version 8.0 and earler"""
+ """Verify that VS-16 has no effect on unicode_version 8.0 and earlier."""
phrase = ("\u2640" # FEMALE SIGN
"\uFE0F") # VARIATION SELECTOR-16
diff --git a/contrib/python/wcwidth/py3/tests/test_grapheme.py b/contrib/python/wcwidth/py3/tests/test_grapheme.py
new file mode 100644
index 00000000000..5be6f58de09
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_grapheme.py
@@ -0,0 +1,146 @@
+"""Tests for grapheme cluster segmentation."""
+# std imports
+import os
+
+# 3rd party
+import pytest
+
+# local
+from wcwidth import iter_graphemes
+
+try:
+ chr(0x2fffe)
+ NARROW_ONLY = False
+except ValueError:
+ NARROW_ONLY = True
+
+
+def parse_grapheme_break_test_line(line):
+ """Parse a line from GraphemeBreakTest.txt."""
+ data, _, _ = line.partition('#')
+ data = data.strip()
+ if not data:
+ return None, None
+
+ parts = []
+ current_cluster = []
+
+ for token in data.split():
+ if token == '÷':
+ if current_cluster:
+ parts.append(current_cluster)
+ current_cluster = []
+ elif token == '×':
+ pass
+ else:
+ try:
+ current_cluster.append(int(token, 16))
+ except ValueError:
+ continue
+
+ if current_cluster:
+ parts.append(current_cluster)
+
+ all_codepoints = []
+ expected_clusters = []
+ for cluster in parts:
+ cluster_str = ''.join(chr(cp) for cp in cluster)
+ expected_clusters.append(cluster_str)
+ all_codepoints.extend(cluster)
+
+ if not all_codepoints:
+ return None, None
+
+ input_str = ''.join(chr(cp) for cp in all_codepoints)
+ return input_str, expected_clusters
+
+
+def read_grapheme_break_test():
+ """Read and parse GraphemeBreakTest.txt."""
+ test_file = os.path.join(os.path.dirname(__file__), 'GraphemeBreakTest.txt')
+ if not os.path.exists(test_file):
+ return []
+
+ test_cases = []
+ with open(test_file, encoding='utf-8') as f:
+ for line_num, line in enumerate(f, 1):
+ line = line.strip()
+ if not line or line.startswith('#'):
+ continue
+ input_str, expected = parse_grapheme_break_test_line(line)
+ if input_str is not None:
+ test_cases.append(pytest.param(input_str, expected, id=f"line{line_num}"))
+
+ return test_cases
+
+
[email protected](("input_str", "expected"), [
+ ('', []),
+ ('a', ['a']),
+ ('abc', ['a', 'b', 'c']),
+ ('cafe\u0301', ['c', 'a', 'f', 'e\u0301']),
+ ('\r\n', ['\r\n']),
+ ('ok\r\nok', ['o', 'k', '\r\n', 'o', 'k']),
+ ('\r', ['\r']),
+ ('ok\rok', ['o', 'k', '\r', 'o', 'k']),
+ ('\n', ['\n']),
+ ('ok\nok', ['o', 'k', '\n', 'o', 'k']),
+ ('\r\r', ['\r', '\r']),
+ ('ok\r\rok', ['o', 'k', '\r', '\r', 'o', 'k']),
+])
+def test_core_grapheme(input_str, expected):
+ """Basic grapheme cluster segmentation."""
+ assert list(iter_graphemes(input_str)) == expected
+
+
[email protected](("input_str", "start", "end", "expected"), [
+ ('abcdef', 2, None, ['c', 'd', 'e', 'f']),
+ ('abcdef', 0, 4, ['a', 'b', 'c', 'd']),
+ ('abcdef', 1, 4, ['b', 'c', 'd']),
+ ('abc', 10, None, []),
+ ('abc', 0, 10, ['a', 'b', 'c']),
+])
+def test_iter_graphemes_slice(input_str, start, end, expected):
+ """Grapheme iteration with start/end parameters."""
+ assert list(iter_graphemes(input_str, start=start, end=end)) == expected
+
+
+HANGUL_LV = '\u1100\u1161'
+HANGUL_LVT = '\uAC00\u11A8'
+FLAG_US = '\U0001F1FA\U0001F1F8'
+FLAG_AU = '\U0001F1E6\U0001F1FA'
+RI_A = '\U0001F1E6'
+FAMILY = '\U0001F468\u200D\U0001F469\u200D\U0001F467'
+WAVE_SKIN = '\U0001F44B\U0001F3FB'
+HEART_EMOJI = '\u2764\uFE0F'
+
+
[email protected](NARROW_ONLY, reason="requires wide Unicode")
[email protected](("input_str", "expected"), [
+ (HANGUL_LV, [HANGUL_LV]),
+ ('ok' + HANGUL_LV + 'ok', ['o', 'k', HANGUL_LV, 'o', 'k']),
+ (HANGUL_LVT, [HANGUL_LVT]),
+ ('ok' + HANGUL_LVT + 'ok', ['o', 'k', HANGUL_LVT, 'o', 'k']),
+ (FLAG_US, [FLAG_US]),
+ ('ok' + FLAG_US + 'ok', ['o', 'k', FLAG_US, 'o', 'k']),
+ (FLAG_US + RI_A, [FLAG_US, RI_A]),
+ ('ok' + FLAG_US + RI_A + 'ok', ['o', 'k', FLAG_US, RI_A, 'o', 'k']),
+ (FLAG_US + FLAG_AU, [FLAG_US, FLAG_AU]),
+ ('ok' + FLAG_US + FLAG_AU + 'ok', ['o', 'k', FLAG_US, FLAG_AU, 'o', 'k']),
+ (FAMILY, [FAMILY]),
+ ('ok' + FAMILY + 'ok', ['o', 'k', FAMILY, 'o', 'k']),
+ (WAVE_SKIN, [WAVE_SKIN]),
+ ('ok' + WAVE_SKIN + 'ok', ['o', 'k', WAVE_SKIN, 'o', 'k']),
+ (HEART_EMOJI, [HEART_EMOJI]),
+ ('ok' + HEART_EMOJI + 'ok', ['o', 'k', HEART_EMOJI, 'o', 'k']),
+])
+def test_wide_unicode_graphemes(input_str, expected):
+ """Grapheme segmentation for wide Unicode characters."""
+ assert list(iter_graphemes(input_str)) == expected
+
+
[email protected](NARROW_ONLY, reason="requires wide Unicode")
[email protected](("input_str", "expected"), read_grapheme_break_test())
+def test_unicode_grapheme_break_test(input_str, expected):
+ """Validate against official Unicode GraphemeBreakTest.txt."""
+ assert list(iter_graphemes(input_str)) == expected
diff --git a/contrib/python/wcwidth/py3/tests/test_justify.py b/contrib/python/wcwidth/py3/tests/test_justify.py
new file mode 100644
index 00000000000..55864bb23a5
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_justify.py
@@ -0,0 +1,56 @@
+"""Tests for text justification functions."""
+# local
+from wcwidth import ljust, rjust, width, center
+
+SGR_RED = '\x1b[31m'
+SGR_RESET = '\x1b[0m'
+CJK_WORD = '\u4e2d\u6587'
+CAFE_COMBINING = 'cafe\u0301'
+EMOJI_FAMILY = '\U0001F468\u200D\U0001F469\u200D\U0001F467'
+
+
+def test_ljust():
+ assert ljust('hi', 5) == 'hi '
+ assert ljust('', 5) == ' '
+ assert ljust('hello', 3) == 'hello'
+ assert ljust('hello', 5) == 'hello'
+ assert ljust('\x1b[31mhi\x1b[0m', 5) == '\x1b[31mhi\x1b[0m '
+ assert ljust('\u4e2d', 4) == '\u4e2d '
+ assert ljust('hi', 5, fillchar='-') == 'hi---'
+ assert ljust('hi', 5, fillchar='\u00b7') == 'hi\u00b7\u00b7\u00b7'
+ assert ljust(CJK_WORD, 8) == CJK_WORD + ' '
+ assert width(ljust(CJK_WORD, 8)) == 8
+ assert width(ljust(CAFE_COMBINING, 8)) == 8
+ assert width(ljust(EMOJI_FAMILY, 6)) == 6
+ text = f'{SGR_RED}hi{SGR_RESET}'
+ assert len(ljust(text, 6, control_codes='ignore')) - len(SGR_RED) - len(SGR_RESET) == 6
+
+
+def test_rjust():
+ assert rjust('hi', 5) == ' hi'
+ assert rjust('', 5) == ' '
+ assert rjust('hello', 3) == 'hello'
+ assert rjust('hello', 5) == 'hello'
+ assert rjust('\x1b[31mhi\x1b[0m', 5) == ' \x1b[31mhi\x1b[0m'
+ assert rjust('\u4e2d', 4) == ' \u4e2d'
+ assert rjust('hi', 5, fillchar='-') == '---hi'
+ assert rjust('hi', 5, fillchar='\u00b7') == '\u00b7\u00b7\u00b7hi'
+ assert rjust(CJK_WORD, 8) == ' ' + CJK_WORD
+ assert width(rjust(CAFE_COMBINING, 8)) == 8
+ assert width(rjust(EMOJI_FAMILY, 6)) == 6
+
+
+def test_center():
+ assert center('hi', 6) == ' hi '
+ assert center('hi', 5) == ' hi '
+ assert center('', 4) == ' '
+ assert center('hello', 3) == 'hello'
+ assert center('hello', 5) == 'hello'
+ assert center('\x1b[31mhi\x1b[0m', 6) == ' \x1b[31mhi\x1b[0m '
+ assert center('\u4e2d', 6) == ' \u4e2d '
+ assert center('hi', 6, fillchar='-') == '--hi--'
+ assert center('hi', 6, fillchar='\u00b7') == '\u00b7\u00b7hi\u00b7\u00b7'
+ assert center('x', 4) == ' x '
+ assert width(center(CJK_WORD, 8)) == 8
+ assert width(center(CAFE_COMBINING, 8)) == 8
+ assert width(center(EMOJI_FAMILY, 6)) == 6
diff --git a/contrib/python/wcwidth/py3/tests/test_table_integrity.py b/contrib/python/wcwidth/py3/tests/test_table_integrity.py
index e680498162a..8dbfe169a00 100644
--- a/contrib/python/wcwidth/py3/tests/test_table_integrity.py
+++ b/contrib/python/wcwidth/py3/tests/test_table_integrity.py
@@ -1,6 +1,4 @@
-"""
-Executes verify-table-integrity.py as a unit test.
-"""
+"""Executes verify-table-integrity.py as a unit test."""
# std imports
import os
import sys
diff --git a/contrib/python/wcwidth/py3/tests/test_textwrap.py b/contrib/python/wcwidth/py3/tests/test_textwrap.py
new file mode 100644
index 00000000000..1f62e29ad1c
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_textwrap.py
@@ -0,0 +1,262 @@
+"""Tests for sequence-aware text wrapping functions."""
+# std imports
+import sys
+import platform
+import textwrap
+
+# 3rd party
+import pytest
+
+# local
+from wcwidth import iter_sequences
+from wcwidth.textwrap import SequenceTextWrapper, wrap
+
+SGR_RED = '\x1b[31m'
+SGR_BOLD = '\x1b[1m'
+SGR_RESET = '\x1b[0m'
+ATTRS = ('\x1b[31m', '\x1b[34m', '\x1b[4m', '\x1b[7m', '\x1b[41m', '\x1b[37m', '\x1b[107m')
+
+OSC_HYPERLINK = '\x1b]8;;https://example.com\x07link\x1b]8;;\x07'
+CSI_CURSOR = '\x1b[5C'
+CTRL_BEL = '\x07'
+
+ZWJ = '\u200d'
+WOMAN = '\U0001F469'
+GIRL = '\U0001F467'
+FAMILY_ZWJ = f'{WOMAN}{ZWJ}{WOMAN}{ZWJ}{GIRL}'
+SMILEY_VS16 = '\u263a\ufe0f'
+ZWJ_FAMILY = '\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466'
+CAFE_COMBINING = 'cafe\u0301'
+HANGUL_GA = '\u1100\u1161'
+
+
+def _strip(text):
+ return ''.join(seg for seg, is_seq in iter_sequences(text) if not is_seq)
+
+
+def _adjust_stdlib_result(expected, kwargs):
+ """
+ Adjust stdlib textwrap result for known bugs in older Python versions.
+
+ CPython #140627: Older versions leave trailing whitespace and preceding all-whitespace lines
+ when drop_whitespace=True. Fixed in 3.13.11+, 3.14.2+, and 3.15+. We always strip to normalize
+ across versions.
+ """
+ if not expected:
+ return expected
+ if kwargs.get('drop_whitespace'):
+ # Strip trailing whitespace from each line (old Python bug)
+ expected = [line.rstrip() for line in expected]
+ # Remove leading all-whitespace lines (old Python bug)
+ if expected and not expected[0].strip():
+ expected = expected[1:]
+ if expected and kwargs.get('subsequent_indent'):
+ expected[0] = expected[0][len(kwargs['subsequent_indent']):]
+ return expected
+
+
+def _colorize(text):
+ return ''.join(
+ ATTRS[idx % len(ATTRS)] + char + SGR_RESET if char not in ' -\t' else char
+ for idx, char in enumerate(text)
+ )
+
+
+# Edge cases not covered by stdlib comparison
+BASIC_EDGE_CASES = [
+ ('', 10, []),
+ (' ', 10, []),
+ ('\u5973', 0, ['\u5973']),
+]
+
+
[email protected]('text,w,expected', BASIC_EDGE_CASES)
+def test_wrap_edge_cases(text, w, expected):
+ assert wrap(text, w) == expected
+
+
+def test_wrap_initial_indent():
+ assert wrap('hello world', 10, initial_indent='> ') == ['> hello', 'world']
+
+
+def test_wrap_drops_trailing_whitespace():
+ """Trailing whitespace stripped when drop_whitespace=True (CPython #140627)."""
+ result = wrap(' Z! a bc defghij', 3)
+ assert result[:3] == [' Z!', 'a', 'bc']
+
+
+LONG_WORD_CASES = [
+ ('abcdefghij', 3, True, ['abc', 'def', 'ghi', 'j']),
+ ('abcdefghij', 3, False, ['abcdefghij']),
+]
+
+
[email protected]('text,w,break_long,expected', LONG_WORD_CASES)
+def test_wrap_long_words(text, w, break_long, expected):
+ assert wrap(text, w, break_long_words=break_long) == expected
+
+
+# Hyphen edge cases for long word breaking
+HYPHEN_LONG_WORD_CASES = [
+ ('a-b-c-d', 3, True, ['a-', 'b-', 'c-d']),
+ ('a-b-c-d', 3, False, ['a-b', '-c-', 'd']),
+ ('---', 2, True, ['--', '-']),
+ ('a---b', 2, True, ['a-', '--', 'b']),
+ ('a-\x1b[31mb', 2, True, ['a-\x1b[31m', 'b']),
+]
+
+
[email protected]('text,w,break_hyphens,expected', HYPHEN_LONG_WORD_CASES)
+def test_wrap_hyphen_long_words(text, w, break_hyphens, expected):
+ assert wrap(text, w, break_on_hyphens=break_hyphens) == expected
+
+
+# Comprehensive stdlib compatibility
+TEXTWRAP_KWARGS = [
+ {'break_long_words': False, 'drop_whitespace': False},
+ {'break_long_words': False, 'drop_whitespace': True},
+ {'break_long_words': True, 'drop_whitespace': False},
+ {'break_long_words': True, 'drop_whitespace': True},
+ {'break_long_words': True, 'drop_whitespace': False, 'subsequent_indent': ' '},
+ {'break_long_words': True, 'drop_whitespace': True, 'subsequent_indent': ' '},
+ {'break_long_words': True, 'drop_whitespace': True, 'break_on_hyphens': True},
+ {'break_long_words': True, 'drop_whitespace': True, 'break_on_hyphens': False},
+]
+
+
[email protected]('kwargs', TEXTWRAP_KWARGS)
[email protected]('width', [3, 7, 8, 9, 10, 16, 20, 40])
+def test_wrap_matches_stdlib(kwargs, width):
+ pgraph = ' Z! a bc defghij klmnopqrstuvw<<>>xyz012345678900 ' * 2
+ pgraph_colored = _colorize(pgraph)
+ expected = _adjust_stdlib_result(
+ textwrap.wrap(pgraph, width=width, **kwargs), kwargs
+ )
+ wrapper = SequenceTextWrapper(width=width, **kwargs)
+ assert wrapper.wrap(pgraph) == expected
+ # For colored text, strip sequences
+ colored_result = [_strip(line) for line in wrapper.wrap(pgraph_colored)]
+ if kwargs.get('drop_whitespace'):
+ # normalize trailing whitespace, rstrip when drop_whitespace is True
+ # matches CPython #140627 fix
+ colored_result = [line.rstrip() for line in colored_result]
+ assert colored_result == expected
+
+
[email protected]('kwargs', TEXTWRAP_KWARGS)
[email protected]('width', [8, 10, 16, 20, 40])
[email protected]('tabsize', [4, 5, 8])
+def test_wrap_tabsize_matches_stdlib(kwargs, width, tabsize):
+ tabsize = min(tabsize, width)
+ pgraph = ' Z! a bc\t defghij\t kl mnopqrs\ttuvw<<>>xyz012345678900 ' * 2
+ expected = _adjust_stdlib_result(
+ textwrap.wrap(pgraph, width=width, tabsize=tabsize, **kwargs), kwargs
+ )
+ wrapper = SequenceTextWrapper(width=width, tabsize=tabsize, **kwargs)
+ assert wrapper.wrap(pgraph) == expected
+
+
+def test_wrap_multiline_matches_stdlib():
+ given = '\n' + 32 * 'A' + '\n' + 32 * 'B' + '\n' + 32 * 'C' + '\n\n'
+ assert wrap(given, 30) == textwrap.wrap(given, 30)
+
+
+# Wide characters that exceed width=1 (tests force-grapheme logic)
+WIDE_CHAR_WIDTH_1_CASES = [
+ ('\u5973', 1, ['\u5973']),
+ (ZWJ_FAMILY, 1, [ZWJ_FAMILY]),
+ (HANGUL_GA, 1, [HANGUL_GA]),
+]
+
+
[email protected]('text,w,expected', WIDE_CHAR_WIDTH_1_CASES)
+def test_wrap_wide_char_width_1(text, w, expected):
+ assert wrap(text, w) == expected
+
+
+# Unicode width-aware wrapping
+UNICODE_CASES = [
+ # CJK (2 cells each)
+ ('\u4e2d\u6587\u5b57\u7b26', 4, ['\u4e2d\u6587', '\u5b57\u7b26']),
+ ('\u4e2d\u6587\u5b57', 5, ['\u4e2d\u6587', '\u5b57']),
+ # Combining characters
+ (CAFE_COMBINING + '-latte', 4, ['cafe\u0301', '-lat', 'te']),
+ # Emoji (ZWJ, VS16)
+ (f'{FAMILY_ZWJ} ab', 4, [FAMILY_ZWJ, 'ab']),
+ (f'{SMILEY_VS16} ab', 3, [SMILEY_VS16, 'ab']),
+ ('\U0001F469\U0001F467\U0001F466', 4, ['\U0001F469\U0001F467', '\U0001F466']),
+]
+
+
[email protected]('text,w,expected', UNICODE_CASES)
+def test_wrap_unicode(text, w, expected):
+ kwargs = {'break_on_hyphens': False} if '-' in text else {}
+ assert wrap(text, w, **kwargs) == expected
+
+
+# Escape sequence preservation
+SEQUENCE_CASES = [
+ # SGR sequences preserved at word boundaries
+ (f'{SGR_RED}red{SGR_RESET} blue', 4, [f'{SGR_RED}red{SGR_RESET}', 'blue']),
+ (f'hello{SGR_RED} world', 6, [f'hello{SGR_RED}', 'world']),
+ # Empty/adjacent sequences
+ (f'{SGR_RED}{SGR_RESET}', 10, [f'{SGR_RED}{SGR_RESET}']),
+ (f'hello {SGR_RED}{SGR_RESET}world', 6, ['hello', f'{SGR_RED}{SGR_RESET}world']),
+ # OSC hyperlinks
+ (f'{OSC_HYPERLINK} text', 5, [OSC_HYPERLINK, 'text']),
+ # CSI cursor sequences
+ (f'{CSI_CURSOR}text here', 10, [f'{CSI_CURSOR}text', 'here']),
+ # Control characters
+ (f'{CTRL_BEL}alert text', 6, [f'{CTRL_BEL}alert', 'text']),
+ # Sequences in long word breaking
+ ('x\x1b[31mabcdefghij\x1b[0m', 3, ['xab', 'cde', 'fgh', 'ij']),
+ # Lone ESC
+ ('abc\x1bdefghij', 3, ['abc', 'def', 'ghi', 'j']),
+]
+
+
[email protected]('text,w,expected', SEQUENCE_CASES)
+def test_wrap_sequences(text, w, expected):
+ result = wrap(text, w)
+ if any('\x1b' in e or '\x00' <= e[0] < '\x20' for e in expected if e):
+ assert result == expected
+ else:
+ assert [_strip(line) for line in result] == expected
+
+
+# Mixed: sequences + unicode
+MIXED_CASES = [
+ (f'{SGR_RED}\u4e2d\u6587{SGR_RESET} ab', 5, [f'{SGR_RED}\u4e2d\u6587{SGR_RESET}', 'ab']),
+ (f'{SGR_RED}{FAMILY_ZWJ}{SGR_RESET} ab', 4, [f'{SGR_RED}{FAMILY_ZWJ}{SGR_RESET}', 'ab']),
+ (f'{SGR_BOLD}\u4e2d{SGR_RESET}y z', 4, [f'{SGR_BOLD}\u4e2d{SGR_RESET}y', 'z']),
+]
+
+
[email protected]('text,w,expected', MIXED_CASES)
+def test_wrap_mixed(text, w, expected):
+ assert wrap(text, w) == expected
+
+
+# Tabsize with wide characters - tests column alignment with different cell widths
+TABSIZE_WIDE_CASES = [
+ # CJK (2 cells) + tab: tabsize=4, '\u4e2d' is 2 cols, tab expands to col 4
+ ('\u4e2d\ta b', 6, 4, ['\u4e2d a', 'b']),
+ # CJK + tab with tabsize=8: '\u4e2d' is 2 cols, tab expands to col 8
+ ('\u4e2d\ta b', 10, 8, ['\u4e2d a', 'b']),
+ # Emoji + tab (emoji width=2): similar column alignment
+ (f'{SMILEY_VS16}\ta b', 6, 4, [f'{SMILEY_VS16} a', 'b']),
+ # Multiple CJK + tab: 4 cols, tab to 4 adds 0, but expand_tabs adds min 1
+ ('\u4e2d\u6587\ta', 8, 4, ['\u4e2d\u6587 a']),
+ # ASCII + tab + CJK: 'a' is 1 col, tab to 4 (3 spaces), CJK is 2 cols
+ ('a\t\u4e2d b', 8, 4, ['a \u4e2d b']),
+]
+
+
[email protected]('text,w,tabsize,expected', TABSIZE_WIDE_CASES)
[email protected](
+ platform.python_implementation() == 'PyPy' and sys.version_info < (3, 9),
+ reason='PyPy 3.8 str.expandtabs() counts UTF-8 bytes instead of characters'
+)
+def test_wrap_tabsize_wide_chars(text, w, tabsize, expected):
+ """Verify tabsize respects wide character column positions."""
+ assert wrap(text, w, tabsize=tabsize) == expected
diff --git a/contrib/python/wcwidth/py3/tests/test_ucslevel.py b/contrib/python/wcwidth/py3/tests/test_ucslevel.py
index b15fb5f5a71..a907db2c44a 100644
--- a/contrib/python/wcwidth/py3/tests/test_ucslevel.py
+++ b/contrib/python/wcwidth/py3/tests/test_ucslevel.py
@@ -46,7 +46,11 @@ def test_exact_410_unicode():
def test_nearest_505_str():
- """wcwidth._wcmatch_version('5.0.5') returns nearest '5.0.0'. (str)"""
+ """
+ wcwidth._wcmatch_version('5.0.5') returns nearest '5.0.0'.
+
+ (str)
+ """
# given
given, expected = '5.0.5', '5.0.0'
@@ -58,7 +62,11 @@ def test_nearest_505_str():
def test_nearest_505_unicode():
- """wcwidth._wcmatch_version(u'5.0.5') returns nearest u'5.0.0'. (unicode)"""
+ """
+ wcwidth._wcmatch_version(u'5.0.5') returns nearest u'5.0.0'.
+
+ (unicode)
+ """
# given
given, expected = '5.0.5', '5.0.0'
diff --git a/contrib/python/wcwidth/py3/tests/test_width.py b/contrib/python/wcwidth/py3/tests/test_width.py
new file mode 100644
index 00000000000..722f68447da
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_width.py
@@ -0,0 +1,385 @@
+"""Tests for width() function."""
+# 3rd party
+import pytest
+
+# local
+import wcwidth
+from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN
+
# Each case is (text, expected_width, case_name).
BASIC_WIDTH_CASES = [
    ('', 0, 'empty'),
    ('hello', 5, 'ASCII'),
    ('コンニチハ', 10, 'CJK'),
    ('cafe\u0301', 4, 'combining'),
    ('\U0001F468\u200d\U0001F469\u200d\U0001F467', 2, 'ZWJ'),
]


@pytest.mark.parametrize('text,expected,name', BASIC_WIDTH_CASES)
def test_width_basic(text, expected, name):
    """Basic width measurement tests."""
    assert wcwidth.width(text) == expected
+
+
# Each case is (text, expected_width, case_name), measured with control_codes="ignore".
IGNORE_MODE_CASES = [
    ('hello\x01world', 10, 'C0_control'),
    ('hello\x00world', 10, 'NUL'),
    ('abc\bd', 4, 'backspace'),
    ('abc\nxy', 5, 'LF'),
    ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'),
    ('hello\x80world', 10, 'C1_control'),
    ('\x1b', 0, 'lone_ESC'),
    ('a\x1bb', 2, 'lone_ESC_between'),
]


@pytest.mark.parametrize('text,expected,name', IGNORE_MODE_CASES)
def test_width_control_codes_ignore(text, expected, name):
    """Ignore mode strips control codes from width calculation."""
    assert wcwidth.width(text, control_codes="ignore") == expected
+
+
# Each case is (text, case_name); every text must be rejected in strict mode.
STRICT_RAISES_CASES = [
    ('hello\x01world', 'C0_control'),
    ('hello\x1aworld', 'ctrl_z'),
    ('hello\x7fworld', 'DEL'),
    ('hello\x80world', 'C1_control'),
    ('hello\nworld', 'LF'),
    ('hello\x1b[Hworld', 'cursor_home'),
    ('hello\x1b[Aworld', 'cursor_up'),
]


@pytest.mark.parametrize('text,name', STRICT_RAISES_CASES)
def test_width_control_codes_strict_raises(text, name):
    """Strict mode raises ValueError for illegal control codes."""
    with pytest.raises(ValueError):
        wcwidth.width(text, control_codes="strict")
+
+
# Each case is (text, expected_width, case_name); none may raise in strict mode.
STRICT_ALLOWED_CASES = [
    ('hello\x07world', 10, 'BEL'),
    ('hello\x00world', 10, 'NUL'),
    ('abc\bd', 3, 'backspace'),
    ('abc\rxy', 3, 'CR'),
    ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'),
    ('a\x1b[2Cb', 4, 'cursor_right'),
    ('\x1b', 0, 'lone_ESC'),
    ('a\x1bb', 2, 'lone_ESC_between'),
    ('\x1b!', 1, 'ESC_unrecognized'),
]


@pytest.mark.parametrize('text,expected,name', STRICT_ALLOWED_CASES)
def test_width_control_codes_strict_allowed(text, expected, name):
    """Strict mode allows certain control codes."""
    assert wcwidth.width(text, control_codes="strict") == expected
+
+
# Each case is (sequence, capability_name); the sequence is embedded between
# "hello" and "world" and must be rejected in strict mode.
STRICT_INDETERMINATE_SEQUENCES = [
    ('\x1b[?1049h', 'enter_fullscreen'),
    ('\x1b[?1049l', 'exit_fullscreen'),
    ('\x1bD', 'scroll_forward'),
    ('\x1bM', 'scroll_reverse'),
    ('\x1b8', 'restore_cursor'),
    ('\x1b[1P', 'parm_dch'),
    ('\x1b[1M', 'parm_delete_line'),
    ('\x1b[1L', 'parm_insert_line'),
    ('\x1b[1X', 'erase_chars'),
    ('\x1b[1S', 'parm_index'),
    ('\x1b[1T', 'parm_rindex'),
]


@pytest.mark.parametrize('seq,cap_name', STRICT_INDETERMINATE_SEQUENCES)
def test_width_strict_indeterminate_raises(seq, cap_name):
    """Strict mode raises ValueError for sequences with indeterminate effect."""
    with pytest.raises(ValueError):
        wcwidth.width(f"hello{seq}world", control_codes="strict")
+
+
# Each case is (text, expected_width, case_name), measured with the default mode.
PARSE_MODE_CASES = [
    ('hello\x01world', 10, 'C0_control'),
    ('abc\bd', 3, 'backspace'),
    ('abc\rxy', 3, 'CR'),
    ('abc\nxy', 5, 'LF_vertical'),
    ('a\x1b[2Cb', 4, 'cursor_right'),
    ('abcd\x1b[2De', 4, 'cursor_left'),
    ('\x1b[31mred\x1b[0m', 3, 'SGR'),
    ('ab\x1b[Hcd', 4, 'indeterminate'),
]


@pytest.mark.parametrize('text,expected,name', PARSE_MODE_CASES)
def test_width_control_codes_parse(text, expected, name):
    """Parse mode (default) handles control codes."""
    assert wcwidth.width(text) == expected
+
+
# Each case is (text, expected_width, tabsize, case_name).
TABSIZE_CASES = [
    ('\t', 8, 8, 'default'),
    ('abc\t', 8, 8, 'after_text'),
    ('ab\t', 4, 4, 'tabsize_4'),
]


@pytest.mark.parametrize('text,expected,tabsize,name', TABSIZE_CASES)
def test_width_tabsize(text, expected, tabsize, name):
    """Tabsize parameter controls tab width calculation."""
    assert wcwidth.width(text, tabsize=tabsize) == expected
+
+
def test_width_tabsize_zero():
    """A lone tab measures zero cells when control codes are ignored."""
    measured = wcwidth.width('\t', control_codes='ignore')
    assert measured == 0
+
+
def test_width_tabsize_zero_parse():
    """With tabsize=0 in the default (parse) mode a tab contributes no width."""
    measured = wcwidth.width('ab\tc', tabsize=0)
    assert measured == 3
+
+
# Each case is (text, expected_width, case_name).
ESCAPE_SEQUENCE_CASES = [
    ('\x1b[m', 0, 'basic_SGR'),
    ('\x1b[38;2;255;0;0m', 0, 'RGB_SGR'),
    ('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 4, 'OSC_hyperlink'),
    ('\x1b]0;title\x07text', 4, 'OSC_title'),
    ('\x1b(B', 0, 'charset'),
    ('\x1b[', 0, 'Fe_CSI'),
]


@pytest.mark.parametrize('text,expected,name', ESCAPE_SEQUENCE_CASES)
def test_width_escape_sequences(text, expected, name):
    """Escape sequences are parsed correctly."""
    assert wcwidth.width(text) == expected
+
+
# Each case is (text, expected_width, case_name).
EDGE_CASES = [
    ('\x1b[31m\x1b[0m', 0, 'only_escapes'),
    ('\x1b[31mhello\x1b[0m world', 11, 'mixed_content'),
    ('\x1b[31mコ\x1b[0m', 2, 'wide_with_escape'),
    ('\x1b', 0, 'lone_ESC'),
    ('\x1b!', 1, 'ESC_unrecognized'),
    ('*\x1b*', 2, 'lone_ESC_between_text'),
]


@pytest.mark.parametrize('text,expected,name', EDGE_CASES)
def test_width_edge_cases(text, expected, name):
    """Edge cases are handled correctly."""
    assert wcwidth.width(text) == expected
+
+
def test_width_unknown_control_codes():
    """Unrecognized control_codes values behave like the default parse mode."""
    for text, mode, expected in (
        ("hello", "invalid", 5),
        ("abc\bd", "unknown", 3),
    ):
        assert wcwidth.width(text, control_codes=mode) == expected
+
+
def test_vs16_selector():
    """VS16 (U+FE0F) widens an eligible narrow character to two cells."""
    smiley_vs16 = "\u263A\uFE0F"
    heart_vs16 = "\u2764\uFE0F"

    # Smiley + VS16 is wide, and width() agrees with wcswidth()
    assert wcwidth.width(smiley_vs16) == 2
    assert wcwidth.width(smiley_vs16) == wcwidth.wcswidth(smiley_vs16)
    # Heart + VS16 is wide as well
    assert wcwidth.width(heart_vs16) == 2
    # A VS16 with nothing before it occupies no cells
    assert wcwidth.width("\uFE0F") == 0
    # A base character outside the VS16 table is not widened
    assert wcwidth.width("A\uFE0F") == 1
+
+
def test_vs16_after_control_chars():
    """A VS16 separated from the emoji by other codes must not widen it."""
    # width() reports the maximum extent, so BS/CR do not reduce the result.
    separated = (
        "\u263A\x07\uFE0F",     # smiley(1) + BEL(0) + VS16(0)
        "\u263A\x08\uFE0F",     # smiley(1) + BS(back) + VS16(0), extent=1
        "\u263A\x0d\uFE0F",     # smiley(1) + CR(reset) + VS16(0), extent=1
        "\u263A\x1b[m\uFE0F",   # smiley(1) + SGR(0) + VS16(0)
        "\u263A\u200Da\uFE0F",  # smiley(1) + ZWJ+a(0) + VS16(0)
    )
    for text in separated:
        assert wcwidth.width(text) == 1
+
+
def test_backspace_at_column_zero():
    """A backspace at the first column cannot move the cursor below zero."""
    for text, expected in (('\b', 0), ('\ba', 1)):
        assert wcwidth.width(text) == expected
+
+
def test_carriage_return_resets_column():
    """After CR the column restarts, but the widest extent reached is reported."""
    for text in ('abc\rd', 'abc\rde'):
        assert wcwidth.width(text) == 3
+
+
def test_iter_sequences_lone_esc():
    """A bare ESC is reported by iter_sequences() as its own sequence segment."""
    alone = list(wcwidth.iter_sequences('\x1b'))
    assert alone == [('\x1b', True)]

    surrounded = list(wcwidth.iter_sequences('*\x1b*'))
    assert surrounded == [('*', False), ('\x1b', True), ('*', False)]
+
+
def test_tab_ignore_with_tabsize():
    """In ignore mode a tab stays zero-width regardless of the tabsize given."""
    measured = wcwidth.width("abc\t", control_codes="ignore", tabsize=8)
    assert measured == 3
+
+
def test_cursor_right_unparameterized():
    """CSI C without a count is a recognized sequence that advances one column."""
    seq = '\x1b[C'
    wrapped = 'a' + seq + 'b'

    # the whole string is reported as a single sequence segment
    assert list(wcwidth.iter_sequences(seq)) == [(seq, True)]
    # on its own the cursor moves right by the default of 1, so the extent is 1
    assert wcwidth.width(seq) == 1
    # 'a'(1) + right(1) + 'b'(1) = 3
    assert wcwidth.width(wrapped) == 3
    # cursor_right is permitted in strict mode and measures the same
    assert wcwidth.width(wrapped, control_codes='strict') == 3
+
+
# Each case is (sequence, capability_name).
INDETERMINATE_CAP_SAMPLES = [
    ('\x1b[1;1r', 'change_scroll_region'),
    ('\x1b[H\x1b[2J', 'clear_screen'),
    ('\x1b[K', 'clr_eol'),
    ('\x1b[1;1H', 'cursor_address'),
    ('\x1b[A', 'cursor_up'),
    ('\x1b[M', 'delete_line'),
    ('\x1b[?1049h', 'enter_fullscreen'),
    ('\x1b[1X', 'erase_chars'),
    ('\x1b[L', 'insert_line'),
    ('\x1b[1S', 'parm_index'),
    ('\x1b[1A', 'parm_up_cursor'),
    ('\x1b8', 'restore_cursor'),
    ('\x1b[1d', 'row_address'),
    ('\x1bD', 'scroll_forward'),
]


@pytest.mark.parametrize('seq,cap_name', INDETERMINATE_CAP_SAMPLES)
def test_indeterminate_caps_covered_by_term_seq_pattern(seq, cap_name):
    """Each sampled indeterminate sequence matches ZERO_WIDTH_PATTERN and measures 0."""
    # match() anchors at the start only, so multi-sequence samples pass on their first sequence
    assert ZERO_WIDTH_PATTERN.match(seq)
    assert wcwidth.width(seq) == 0
+
+
# Each case is (sequence, capability_name).
ZERO_WIDTH_CAP_SAMPLES = [
    ('\x1b[3g', 'clear_all_tabs'),
    ('\x1b[?25l', 'cursor_invisible'),
    ('\x1b[?25h', 'cursor_normal'),
    ('\x1b[?12;25h', 'cursor_visible'),
    ('\x1b(0', 'enter_alt_charset_mode'),
    ('\x1b[5m', 'enter_blink_mode'),
    ('\x1b[1m', 'enter_bold_mode'),
    ('\x1b[2m', 'enter_dim_mode'),
    ('\x1b[3m', 'enter_italics_mode'),
    ('\x1b[7m', 'enter_reverse_mode'),
    ('\x1b[3m', 'enter_standout_mode'),
    ('\x1b[4m', 'enter_underline_mode'),
    ('\x1b(B', 'exit_alt_charset_mode'),
    ('\x1b[m', 'exit_attribute_mode'),
    ('\x1b[4l', 'exit_insert_mode'),
    ('\x1b[23m', 'exit_italics_mode'),
    ('\x1b[27m', 'exit_standout_mode'),
    ('\x1b[24m', 'exit_underline_mode'),
    ('\x1b[?5h\x1b[?5l', 'flash_screen_csi'),
    ('\x1bg', 'flash_screen_visual_bell'),
    ('\x1b>', 'keypad_local'),
    ('\x1b=', 'keypad_xmit'),
    ('\x1b[39;49m', 'orig_pair'),
    ('\x1b7', 'save_cursor'),
    ('\x1bH', 'set_tab'),
]


@pytest.mark.parametrize('seq,cap_name', ZERO_WIDTH_CAP_SAMPLES)
def test_zero_width_sequences_matched_by_pattern(seq, cap_name):
    """Verify zero-width terminfo sequences are matched by ZERO_WIDTH_PATTERN."""
    # every segment produced for the sample must be flagged as a sequence
    for part, is_seq in wcwidth.iter_sequences(seq):
        assert is_seq, f"{cap_name}: {repr(part)} not matched as sequence"
    assert wcwidth.width(seq) == 0
+
+
# Each case is (text, expected_width, case_name); only the printable text
# that follows a sequence contributes to the expected width.
MODERN_TERMINAL_SEQUENCES = [
    ('\x1b_Gf=100,i=1;base64data\x1b\\hello', 5, 'kitty_graphics_with_text'),
    ('\x1b_Ga=d\x07', 0, 'kitty_graphics_delete'),
    ('\x1bP0;1;0q#0~-\x1b\\test', 4, 'sixel_graphics_with_text'),
    ('\x1bP$q"p\x1b\\', 0, 'decrqss_query'),
    ('\x1b^private\x1b\\text', 4, 'pm_with_text'),
    ('\x1b]1337;SetMark\x07test', 4, 'iterm2_setmark'),
    ('\x1b]1337;File=inline=1:base64\x07img', 3, 'iterm2_inline_image'),
    ('\x1b]1337;CursorShape=1\x07', 0, 'iterm2_cursor_shape'),
    ('\x1b]1337;CurrentDir=/home\x07', 0, 'iterm2_currentdir'),
    ('\x1b]133;A\x07$ ', 2, 'shell_prompt_start'),
    ('\x1b]133;B\x07ls', 2, 'shell_command_start'),
    ('\x1b]133;C\x07', 0, 'shell_command_executed'),
    ('\x1b]133;D;0\x07', 0, 'shell_command_finished'),
    ('\x1b]99;i=1:d=0;Hello\x1b\\', 0, 'kitty_notification'),
    ('\x1b]5522;type=read\x07', 0, 'kitty_clipboard_read'),
    ('\x1b]22;pointer\x07', 0, 'kitty_pointer_shape'),
    ('\x1b]21;fg=?\x07', 0, 'kitty_color_query'),
    ('\x1b]30001\x1b\\', 0, 'kitty_color_push'),
    ('\x1b]30101\x1b\\', 0, 'kitty_color_pop'),
]


@pytest.mark.parametrize('seq,expected_width,name', MODERN_TERMINAL_SEQUENCES)
def test_modern_sequences(seq, expected_width, name):
    """Modern terminal sequences are recognized as zero-width."""
    # the result must be the same in the default mode and in strict mode
    assert wcwidth.width(seq) == expected_width
    assert wcwidth.width(seq, control_codes='strict') == expected_width
+
+
@pytest.mark.parametrize('codepoint,expected_width', [
    (0x3164, 0),
    (0xFFA0, 0),
    (0x2065, 0),
    (0xFFF0, 0),
    (0xFFF1, 0),
    (0xFFF8, 0),
    (0xE0000, 0),
    (0xE0002, 0),
    (0xE001F, 0),
    (0xE0080, 0),
    (0xE00FF, 0),
    (0xE01F0, 0),
    (0xE0FFF, 0),
])
def test_default_ignorable_zero_width(codepoint, expected_width):
    """Default_Ignorable_Code_Point characters return width 0."""
    result = wcwidth.wcwidth(chr(codepoint))
    assert result == expected_width
+
+
@pytest.mark.parametrize('codepoint,expected_width', [
    (0x00AD, 1),
    (0x115F, 2),
])
def test_default_ignorable_exceptions(codepoint, expected_width):
    """Exceptions to Default_Ignorable_Code_Point zero-width rule."""
    result = wcwidth.wcwidth(chr(codepoint))
    assert result == expected_width
+
+
def test_hangul_filler_zero_width():
    """wcwidth() reports 0 cells for U+3164 HANGUL FILLER."""
    assert wcwidth.wcwidth('\u3164') == 0
+
+
def test_halfwidth_hangul_filler_zero_width():
    """wcwidth() reports 0 cells for U+FFA0 HALFWIDTH HANGUL FILLER."""
    assert wcwidth.wcwidth('\uFFA0') == 0
+
+
def test_hangul_choseong_filler_exception():
    """wcwidth() keeps U+115F HANGUL CHOSEONG FILLER at 2 cells for jamo composition."""
    assert wcwidth.wcwidth('\u115F') == 2
+
+
def test_soft_hyphen_exception():
    """wcwidth() keeps U+00AD SOFT HYPHEN at 1 cell for ISO-8859-1 compatibility."""
    assert wcwidth.wcwidth('\u00AD') == 1
diff --git a/contrib/python/wcwidth/py3/wcwidth/__init__.py b/contrib/python/wcwidth/py3/wcwidth/__init__.py
index e4e81380913..106816aa6d2 100644
--- a/contrib/python/wcwidth/py3/wcwidth/__init__.py
+++ b/contrib/python/wcwidth/py3/wcwidth/__init__.py
@@ -5,25 +5,37 @@ https://github.com/jquast/wcwidth
"""
# re-export all functions & definitions, even private ones, from top-level
# module path, to allow for 'from wcwidth import _private_func'. Of course,
-# user beware that any _private function may disappear or change signature at
-# any future version.
+# user beware that any _private functions or variables not exported by __all__
+# may disappear or change signature at any future version.
# local
from .wcwidth import ZERO_WIDTH # noqa
from .wcwidth import (WIDE_EASTASIAN,
+ AMBIGUOUS_EASTASIAN,
VS16_NARROW_TO_WIDE,
+ clip,
+ ljust,
+ rjust,
+ width,
+ center,
wcwidth,
wcswidth,
- _bisearch,
list_versions,
+ iter_sequences,
+ strip_sequences,
_wcmatch_version,
_wcversion_value)
+from .bisearch import bisearch as _bisearch
+from .grapheme import iter_graphemes # noqa
+from .textwrap import SequenceTextWrapper, wrap
# The __all__ attribute defines the items exported from statement,
# 'from wcwidth import *', but also to say, "This is the public API".
-__all__ = ('wcwidth', 'wcswidth', 'list_versions')
+__all__ = ('wcwidth', 'wcswidth', 'width', 'iter_sequences', 'iter_graphemes',
+ 'ljust', 'rjust', 'center', 'wrap', 'clip', 'strip_sequences',
+ 'list_versions')
# We also used pkg_resources to load unicode version tables from version.json,
# generated by bin/update-tables.py, but some environments are unable to
# import pkg_resources for one reason or another, yikes!
-__version__ = '0.2.14'
+__version__ = '0.3.0'
diff --git a/contrib/python/wcwidth/py3/wcwidth/bisearch.py b/contrib/python/wcwidth/py3/wcwidth/bisearch.py
new file mode 100644
index 00000000000..bd0b4c13c02
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/bisearch.py
@@ -0,0 +1,29 @@
+"""Binary search function for Unicode interval tables."""
+
+
def bisearch(ucs, table):
    # type: (int, tuple) -> int
    """
    Binary search in interval table.

    :param ucs: Ordinal value of unicode character.
    :param table: Tuple of starting and ending ranges of ordinal values,
        in form of ``((start, end), ...)``, sorted ascending and
        non-overlapping. May be empty.
    :returns: 1 if ordinal value ucs is found within lookup table, else 0.
    """
    # An empty table contains nothing; without this guard the bounds
    # pre-check below would raise IndexError on table[0].
    if not table:
        return 0

    lbound = 0
    ubound = len(table) - 1

    # Fast rejection: value lies entirely outside the range the table covers.
    if ucs < table[0][0] or ucs > table[ubound][1]:
        return 0

    while ubound >= lbound:
        mid = (lbound + ubound) // 2
        if ucs > table[mid][1]:
            lbound = mid + 1
        elif ucs < table[mid][0]:
            ubound = mid - 1
        else:
            # table[mid][0] <= ucs <= table[mid][1]
            return 1

    return 0
diff --git a/contrib/python/wcwidth/py3/wcwidth/control_codes.py b/contrib/python/wcwidth/py3/wcwidth/control_codes.py
new file mode 100644
index 00000000000..3a6fff76386
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/control_codes.py
@@ -0,0 +1,46 @@
+"""
+Control character sets for terminal handling.
+
+This module provides the control character sets used by the width() function to handle terminal
+control characters.
+"""
+
# Control characters that are never acceptable: 'strict' mode raises
# ValueError when one of these is encountered.
ILLEGAL_CTRL = frozenset(
    chr(code)
    for span in (
        range(0x01, 0x07),  # SOH, STX, ETX (^C), EOT (^D), ENQ, ACK
        range(0x10, 0x1b),  # DLE through SUB (^Z)
        range(0x1c, 0x20),  # FS, GS, RS, US
        range(0x7f, 0x80),  # DEL
        range(0x80, 0xa0),  # C1 control characters
    )
    for code in span
)

# Controls that move the cursor vertically. The horizontal position is
# indeterminate afterwards, so these also raise ValueError in 'strict' mode.
VERTICAL_CTRL = frozenset((
    '\x0a',  # LF (line feed)
    '\x0b',  # VT (vertical tab)
    '\x0c',  # FF (form feed)
))

# Controls that move the cursor within the line; their effect on the
# column is tracked in 'strict' and 'parse' modes.
HORIZONTAL_CTRL = frozenset((
    '\x08',  # BS (backspace) - cursor left 1
    '\x09',  # HT (horizontal tab) - advance to next tab stop
    '\x0d',  # CR (carriage return) - cursor to column 0
))

# Controls that are valid for a terminal, occupy no cells and do not move
# the cursor; allowed in all modes.
ZERO_WIDTH_CTRL = frozenset((
    '\x00',  # NUL
    '\x07',  # BEL (bell)
    '\x0e',  # SO (shift out)
    '\x0f',  # SI (shift in)
))

# Union of every set above plus ESC: all characters that need special
# handling instead of being measured as regular printable text.
ALL_CTRL = ILLEGAL_CTRL.union(
    VERTICAL_CTRL,
    HORIZONTAL_CTRL,
    ZERO_WIDTH_CTRL,
    ('\x1b',),
)
diff --git a/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py b/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py
new file mode 100644
index 00000000000..ec51bd3b5bb
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py
@@ -0,0 +1,69 @@
+r"""
+Terminal escape sequence patterns.
+
+This module provides regex patterns for matching terminal escape sequences. All patterns match
+sequences that begin with ESC (\\x1b). Before calling re.match with these patterns, callers should
+first check that the character at the current position is ESC for optimal performance.
+"""
+# std imports
+import re
+
# Escape sequences that occupy no cells (SGR, OSC, CSI, etc.). The alternatives
# are tried in the order listed. This table, like INDETERMINATE_EFFECT_SEQUENCE,
# originated from the 'blessed' library.
ZERO_WIDTH_PATTERN = re.compile('|'.join((
    r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]',  # CSI sequences
    r'\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)',          # OSC sequences
    r'\x1b_[^\x1b\x07]*(?:\x07|\x1b\\)',           # APC sequences
    r'\x1bP[^\x1b\x07]*(?:\x07|\x1b\\)',           # DCS sequences
    r'\x1b\^[^\x1b\x07]*(?:\x07|\x1b\\)',          # PM sequences
    r'\x1b[()].',                                  # Character set designation
    r'\x1b[\x40-\x5f]',                            # Fe sequences
    r'\x1b[78=>g]',                                # Fp sequences
)))
+
# CSI [n] C - move the cursor right. Group 1 captures the optional count
# (an empty string when omitted), which width() may parse.
CURSOR_RIGHT_SEQUENCE = re.compile(
    r'\x1b\[(\d*)C'
)

# CSI [n] D - move the cursor left. Group 1 captures the optional count
# (an empty string when omitted), which width() may parse.
CURSOR_LEFT_SEQUENCE = re.compile(
    r'\x1b\[(\d*)D'
)
+
# Sequences whose effect cannot be accounted for - they raise ValueError in
# 'strict' mode. Their effects are likely to be undesirable, moving the cursor
# vertically or to any unknown position, and are otherwise not managed by the
# 'width' method of this library.
#
# This table was created initially with code generation by extraction of termcap
# library with techniques used at 'blessed' library runtime for 'xterm',
# 'alacritty', 'kitty', 'ghostty', 'screen', 'tmux', and others. Then, these
# common capabilities were merged into the list below.
#
# Each entry is wrapped in its own non-capturing group and the groups are
# joined as alternatives, tried in the order listed.
INDETERMINATE_EFFECT_SEQUENCE = re.compile('|'.join(map('(?:{})'.format, (
    r'\x1b\[\d+;\d+r',     # change_scroll_region
    r'\x1b\[\d*K',         # erase_in_line (clr_eol, clr_bol)
    r'\x1b\[\d*J',         # erase_in_display (clr_eos, erase_display)
    r'\x1b\[\d*G',         # column_address
    r'\x1b\[\d+;\d+H',     # cursor_address
    r'\x1b\[\d*H',         # cursor_home
    r'\x1b\[\d*A',         # cursor_up
    r'\x1b\[\d*B',         # cursor_down
    r'\x1b\[\d*P',         # delete_character
    r'\x1b\[\d*M',         # delete_line
    r'\x1b\[\d*L',         # insert_line
    r'\x1b\[\d*@',         # insert_character
    r'\x1b\[\d+X',         # erase_chars
    r'\x1b\[\d*S',         # scroll_up (parm_index)
    r'\x1b\[\d*T',         # scroll_down (parm_rindex)
    r'\x1b\[\d*d',         # row_address
    r'\x1b\[\?1049[hl]',   # alternate screen buffer
    r'\x1b\[\?47[hl]',     # alternate screen (legacy)
    r'\x1b8',              # restore_cursor
    r'\x1bD',              # scroll_forward (index)
    r'\x1bM',              # scroll_reverse (reverse index)
))))
diff --git a/contrib/python/wcwidth/py3/wcwidth/grapheme.py b/contrib/python/wcwidth/py3/wcwidth/grapheme.py
new file mode 100644
index 00000000000..fed1b0184e6
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/grapheme.py
@@ -0,0 +1,299 @@
+"""
+Grapheme cluster segmentation following Unicode Standard Annex #29.
+
+This module provides pure-Python implementation of the grapheme cluster boundary algorithm as
+defined in UAX #29: Unicode Text Segmentation.
+
+https://www.unicode.org/reports/tr29/
+"""
+# std imports
+from enum import IntEnum
+from functools import lru_cache
+
+from typing import Iterator, Optional, NamedTuple
+
+# local
+from .bisearch import bisearch as _bisearch
+from .table_grapheme import (GRAPHEME_L,
+ GRAPHEME_T,
+ GRAPHEME_V,
+ GRAPHEME_LV,
+ INCB_EXTEND,
+ INCB_LINKER,
+ GRAPHEME_LVT,
+ INCB_CONSONANT,
+ GRAPHEME_EXTEND,
+ GRAPHEME_CONTROL,
+ GRAPHEME_PREPEND,
+ GRAPHEME_SPACINGMARK,
+ EXTENDED_PICTOGRAPHIC,
+ GRAPHEME_REGIONAL_INDICATOR)
+
+
class GCB(IntEnum):
    """
    Grapheme Cluster Break property values.

    Returned by _grapheme_cluster_break() for a codepoint and compared pairwise by the break rules
    in _simple_break_check() and _should_break().
    """

    # Fallback for codepoints that match no other property
    OTHER = 0
    # Single-codepoint properties: U+000D, U+000A
    CR = 1
    LF = 2
    CONTROL = 3
    EXTEND = 4
    # Single-codepoint property: U+200D
    ZWJ = 5
    REGIONAL_INDICATOR = 6
    PREPEND = 7
    SPACING_MARK = 8
    # Values combined by rules GB6, GB7 and GB8
    L = 9
    V = 10
    T = 11
    LV = 12
    LVT = 13
+
+
+@lru_cache(maxsize=1000)
+def _grapheme_cluster_break(ucs: int) -> GCB:
+ # pylint: disable=too-many-branches,too-complex
+ """Return the Grapheme_Cluster_Break property for a codepoint."""
+ # Single codepoint matches
+ if ucs == 0x000d:
+ return GCB.CR
+ if ucs == 0x000a:
+ return GCB.LF
+ if ucs == 0x200d:
+ return GCB.ZWJ
+ # Matching by codepoint ranges, requiring binary search
+ if _bisearch(ucs, GRAPHEME_CONTROL):
+ return GCB.CONTROL
+ if _bisearch(ucs, GRAPHEME_EXTEND):
+ return GCB.EXTEND
+ if _bisearch(ucs, GRAPHEME_REGIONAL_INDICATOR):
+ return GCB.REGIONAL_INDICATOR
+ if _bisearch(ucs, GRAPHEME_PREPEND):
+ return GCB.PREPEND
+ if _bisearch(ucs, GRAPHEME_SPACINGMARK):
+ return GCB.SPACING_MARK
+ if _bisearch(ucs, GRAPHEME_L):
+ return GCB.L
+ if _bisearch(ucs, GRAPHEME_V):
+ return GCB.V
+ if _bisearch(ucs, GRAPHEME_T):
+ return GCB.T
+ if _bisearch(ucs, GRAPHEME_LV):
+ return GCB.LV
+ if _bisearch(ucs, GRAPHEME_LVT):
+ return GCB.LVT
+ return GCB.OTHER
+
+
+@lru_cache(maxsize=512)
+def _is_extended_pictographic(ucs: int) -> bool:
+ """Check if codepoint has Extended_Pictographic property."""
+ return _bisearch(ucs, EXTENDED_PICTOGRAPHIC)
+
+
+@lru_cache(maxsize=128)
+def _is_incb_linker(ucs: int) -> bool:
+ """Check if codepoint has InCB=Linker property."""
+ return _bisearch(ucs, INCB_LINKER)
+
+
+@lru_cache(maxsize=256)
+def _is_incb_consonant(ucs: int) -> bool:
+ """Check if codepoint has InCB=Consonant property."""
+ return _bisearch(ucs, INCB_CONSONANT)
+
+
+@lru_cache(maxsize=256)
+def _is_incb_extend(ucs: int) -> bool:
+ """Check if codepoint has InCB=Extend property."""
+ return _bisearch(ucs, INCB_EXTEND)
+
+
class BreakResult(NamedTuple):
    """
    Result of grapheme cluster break decision.

    Produced by _simple_break_check() and _should_break(), consumed by iter_graphemes().
    """

    # True when a cluster boundary falls between the previous and current codepoint
    should_break: bool
    # Regional Indicator counter that iter_graphemes() feeds into the next decision
    ri_count: int
+
+
+@lru_cache(maxsize=196) # 14 GCB values × 14 = 196 max combinations
+def _simple_break_check(prev_gcb: GCB, curr_gcb: GCB) -> Optional[BreakResult]:
+ """
+ Check simple GCB-pair-based break rules (cacheable).
+
+ Returns BreakResult for rules that can be determined from GCB properties alone, or None if
+ complex lookback rules (GB9c, GB11) need to be checked.
+ """
+ # GB3: CR x LF
+ if prev_gcb == GCB.CR and curr_gcb == GCB.LF:
+ return BreakResult(should_break=False, ri_count=0)
+
+ # GB4: (Control|CR|LF) ÷
+ if prev_gcb in (GCB.CONTROL, GCB.CR, GCB.LF):
+ return BreakResult(should_break=True, ri_count=0)
+
+ # GB5: ÷ (Control|CR|LF)
+ if curr_gcb in (GCB.CONTROL, GCB.CR, GCB.LF):
+ return BreakResult(should_break=True, ri_count=0)
+
+ # GB6: L x (L|V|LV|LVT)
+ if prev_gcb == GCB.L and curr_gcb in (GCB.L, GCB.V, GCB.LV, GCB.LVT):
+ return BreakResult(should_break=False, ri_count=0)
+
+ # GB7: (LV|V) x (V|T)
+ if prev_gcb in (GCB.LV, GCB.V) and curr_gcb in (GCB.V, GCB.T):
+ return BreakResult(should_break=False, ri_count=0)
+
+ # GB8: (LVT|T) x T
+ if prev_gcb in (GCB.LVT, GCB.T) and curr_gcb == GCB.T:
+ return BreakResult(should_break=False, ri_count=0)
+
+ # GB9: x (Extend|ZWJ) - but ZWJ needs GB11 check, so only handle Extend here
+ if curr_gcb == GCB.EXTEND:
+ return BreakResult(should_break=False, ri_count=0)
+
+ # GB9a: x SpacingMark
+ if curr_gcb == GCB.SPACING_MARK:
+ return BreakResult(should_break=False, ri_count=0)
+
+ # GB9b: Prepend x
+ if prev_gcb == GCB.PREPEND:
+ return BreakResult(should_break=False, ri_count=0)
+
+ # GB9c and GB11 need lookback - return None to signal complex check needed
+ # GB12/13 (RI pairs) need ri_count state - also handled in main function
+ return None
+
+
def _should_break(
    prev_gcb: GCB,
    curr_gcb: GCB,
    text: str,
    curr_idx: int,
    ri_count: int,
) -> BreakResult:
    # pylint: disable=too-many-branches,too-complex
    """
    Determine if there should be a grapheme cluster break between prev and curr.

    Implements UAX #29 grapheme cluster boundary rules.

    :param prev_gcb: Property of the codepoint at ``curr_idx - 1``.
    :param curr_gcb: Property of the codepoint at ``curr_idx``.
    :param text: The full string being segmented, used for lookback.
    :param curr_idx: Index in ``text`` of the current codepoint.
    :param ri_count: Regional Indicator counter carried over from the previous decision.
    :returns: BreakResult holding the decision and the counter for the next call.
    """
    # Try cached simple rules first
    result = _simple_break_check(prev_gcb, curr_gcb)
    if result is not None:
        return result

    # GB9: x ZWJ (not cached because GB11 needs lookback when prev is ZWJ)
    if curr_gcb == GCB.ZWJ:
        return BreakResult(should_break=False, ri_count=0)

    # GB9c: Indic conjunct cluster
    # \p{InCB=Consonant} [\p{InCB=Extend}\p{InCB=Linker}]* \p{InCB=Linker}
    # [\p{InCB=Extend}\p{InCB=Linker}]* x \p{InCB=Consonant}
    curr_ucs = ord(text[curr_idx])
    if _is_incb_consonant(curr_ucs):
        has_linker = False
        # Walk backwards over linkers/extenders until a consonant or anything else is met
        i = curr_idx - 1
        while i >= 0:
            prev_ucs = ord(text[i])
            if _is_incb_linker(prev_ucs):
                has_linker = True
                i -= 1
            elif _is_incb_extend(prev_ucs):
                i -= 1
            elif _is_incb_consonant(prev_ucs):
                # Only a consonant reached across at least one linker joins the cluster
                if has_linker:
                    return BreakResult(should_break=False, ri_count=0)
                break
            else:
                break

    # GB11: ExtPict Extend* ZWJ x ExtPict
    if prev_gcb == GCB.ZWJ and _is_extended_pictographic(curr_ucs):
        i = curr_idx - 2  # Skip the ZWJ at curr_idx - 1
        # Walk backwards over Extend codepoints looking for the leading pictographic
        while i >= 0:
            prev_ucs = ord(text[i])
            prev_prop = _grapheme_cluster_break(prev_ucs)
            if prev_prop == GCB.EXTEND:
                i -= 1
            elif _is_extended_pictographic(prev_ucs):
                return BreakResult(should_break=False, ri_count=0)
            else:
                break

    # GB12/GB13: RI x RI (pair matching)
    if prev_gcb == GCB.REGIONAL_INDICATOR and curr_gcb == GCB.REGIONAL_INDICATOR:
        # An odd count means the previous RI is still unpaired: join it with this one
        if ri_count % 2 == 1:
            return BreakResult(should_break=False, ri_count=ri_count + 1)
        # Otherwise the previous pair is complete: break and start counting again
        return BreakResult(should_break=True, ri_count=1)

    # GB999: Any ÷ Any
    ri_count = 1 if curr_gcb == GCB.REGIONAL_INDICATOR else 0
    return BreakResult(should_break=True, ri_count=ri_count)
+
+
def iter_graphemes(
    unistr: str,
    start: int = 0,
    end: Optional[int] = None,
) -> Iterator[str]:
    r"""
    Iterate over grapheme clusters in a Unicode string.

    Grapheme clusters are "user-perceived characters" - what a user would
    consider a single character, which may consist of multiple Unicode
    codepoints (e.g., a base character with combining marks, emoji sequences).

    :param unistr: The Unicode string to segment.
    :param start: Starting index (default 0).
    :param end: Ending index (default len(unistr)); values beyond the string
        length are clamped to it.
    :yields: Grapheme cluster substrings. Nothing is yielded when ``unistr``
        is empty or when ``start`` is not below both ``end`` and the length.

    Example::

        >>> list(iter_graphemes('cafe\\u0301'))
        ['c', 'a', 'f', 'e\\u0301']
        >>> list(iter_graphemes('ok\\U0001F468\\u200D\\U0001F469\\u200D\\U0001F467'))
        ['o', 'k', '\\U0001F468\\u200D\\U0001F469\\u200D\\U0001F467']
        >>> list(iter_graphemes('ok\\U0001F1FA\\U0001F1F8'))
        ['o', 'k', '\\U0001F1FA\\U0001F1F8']

    .. versionadded:: 0.3.0
    """
    if not unistr:
        return

    length = len(unistr)

    if end is None:
        end = length

    if start >= end or start >= length:
        return

    end = min(end, length)

    # Track state for grapheme cluster boundaries
    cluster_start = start
    ri_count = 0

    # Get GCB for first character
    prev_gcb = _grapheme_cluster_break(ord(unistr[start]))

    # Handle Regional Indicator count initialization
    if prev_gcb == GCB.REGIONAL_INDICATOR:
        ri_count = 1

    for idx in range(start + 1, end):
        curr_gcb = _grapheme_cluster_break(ord(unistr[idx]))

        # Decide whether a boundary falls between idx - 1 and idx
        result = _should_break(prev_gcb, curr_gcb, unistr, idx, ri_count)
        ri_count = result.ri_count

        if result.should_break:
            yield unistr[cluster_start:idx]
            cluster_start = idx

        prev_gcb = curr_gcb

    # Yield the final cluster
    yield unistr[cluster_start:end]
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py b/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py
new file mode 100644
index 00000000000..e3dc0b1c3de
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py
@@ -0,0 +1,189 @@
+"""
+Exports AMBIGUOUS_EASTASIAN table keyed by supporting unicode version level.
+
+This code generated by wcwidth/bin/update-tables.py on 2026-01-18 23:27:15 UTC.
+"""
+# pylint: disable=duplicate-code
+AMBIGUOUS_EASTASIAN = {
+ '17.0.0': (
+ # Source: EastAsianWidth-17.0.0.txt
+ # Date: 2025-07-24, 00:12:54 GMT
+ # Each entry is an inclusive (first, last) range of East Asian Ambiguous code points.
+ (0x000a1, 0x000a1,), # Inverted Exclamation Mark
+ (0x000a4, 0x000a4,), # Currency Sign
+ (0x000a7, 0x000a8,), # Section Sign ..Diaeresis
+ (0x000aa, 0x000aa,), # Feminine Ordinal Indicator
+ (0x000ad, 0x000ae,), # Soft Hyphen ..Registered Sign
+ (0x000b0, 0x000b4,), # Degree Sign ..Acute Accent
+ (0x000b6, 0x000ba,), # Pilcrow Sign ..Masculine Ordinal Indica
+ (0x000bc, 0x000bf,), # Vulgar Fraction One Quar..Inverted Question Mark
+ (0x000c6, 0x000c6,), # Latin Capital Letter Ae
+ (0x000d0, 0x000d0,), # Latin Capital Letter Eth
+ (0x000d7, 0x000d8,), # Multiplication Sign ..Latin Capital Letter O W
+ (0x000de, 0x000e1,), # Latin Capital Letter Tho..Latin Small Letter A Wit
+ (0x000e6, 0x000e6,), # Latin Small Letter Ae
+ (0x000e8, 0x000ea,), # Latin Small Letter E Wit..Latin Small Letter E Wit
+ (0x000ec, 0x000ed,), # Latin Small Letter I Wit..Latin Small Letter I Wit
+ (0x000f0, 0x000f0,), # Latin Small Letter Eth
+ (0x000f2, 0x000f3,), # Latin Small Letter O Wit..Latin Small Letter O Wit
+ (0x000f7, 0x000fa,), # Division Sign ..Latin Small Letter U Wit
+ (0x000fc, 0x000fc,), # Latin Small Letter U With Diaeresis
+ (0x000fe, 0x000fe,), # Latin Small Letter Thorn
+ (0x00101, 0x00101,), # Latin Small Letter A With Macron
+ (0x00111, 0x00111,), # Latin Small Letter D With Stroke
+ (0x00113, 0x00113,), # Latin Small Letter E With Macron
+ (0x0011b, 0x0011b,), # Latin Small Letter E With Caron
+ (0x00126, 0x00127,), # Latin Capital Letter H W..Latin Small Letter H Wit
+ (0x0012b, 0x0012b,), # Latin Small Letter I With Macron
+ (0x00131, 0x00133,), # Latin Small Letter Dotle..Latin Small Ligature Ij
+ (0x00138, 0x00138,), # Latin Small Letter Kra
+ (0x0013f, 0x00142,), # Latin Capital Letter L W..Latin Small Letter L Wit
+ (0x00144, 0x00144,), # Latin Small Letter N With Acute
+ (0x00148, 0x0014b,), # Latin Small Letter N Wit..Latin Small Letter Eng
+ (0x0014d, 0x0014d,), # Latin Small Letter O With Macron
+ (0x00152, 0x00153,), # Latin Capital Ligature O..Latin Small Ligature Oe
+ (0x00166, 0x00167,), # Latin Capital Letter T W..Latin Small Letter T Wit
+ (0x0016b, 0x0016b,), # Latin Small Letter U With Macron
+ (0x001ce, 0x001ce,), # Latin Small Letter A With Caron
+ (0x001d0, 0x001d0,), # Latin Small Letter I With Caron
+ (0x001d2, 0x001d2,), # Latin Small Letter O With Caron
+ (0x001d4, 0x001d4,), # Latin Small Letter U With Caron
+ (0x001d6, 0x001d6,), # Latin Small Letter U With Diaeresis And Macron
+ (0x001d8, 0x001d8,), # Latin Small Letter U With Diaeresis And Acute
+ (0x001da, 0x001da,), # Latin Small Letter U With Diaeresis And Caron
+ (0x001dc, 0x001dc,), # Latin Small Letter U With Diaeresis And Grave
+ (0x00251, 0x00251,), # Latin Small Letter Alpha
+ (0x00261, 0x00261,), # Latin Small Letter Script G
+ (0x002c4, 0x002c4,), # Modifier Letter Up Arrowhead
+ (0x002c7, 0x002c7,), # Caron
+ (0x002c9, 0x002cb,), # Modifier Letter Macron ..Modifier Letter Grave Ac
+ (0x002cd, 0x002cd,), # Modifier Letter Low Macron
+ (0x002d0, 0x002d0,), # Modifier Letter Triangular Colon
+ (0x002d8, 0x002db,), # Breve ..Ogonek
+ (0x002dd, 0x002dd,), # Double Acute Accent
+ (0x002df, 0x002df,), # Modifier Letter Cross Accent
+ (0x00391, 0x003a1,), # Greek Capital Letter Alp..Greek Capital Letter Rho
+ (0x003a3, 0x003a9,), # Greek Capital Letter Sig..Greek Capital Letter Ome
+ (0x003b1, 0x003c1,), # Greek Small Letter Alpha..Greek Small Letter Rho
+ (0x003c3, 0x003c9,), # Greek Small Letter Sigma..Greek Small Letter Omega
+ (0x00401, 0x00401,), # Cyrillic Capital Letter Io
+ (0x00410, 0x0044f,), # Cyrillic Capital Letter ..Cyrillic Small Letter Ya
+ (0x00451, 0x00451,), # Cyrillic Small Letter Io
+ (0x02010, 0x02010,), # Hyphen
+ (0x02013, 0x02016,), # En Dash ..Double Vertical Line
+ (0x02018, 0x02019,), # Left Single Quotation Ma..Right Single Quotation M
+ (0x0201c, 0x0201d,), # Left Double Quotation Ma..Right Double Quotation M
+ (0x02020, 0x02022,), # Dagger ..Bullet
+ (0x02024, 0x02027,), # One Dot Leader ..Hyphenation Point
+ (0x02030, 0x02030,), # Per Mille Sign
+ (0x02032, 0x02033,), # Prime ..Double Prime
+ (0x02035, 0x02035,), # Reversed Prime
+ (0x0203b, 0x0203b,), # Reference Mark
+ (0x0203e, 0x0203e,), # Overline
+ (0x02074, 0x02074,), # Superscript Four
+ (0x0207f, 0x0207f,), # Superscript Latin Small Letter N
+ (0x02081, 0x02084,), # Subscript One ..Subscript Four
+ (0x020ac, 0x020ac,), # Euro Sign
+ (0x02103, 0x02103,), # Degree Celsius
+ (0x02105, 0x02105,), # Care Of
+ (0x02109, 0x02109,), # Degree Fahrenheit
+ (0x02113, 0x02113,), # Script Small L
+ (0x02116, 0x02116,), # Numero Sign
+ (0x02121, 0x02122,), # Telephone Sign ..Trade Mark Sign
+ (0x02126, 0x02126,), # Ohm Sign
+ (0x0212b, 0x0212b,), # Angstrom Sign
+ (0x02153, 0x02154,), # Vulgar Fraction One Thir..Vulgar Fraction Two Thir
+ (0x0215b, 0x0215e,), # Vulgar Fraction One Eigh..Vulgar Fraction Seven Ei
+ (0x02160, 0x0216b,), # Roman Numeral One ..Roman Numeral Twelve
+ (0x02170, 0x02179,), # Small Roman Numeral One ..Small Roman Numeral Ten
+ (0x02189, 0x02189,), # Vulgar Fraction Zero Thirds
+ (0x02190, 0x02199,), # Leftwards Arrow ..South West Arrow
+ (0x021b8, 0x021b9,), # North West Arrow To Long..Leftwards Arrow To Bar O
+ (0x021d2, 0x021d2,), # Rightwards Double Arrow
+ (0x021d4, 0x021d4,), # Left Right Double Arrow
+ (0x021e7, 0x021e7,), # Upwards White Arrow
+ (0x02200, 0x02200,), # For All
+ (0x02202, 0x02203,), # Partial Differential ..There Exists
+ (0x02207, 0x02208,), # Nabla ..Element Of
+ (0x0220b, 0x0220b,), # Contains As Member
+ (0x0220f, 0x0220f,), # N-ary Product
+ (0x02211, 0x02211,), # N-ary Summation
+ (0x02215, 0x02215,), # Division Slash
+ (0x0221a, 0x0221a,), # Square Root
+ (0x0221d, 0x02220,), # Proportional To ..Angle
+ (0x02223, 0x02223,), # Divides
+ (0x02225, 0x02225,), # Parallel To
+ (0x02227, 0x0222c,), # Logical And ..Double Integral
+ (0x0222e, 0x0222e,), # Contour Integral
+ (0x02234, 0x02237,), # Therefore ..Proportion
+ (0x0223c, 0x0223d,), # Tilde Operator ..Reversed Tilde
+ (0x02248, 0x02248,), # Almost Equal To
+ (0x0224c, 0x0224c,), # All Equal To
+ (0x02252, 0x02252,), # Approximately Equal To Or The Image Of
+ (0x02260, 0x02261,), # Not Equal To ..Identical To
+ (0x02264, 0x02267,), # Less-than Or Equal To ..Greater-than Over Equal
+ (0x0226a, 0x0226b,), # Much Less-than ..Much Greater-than
+ (0x0226e, 0x0226f,), # Not Less-than ..Not Greater-than
+ (0x02282, 0x02283,), # Subset Of ..Superset Of
+ (0x02286, 0x02287,), # Subset Of Or Equal To ..Superset Of Or Equal To
+ (0x02295, 0x02295,), # Circled Plus
+ (0x02299, 0x02299,), # Circled Dot Operator
+ (0x022a5, 0x022a5,), # Up Tack
+ (0x022bf, 0x022bf,), # Right Triangle
+ (0x02312, 0x02312,), # Arc
+ (0x02460, 0x024e9,), # Circled Digit One ..Circled Latin Small Lett
+ (0x024eb, 0x0254b,), # Negative Circled Number ..Box Drawings Heavy Verti
+ (0x02550, 0x02573,), # Box Drawings Double Hori..Box Drawings Light Diago
+ (0x02580, 0x0258f,), # Upper Half Block ..Left One Eighth Block
+ (0x02592, 0x02595,), # Medium Shade ..Right One Eighth Block
+ (0x025a0, 0x025a1,), # Black Square ..White Square
+ (0x025a3, 0x025a9,), # White Square Containing ..Square With Diagonal Cro
+ (0x025b2, 0x025b3,), # Black Up-pointing Triang..White Up-pointing Triang
+ (0x025b6, 0x025b7,), # Black Right-pointing Tri..White Right-pointing Tri
+ (0x025bc, 0x025bd,), # Black Down-pointing Tria..White Down-pointing Tria
+ (0x025c0, 0x025c1,), # Black Left-pointing Tria..White Left-pointing Tria
+ (0x025c6, 0x025c8,), # Black Diamond ..White Diamond Containing
+ (0x025cb, 0x025cb,), # White Circle
+ (0x025ce, 0x025d1,), # Bullseye ..Circle With Right Half B
+ (0x025e2, 0x025e5,), # Black Lower Right Triang..Black Upper Right Triang
+ (0x025ef, 0x025ef,), # Large Circle
+ (0x02605, 0x02606,), # Black Star ..White Star
+ (0x02609, 0x02609,), # Sun
+ (0x0260e, 0x0260f,), # Black Telephone ..White Telephone
+ (0x0261c, 0x0261c,), # White Left Pointing Index
+ (0x0261e, 0x0261e,), # White Right Pointing Index
+ (0x02640, 0x02640,), # Female Sign
+ (0x02642, 0x02642,), # Male Sign
+ (0x02660, 0x02661,), # Black Spade Suit ..White Heart Suit
+ (0x02663, 0x02665,), # Black Club Suit ..Black Heart Suit
+ (0x02667, 0x0266a,), # White Club Suit ..Eighth Note
+ (0x0266c, 0x0266d,), # Beamed Sixteenth Notes ..Music Flat Sign
+ (0x0266f, 0x0266f,), # Music Sharp Sign
+ (0x0269e, 0x0269f,), # Three Lines Converging R..Three Lines Converging L
+ (0x026bf, 0x026bf,), # Squared Key
+ (0x026c6, 0x026cd,), # Rain ..Disabled Car
+ (0x026cf, 0x026d3,), # Pick ..Chains
+ (0x026d5, 0x026e1,), # Alternate One-way Left W..Restricted Left Entry-2
+ (0x026e3, 0x026e3,), # Heavy Circle With Stroke And Two Dots Above
+ (0x026e8, 0x026e9,), # Black Cross On Shield ..Shinto Shrine
+ (0x026eb, 0x026f1,), # Castle ..Umbrella On Ground
+ (0x026f4, 0x026f4,), # Ferry
+ (0x026f6, 0x026f9,), # Square Four Corners ..Person With Ball
+ (0x026fb, 0x026fc,), # Japanese Bank Symbol ..Headstone Graveyard Symb
+ (0x026fe, 0x026ff,), # Cup On Black Square ..White Flag With Horizont
+ (0x0273d, 0x0273d,), # Heavy Teardrop-spoked Asterisk
+ (0x02776, 0x0277f,), # Dingbat Negative Circled..Dingbat Negative Circled
+ (0x02b56, 0x02b59,), # Heavy Oval With Oval Ins..Heavy Circled Saltire
+ (0x03248, 0x0324f,), # Circled Number Ten On Bl..Circled Number Eighty On
+ (0x0e000, 0x0f8ff,), # Private Use Area
+ (0x0fffd, 0x0fffd,), # Replacement Character
+ (0x1f100, 0x1f10a,), # Digit Zero Full Stop ..Digit Nine Comma
+ (0x1f110, 0x1f12d,), # Parenthesized Latin Capi..Circled Cd
+ (0x1f130, 0x1f169,), # Squared Latin Capital Le..Negative Circled Latin C
+ (0x1f170, 0x1f18d,), # Negative Squared Latin C..Negative Squared Sa
+ (0x1f18f, 0x1f190,), # Negative Squared Wc ..Square Dj
+ (0x1f19b, 0x1f1ac,), # Squared Three D ..Squared Vod
+ (0xf0000, 0xffffd,), # Supplementary Private Use Area-A
+ (0x100000, 0x10fffd,), # Supplementary Private Use Area-B
+ ),
+}
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py b/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py
new file mode 100644
index 00000000000..7fe0d157309
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py
@@ -0,0 +1,2126 @@
+"""
+Exports grapheme cluster break property tables for Unicode version 17.0.0.
+
+This module provides lookup tables for Unicode grapheme cluster break properties as defined in UAX
+#29: Unicode Text Segmentation.
+
+This code generated by wcwidth/bin/update-tables.py on 2026-01-20 16:47:43 UTC.
+"""
+# pylint: disable=duplicate-code
+
+GRAPHEME_CR = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ # Each entry is an inclusive (first, last) code point range.
+ (0x0000d, 0x0000d,), # Carriage Return (CR)
+)
+
+GRAPHEME_LF = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ # Each entry is an inclusive (first, last) code point range.
+ (0x0000a, 0x0000a,), # Line Feed (LF)
+)
+
+GRAPHEME_CONTROL = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ # Each entry is an inclusive (first, last) code point range.
+ (0x00000, 0x00009,), # Null ..Character Tabulation
+ (0x0000b, 0x0000c,), # Line Tabulation ..Form Feed
+ (0x0000e, 0x0001f,), # Shift Out ..Information Separator One
+ (0x0007f, 0x0009f,), # Delete ..Application Program Command
+ (0x000ad, 0x000ad,), # Soft Hyphen
+ (0x0061c, 0x0061c,), # Arabic Letter Mark
+ (0x0180e, 0x0180e,), # Mongolian Vowel Separator
+ (0x0200b, 0x0200b,), # Zero Width Space
+ (0x0200e, 0x0200f,), # Left-to-right Mark ..Right-to-left Mark
+ (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
+ (0x0feff, 0x0feff,), # Zero Width No-break Space
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
+ (0x13430, 0x1343f,), # Egyptian Hieroglyph Vert..Egyptian Hieroglyph End
+ (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step
+ (0x1d173, 0x1d17a,), # Musical Symbol Begin Bea..Musical Symbol End Phras
+ (0xe0000, 0xe001f,), # (nil)
+ (0xe0080, 0xe00ff,), # (nil)
+ (0xe01f0, 0xe0fff,), # (nil)
+)
+
+GRAPHEME_EXTEND = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
+ (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
+ (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
+ (0x005bf, 0x005bf,), # Hebrew Point Rafe
+ (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
+ (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
+ (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
+ (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
+ (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
+ (0x00670, 0x00670,), # Arabic Letter Superscript Alef
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
+ (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
+ (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
+ (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
+ (0x00711, 0x00711,), # Syriac Letter Superscript Alaph
+ (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
+ (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
+ (0x007eb, 0x007f3,), # Nko Combining Short High..Nko Combining Double Dot
+ (0x007fd, 0x007fd,), # Nko Dantayalan
+ (0x00816, 0x00819,), # Samaritan Mark In ..Samaritan Mark Dagesh
+ (0x0081b, 0x00823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A
+ (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
+ (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
+ (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
+ (0x00897, 0x0089f,), # (nil) ..Arabic Half Madda Over M
+ (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S
+ (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara
+ (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe
+ (0x0093c, 0x0093c,), # Devanagari Sign Nukta
+ (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai
+ (0x0094d, 0x0094d,), # Devanagari Sign Virama
+ (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
+ (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo
+ (0x00981, 0x00981,), # Bengali Sign Candrabindu
+ (0x009bc, 0x009bc,), # Bengali Sign Nukta
+ (0x009be, 0x009be,), # Bengali Vowel Sign Aa
+ (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal
+ (0x009cd, 0x009cd,), # Bengali Sign Virama
+ (0x009d7, 0x009d7,), # Bengali Au Length Mark
+ (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal
+ (0x009fe, 0x009fe,), # Bengali Sandhi Mark
+ (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi
+ (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta
+ (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu
+ (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai
+ (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama
+ (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat
+ (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak
+ (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash
+ (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara
+ (0x00abc, 0x00abc,), # Gujarati Sign Nukta
+ (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand
+ (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai
+ (0x00acd, 0x00acd,), # Gujarati Sign Virama
+ (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca
+ (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle
+ (0x00b01, 0x00b01,), # Oriya Sign Candrabindu
+ (0x00b3c, 0x00b3c,), # Oriya Sign Nukta
+ (0x00b3e, 0x00b3f,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign I
+ (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic
+ (0x00b4d, 0x00b4d,), # Oriya Sign Virama
+ (0x00b55, 0x00b57,), # Oriya Sign Overline ..Oriya Au Length Mark
+ (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic
+ (0x00b82, 0x00b82,), # Tamil Sign Anusvara
+ (0x00bbe, 0x00bbe,), # Tamil Vowel Sign Aa
+ (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii
+ (0x00bcd, 0x00bcd,), # Tamil Sign Virama
+ (0x00bd7, 0x00bd7,), # Tamil Au Length Mark
+ (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above
+ (0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above
+ (0x00c3c, 0x00c3c,), # Telugu Sign Nukta
+ (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii
+ (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai
+ (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama
+ (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark
+ (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali
+ (0x00c81, 0x00c81,), # Kannada Sign Candrabindu
+ (0x00cbc, 0x00cbc,), # Kannada Sign Nukta
+ (0x00cbf, 0x00cc0,), # Kannada Vowel Sign I ..Kannada Vowel Sign Ii
+ (0x00cc2, 0x00cc2,), # Kannada Vowel Sign Uu
+ (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai
+ (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama
+ (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark
+ (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal
+ (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin
+ (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular
+ (0x00d3e, 0x00d3e,), # Malayalam Vowel Sign Aa
+ (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc
+ (0x00d4d, 0x00d4d,), # Malayalam Sign Virama
+ (0x00d57, 0x00d57,), # Malayalam Au Length Mark
+ (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc
+ (0x00d81, 0x00d81,), # Sinhala Sign Candrabindu
+ (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna
+ (0x00dcf, 0x00dcf,), # Sinhala Vowel Sign Aela-pilla
+ (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti
+ (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla
+ (0x00ddf, 0x00ddf,), # Sinhala Vowel Sign Gayanukitta
+ (0x00e31, 0x00e31,), # Thai Character Mai Han-akat
+ (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu
+ (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan
+ (0x00eb1, 0x00eb1,), # Lao Vowel Sign Mai Kan
+ (0x00eb4, 0x00ebc,), # Lao Vowel Sign I ..Lao Semivowel Sign Lo
+ (0x00ec8, 0x00ece,), # Lao Tone Mai Ek ..Lao Yamakkan
+ (0x00f18, 0x00f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig
+ (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla
+ (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags
+ (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru
+ (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga
+ (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta
+ (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags
+ (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter
+ (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter
+ (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan
+ (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu
+ (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below
+ (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat
+ (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M
+ (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal
+ (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M
+ (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah
+ (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa
+ (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan
+ (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone
+ (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai
+ (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin
+ (0x01712, 0x01715,), # Tagalog Vowel Sign I ..Tagalog Sign Pamudpod
+ (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod
+ (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U
+ (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U
+ (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa
+ (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua
+ (0x017c6, 0x017c6,), # Khmer Sign Nikahit
+ (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat
+ (0x017dd, 0x017dd,), # Khmer Sign Atthacan
+ (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation
+ (0x0180f, 0x0180f,), # Mongolian Free Variation Selector Four
+ (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
+ (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga
+ (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U
+ (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O
+ (0x01932, 0x01932,), # Limbu Small Letter Anusvara
+ (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i
+ (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U
+ (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae
+ (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La
+ (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign
+ (0x01a60, 0x01a60,), # Tai Tham Sign Sakot
+ (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat
+ (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B
+ (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue
+ (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot
+ (0x01ab0, 0x01add,), # Combining Doubled Circum..(nil)
+ (0x01ae0, 0x01aeb,), # (nil)
+ (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang
+ (0x01b34, 0x01b3d,), # Balinese Sign Rerekan ..Balinese Vowel Sign La L
+ (0x01b42, 0x01b44,), # Balinese Vowel Sign Pepe..Balinese Adeg Adeg
+ (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol
+ (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar
+ (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan
+ (0x01ba8, 0x01bad,), # Sundanese Vowel Sign Pam..Sundanese Consonant Sign
+ (0x01be6, 0x01be6,), # Batak Sign Tompi
+ (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee
+ (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O
+ (0x01bef, 0x01bf3,), # Batak Vowel Sign U For S..Batak Panongonan
+ (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T
+ (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta
+ (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha
+ (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash
+ (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda
+ (0x01ced, 0x01ced,), # Vedic Sign Tiryak
+ (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above
+ (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A
+ (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea
+ (0x0200c, 0x0200c,), # Zero Width Non-joiner
+ (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
+ (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
+ (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
+ (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
+ (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
+ (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
+ (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
+ (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
+ (0x0a6f0, 0x0a6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk
+ (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara
+ (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta
+ (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara
+ (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
+ (0x0a82c, 0x0a82c,), # Syloti Nagri Sign Alternate Hasanta
+ (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi
+ (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig
+ (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay
+ (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop
+ (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R
+ (0x0a953, 0x0a953,), # Rejang Virama
+ (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar
+ (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu
+ (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku
+ (0x0a9bc, 0x0a9bd,), # Javanese Vowel Sign Pepe..Javanese Consonant Sign
+ (0x0a9c0, 0x0a9c0,), # Javanese Pangkon
+ (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw
+ (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe
+ (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue
+ (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa
+ (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng
+ (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M
+ (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2
+ (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang
+ (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U
+ (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia
+ (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek
+ (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho
+ (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
+ (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama
+ (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap
+ (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap
+ (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek
+ (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika
+ (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
+ (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
+ (0x0ff9e, 0x0ff9f,), # Halfwidth Katakana Voice..Halfwidth Katakana Semi-
+ (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
+ (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
+ (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
+ (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
+ (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
+ (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga
+ (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo
+ (0x10a3f, 0x10a3f,), # Kharoshthi Virama
+ (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation
+ (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas
+ (0x10d69, 0x10d6d,), # (nil)
+ (0x10eab, 0x10eac,), # Yezidi Combining Hamza M..Yezidi Combining Madda M
+ (0x10efa, 0x10eff,), # (nil) ..Arabic Small Low Word Ma
+ (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke
+ (0x10f82, 0x10f85,), # Old Uyghur Combining Dot..Old Uyghur Combining Two
+ (0x11001, 0x11001,), # Brahmi Sign Anusvara
+ (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama
+ (0x11070, 0x11070,), # Brahmi Sign Old Tamil Virama
+ (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
+ (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara
+ (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai
+ (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta
+ (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R
+ (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
+ (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu
+ (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa
+ (0x11173, 0x11173,), # Mahajani Sign Nukta
+ (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara
+ (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O
+ (0x111c0, 0x111c0,), # Sharada Sign Virama
+ (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe
+ (0x111cf, 0x111cf,), # Sharada Sign Inverted Candrabindu
+ (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai
+ (0x11234, 0x11237,), # Khojki Sign Anusvara ..Khojki Sign Shadda
+ (0x1123e, 0x1123e,), # Khojki Sign Sukun
+ (0x11241, 0x11241,), # Khojki Vowel Sign Vocalic R
+ (0x112df, 0x112df,), # Khudawadi Sign Anusvara
+ (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama
+ (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu
+ (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta
+ (0x1133e, 0x1133e,), # Grantha Vowel Sign Aa
+ (0x11340, 0x11340,), # Grantha Vowel Sign Ii
+ (0x1134d, 0x1134d,), # Grantha Sign Virama
+ (0x11357, 0x11357,), # Grantha Au Length Mark
+ (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit
+ (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter
+ (0x113b8, 0x113b8,), # (nil)
+ (0x113bb, 0x113c0,), # (nil)
+ (0x113c2, 0x113c2,), # (nil)
+ (0x113c5, 0x113c5,), # (nil)
+ (0x113c7, 0x113c9,), # (nil)
+ (0x113ce, 0x113d0,), # (nil)
+ (0x113d2, 0x113d2,), # (nil)
+ (0x113e1, 0x113e2,), # (nil)
+ (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai
+ (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara
+ (0x11446, 0x11446,), # Newa Sign Nukta
+ (0x1145e, 0x1145e,), # Newa Sandhi Mark
+ (0x114b0, 0x114b0,), # Tirhuta Vowel Sign Aa
+ (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal
+ (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E
+ (0x114bd, 0x114bd,), # Tirhuta Vowel Sign Short O
+ (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara
+ (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta
+ (0x115af, 0x115af,), # Siddham Vowel Sign Aa
+ (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal
+ (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara
+ (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta
+ (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter
+ (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai
+ (0x1163d, 0x1163d,), # Modi Sign Anusvara
+ (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra
+ (0x116ab, 0x116ab,), # Takri Sign Anusvara
+ (0x116ad, 0x116ad,), # Takri Vowel Sign Aa
+ (0x116b0, 0x116b7,), # Takri Vowel Sign U ..Takri Sign Nukta
+ (0x1171d, 0x1171d,), # Ahom Consonant Sign Medial La
+ (0x1171f, 0x1171f,), # Ahom Consonant Sign Medial Ligating Ra
+ (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu
+ (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer
+ (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign Anusvara
+ (0x11839, 0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta
+ (0x11930, 0x11930,), # Dives Akuru Vowel Sign Aa
+ (0x1193b, 0x1193e,), # Dives Akuru Sign Anusvar..Dives Akuru Virama
+ (0x11943, 0x11943,), # Dives Akuru Sign Nukta
+ (0x119d4, 0x119d7,), # Nandinagari Vowel Sign U..Nandinagari Vowel Sign V
+ (0x119da, 0x119db,), # Nandinagari Vowel Sign E..Nandinagari Vowel Sign A
+ (0x119e0, 0x119e0,), # Nandinagari Sign Virama
+ (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L
+ (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An
+ (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster
+ (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner
+ (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe
+ (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar
+ (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara
+ (0x11a98, 0x11a99,), # Soyombo Gemination Mark ..Soyombo Subjoiner
+ (0x11b60, 0x11b60,), # (nil)
+ (0x11b62, 0x11b64,), # (nil)
+ (0x11b66, 0x11b66,), # (nil)
+ (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc
+ (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara
+ (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama
+ (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter
+ (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa
+ (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E
+ (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu
+ (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
+ (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E
+ (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
+ (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama
+ (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara
+ (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
+ (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara
+ (0x11d97, 0x11d97,), # Gunjala Gondi Virama
+ (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U
+ (0x11f00, 0x11f01,), # Kawi Sign Candrabindu ..Kawi Sign Anusvara
+ (0x11f36, 0x11f3a,), # Kawi Vowel Sign I ..Kawi Vowel Sign Vocalic
+ (0x11f40, 0x11f42,), # Kawi Vowel Sign Eu ..Kawi Conjoiner
+ (0x11f5a, 0x11f5a,), # (nil)
+ (0x13440, 0x13440,), # Egyptian Hieroglyph Mirror Horizontally
+ (0x13447, 0x13455,), # Egyptian Hieroglyph Modi..Egyptian Hieroglyph Modi
+ (0x1611e, 0x16129,), # (nil)
+ (0x1612d, 0x1612f,), # (nil)
+ (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High
+ (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta
+ (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar
+ (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below
+ (0x16fe4, 0x16fe4,), # Khitan Small Script Filler
+ (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea
+ (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark
+ (0x1cf00, 0x1cf2d,), # Znamenny Combining Mark ..Znamenny Combining Mark
+ (0x1cf30, 0x1cf46,), # Znamenny Combining Tonal..Znamenny Priznak Modifie
+ (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
+ (0x1da00, 0x1da36,), # Signwriting Head Rim ..Signwriting Air Sucking
+ (0x1da3b, 0x1da6c,), # Signwriting Mouth Closed..Signwriting Excitement
+ (0x1da75, 0x1da75,), # Signwriting Upper Body Tilting From Hip Joints
+ (0x1da84, 0x1da84,), # Signwriting Location Head Neck
+ (0x1da9b, 0x1da9f,), # Signwriting Fill Modifie..Signwriting Fill Modifie
+ (0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting Rotation Mod
+ (0x1e000, 0x1e006,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e008, 0x1e018,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e01b, 0x1e021,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e023, 0x1e024,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e08f, 0x1e08f,), # Combining Cyrillic Small Letter Byelorussian-ukr
+ (0x1e130, 0x1e136,), # Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T
+ (0x1e2ae, 0x1e2ae,), # Toto Sign Rising Tone
+ (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini
+ (0x1e4ec, 0x1e4ef,), # Nag Mundari Sign Muhor ..Nag Mundari Sign Sutuh
+ (0x1e5ee, 0x1e5ef,), # (nil)
+ (0x1e6e3, 0x1e6e3,), # (nil)
+ (0x1e6e6, 0x1e6e6,), # (nil)
+ (0x1e6ee, 0x1e6ef,), # (nil)
+ (0x1e6f5, 0x1e6f5,), # (nil)
+ (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
+ (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
+ (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
+ (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
+ (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+)
+
+GRAPHEME_ZWJ = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x0200d, 0x0200d,), # Zero Width Joiner
+)
+
+GRAPHEME_REGIONAL_INDICATOR = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x1f1e6, 0x1f1ff,), # Regional Indicator Symbo..Regional Indicator Symbo
+)
+
+GRAPHEME_PREPEND = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
+ (0x006dd, 0x006dd,), # Arabic End Of Ayah
+ (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
+ (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov
+ (0x008e2, 0x008e2,), # Arabic Disputed End Of Ayah
+ (0x00d4e, 0x00d4e,), # Malayalam Letter Dot Reph
+ (0x110bd, 0x110bd,), # Kaithi Number Sign
+ (0x110cd, 0x110cd,), # Kaithi Number Sign Above
+ (0x111c2, 0x111c3,), # Sharada Sign Jihvamuliya..Sharada Sign Upadhmaniya
+ (0x113d1, 0x113d1,), # (nil)
+ (0x1193f, 0x1193f,), # Dives Akuru Prefixed Nasal Sign
+ (0x11941, 0x11941,), # Dives Akuru Initial Ra
+ (0x11a84, 0x11a89,), # Soyombo Sign Jihvamuliya..Soyombo Cluster-initial
+ (0x11d46, 0x11d46,), # Masaram Gondi Repha
+ (0x11f02, 0x11f02,), # Kawi Sign Repha
+)
+
+GRAPHEME_SPACINGMARK = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x00903, 0x00903,), # Devanagari Sign Visarga
+ (0x0093b, 0x0093b,), # Devanagari Vowel Sign Ooe
+ (0x0093e, 0x00940,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Ii
+ (0x00949, 0x0094c,), # Devanagari Vowel Sign Ca..Devanagari Vowel Sign Au
+ (0x0094e, 0x0094f,), # Devanagari Vowel Sign Pr..Devanagari Vowel Sign Aw
+ (0x00982, 0x00983,), # Bengali Sign Anusvara ..Bengali Sign Visarga
+ (0x009bf, 0x009c0,), # Bengali Vowel Sign I ..Bengali Vowel Sign Ii
+ (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai
+ (0x009cb, 0x009cc,), # Bengali Vowel Sign O ..Bengali Vowel Sign Au
+ (0x00a03, 0x00a03,), # Gurmukhi Sign Visarga
+ (0x00a3e, 0x00a40,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Ii
+ (0x00a83, 0x00a83,), # Gujarati Sign Visarga
+ (0x00abe, 0x00ac0,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Ii
+ (0x00ac9, 0x00ac9,), # Gujarati Vowel Sign Candra O
+ (0x00acb, 0x00acc,), # Gujarati Vowel Sign O ..Gujarati Vowel Sign Au
+ (0x00b02, 0x00b03,), # Oriya Sign Anusvara ..Oriya Sign Visarga
+ (0x00b40, 0x00b40,), # Oriya Vowel Sign Ii
+ (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai
+ (0x00b4b, 0x00b4c,), # Oriya Vowel Sign O ..Oriya Vowel Sign Au
+ (0x00bbf, 0x00bbf,), # Tamil Vowel Sign I
+ (0x00bc1, 0x00bc2,), # Tamil Vowel Sign U ..Tamil Vowel Sign Uu
+ (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai
+ (0x00bca, 0x00bcc,), # Tamil Vowel Sign O ..Tamil Vowel Sign Au
+ (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga
+ (0x00c41, 0x00c44,), # Telugu Vowel Sign U ..Telugu Vowel Sign Vocali
+ (0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga
+ (0x00cbe, 0x00cbe,), # Kannada Vowel Sign Aa
+ (0x00cc1, 0x00cc1,), # Kannada Vowel Sign U
+ (0x00cc3, 0x00cc4,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal
+ (0x00cf3, 0x00cf3,), # Kannada Sign Combining Anusvara Above Right
+ (0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga
+ (0x00d3f, 0x00d40,), # Malayalam Vowel Sign I ..Malayalam Vowel Sign Ii
+ (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai
+ (0x00d4a, 0x00d4c,), # Malayalam Vowel Sign O ..Malayalam Vowel Sign Au
+ (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya
+ (0x00dd0, 0x00dd1,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Diga
+ (0x00dd8, 0x00dde,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Kombu
+ (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga
+ (0x00e33, 0x00e33,), # Thai Character Sara Am
+ (0x00eb3, 0x00eb3,), # Lao Vowel Sign Am
+ (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes
+ (0x00f7f, 0x00f7f,), # Tibetan Sign Rnam Bcad
+ (0x01031, 0x01031,), # Myanmar Vowel Sign E
+ (0x0103b, 0x0103c,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M
+ (0x01056, 0x01057,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal
+ (0x01084, 0x01084,), # Myanmar Vowel Sign Shan E
+ (0x017b6, 0x017b6,), # Khmer Vowel Sign Aa
+ (0x017be, 0x017c5,), # Khmer Vowel Sign Oe ..Khmer Vowel Sign Au
+ (0x017c7, 0x017c8,), # Khmer Sign Reahmuk ..Khmer Sign Yuukaleapintu
+ (0x01923, 0x01926,), # Limbu Vowel Sign Ee ..Limbu Vowel Sign Au
+ (0x01929, 0x0192b,), # Limbu Subjoined Letter Y..Limbu Subjoined Letter W
+ (0x01930, 0x01931,), # Limbu Small Letter Ka ..Limbu Small Letter Nga
+ (0x01933, 0x01938,), # Limbu Small Letter Ta ..Limbu Small Letter La
+ (0x01a19, 0x01a1a,), # Buginese Vowel Sign E ..Buginese Vowel Sign O
+ (0x01a55, 0x01a55,), # Tai Tham Consonant Sign Medial Ra
+ (0x01a57, 0x01a57,), # Tai Tham Consonant Sign La Tang Lai
+ (0x01a6d, 0x01a72,), # Tai Tham Vowel Sign Oy ..Tai Tham Vowel Sign Tham
+ (0x01b04, 0x01b04,), # Balinese Sign Bisah
+ (0x01b3e, 0x01b41,), # Balinese Vowel Sign Tali..Balinese Vowel Sign Tali
+ (0x01b82, 0x01b82,), # Sundanese Sign Pangwisad
+ (0x01ba1, 0x01ba1,), # Sundanese Consonant Sign Pamingkal
+ (0x01ba6, 0x01ba7,), # Sundanese Vowel Sign Pan..Sundanese Vowel Sign Pan
+ (0x01be7, 0x01be7,), # Batak Vowel Sign E
+ (0x01bea, 0x01bec,), # Batak Vowel Sign I ..Batak Vowel Sign O
+ (0x01bee, 0x01bee,), # Batak Vowel Sign U
+ (0x01c24, 0x01c2b,), # Lepcha Subjoined Letter ..Lepcha Vowel Sign Uu
+ (0x01c34, 0x01c35,), # Lepcha Consonant Sign Ny..Lepcha Consonant Sign Ka
+ (0x01ce1, 0x01ce1,), # Vedic Tone Atharvavedic Independent Svarita
+ (0x01cf7, 0x01cf7,), # Vedic Sign Atikrama
+ (0x0a823, 0x0a824,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
+ (0x0a827, 0x0a827,), # Syloti Nagri Vowel Sign Oo
+ (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga
+ (0x0a8b4, 0x0a8c3,), # Saurashtra Consonant Sig..Saurashtra Vowel Sign Au
+ (0x0a952, 0x0a952,), # Rejang Consonant Sign H
+ (0x0a983, 0x0a983,), # Javanese Sign Wignyan
+ (0x0a9b4, 0x0a9b5,), # Javanese Vowel Sign Taru..Javanese Vowel Sign Tolo
+ (0x0a9ba, 0x0a9bb,), # Javanese Vowel Sign Tali..Javanese Vowel Sign Dirg
+ (0x0a9be, 0x0a9bf,), # Javanese Consonant Sign ..Javanese Consonant Sign
+ (0x0aa2f, 0x0aa30,), # Cham Vowel Sign O ..Cham Vowel Sign Ai
+ (0x0aa33, 0x0aa34,), # Cham Consonant Sign Ya ..Cham Consonant Sign Ra
+ (0x0aa4d, 0x0aa4d,), # Cham Consonant Sign Final H
+ (0x0aaeb, 0x0aaeb,), # Meetei Mayek Vowel Sign Ii
+ (0x0aaee, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
+ (0x0aaf5, 0x0aaf5,), # Meetei Mayek Vowel Sign Visarga
+ (0x0abe3, 0x0abe4,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
+ (0x0abe6, 0x0abe7,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
+ (0x0abe9, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
+ (0x0abec, 0x0abec,), # Meetei Mayek Lum Iyek
+ (0x11000, 0x11000,), # Brahmi Sign Candrabindu
+ (0x11002, 0x11002,), # Brahmi Sign Visarga
+ (0x11082, 0x11082,), # Kaithi Sign Visarga
+ (0x110b0, 0x110b2,), # Kaithi Vowel Sign Aa ..Kaithi Vowel Sign Ii
+ (0x110b7, 0x110b8,), # Kaithi Vowel Sign O ..Kaithi Vowel Sign Au
+ (0x1112c, 0x1112c,), # Chakma Vowel Sign E
+ (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
+ (0x11182, 0x11182,), # Sharada Sign Visarga
+ (0x111b3, 0x111b5,), # Sharada Vowel Sign Aa ..Sharada Vowel Sign Ii
+ (0x111bf, 0x111bf,), # Sharada Vowel Sign Au
+ (0x111ce, 0x111ce,), # Sharada Vowel Sign Prishthamatra E
+ (0x1122c, 0x1122e,), # Khojki Vowel Sign Aa ..Khojki Vowel Sign Ii
+ (0x11232, 0x11233,), # Khojki Vowel Sign O ..Khojki Vowel Sign Au
+ (0x112e0, 0x112e2,), # Khudawadi Vowel Sign Aa ..Khudawadi Vowel Sign Ii
+ (0x11302, 0x11303,), # Grantha Sign Anusvara ..Grantha Sign Visarga
+ (0x1133f, 0x1133f,), # Grantha Vowel Sign I
+ (0x11341, 0x11344,), # Grantha Vowel Sign U ..Grantha Vowel Sign Vocal
+ (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai
+ (0x1134b, 0x1134c,), # Grantha Vowel Sign Oo ..Grantha Vowel Sign Au
+ (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal
+ (0x113b9, 0x113ba,), # (nil)
+ (0x113ca, 0x113ca,), # (nil)
+ (0x113cc, 0x113cd,), # (nil)
+ (0x11435, 0x11437,), # Newa Vowel Sign Aa ..Newa Vowel Sign Ii
+ (0x11440, 0x11441,), # Newa Vowel Sign O ..Newa Vowel Sign Au
+ (0x11445, 0x11445,), # Newa Sign Visarga
+ (0x114b1, 0x114b2,), # Tirhuta Vowel Sign I ..Tirhuta Vowel Sign Ii
+ (0x114b9, 0x114b9,), # Tirhuta Vowel Sign E
+ (0x114bb, 0x114bc,), # Tirhuta Vowel Sign Ai ..Tirhuta Vowel Sign O
+ (0x114be, 0x114be,), # Tirhuta Vowel Sign Au
+ (0x114c1, 0x114c1,), # Tirhuta Sign Visarga
+ (0x115b0, 0x115b1,), # Siddham Vowel Sign I ..Siddham Vowel Sign Ii
+ (0x115b8, 0x115bb,), # Siddham Vowel Sign E ..Siddham Vowel Sign Au
+ (0x115be, 0x115be,), # Siddham Sign Visarga
+ (0x11630, 0x11632,), # Modi Vowel Sign Aa ..Modi Vowel Sign Ii
+ (0x1163b, 0x1163c,), # Modi Vowel Sign O ..Modi Vowel Sign Au
+ (0x1163e, 0x1163e,), # Modi Sign Visarga
+ (0x116ac, 0x116ac,), # Takri Sign Visarga
+ (0x116ae, 0x116af,), # Takri Vowel Sign I ..Takri Vowel Sign Ii
+ (0x1171e, 0x1171e,), # Ahom Consonant Sign Medial Ra
+ (0x11726, 0x11726,), # Ahom Vowel Sign E
+ (0x1182c, 0x1182e,), # Dogra Vowel Sign Aa ..Dogra Vowel Sign Ii
+ (0x11838, 0x11838,), # Dogra Sign Visarga
+ (0x11931, 0x11935,), # Dives Akuru Vowel Sign I..Dives Akuru Vowel Sign E
+ (0x11937, 0x11938,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O
+ (0x11940, 0x11940,), # Dives Akuru Medial Ya
+ (0x11942, 0x11942,), # Dives Akuru Medial Ra
+ (0x119d1, 0x119d3,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign I
+ (0x119dc, 0x119df,), # Nandinagari Vowel Sign O..Nandinagari Sign Visarga
+ (0x119e4, 0x119e4,), # Nandinagari Vowel Sign Prishthamatra E
+ (0x11a39, 0x11a39,), # Zanabazar Square Sign Visarga
+ (0x11a57, 0x11a58,), # Soyombo Vowel Sign Ai ..Soyombo Vowel Sign Au
+ (0x11a97, 0x11a97,), # Soyombo Sign Visarga
+ (0x11b61, 0x11b61,), # (nil)
+ (0x11b65, 0x11b65,), # (nil)
+ (0x11b67, 0x11b67,), # (nil)
+ (0x11c2f, 0x11c2f,), # Bhaiksuki Vowel Sign Aa
+ (0x11c3e, 0x11c3e,), # Bhaiksuki Sign Visarga
+ (0x11ca9, 0x11ca9,), # Marchen Subjoined Letter Ya
+ (0x11cb1, 0x11cb1,), # Marchen Vowel Sign I
+ (0x11cb4, 0x11cb4,), # Marchen Vowel Sign O
+ (0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
+ (0x11d93, 0x11d94,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
+ (0x11d96, 0x11d96,), # Gunjala Gondi Sign Visarga
+ (0x11ef5, 0x11ef6,), # Makasar Vowel Sign E ..Makasar Vowel Sign O
+ (0x11f03, 0x11f03,), # Kawi Sign Visarga
+ (0x11f34, 0x11f35,), # Kawi Vowel Sign Aa ..Kawi Vowel Sign Alternat
+ (0x11f3e, 0x11f3f,), # Kawi Vowel Sign E ..Kawi Vowel Sign Ai
+ (0x1612a, 0x1612c,), # (nil)
+ (0x16f51, 0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui
+)
+
+GRAPHEME_L = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x01100, 0x0115f,), # Hangul Choseong Kiyeok ..Hangul Choseong Filler
+ (0x0a960, 0x0a97c,), # Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo
+)
+
+GRAPHEME_V = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x01160, 0x011a7,), # Hangul Jungseong Filler ..Hangul Jungseong O-yae
+ (0x0d7b0, 0x0d7c6,), # Hangul Jungseong O-yeo ..Hangul Jungseong Araea-e
+ (0x16d63, 0x16d63,), # (nil)
+ (0x16d67, 0x16d6a,), # (nil)
+)
+
+GRAPHEME_T = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x011a8, 0x011ff,), # Hangul Jongseong Kiyeok ..Hangul Jongseong Ssangni
+ (0x0d7cb, 0x0d7fb,), # Hangul Jongseong Nieun-r..Hangul Jongseong Phieuph
+)
+
+GRAPHEME_LV = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x0ac00, 0x0ac00,), # Hangul Syllable Ga
+ (0x0ac1c, 0x0ac1c,), # Hangul Syllable Gae
+ (0x0ac38, 0x0ac38,), # Hangul Syllable Gya
+ (0x0ac54, 0x0ac54,), # Hangul Syllable Gyae
+ (0x0ac70, 0x0ac70,), # Hangul Syllable Geo
+ (0x0ac8c, 0x0ac8c,), # Hangul Syllable Ge
+ (0x0aca8, 0x0aca8,), # Hangul Syllable Gyeo
+ (0x0acc4, 0x0acc4,), # Hangul Syllable Gye
+ (0x0ace0, 0x0ace0,), # Hangul Syllable Go
+ (0x0acfc, 0x0acfc,), # Hangul Syllable Gwa
+ (0x0ad18, 0x0ad18,), # Hangul Syllable Gwae
+ (0x0ad34, 0x0ad34,), # Hangul Syllable Goe
+ (0x0ad50, 0x0ad50,), # Hangul Syllable Gyo
+ (0x0ad6c, 0x0ad6c,), # Hangul Syllable Gu
+ (0x0ad88, 0x0ad88,), # Hangul Syllable Gweo
+ (0x0ada4, 0x0ada4,), # Hangul Syllable Gwe
+ (0x0adc0, 0x0adc0,), # Hangul Syllable Gwi
+ (0x0addc, 0x0addc,), # Hangul Syllable Gyu
+ (0x0adf8, 0x0adf8,), # Hangul Syllable Geu
+ (0x0ae14, 0x0ae14,), # Hangul Syllable Gyi
+ (0x0ae30, 0x0ae30,), # Hangul Syllable Gi
+ (0x0ae4c, 0x0ae4c,), # Hangul Syllable Gga
+ (0x0ae68, 0x0ae68,), # Hangul Syllable Ggae
+ (0x0ae84, 0x0ae84,), # Hangul Syllable Ggya
+ (0x0aea0, 0x0aea0,), # Hangul Syllable Ggyae
+ (0x0aebc, 0x0aebc,), # Hangul Syllable Ggeo
+ (0x0aed8, 0x0aed8,), # Hangul Syllable Gge
+ (0x0aef4, 0x0aef4,), # Hangul Syllable Ggyeo
+ (0x0af10, 0x0af10,), # Hangul Syllable Ggye
+ (0x0af2c, 0x0af2c,), # Hangul Syllable Ggo
+ (0x0af48, 0x0af48,), # Hangul Syllable Ggwa
+ (0x0af64, 0x0af64,), # Hangul Syllable Ggwae
+ (0x0af80, 0x0af80,), # Hangul Syllable Ggoe
+ (0x0af9c, 0x0af9c,), # Hangul Syllable Ggyo
+ (0x0afb8, 0x0afb8,), # Hangul Syllable Ggu
+ (0x0afd4, 0x0afd4,), # Hangul Syllable Ggweo
+ (0x0aff0, 0x0aff0,), # Hangul Syllable Ggwe
+ (0x0b00c, 0x0b00c,), # Hangul Syllable Ggwi
+ (0x0b028, 0x0b028,), # Hangul Syllable Ggyu
+ (0x0b044, 0x0b044,), # Hangul Syllable Ggeu
+ (0x0b060, 0x0b060,), # Hangul Syllable Ggyi
+ (0x0b07c, 0x0b07c,), # Hangul Syllable Ggi
+ (0x0b098, 0x0b098,), # Hangul Syllable Na
+ (0x0b0b4, 0x0b0b4,), # Hangul Syllable Nae
+ (0x0b0d0, 0x0b0d0,), # Hangul Syllable Nya
+ (0x0b0ec, 0x0b0ec,), # Hangul Syllable Nyae
+ (0x0b108, 0x0b108,), # Hangul Syllable Neo
+ (0x0b124, 0x0b124,), # Hangul Syllable Ne
+ (0x0b140, 0x0b140,), # Hangul Syllable Nyeo
+ (0x0b15c, 0x0b15c,), # Hangul Syllable Nye
+ (0x0b178, 0x0b178,), # Hangul Syllable No
+ (0x0b194, 0x0b194,), # Hangul Syllable Nwa
+ (0x0b1b0, 0x0b1b0,), # Hangul Syllable Nwae
+ (0x0b1cc, 0x0b1cc,), # Hangul Syllable Noe
+ (0x0b1e8, 0x0b1e8,), # Hangul Syllable Nyo
+ (0x0b204, 0x0b204,), # Hangul Syllable Nu
+ (0x0b220, 0x0b220,), # Hangul Syllable Nweo
+ (0x0b23c, 0x0b23c,), # Hangul Syllable Nwe
+ (0x0b258, 0x0b258,), # Hangul Syllable Nwi
+ (0x0b274, 0x0b274,), # Hangul Syllable Nyu
+ (0x0b290, 0x0b290,), # Hangul Syllable Neu
+ (0x0b2ac, 0x0b2ac,), # Hangul Syllable Nyi
+ (0x0b2c8, 0x0b2c8,), # Hangul Syllable Ni
+ (0x0b2e4, 0x0b2e4,), # Hangul Syllable Da
+ (0x0b300, 0x0b300,), # Hangul Syllable Dae
+ (0x0b31c, 0x0b31c,), # Hangul Syllable Dya
+ (0x0b338, 0x0b338,), # Hangul Syllable Dyae
+ (0x0b354, 0x0b354,), # Hangul Syllable Deo
+ (0x0b370, 0x0b370,), # Hangul Syllable De
+ (0x0b38c, 0x0b38c,), # Hangul Syllable Dyeo
+ (0x0b3a8, 0x0b3a8,), # Hangul Syllable Dye
+ (0x0b3c4, 0x0b3c4,), # Hangul Syllable Do
+ (0x0b3e0, 0x0b3e0,), # Hangul Syllable Dwa
+ (0x0b3fc, 0x0b3fc,), # Hangul Syllable Dwae
+ (0x0b418, 0x0b418,), # Hangul Syllable Doe
+ (0x0b434, 0x0b434,), # Hangul Syllable Dyo
+ (0x0b450, 0x0b450,), # Hangul Syllable Du
+ (0x0b46c, 0x0b46c,), # Hangul Syllable Dweo
+ (0x0b488, 0x0b488,), # Hangul Syllable Dwe
+ (0x0b4a4, 0x0b4a4,), # Hangul Syllable Dwi
+ (0x0b4c0, 0x0b4c0,), # Hangul Syllable Dyu
+ (0x0b4dc, 0x0b4dc,), # Hangul Syllable Deu
+ (0x0b4f8, 0x0b4f8,), # Hangul Syllable Dyi
+ (0x0b514, 0x0b514,), # Hangul Syllable Di
+ (0x0b530, 0x0b530,), # Hangul Syllable Dda
+ (0x0b54c, 0x0b54c,), # Hangul Syllable Ddae
+ (0x0b568, 0x0b568,), # Hangul Syllable Ddya
+ (0x0b584, 0x0b584,), # Hangul Syllable Ddyae
+ (0x0b5a0, 0x0b5a0,), # Hangul Syllable Ddeo
+ (0x0b5bc, 0x0b5bc,), # Hangul Syllable Dde
+ (0x0b5d8, 0x0b5d8,), # Hangul Syllable Ddyeo
+ (0x0b5f4, 0x0b5f4,), # Hangul Syllable Ddye
+ (0x0b610, 0x0b610,), # Hangul Syllable Ddo
+ (0x0b62c, 0x0b62c,), # Hangul Syllable Ddwa
+ (0x0b648, 0x0b648,), # Hangul Syllable Ddwae
+ (0x0b664, 0x0b664,), # Hangul Syllable Ddoe
+ (0x0b680, 0x0b680,), # Hangul Syllable Ddyo
+ (0x0b69c, 0x0b69c,), # Hangul Syllable Ddu
+ (0x0b6b8, 0x0b6b8,), # Hangul Syllable Ddweo
+ (0x0b6d4, 0x0b6d4,), # Hangul Syllable Ddwe
+ (0x0b6f0, 0x0b6f0,), # Hangul Syllable Ddwi
+ (0x0b70c, 0x0b70c,), # Hangul Syllable Ddyu
+ (0x0b728, 0x0b728,), # Hangul Syllable Ddeu
+ (0x0b744, 0x0b744,), # Hangul Syllable Ddyi
+ (0x0b760, 0x0b760,), # Hangul Syllable Ddi
+ (0x0b77c, 0x0b77c,), # Hangul Syllable Ra
+ (0x0b798, 0x0b798,), # Hangul Syllable Rae
+ (0x0b7b4, 0x0b7b4,), # Hangul Syllable Rya
+ (0x0b7d0, 0x0b7d0,), # Hangul Syllable Ryae
+ (0x0b7ec, 0x0b7ec,), # Hangul Syllable Reo
+ (0x0b808, 0x0b808,), # Hangul Syllable Re
+ (0x0b824, 0x0b824,), # Hangul Syllable Ryeo
+ (0x0b840, 0x0b840,), # Hangul Syllable Rye
+ (0x0b85c, 0x0b85c,), # Hangul Syllable Ro
+ (0x0b878, 0x0b878,), # Hangul Syllable Rwa
+ (0x0b894, 0x0b894,), # Hangul Syllable Rwae
+ (0x0b8b0, 0x0b8b0,), # Hangul Syllable Roe
+ (0x0b8cc, 0x0b8cc,), # Hangul Syllable Ryo
+ (0x0b8e8, 0x0b8e8,), # Hangul Syllable Ru
+ (0x0b904, 0x0b904,), # Hangul Syllable Rweo
+ (0x0b920, 0x0b920,), # Hangul Syllable Rwe
+ (0x0b93c, 0x0b93c,), # Hangul Syllable Rwi
+ (0x0b958, 0x0b958,), # Hangul Syllable Ryu
+ (0x0b974, 0x0b974,), # Hangul Syllable Reu
+ (0x0b990, 0x0b990,), # Hangul Syllable Ryi
+ (0x0b9ac, 0x0b9ac,), # Hangul Syllable Ri
+ (0x0b9c8, 0x0b9c8,), # Hangul Syllable Ma
+ (0x0b9e4, 0x0b9e4,), # Hangul Syllable Mae
+ (0x0ba00, 0x0ba00,), # Hangul Syllable Mya
+ (0x0ba1c, 0x0ba1c,), # Hangul Syllable Myae
+ (0x0ba38, 0x0ba38,), # Hangul Syllable Meo
+ (0x0ba54, 0x0ba54,), # Hangul Syllable Me
+ (0x0ba70, 0x0ba70,), # Hangul Syllable Myeo
+ (0x0ba8c, 0x0ba8c,), # Hangul Syllable Mye
+ (0x0baa8, 0x0baa8,), # Hangul Syllable Mo
+ (0x0bac4, 0x0bac4,), # Hangul Syllable Mwa
+ (0x0bae0, 0x0bae0,), # Hangul Syllable Mwae
+ (0x0bafc, 0x0bafc,), # Hangul Syllable Moe
+ (0x0bb18, 0x0bb18,), # Hangul Syllable Myo
+ (0x0bb34, 0x0bb34,), # Hangul Syllable Mu
+ (0x0bb50, 0x0bb50,), # Hangul Syllable Mweo
+ (0x0bb6c, 0x0bb6c,), # Hangul Syllable Mwe
+ (0x0bb88, 0x0bb88,), # Hangul Syllable Mwi
+ (0x0bba4, 0x0bba4,), # Hangul Syllable Myu
+ (0x0bbc0, 0x0bbc0,), # Hangul Syllable Meu
+ (0x0bbdc, 0x0bbdc,), # Hangul Syllable Myi
+ (0x0bbf8, 0x0bbf8,), # Hangul Syllable Mi
+ (0x0bc14, 0x0bc14,), # Hangul Syllable Ba
+ (0x0bc30, 0x0bc30,), # Hangul Syllable Bae
+ (0x0bc4c, 0x0bc4c,), # Hangul Syllable Bya
+ (0x0bc68, 0x0bc68,), # Hangul Syllable Byae
+ (0x0bc84, 0x0bc84,), # Hangul Syllable Beo
+ (0x0bca0, 0x0bca0,), # Hangul Syllable Be
+ (0x0bcbc, 0x0bcbc,), # Hangul Syllable Byeo
+ (0x0bcd8, 0x0bcd8,), # Hangul Syllable Bye
+ (0x0bcf4, 0x0bcf4,), # Hangul Syllable Bo
+ (0x0bd10, 0x0bd10,), # Hangul Syllable Bwa
+ (0x0bd2c, 0x0bd2c,), # Hangul Syllable Bwae
+ (0x0bd48, 0x0bd48,), # Hangul Syllable Boe
+ (0x0bd64, 0x0bd64,), # Hangul Syllable Byo
+ (0x0bd80, 0x0bd80,), # Hangul Syllable Bu
+ (0x0bd9c, 0x0bd9c,), # Hangul Syllable Bweo
+ (0x0bdb8, 0x0bdb8,), # Hangul Syllable Bwe
+ (0x0bdd4, 0x0bdd4,), # Hangul Syllable Bwi
+ (0x0bdf0, 0x0bdf0,), # Hangul Syllable Byu
+ (0x0be0c, 0x0be0c,), # Hangul Syllable Beu
+ (0x0be28, 0x0be28,), # Hangul Syllable Byi
+ (0x0be44, 0x0be44,), # Hangul Syllable Bi
+ (0x0be60, 0x0be60,), # Hangul Syllable Bba
+ (0x0be7c, 0x0be7c,), # Hangul Syllable Bbae
+ (0x0be98, 0x0be98,), # Hangul Syllable Bbya
+ (0x0beb4, 0x0beb4,), # Hangul Syllable Bbyae
+ (0x0bed0, 0x0bed0,), # Hangul Syllable Bbeo
+ (0x0beec, 0x0beec,), # Hangul Syllable Bbe
+ (0x0bf08, 0x0bf08,), # Hangul Syllable Bbyeo
+ (0x0bf24, 0x0bf24,), # Hangul Syllable Bbye
+ (0x0bf40, 0x0bf40,), # Hangul Syllable Bbo
+ (0x0bf5c, 0x0bf5c,), # Hangul Syllable Bbwa
+ (0x0bf78, 0x0bf78,), # Hangul Syllable Bbwae
+ (0x0bf94, 0x0bf94,), # Hangul Syllable Bboe
+ (0x0bfb0, 0x0bfb0,), # Hangul Syllable Bbyo
+ (0x0bfcc, 0x0bfcc,), # Hangul Syllable Bbu
+ (0x0bfe8, 0x0bfe8,), # Hangul Syllable Bbweo
+ (0x0c004, 0x0c004,), # Hangul Syllable Bbwe
+ (0x0c020, 0x0c020,), # Hangul Syllable Bbwi
+ (0x0c03c, 0x0c03c,), # Hangul Syllable Bbyu
+ (0x0c058, 0x0c058,), # Hangul Syllable Bbeu
+ (0x0c074, 0x0c074,), # Hangul Syllable Bbyi
+ (0x0c090, 0x0c090,), # Hangul Syllable Bbi
+ (0x0c0ac, 0x0c0ac,), # Hangul Syllable Sa
+ (0x0c0c8, 0x0c0c8,), # Hangul Syllable Sae
+ (0x0c0e4, 0x0c0e4,), # Hangul Syllable Sya
+ (0x0c100, 0x0c100,), # Hangul Syllable Syae
+ (0x0c11c, 0x0c11c,), # Hangul Syllable Seo
+ (0x0c138, 0x0c138,), # Hangul Syllable Se
+ (0x0c154, 0x0c154,), # Hangul Syllable Syeo
+ (0x0c170, 0x0c170,), # Hangul Syllable Sye
+ (0x0c18c, 0x0c18c,), # Hangul Syllable So
+ (0x0c1a8, 0x0c1a8,), # Hangul Syllable Swa
+ (0x0c1c4, 0x0c1c4,), # Hangul Syllable Swae
+ (0x0c1e0, 0x0c1e0,), # Hangul Syllable Soe
+ (0x0c1fc, 0x0c1fc,), # Hangul Syllable Syo
+ (0x0c218, 0x0c218,), # Hangul Syllable Su
+ (0x0c234, 0x0c234,), # Hangul Syllable Sweo
+ (0x0c250, 0x0c250,), # Hangul Syllable Swe
+ (0x0c26c, 0x0c26c,), # Hangul Syllable Swi
+ (0x0c288, 0x0c288,), # Hangul Syllable Syu
+ (0x0c2a4, 0x0c2a4,), # Hangul Syllable Seu
+ (0x0c2c0, 0x0c2c0,), # Hangul Syllable Syi
+ (0x0c2dc, 0x0c2dc,), # Hangul Syllable Si
+ (0x0c2f8, 0x0c2f8,), # Hangul Syllable Ssa
+ (0x0c314, 0x0c314,), # Hangul Syllable Ssae
+ (0x0c330, 0x0c330,), # Hangul Syllable Ssya
+ (0x0c34c, 0x0c34c,), # Hangul Syllable Ssyae
+ (0x0c368, 0x0c368,), # Hangul Syllable Sseo
+ (0x0c384, 0x0c384,), # Hangul Syllable Sse
+ (0x0c3a0, 0x0c3a0,), # Hangul Syllable Ssyeo
+ (0x0c3bc, 0x0c3bc,), # Hangul Syllable Ssye
+ (0x0c3d8, 0x0c3d8,), # Hangul Syllable Sso
+ (0x0c3f4, 0x0c3f4,), # Hangul Syllable Sswa
+ (0x0c410, 0x0c410,), # Hangul Syllable Sswae
+ (0x0c42c, 0x0c42c,), # Hangul Syllable Ssoe
+ (0x0c448, 0x0c448,), # Hangul Syllable Ssyo
+ (0x0c464, 0x0c464,), # Hangul Syllable Ssu
+ (0x0c480, 0x0c480,), # Hangul Syllable Ssweo
+ (0x0c49c, 0x0c49c,), # Hangul Syllable Sswe
+ (0x0c4b8, 0x0c4b8,), # Hangul Syllable Sswi
+ (0x0c4d4, 0x0c4d4,), # Hangul Syllable Ssyu
+ (0x0c4f0, 0x0c4f0,), # Hangul Syllable Sseu
+ (0x0c50c, 0x0c50c,), # Hangul Syllable Ssyi
+ (0x0c528, 0x0c528,), # Hangul Syllable Ssi
+ (0x0c544, 0x0c544,), # Hangul Syllable A
+ (0x0c560, 0x0c560,), # Hangul Syllable Ae
+ (0x0c57c, 0x0c57c,), # Hangul Syllable Ya
+ (0x0c598, 0x0c598,), # Hangul Syllable Yae
+ (0x0c5b4, 0x0c5b4,), # Hangul Syllable Eo
+ (0x0c5d0, 0x0c5d0,), # Hangul Syllable E
+ (0x0c5ec, 0x0c5ec,), # Hangul Syllable Yeo
+ (0x0c608, 0x0c608,), # Hangul Syllable Ye
+ (0x0c624, 0x0c624,), # Hangul Syllable O
+ (0x0c640, 0x0c640,), # Hangul Syllable Wa
+ (0x0c65c, 0x0c65c,), # Hangul Syllable Wae
+ (0x0c678, 0x0c678,), # Hangul Syllable Oe
+ (0x0c694, 0x0c694,), # Hangul Syllable Yo
+ (0x0c6b0, 0x0c6b0,), # Hangul Syllable U
+ (0x0c6cc, 0x0c6cc,), # Hangul Syllable Weo
+ (0x0c6e8, 0x0c6e8,), # Hangul Syllable We
+ (0x0c704, 0x0c704,), # Hangul Syllable Wi
+ (0x0c720, 0x0c720,), # Hangul Syllable Yu
+ (0x0c73c, 0x0c73c,), # Hangul Syllable Eu
+ (0x0c758, 0x0c758,), # Hangul Syllable Yi
+ (0x0c774, 0x0c774,), # Hangul Syllable I
+ (0x0c790, 0x0c790,), # Hangul Syllable Ja
+ (0x0c7ac, 0x0c7ac,), # Hangul Syllable Jae
+ (0x0c7c8, 0x0c7c8,), # Hangul Syllable Jya
+ (0x0c7e4, 0x0c7e4,), # Hangul Syllable Jyae
+ (0x0c800, 0x0c800,), # Hangul Syllable Jeo
+ (0x0c81c, 0x0c81c,), # Hangul Syllable Je
+ (0x0c838, 0x0c838,), # Hangul Syllable Jyeo
+ (0x0c854, 0x0c854,), # Hangul Syllable Jye
+ (0x0c870, 0x0c870,), # Hangul Syllable Jo
+ (0x0c88c, 0x0c88c,), # Hangul Syllable Jwa
+ (0x0c8a8, 0x0c8a8,), # Hangul Syllable Jwae
+ (0x0c8c4, 0x0c8c4,), # Hangul Syllable Joe
+ (0x0c8e0, 0x0c8e0,), # Hangul Syllable Jyo
+ (0x0c8fc, 0x0c8fc,), # Hangul Syllable Ju
+ (0x0c918, 0x0c918,), # Hangul Syllable Jweo
+ (0x0c934, 0x0c934,), # Hangul Syllable Jwe
+ (0x0c950, 0x0c950,), # Hangul Syllable Jwi
+ (0x0c96c, 0x0c96c,), # Hangul Syllable Jyu
+ (0x0c988, 0x0c988,), # Hangul Syllable Jeu
+ (0x0c9a4, 0x0c9a4,), # Hangul Syllable Jyi
+ (0x0c9c0, 0x0c9c0,), # Hangul Syllable Ji
+ (0x0c9dc, 0x0c9dc,), # Hangul Syllable Jja
+ (0x0c9f8, 0x0c9f8,), # Hangul Syllable Jjae
+ (0x0ca14, 0x0ca14,), # Hangul Syllable Jjya
+ (0x0ca30, 0x0ca30,), # Hangul Syllable Jjyae
+ (0x0ca4c, 0x0ca4c,), # Hangul Syllable Jjeo
+ (0x0ca68, 0x0ca68,), # Hangul Syllable Jje
+ (0x0ca84, 0x0ca84,), # Hangul Syllable Jjyeo
+ (0x0caa0, 0x0caa0,), # Hangul Syllable Jjye
+ (0x0cabc, 0x0cabc,), # Hangul Syllable Jjo
+ (0x0cad8, 0x0cad8,), # Hangul Syllable Jjwa
+ (0x0caf4, 0x0caf4,), # Hangul Syllable Jjwae
+ (0x0cb10, 0x0cb10,), # Hangul Syllable Jjoe
+ (0x0cb2c, 0x0cb2c,), # Hangul Syllable Jjyo
+ (0x0cb48, 0x0cb48,), # Hangul Syllable Jju
+ (0x0cb64, 0x0cb64,), # Hangul Syllable Jjweo
+ (0x0cb80, 0x0cb80,), # Hangul Syllable Jjwe
+ (0x0cb9c, 0x0cb9c,), # Hangul Syllable Jjwi
+ (0x0cbb8, 0x0cbb8,), # Hangul Syllable Jjyu
+ (0x0cbd4, 0x0cbd4,), # Hangul Syllable Jjeu
+ (0x0cbf0, 0x0cbf0,), # Hangul Syllable Jjyi
+ (0x0cc0c, 0x0cc0c,), # Hangul Syllable Jji
+ (0x0cc28, 0x0cc28,), # Hangul Syllable Ca
+ (0x0cc44, 0x0cc44,), # Hangul Syllable Cae
+ (0x0cc60, 0x0cc60,), # Hangul Syllable Cya
+ (0x0cc7c, 0x0cc7c,), # Hangul Syllable Cyae
+ (0x0cc98, 0x0cc98,), # Hangul Syllable Ceo
+ (0x0ccb4, 0x0ccb4,), # Hangul Syllable Ce
+ (0x0ccd0, 0x0ccd0,), # Hangul Syllable Cyeo
+ (0x0ccec, 0x0ccec,), # Hangul Syllable Cye
+ (0x0cd08, 0x0cd08,), # Hangul Syllable Co
+ (0x0cd24, 0x0cd24,), # Hangul Syllable Cwa
+ (0x0cd40, 0x0cd40,), # Hangul Syllable Cwae
+ (0x0cd5c, 0x0cd5c,), # Hangul Syllable Coe
+ (0x0cd78, 0x0cd78,), # Hangul Syllable Cyo
+ (0x0cd94, 0x0cd94,), # Hangul Syllable Cu
+ (0x0cdb0, 0x0cdb0,), # Hangul Syllable Cweo
+ (0x0cdcc, 0x0cdcc,), # Hangul Syllable Cwe
+ (0x0cde8, 0x0cde8,), # Hangul Syllable Cwi
+ (0x0ce04, 0x0ce04,), # Hangul Syllable Cyu
+ (0x0ce20, 0x0ce20,), # Hangul Syllable Ceu
+ (0x0ce3c, 0x0ce3c,), # Hangul Syllable Cyi
+ (0x0ce58, 0x0ce58,), # Hangul Syllable Ci
+ (0x0ce74, 0x0ce74,), # Hangul Syllable Ka
+ (0x0ce90, 0x0ce90,), # Hangul Syllable Kae
+ (0x0ceac, 0x0ceac,), # Hangul Syllable Kya
+ (0x0cec8, 0x0cec8,), # Hangul Syllable Kyae
+ (0x0cee4, 0x0cee4,), # Hangul Syllable Keo
+ (0x0cf00, 0x0cf00,), # Hangul Syllable Ke
+ (0x0cf1c, 0x0cf1c,), # Hangul Syllable Kyeo
+ (0x0cf38, 0x0cf38,), # Hangul Syllable Kye
+ (0x0cf54, 0x0cf54,), # Hangul Syllable Ko
+ (0x0cf70, 0x0cf70,), # Hangul Syllable Kwa
+ (0x0cf8c, 0x0cf8c,), # Hangul Syllable Kwae
+ (0x0cfa8, 0x0cfa8,), # Hangul Syllable Koe
+ (0x0cfc4, 0x0cfc4,), # Hangul Syllable Kyo
+ (0x0cfe0, 0x0cfe0,), # Hangul Syllable Ku
+ (0x0cffc, 0x0cffc,), # Hangul Syllable Kweo
+ (0x0d018, 0x0d018,), # Hangul Syllable Kwe
+ (0x0d034, 0x0d034,), # Hangul Syllable Kwi
+ (0x0d050, 0x0d050,), # Hangul Syllable Kyu
+ (0x0d06c, 0x0d06c,), # Hangul Syllable Keu
+ (0x0d088, 0x0d088,), # Hangul Syllable Kyi
+ (0x0d0a4, 0x0d0a4,), # Hangul Syllable Ki
+ (0x0d0c0, 0x0d0c0,), # Hangul Syllable Ta
+ (0x0d0dc, 0x0d0dc,), # Hangul Syllable Tae
+ (0x0d0f8, 0x0d0f8,), # Hangul Syllable Tya
+ (0x0d114, 0x0d114,), # Hangul Syllable Tyae
+ (0x0d130, 0x0d130,), # Hangul Syllable Teo
+ (0x0d14c, 0x0d14c,), # Hangul Syllable Te
+ (0x0d168, 0x0d168,), # Hangul Syllable Tyeo
+ (0x0d184, 0x0d184,), # Hangul Syllable Tye
+ (0x0d1a0, 0x0d1a0,), # Hangul Syllable To
+ (0x0d1bc, 0x0d1bc,), # Hangul Syllable Twa
+ (0x0d1d8, 0x0d1d8,), # Hangul Syllable Twae
+ (0x0d1f4, 0x0d1f4,), # Hangul Syllable Toe
+ (0x0d210, 0x0d210,), # Hangul Syllable Tyo
+ (0x0d22c, 0x0d22c,), # Hangul Syllable Tu
+ (0x0d248, 0x0d248,), # Hangul Syllable Tweo
+ (0x0d264, 0x0d264,), # Hangul Syllable Twe
+ (0x0d280, 0x0d280,), # Hangul Syllable Twi
+ (0x0d29c, 0x0d29c,), # Hangul Syllable Tyu
+ (0x0d2b8, 0x0d2b8,), # Hangul Syllable Teu
+ (0x0d2d4, 0x0d2d4,), # Hangul Syllable Tyi
+ (0x0d2f0, 0x0d2f0,), # Hangul Syllable Ti
+ (0x0d30c, 0x0d30c,), # Hangul Syllable Pa
+ (0x0d328, 0x0d328,), # Hangul Syllable Pae
+ (0x0d344, 0x0d344,), # Hangul Syllable Pya
+ (0x0d360, 0x0d360,), # Hangul Syllable Pyae
+ (0x0d37c, 0x0d37c,), # Hangul Syllable Peo
+ (0x0d398, 0x0d398,), # Hangul Syllable Pe
+ (0x0d3b4, 0x0d3b4,), # Hangul Syllable Pyeo
+ (0x0d3d0, 0x0d3d0,), # Hangul Syllable Pye
+ (0x0d3ec, 0x0d3ec,), # Hangul Syllable Po
+ (0x0d408, 0x0d408,), # Hangul Syllable Pwa
+ (0x0d424, 0x0d424,), # Hangul Syllable Pwae
+ (0x0d440, 0x0d440,), # Hangul Syllable Poe
+ (0x0d45c, 0x0d45c,), # Hangul Syllable Pyo
+ (0x0d478, 0x0d478,), # Hangul Syllable Pu
+ (0x0d494, 0x0d494,), # Hangul Syllable Pweo
+ (0x0d4b0, 0x0d4b0,), # Hangul Syllable Pwe
+ (0x0d4cc, 0x0d4cc,), # Hangul Syllable Pwi
+ (0x0d4e8, 0x0d4e8,), # Hangul Syllable Pyu
+ (0x0d504, 0x0d504,), # Hangul Syllable Peu
+ (0x0d520, 0x0d520,), # Hangul Syllable Pyi
+ (0x0d53c, 0x0d53c,), # Hangul Syllable Pi
+ (0x0d558, 0x0d558,), # Hangul Syllable Ha
+ (0x0d574, 0x0d574,), # Hangul Syllable Hae
+ (0x0d590, 0x0d590,), # Hangul Syllable Hya
+ (0x0d5ac, 0x0d5ac,), # Hangul Syllable Hyae
+ (0x0d5c8, 0x0d5c8,), # Hangul Syllable Heo
+ (0x0d5e4, 0x0d5e4,), # Hangul Syllable He
+ (0x0d600, 0x0d600,), # Hangul Syllable Hyeo
+ (0x0d61c, 0x0d61c,), # Hangul Syllable Hye
+ (0x0d638, 0x0d638,), # Hangul Syllable Ho
+ (0x0d654, 0x0d654,), # Hangul Syllable Hwa
+ (0x0d670, 0x0d670,), # Hangul Syllable Hwae
+ (0x0d68c, 0x0d68c,), # Hangul Syllable Hoe
+ (0x0d6a8, 0x0d6a8,), # Hangul Syllable Hyo
+ (0x0d6c4, 0x0d6c4,), # Hangul Syllable Hu
+ (0x0d6e0, 0x0d6e0,), # Hangul Syllable Hweo
+ (0x0d6fc, 0x0d6fc,), # Hangul Syllable Hwe
+ (0x0d718, 0x0d718,), # Hangul Syllable Hwi
+ (0x0d734, 0x0d734,), # Hangul Syllable Hyu
+ (0x0d750, 0x0d750,), # Hangul Syllable Heu
+ (0x0d76c, 0x0d76c,), # Hangul Syllable Hyi
+ (0x0d788, 0x0d788,), # Hangul Syllable Hi
+)
+
+GRAPHEME_LVT = (
+ # Source: GraphemeBreakProperty-17.0.0.txt
+ # Date: 2025-06-30, 06:20:23 GMT
+ #
+ (0x0ac01, 0x0ac1b,), # Hangul Syllable Gag ..Hangul Syllable Gah
+ (0x0ac1d, 0x0ac37,), # Hangul Syllable Gaeg ..Hangul Syllable Gaeh
+ (0x0ac39, 0x0ac53,), # Hangul Syllable Gyag ..Hangul Syllable Gyah
+ (0x0ac55, 0x0ac6f,), # Hangul Syllable Gyaeg ..Hangul Syllable Gyaeh
+ (0x0ac71, 0x0ac8b,), # Hangul Syllable Geog ..Hangul Syllable Geoh
+ (0x0ac8d, 0x0aca7,), # Hangul Syllable Geg ..Hangul Syllable Geh
+ (0x0aca9, 0x0acc3,), # Hangul Syllable Gyeog ..Hangul Syllable Gyeoh
+ (0x0acc5, 0x0acdf,), # Hangul Syllable Gyeg ..Hangul Syllable Gyeh
+ (0x0ace1, 0x0acfb,), # Hangul Syllable Gog ..Hangul Syllable Goh
+ (0x0acfd, 0x0ad17,), # Hangul Syllable Gwag ..Hangul Syllable Gwah
+ (0x0ad19, 0x0ad33,), # Hangul Syllable Gwaeg ..Hangul Syllable Gwaeh
+ (0x0ad35, 0x0ad4f,), # Hangul Syllable Goeg ..Hangul Syllable Goeh
+ (0x0ad51, 0x0ad6b,), # Hangul Syllable Gyog ..Hangul Syllable Gyoh
+ (0x0ad6d, 0x0ad87,), # Hangul Syllable Gug ..Hangul Syllable Guh
+ (0x0ad89, 0x0ada3,), # Hangul Syllable Gweog ..Hangul Syllable Gweoh
+ (0x0ada5, 0x0adbf,), # Hangul Syllable Gweg ..Hangul Syllable Gweh
+ (0x0adc1, 0x0addb,), # Hangul Syllable Gwig ..Hangul Syllable Gwih
+ (0x0addd, 0x0adf7,), # Hangul Syllable Gyug ..Hangul Syllable Gyuh
+ (0x0adf9, 0x0ae13,), # Hangul Syllable Geug ..Hangul Syllable Geuh
+ (0x0ae15, 0x0ae2f,), # Hangul Syllable Gyig ..Hangul Syllable Gyih
+ (0x0ae31, 0x0ae4b,), # Hangul Syllable Gig ..Hangul Syllable Gih
+ (0x0ae4d, 0x0ae67,), # Hangul Syllable Ggag ..Hangul Syllable Ggah
+ (0x0ae69, 0x0ae83,), # Hangul Syllable Ggaeg ..Hangul Syllable Ggaeh
+ (0x0ae85, 0x0ae9f,), # Hangul Syllable Ggyag ..Hangul Syllable Ggyah
+ (0x0aea1, 0x0aebb,), # Hangul Syllable Ggyaeg ..Hangul Syllable Ggyaeh
+ (0x0aebd, 0x0aed7,), # Hangul Syllable Ggeog ..Hangul Syllable Ggeoh
+ (0x0aed9, 0x0aef3,), # Hangul Syllable Ggeg ..Hangul Syllable Ggeh
+ (0x0aef5, 0x0af0f,), # Hangul Syllable Ggyeog ..Hangul Syllable Ggyeoh
+ (0x0af11, 0x0af2b,), # Hangul Syllable Ggyeg ..Hangul Syllable Ggyeh
+ (0x0af2d, 0x0af47,), # Hangul Syllable Ggog ..Hangul Syllable Ggoh
+ (0x0af49, 0x0af63,), # Hangul Syllable Ggwag ..Hangul Syllable Ggwah
+ (0x0af65, 0x0af7f,), # Hangul Syllable Ggwaeg ..Hangul Syllable Ggwaeh
+ (0x0af81, 0x0af9b,), # Hangul Syllable Ggoeg ..Hangul Syllable Ggoeh
+ (0x0af9d, 0x0afb7,), # Hangul Syllable Ggyog ..Hangul Syllable Ggyoh
+ (0x0afb9, 0x0afd3,), # Hangul Syllable Ggug ..Hangul Syllable Gguh
+ (0x0afd5, 0x0afef,), # Hangul Syllable Ggweog ..Hangul Syllable Ggweoh
+ (0x0aff1, 0x0b00b,), # Hangul Syllable Ggweg ..Hangul Syllable Ggweh
+ (0x0b00d, 0x0b027,), # Hangul Syllable Ggwig ..Hangul Syllable Ggwih
+ (0x0b029, 0x0b043,), # Hangul Syllable Ggyug ..Hangul Syllable Ggyuh
+ (0x0b045, 0x0b05f,), # Hangul Syllable Ggeug ..Hangul Syllable Ggeuh
+ (0x0b061, 0x0b07b,), # Hangul Syllable Ggyig ..Hangul Syllable Ggyih
+ (0x0b07d, 0x0b097,), # Hangul Syllable Ggig ..Hangul Syllable Ggih
+ (0x0b099, 0x0b0b3,), # Hangul Syllable Nag ..Hangul Syllable Nah
+ (0x0b0b5, 0x0b0cf,), # Hangul Syllable Naeg ..Hangul Syllable Naeh
+ (0x0b0d1, 0x0b0eb,), # Hangul Syllable Nyag ..Hangul Syllable Nyah
+ (0x0b0ed, 0x0b107,), # Hangul Syllable Nyaeg ..Hangul Syllable Nyaeh
+ (0x0b109, 0x0b123,), # Hangul Syllable Neog ..Hangul Syllable Neoh
+ (0x0b125, 0x0b13f,), # Hangul Syllable Neg ..Hangul Syllable Neh
+ (0x0b141, 0x0b15b,), # Hangul Syllable Nyeog ..Hangul Syllable Nyeoh
+ (0x0b15d, 0x0b177,), # Hangul Syllable Nyeg ..Hangul Syllable Nyeh
+ (0x0b179, 0x0b193,), # Hangul Syllable Nog ..Hangul Syllable Noh
+ (0x0b195, 0x0b1af,), # Hangul Syllable Nwag ..Hangul Syllable Nwah
+ (0x0b1b1, 0x0b1cb,), # Hangul Syllable Nwaeg ..Hangul Syllable Nwaeh
+ (0x0b1cd, 0x0b1e7,), # Hangul Syllable Noeg ..Hangul Syllable Noeh
+ (0x0b1e9, 0x0b203,), # Hangul Syllable Nyog ..Hangul Syllable Nyoh
+ (0x0b205, 0x0b21f,), # Hangul Syllable Nug ..Hangul Syllable Nuh
+ (0x0b221, 0x0b23b,), # Hangul Syllable Nweog ..Hangul Syllable Nweoh
+ (0x0b23d, 0x0b257,), # Hangul Syllable Nweg ..Hangul Syllable Nweh
+ (0x0b259, 0x0b273,), # Hangul Syllable Nwig ..Hangul Syllable Nwih
+ (0x0b275, 0x0b28f,), # Hangul Syllable Nyug ..Hangul Syllable Nyuh
+ (0x0b291, 0x0b2ab,), # Hangul Syllable Neug ..Hangul Syllable Neuh
+ (0x0b2ad, 0x0b2c7,), # Hangul Syllable Nyig ..Hangul Syllable Nyih
+ (0x0b2c9, 0x0b2e3,), # Hangul Syllable Nig ..Hangul Syllable Nih
+ (0x0b2e5, 0x0b2ff,), # Hangul Syllable Dag ..Hangul Syllable Dah
+ (0x0b301, 0x0b31b,), # Hangul Syllable Daeg ..Hangul Syllable Daeh
+ (0x0b31d, 0x0b337,), # Hangul Syllable Dyag ..Hangul Syllable Dyah
+ (0x0b339, 0x0b353,), # Hangul Syllable Dyaeg ..Hangul Syllable Dyaeh
+ (0x0b355, 0x0b36f,), # Hangul Syllable Deog ..Hangul Syllable Deoh
+ (0x0b371, 0x0b38b,), # Hangul Syllable Deg ..Hangul Syllable Deh
+ (0x0b38d, 0x0b3a7,), # Hangul Syllable Dyeog ..Hangul Syllable Dyeoh
+ (0x0b3a9, 0x0b3c3,), # Hangul Syllable Dyeg ..Hangul Syllable Dyeh
+ (0x0b3c5, 0x0b3df,), # Hangul Syllable Dog ..Hangul Syllable Doh
+ (0x0b3e1, 0x0b3fb,), # Hangul Syllable Dwag ..Hangul Syllable Dwah
+ (0x0b3fd, 0x0b417,), # Hangul Syllable Dwaeg ..Hangul Syllable Dwaeh
+ (0x0b419, 0x0b433,), # Hangul Syllable Doeg ..Hangul Syllable Doeh
+ (0x0b435, 0x0b44f,), # Hangul Syllable Dyog ..Hangul Syllable Dyoh
+ (0x0b451, 0x0b46b,), # Hangul Syllable Dug ..Hangul Syllable Duh
+ (0x0b46d, 0x0b487,), # Hangul Syllable Dweog ..Hangul Syllable Dweoh
+ (0x0b489, 0x0b4a3,), # Hangul Syllable Dweg ..Hangul Syllable Dweh
+ (0x0b4a5, 0x0b4bf,), # Hangul Syllable Dwig ..Hangul Syllable Dwih
+ (0x0b4c1, 0x0b4db,), # Hangul Syllable Dyug ..Hangul Syllable Dyuh
+ (0x0b4dd, 0x0b4f7,), # Hangul Syllable Deug ..Hangul Syllable Deuh
+ (0x0b4f9, 0x0b513,), # Hangul Syllable Dyig ..Hangul Syllable Dyih
+ (0x0b515, 0x0b52f,), # Hangul Syllable Dig ..Hangul Syllable Dih
+ (0x0b531, 0x0b54b,), # Hangul Syllable Ddag ..Hangul Syllable Ddah
+ (0x0b54d, 0x0b567,), # Hangul Syllable Ddaeg ..Hangul Syllable Ddaeh
+ (0x0b569, 0x0b583,), # Hangul Syllable Ddyag ..Hangul Syllable Ddyah
+ (0x0b585, 0x0b59f,), # Hangul Syllable Ddyaeg ..Hangul Syllable Ddyaeh
+ (0x0b5a1, 0x0b5bb,), # Hangul Syllable Ddeog ..Hangul Syllable Ddeoh
+ (0x0b5bd, 0x0b5d7,), # Hangul Syllable Ddeg ..Hangul Syllable Ddeh
+ (0x0b5d9, 0x0b5f3,), # Hangul Syllable Ddyeog ..Hangul Syllable Ddyeoh
+ (0x0b5f5, 0x0b60f,), # Hangul Syllable Ddyeg ..Hangul Syllable Ddyeh
+ (0x0b611, 0x0b62b,), # Hangul Syllable Ddog ..Hangul Syllable Ddoh
+ (0x0b62d, 0x0b647,), # Hangul Syllable Ddwag ..Hangul Syllable Ddwah
+ (0x0b649, 0x0b663,), # Hangul Syllable Ddwaeg ..Hangul Syllable Ddwaeh
+ (0x0b665, 0x0b67f,), # Hangul Syllable Ddoeg ..Hangul Syllable Ddoeh
+ (0x0b681, 0x0b69b,), # Hangul Syllable Ddyog ..Hangul Syllable Ddyoh
+ (0x0b69d, 0x0b6b7,), # Hangul Syllable Ddug ..Hangul Syllable Dduh
+ (0x0b6b9, 0x0b6d3,), # Hangul Syllable Ddweog ..Hangul Syllable Ddweoh
+ (0x0b6d5, 0x0b6ef,), # Hangul Syllable Ddweg ..Hangul Syllable Ddweh
+ (0x0b6f1, 0x0b70b,), # Hangul Syllable Ddwig ..Hangul Syllable Ddwih
+ (0x0b70d, 0x0b727,), # Hangul Syllable Ddyug ..Hangul Syllable Ddyuh
+ (0x0b729, 0x0b743,), # Hangul Syllable Ddeug ..Hangul Syllable Ddeuh
+ (0x0b745, 0x0b75f,), # Hangul Syllable Ddyig ..Hangul Syllable Ddyih
+ (0x0b761, 0x0b77b,), # Hangul Syllable Ddig ..Hangul Syllable Ddih
+ (0x0b77d, 0x0b797,), # Hangul Syllable Rag ..Hangul Syllable Rah
+ (0x0b799, 0x0b7b3,), # Hangul Syllable Raeg ..Hangul Syllable Raeh
+ (0x0b7b5, 0x0b7cf,), # Hangul Syllable Ryag ..Hangul Syllable Ryah
+ (0x0b7d1, 0x0b7eb,), # Hangul Syllable Ryaeg ..Hangul Syllable Ryaeh
+ (0x0b7ed, 0x0b807,), # Hangul Syllable Reog ..Hangul Syllable Reoh
+ (0x0b809, 0x0b823,), # Hangul Syllable Reg ..Hangul Syllable Reh
+ (0x0b825, 0x0b83f,), # Hangul Syllable Ryeog ..Hangul Syllable Ryeoh
+ (0x0b841, 0x0b85b,), # Hangul Syllable Ryeg ..Hangul Syllable Ryeh
+ (0x0b85d, 0x0b877,), # Hangul Syllable Rog ..Hangul Syllable Roh
+ (0x0b879, 0x0b893,), # Hangul Syllable Rwag ..Hangul Syllable Rwah
+ (0x0b895, 0x0b8af,), # Hangul Syllable Rwaeg ..Hangul Syllable Rwaeh
+ (0x0b8b1, 0x0b8cb,), # Hangul Syllable Roeg ..Hangul Syllable Roeh
+ (0x0b8cd, 0x0b8e7,), # Hangul Syllable Ryog ..Hangul Syllable Ryoh
+ (0x0b8e9, 0x0b903,), # Hangul Syllable Rug ..Hangul Syllable Ruh
+ (0x0b905, 0x0b91f,), # Hangul Syllable Rweog ..Hangul Syllable Rweoh
+ (0x0b921, 0x0b93b,), # Hangul Syllable Rweg ..Hangul Syllable Rweh
+ (0x0b93d, 0x0b957,), # Hangul Syllable Rwig ..Hangul Syllable Rwih
+ (0x0b959, 0x0b973,), # Hangul Syllable Ryug ..Hangul Syllable Ryuh
+ (0x0b975, 0x0b98f,), # Hangul Syllable Reug ..Hangul Syllable Reuh
+ (0x0b991, 0x0b9ab,), # Hangul Syllable Ryig ..Hangul Syllable Ryih
+ (0x0b9ad, 0x0b9c7,), # Hangul Syllable Rig ..Hangul Syllable Rih
+ (0x0b9c9, 0x0b9e3,), # Hangul Syllable Mag ..Hangul Syllable Mah
+ (0x0b9e5, 0x0b9ff,), # Hangul Syllable Maeg ..Hangul Syllable Maeh
+ (0x0ba01, 0x0ba1b,), # Hangul Syllable Myag ..Hangul Syllable Myah
+ (0x0ba1d, 0x0ba37,), # Hangul Syllable Myaeg ..Hangul Syllable Myaeh
+ (0x0ba39, 0x0ba53,), # Hangul Syllable Meog ..Hangul Syllable Meoh
+ (0x0ba55, 0x0ba6f,), # Hangul Syllable Meg ..Hangul Syllable Meh
+ (0x0ba71, 0x0ba8b,), # Hangul Syllable Myeog ..Hangul Syllable Myeoh
+ (0x0ba8d, 0x0baa7,), # Hangul Syllable Myeg ..Hangul Syllable Myeh
+ (0x0baa9, 0x0bac3,), # Hangul Syllable Mog ..Hangul Syllable Moh
+ (0x0bac5, 0x0badf,), # Hangul Syllable Mwag ..Hangul Syllable Mwah
+ (0x0bae1, 0x0bafb,), # Hangul Syllable Mwaeg ..Hangul Syllable Mwaeh
+ (0x0bafd, 0x0bb17,), # Hangul Syllable Moeg ..Hangul Syllable Moeh
+ (0x0bb19, 0x0bb33,), # Hangul Syllable Myog ..Hangul Syllable Myoh
+ (0x0bb35, 0x0bb4f,), # Hangul Syllable Mug ..Hangul Syllable Muh
+ (0x0bb51, 0x0bb6b,), # Hangul Syllable Mweog ..Hangul Syllable Mweoh
+ (0x0bb6d, 0x0bb87,), # Hangul Syllable Mweg ..Hangul Syllable Mweh
+ (0x0bb89, 0x0bba3,), # Hangul Syllable Mwig ..Hangul Syllable Mwih
+ (0x0bba5, 0x0bbbf,), # Hangul Syllable Myug ..Hangul Syllable Myuh
+ (0x0bbc1, 0x0bbdb,), # Hangul Syllable Meug ..Hangul Syllable Meuh
+ (0x0bbdd, 0x0bbf7,), # Hangul Syllable Myig ..Hangul Syllable Myih
+ (0x0bbf9, 0x0bc13,), # Hangul Syllable Mig ..Hangul Syllable Mih
+ (0x0bc15, 0x0bc2f,), # Hangul Syllable Bag ..Hangul Syllable Bah
+ (0x0bc31, 0x0bc4b,), # Hangul Syllable Baeg ..Hangul Syllable Baeh
+ (0x0bc4d, 0x0bc67,), # Hangul Syllable Byag ..Hangul Syllable Byah
+ (0x0bc69, 0x0bc83,), # Hangul Syllable Byaeg ..Hangul Syllable Byaeh
+ (0x0bc85, 0x0bc9f,), # Hangul Syllable Beog ..Hangul Syllable Beoh
+ (0x0bca1, 0x0bcbb,), # Hangul Syllable Beg ..Hangul Syllable Beh
+ (0x0bcbd, 0x0bcd7,), # Hangul Syllable Byeog ..Hangul Syllable Byeoh
+ (0x0bcd9, 0x0bcf3,), # Hangul Syllable Byeg ..Hangul Syllable Byeh
+ (0x0bcf5, 0x0bd0f,), # Hangul Syllable Bog ..Hangul Syllable Boh
+ (0x0bd11, 0x0bd2b,), # Hangul Syllable Bwag ..Hangul Syllable Bwah
+ (0x0bd2d, 0x0bd47,), # Hangul Syllable Bwaeg ..Hangul Syllable Bwaeh
+ (0x0bd49, 0x0bd63,), # Hangul Syllable Boeg ..Hangul Syllable Boeh
+ (0x0bd65, 0x0bd7f,), # Hangul Syllable Byog ..Hangul Syllable Byoh
+ (0x0bd81, 0x0bd9b,), # Hangul Syllable Bug ..Hangul Syllable Buh
+ (0x0bd9d, 0x0bdb7,), # Hangul Syllable Bweog ..Hangul Syllable Bweoh
+ (0x0bdb9, 0x0bdd3,), # Hangul Syllable Bweg ..Hangul Syllable Bweh
+ (0x0bdd5, 0x0bdef,), # Hangul Syllable Bwig ..Hangul Syllable Bwih
+ (0x0bdf1, 0x0be0b,), # Hangul Syllable Byug ..Hangul Syllable Byuh
+ (0x0be0d, 0x0be27,), # Hangul Syllable Beug ..Hangul Syllable Beuh
+ (0x0be29, 0x0be43,), # Hangul Syllable Byig ..Hangul Syllable Byih
+ (0x0be45, 0x0be5f,), # Hangul Syllable Big ..Hangul Syllable Bih
+ (0x0be61, 0x0be7b,), # Hangul Syllable Bbag ..Hangul Syllable Bbah
+ (0x0be7d, 0x0be97,), # Hangul Syllable Bbaeg ..Hangul Syllable Bbaeh
+ (0x0be99, 0x0beb3,), # Hangul Syllable Bbyag ..Hangul Syllable Bbyah
+ (0x0beb5, 0x0becf,), # Hangul Syllable Bbyaeg ..Hangul Syllable Bbyaeh
+ (0x0bed1, 0x0beeb,), # Hangul Syllable Bbeog ..Hangul Syllable Bbeoh
+ (0x0beed, 0x0bf07,), # Hangul Syllable Bbeg ..Hangul Syllable Bbeh
+ (0x0bf09, 0x0bf23,), # Hangul Syllable Bbyeog ..Hangul Syllable Bbyeoh
+ (0x0bf25, 0x0bf3f,), # Hangul Syllable Bbyeg ..Hangul Syllable Bbyeh
+ (0x0bf41, 0x0bf5b,), # Hangul Syllable Bbog ..Hangul Syllable Bboh
+ (0x0bf5d, 0x0bf77,), # Hangul Syllable Bbwag ..Hangul Syllable Bbwah
+ (0x0bf79, 0x0bf93,), # Hangul Syllable Bbwaeg ..Hangul Syllable Bbwaeh
+ (0x0bf95, 0x0bfaf,), # Hangul Syllable Bboeg ..Hangul Syllable Bboeh
+ (0x0bfb1, 0x0bfcb,), # Hangul Syllable Bbyog ..Hangul Syllable Bbyoh
+ (0x0bfcd, 0x0bfe7,), # Hangul Syllable Bbug ..Hangul Syllable Bbuh
+ (0x0bfe9, 0x0c003,), # Hangul Syllable Bbweog ..Hangul Syllable Bbweoh
+ (0x0c005, 0x0c01f,), # Hangul Syllable Bbweg ..Hangul Syllable Bbweh
+ (0x0c021, 0x0c03b,), # Hangul Syllable Bbwig ..Hangul Syllable Bbwih
+ (0x0c03d, 0x0c057,), # Hangul Syllable Bbyug ..Hangul Syllable Bbyuh
+ (0x0c059, 0x0c073,), # Hangul Syllable Bbeug ..Hangul Syllable Bbeuh
+ (0x0c075, 0x0c08f,), # Hangul Syllable Bbyig ..Hangul Syllable Bbyih
+ (0x0c091, 0x0c0ab,), # Hangul Syllable Bbig ..Hangul Syllable Bbih
+ (0x0c0ad, 0x0c0c7,), # Hangul Syllable Sag ..Hangul Syllable Sah
+ (0x0c0c9, 0x0c0e3,), # Hangul Syllable Saeg ..Hangul Syllable Saeh
+ (0x0c0e5, 0x0c0ff,), # Hangul Syllable Syag ..Hangul Syllable Syah
+ (0x0c101, 0x0c11b,), # Hangul Syllable Syaeg ..Hangul Syllable Syaeh
+ (0x0c11d, 0x0c137,), # Hangul Syllable Seog ..Hangul Syllable Seoh
+ (0x0c139, 0x0c153,), # Hangul Syllable Seg ..Hangul Syllable Seh
+ (0x0c155, 0x0c16f,), # Hangul Syllable Syeog ..Hangul Syllable Syeoh
+ (0x0c171, 0x0c18b,), # Hangul Syllable Syeg ..Hangul Syllable Syeh
+ (0x0c18d, 0x0c1a7,), # Hangul Syllable Sog ..Hangul Syllable Soh
+ (0x0c1a9, 0x0c1c3,), # Hangul Syllable Swag ..Hangul Syllable Swah
+ (0x0c1c5, 0x0c1df,), # Hangul Syllable Swaeg ..Hangul Syllable Swaeh
+ (0x0c1e1, 0x0c1fb,), # Hangul Syllable Soeg ..Hangul Syllable Soeh
+ (0x0c1fd, 0x0c217,), # Hangul Syllable Syog ..Hangul Syllable Syoh
+ (0x0c219, 0x0c233,), # Hangul Syllable Sug ..Hangul Syllable Suh
+ (0x0c235, 0x0c24f,), # Hangul Syllable Sweog ..Hangul Syllable Sweoh
+ (0x0c251, 0x0c26b,), # Hangul Syllable Sweg ..Hangul Syllable Sweh
+ (0x0c26d, 0x0c287,), # Hangul Syllable Swig ..Hangul Syllable Swih
+ (0x0c289, 0x0c2a3,), # Hangul Syllable Syug ..Hangul Syllable Syuh
+ (0x0c2a5, 0x0c2bf,), # Hangul Syllable Seug ..Hangul Syllable Seuh
+ (0x0c2c1, 0x0c2db,), # Hangul Syllable Syig ..Hangul Syllable Syih
+ (0x0c2dd, 0x0c2f7,), # Hangul Syllable Sig ..Hangul Syllable Sih
+ (0x0c2f9, 0x0c313,), # Hangul Syllable Ssag ..Hangul Syllable Ssah
+ (0x0c315, 0x0c32f,), # Hangul Syllable Ssaeg ..Hangul Syllable Ssaeh
+ (0x0c331, 0x0c34b,), # Hangul Syllable Ssyag ..Hangul Syllable Ssyah
+ (0x0c34d, 0x0c367,), # Hangul Syllable Ssyaeg ..Hangul Syllable Ssyaeh
+ (0x0c369, 0x0c383,), # Hangul Syllable Sseog ..Hangul Syllable Sseoh
+ (0x0c385, 0x0c39f,), # Hangul Syllable Sseg ..Hangul Syllable Sseh
+ (0x0c3a1, 0x0c3bb,), # Hangul Syllable Ssyeog ..Hangul Syllable Ssyeoh
+ (0x0c3bd, 0x0c3d7,), # Hangul Syllable Ssyeg ..Hangul Syllable Ssyeh
+ (0x0c3d9, 0x0c3f3,), # Hangul Syllable Ssog ..Hangul Syllable Ssoh
+ (0x0c3f5, 0x0c40f,), # Hangul Syllable Sswag ..Hangul Syllable Sswah
+ (0x0c411, 0x0c42b,), # Hangul Syllable Sswaeg ..Hangul Syllable Sswaeh
+ (0x0c42d, 0x0c447,), # Hangul Syllable Ssoeg ..Hangul Syllable Ssoeh
+ (0x0c449, 0x0c463,), # Hangul Syllable Ssyog ..Hangul Syllable Ssyoh
+ (0x0c465, 0x0c47f,), # Hangul Syllable Ssug ..Hangul Syllable Ssuh
+ (0x0c481, 0x0c49b,), # Hangul Syllable Ssweog ..Hangul Syllable Ssweoh
+ (0x0c49d, 0x0c4b7,), # Hangul Syllable Ssweg ..Hangul Syllable Ssweh
+ (0x0c4b9, 0x0c4d3,), # Hangul Syllable Sswig ..Hangul Syllable Sswih
+ (0x0c4d5, 0x0c4ef,), # Hangul Syllable Ssyug ..Hangul Syllable Ssyuh
+ (0x0c4f1, 0x0c50b,), # Hangul Syllable Sseug ..Hangul Syllable Sseuh
+ (0x0c50d, 0x0c527,), # Hangul Syllable Ssyig ..Hangul Syllable Ssyih
+ (0x0c529, 0x0c543,), # Hangul Syllable Ssig ..Hangul Syllable Ssih
+ (0x0c545, 0x0c55f,), # Hangul Syllable Ag ..Hangul Syllable Ah
+ (0x0c561, 0x0c57b,), # Hangul Syllable Aeg ..Hangul Syllable Aeh
+ (0x0c57d, 0x0c597,), # Hangul Syllable Yag ..Hangul Syllable Yah
+ (0x0c599, 0x0c5b3,), # Hangul Syllable Yaeg ..Hangul Syllable Yaeh
+ (0x0c5b5, 0x0c5cf,), # Hangul Syllable Eog ..Hangul Syllable Eoh
+ (0x0c5d1, 0x0c5eb,), # Hangul Syllable Eg ..Hangul Syllable Eh
+ (0x0c5ed, 0x0c607,), # Hangul Syllable Yeog ..Hangul Syllable Yeoh
+ (0x0c609, 0x0c623,), # Hangul Syllable Yeg ..Hangul Syllable Yeh
+ (0x0c625, 0x0c63f,), # Hangul Syllable Og ..Hangul Syllable Oh
+ (0x0c641, 0x0c65b,), # Hangul Syllable Wag ..Hangul Syllable Wah
+ (0x0c65d, 0x0c677,), # Hangul Syllable Waeg ..Hangul Syllable Waeh
+ (0x0c679, 0x0c693,), # Hangul Syllable Oeg ..Hangul Syllable Oeh
+ (0x0c695, 0x0c6af,), # Hangul Syllable Yog ..Hangul Syllable Yoh
+ (0x0c6b1, 0x0c6cb,), # Hangul Syllable Ug ..Hangul Syllable Uh
+ (0x0c6cd, 0x0c6e7,), # Hangul Syllable Weog ..Hangul Syllable Weoh
+ (0x0c6e9, 0x0c703,), # Hangul Syllable Weg ..Hangul Syllable Weh
+ (0x0c705, 0x0c71f,), # Hangul Syllable Wig ..Hangul Syllable Wih
+ (0x0c721, 0x0c73b,), # Hangul Syllable Yug ..Hangul Syllable Yuh
+ (0x0c73d, 0x0c757,), # Hangul Syllable Eug ..Hangul Syllable Euh
+ (0x0c759, 0x0c773,), # Hangul Syllable Yig ..Hangul Syllable Yih
+ (0x0c775, 0x0c78f,), # Hangul Syllable Ig ..Hangul Syllable Ih
+ (0x0c791, 0x0c7ab,), # Hangul Syllable Jag ..Hangul Syllable Jah
+ (0x0c7ad, 0x0c7c7,), # Hangul Syllable Jaeg ..Hangul Syllable Jaeh
+ (0x0c7c9, 0x0c7e3,), # Hangul Syllable Jyag ..Hangul Syllable Jyah
+ (0x0c7e5, 0x0c7ff,), # Hangul Syllable Jyaeg ..Hangul Syllable Jyaeh
+ (0x0c801, 0x0c81b,), # Hangul Syllable Jeog ..Hangul Syllable Jeoh
+ (0x0c81d, 0x0c837,), # Hangul Syllable Jeg ..Hangul Syllable Jeh
+ (0x0c839, 0x0c853,), # Hangul Syllable Jyeog ..Hangul Syllable Jyeoh
+ (0x0c855, 0x0c86f,), # Hangul Syllable Jyeg ..Hangul Syllable Jyeh
+ (0x0c871, 0x0c88b,), # Hangul Syllable Jog ..Hangul Syllable Joh
+ (0x0c88d, 0x0c8a7,), # Hangul Syllable Jwag ..Hangul Syllable Jwah
+ (0x0c8a9, 0x0c8c3,), # Hangul Syllable Jwaeg ..Hangul Syllable Jwaeh
+ (0x0c8c5, 0x0c8df,), # Hangul Syllable Joeg ..Hangul Syllable Joeh
+ (0x0c8e1, 0x0c8fb,), # Hangul Syllable Jyog ..Hangul Syllable Jyoh
+ (0x0c8fd, 0x0c917,), # Hangul Syllable Jug ..Hangul Syllable Juh
+ (0x0c919, 0x0c933,), # Hangul Syllable Jweog ..Hangul Syllable Jweoh
+ (0x0c935, 0x0c94f,), # Hangul Syllable Jweg ..Hangul Syllable Jweh
+ (0x0c951, 0x0c96b,), # Hangul Syllable Jwig ..Hangul Syllable Jwih
+ (0x0c96d, 0x0c987,), # Hangul Syllable Jyug ..Hangul Syllable Jyuh
+ (0x0c989, 0x0c9a3,), # Hangul Syllable Jeug ..Hangul Syllable Jeuh
+ (0x0c9a5, 0x0c9bf,), # Hangul Syllable Jyig ..Hangul Syllable Jyih
+ (0x0c9c1, 0x0c9db,), # Hangul Syllable Jig ..Hangul Syllable Jih
+ (0x0c9dd, 0x0c9f7,), # Hangul Syllable Jjag ..Hangul Syllable Jjah
+ (0x0c9f9, 0x0ca13,), # Hangul Syllable Jjaeg ..Hangul Syllable Jjaeh
+ (0x0ca15, 0x0ca2f,), # Hangul Syllable Jjyag ..Hangul Syllable Jjyah
+ (0x0ca31, 0x0ca4b,), # Hangul Syllable Jjyaeg ..Hangul Syllable Jjyaeh
+ (0x0ca4d, 0x0ca67,), # Hangul Syllable Jjeog ..Hangul Syllable Jjeoh
+ (0x0ca69, 0x0ca83,), # Hangul Syllable Jjeg ..Hangul Syllable Jjeh
+ (0x0ca85, 0x0ca9f,), # Hangul Syllable Jjyeog ..Hangul Syllable Jjyeoh
+ (0x0caa1, 0x0cabb,), # Hangul Syllable Jjyeg ..Hangul Syllable Jjyeh
+ (0x0cabd, 0x0cad7,), # Hangul Syllable Jjog ..Hangul Syllable Jjoh
+ (0x0cad9, 0x0caf3,), # Hangul Syllable Jjwag ..Hangul Syllable Jjwah
+ (0x0caf5, 0x0cb0f,), # Hangul Syllable Jjwaeg ..Hangul Syllable Jjwaeh
+ (0x0cb11, 0x0cb2b,), # Hangul Syllable Jjoeg ..Hangul Syllable Jjoeh
+ (0x0cb2d, 0x0cb47,), # Hangul Syllable Jjyog ..Hangul Syllable Jjyoh
+ (0x0cb49, 0x0cb63,), # Hangul Syllable Jjug ..Hangul Syllable Jjuh
+ (0x0cb65, 0x0cb7f,), # Hangul Syllable Jjweog ..Hangul Syllable Jjweoh
+ (0x0cb81, 0x0cb9b,), # Hangul Syllable Jjweg ..Hangul Syllable Jjweh
+ (0x0cb9d, 0x0cbb7,), # Hangul Syllable Jjwig ..Hangul Syllable Jjwih
+ (0x0cbb9, 0x0cbd3,), # Hangul Syllable Jjyug ..Hangul Syllable Jjyuh
+ (0x0cbd5, 0x0cbef,), # Hangul Syllable Jjeug ..Hangul Syllable Jjeuh
+ (0x0cbf1, 0x0cc0b,), # Hangul Syllable Jjyig ..Hangul Syllable Jjyih
+ (0x0cc0d, 0x0cc27,), # Hangul Syllable Jjig ..Hangul Syllable Jjih
+ (0x0cc29, 0x0cc43,), # Hangul Syllable Cag ..Hangul Syllable Cah
+ (0x0cc45, 0x0cc5f,), # Hangul Syllable Caeg ..Hangul Syllable Caeh
+ (0x0cc61, 0x0cc7b,), # Hangul Syllable Cyag ..Hangul Syllable Cyah
+ (0x0cc7d, 0x0cc97,), # Hangul Syllable Cyaeg ..Hangul Syllable Cyaeh
+ (0x0cc99, 0x0ccb3,), # Hangul Syllable Ceog ..Hangul Syllable Ceoh
+ (0x0ccb5, 0x0cccf,), # Hangul Syllable Ceg ..Hangul Syllable Ceh
+ (0x0ccd1, 0x0cceb,), # Hangul Syllable Cyeog ..Hangul Syllable Cyeoh
+ (0x0cced, 0x0cd07,), # Hangul Syllable Cyeg ..Hangul Syllable Cyeh
+ (0x0cd09, 0x0cd23,), # Hangul Syllable Cog ..Hangul Syllable Coh
+ (0x0cd25, 0x0cd3f,), # Hangul Syllable Cwag ..Hangul Syllable Cwah
+ (0x0cd41, 0x0cd5b,), # Hangul Syllable Cwaeg ..Hangul Syllable Cwaeh
+ (0x0cd5d, 0x0cd77,), # Hangul Syllable Coeg ..Hangul Syllable Coeh
+ (0x0cd79, 0x0cd93,), # Hangul Syllable Cyog ..Hangul Syllable Cyoh
+ (0x0cd95, 0x0cdaf,), # Hangul Syllable Cug ..Hangul Syllable Cuh
+ (0x0cdb1, 0x0cdcb,), # Hangul Syllable Cweog ..Hangul Syllable Cweoh
+ (0x0cdcd, 0x0cde7,), # Hangul Syllable Cweg ..Hangul Syllable Cweh
+ (0x0cde9, 0x0ce03,), # Hangul Syllable Cwig ..Hangul Syllable Cwih
+ (0x0ce05, 0x0ce1f,), # Hangul Syllable Cyug ..Hangul Syllable Cyuh
+ (0x0ce21, 0x0ce3b,), # Hangul Syllable Ceug ..Hangul Syllable Ceuh
+ (0x0ce3d, 0x0ce57,), # Hangul Syllable Cyig ..Hangul Syllable Cyih
+ (0x0ce59, 0x0ce73,), # Hangul Syllable Cig ..Hangul Syllable Cih
+ (0x0ce75, 0x0ce8f,), # Hangul Syllable Kag ..Hangul Syllable Kah
+ (0x0ce91, 0x0ceab,), # Hangul Syllable Kaeg ..Hangul Syllable Kaeh
+ (0x0cead, 0x0cec7,), # Hangul Syllable Kyag ..Hangul Syllable Kyah
+ (0x0cec9, 0x0cee3,), # Hangul Syllable Kyaeg ..Hangul Syllable Kyaeh
+ (0x0cee5, 0x0ceff,), # Hangul Syllable Keog ..Hangul Syllable Keoh
+ (0x0cf01, 0x0cf1b,), # Hangul Syllable Keg ..Hangul Syllable Keh
+ (0x0cf1d, 0x0cf37,), # Hangul Syllable Kyeog ..Hangul Syllable Kyeoh
+ (0x0cf39, 0x0cf53,), # Hangul Syllable Kyeg ..Hangul Syllable Kyeh
+ (0x0cf55, 0x0cf6f,), # Hangul Syllable Kog ..Hangul Syllable Koh
+ (0x0cf71, 0x0cf8b,), # Hangul Syllable Kwag ..Hangul Syllable Kwah
+ (0x0cf8d, 0x0cfa7,), # Hangul Syllable Kwaeg ..Hangul Syllable Kwaeh
+ (0x0cfa9, 0x0cfc3,), # Hangul Syllable Koeg ..Hangul Syllable Koeh
+ (0x0cfc5, 0x0cfdf,), # Hangul Syllable Kyog ..Hangul Syllable Kyoh
+ (0x0cfe1, 0x0cffb,), # Hangul Syllable Kug ..Hangul Syllable Kuh
+ (0x0cffd, 0x0d017,), # Hangul Syllable Kweog ..Hangul Syllable Kweoh
+ (0x0d019, 0x0d033,), # Hangul Syllable Kweg ..Hangul Syllable Kweh
+ (0x0d035, 0x0d04f,), # Hangul Syllable Kwig ..Hangul Syllable Kwih
+ (0x0d051, 0x0d06b,), # Hangul Syllable Kyug ..Hangul Syllable Kyuh
+ (0x0d06d, 0x0d087,), # Hangul Syllable Keug ..Hangul Syllable Keuh
+ (0x0d089, 0x0d0a3,), # Hangul Syllable Kyig ..Hangul Syllable Kyih
+ (0x0d0a5, 0x0d0bf,), # Hangul Syllable Kig ..Hangul Syllable Kih
+ (0x0d0c1, 0x0d0db,), # Hangul Syllable Tag ..Hangul Syllable Tah
+ (0x0d0dd, 0x0d0f7,), # Hangul Syllable Taeg ..Hangul Syllable Taeh
+ (0x0d0f9, 0x0d113,), # Hangul Syllable Tyag ..Hangul Syllable Tyah
+ (0x0d115, 0x0d12f,), # Hangul Syllable Tyaeg ..Hangul Syllable Tyaeh
+ (0x0d131, 0x0d14b,), # Hangul Syllable Teog ..Hangul Syllable Teoh
+ (0x0d14d, 0x0d167,), # Hangul Syllable Teg ..Hangul Syllable Teh
+ (0x0d169, 0x0d183,), # Hangul Syllable Tyeog ..Hangul Syllable Tyeoh
+ (0x0d185, 0x0d19f,), # Hangul Syllable Tyeg ..Hangul Syllable Tyeh
+ (0x0d1a1, 0x0d1bb,), # Hangul Syllable Tog ..Hangul Syllable Toh
+ (0x0d1bd, 0x0d1d7,), # Hangul Syllable Twag ..Hangul Syllable Twah
+ (0x0d1d9, 0x0d1f3,), # Hangul Syllable Twaeg ..Hangul Syllable Twaeh
+ (0x0d1f5, 0x0d20f,), # Hangul Syllable Toeg ..Hangul Syllable Toeh
+ (0x0d211, 0x0d22b,), # Hangul Syllable Tyog ..Hangul Syllable Tyoh
+ (0x0d22d, 0x0d247,), # Hangul Syllable Tug ..Hangul Syllable Tuh
+ (0x0d249, 0x0d263,), # Hangul Syllable Tweog ..Hangul Syllable Tweoh
+ (0x0d265, 0x0d27f,), # Hangul Syllable Tweg ..Hangul Syllable Tweh
+ (0x0d281, 0x0d29b,), # Hangul Syllable Twig ..Hangul Syllable Twih
+ (0x0d29d, 0x0d2b7,), # Hangul Syllable Tyug ..Hangul Syllable Tyuh
+ (0x0d2b9, 0x0d2d3,), # Hangul Syllable Teug ..Hangul Syllable Teuh
+ (0x0d2d5, 0x0d2ef,), # Hangul Syllable Tyig ..Hangul Syllable Tyih
+ (0x0d2f1, 0x0d30b,), # Hangul Syllable Tig ..Hangul Syllable Tih
+ (0x0d30d, 0x0d327,), # Hangul Syllable Pag ..Hangul Syllable Pah
+ (0x0d329, 0x0d343,), # Hangul Syllable Paeg ..Hangul Syllable Paeh
+ (0x0d345, 0x0d35f,), # Hangul Syllable Pyag ..Hangul Syllable Pyah
+ (0x0d361, 0x0d37b,), # Hangul Syllable Pyaeg ..Hangul Syllable Pyaeh
+ (0x0d37d, 0x0d397,), # Hangul Syllable Peog ..Hangul Syllable Peoh
+ (0x0d399, 0x0d3b3,), # Hangul Syllable Peg ..Hangul Syllable Peh
+ (0x0d3b5, 0x0d3cf,), # Hangul Syllable Pyeog ..Hangul Syllable Pyeoh
+ (0x0d3d1, 0x0d3eb,), # Hangul Syllable Pyeg ..Hangul Syllable Pyeh
+ (0x0d3ed, 0x0d407,), # Hangul Syllable Pog ..Hangul Syllable Poh
+ (0x0d409, 0x0d423,), # Hangul Syllable Pwag ..Hangul Syllable Pwah
+ (0x0d425, 0x0d43f,), # Hangul Syllable Pwaeg ..Hangul Syllable Pwaeh
+ (0x0d441, 0x0d45b,), # Hangul Syllable Poeg ..Hangul Syllable Poeh
+ (0x0d45d, 0x0d477,), # Hangul Syllable Pyog ..Hangul Syllable Pyoh
+ (0x0d479, 0x0d493,), # Hangul Syllable Pug ..Hangul Syllable Puh
+ (0x0d495, 0x0d4af,), # Hangul Syllable Pweog ..Hangul Syllable Pweoh
+ (0x0d4b1, 0x0d4cb,), # Hangul Syllable Pweg ..Hangul Syllable Pweh
+ (0x0d4cd, 0x0d4e7,), # Hangul Syllable Pwig ..Hangul Syllable Pwih
+ (0x0d4e9, 0x0d503,), # Hangul Syllable Pyug ..Hangul Syllable Pyuh
+ (0x0d505, 0x0d51f,), # Hangul Syllable Peug ..Hangul Syllable Peuh
+ (0x0d521, 0x0d53b,), # Hangul Syllable Pyig ..Hangul Syllable Pyih
+ (0x0d53d, 0x0d557,), # Hangul Syllable Pig ..Hangul Syllable Pih
+ (0x0d559, 0x0d573,), # Hangul Syllable Hag ..Hangul Syllable Hah
+ (0x0d575, 0x0d58f,), # Hangul Syllable Haeg ..Hangul Syllable Haeh
+ (0x0d591, 0x0d5ab,), # Hangul Syllable Hyag ..Hangul Syllable Hyah
+ (0x0d5ad, 0x0d5c7,), # Hangul Syllable Hyaeg ..Hangul Syllable Hyaeh
+ (0x0d5c9, 0x0d5e3,), # Hangul Syllable Heog ..Hangul Syllable Heoh
+ (0x0d5e5, 0x0d5ff,), # Hangul Syllable Heg ..Hangul Syllable Heh
+ (0x0d601, 0x0d61b,), # Hangul Syllable Hyeog ..Hangul Syllable Hyeoh
+ (0x0d61d, 0x0d637,), # Hangul Syllable Hyeg ..Hangul Syllable Hyeh
+ (0x0d639, 0x0d653,), # Hangul Syllable Hog ..Hangul Syllable Hoh
+ (0x0d655, 0x0d66f,), # Hangul Syllable Hwag ..Hangul Syllable Hwah
+ (0x0d671, 0x0d68b,), # Hangul Syllable Hwaeg ..Hangul Syllable Hwaeh
+ (0x0d68d, 0x0d6a7,), # Hangul Syllable Hoeg ..Hangul Syllable Hoeh
+ (0x0d6a9, 0x0d6c3,), # Hangul Syllable Hyog ..Hangul Syllable Hyoh
+ (0x0d6c5, 0x0d6df,), # Hangul Syllable Hug ..Hangul Syllable Huh
+ (0x0d6e1, 0x0d6fb,), # Hangul Syllable Hweog ..Hangul Syllable Hweoh
+ (0x0d6fd, 0x0d717,), # Hangul Syllable Hweg ..Hangul Syllable Hweh
+ (0x0d719, 0x0d733,), # Hangul Syllable Hwig ..Hangul Syllable Hwih
+ (0x0d735, 0x0d74f,), # Hangul Syllable Hyug ..Hangul Syllable Hyuh
+ (0x0d751, 0x0d76b,), # Hangul Syllable Heug ..Hangul Syllable Heuh
+ (0x0d76d, 0x0d787,), # Hangul Syllable Hyig ..Hangul Syllable Hyih
+ (0x0d789, 0x0d7a3,), # Hangul Syllable Hig ..Hangul Syllable Hih
+)
+
+EXTENDED_PICTOGRAPHIC = (
+ # Source: emoji-data.txt
+ # Date: 2025-07-25, 17:54:31 GMT
+ #
+ (0x000a9, 0x000a9,), # Copyright Sign
+ (0x000ae, 0x000ae,), # Registered Sign
+ (0x0203c, 0x0203c,), # Double Exclamation Mark
+ (0x02049, 0x02049,), # Exclamation Question Mark
+ (0x02122, 0x02122,), # Trade Mark Sign
+ (0x02139, 0x02139,), # Information Source
+ (0x02194, 0x02199,), # Left Right Arrow ..South West Arrow
+ (0x021a9, 0x021aa,), # Leftwards Arrow With Hoo..Rightwards Arrow With Ho
+ (0x0231a, 0x0231b,), # Watch ..Hourglass
+ (0x02328, 0x02328,), # Keyboard
+ (0x023cf, 0x023cf,), # Eject Symbol
+ (0x023e9, 0x023f3,), # Black Right-pointing Dou..Hourglass With Flowing S
+ (0x023f8, 0x023fa,), # Double Vertical Bar ..Black Circle For Record
+ (0x024c2, 0x024c2,), # Circled Latin Capital Letter M
+ (0x025aa, 0x025ab,), # Black Small Square ..White Small Square
+ (0x025b6, 0x025b6,), # Black Right-pointing Triangle
+ (0x025c0, 0x025c0,), # Black Left-pointing Triangle
+ (0x025fb, 0x025fe,), # White Medium Square ..Black Medium Small Squar
+ (0x02600, 0x02604,), # Black Sun With Rays ..Comet
+ (0x0260e, 0x0260e,), # Black Telephone
+ (0x02611, 0x02611,), # Ballot Box With Check
+ (0x02614, 0x02615,), # Umbrella With Rain Drops..Hot Beverage
+ (0x02618, 0x02618,), # Shamrock
+ (0x0261d, 0x0261d,), # White Up Pointing Index
+ (0x02620, 0x02620,), # Skull And Crossbones
+ (0x02622, 0x02623,), # Radioactive Sign ..Biohazard Sign
+ (0x02626, 0x02626,), # Orthodox Cross
+ (0x0262a, 0x0262a,), # Star And Crescent
+ (0x0262e, 0x0262f,), # Peace Symbol ..Yin Yang
+ (0x02638, 0x0263a,), # Wheel Of Dharma ..White Smiling Face
+ (0x02640, 0x02640,), # Female Sign
+ (0x02642, 0x02642,), # Male Sign
+ (0x02648, 0x02653,), # Aries ..Pisces
+ (0x0265f, 0x02660,), # Black Chess Pawn ..Black Spade Suit
+ (0x02663, 0x02663,), # Black Club Suit
+ (0x02665, 0x02666,), # Black Heart Suit ..Black Diamond Suit
+ (0x02668, 0x02668,), # Hot Springs
+ (0x0267b, 0x0267b,), # Black Universal Recycling Symbol
+ (0x0267e, 0x0267f,), # Permanent Paper Sign ..Wheelchair Symbol
+ (0x02692, 0x02697,), # Hammer And Pick ..Alembic
+ (0x02699, 0x02699,), # Gear
+ (0x0269b, 0x0269c,), # Atom Symbol ..Fleur-de-lis
+ (0x026a0, 0x026a1,), # Warning Sign ..High Voltage Sign
+ (0x026a7, 0x026a7,), # Male With Stroke And Male And Female Sign
+ (0x026aa, 0x026ab,), # Medium White Circle ..Medium Black Circle
+ (0x026b0, 0x026b1,), # Coffin ..Funeral Urn
+ (0x026bd, 0x026be,), # Soccer Ball ..Baseball
+ (0x026c4, 0x026c5,), # Snowman Without Snow ..Sun Behind Cloud
+ (0x026c8, 0x026c8,), # Thunder Cloud And Rain
+ (0x026ce, 0x026cf,), # Ophiuchus ..Pick
+ (0x026d1, 0x026d1,), # Helmet With White Cross
+ (0x026d3, 0x026d4,), # Chains ..No Entry
+ (0x026e9, 0x026ea,), # Shinto Shrine ..Church
+ (0x026f0, 0x026f5,), # Mountain ..Sailboat
+ (0x026f7, 0x026fa,), # Skier ..Tent
+ (0x026fd, 0x026fd,), # Fuel Pump
+ (0x02702, 0x02702,), # Black Scissors
+ (0x02705, 0x02705,), # White Heavy Check Mark
+ (0x02708, 0x0270d,), # Airplane ..Writing Hand
+ (0x0270f, 0x0270f,), # Pencil
+ (0x02712, 0x02712,), # Black Nib
+ (0x02714, 0x02714,), # Heavy Check Mark
+ (0x02716, 0x02716,), # Heavy Multiplication X
+ (0x0271d, 0x0271d,), # Latin Cross
+ (0x02721, 0x02721,), # Star Of David
+ (0x02728, 0x02728,), # Sparkles
+ (0x02733, 0x02734,), # Eight Spoked Asterisk ..Eight Pointed Black Star
+ (0x02744, 0x02744,), # Snowflake
+ (0x02747, 0x02747,), # Sparkle
+ (0x0274c, 0x0274c,), # Cross Mark
+ (0x0274e, 0x0274e,), # Negative Squared Cross Mark
+ (0x02753, 0x02755,), # Black Question Mark Orna..White Exclamation Mark O
+ (0x02757, 0x02757,), # Heavy Exclamation Mark Symbol
+ (0x02763, 0x02764,), # Heavy Heart Exclamation ..Heavy Black Heart
+ (0x02795, 0x02797,), # Heavy Plus Sign ..Heavy Division Sign
+ (0x027a1, 0x027a1,), # Black Rightwards Arrow
+ (0x027b0, 0x027b0,), # Curly Loop
+ (0x027bf, 0x027bf,), # Double Curly Loop
+ (0x02934, 0x02935,), # Arrow Pointing Rightward..Arrow Pointing Rightward
+ (0x02b05, 0x02b07,), # Leftwards Black Arrow ..Downwards Black Arrow
+ (0x02b1b, 0x02b1c,), # Black Large Square ..White Large Square
+ (0x02b50, 0x02b50,), # White Medium Star
+ (0x02b55, 0x02b55,), # Heavy Large Circle
+ (0x03030, 0x03030,), # Wavy Dash
+ (0x0303d, 0x0303d,), # Part Alternation Mark
+ (0x03297, 0x03297,), # Circled Ideograph Congratulation
+ (0x03299, 0x03299,), # Circled Ideograph Secret
+ (0x1f004, 0x1f004,), # Mahjong Tile Red Dragon
+ (0x1f02c, 0x1f02f,), # (nil)
+ (0x1f094, 0x1f09f,), # (nil)
+ (0x1f0af, 0x1f0b0,), # (nil)
+ (0x1f0c0, 0x1f0c0,), # (nil)
+ (0x1f0cf, 0x1f0d0,), # Playing Card Black Joker..(nil)
+ (0x1f0f6, 0x1f0ff,), # (nil)
+ (0x1f170, 0x1f171,), # Negative Squared Latin C..Negative Squared Latin C
+ (0x1f17e, 0x1f17f,), # Negative Squared Latin C..Negative Squared Latin C
+ (0x1f18e, 0x1f18e,), # Negative Squared Ab
+ (0x1f191, 0x1f19a,), # Squared Cl ..Squared Vs
+ (0x1f1ae, 0x1f1e5,), # (nil)
+ (0x1f201, 0x1f20f,), # Squared Katakana Koko ..(nil)
+ (0x1f21a, 0x1f21a,), # Squared Cjk Unified Ideograph-7121
+ (0x1f22f, 0x1f22f,), # Squared Cjk Unified Ideograph-6307
+ (0x1f232, 0x1f23a,), # Squared Cjk Unified Ideo..Squared Cjk Unified Ideo
+ (0x1f23c, 0x1f23f,), # (nil)
+ (0x1f249, 0x1f25f,), # (nil)
+ (0x1f266, 0x1f321,), # (nil) ..Thermometer
+ (0x1f324, 0x1f393,), # White Sun With Small Clo..Graduation Cap
+ (0x1f396, 0x1f397,), # Military Medal ..Reminder Ribbon
+ (0x1f399, 0x1f39b,), # Studio Microphone ..Control Knobs
+ (0x1f39e, 0x1f3f0,), # Film Frames ..European Castle
+ (0x1f3f3, 0x1f3f5,), # Waving White Flag ..Rosette
+ (0x1f3f7, 0x1f3fa,), # Label ..Amphora
+ (0x1f400, 0x1f4fd,), # Rat ..Film Projector
+ (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red
+ (0x1f549, 0x1f54e,), # Om Symbol ..Menorah With Nine Branch
+ (0x1f550, 0x1f567,), # Clock Face One Oclock ..Clock Face Twelve-thirty
+ (0x1f56f, 0x1f570,), # Candle ..Mantelpiece Clock
+ (0x1f573, 0x1f57a,), # Hole ..Man Dancing
+ (0x1f587, 0x1f587,), # Linked Paperclips
+ (0x1f58a, 0x1f58d,), # Lower Left Ballpoint Pen..Lower Left Crayon
+ (0x1f590, 0x1f590,), # Raised Hand With Fingers Splayed
+ (0x1f595, 0x1f596,), # Reversed Hand With Middl..Raised Hand With Part Be
+ (0x1f5a4, 0x1f5a5,), # Black Heart ..Desktop Computer
+ (0x1f5a8, 0x1f5a8,), # Printer
+ (0x1f5b1, 0x1f5b2,), # Three Button Mouse ..Trackball
+ (0x1f5bc, 0x1f5bc,), # Frame With Picture
+ (0x1f5c2, 0x1f5c4,), # Card Index Dividers ..File Cabinet
+ (0x1f5d1, 0x1f5d3,), # Wastebasket ..Spiral Calendar Pad
+ (0x1f5dc, 0x1f5de,), # Compression ..Rolled-up Newspaper
+ (0x1f5e1, 0x1f5e1,), # Dagger Knife
+ (0x1f5e3, 0x1f5e3,), # Speaking Head In Silhouette
+ (0x1f5e8, 0x1f5e8,), # Left Speech Bubble
+ (0x1f5ef, 0x1f5ef,), # Right Anger Bubble
+ (0x1f5f3, 0x1f5f3,), # Ballot Box With Ballot
+ (0x1f5fa, 0x1f64f,), # World Map ..Person With Folded Hands
+ (0x1f680, 0x1f6c5,), # Rocket ..Left Luggage
+ (0x1f6cb, 0x1f6d2,), # Couch And Lamp ..Shopping Trolley
+ (0x1f6d5, 0x1f6e5,), # Hindu Temple ..Motor Boat
+ (0x1f6e9, 0x1f6e9,), # Small Airplane
+ (0x1f6eb, 0x1f6f0,), # Airplane Departure ..Satellite
+ (0x1f6f3, 0x1f6ff,), # Passenger Ship ..(nil)
+ (0x1f7da, 0x1f7ff,), # (nil)
+ (0x1f80c, 0x1f80f,), # (nil)
+ (0x1f848, 0x1f84f,), # (nil)
+ (0x1f85a, 0x1f85f,), # (nil)
+ (0x1f888, 0x1f88f,), # (nil)
+ (0x1f8ae, 0x1f8af,), # (nil)
+ (0x1f8bc, 0x1f8bf,), # (nil)
+ (0x1f8c2, 0x1f8cf,), # (nil)
+ (0x1f8d9, 0x1f8ff,), # (nil)
+ (0x1f90c, 0x1f93a,), # Pinched Fingers ..Fencer
+ (0x1f93c, 0x1f945,), # Wrestlers ..Goal Net
+ (0x1f947, 0x1f9ff,), # First Place Medal ..Nazar Amulet
+ (0x1fa58, 0x1fa5f,), # (nil)
+ (0x1fa6e, 0x1faff,), # (nil)
+ (0x1fc00, 0x1fffd,), # (nil)
+)
+
+INCB_LINKER = (
+ # Source: DerivedCoreProperties
+ # Date: see file
+ #
+ (0x0094d, 0x0094d,), # Devanagari Sign Virama
+ (0x009cd, 0x009cd,), # Bengali Sign Virama
+ (0x00acd, 0x00acd,), # Gujarati Sign Virama
+ (0x00b4d, 0x00b4d,), # Oriya Sign Virama
+ (0x00c4d, 0x00c4d,), # Telugu Sign Virama
+ (0x00d4d, 0x00d4d,), # Malayalam Sign Virama
+ (0x01039, 0x01039,), # Myanmar Sign Virama
+ (0x017d2, 0x017d2,), # Khmer Sign Coeng
+ (0x01a60, 0x01a60,), # Tai Tham Sign Sakot
+ (0x01b44, 0x01b44,), # Balinese Adeg Adeg
+ (0x01bab, 0x01bab,), # Sundanese Sign Virama
+ (0x0a9c0, 0x0a9c0,), # Javanese Pangkon
+ (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama
+ (0x10a3f, 0x10a3f,), # Kharoshthi Virama
+ (0x11133, 0x11133,), # Chakma Virama
+ (0x113d0, 0x113d0,), # (nil)
+ (0x1193e, 0x1193e,), # Dives Akuru Virama
+ (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner
+ (0x11a99, 0x11a99,), # Soyombo Subjoiner
+ (0x11f42, 0x11f42,), # Kawi Conjoiner
+)
+
+INCB_CONSONANT = (
+ # Source: DerivedCoreProperties
+ # Date: see file
+ #
+ (0x00915, 0x00939,), # Devanagari Letter Ka ..Devanagari Letter Ha
+ (0x00958, 0x0095f,), # Devanagari Letter Qa ..Devanagari Letter Yya
+ (0x00978, 0x0097f,), # Devanagari Letter Marwar..Devanagari Letter Bba
+ (0x00995, 0x009a8,), # Bengali Letter Ka ..Bengali Letter Na
+ (0x009aa, 0x009b0,), # Bengali Letter Pa ..Bengali Letter Ra
+ (0x009b2, 0x009b2,), # Bengali Letter La
+ (0x009b6, 0x009b9,), # Bengali Letter Sha ..Bengali Letter Ha
+ (0x009dc, 0x009dd,), # Bengali Letter Rra ..Bengali Letter Rha
+ (0x009df, 0x009df,), # Bengali Letter Yya
+ (0x009f0, 0x009f1,), # Bengali Letter Ra With M..Bengali Letter Ra With L
+ (0x00a95, 0x00aa8,), # Gujarati Letter Ka ..Gujarati Letter Na
+ (0x00aaa, 0x00ab0,), # Gujarati Letter Pa ..Gujarati Letter Ra
+ (0x00ab2, 0x00ab3,), # Gujarati Letter La ..Gujarati Letter Lla
+ (0x00ab5, 0x00ab9,), # Gujarati Letter Va ..Gujarati Letter Ha
+ (0x00af9, 0x00af9,), # Gujarati Letter Zha
+ (0x00b15, 0x00b28,), # Oriya Letter Ka ..Oriya Letter Na
+ (0x00b2a, 0x00b30,), # Oriya Letter Pa ..Oriya Letter Ra
+ (0x00b32, 0x00b33,), # Oriya Letter La ..Oriya Letter Lla
+ (0x00b35, 0x00b39,), # Oriya Letter Va ..Oriya Letter Ha
+ (0x00b5c, 0x00b5d,), # Oriya Letter Rra ..Oriya Letter Rha
+ (0x00b5f, 0x00b5f,), # Oriya Letter Yya
+ (0x00b71, 0x00b71,), # Oriya Letter Wa
+ (0x00c15, 0x00c28,), # Telugu Letter Ka ..Telugu Letter Na
+ (0x00c2a, 0x00c39,), # Telugu Letter Pa ..Telugu Letter Ha
+ (0x00c58, 0x00c5a,), # Telugu Letter Tsa ..Telugu Letter Rrra
+ (0x00d15, 0x00d3a,), # Malayalam Letter Ka ..Malayalam Letter Ttta
+ (0x01000, 0x0102a,), # Myanmar Letter Ka ..Myanmar Letter Au
+ (0x0103f, 0x0103f,), # Myanmar Letter Great Sa
+ (0x01050, 0x01055,), # Myanmar Letter Sha ..Myanmar Letter Vocalic L
+ (0x0105a, 0x0105d,), # Myanmar Letter Mon Nga ..Myanmar Letter Mon Bbe
+ (0x01061, 0x01061,), # Myanmar Letter Sgaw Karen Sha
+ (0x01065, 0x01066,), # Myanmar Letter Western P..Myanmar Letter Western P
+ (0x0106e, 0x01070,), # Myanmar Letter Eastern P..Myanmar Letter Eastern P
+ (0x01075, 0x01081,), # Myanmar Letter Shan Ka ..Myanmar Letter Shan Ha
+ (0x0108e, 0x0108e,), # Myanmar Letter Rumai Palaung Fa
+ (0x01780, 0x017b3,), # Khmer Letter Ka ..Khmer Independent Vowel
+ (0x01a20, 0x01a54,), # Tai Tham Letter High Ka ..Tai Tham Letter Great Sa
+ (0x01b0b, 0x01b0c,), # Balinese Letter Ra Repa ..Balinese Letter Ra Repa
+ (0x01b13, 0x01b33,), # Balinese Letter Ka ..Balinese Letter Ha
+ (0x01b45, 0x01b4c,), # Balinese Letter Kaf Sasa..Balinese Letter Archaic
+ (0x01b83, 0x01ba0,), # Sundanese Letter A ..Sundanese Letter Ha
+ (0x01bae, 0x01baf,), # Sundanese Letter Kha ..Sundanese Letter Sya
+ (0x01bbb, 0x01bbd,), # Sundanese Letter Reu ..Sundanese Letter Bha
+ (0x0a989, 0x0a98b,), # Javanese Letter Pa Cerek..Javanese Letter Nga Lele
+ (0x0a98f, 0x0a9b2,), # Javanese Letter Ka ..Javanese Letter Ha
+ (0x0a9e0, 0x0a9e4,), # Myanmar Letter Shan Gha ..Myanmar Letter Shan Bha
+ (0x0a9e7, 0x0a9ef,), # Myanmar Letter Tai Laing..Myanmar Letter Tai Laing
+ (0x0a9fa, 0x0a9fe,), # Myanmar Letter Tai Laing..Myanmar Letter Tai Laing
+ (0x0aa60, 0x0aa6f,), # Myanmar Letter Khamti Ga..Myanmar Letter Khamti Fa
+ (0x0aa71, 0x0aa73,), # Myanmar Letter Khamti Xa..Myanmar Letter Khamti Ra
+ (0x0aa7a, 0x0aa7a,), # Myanmar Letter Aiton Ra
+ (0x0aa7e, 0x0aa7f,), # Myanmar Letter Shwe Pala..Myanmar Letter Shwe Pala
+ (0x0aae0, 0x0aaea,), # Meetei Mayek Letter E ..Meetei Mayek Letter Ssa
+ (0x0abc0, 0x0abda,), # Meetei Mayek Letter Kok ..Meetei Mayek Letter Bham
+ (0x10a00, 0x10a00,), # Kharoshthi Letter A
+ (0x10a10, 0x10a13,), # Kharoshthi Letter Ka ..Kharoshthi Letter Gha
+ (0x10a15, 0x10a17,), # Kharoshthi Letter Ca ..Kharoshthi Letter Ja
+ (0x10a19, 0x10a35,), # Kharoshthi Letter Nya ..Kharoshthi Letter Vha
+ (0x11103, 0x11126,), # Chakma Letter Aa ..Chakma Letter Haa
+ (0x11144, 0x11144,), # Chakma Letter Lhaa
+ (0x11147, 0x11147,), # Chakma Letter Vaa
+ (0x11380, 0x11389,), # (nil)
+ (0x1138b, 0x1138b,), # (nil)
+ (0x1138e, 0x1138e,), # (nil)
+ (0x11390, 0x113b5,), # (nil)
+ (0x11900, 0x11906,), # Dives Akuru Letter A ..Dives Akuru Letter E
+ (0x11909, 0x11909,), # Dives Akuru Letter O
+ (0x1190c, 0x11913,), # Dives Akuru Letter Ka ..Dives Akuru Letter Ja
+ (0x11915, 0x11916,), # Dives Akuru Letter Nya ..Dives Akuru Letter Tta
+ (0x11918, 0x1192f,), # Dives Akuru Letter Dda ..Dives Akuru Letter Za
+ (0x11a00, 0x11a00,), # Zanabazar Square Letter A
+ (0x11a0b, 0x11a32,), # Zanabazar Square Letter ..Zanabazar Square Letter
+ (0x11a50, 0x11a50,), # Soyombo Letter A
+ (0x11a5c, 0x11a83,), # Soyombo Letter Ka ..Soyombo Letter Kssa
+ (0x11f04, 0x11f10,), # Kawi Letter A ..Kawi Letter O
+ (0x11f12, 0x11f33,), # Kawi Letter Ka ..Kawi Letter Jnya
+)
+
+INCB_EXTEND = (
+ # Source: DerivedCoreProperties
+ # Date: see file
+ #
+ (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
+ (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli
+ (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg
+ (0x005bf, 0x005bf,), # Hebrew Point Rafe
+ (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
+ (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
+ (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
+ (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
+ (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
+ (0x00670, 0x00670,), # Arabic Letter Superscript Alef
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
+ (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
+ (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
+ (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
+ (0x00711, 0x00711,), # Syriac Letter Superscript Alaph
+ (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
+ (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
+ (0x007eb, 0x007f3,), # Nko Combining Short High..Nko Combining Double Dot
+ (0x007fd, 0x007fd,), # Nko Dantayalan
+ (0x00816, 0x00819,), # Samaritan Mark In ..Samaritan Mark Dagesh
+ (0x0081b, 0x00823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A
+ (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
+ (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
+ (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
+ (0x00897, 0x0089f,), # (nil) ..Arabic Half Madda Over M
+ (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S
+ (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara
+ (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe
+ (0x0093c, 0x0093c,), # Devanagari Sign Nukta
+ (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai
+ (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
+ (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo
+ (0x00981, 0x00981,), # Bengali Sign Candrabindu
+ (0x009bc, 0x009bc,), # Bengali Sign Nukta
+ (0x009be, 0x009be,), # Bengali Vowel Sign Aa
+ (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal
+ (0x009d7, 0x009d7,), # Bengali Au Length Mark
+ (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal
+ (0x009fe, 0x009fe,), # Bengali Sandhi Mark
+ (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi
+ (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta
+ (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu
+ (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai
+ (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama
+ (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat
+ (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak
+ (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash
+ (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara
+ (0x00abc, 0x00abc,), # Gujarati Sign Nukta
+ (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand
+ (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai
+ (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca
+ (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle
+ (0x00b01, 0x00b01,), # Oriya Sign Candrabindu
+ (0x00b3c, 0x00b3c,), # Oriya Sign Nukta
+ (0x00b3e, 0x00b3f,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign I
+ (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic
+ (0x00b55, 0x00b57,), # Oriya Sign Overline ..Oriya Au Length Mark
+ (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic
+ (0x00b82, 0x00b82,), # Tamil Sign Anusvara
+ (0x00bbe, 0x00bbe,), # Tamil Vowel Sign Aa
+ (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii
+ (0x00bcd, 0x00bcd,), # Tamil Sign Virama
+ (0x00bd7, 0x00bd7,), # Tamil Au Length Mark
+ (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above
+ (0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above
+ (0x00c3c, 0x00c3c,), # Telugu Sign Nukta
+ (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii
+ (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai
+ (0x00c4a, 0x00c4c,), # Telugu Vowel Sign O ..Telugu Vowel Sign Au
+ (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark
+ (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali
+ (0x00c81, 0x00c81,), # Kannada Sign Candrabindu
+ (0x00cbc, 0x00cbc,), # Kannada Sign Nukta
+ (0x00cbf, 0x00cc0,), # Kannada Vowel Sign I ..Kannada Vowel Sign Ii
+ (0x00cc2, 0x00cc2,), # Kannada Vowel Sign Uu
+ (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai
+ (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama
+ (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark
+ (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal
+ (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin
+ (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular
+ (0x00d3e, 0x00d3e,), # Malayalam Vowel Sign Aa
+ (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc
+ (0x00d57, 0x00d57,), # Malayalam Au Length Mark
+ (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc
+ (0x00d81, 0x00d81,), # Sinhala Sign Candrabindu
+ (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna
+ (0x00dcf, 0x00dcf,), # Sinhala Vowel Sign Aela-pilla
+ (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti
+ (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla
+ (0x00ddf, 0x00ddf,), # Sinhala Vowel Sign Gayanukitta
+ (0x00e31, 0x00e31,), # Thai Character Mai Han-akat
+ (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu
+ (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan
+ (0x00eb1, 0x00eb1,), # Lao Vowel Sign Mai Kan
+ (0x00eb4, 0x00ebc,), # Lao Vowel Sign I ..Lao Semivowel Sign Lo
+ (0x00ec8, 0x00ece,), # Lao Tone Mai Ek ..Lao Yamakkan
+ (0x00f18, 0x00f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig
+ (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla
+ (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags
+ (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru
+ (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga
+ (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta
+ (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags
+ (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter
+ (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter
+ (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan
+ (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu
+ (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below
+ (0x0103a, 0x0103a,), # Myanmar Sign Asat
+ (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M
+ (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal
+ (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M
+ (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah
+ (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa
+ (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan
+ (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone
+ (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai
+ (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin
+ (0x01712, 0x01715,), # Tagalog Vowel Sign I ..Tagalog Sign Pamudpod
+ (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod
+ (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U
+ (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U
+ (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa
+ (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua
+ (0x017c6, 0x017c6,), # Khmer Sign Nikahit
+ (0x017c9, 0x017d1,), # Khmer Sign Muusikatoan ..Khmer Sign Viriam
+ (0x017d3, 0x017d3,), # Khmer Sign Bathamasat
+ (0x017dd, 0x017dd,), # Khmer Sign Atthacan
+ (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation
+ (0x0180f, 0x0180f,), # Mongolian Free Variation Selector Four
+ (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal
+ (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga
+ (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U
+ (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O
+ (0x01932, 0x01932,), # Limbu Small Letter Anusvara
+ (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i
+ (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U
+ (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae
+ (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La
+ (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign
+ (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat
+ (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B
+ (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue
+ (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot
+ (0x01ab0, 0x01add,), # Combining Doubled Circum..(nil)
+ (0x01ae0, 0x01aeb,), # (nil)
+ (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang
+ (0x01b34, 0x01b3d,), # Balinese Sign Rerekan ..Balinese Vowel Sign La L
+ (0x01b42, 0x01b43,), # Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe
+ (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol
+ (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar
+ (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan
+ (0x01ba8, 0x01baa,), # Sundanese Vowel Sign Pam..Sundanese Sign Pamaaeh
+ (0x01bac, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign
+ (0x01be6, 0x01be6,), # Batak Sign Tompi
+ (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee
+ (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O
+ (0x01bef, 0x01bf3,), # Batak Vowel Sign U For S..Batak Panongonan
+ (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T
+ (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta
+ (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha
+ (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash
+ (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda
+ (0x01ced, 0x01ced,), # Vedic Sign Tiryak
+ (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above
+ (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A
+ (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea
+ (0x0200d, 0x0200d,), # Zero Width Joiner
+ (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
+ (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
+ (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
+ (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
+ (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
+ (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
+ (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
+ (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
+ (0x0a6f0, 0x0a6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk
+ (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara
+ (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta
+ (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara
+ (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
+ (0x0a82c, 0x0a82c,), # Syloti Nagri Sign Alternate Hasanta
+ (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi
+ (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig
+ (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay
+ (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop
+ (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R
+ (0x0a953, 0x0a953,), # Rejang Virama
+ (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar
+ (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu
+ (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku
+ (0x0a9bc, 0x0a9bd,), # Javanese Vowel Sign Pepe..Javanese Consonant Sign
+ (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw
+ (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe
+ (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue
+ (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa
+ (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng
+ (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M
+ (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2
+ (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang
+ (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U
+ (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia
+ (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek
+ (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho
+ (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign
+ (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap
+ (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap
+ (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek
+ (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika
+ (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
+ (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
+ (0x0ff9e, 0x0ff9f,), # Halfwidth Katakana Voice..Halfwidth Katakana Semi-
+ (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
+ (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
+ (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
+ (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
+ (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
+ (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga
+ (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo
+ (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation
+ (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas
+ (0x10d69, 0x10d6d,), # (nil)
+ (0x10eab, 0x10eac,), # Yezidi Combining Hamza M..Yezidi Combining Madda M
+ (0x10efa, 0x10eff,), # (nil) ..Arabic Small Low Word Ma
+ (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke
+ (0x10f82, 0x10f85,), # Old Uyghur Combining Dot..Old Uyghur Combining Two
+ (0x11001, 0x11001,), # Brahmi Sign Anusvara
+ (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama
+ (0x11070, 0x11070,), # Brahmi Sign Old Tamil Virama
+ (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
+ (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara
+ (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai
+ (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta
+ (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R
+ (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
+ (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu
+ (0x1112d, 0x11132,), # Chakma Vowel Sign Ai ..Chakma Au Mark
+ (0x11134, 0x11134,), # Chakma Maayyaa
+ (0x11173, 0x11173,), # Mahajani Sign Nukta
+ (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara
+ (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O
+ (0x111c0, 0x111c0,), # Sharada Sign Virama
+ (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe
+ (0x111cf, 0x111cf,), # Sharada Sign Inverted Candrabindu
+ (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai
+ (0x11234, 0x11237,), # Khojki Sign Anusvara ..Khojki Sign Shadda
+ (0x1123e, 0x1123e,), # Khojki Sign Sukun
+ (0x11241, 0x11241,), # Khojki Vowel Sign Vocalic R
+ (0x112df, 0x112df,), # Khudawadi Sign Anusvara
+ (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama
+ (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu
+ (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta
+ (0x1133e, 0x1133e,), # Grantha Vowel Sign Aa
+ (0x11340, 0x11340,), # Grantha Vowel Sign Ii
+ (0x1134d, 0x1134d,), # Grantha Sign Virama
+ (0x11357, 0x11357,), # Grantha Au Length Mark
+ (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit
+ (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter
+ (0x113b8, 0x113b8,), # (nil)
+ (0x113bb, 0x113c0,), # (nil)
+ (0x113c2, 0x113c2,), # (nil)
+ (0x113c5, 0x113c5,), # (nil)
+ (0x113c7, 0x113c9,), # (nil)
+ (0x113ce, 0x113cf,), # (nil)
+ (0x113d2, 0x113d2,), # (nil)
+ (0x113e1, 0x113e2,), # (nil)
+ (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai
+ (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara
+ (0x11446, 0x11446,), # Newa Sign Nukta
+ (0x1145e, 0x1145e,), # Newa Sandhi Mark
+ (0x114b0, 0x114b0,), # Tirhuta Vowel Sign Aa
+ (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal
+ (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E
+ (0x114bd, 0x114bd,), # Tirhuta Vowel Sign Short O
+ (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara
+ (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta
+ (0x115af, 0x115af,), # Siddham Vowel Sign Aa
+ (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal
+ (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara
+ (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta
+ (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter
+ (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai
+ (0x1163d, 0x1163d,), # Modi Sign Anusvara
+ (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra
+ (0x116ab, 0x116ab,), # Takri Sign Anusvara
+ (0x116ad, 0x116ad,), # Takri Vowel Sign Aa
+ (0x116b0, 0x116b7,), # Takri Vowel Sign U ..Takri Sign Nukta
+ (0x1171d, 0x1171d,), # Ahom Consonant Sign Medial La
+ (0x1171f, 0x1171f,), # Ahom Consonant Sign Medial Ligating Ra
+ (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu
+ (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer
+ (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign Anusvara
+ (0x11839, 0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta
+ (0x11930, 0x11930,), # Dives Akuru Vowel Sign Aa
+ (0x1193b, 0x1193d,), # Dives Akuru Sign Anusvar..Dives Akuru Sign Halanta
+ (0x11943, 0x11943,), # Dives Akuru Sign Nukta
+ (0x119d4, 0x119d7,), # Nandinagari Vowel Sign U..Nandinagari Vowel Sign V
+ (0x119da, 0x119db,), # Nandinagari Vowel Sign E..Nandinagari Vowel Sign A
+ (0x119e0, 0x119e0,), # Nandinagari Sign Virama
+ (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L
+ (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An
+ (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster
+ (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe
+ (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar
+ (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara
+ (0x11a98, 0x11a98,), # Soyombo Gemination Mark
+ (0x11b60, 0x11b60,), # (nil)
+ (0x11b62, 0x11b64,), # (nil)
+ (0x11b66, 0x11b66,), # (nil)
+ (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc
+ (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara
+ (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama
+ (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter
+ (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa
+ (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E
+ (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu
+ (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
+ (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E
+ (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign
+ (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama
+ (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara
+ (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign
+ (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara
+ (0x11d97, 0x11d97,), # Gunjala Gondi Virama
+ (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U
+ (0x11f00, 0x11f01,), # Kawi Sign Candrabindu ..Kawi Sign Anusvara
+ (0x11f36, 0x11f3a,), # Kawi Vowel Sign I ..Kawi Vowel Sign Vocalic
+ (0x11f40, 0x11f41,), # Kawi Vowel Sign Eu ..Kawi Sign Killer
+ (0x11f5a, 0x11f5a,), # (nil)
+ (0x13440, 0x13440,), # Egyptian Hieroglyph Mirror Horizontally
+ (0x13447, 0x13455,), # Egyptian Hieroglyph Modi..Egyptian Hieroglyph Modi
+ (0x1611e, 0x16129,), # (nil)
+ (0x1612d, 0x1612f,), # (nil)
+ (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High
+ (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta
+ (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar
+ (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below
+ (0x16fe4, 0x16fe4,), # Khitan Small Script Filler
+ (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea
+ (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark
+ (0x1cf00, 0x1cf2d,), # Znamenny Combining Mark ..Znamenny Combining Mark
+ (0x1cf30, 0x1cf46,), # Znamenny Combining Tonal..Znamenny Priznak Modifie
+ (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d16d, 0x1d172,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
+ (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
+ (0x1da00, 0x1da36,), # Signwriting Head Rim ..Signwriting Air Sucking
+ (0x1da3b, 0x1da6c,), # Signwriting Mouth Closed..Signwriting Excitement
+ (0x1da75, 0x1da75,), # Signwriting Upper Body Tilting From Hip Joints
+ (0x1da84, 0x1da84,), # Signwriting Location Head Neck
+ (0x1da9b, 0x1da9f,), # Signwriting Fill Modifie..Signwriting Fill Modifie
+ (0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting Rotation Mod
+ (0x1e000, 0x1e006,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e008, 0x1e018,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e01b, 0x1e021,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e023, 0x1e024,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let
+ (0x1e08f, 0x1e08f,), # Combining Cyrillic Small Letter Byelorussian-ukr
+ (0x1e130, 0x1e136,), # Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T
+ (0x1e2ae, 0x1e2ae,), # Toto Sign Rising Tone
+ (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini
+ (0x1e4ec, 0x1e4ef,), # Nag Mundari Sign Muhor ..Nag Mundari Sign Sutuh
+ (0x1e5ee, 0x1e5ef,), # (nil)
+ (0x1e6e3, 0x1e6e3,), # (nil)
+ (0x1e6e6, 0x1e6e6,), # (nil)
+ (0x1e6ee, 0x1e6ef,), # (nil)
+ (0x1e6f5, 0x1e6f5,), # (nil)
+ (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
+ (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
+ (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
+ (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
+ (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+)
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_vs15.py b/contrib/python/wcwidth/py3/wcwidth/table_vs15.py
deleted file mode 100644
index a5ede6f961e..00000000000
--- a/contrib/python/wcwidth/py3/wcwidth/table_vs15.py
+++ /dev/null
@@ -1,103 +0,0 @@
-"""
-Exports VS15_WIDE_TO_NARROW table keyed by supporting unicode version level.
-
-This code generated by wcwidth/bin/update-tables.py on 2024-02-14 19:59:22 UTC.
-"""
-VS15_WIDE_TO_NARROW = {
- '9.0.0': (
- # Source: 9.0.0
- # Date: 2023-02-01, 02:22:54 GMT
- #
- (0x0231a, 0x0231b,), # Watch ..Hourglass
- (0x023e9, 0x023ec,), # Black Right-pointing Dou..Black Down-pointing Doub
- (0x023f0, 0x023f0,), # Alarm Clock
- (0x023f3, 0x023f3,), # Hourglass With Flowing Sand
- (0x025fd, 0x025fe,), # White Medium Small Squar..Black Medium Small Squar
- (0x02614, 0x02615,), # Umbrella With Rain Drops..Hot Beverage
- (0x02648, 0x02653,), # Aries ..Pisces
- (0x0267f, 0x0267f,), # Wheelchair Symbol
- (0x02693, 0x02693,), # Anchor
- (0x026a1, 0x026a1,), # High Voltage Sign
- (0x026aa, 0x026ab,), # Medium White Circle ..Medium Black Circle
- (0x026bd, 0x026be,), # Soccer Ball ..Baseball
- (0x026c4, 0x026c5,), # Snowman Without Snow ..Sun Behind Cloud
- (0x026ce, 0x026ce,), # Ophiuchus
- (0x026d4, 0x026d4,), # No Entry
- (0x026ea, 0x026ea,), # Church
- (0x026f2, 0x026f3,), # Fountain ..Flag In Hole
- (0x026f5, 0x026f5,), # Sailboat
- (0x026fa, 0x026fa,), # Tent
- (0x026fd, 0x026fd,), # Fuel Pump
- (0x02705, 0x02705,), # White Heavy Check Mark
- (0x0270a, 0x0270b,), # Raised Fist ..Raised Hand
- (0x02728, 0x02728,), # Sparkles
- (0x0274c, 0x0274c,), # Cross Mark
- (0x0274e, 0x0274e,), # Negative Squared Cross Mark
- (0x02753, 0x02755,), # Black Question Mark Orna..White Exclamation Mark O
- (0x02757, 0x02757,), # Heavy Exclamation Mark Symbol
- (0x02795, 0x02797,), # Heavy Plus Sign ..Heavy Division Sign
- (0x027b0, 0x027b0,), # Curly Loop
- (0x027bf, 0x027bf,), # Double Curly Loop
- (0x02b1b, 0x02b1c,), # Black Large Square ..White Large Square
- (0x02b50, 0x02b50,), # White Medium Star
- (0x02b55, 0x02b55,), # Heavy Large Circle
- (0x03030, 0x03030,), # Wavy Dash
- (0x0303d, 0x0303d,), # Part Alternation Mark
- (0x03297, 0x03297,), # Circled Ideograph Congratulation
- (0x03299, 0x03299,), # Circled Ideograph Secret
- (0x1f004, 0x1f004,), # Mahjong Tile Red Dragon
- (0x1f202, 0x1f202,), # Squared Katakana Sa
- (0x1f21a, 0x1f21a,), # Squared Cjk Unified Ideograph-7121
- (0x1f22f, 0x1f22f,), # Squared Cjk Unified Ideograph-6307
- (0x1f237, 0x1f237,), # Squared Cjk Unified Ideograph-6708
- (0x1f30d, 0x1f30f,), # Earth Globe Europe-afric..Earth Globe Asia-austral
- (0x1f315, 0x1f315,), # Full Moon Symbol
- (0x1f31c, 0x1f31c,), # Last Quarter Moon With Face
- (0x1f378, 0x1f378,), # Cocktail Glass
- (0x1f393, 0x1f393,), # Graduation Cap
- (0x1f3a7, 0x1f3a7,), # Headphone
- (0x1f3ac, 0x1f3ae,), # Clapper Board ..Video Game
- (0x1f3c2, 0x1f3c2,), # Snowboarder
- (0x1f3c4, 0x1f3c4,), # Surfer
- (0x1f3c6, 0x1f3c6,), # Trophy
- (0x1f3ca, 0x1f3ca,), # Swimmer
- (0x1f3e0, 0x1f3e0,), # House Building
- (0x1f3ed, 0x1f3ed,), # Factory
- (0x1f408, 0x1f408,), # Cat
- (0x1f415, 0x1f415,), # Dog
- (0x1f41f, 0x1f41f,), # Fish
- (0x1f426, 0x1f426,), # Bird
- (0x1f442, 0x1f442,), # Ear
- (0x1f446, 0x1f449,), # White Up Pointing Backha..White Right Pointing Bac
- (0x1f44d, 0x1f44e,), # Thumbs Up Sign ..Thumbs Down Sign
- (0x1f453, 0x1f453,), # Eyeglasses
- (0x1f46a, 0x1f46a,), # Family
- (0x1f47d, 0x1f47d,), # Extraterrestrial Alien
- (0x1f4a3, 0x1f4a3,), # Bomb
- (0x1f4b0, 0x1f4b0,), # Money Bag
- (0x1f4b3, 0x1f4b3,), # Credit Card
- (0x1f4bb, 0x1f4bb,), # Personal Computer
- (0x1f4bf, 0x1f4bf,), # Optical Disc
- (0x1f4cb, 0x1f4cb,), # Clipboard
- (0x1f4da, 0x1f4da,), # Books
- (0x1f4df, 0x1f4df,), # Pager
- (0x1f4e4, 0x1f4e6,), # Outbox Tray ..Package
- (0x1f4ea, 0x1f4ed,), # Closed Mailbox With Lowe..Open Mailbox With Lowere
- (0x1f4f7, 0x1f4f7,), # Camera
- (0x1f4f9, 0x1f4fb,), # Video Camera ..Radio
- (0x1f508, 0x1f508,), # Speaker
- (0x1f50d, 0x1f50d,), # Left-pointing Magnifying Glass
- (0x1f512, 0x1f513,), # Lock ..Open Lock
- (0x1f550, 0x1f567,), # Clock Face One Oclock ..Clock Face Twelve-thirty
- (0x1f610, 0x1f610,), # Neutral Face
- (0x1f687, 0x1f687,), # Metro
- (0x1f68d, 0x1f68d,), # Oncoming Bus
- (0x1f691, 0x1f691,), # Ambulance
- (0x1f694, 0x1f694,), # Oncoming Police Car
- (0x1f698, 0x1f698,), # Oncoming Automobile
- (0x1f6ad, 0x1f6ad,), # No Smoking Symbol
- (0x1f6b2, 0x1f6b2,), # Bicycle
- (0x1f6b9, 0x1f6ba,), # Mens Symbol ..Womens Symbol
- (0x1f6bc, 0x1f6bc,), # Baby Symbol
- ),
-}
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_vs16.py b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py
index 1ca25c66368..70e4a7373ff 100644
--- a/contrib/python/wcwidth/py3/wcwidth/table_vs16.py
+++ b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py
@@ -3,6 +3,7 @@ Exports VS16_NARROW_TO_WIDE table keyed by supporting unicode version level.
This code generated by wcwidth/bin/update-tables.py on 2025-09-15 16:57:50 UTC.
"""
+# pylint: disable=duplicate-code
VS16_NARROW_TO_WIDE = {
'9.0.0': (
# Source: 9.0.0
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_wide.py b/contrib/python/wcwidth/py3/wcwidth/table_wide.py
index 5139b1f0d3b..3f422d48d33 100644
--- a/contrib/python/wcwidth/py3/wcwidth/table_wide.py
+++ b/contrib/python/wcwidth/py3/wcwidth/table_wide.py
@@ -1,8 +1,9 @@
"""
Exports WIDE_EASTASIAN table keyed by supporting unicode version level.
-This code generated by wcwidth/bin/update-tables.py on 2025-09-19 15:55:08 UTC.
+This code generated by wcwidth/bin/update-tables.py on 2026-01-19 22:42:32 UTC.
"""
+# pylint: disable=duplicate-code
WIDE_EASTASIAN = {
'4.1.0': (
# Source: EastAsianWidth-4.1.0.txt
@@ -20,7 +21,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312c,), # Bopomofo Letter B ..Bopomofo Letter Gn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H
(0x031c0, 0x031cf,), # Cjk Stroke T ..Cjk Stroke N
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -59,7 +61,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312c,), # Bopomofo Letter B ..Bopomofo Letter Gn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H
(0x031c0, 0x031cf,), # Cjk Stroke T ..Cjk Stroke N
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -98,7 +101,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -136,7 +140,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -175,7 +180,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -216,7 +222,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -257,7 +264,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -298,7 +306,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -339,7 +348,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -380,7 +390,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -454,7 +465,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -567,7 +579,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312e,), # Bopomofo Letter B ..Bopomofo Letter O With D
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -680,7 +693,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -795,7 +809,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -916,7 +931,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy
(0x031c0, 0x031e3,), # Cjk Stroke T ..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
@@ -1036,7 +1052,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
(0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto
@@ -1158,7 +1175,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
(0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto
@@ -1285,7 +1303,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q
(0x031f0, 0x0321e,), # Katakana Letter Small Ku..Parenthesized Korean Cha
(0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto
@@ -1411,7 +1430,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q
(0x031ef, 0x0321e,), # Ideographic Description ..Parenthesized Korean Cha
(0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto
@@ -1539,7 +1559,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031e5,), # Ideographic Annotation L..(nil)
(0x031ef, 0x0321e,), # Ideographic Description ..Parenthesized Korean Cha
(0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto
@@ -1667,7 +1688,8 @@ WIDE_EASTASIAN = {
(0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke
(0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
(0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn
- (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae
+ (0x03131, 0x03163,), # Hangul Letter Kiyeok ..Hangul Letter I
+ (0x03165, 0x0318e,), # Hangul Letter Ssangnieun..Hangul Letter Araeae
(0x03190, 0x031e5,), # Ideographic Annotation L..(nil)
(0x031ef, 0x0321e,), # Ideographic Description ..Parenthesized Korean Cha
(0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_zero.py b/contrib/python/wcwidth/py3/wcwidth/table_zero.py
index ef99d09e9f4..50ac7fbfd09 100644
--- a/contrib/python/wcwidth/py3/wcwidth/table_zero.py
+++ b/contrib/python/wcwidth/py3/wcwidth/table_zero.py
@@ -1,14 +1,18 @@
"""
Exports ZERO_WIDTH table keyed by supporting unicode version level.
-This code generated by wcwidth/bin/update-tables.py on 2025-09-18 07:49:05 UTC.
+This code generated by wcwidth/bin/update-tables.py on 2026-01-20 16:49:11 UTC.
"""
+# pylint: disable=duplicate-code
ZERO_WIDTH = {
'4.1.0': (
# Source: DerivedGeneralCategory-4.1.0.txt
# Date: 2005-02-26, 02:35:50 GMT [MD]
#
- (0x00000, 0x00000,), # (nil)
+ (0x00000, 0x00008,), # (nil)
+ (0x0000e, 0x0001f,), # (nil)
+ (0x0007f, 0x00084,), # (nil)
+ (0x00086, 0x0009f,), # (nil)
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili
(0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli
@@ -124,21 +128,24 @@ ZERO_WIDTH = {
(0x01dc0, 0x01dc3,), # Combining Dotted Grave A..Combining Suspension Mar
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02063,), # Word Joiner ..Invisible Separator
- (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020eb,), # Combining Left Harpoon A..Combining Long Double So
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara
(0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta
(0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara
(0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
- (0x0d7b0, 0x0d7ff,), # Hangul Jungseong O-yeo ..(nil)
+ (0x0d7b0, 0x0dfff,), # Hangul Jungseong O-yeo ..(nil)
(0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika
+ (0x0fdd0, 0x0fdef,), # (nil)
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe23,), # Combining Ligature Left ..Combining Double Tilde R
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
+ (0x0fffe, 0x0ffff,), # (nil)
(0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
(0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
(0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga
@@ -149,15 +156,31 @@ ZERO_WIDTH = {
(0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0x1fffe, 0x1ffff,), # (nil)
+ (0x2fffe, 0x2ffff,), # (nil)
+ (0x3fffe, 0x3ffff,), # (nil)
+ (0x4fffe, 0x4ffff,), # (nil)
+ (0x5fffe, 0x5ffff,), # (nil)
+ (0x6fffe, 0x6ffff,), # (nil)
+ (0x7fffe, 0x7ffff,), # (nil)
+ (0x8fffe, 0x8ffff,), # (nil)
+ (0x9fffe, 0x9ffff,), # (nil)
+ (0xafffe, 0xaffff,), # (nil)
+ (0xbfffe, 0xbffff,), # (nil)
+ (0xcfffe, 0xcffff,), # (nil)
+ (0xdfffe, 0xe0fff,), # (nil)
+ (0xefffe, 0xeffff,), # (nil)
+ (0xffffe, 0xfffff,), # (nil)
+ (0x10fffe, 0x10ffff,), # (nil)
),
'5.0.0': (
# Source: DerivedGeneralCategory-5.0.0.txt
# Date: 2006-02-27, 23:41:27 GMT [MD]
#
- (0x00000, 0x00000,), # (nil)
+ (0x00000, 0x00008,), # (nil)
+ (0x0000e, 0x0001f,), # (nil)
+ (0x0007f, 0x00084,), # (nil)
+ (0x00086, 0x0009f,), # (nil)
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le
(0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili
(0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli
@@ -278,21 +301,24 @@ ZERO_WIDTH = {
(0x01dfe, 0x01dff,), # Combining Left Arrowhead..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02063,), # Word Joiner ..Invisible Separator
- (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020ef,), # Combining Left Harpoon A..Combining Right Arrow Be
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara
(0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta
(0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara
(0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign
- (0x0d7b0, 0x0d7ff,), # Hangul Jungseong O-yeo ..(nil)
+ (0x0d7b0, 0x0dfff,), # Hangul Jungseong O-yeo ..(nil)
(0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika
+ (0x0fdd0, 0x0fdef,), # (nil)
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe23,), # Combining Ligature Left ..Combining Double Tilde R
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
+ (0x0fffe, 0x0ffff,), # (nil)
(0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
(0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
(0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga
@@ -303,9 +329,22 @@ ZERO_WIDTH = {
(0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0x1fffe, 0x1ffff,), # (nil)
+ (0x2fffe, 0x2ffff,), # (nil)
+ (0x3fffe, 0x3ffff,), # (nil)
+ (0x4fffe, 0x4ffff,), # (nil)
+ (0x5fffe, 0x5ffff,), # (nil)
+ (0x6fffe, 0x6ffff,), # (nil)
+ (0x7fffe, 0x7ffff,), # (nil)
+ (0x8fffe, 0x8ffff,), # (nil)
+ (0x9fffe, 0x9ffff,), # (nil)
+ (0xafffe, 0xaffff,), # (nil)
+ (0xbfffe, 0xbffff,), # (nil)
+ (0xcfffe, 0xcffff,), # (nil)
+ (0xdfffe, 0xe0fff,), # (nil)
+ (0xefffe, 0xeffff,), # (nil)
+ (0xffffe, 0xfffff,), # (nil)
+ (0x10fffe, 0x10ffff,), # (nil)
),
'5.1.0': (
# Source: DerivedGeneralCategory-5.1.0.txt
@@ -444,12 +483,12 @@ ZERO_WIDTH = {
(0x01dfe, 0x01dff,), # Combining Left Arrowhead..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a67c, 0x0a67d,), # Combining Cyrillic Kavyk..Combining Cyrillic Payer
(0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara
@@ -468,7 +507,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
(0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
@@ -480,9 +520,7 @@ ZERO_WIDTH = {
(0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'5.2.0': (
# Source: DerivedGeneralCategory-5.2.0.txt
@@ -633,13 +671,13 @@ ZERO_WIDTH = {
(0x01dfd, 0x01dff,), # Combining Almost Equal T..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a67c, 0x0a67d,), # Combining Cyrillic Kavyk..Combining Cyrillic Payer
(0x0a6f0, 0x0a6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk
@@ -670,7 +708,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
(0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
@@ -685,9 +724,7 @@ ZERO_WIDTH = {
(0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'6.0.0': (
# Source: DerivedGeneralCategory-6.0.0.txt
@@ -841,14 +878,14 @@ ZERO_WIDTH = {
(0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a67c, 0x0a67d,), # Combining Cyrillic Kavyk..Combining Cyrillic Payer
(0x0a6f0, 0x0a6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk
@@ -879,7 +916,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
(0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
@@ -896,9 +934,7 @@ ZERO_WIDTH = {
(0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'6.1.0': (
# Source: DerivedGeneralCategory-6.1.0.txt
@@ -1053,14 +1089,14 @@ ZERO_WIDTH = {
(0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69f, 0x0a69f,), # Combining Cyrillic Letter Iotified E
@@ -1094,7 +1130,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
(0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
@@ -1118,9 +1155,7 @@ ZERO_WIDTH = {
(0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'6.2.0': (
# Source: DerivedGeneralCategory-6.2.0.txt
@@ -1275,14 +1310,14 @@ ZERO_WIDTH = {
(0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69f, 0x0a69f,), # Combining Cyrillic Letter Iotified E
@@ -1316,7 +1351,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
(0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
@@ -1340,9 +1376,7 @@ ZERO_WIDTH = {
(0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'6.3.0': (
# Source: DerivedGeneralCategory-6.3.0.txt
@@ -1498,14 +1532,14 @@ ZERO_WIDTH = {
(0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69f, 0x0a69f,), # Combining Cyrillic Letter Iotified E
@@ -1539,7 +1573,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo
(0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O
@@ -1563,9 +1598,7 @@ ZERO_WIDTH = {
(0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'7.0.0': (
# Source: DerivedGeneralCategory-7.0.0.txt
@@ -1722,14 +1755,14 @@ ZERO_WIDTH = {
(0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69f, 0x0a69f,), # Combining Cyrillic Letter Iotified E
@@ -1764,7 +1797,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2d,), # Combining Ligature Left ..Combining Conjoining Mac
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -1812,9 +1846,7 @@ ZERO_WIDTH = {
(0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining
(0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'8.0.0': (
# Source: DerivedGeneralCategory-8.0.0.txt
@@ -1969,14 +2001,14 @@ ZERO_WIDTH = {
(0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -2011,7 +2043,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -2069,9 +2102,7 @@ ZERO_WIDTH = {
(0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting Rotation Mod
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'9.0.0': (
# Source: DerivedGeneralCategory-9.0.0.txt
@@ -2085,16 +2116,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -2104,7 +2133,8 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x008d4, 0x00903,), # Arabic Small High Word A..Devanagari Sign Visarga
+ (0x008d4, 0x008e1,), # Arabic Small High Word A..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -2227,14 +2257,14 @@ ZERO_WIDTH = {
(0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -2269,7 +2299,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -2283,7 +2314,6 @@ ZERO_WIDTH = {
(0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11173, 0x11173,), # Mahajani Sign Nukta
@@ -2339,9 +2369,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'10.0.0': (
# Source: DerivedGeneralCategory-10.0.0.txt
@@ -2355,16 +2383,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -2374,7 +2400,8 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x008d4, 0x00903,), # Arabic Small High Word A..Devanagari Sign Visarga
+ (0x008d4, 0x008e1,), # Arabic Small High Word A..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -2499,14 +2526,14 @@ ZERO_WIDTH = {
(0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -2541,7 +2568,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -2555,7 +2583,6 @@ ZERO_WIDTH = {
(0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11173, 0x11173,), # Mahajani Sign Nukta
@@ -2622,9 +2649,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'11.0.0': (
# Source: DerivedGeneralCategory-11.0.0.txt
@@ -2638,16 +2663,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -2658,7 +2681,8 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga
+ (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -2784,14 +2808,14 @@ ZERO_WIDTH = {
(0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -2827,7 +2851,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -2843,8 +2868,6 @@ ZERO_WIDTH = {
(0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
- (0x110cd, 0x110cd,), # Kaithi Number Sign Above
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
@@ -2918,9 +2941,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'12.0.0': (
# Source: DerivedGeneralCategory-12.0.0.txt
@@ -2934,16 +2955,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -2954,7 +2973,8 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga
+ (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -3079,14 +3099,14 @@ ZERO_WIDTH = {
(0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -3122,7 +3142,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -3138,8 +3159,6 @@ ZERO_WIDTH = {
(0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
- (0x110cd, 0x110cd,), # Kaithi Number Sign Above
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
@@ -3220,9 +3239,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'12.1.0': (
# Source: DerivedGeneralCategory-12.1.0.txt
@@ -3236,16 +3253,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -3256,7 +3271,8 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga
+ (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -3381,14 +3397,14 @@ ZERO_WIDTH = {
(0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -3424,7 +3440,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -3440,8 +3457,6 @@ ZERO_WIDTH = {
(0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
- (0x110cd, 0x110cd,), # Kaithi Number Sign Above
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
@@ -3522,9 +3537,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'13.0.0': (
# Source: DerivedGeneralCategory-13.0.0.txt
@@ -3538,16 +3551,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -3558,7 +3569,8 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga
+ (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -3683,14 +3695,14 @@ ZERO_WIDTH = {
(0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -3727,7 +3739,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -3744,8 +3757,6 @@ ZERO_WIDTH = {
(0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
- (0x110cd, 0x110cd,), # Kaithi Number Sign Above
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
@@ -3834,9 +3845,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'14.0.0': (
# Source: DerivedGeneralCategory-14.0.0.txt
@@ -3850,16 +3859,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -3870,9 +3877,9 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov
(0x00898, 0x0089f,), # Arabic Small High Word A..Arabic Half Madda Over M
- (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga
+ (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -3997,14 +4004,14 @@ ZERO_WIDTH = {
(0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -4041,7 +4048,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -4061,9 +4069,7 @@ ZERO_WIDTH = {
(0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
(0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R
- (0x110cd, 0x110cd,), # Kaithi Number Sign Above
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
@@ -4155,9 +4161,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'15.0.0': (
# Source: DerivedGeneralCategory-15.0.0.txt
@@ -4171,16 +4175,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -4191,9 +4193,9 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov
(0x00898, 0x0089f,), # Arabic Small High Word A..Arabic Half Madda Over M
- (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga
+ (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -4319,14 +4321,14 @@ ZERO_WIDTH = {
(0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -4363,7 +4365,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -4384,9 +4387,7 @@ ZERO_WIDTH = {
(0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
(0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R
- (0x110cd, 0x110cd,), # Kaithi Number Sign Above
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
@@ -4486,9 +4487,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'15.1.0': (
# Source: DerivedGeneralCategory-15.1.0.txt
@@ -4502,16 +4501,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -4522,9 +4519,9 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov
(0x00898, 0x0089f,), # Arabic Small High Word A..Arabic Half Madda Over M
- (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga
+ (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -4650,14 +4647,14 @@ ZERO_WIDTH = {
(0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -4694,7 +4691,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -4715,9 +4713,7 @@ ZERO_WIDTH = {
(0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
(0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R
- (0x110cd, 0x110cd,), # Kaithi Number Sign Above
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
@@ -4817,9 +4813,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'16.0.0': (
# Source: DerivedGeneralCategory-16.0.0.txt
@@ -4833,16 +4827,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -4853,9 +4845,9 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov
(0x00897, 0x0089f,), # (nil) ..Arabic Half Madda Over M
- (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga
+ (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -4981,14 +4973,14 @@ ZERO_WIDTH = {
(0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -5025,7 +5017,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -5047,9 +5040,7 @@ ZERO_WIDTH = {
(0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
(0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R
- (0x110cd, 0x110cd,), # Kaithi Number Sign Above
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
@@ -5159,9 +5150,7 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
'17.0.0': (
# Source: DerivedGeneralCategory-17.0.0.txt
@@ -5175,16 +5164,14 @@ ZERO_WIDTH = {
(0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot
(0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
(0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan
- (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above
(0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra
(0x0061c, 0x0061c,), # Arabic Letter Mark
(0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below
(0x00670, 0x00670,), # Arabic Letter Superscript Alef
- (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah
+ (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen
(0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda
(0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon
(0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem
- (0x0070f, 0x0070f,), # Syriac Abbreviation Mark
(0x00711, 0x00711,), # Syriac Letter Superscript Alaph
(0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun
@@ -5195,9 +5182,9 @@ ZERO_WIDTH = {
(0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U
(0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa
(0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark
- (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov
(0x00897, 0x0089f,), # (nil) ..Arabic Half Madda Over M
- (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga
+ (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S
+ (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga
(0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta
(0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw
(0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu
@@ -5324,14 +5311,14 @@ ZERO_WIDTH = {
(0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea
(0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark
(0x02028, 0x0202e,), # Line Separator ..Right-to-left Override
- (0x02060, 0x02064,), # Word Joiner ..Invisible Plus
- (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes
+ (0x02060, 0x0206f,), # Word Joiner ..Nominal Digit Shapes
(0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above
(0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu
(0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette
(0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M
(0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag
+ (0x03164, 0x03164,), # Hangul Filler
(0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous
(0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer
(0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette
@@ -5368,7 +5355,8 @@ ZERO_WIDTH = {
(0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16
(0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo
(0x0feff, 0x0feff,), # Zero Width No-break Space
- (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T
+ (0x0ffa0, 0x0ffa0,), # Halfwidth Hangul Filler
+ (0x0fff0, 0x0fffb,), # (nil) ..Interlinear Annotation T
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke
(0x102e0, 0x102e0,), # Coptic Epact Thousands Mark
(0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let
@@ -5390,9 +5378,7 @@ ZERO_WIDTH = {
(0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta
(0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga
(0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta
- (0x110bd, 0x110bd,), # Kaithi Number Sign
(0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R
- (0x110cd, 0x110cd,), # Kaithi Number Sign Above
(0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga
(0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei
@@ -5507,8 +5493,6 @@ ZERO_WIDTH = {
(0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining
(0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta
(0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri
- (0xe0001, 0xe0001,), # Language Tag
- (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag
- (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256
+ (0xe0000, 0xe0fff,), # (nil)
),
}
diff --git a/contrib/python/wcwidth/py3/wcwidth/textwrap.py b/contrib/python/wcwidth/py3/wcwidth/textwrap.py
new file mode 100644
index 00000000000..1b45213b5e1
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/textwrap.py
@@ -0,0 +1,387 @@
+"""
+Sequence-aware text wrapping functions.
+
+This module provides functions for wrapping text that may contain terminal escape sequences, with
+proper handling of Unicode grapheme clusters and character display widths.
+"""
+# std imports
+import textwrap
+
+from typing import List
+
+# local
+from .wcwidth import width as _width
+from .wcwidth import iter_sequences
+from .grapheme import iter_graphemes
+from .escape_sequences import ZERO_WIDTH_PATTERN
+
+
class SequenceTextWrapper(textwrap.TextWrapper):
    """
    Sequence-aware text wrapper extending :class:`textwrap.TextWrapper`.

    This wrapper properly handles terminal escape sequences and Unicode grapheme clusters when
    calculating text width for wrapping.

    This implementation is based on the SequenceTextWrapper from the 'blessed' library, with
    contributions from Avram Lubkin and grayjk.

    The key difference from the blessed implementation is the addition of grapheme cluster support
    via :func:`~.iter_graphemes`, providing width calculation for ZWJ emoji sequences, VS-16 emojis
    and variations, regional indicator flags, and combining characters.
    """

    def __init__(self, width: int = 70, *,
                 control_codes: str = 'parse',
                 tabsize: int = 8,
                 ambiguous_width: int = 1,
                 **kwargs):
        """
        Initialize the wrapper.

        :param width: Maximum line width in display cells.
        :param control_codes: How to handle control sequences (see :func:`~.width`).
        :param tabsize: Tab stop width for tab expansion.
        :param ambiguous_width: Width to use for East Asian Ambiguous (A) characters.
        :param kwargs: Additional arguments passed to :class:`textwrap.TextWrapper`.
        """
        super().__init__(width=width, **kwargs)
        # Stored on the instance so that every measurement made by _width() is consistent.
        self.control_codes = control_codes
        self.tabsize = tabsize
        self.ambiguous_width = ambiguous_width

    def _width(self, text: str) -> int:
        """Measure the display width of ``text`` in cells, using this wrapper's settings."""
        # Inside the method body, ``_width`` resolves to the module-level measuring function.
        return _width(text, control_codes=self.control_codes, tabsize=self.tabsize,
                      ambiguous_width=self.ambiguous_width)

    def _strip_sequences(self, text: str) -> str:
        """Return ``text`` with every segment flagged as a sequence removed."""
        return ''.join(segment for segment, is_seq in iter_sequences(text) if not is_seq)

    def _extract_sequences(self, text: str) -> str:
        """Return only the segments of ``text`` flagged as sequences, concatenated in order."""
        return ''.join(segment for segment, is_seq in iter_sequences(text) if is_seq)

    def _split(self, text: str) -> List[str]:  # pylint: disable=too-many-locals
        """
        Sequence-aware variant of :meth:`textwrap.TextWrapper._split`.

        This method ensures that terminal escape sequences don't interfere with the text splitting
        logic, particularly for hyphen-based word breaking. It builds a position mapping from
        stripped text to original text, calls the parent's _split on stripped text, then maps chunks
        back.

        :param text: Text to split, may contain terminal sequences.
        :returns: Chunks of ``text`` whose concatenation is ``text`` itself. Text made up only of
            sequences is returned as a single chunk.
        """
        # char_end[i] is the offset in ``text`` just past the i-th visible character.
        # Recording where each character ENDS makes a sequence that sits between two
        # characters attach to the text that follows it, so it is not lost when
        # whitespace chunks are dropped later.
        char_end: List[int] = []
        visible_parts: List[str] = []
        original_pos = 0

        for segment, is_seq in iter_sequences(text):
            seg_len = len(segment)
            if not is_seq:
                char_end.extend(range(original_pos + 1, original_pos + seg_len + 1))
                visible_parts.append(segment)
            # Sequences advance the original offset but contribute no visible characters.
            original_pos += seg_len

        # Sentinel: end of the original text, so the last chunk keeps trailing sequences.
        char_end.append(original_pos)
        stripped_text = ''.join(visible_parts)

        # Let the parent class decide chunk boundaries on sequence-free text.
        # pylint: disable-next=protected-access
        stripped_chunks = textwrap.TextWrapper._split(self, stripped_text)

        # Text containing only sequences yields no chunks; preserve it as one chunk.
        if not stripped_chunks and text:
            return [text]

        # Map each stripped chunk back onto the matching slice of the original text.
        result: List[str] = []
        stripped_pos = 0
        last_idx = len(stripped_chunks) - 1
        for idx, chunk in enumerate(stripped_chunks):
            # A chunk starts where the previous visible character ended (0 for the first).
            start_orig = char_end[stripped_pos - 1] if stripped_pos else 0
            stripped_pos += len(chunk)
            # The final chunk extends to the sentinel to include trailing sequences.
            end_orig = char_end[-1] if idx == last_idx else char_end[stripped_pos - 1]
            result.append(text[start_orig:end_orig])

        return result

    def _wrap_chunks(self, chunks: List[str]) -> List[str]:  # pylint: disable=too-many-branches
        """
        Wrap chunks into lines using sequence-aware width.

        Override TextWrapper._wrap_chunks to use _width instead of len. Follows stdlib's algorithm:
        greedily fill lines, handle long words.

        This override never reads ``self.max_lines`` or ``self.placeholder``, so those
        :class:`textwrap.TextWrapper` options have no effect here.

        :param chunks: Chunks as produced by :meth:`_split`.
        :returns: Wrapped lines, each prefixed with the applicable indent.
        """
        if not chunks:
            return []

        lines: List[str] = []
        is_first_line = True

        # Arrange in reverse order so items can be efficiently popped
        chunks = list(reversed(chunks))

        while chunks:
            current_line: List[str] = []
            current_width = 0

            # Get the indent and available width for current line
            indent = self.initial_indent if is_first_line else self.subsequent_indent
            line_width = self.width - self._width(indent)

            # Drop leading whitespace (except at very start).
            # When dropping, transfer any sequences to the next chunk.
            # Only drop if there's actual whitespace text, not if it's only sequences.
            stripped = self._strip_sequences(chunks[-1])
            if self.drop_whitespace and lines and stripped and not stripped.strip():
                sequences = self._extract_sequences(chunks[-1])
                del chunks[-1]
                if sequences and chunks:
                    chunks[-1] = sequences + chunks[-1]

            # Greedily add chunks that fit
            while chunks:
                chunk_width = self._width(chunks[-1])
                if current_width + chunk_width > line_width:
                    break
                current_line.append(chunks.pop())
                current_width += chunk_width

            # Handle chunk that's too long for any line
            if chunks and self._width(chunks[-1]) > line_width:
                self._handle_long_word(chunks, current_line, current_width, line_width)
                current_width = self._width(''.join(current_line))
                # Remove any empty chunks left by _handle_long_word
                while chunks and not chunks[-1]:
                    del chunks[-1]

            # Drop trailing whitespace.
            # When dropping, transfer any sequences to the previous chunk.
            # Only drop if there's actual whitespace text, not if it's only sequences.
            stripped_last = self._strip_sequences(current_line[-1]) if current_line else ''
            if (self.drop_whitespace and current_line and
                    stripped_last and not stripped_last.strip()):
                sequences = self._extract_sequences(current_line[-1])
                current_width -= self._width(current_line[-1])
                del current_line[-1]
                if sequences and current_line:
                    current_line[-1] = current_line[-1] + sequences

            if current_line:
                line_content = ''.join(current_line)
                # Strip trailing whitespace when drop_whitespace is enabled
                # (matches CPython #140627 fix behavior)
                if self.drop_whitespace:
                    line_content = line_content.rstrip()
                lines.append(indent + line_content)
                is_first_line = False

        return lines

    def _handle_long_word(self, reversed_chunks: List[str],
                          cur_line: List[str], cur_len: int,
                          width: int) -> None:
        """
        Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`.

        This method ensures that word boundaries are not broken mid-sequence, and respects grapheme
        cluster boundaries when breaking long words.

        The hyphen break point is searched only within the part of the chunk that fits in the
        remaining *display cells*, so wide characters preceding a hyphen cannot push the line
        past its width.

        :param reversed_chunks: Remaining chunks in reverse order; the last item is the long
            chunk. It is replaced by its unconsumed remainder, or popped when moved whole.
        :param cur_line: Chunks of the line being built; the consumed piece is appended.
        :param cur_len: Display width already used by ``cur_line``.
        :param width: Display width available for the line.
        """
        # With no usable width at all, still allow one cell so wrapping makes progress.
        space_left = 1 if width < 1 else width - cur_len

        if not self.break_long_words:
            # Long words may not be split: place the whole chunk on a line of its own,
            # or leave it for the next line when the current one already has content.
            if not cur_line:
                cur_line.append(reversed_chunks.pop())
            return

        chunk = reversed_chunks[-1]

        # Index into ``chunk`` (sequences included) of the longest prefix that fits,
        # always on a grapheme boundary.
        actual_end = self._find_break_position(chunk, space_left)

        # Prefer breaking just after the last hyphen inside the fitting prefix. Skipped when
        # nothing fits or when the whole chunk fits, as there is nothing to choose between.
        if self.break_on_hyphens and 0 < actual_end < len(chunk):
            visible_fit = self._strip_sequences(chunk[:actual_end])
            hyphen_pos = visible_fit.rfind('-')
            # Like the stdlib, require a non-hyphen character before the hyphen.
            if hyphen_pos > 0 and any(c != '-' for c in visible_fit[:hyphen_pos]):
                actual_end = self._map_stripped_pos_to_original(chunk, hyphen_pos + 1)

        # If no progress is possible (e.g., a wide char exceeds the line width), force at
        # least one grapheme to avoid an infinite loop. Only force when cur_line is empty;
        # if the line has content, appending nothing is safe and the line will be committed.
        if actual_end == 0 and not cur_line:
            actual_end = self._find_first_grapheme_end(chunk)

        cur_line.append(chunk[:actual_end])
        reversed_chunks[-1] = chunk[actual_end:]

    def _map_stripped_pos_to_original(self, text: str, stripped_pos: int) -> int:
        """
        Map a position in stripped text back to original text position.

        :param text: Original text, may contain terminal sequences.
        :param stripped_pos: Index into the sequence-free form of ``text``.
        :returns: Index into ``text`` of the same visible character, or ``len(text)`` when
            ``stripped_pos`` is not smaller than the number of visible characters.
        """
        stripped_idx = 0
        original_idx = 0

        for segment, is_seq in iter_sequences(text):
            seg_len = len(segment)
            if not is_seq:
                if stripped_idx + seg_len > stripped_pos:
                    # Position is within this segment
                    return original_idx + (stripped_pos - stripped_idx)
                stripped_idx += seg_len
            original_idx += seg_len

        # Reached only when stripped_pos is at or beyond the end of the visible text.
        return original_idx  # pragma: no cover

    def _find_break_position(self, text: str, max_width: int) -> int:
        """
        Find string index in text that fits within max_width cells.

        :param text: Text to scan, may contain terminal sequences.
        :param max_width: Number of display cells available.
        :returns: Index of the first grapheme that would exceed ``max_width``, or
            ``len(text)`` when all of ``text`` fits.
        """
        idx = 0
        width_so_far = 0

        while idx < len(text):
            # Skip escape sequences (they don't add width)
            if text[idx] == '\x1b':
                match = ZERO_WIDTH_PATTERN.match(text, idx)
                if match:
                    idx = match.end()
                    continue

            # Measure one whole grapheme so multi-codepoint characters are never split.
            grapheme = next(iter_graphemes(text[idx:]))
            grapheme_width = self._width(grapheme)
            if width_so_far + grapheme_width > max_width:
                return idx  # Found break point

            width_so_far += grapheme_width
            idx += len(grapheme)

        # Callers normally pass text wider than max_width, so the loop returns first.
        return idx  # pragma: no cover

    def _find_first_grapheme_end(self, text: str) -> int:
        """Find the end position of the first grapheme of non-empty ``text``."""
        return len(next(iter_graphemes(text)))
+
+
def wrap(text: str, width: int = 70, *,
         control_codes: str = 'parse',
         tabsize: int = 8,
         ambiguous_width: int = 1,
         initial_indent: str = '',
         subsequent_indent: str = '',
         break_long_words: bool = True,
         break_on_hyphens: bool = True) -> List[str]:
    r"""
    Wrap text to fit within given width, returning a list of wrapped lines.

    Like :func:`textwrap.wrap`, but measures width in display cells rather than
    characters, correctly handling wide characters, combining marks, and terminal
    escape sequences.

    :param str text: Text to wrap, may contain terminal sequences.
    :param int width: Maximum line width in display cells.
    :param str control_codes: How to handle terminal sequences (see :func:`~.width`).
    :param int tabsize: Tab stop width for tab expansion.
    :param int ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :param str initial_indent: String prepended to first line.
    :param str subsequent_indent: String prepended to subsequent lines.
    :param bool break_long_words: If True, break words longer than width.
    :param bool break_on_hyphens: If True, allow breaking at hyphens.
    :returns: List of wrapped lines without trailing newlines.
    :rtype: list[str]

    Like :func:`textwrap.wrap`, newlines in the input text are treated as
    whitespace and collapsed. To preserve paragraph breaks, wrap each
    paragraph separately::

        >>> text = 'First line.\nSecond line.'
        >>> wrap(text, 40)  # newline collapsed to space
        ['First line. Second line.']
        >>> [line for para in text.split('\n')
        ...  for line in (wrap(para, 40) if para else [''])]
        ['First line.', 'Second line.']

    .. seealso::

       :func:`textwrap.wrap`, :class:`textwrap.TextWrapper`
           Standard library text wrapping (character-based).

       :class:`.SequenceTextWrapper`
           Class interface for advanced wrapping options.

    .. versionadded:: 0.3.0

    Example::

        >>> from wcwidth import wrap
        >>> wrap('hello world', 5)
        ['hello', 'world']
        >>> wrap('中文字符', 4)  # CJK characters (2 cells each)
        ['中文', '字符']
    """
    # All options are forwarded unchanged; the wrapper class does the actual work.
    return SequenceTextWrapper(
        width=width,
        control_codes=control_codes,
        tabsize=tabsize,
        ambiguous_width=ambiguous_width,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        break_long_words=break_long_words,
        break_on_hyphens=break_on_hyphens,
    ).wrap(text)
diff --git a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py
index 92ca14afba1..09938dbea2c 100644
--- a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py
+++ b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py
@@ -67,52 +67,56 @@ import warnings
from functools import lru_cache
# local
+from .bisearch import bisearch as _bisearch
+from .grapheme import iter_graphemes
from .table_vs16 import VS16_NARROW_TO_WIDE
from .table_wide import WIDE_EASTASIAN
from .table_zero import ZERO_WIDTH
+from .control_codes import ILLEGAL_CTRL, VERTICAL_CTRL, HORIZONTAL_CTRL, ZERO_WIDTH_CTRL
+from .table_ambiguous import AMBIGUOUS_EASTASIAN
+from .escape_sequences import (ZERO_WIDTH_PATTERN,
+ CURSOR_LEFT_SEQUENCE,
+ CURSOR_RIGHT_SEQUENCE,
+ INDETERMINATE_EFFECT_SEQUENCE)
from .unicode_versions import list_versions
+_AMBIGUOUS_TABLE = AMBIGUOUS_EASTASIAN[next(iter(AMBIGUOUS_EASTASIAN))]
-def _bisearch(ucs, table):
- """
- Auxiliary function for binary search in interval table.
-
- :arg int ucs: Ordinal value of unicode character.
- :arg list table: List of starting and ending ranges of ordinal values,
- in form of ``[(start, end), ...]``.
- :rtype: int
- :returns: 1 if ordinal value ucs is found within lookup table, else 0.
- """
- lbound = 0
- ubound = len(table) - 1
-
- if ucs < table[0][0] or ucs > table[ubound][1]:
- return 0
- while ubound >= lbound:
- mid = (lbound + ubound) // 2
- if ucs > table[mid][1]:
- lbound = mid + 1
- elif ucs < table[mid][0]:
- ubound = mid - 1
- else:
- return 1
-
- return 0
+# Translation table to strip C0/C1 control characters for fast 'ignore' mode.
+_CONTROL_CHAR_TABLE = str.maketrans('', '', (
+ ''.join(chr(c) for c in range(0x00, 0x20)) + # C0: NUL through US (including tab)
+ '\x7f' + # DEL
+ ''.join(chr(c) for c in range(0x80, 0xa0)) # C1: U+0080-U+009F
+))
-@lru_cache(maxsize=1000)
-def wcwidth(wc, unicode_version='auto'):
+@lru_cache(maxsize=2000)
+def wcwidth(wc, unicode_version='auto', ambiguous_width=1):
r"""
- Given one Unicode character, return its printable length on a terminal.
+ Given one Unicode codepoint, return its printable length on a terminal.
:param str wc: A single Unicode character.
:param str unicode_version: A Unicode version number, such as
- ``'6.0.0'``. A list of version levels suported by wcwidth
+ ``'6.0.0'``. A list of version levels supported by wcwidth
is returned by :func:`list_versions`.
Any version string may be specified without error -- the nearest
- matching version is selected. When ``latest`` (default), the
- highest Unicode version level is used.
+ matching version is selected. When ``'auto'`` (default), the
+ ``UNICODE_VERSION`` environment variable is used if set, otherwise
+ the highest Unicode version level is used.
+
+ .. deprecated:: 0.3.0
+
+ This parameter is deprecated. Empirical data shows that Unicode
+ support in terminals varies not only by unicode version, but
+ by capabilities, Emojis, and specific language support.
+
+ The default ``'auto'`` behavior is recommended for all use cases.
+
+ :param int ambiguous_width: Width to use for East Asian Ambiguous (A)
+ characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts
+ where ambiguous characters display as double-width. See
+ :ref:`ambiguous_width` for details.
:return: The width, in cells, necessary to display the character of
Unicode string character, ``wc``. Returns 0 if the ``wc`` argument has
no printable effect on a terminal (such as NUL '\0'), -1 if ``wc`` is
@@ -141,11 +145,18 @@ def wcwidth(wc, unicode_version='auto'):
if _bisearch(ucs, ZERO_WIDTH[_unicode_version]):
return 0
- # 1 or 2 width
- return 1 + _bisearch(ucs, WIDE_EASTASIAN[_unicode_version])
+ # Wide (F/W categories)
+ if _bisearch(ucs, WIDE_EASTASIAN[_unicode_version]):
+ return 2
+
+ # Ambiguous width (A category) - only when ambiguous_width=2
+ if ambiguous_width == 2 and _bisearch(ucs, _AMBIGUOUS_TABLE):
+ return 2
+ return 1
-def wcswidth(pwcs, n=None, unicode_version='auto'):
+
+def wcswidth(pwcs, n=None, unicode_version='auto', ambiguous_width=1):
"""
Given a unicode string, return its printable length on a terminal.
@@ -155,10 +166,21 @@ def wcswidth(pwcs, n=None, unicode_version='auto'):
argument exists only for compatibility with the C POSIX function
signature. It is suggested instead to use python's string slicing
capability, ``wcswidth(pwcs[:n])``
- :param str unicode_version: An explicit definition of the unicode version
- level to use for determination, may be ``auto`` (default), which uses
- the Environment Variable, ``UNICODE_VERSION`` if defined, or the latest
- available unicode version, otherwise.
+ :param str unicode_version: A Unicode version number, such as
+ ``'6.0.0'``, or ``'auto'`` (default) which uses the
+ ``UNICODE_VERSION`` environment variable if defined, or the latest
+ available unicode version otherwise.
+
+ .. deprecated:: 0.3.0
+
+ This parameter is deprecated. Empirical data shows that Unicode
+ support in terminals varies not only by unicode version, but
+ by capabilities, Emojis, and specific language support.
+
+ The default ``'auto'`` behavior is recommended for all use cases.
+
+ :param int ambiguous_width: Width to use for East Asian Ambiguous (A)
+ characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
:rtype: int
:returns: The width, in cells, needed to display the first ``n`` characters
of the unicode string ``pwcs``. Returns ``-1`` for C0 and C1 control
@@ -167,40 +189,43 @@ def wcswidth(pwcs, n=None, unicode_version='auto'):
See :ref:`Specification` for details of cell measurement.
"""
# this 'n' argument is a holdover for POSIX function
+
+ # Fast path: pure ASCII printable strings are always width == length
+ if n is None and pwcs.isascii() and pwcs.isprintable():
+ return len(pwcs)
+
_unicode_version = None
end = len(pwcs) if n is None else n
- width = 0
+ total_width = 0
idx = 0
- last_measured_char = None
+ last_measured_idx = -2 # Track index of last measured char for VS16
while idx < end:
char = pwcs[idx]
if char == '\u200D':
# Zero Width Joiner, do not measure this or next character
idx += 2
continue
- if char == '\uFE0F' and last_measured_char:
- # on variation selector 16 (VS16) following another character,
- # conditionally add '1' to the measured width if that character is
- # known to be converted from narrow to wide by the VS16 character.
+ if char == '\uFE0F' and last_measured_idx >= 0:
+ # VS16 following a measured character: add 1 if that character is
+ # known to be converted from narrow to wide by VS16.
if _unicode_version is None:
_unicode_version = _wcversion_value(_wcmatch_version(unicode_version))
if _unicode_version >= (9, 0, 0):
- width += _bisearch(ord(last_measured_char), VS16_NARROW_TO_WIDE["9.0.0"])
- last_measured_char = None
+ total_width += _bisearch(ord(pwcs[last_measured_idx]),
+ VS16_NARROW_TO_WIDE["9.0.0"])
+ last_measured_idx = -2 # Prevent double application
idx += 1
continue
# measure character at current index
- wcw = wcwidth(char, unicode_version)
+ wcw = wcwidth(char, unicode_version, ambiguous_width)
if wcw < 0:
# early return -1 on C0 and C1 control characters
return wcw
if wcw > 0:
- # track last character measured to contain a cell, so that
- # subsequent VS-16 modifiers may be understood
- last_measured_char = char
- width += wcw
+ last_measured_idx = idx
+ total_width += wcw
idx += 1
- return width
+ return total_width
@lru_cache(maxsize=128)
@@ -274,11 +299,10 @@ def _wcmatch_version(given_version):
except ValueError:
# submitted value raises ValueError in int(), warn and use latest.
- warnings.warn("UNICODE_VERSION value, {given_version!r}, is invalid. "
+ warnings.warn(f"UNICODE_VERSION value, {given_version!r}, is invalid. "
"Value should be in form of `integer[.]+', the latest "
- "supported unicode version {latest_version!r} has been "
- "inferred.".format(given_version=given_version,
- latest_version=latest_version))
+ f"supported unicode version {latest_version!r} has been "
+ "inferred.")
return latest_version
# given version is less than any available version, return earliest
@@ -290,11 +314,9 @@ def _wcmatch_version(given_version):
# this probably isn't what you wanted, the oldest wcwidth.c you will
# find in the wild is likely version 5 or 6, which we both support,
# but it's better than not saying anything at all.
- warnings.warn("UNICODE_VERSION value, {given_version!r}, is lower "
+ warnings.warn(f"UNICODE_VERSION value, {given_version!r}, is lower "
"than any available unicode version. Returning lowest "
- "version level, {earliest_version!r}".format(
- given_version=given_version,
- earliest_version=earliest_version))
+ f"version level, {earliest_version!r}")
return earliest_version
# create list of versions which are less than our equal to given version,
@@ -324,3 +346,457 @@ def _wcmatch_version(given_version):
if cmp_next_version > cmp_given:
return unicode_version
assert False, ("Code path unreachable", given_version, unicode_versions) # pragma: no cover
+
+
def iter_sequences(text):
    r"""
    Split text into alternating plain-text and escape-sequence segments.

    Each yielded item is a ``(segment, is_sequence)`` tuple. ``is_sequence`` is
    ``True`` for a segment that begins with ESC: either a match of the
    zero-width sequence pattern, or a lone ESC character that matched nothing.
    Everything else is yielded as plain text with ``is_sequence`` of ``False``.

    :param str text: String to iterate through.
    :rtype: Iterator[tuple[str, bool]]
    :returns: Iterator of (segment, is_sequence) tuples.

    .. versionadded:: 0.3.0

    Example::

        >>> list(iter_sequences('hello'))
        [('hello', False)]
        >>> list(iter_sequences('\x1b[31mred'))
        [('\x1b[31m', True), ('red', False)]
        >>> list(iter_sequences('\x1b[1m\x1b[31m'))
        [('\x1b[1m', True), ('\x1b[31m', True)]
    """
    pos = 0
    text_len = len(text)

    while pos < text_len:
        esc_at = text.find('\x1b', pos)
        if esc_at < 0:
            # No further ESC: the remainder is plain text.
            yield (text[pos:], False)
            return

        # Plain text accumulated before this ESC, if any.
        if esc_at > pos:
            yield (text[pos:esc_at], False)

        match = ZERO_WIDTH_PATTERN.match(text, esc_at)
        if match:
            yield (match.group(), True)
            pos = match.end()
        else:
            # Lone ESC or unrecognized - yield as sequence anyway
            yield ('\x1b', True)
            pos = esc_at + 1
+
+
def _width_ignored_codes(text, ambiguous_width=1):
    """
    Measure ``text`` for :func:`width` when ``control_codes='ignore'``.

    Escape sequences are removed first, then the characters listed in the
    module's control-character translation table are deleted, and whatever
    remains is measured by :func:`wcswidth`.
    """
    visible = strip_sequences(text).translate(_CONTROL_CHAR_TABLE)
    return wcswidth(visible, ambiguous_width=ambiguous_width)
+
+
def width(text, *, control_codes='parse', tabsize=8, ambiguous_width=1):
    r"""
    Return printable width of text containing many kinds of control codes and sequences.

    Unlike :func:`wcswidth`, this function handles most control characters and many popular terminal
    output sequences. Never returns -1.

    :param str text: String to measure.
    :param str control_codes: How to handle control characters and sequences:

        - ``'parse'`` (default): Track horizontal cursor movement from BS ``\b``, CR ``\r``, TAB
          ``\t``, and cursor left and right movement sequences. Vertical movement (LF, VT, FF) and
          indeterminate sequences are zero-width. Never raises.
        - ``'strict'``: Like parse, but raises :exc:`ValueError` on control characters with
          indeterminate results of the screen or cursor, like clear or vertical movement. Generally,
          these should be handled with a virtual terminal emulator (like 'pyte').
        - ``'ignore'``: All C0 and C1 control characters and escape sequences are measured as
          width 0. This is the fastest measurement for text already filtered or known not to contain
          any kinds of control codes or sequences. TAB ``\t`` is zero-width; for tab expansion,
          pre-process: ``text.replace('\t', ' ' * 8)``.

    :param int tabsize: Tab stop width for ``'parse'`` and ``'strict'`` modes. Default is 8.
        Must be positive; with a value of zero or less, TAB does not move the cursor.
        Has no effect when ``control_codes='ignore'``.
    :param int ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :rtype: int
    :returns: Maximum cursor position reached, "extent", accounting for cursor movement sequences
        present in ``text`` according to given parameters. This represents the rightmost column the
        cursor reaches. Always a non-negative integer.

    :raises ValueError: If ``control_codes='strict'`` and control characters with indeterminate
        effects, such as vertical movement or clear sequences are encountered, or on unexpected
        C0 or C1 control code. Also raised when ``control_codes`` is not one of the valid values.

    .. versionadded:: 0.3.0

    Examples::

        >>> width('hello')
        5
        >>> width('コンニチハ')
        10
        >>> width('\x1b[31mred\x1b[0m')
        3
        >>> width('\x1b[31mred\x1b[0m', control_codes='ignore')  # same result (ignored)
        3
        >>> width('123\b4')  # backspace overwrites previous cell (outputs '124')
        3
        >>> width('abc\t')  # tab caused cursor to move to column 8
        8
        >>> width('1\x1b[10C')  # '1' + cursor right 10, cursor ends on column 11
        11
        >>> width('1\x1b[10C', control_codes='ignore')  # faster but wrong in this case
        1
    """
    # pylint: disable=too-complex,too-many-branches,too-many-statements
    # This could be broken into sub-functions (#1, #3, and 6 especially), but for reduced overhead
    # considering this function is a likely "hot path", they are inlined, breaking many of our
    # complexity rules.

    # Fast path for ignore mode -- this is useful if you know the text is already "clean"
    if control_codes == 'ignore':
        return _width_ignored_codes(text, ambiguous_width)

    strict = control_codes == 'strict'
    if not strict and control_codes != 'parse':
        # Honor the documented contract instead of silently falling back to 'parse'.
        raise ValueError(
            f"Invalid control_codes value {control_codes!r}, "
            "must be one of 'parse', 'strict', or 'ignore'")

    # Track absolute positions: tab stops need modulo on absolute column, CR resets to 0.
    # Initialize max_extent to 0 so backward movement (CR, BS) won't yield negative width.
    current_col = 0
    max_extent = 0
    idx = 0
    last_measured_idx = -2  # Track index of last measured char for VS16; -2 can never match idx-1

    while idx < len(text):
        char = text[idx]

        # 1. Handle ESC sequences
        if char == '\x1b':
            match = ZERO_WIDTH_PATTERN.match(text, idx)
            if match:
                seq = match.group()
                if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                    raise ValueError(f"Indeterminate cursor sequence at position {idx}")
                # Apply cursor movement
                right = CURSOR_RIGHT_SEQUENCE.match(seq)
                if right:
                    current_col += int(right.group(1) or 1)
                else:
                    left = CURSOR_LEFT_SEQUENCE.match(seq)
                    if left:
                        # Clamp at column 0: the cursor cannot move left of the origin.
                        current_col = max(0, current_col - int(left.group(1) or 1))
                idx = match.end()
            else:
                # Lone or unrecognized ESC: skip just the ESC character.
                idx += 1
            max_extent = max(max_extent, current_col)
            continue

        # 2. Handle illegal and vertical control characters (zero width, error in strict)
        if char in ILLEGAL_CTRL:
            if strict:
                raise ValueError(f"Illegal control character {ord(char):#x} at position {idx}")
            idx += 1
            continue

        if char in VERTICAL_CTRL:
            if strict:
                raise ValueError(f"Vertical movement character {ord(char):#x} at position {idx}")
            idx += 1
            continue

        # 3. Handle horizontal movement characters
        if char in HORIZONTAL_CTRL:
            if char == '\x09' and tabsize > 0:  # Tab
                # Advance to the next multiple of tabsize.
                current_col += tabsize - (current_col % tabsize)
            elif char == '\x08':  # Backspace
                if current_col > 0:
                    current_col -= 1
            elif char == '\x0d':  # Carriage return
                current_col = 0
            max_extent = max(max_extent, current_col)
            idx += 1
            continue

        # 4. Handle ZWJ (skip this and next character)
        if char == '\u200D':
            idx += 2
            continue

        # 5. Handle other zero-width characters (control chars)
        if char in ZERO_WIDTH_CTRL:
            idx += 1
            continue

        # 6. Handle VS16: converts preceding narrow character to wide
        if char == '\uFE0F':
            # Only applies when VS16 directly follows the last measured character.
            if last_measured_idx == idx - 1:
                if _bisearch(ord(text[last_measured_idx]), VS16_NARROW_TO_WIDE["9.0.0"]):
                    current_col += 1
                    max_extent = max(max_extent, current_col)
            idx += 1
            continue

        # 7. Normal characters: measure with wcwidth
        w = wcwidth(char, 'auto', ambiguous_width)
        if w > 0:
            current_col += w
            max_extent = max(max_extent, current_col)
            last_measured_idx = idx
        idx += 1

    return max_extent
+
+
def ljust(text, dest_width, fillchar=' ', *, control_codes='parse', ambiguous_width=1):
    r"""
    Left-justify text by padding it on the right up to a display width.

    :param str text: String to justify, may contain terminal sequences.
    :param int dest_width: Total display width of result in terminal cells.
    :param str fillchar: Single character for padding (default space). Must have
        display width of 1 (not wide, not zero-width, not combining). Unicode
        characters like ``'·'`` are acceptable. The width is not validated.
    :param str control_codes: How to handle control sequences when measuring.
        Passed to :func:`width` for measurement.
    :param int ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :returns: Text padded on the right to reach ``dest_width``; returned
        unpadded when it is already at least that wide.
    :rtype: str

    .. versionadded:: 0.3.0

    Example::

        >>> wcwidth.ljust('hi', 5)
        'hi   '
        >>> wcwidth.ljust('\x1b[31mhi\x1b[0m', 5)
        '\x1b[31mhi\x1b[0m   '
        >>> wcwidth.ljust('\U0001F468\u200D\U0001F469\u200D\U0001F467', 6)
        '👨‍👩‍👧    '
    """
    # Printable ASCII occupies exactly one cell per character, so len() suffices.
    is_plain = text.isascii() and text.isprintable()
    measured = len(text) if is_plain else width(
        text, control_codes=control_codes, ambiguous_width=ambiguous_width)
    return text + fillchar * max(0, dest_width - measured)
+
+
def rjust(text, dest_width, fillchar=' ', *, control_codes='parse', ambiguous_width=1):
    r"""
    Right-justify text by padding it on the left up to a display width.

    :param str text: String to justify, may contain terminal sequences.
    :param int dest_width: Total display width of result in terminal cells.
    :param str fillchar: Single character for padding (default space). Must have
        display width of 1 (not wide, not zero-width, not combining). Unicode
        characters like ``'·'`` are acceptable. The width is not validated.
    :param str control_codes: How to handle control sequences when measuring.
        Passed to :func:`width` for measurement.
    :param int ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :returns: Text padded on the left to reach ``dest_width``; returned
        unpadded when it is already at least that wide.
    :rtype: str

    .. versionadded:: 0.3.0

    Example::

        >>> wcwidth.rjust('hi', 5)
        '   hi'
        >>> wcwidth.rjust('\x1b[31mhi\x1b[0m', 5)
        '   \x1b[31mhi\x1b[0m'
        >>> wcwidth.rjust('\U0001F468\u200D\U0001F469\u200D\U0001F467', 6)
        '    👨‍👩‍👧'
    """
    # Printable ASCII occupies exactly one cell per character, so len() suffices.
    is_plain = text.isascii() and text.isprintable()
    measured = len(text) if is_plain else width(
        text, control_codes=control_codes, ambiguous_width=ambiguous_width)
    return fillchar * max(0, dest_width - measured) + text
+
+
def center(text, dest_width, fillchar=' ', *, control_codes='parse', ambiguous_width=1):
    r"""
    Return text centered in a string of given display width.

    :param str text: String to center, may contain terminal sequences.
    :param int dest_width: Total display width of result in terminal cells.
    :param str fillchar: Single character for padding (default space). Must have
        display width of 1 (not wide, not zero-width, not combining). Unicode
        characters like ``'·'`` are acceptable. The width is not validated.
    :param str control_codes: How to handle control sequences when measuring.
        Passed to :func:`width` for measurement.
    :param int ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :returns: Text padded on both sides to reach ``dest_width``.
    :rtype: str

    When the padding cannot be split evenly, the extra cell is placed the
    same way Python's :meth:`str.center` places it: on the left when
    ``dest_width`` is odd, on the right when ``dest_width`` is even. For
    printable ASCII text the result is therefore identical to
    ``text.center(dest_width, fillchar)``.

    .. versionadded:: 0.3.0

    Example::

        >>> wcwidth.center('hi', 6)
        '  hi  '
        >>> wcwidth.center('hi', 5)
        '  hi '
        >>> wcwidth.center('\x1b[31mhi\x1b[0m', 6)
        '  \x1b[31mhi\x1b[0m  '
        >>> wcwidth.center('\U0001F468\u200D\U0001F469\u200D\U0001F467', 6)
        '  👨‍👩‍👧  '
    """
    # Printable ASCII is measured with len() directly, skipping the full
    # width() measurement.
    if text.isascii() and text.isprintable():
        text_width = len(text)
    else:
        text_width = width(text, control_codes=control_codes, ambiguous_width=ambiguous_width)
    total_padding = max(0, dest_width - text_width)
    # Mirror CPython's str.center: the left side gets the odd cell only when
    # both the padding and the destination width are odd.
    left_pad = total_padding // 2 + (total_padding & dest_width & 1)
    right_pad = total_padding - left_pad
    return fillchar * left_pad + text + fillchar * right_pad
+
+
def strip_sequences(text):
    r"""
    Remove terminal escape sequences from ``text``.

    Every span matched by ``ZERO_WIDTH_PATTERN`` is deleted. Anything the
    pattern does not match, such as unknown or incomplete ESC sequences, is
    left in place.

    :param str text: String that may contain terminal escape sequences.
    :rtype: str
    :returns: The input text with all escape sequences stripped.

    .. versionadded:: 0.3.0

    Example::

        >>> strip_sequences('\x1b[31mred\x1b[0m')
        'red'
        >>> strip_sequences('hello')
        'hello'
        >>> strip_sequences('\x1b[1m\x1b[31mbold red\x1b[0m text')
        'bold red text'
    """
    without_sequences = ZERO_WIDTH_PATTERN.sub('', text)
    return without_sequences
+
+
def clip(text, start, end, *, fillchar=' ', tabsize=8, ambiguous_width=1):
    r"""
    Extract display columns ``(start, end)`` of ``text``, keeping every terminal sequence.

    Positions are visible terminal columns, not character indices. Terminal
    escape sequences occupy no columns and are therefore always copied to the
    result. A wide character (width 2) that straddles either boundary cannot
    be shown in part, so the visible portion is filled with ``fillchar``.

    TAB characters (``\t``) are expanded to spaces up to the next tab stop,
    controlled by the ``tabsize`` parameter.

    Other cursor movement characters (backspace, carriage return) and cursor
    movement sequences are passed through unchanged as zero-width.

    :param str text: String to clip, may contain terminal escape sequences.
    :param int start: Absolute starting column (inclusive, 0-indexed).
        Negative values are treated as ``0``.
    :param int end: Absolute ending column (exclusive). An empty string is
        returned when ``end`` is not greater than ``start``.
    :param str fillchar: Character to use when a wide character must be split at
        a boundary (default space). Must have display width of 1.
    :param int tabsize: Tab stop width (default 8). Set to 0 to pass tabs through
        as zero-width (preserved in output but don't advance column position).
    :param int ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :rtype: str
    :returns: Substring of ``text`` spanning display columns ``(start, end)``,
        with all terminal sequences preserved and wide characters at boundaries
        replaced with ``fillchar``.

    .. versionadded:: 0.3.0

    Example::

        >>> clip('hello world', 0, 5)
        'hello'
        >>> clip('中文字', 0, 3)  # Wide char split at column 3
        '中 '
        >>> clip('a\tb', 0, 10)  # Tab expanded to spaces
        'a       b'
    """
    # pylint: disable=too-complex,too-many-locals,too-many-branches
    start = max(start, 0)
    if end <= start:
        return ''

    # Printable ASCII has one column per character: plain slicing is enough.
    if text.isascii() and text.isprintable():
        return text[start:end]

    pieces = []
    column = 0
    pos = 0
    total = len(text)

    while pos < total:
        current = text[pos]

        if current == '\x1b':
            # Escape sequences take no columns, so they are kept regardless of
            # the clip window. A bare ESC that starts no recognised sequence
            # is copied on its own.
            found = ZERO_WIDTH_PATTERN.match(text, pos)
            if found:
                pieces.append(found.group())
                pos = found.end()
            else:
                pieces.append(current)
                pos += 1
            continue

        if current == '\t':
            if tabsize > 0:
                # Emit one space for each column up to the next tab stop that
                # lies inside the window.
                tab_stop = column + (tabsize - (column % tabsize))
                while column < tab_stop:
                    if start <= column < end:
                        pieces.append(' ')
                    column += 1
            else:
                # Tab expansion disabled: keep the tab, do not move the column.
                pieces.append(current)
            pos += 1
            continue

        # Everything else (including control characters) is measured one
        # grapheme cluster at a time.
        cluster = next(iter_graphemes(text[pos:]))
        cells = width(cluster, ambiguous_width=ambiguous_width)
        pos += len(cluster)

        if cells == 0:
            # Zero-width clusters are always kept and never advance the column.
            pieces.append(cluster)
            continue

        right_edge = column + cells
        if start <= column and right_edge <= end:
            # Entirely inside the window.
            pieces.append(cluster)
        elif column < end and right_edge > start:
            # Straddles a boundary: fill only the columns that are visible.
            pieces.append(fillchar * (min(end, right_edge) - max(start, column)))
        # Otherwise the cluster lies wholly outside the window and is dropped.
        column = right_edge

    return ''.join(pieces)
diff --git a/contrib/python/wcwidth/py3/ya.make b/contrib/python/wcwidth/py3/ya.make
index 7f889617c63..798281c2205 100644
--- a/contrib/python/wcwidth/py3/ya.make
+++ b/contrib/python/wcwidth/py3/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(0.2.14)
+VERSION(0.3.0)
LICENSE(MIT)
@@ -11,10 +11,16 @@ NO_LINT()
PY_SRCS(
TOP_LEVEL
wcwidth/__init__.py
- wcwidth/table_vs15.py
+ wcwidth/bisearch.py
+ wcwidth/control_codes.py
+ wcwidth/escape_sequences.py
+ wcwidth/grapheme.py
+ wcwidth/table_ambiguous.py
+ wcwidth/table_grapheme.py
wcwidth/table_vs16.py
wcwidth/table_wide.py
wcwidth/table_zero.py
+ wcwidth/textwrap.py
wcwidth/unicode_versions.py
wcwidth/wcwidth.py
)