diff options
author | AlexSm <alex@ydb.tech> | 2023-12-27 23:31:58 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-27 23:31:58 +0100 |
commit | d67bfb4b4b7549081543e87a31bc6cb5c46ac973 (patch) | |
tree | 8674f2f1570877cb653e7ddcff37ba00288de15a /contrib/python | |
parent | 1f6bef05ed441c3aa2d565ac792b26cded704ac7 (diff) | |
download | ydb-d67bfb4b4b7549081543e87a31bc6cb5c46ac973.tar.gz |
Import libs 4 (#758)
Diffstat (limited to 'contrib/python')
150 files changed, 8783 insertions, 3492 deletions
diff --git a/contrib/python/argcomplete/py3/.dist-info/METADATA b/contrib/python/argcomplete/py3/.dist-info/METADATA index d9b6891313..c86dfbcaf5 100644 --- a/contrib/python/argcomplete/py3/.dist-info/METADATA +++ b/contrib/python/argcomplete/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: argcomplete -Version: 3.1.6 +Version: 3.2.1 Summary: Bash tab completion for argparse Home-page: https://github.com/kislyuk/argcomplete Author: Andrey Kislyuk diff --git a/contrib/python/argcomplete/py3/argcomplete/_check_console_script.py b/contrib/python/argcomplete/py3/argcomplete/_check_console_script.py index bd324b4e71..63eac73f6f 100644 --- a/contrib/python/argcomplete/py3/argcomplete/_check_console_script.py +++ b/contrib/python/argcomplete/py3/argcomplete/_check_console_script.py @@ -13,12 +13,11 @@ Intended to be invoked by argcomplete's global completion function. """ import os import sys - -from importlib.metadata import entry_points as importlib_entry_points from importlib.metadata import EntryPoint +from importlib.metadata import entry_points as importlib_entry_points +from typing import Iterable from ._check_module import ArgcompleteMarkerNotFound, find -from typing import Iterable def main(): @@ -29,15 +28,14 @@ def main(): # assuming it is actually a console script. 
name = os.path.basename(script_path) - entry_points : Iterable[EntryPoint] = importlib_entry_points() # type:ignore + entry_points: Iterable[EntryPoint] = importlib_entry_points() # type:ignore # Python 3.12+ returns a tuple of entry point objects # whereas <=3.11 returns a SelectableGroups object if sys.version_info < (3, 12): - entry_points = entry_points["console_scripts"] # type:ignore + entry_points = entry_points["console_scripts"] # type:ignore - entry_points = [ep for ep in entry_points \ - if ep.name == name and ep.group == "console_scripts" ] # type:ignore + entry_points = [ep for ep in entry_points if ep.name == name and ep.group == "console_scripts"] # type:ignore if not entry_points: raise ArgcompleteMarkerNotFound("no entry point found matching script") diff --git a/contrib/python/argcomplete/py3/argcomplete/bash_completion.d/_python-argcomplete b/contrib/python/argcomplete/py3/argcomplete/bash_completion.d/_python-argcomplete index 1e4c66ce80..3023ff289e 100644 --- a/contrib/python/argcomplete/py3/argcomplete/bash_completion.d/_python-argcomplete +++ b/contrib/python/argcomplete/py3/argcomplete/bash_completion.d/_python-argcomplete @@ -1,10 +1,15 @@ -#compdef -P * +#compdef -default- # Copyright 2012-2023, Andrey Kislyuk and argcomplete contributors. # Licensed under the Apache License. See https://github.com/kislyuk/argcomplete for more info. # Note: both the leading underscore in the name of this file and the first line (compdef) are required by zsh +# In zsh, this file is autoloaded and used as the default completer (_default). +# There are many other special contexts we don't want to override +# (as would be the case with `#compdef -P *`). 
+# https://zsh.sourceforge.io/Doc/Release/Completion-System.html + # Copy of __expand_tilde_by_ref from bash-completion # ZSH implementation added __python_argcomplete_expand_tilde_by_ref () { @@ -138,12 +143,6 @@ _python_argcomplete_global() { req_argv=( "" "${COMP_WORDS[@]:1}" ) __python_argcomplete_expand_tilde_by_ref executable else - if [[ "$service" != "-default-" ]]; then - # TODO: this may not be sufficient - see https://zsh.sourceforge.io/Doc/Release/Completion-System.html - # May need to call _complete with avoid-completer=_python-argcomplete or something like that - _default - return - fi executable="${words[1]}" req_argv=( "${words[@]:1}" ) fi @@ -208,7 +207,15 @@ _python_argcomplete_global() { _ARGCOMPLETE_SHELL="zsh" \ _ARGCOMPLETE_SUPPRESS_SPACE=1 \ __python_argcomplete_run "$executable" "${(@)req_argv[1, ${ARGCOMPLETE}-1]}")) - _describe "$executable" completions + local nosort=() + local nospace=() + if is-at-least 5.8; then + nosort=(-o nosort) + fi + if [[ "${completions-}" =~ ([^\\]): && "${BASH_REMATCH[2]}" =~ [=/:] ]]; then + nospace=(-S '') + fi + _describe "$executable" completions "${nosort[@]}" "${nospace[@]}" else COMPREPLY=($(IFS="$IFS" \ COMP_LINE="$COMP_LINE" \ @@ -234,5 +241,20 @@ _python_argcomplete_global() { if [[ -z "${ZSH_VERSION-}" ]]; then complete -o default -o bashdefault -D -F _python_argcomplete_global else - compdef _python_argcomplete_global -P '*' + # -Uz is recommended for the use of functions supplied with the zsh distribution. + # https://unix.stackexchange.com/a/214306 + autoload -Uz is-at-least + # If this is being implicitly loaded because we placed it on fpath, + # the comment at the top of this file causes zsh to invoke this script directly, + # so we must explicitly call the global completion function. + # Note $service should only ever be -default- because the comment at the top + # registers this script as the default completer (#compdef -default-). 
+ if [[ $service == -default- ]]; then + _python_argcomplete_global + fi + # If this has been executed directly (e.g. `eval "$(activate-global-python-argcomplete --dest=-)"`) + # we need to explicitly call compdef to register the completion function. + # If we have been implicitly loaded, we still call compdef as a slight optimisation + # (there is no need to execute any top-level code more than once). + compdef _python_argcomplete_global -default- fi diff --git a/contrib/python/argcomplete/py3/argcomplete/shell_integration.py b/contrib/python/argcomplete/py3/argcomplete/shell_integration.py index 53b8e18234..73214bb651 100644 --- a/contrib/python/argcomplete/py3/argcomplete/shell_integration.py +++ b/contrib/python/argcomplete/py3/argcomplete/shell_integration.py @@ -42,7 +42,15 @@ _python_argcomplete%(function_suffix)s() { _ARGCOMPLETE_SHELL="zsh" \ _ARGCOMPLETE_SUPPRESS_SPACE=1 \ __python_argcomplete_run ${script:-${words[1]}})) - _describe "${words[1]}" completions -o nosort + local nosort=() + local nospace=() + if is-at-least 5.8; then + nosort=(-o nosort) + fi + if [[ "${completions-}" =~ ([^\\]): && "${match[1]}" =~ [=/:] ]]; then + nospace=(-S '') + fi + _describe "${words[1]}" completions "${nosort[@]}" "${nospace[@]}" else local SUPPRESS_SPACE=0 if compopt +o nospace 2> /dev/null; then @@ -67,6 +75,7 @@ _python_argcomplete%(function_suffix)s() { if [[ -z "${ZSH_VERSION-}" ]]; then complete %(complete_opts)s -F _python_argcomplete%(function_suffix)s %(executables)s else + autoload is-at-least compdef _python_argcomplete%(function_suffix)s %(executables)s fi """ diff --git a/contrib/python/argcomplete/py3/ya.make b/contrib/python/argcomplete/py3/ya.make index 6c09efa674..ceca768f4d 100644 --- a/contrib/python/argcomplete/py3/ya.make +++ b/contrib/python/argcomplete/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(3.1.6) +VERSION(3.2.1) LICENSE(Apache-2.0) diff --git a/contrib/python/google-auth/py3/.dist-info/METADATA 
b/contrib/python/google-auth/py3/.dist-info/METADATA index 23841a2ee7..f86d77d41b 100644 --- a/contrib/python/google-auth/py3/.dist-info/METADATA +++ b/contrib/python/google-auth/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: google-auth -Version: 2.23.0 +Version: 2.25.2 Summary: Google Authentication Library Home-page: https://github.com/googleapis/google-auth-library-python Author: Google Cloud Platform @@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: Apache Software License @@ -23,23 +24,22 @@ Classifier: Operating System :: OS Independent Classifier: Topic :: Internet :: WWW/HTTP Requires-Python: >=3.7 License-File: LICENSE -Requires-Dist: cachetools (<6.0,>=2.0.0) -Requires-Dist: pyasn1-modules (>=0.2.1) -Requires-Dist: rsa (<5,>=3.1.4) -Requires-Dist: urllib3 (<2.0) +Requires-Dist: cachetools <6.0,>=2.0.0 +Requires-Dist: pyasn1-modules >=0.2.1 +Requires-Dist: rsa <5,>=3.1.4 Provides-Extra: aiohttp -Requires-Dist: aiohttp (<4.0.0.dev0,>=3.6.2) ; extra == 'aiohttp' -Requires-Dist: requests (<3.0.0.dev0,>=2.20.0) ; extra == 'aiohttp' +Requires-Dist: aiohttp <4.0.0.dev0,>=3.6.2 ; extra == 'aiohttp' +Requires-Dist: requests <3.0.0.dev0,>=2.20.0 ; extra == 'aiohttp' Provides-Extra: enterprise_cert -Requires-Dist: cryptography (==36.0.2) ; extra == 'enterprise_cert' -Requires-Dist: pyopenssl (==22.0.0) ; extra == 'enterprise_cert' +Requires-Dist: cryptography ==36.0.2 ; extra == 'enterprise_cert' +Requires-Dist: pyopenssl ==22.0.0 ; extra == 'enterprise_cert' Provides-Extra: pyopenssl -Requires-Dist: pyopenssl (>=20.0.0) ; extra == 'pyopenssl' -Requires-Dist: cryptography (>=38.0.3) ; extra 
== 'pyopenssl' +Requires-Dist: pyopenssl >=20.0.0 ; extra == 'pyopenssl' +Requires-Dist: cryptography >=38.0.3 ; extra == 'pyopenssl' Provides-Extra: reauth -Requires-Dist: pyu2f (>=0.1.5) ; extra == 'reauth' +Requires-Dist: pyu2f >=0.1.5 ; extra == 'reauth' Provides-Extra: requests -Requires-Dist: requests (<3.0.0.dev0,>=2.20.0) ; extra == 'requests' +Requires-Dist: requests <3.0.0.dev0,>=2.20.0 ; extra == 'requests' Google Auth Python Library ========================== @@ -63,7 +63,7 @@ You can install using `pip`_:: For more information on setting up your Python development environment, please refer to `Python Development Environment Setup Guide`_ for Google Cloud Platform. -.. _`Python Development Environment Setup Guide`: https://cloud.google.com/python/setup +.. _`Python Development Environment Setup Guide`: https://cloud.google.com/python/docs/setup Extras ------ @@ -80,6 +80,16 @@ Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ Python >= 3.7 +**NOTE**: +Python 3.7 was marked as `unsupported`_ by the python community in June 2023. +We recommend that all developers upgrade to Python 3.8 and newer as soon as +they can. Support for Python 3.7 will be removed from this library after +January 1 2024. Previous releases that support Python 3.7 will continue to be available +for download, but releases after January 1 2024 will only target Python 3.8 and +newer. + +.. 
_unsupported: https://devguide.python.org/versions/#unsupported-versions + Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Python == 2.7: The last version of this library with support for Python 2.7 diff --git a/contrib/python/google-auth/py3/README.rst b/contrib/python/google-auth/py3/README.rst index cdd19bed50..e058f24713 100644 --- a/contrib/python/google-auth/py3/README.rst +++ b/contrib/python/google-auth/py3/README.rst @@ -20,7 +20,7 @@ You can install using `pip`_:: For more information on setting up your Python development environment, please refer to `Python Development Environment Setup Guide`_ for Google Cloud Platform. -.. _`Python Development Environment Setup Guide`: https://cloud.google.com/python/setup +.. _`Python Development Environment Setup Guide`: https://cloud.google.com/python/docs/setup Extras ------ @@ -37,6 +37,16 @@ Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ Python >= 3.7 +**NOTE**: +Python 3.7 was marked as `unsupported`_ by the python community in June 2023. +We recommend that all developers upgrade to Python 3.8 and newer as soon as +they can. Support for Python 3.7 will be removed from this library after +January 1 2024. Previous releases that support Python 3.7 will continue to be available +for download, but releases after January 1 2024 will only target Python 3.8 and +newer. + +.. 
_unsupported: https://devguide.python.org/versions/#unsupported-versions + Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Python == 2.7: The last version of this library with support for Python 2.7 diff --git a/contrib/python/google-auth/py3/google/auth/__init__.py b/contrib/python/google-auth/py3/google/auth/__init__.py index 2875772b37..765bbd7058 100644 --- a/contrib/python/google-auth/py3/google/auth/__init__.py +++ b/contrib/python/google-auth/py3/google/auth/__init__.py @@ -15,6 +15,8 @@ """Google Auth Library for Python.""" import logging +import sys +import warnings from google.auth import version as google_auth_version from google.auth._default import ( @@ -29,5 +31,23 @@ __version__ = google_auth_version.__version__ __all__ = ["default", "load_credentials_from_file", "load_credentials_from_dict"] + +class Python37DeprecationWarning(DeprecationWarning): # pragma: NO COVER + """ + Deprecation warning raised when Python 3.7 runtime is detected. + Python 3.7 support will be dropped after January 1, 2024. + """ + + pass + + +# Checks if the current runtime is Python 3.7. +if sys.version_info.major == 3 and sys.version_info.minor == 7: # pragma: NO COVER + message = ( + "After January 1, 2024, new releases of this library will drop support " + "for Python 3.7." + ) + warnings.warn(message, Python37DeprecationWarning) + # Set default logging handler to avoid "No handler found" warnings. 
logging.getLogger(__name__).addHandler(logging.NullHandler()) diff --git a/contrib/python/google-auth/py3/google/auth/_helpers.py b/contrib/python/google-auth/py3/google/auth/_helpers.py index ad2c095f28..a6c07f7d82 100644 --- a/contrib/python/google-auth/py3/google/auth/_helpers.py +++ b/contrib/python/google-auth/py3/google/auth/_helpers.py @@ -17,16 +17,15 @@ import base64 import calendar import datetime +from email.message import Message import sys import urllib from google.auth import exceptions -# Token server doesn't provide a new a token when doing refresh unless the -# token is expiring within 30 seconds, so refresh threshold should not be -# more than 30 seconds. Otherwise auth lib will send tons of refresh requests -# until 30 seconds before the expiration, and cause a spike of CPU usage. -REFRESH_THRESHOLD = datetime.timedelta(seconds=20) +# The smallest MDS cache used by this library stores tokens until 4 minutes from +# expiry. +REFRESH_THRESHOLD = datetime.timedelta(minutes=3, seconds=45) def copy_docstring(source_class): @@ -63,13 +62,42 @@ def copy_docstring(source_class): return decorator +def parse_content_type(header_value): + """Parse a 'content-type' header value to get just the plain media-type (without parameters). + + This is done using the class Message from email.message as suggested in PEP 594 + (because the cgi is now deprecated and will be removed in python 3.13, + see https://peps.python.org/pep-0594/#cgi). + + Args: + header_value (str): The value of a 'content-type' header as a string. + + Returns: + str: A string with just the lowercase media-type from the parsed 'content-type' header. + If the provided content-type is not parsable, returns 'text/plain', + the default value for textual files. + """ + m = Message() + m["content-type"] = header_value + return ( + m.get_content_type() + ) # Despite the name, actually returns just the media-type + + def utcnow(): """Returns the current UTC datetime. 
Returns: datetime: The current time in UTC. """ - return datetime.datetime.utcnow() + # We used datetime.utcnow() before, since it's deprecated from python 3.12, + # we are using datetime.now(timezone.utc) now. "utcnow()" is offset-native + # (no timezone info), but "now()" is offset-aware (with timezone info). + # This will cause datetime comparison problem. For backward compatibility, + # we need to remove the timezone info. + now = datetime.datetime.now(datetime.timezone.utc) + now = now.replace(tzinfo=None) + return now def datetime_to_secs(value): diff --git a/contrib/python/google-auth/py3/google/auth/compute_engine/__init__.py b/contrib/python/google-auth/py3/google/auth/compute_engine/__init__.py index 5c84234e93..7e1206fc1b 100644 --- a/contrib/python/google-auth/py3/google/auth/compute_engine/__init__.py +++ b/contrib/python/google-auth/py3/google/auth/compute_engine/__init__.py @@ -14,8 +14,9 @@ """Google Compute Engine authentication.""" +from google.auth.compute_engine._metadata import detect_gce_residency_linux from google.auth.compute_engine.credentials import Credentials from google.auth.compute_engine.credentials import IDTokenCredentials -__all__ = ["Credentials", "IDTokenCredentials"] +__all__ = ["Credentials", "IDTokenCredentials", "detect_gce_residency_linux"] diff --git a/contrib/python/google-auth/py3/google/auth/compute_engine/_metadata.py b/contrib/python/google-auth/py3/google/auth/compute_engine/_metadata.py index 04abe178f5..1c884c3c43 100644 --- a/contrib/python/google-auth/py3/google/auth/compute_engine/_metadata.py +++ b/contrib/python/google-auth/py3/google/auth/compute_engine/_metadata.py @@ -156,6 +156,7 @@ def get( recursive=False, retry_count=5, headers=None, + return_none_for_not_found_error=False, ): """Fetch a resource from the metadata server. @@ -173,6 +174,8 @@ def get( retry_count (int): How many times to attempt connecting to metadata server using above timeout. 
headers (Optional[Mapping[str, str]]): Headers for the request. + return_none_for_not_found_error (Optional[bool]): If True, returns None + for 404 error instead of throwing an exception. Returns: Union[Mapping, str]: If the metadata server returns JSON, a mapping of @@ -216,9 +219,21 @@ def get( "metadata service. Compute Engine Metadata server unavailable".format(url) ) + content = _helpers.from_bytes(response.data) + + if response.status == http_client.NOT_FOUND and return_none_for_not_found_error: + _LOGGER.info( + "Compute Engine Metadata server call to %s returned 404, reason: %s", + path, + content, + ) + return None + if response.status == http_client.OK: - content = _helpers.from_bytes(response.data) - if response.headers["content-type"] == "application/json": + if ( + _helpers.parse_content_type(response.headers["content-type"]) + == "application/json" + ): try: return json.loads(content) except ValueError as caught_exc: @@ -229,14 +244,14 @@ def get( raise new_exc from caught_exc else: return content - else: - raise exceptions.TransportError( - "Failed to retrieve {} from the Google Compute Engine " - "metadata service. Status: {} Response:\n{}".format( - url, response.status, response.data - ), - response, - ) + + raise exceptions.TransportError( + "Failed to retrieve {} from the Google Compute Engine " + "metadata service. Status: {} Response:\n{}".format( + url, response.status, response.data + ), + response, + ) def get_project_id(request): @@ -256,6 +271,29 @@ def get_project_id(request): return get(request, "project/project-id") +def get_universe_domain(request): + """Get the universe domain value from the metadata server. + + Args: + request (google.auth.transport.Request): A callable used to make + HTTP requests. + + Returns: + str: The universe domain value. 
If the universe domain endpoint is not + not found, return the default value, which is googleapis.com + + Raises: + google.auth.exceptions.TransportError: if an error other than + 404 occurs while retrieving metadata. + """ + universe_domain = get( + request, "universe/universe_domain", return_none_for_not_found_error=True + ) + if not universe_domain: + return "googleapis.com" + return universe_domain + + def get_service_account_info(request, service_account="default"): """Get information about a service account from the metadata server. diff --git a/contrib/python/google-auth/py3/google/auth/compute_engine/credentials.py b/contrib/python/google-auth/py3/google/auth/compute_engine/credentials.py index 7ae673880f..a035c7697a 100644 --- a/contrib/python/google-auth/py3/google/auth/compute_engine/credentials.py +++ b/contrib/python/google-auth/py3/google/auth/compute_engine/credentials.py @@ -28,6 +28,7 @@ from google.auth import iam from google.auth import jwt from google.auth import metrics from google.auth.compute_engine import _metadata +from google.auth.transport import requests as google_auth_requests from google.oauth2 import _client @@ -73,6 +74,8 @@ class Credentials(credentials.Scoped, credentials.CredentialsWithQuotaProject): self._quota_project_id = quota_project_id self._scopes = scopes self._default_scopes = default_scopes + self._universe_domain_cached = False + self._universe_domain_request = google_auth_requests.Request() def _retrieve_info(self, request): """Retrieve information about the service account. 
@@ -131,6 +134,16 @@ class Credentials(credentials.Scoped, credentials.CredentialsWithQuotaProject): def requires_scopes(self): return not self._scopes + @property + def universe_domain(self): + if self._universe_domain_cached: + return self._universe_domain + self._universe_domain = _metadata.get_universe_domain( + self._universe_domain_request + ) + self._universe_domain_cached = True + return self._universe_domain + @_helpers.copy_docstring(credentials.CredentialsWithQuotaProject) def with_quota_project(self, quota_project_id): return self.__class__( diff --git a/contrib/python/google-auth/py3/google/auth/credentials.py b/contrib/python/google-auth/py3/google/auth/credentials.py index 80a2a5c0b4..800781c408 100644 --- a/contrib/python/google-auth/py3/google/auth/credentials.py +++ b/contrib/python/google-auth/py3/google/auth/credentials.py @@ -52,8 +52,9 @@ class Credentials(metaclass=abc.ABCMeta): self._quota_project_id = None """Optional[str]: Project to use for quota and billing purposes.""" self._trust_boundary = None - """Optional[str]: Encoded string representation of credentials trust - boundary.""" + """Optional[dict]: Cache of a trust boundary response which has a list + of allowed regions and an encoded string representation of credentials + trust boundary.""" self._universe_domain = "googleapis.com" """Optional[str]: The universe domain value, default is googleapis.com """ @@ -135,8 +136,21 @@ class Credentials(metaclass=abc.ABCMeta): headers["authorization"] = "Bearer {}".format( _helpers.from_bytes(token or self.token) ) + """Trust boundary value will be a cached value from global lookup. + + The response of trust boundary will be a list of regions and a hex + encoded representation. 
+ + An example of global lookup response: + { + "locations": [ + "us-central1", "us-east1", "europe-west1", "asia-east1" + ] + "encoded_locations": "0xA30" + } + """ if self._trust_boundary is not None: - headers["x-identity-trust-boundary"] = self._trust_boundary + headers["x-allowed-locations"] = self._trust_boundary["encoded_locations"] if self.quota_project_id: headers["x-goog-user-project"] = self.quota_project_id diff --git a/contrib/python/google-auth/py3/google/auth/crypt/_cryptography_rsa.py b/contrib/python/google-auth/py3/google/auth/crypt/_cryptography_rsa.py index 4f2d611666..1a3e9ff52c 100644 --- a/contrib/python/google-auth/py3/google/auth/crypt/_cryptography_rsa.py +++ b/contrib/python/google-auth/py3/google/auth/crypt/_cryptography_rsa.py @@ -134,3 +134,18 @@ class RSASigner(base.Signer, base.FromServiceAccountMixin): key, password=None, backend=_BACKEND ) return cls(private_key, key_id=key_id) + + def __getstate__(self): + """Pickle helper that serializes the _key attribute.""" + state = self.__dict__.copy() + state["_key"] = self._key.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.PKCS8, + encryption_algorithm=serialization.NoEncryption(), + ) + return state + + def __setstate__(self, state): + """Pickle helper that deserializes the _key attribute.""" + state["_key"] = serialization.load_pem_private_key(state["_key"], None) + self.__dict__.update(state) diff --git a/contrib/python/google-auth/py3/google/auth/crypt/es256.py b/contrib/python/google-auth/py3/google/auth/crypt/es256.py index 7920cc7ffb..820e4becce 100644 --- a/contrib/python/google-auth/py3/google/auth/crypt/es256.py +++ b/contrib/python/google-auth/py3/google/auth/crypt/es256.py @@ -158,3 +158,18 @@ class ES256Signer(base.Signer, base.FromServiceAccountMixin): key, password=None, backend=_BACKEND ) return cls(private_key, key_id=key_id) + + def __getstate__(self): + """Pickle helper that serializes the _key attribute.""" + state = 
self.__dict__.copy() + state["_key"] = self._key.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.PKCS8, + encryption_algorithm=serialization.NoEncryption(), + ) + return state + + def __setstate__(self, state): + """Pickle helper that deserializes the _key attribute.""" + state["_key"] = serialization.load_pem_private_key(state["_key"], None) + self.__dict__.update(state) diff --git a/contrib/python/google-auth/py3/google/auth/external_account.py b/contrib/python/google-auth/py3/google/auth/external_account.py index c45e6f2133..e7fed8695a 100644 --- a/contrib/python/google-auth/py3/google/auth/external_account.py +++ b/contrib/python/google-auth/py3/google/auth/external_account.py @@ -132,7 +132,10 @@ class Credentials( self._default_scopes = default_scopes self._workforce_pool_user_project = workforce_pool_user_project self._universe_domain = universe_domain or _DEFAULT_UNIVERSE_DOMAIN - self._trust_boundary = "0" # expose a placeholder trust boundary value. + self._trust_boundary = { + "locations": [], + "encoded_locations": "0x0", + } # expose a placeholder trust boundary value. if self._client_id: self._client_auth = utils.ClientAuthentication( @@ -412,6 +415,22 @@ class Credentials( new_cred._metrics_options = self._metrics_options return new_cred + def with_universe_domain(self, universe_domain): + """Create a copy of these credentials with the given universe domain. + + Args: + universe_domain (str): The universe domain value. + + Returns: + google.auth.external_account.Credentials: A new credentials + instance. + """ + kwargs = self._constructor_args() + kwargs.update(universe_domain=universe_domain) + new_cred = self.__class__(**kwargs) + new_cred._metrics_options = self._metrics_options + return new_cred + def _initialize_impersonated_credentials(self): """Generates an impersonated credentials. 
diff --git a/contrib/python/google-auth/py3/google/auth/transport/_custom_tls_signer.py b/contrib/python/google-auth/py3/google/auth/transport/_custom_tls_signer.py index 07f14df02d..57a563d03b 100644 --- a/contrib/python/google-auth/py3/google/auth/transport/_custom_tls_signer.py +++ b/contrib/python/google-auth/py3/google/auth/transport/_custom_tls_signer.py @@ -107,6 +107,22 @@ def load_signer_lib(signer_lib_path): return lib +def load_provider_lib(provider_lib_path): + _LOGGER.debug("loading provider library from %s", provider_lib_path) + + # winmode parameter is only available for python 3.8+. + lib = ( + ctypes.CDLL(provider_lib_path, winmode=0) + if sys.version_info >= (3, 8) and os.name == "nt" + else ctypes.CDLL(provider_lib_path) + ) + + lib.ECP_attach_to_ctx.argtypes = [ctypes.c_void_p, ctypes.c_char_p] + lib.ECP_attach_to_ctx.restype = ctypes.c_int + + return lib + + # Computes SHA256 hash. def _compute_sha256_digest(to_be_signed, to_be_signed_len): from cryptography.hazmat.primitives import hashes @@ -199,21 +215,31 @@ class CustomTlsSigner(object): self._enterprise_cert_file_path = enterprise_cert_file_path self._cert = None self._sign_callback = None + self._provider_lib = None def load_libraries(self): - try: - with open(self._enterprise_cert_file_path, "r") as f: - enterprise_cert_json = json.load(f) - libs = enterprise_cert_json["libs"] - signer_library = libs["ecp_client"] - offload_library = libs["tls_offload"] - except (KeyError, ValueError) as caught_exc: - new_exc = exceptions.MutualTLSChannelError( - "enterprise cert file is invalid", caught_exc - ) - raise new_exc from caught_exc - self._offload_lib = load_offload_lib(offload_library) - self._signer_lib = load_signer_lib(signer_library) + with open(self._enterprise_cert_file_path, "r") as f: + enterprise_cert_json = json.load(f) + libs = enterprise_cert_json.get("libs", {}) + + signer_library = libs.get("ecp_client", None) + offload_library = libs.get("tls_offload", None) + provider_library 
= libs.get("ecp_provider", None) + + # Using newer provider implementation. This is mutually exclusive to the + # offload implementation. + if provider_library: + self._provider_lib = load_provider_lib(provider_library) + return + + # Using old offload implementation + if offload_library and signer_library: + self._offload_lib = load_offload_lib(offload_library) + self._signer_lib = load_signer_lib(signer_library) + self.set_up_custom_key() + return + + raise exceptions.MutualTLSChannelError("enterprise cert file is invalid") def set_up_custom_key(self): # We need to keep a reference of the cert and sign callback so it won't @@ -224,11 +250,22 @@ class CustomTlsSigner(object): ) def attach_to_ssl_context(self, ctx): - # In the TLS handshake, the signing operation will be done by the - # sign_callback. - if not self._offload_lib.ConfigureSslContext( - self._sign_callback, - ctypes.c_char_p(self._cert), - _cast_ssl_ctx_to_void_p(ctx._ctx._context), - ): - raise exceptions.MutualTLSChannelError("failed to configure SSL context") + if self._provider_lib: + if not self._provider_lib.ECP_attach_to_ctx( + _cast_ssl_ctx_to_void_p(ctx._ctx._context), + self._enterprise_cert_file_path.encode("ascii"), + ): + raise exceptions.MutualTLSChannelError( + "failed to configure ECP Provider SSL context" + ) + elif self._offload_lib and self._signer_lib: + if not self._offload_lib.ConfigureSslContext( + self._sign_callback, + ctypes.c_char_p(self._cert), + _cast_ssl_ctx_to_void_p(ctx._ctx._context), + ): + raise exceptions.MutualTLSChannelError( + "failed to configure ECP Offload SSL context" + ) + else: + raise exceptions.MutualTLSChannelError("Invalid ECP configuration.") diff --git a/contrib/python/google-auth/py3/google/auth/transport/requests.py b/contrib/python/google-auth/py3/google/auth/transport/requests.py index b9bcad359f..aa16113226 100644 --- a/contrib/python/google-auth/py3/google/auth/transport/requests.py +++ 
b/contrib/python/google-auth/py3/google/auth/transport/requests.py @@ -274,7 +274,6 @@ class _MutualTlsOffloadAdapter(requests.adapters.HTTPAdapter): self.signer = _custom_tls_signer.CustomTlsSigner(enterprise_cert_file_path) self.signer.load_libraries() - self.signer.set_up_custom_key() poolmanager = create_urllib3_context() poolmanager.load_verify_locations(cafile=certifi.where()) diff --git a/contrib/python/google-auth/py3/google/auth/transport/urllib3.py b/contrib/python/google-auth/py3/google/auth/transport/urllib3.py index 053d6f7b72..63144f5fff 100644 --- a/contrib/python/google-auth/py3/google/auth/transport/urllib3.py +++ b/contrib/python/google-auth/py3/google/auth/transport/urllib3.py @@ -40,11 +40,18 @@ except ImportError as caught_exc: # pragma: NO COVER "urllib3 package to use the urllib3 transport." ) from caught_exc +from packaging import version # type: ignore + from google.auth import environment_vars from google.auth import exceptions from google.auth import transport from google.oauth2 import service_account +if version.parse(urllib3.__version__) >= version.parse("2.0.0"): # pragma: NO COVER + RequestMethods = urllib3._request_methods.RequestMethods # type: ignore +else: # pragma: NO COVER + RequestMethods = urllib3.request.RequestMethods # type: ignore + _LOGGER = logging.getLogger(__name__) @@ -179,7 +186,7 @@ def _make_mutual_tls_http(cert, key): return http -class AuthorizedHttp(urllib3.request.RequestMethods): +class AuthorizedHttp(RequestMethods): # type: ignore """A urllib3 HTTP class with credentials. 
This class is used to perform requests to API endpoints that require diff --git a/contrib/python/google-auth/py3/google/auth/version.py b/contrib/python/google-auth/py3/google/auth/version.py index 491187e6d7..31cc30242a 100644 --- a/contrib/python/google-auth/py3/google/auth/version.py +++ b/contrib/python/google-auth/py3/google/auth/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.0" +__version__ = "2.25.2" diff --git a/contrib/python/google-auth/py3/google/oauth2/__init__.py b/contrib/python/google-auth/py3/google/oauth2/__init__.py index 4fb71fd1ad..accae96579 100644 --- a/contrib/python/google-auth/py3/google/oauth2/__init__.py +++ b/contrib/python/google-auth/py3/google/oauth2/__init__.py @@ -13,3 +13,24 @@ # limitations under the License. """Google OAuth 2.0 Library for Python.""" + +import sys +import warnings + + +class Python37DeprecationWarning(DeprecationWarning): # pragma: NO COVER + """ + Deprecation warning raised when Python 3.7 runtime is detected. + Python 3.7 support will be dropped after January 1, 2024. + """ + + pass + + +# Checks if the current runtime is Python 3.7. +if sys.version_info.major == 3 and sys.version_info.minor == 7: # pragma: NO COVER + message = ( + "After January 1, 2024, new releases of this library will drop support " + "for Python 3.7." 
+ ) + warnings.warn(message, Python37DeprecationWarning) diff --git a/contrib/python/google-auth/py3/google/oauth2/_credentials_async.py b/contrib/python/google-auth/py3/google/oauth2/_credentials_async.py index e7b9637c82..b5561aae02 100644 --- a/contrib/python/google-auth/py3/google/oauth2/_credentials_async.py +++ b/contrib/python/google-auth/py3/google/oauth2/_credentials_async.py @@ -96,6 +96,12 @@ class Credentials(oauth2_credentials.Credentials): ) ) + @_helpers.copy_docstring(credentials.Credentials) + async def before_request(self, request, method, url, headers): + if not self.valid: + await self.refresh(request) + self.apply(headers) + class UserAccessTokenCredentials(oauth2_credentials.UserAccessTokenCredentials): """Access token credentials for user account. diff --git a/contrib/python/google-auth/py3/google/oauth2/credentials.py b/contrib/python/google-auth/py3/google/oauth2/credentials.py index 4643fdbea6..a5c93ecc2f 100644 --- a/contrib/python/google-auth/py3/google/oauth2/credentials.py +++ b/contrib/python/google-auth/py3/google/oauth2/credentials.py @@ -49,13 +49,15 @@ _LOGGER = logging.getLogger(__name__) # The Google OAuth 2.0 token endpoint. Used for authorized user credentials. _GOOGLE_OAUTH2_TOKEN_ENDPOINT = "https://oauth2.googleapis.com/token" +_DEFAULT_UNIVERSE_DOMAIN = "googleapis.com" class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaProject): """Credentials using OAuth 2.0 access and refresh tokens. - The credentials are considered immutable. If you want to modify the - quota project, use :meth:`with_quota_project` or :: + The credentials are considered immutable except the tokens and the token + expiry, which are updated after refresh. 
If you want to modify the quota + project, use :meth:`with_quota_project` or :: credentials = credentials.with_quota_project('myproject-123') @@ -84,6 +86,7 @@ class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaPr enable_reauth_refresh=False, granted_scopes=None, trust_boundary=None, + universe_domain=_DEFAULT_UNIVERSE_DOMAIN, ): """ Args: @@ -125,6 +128,9 @@ class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaPr granted_scopes (Optional[Sequence[str]]): The scopes that were consented/granted by the user. This could be different from the requested scopes and it could be empty if granted and requested scopes were same. + trust_boundary (str): String representation of trust boundary meta. + universe_domain (Optional[str]): The universe domain. The default + universe domain is googleapis.com. """ super(Credentials, self).__init__() self.token = token @@ -142,6 +148,7 @@ class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaPr self.refresh_handler = refresh_handler self._enable_reauth_refresh = enable_reauth_refresh self._trust_boundary = trust_boundary + self._universe_domain = universe_domain or _DEFAULT_UNIVERSE_DOMAIN def __getstate__(self): """A __getstate__ method must exist for the __setstate__ to be called @@ -173,7 +180,7 @@ class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaPr self._rapt_token = d.get("_rapt_token") self._enable_reauth_refresh = d.get("_enable_reauth_refresh") self._trust_boundary = d.get("_trust_boundary") - self._universe_domain = d.get("_universe_domain") + self._universe_domain = d.get("_universe_domain") or _DEFAULT_UNIVERSE_DOMAIN # The refresh_handler setter should be used to repopulate this. 
self._refresh_handler = None @@ -272,6 +279,7 @@ class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaPr rapt_token=self.rapt_token, enable_reauth_refresh=self._enable_reauth_refresh, trust_boundary=self._trust_boundary, + universe_domain=self._universe_domain, ) @_helpers.copy_docstring(credentials.CredentialsWithTokenUri) @@ -291,6 +299,34 @@ class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaPr rapt_token=self.rapt_token, enable_reauth_refresh=self._enable_reauth_refresh, trust_boundary=self._trust_boundary, + universe_domain=self._universe_domain, + ) + + def with_universe_domain(self, universe_domain): + """Create a copy of the credential with the given universe domain. + + Args: + universe_domain (str): The universe domain value. + + Returns: + google.oauth2.credentials.Credentials: A new credentials instance. + """ + + return self.__class__( + self.token, + refresh_token=self.refresh_token, + id_token=self.id_token, + token_uri=self._token_uri, + client_id=self.client_id, + client_secret=self.client_secret, + scopes=self.scopes, + default_scopes=self.default_scopes, + granted_scopes=self.granted_scopes, + quota_project_id=self.quota_project_id, + rapt_token=self.rapt_token, + enable_reauth_refresh=self._enable_reauth_refresh, + trust_boundary=self._trust_boundary, + universe_domain=universe_domain, ) def _metric_header_for_usage(self): @@ -298,6 +334,17 @@ class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaPr @_helpers.copy_docstring(credentials.Credentials) def refresh(self, request): + if self._universe_domain != _DEFAULT_UNIVERSE_DOMAIN: + raise exceptions.RefreshError( + "User credential refresh is only supported in the default " + "googleapis.com universe domain, but the current universe " + "domain is {}. 
If you created the credential with an access " + "token, it's likely that the provided token is expired now, " + "please update your code with a valid token.".format( + self._universe_domain + ) + ) + scopes = self._scopes if self._scopes is not None else self._default_scopes # Use refresh handler if available and no refresh token is # available. This is useful in general when tokens are obtained by calling @@ -427,6 +474,7 @@ class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaPr expiry=expiry, rapt_token=info.get("rapt_token"), # may not exist trust_boundary=info.get("trust_boundary"), # may not exist + universe_domain=info.get("universe_domain"), # may not exist ) @classmethod @@ -470,6 +518,7 @@ class Credentials(credentials.ReadOnlyScoped, credentials.CredentialsWithQuotaPr "client_secret": self.client_secret, "scopes": self.scopes, "rapt_token": self.rapt_token, + "universe_domain": self._universe_domain, } if self.expiry: # flatten expiry timestamp prep["expiry"] = self.expiry.isoformat() + "Z" diff --git a/contrib/python/google-auth/py3/google/oauth2/service_account.py b/contrib/python/google-auth/py3/google/oauth2/service_account.py index e08899f8e5..68db41af40 100644 --- a/contrib/python/google-auth/py3/google/oauth2/service_account.py +++ b/contrib/python/google-auth/py3/google/oauth2/service_account.py @@ -182,10 +182,7 @@ class Credentials( self._quota_project_id = quota_project_id self._token_uri = token_uri self._always_use_jwt_access = always_use_jwt_access - if not universe_domain: - self._universe_domain = _DEFAULT_UNIVERSE_DOMAIN - else: - self._universe_domain = universe_domain + self._universe_domain = universe_domain or _DEFAULT_UNIVERSE_DOMAIN if universe_domain != _DEFAULT_UNIVERSE_DOMAIN: self._always_use_jwt_access = True @@ -196,7 +193,7 @@ class Credentials( self._additional_claims = additional_claims else: self._additional_claims = {} - self._trust_boundary = "0" + self._trust_boundary = {"locations": [], 
"encoded_locations": "0x0"} @classmethod def _from_signer_and_info(cls, signer, info, **kwargs): @@ -328,6 +325,22 @@ class Credentials( cred._always_use_jwt_access = always_use_jwt_access return cred + def with_universe_domain(self, universe_domain): + """Create a copy of these credentials with the given universe domain. + + Args: + universe_domain (str): The universe domain value. + + Returns: + google.auth.service_account.Credentials: A new credentials + instance. + """ + cred = self._make_copy() + cred._universe_domain = universe_domain + if universe_domain != _DEFAULT_UNIVERSE_DOMAIN: + cred._always_use_jwt_access = True + return cred + def with_subject(self, subject): """Create a copy of these credentials with the specified subject. @@ -417,13 +430,11 @@ class Credentials( @_helpers.copy_docstring(credentials.Credentials) def refresh(self, request): - if ( - self._universe_domain != _DEFAULT_UNIVERSE_DOMAIN - and not self._jwt_credentials - ): - raise exceptions.RefreshError( - "self._jwt_credentials is missing for non-default universe domain" - ) + if self._always_use_jwt_access and not self._jwt_credentials: + # If self signed jwt should be used but jwt credential is not + # created, try to create one with scopes + self._create_self_signed_jwt(None) + if self._universe_domain != _DEFAULT_UNIVERSE_DOMAIN and self._subject: raise exceptions.RefreshError( "domain wide delegation is not supported for non-default universe domain" diff --git a/contrib/python/google-auth/py3/tests/compute_engine/test__metadata.py b/contrib/python/google-auth/py3/tests/compute_engine/test__metadata.py index ddf84596af..5e037a940b 100644 --- a/contrib/python/google-auth/py3/tests/compute_engine/test__metadata.py +++ b/contrib/python/google-auth/py3/tests/compute_engine/test__metadata.py @@ -63,6 +63,7 @@ def make_request(data, status=http_client.OK, headers=None, retry=False): return request +@pytest.mark.xfail def test_detect_gce_residency_linux_success(): 
_metadata._GCE_PRODUCT_NAME_FILE = SMBIOS_PRODUCT_NAME_FILE assert _metadata.detect_gce_residency_linux() @@ -89,6 +90,7 @@ def test_is_on_gce_windows_success(): assert not _metadata.is_on_gce(request) +@pytest.mark.xfail @mock.patch("os.name", new="posix") def test_is_on_gce_linux_success(): request = make_request("", headers={_metadata._METADATA_FLAVOR_HEADER: "meep"}) @@ -176,6 +178,24 @@ def test_get_success_json(): assert result[key] == value +def test_get_success_json_content_type_charset(): + key, value = "foo", "bar" + + data = json.dumps({key: value}) + request = make_request( + data, headers={"content-type": "application/json; charset=UTF-8"} + ) + + result = _metadata.get(request, PATH) + + request.assert_called_once_with( + method="GET", + url=_metadata._METADATA_ROOT + PATH, + headers=_metadata._METADATA_HEADERS, + ) + assert result[key] == value + + def test_get_success_retry(): key, value = "foo", "bar" @@ -307,6 +327,18 @@ def test_get_failure(): ) +def test_get_return_none_for_not_found_error(): + request = make_request("Metadata error", status=http_client.NOT_FOUND) + + assert _metadata.get(request, PATH, return_none_for_not_found_error=True) is None + + request.assert_called_once_with( + method="GET", + url=_metadata._METADATA_ROOT + PATH, + headers=_metadata._METADATA_HEADERS, + ) + + def test_get_failure_connection_failed(): request = make_request("") request.side_effect = exceptions.TransportError() @@ -353,6 +385,53 @@ def test_get_project_id(): assert project_id == project +def test_get_universe_domain_success(): + request = make_request( + "fake_universe_domain", headers={"content-type": "text/plain"} + ) + + universe_domain = _metadata.get_universe_domain(request) + + request.assert_called_once_with( + method="GET", + url=_metadata._METADATA_ROOT + "universe/universe_domain", + headers=_metadata._METADATA_HEADERS, + ) + assert universe_domain == "fake_universe_domain" + + +def test_get_universe_domain_not_found(): + # Test that if the 
universe domain endpoint returns 404 error, we should + # use googleapis.com as the universe domain + request = make_request("not found", status=http_client.NOT_FOUND) + + universe_domain = _metadata.get_universe_domain(request) + + request.assert_called_once_with( + method="GET", + url=_metadata._METADATA_ROOT + "universe/universe_domain", + headers=_metadata._METADATA_HEADERS, + ) + assert universe_domain == "googleapis.com" + + +def test_get_universe_domain_other_error(): + # Test that if the universe domain endpoint returns an error other than 404 + # we should throw the error + request = make_request("unauthorized", status=http_client.UNAUTHORIZED) + + with pytest.raises(exceptions.TransportError) as excinfo: + _metadata.get_universe_domain(request) + + assert excinfo.match(r"unauthorized") + + request.assert_called_once_with( + method="GET", + url=_metadata._METADATA_ROOT + "universe/universe_domain", + headers=_metadata._METADATA_HEADERS, + ) + + @mock.patch( "google.auth.metrics.token_request_access_token_mds", return_value=ACCESS_TOKEN_REQUEST_METRICS_HEADER_VALUE, diff --git a/contrib/python/google-auth/py3/tests/compute_engine/test_credentials.py b/contrib/python/google-auth/py3/tests/compute_engine/test_credentials.py index 507fea9fcc..5d6ccdcdec 100644 --- a/contrib/python/google-auth/py3/tests/compute_engine/test_credentials.py +++ b/contrib/python/google-auth/py3/tests/compute_engine/test_credentials.py @@ -208,6 +208,30 @@ class TestCredentials(object): assert headers["authorization"] == "Bearer token" assert headers["x-goog-api-client"] == "cred-type/mds" + @mock.patch( + "google.auth.compute_engine._metadata.get_universe_domain", + return_value="fake_universe_domain", + ) + def test_universe_domain(self, get_universe_domain): + self.credentials._universe_domain_cached = False + self.credentials._universe_domain = "googleapis.com" + + # calling the universe_domain property should trigger a call to + # get_universe_domain to fetch the value. 
The value should be cached. + assert self.credentials.universe_domain == "fake_universe_domain" + assert self.credentials._universe_domain == "fake_universe_domain" + assert self.credentials._universe_domain_cached + get_universe_domain.assert_called_once_with( + self.credentials._universe_domain_request + ) + + # calling the universe_domain property the second time should use the + # cached value instead of calling get_universe_domain + assert self.credentials.universe_domain == "fake_universe_domain" + get_universe_domain.assert_called_once_with( + self.credentials._universe_domain_request + ) + class TestIDTokenCredentials(object): credentials = None diff --git a/contrib/python/google-auth/py3/tests/conftest.py b/contrib/python/google-auth/py3/tests/conftest.py index 08896b0f82..7658d8f456 100644 --- a/contrib/python/google-auth/py3/tests/conftest.py +++ b/contrib/python/google-auth/py3/tests/conftest.py @@ -21,9 +21,14 @@ import pytest # type: ignore def pytest_configure(): """Load public certificate and private key.""" - import __res - pytest.private_key_bytes = __res.find("data/privatekey.pem") - pytest.public_cert_bytes = __res.find("data/public_cert.pem") + import yatest.common as yc + pytest.data_dir = os.path.join(os.path.dirname(yc.source_path("contrib/python/google-auth/py3/tests/conftest.py")), "data") + + with open(os.path.join(pytest.data_dir, "privatekey.pem"), "rb") as fh: + pytest.private_key_bytes = fh.read() + + with open(os.path.join(pytest.data_dir, "public_cert.pem"), "rb") as fh: + pytest.public_cert_bytes = fh.read() @pytest.fixture diff --git a/contrib/python/google-auth/py3/tests/crypt/test__cryptography_rsa.py b/contrib/python/google-auth/py3/tests/crypt/test__cryptography_rsa.py index d19154b61b..2c4cebe0d7 100644 --- a/contrib/python/google-auth/py3/tests/crypt/test__cryptography_rsa.py +++ b/contrib/python/google-auth/py3/tests/crypt/test__cryptography_rsa.py @@ -14,6 +14,7 @@ import json import os +import pickle from 
cryptography.hazmat.primitives.asymmetric import rsa import pytest # type: ignore @@ -23,8 +24,8 @@ from google.auth.crypt import _cryptography_rsa from google.auth.crypt import base -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "..", "data") # To generate privatekey.pem, privatekey.pub, and public_cert.pem: # $ openssl req -new -newkey rsa:1024 -x509 -nodes -out public_cert.pem \ @@ -160,3 +161,17 @@ class TestRSASigner(object): assert signer.key_id == SERVICE_ACCOUNT_INFO[base._JSON_FILE_PRIVATE_KEY_ID] assert isinstance(signer._key, rsa.RSAPrivateKey) + + def test_pickle(self): + signer = _cryptography_rsa.RSASigner.from_service_account_file( + SERVICE_ACCOUNT_JSON_FILE + ) + + assert signer.key_id == SERVICE_ACCOUNT_INFO[base._JSON_FILE_PRIVATE_KEY_ID] + assert isinstance(signer._key, rsa.RSAPrivateKey) + + pickled_signer = pickle.dumps(signer) + signer = pickle.loads(pickled_signer) + + assert signer.key_id == SERVICE_ACCOUNT_INFO[base._JSON_FILE_PRIVATE_KEY_ID] + assert isinstance(signer._key, rsa.RSAPrivateKey) diff --git a/contrib/python/google-auth/py3/tests/crypt/test__python_rsa.py b/contrib/python/google-auth/py3/tests/crypt/test__python_rsa.py index 592b523d92..75dcb314f7 100644 --- a/contrib/python/google-auth/py3/tests/crypt/test__python_rsa.py +++ b/contrib/python/google-auth/py3/tests/crypt/test__python_rsa.py @@ -26,8 +26,8 @@ from google.auth.crypt import _python_rsa from google.auth.crypt import base -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "..", "data") # To generate privatekey.pem, privatekey.pub, and public_cert.pem: # $ openssl req -new -newkey rsa:1024 -x509 -nodes -out public_cert.pem \ diff --git a/contrib/python/google-auth/py3/tests/crypt/test_crypt.py 
b/contrib/python/google-auth/py3/tests/crypt/test_crypt.py index 97c2abc257..30de18a5dd 100644 --- a/contrib/python/google-auth/py3/tests/crypt/test_crypt.py +++ b/contrib/python/google-auth/py3/tests/crypt/test_crypt.py @@ -17,8 +17,8 @@ import os from google.auth import crypt -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "..", "data") # To generate privatekey.pem, privatekey.pub, and public_cert.pem: # $ openssl req -new -newkey rsa:1024 -x509 -nodes -out public_cert.pem \ diff --git a/contrib/python/google-auth/py3/tests/crypt/test_es256.py b/contrib/python/google-auth/py3/tests/crypt/test_es256.py index 1a43a2f01b..3ba5b64fad 100644 --- a/contrib/python/google-auth/py3/tests/crypt/test_es256.py +++ b/contrib/python/google-auth/py3/tests/crypt/test_es256.py @@ -15,6 +15,7 @@ import base64 import json import os +import pickle from cryptography.hazmat.primitives.asymmetric import ec import pytest # type: ignore @@ -24,8 +25,8 @@ from google.auth.crypt import base from google.auth.crypt import es256 -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "..", "data") # To generate es256_privatekey.pem, es256_privatekey.pub, and # es256_public_cert.pem: @@ -142,3 +143,15 @@ class TestES256Signer(object): assert signer.key_id == SERVICE_ACCOUNT_INFO[base._JSON_FILE_PRIVATE_KEY_ID] assert isinstance(signer._key, ec.EllipticCurvePrivateKey) + + def test_pickle(self): + signer = es256.ES256Signer.from_service_account_file(SERVICE_ACCOUNT_JSON_FILE) + + assert signer.key_id == SERVICE_ACCOUNT_INFO[base._JSON_FILE_PRIVATE_KEY_ID] + assert isinstance(signer._key, ec.EllipticCurvePrivateKey) + + pickled_signer = pickle.dumps(signer) + signer = pickle.loads(pickled_signer) + + assert signer.key_id == 
SERVICE_ACCOUNT_INFO[base._JSON_FILE_PRIVATE_KEY_ID] + assert isinstance(signer._key, ec.EllipticCurvePrivateKey) diff --git a/contrib/python/google-auth/py3/tests/data/enterprise_cert_valid_provider.json b/contrib/python/google-auth/py3/tests/data/enterprise_cert_valid_provider.json new file mode 100644 index 0000000000..9b7adf8bc3 --- /dev/null +++ b/contrib/python/google-auth/py3/tests/data/enterprise_cert_valid_provider.json @@ -0,0 +1,6 @@ +{ + "libs": { + "ecp_client": "/path/to/signer/lib", + "ecp_provider": "/path/to/provider/lib" + } +} diff --git a/contrib/python/google-auth/py3/tests/oauth2/test__client.py b/contrib/python/google-auth/py3/tests/oauth2/test__client.py index 54179269bd..444232f396 100644 --- a/contrib/python/google-auth/py3/tests/oauth2/test__client.py +++ b/contrib/python/google-auth/py3/tests/oauth2/test__client.py @@ -29,8 +29,8 @@ from google.auth import transport from google.oauth2 import _client -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "..", "data") with open(os.path.join(DATA_DIR, "privatekey.pem"), "rb") as fh: PRIVATE_KEY_BYTES = fh.read() diff --git a/contrib/python/google-auth/py3/tests/oauth2/test_credentials.py b/contrib/python/google-auth/py3/tests/oauth2/test_credentials.py index f2604a5f18..d6a1915862 100644 --- a/contrib/python/google-auth/py3/tests/oauth2/test_credentials.py +++ b/contrib/python/google-auth/py3/tests/oauth2/test_credentials.py @@ -27,8 +27,8 @@ from google.auth import transport from google.oauth2 import credentials -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "..", "data") AUTH_USER_JSON_FILE = os.path.join(DATA_DIR, "authorized_user.json") @@ -123,6 +123,17 @@ class TestCredentials(object): assert excinfo.match("The provided 
refresh_handler is not a callable or None.") + def test_refresh_with_non_default_universe_domain(self): + creds = credentials.Credentials( + token="token", universe_domain="dummy_universe.com" + ) + with pytest.raises(exceptions.RefreshError) as excinfo: + creds.refresh(mock.Mock()) + + assert excinfo.match( + "refresh is only supported in the default googleapis.com universe domain" + ) + @mock.patch("google.oauth2.reauth.refresh_grant", autospec=True) @mock.patch( "google.auth._helpers.utcnow", @@ -775,6 +786,12 @@ class TestCredentials(object): creds.apply(headers) assert "x-goog-user-project" in headers + def test_with_universe_domain(self): + creds = credentials.Credentials(token="token") + assert creds.universe_domain == "googleapis.com" + new_creds = creds.with_universe_domain("dummy_universe.com") + assert new_creds.universe_domain == "dummy_universe.com" + def test_with_token_uri(self): info = AUTH_USER_INFO.copy() @@ -869,6 +886,7 @@ class TestCredentials(object): assert json_asdict.get("scopes") == creds.scopes assert json_asdict.get("client_secret") == creds.client_secret assert json_asdict.get("expiry") == info["expiry"] + assert json_asdict.get("universe_domain") == creds.universe_domain # Test with a `strip` arg json_output = creds.to_json(strip=["client_secret"]) @@ -896,6 +914,17 @@ class TestCredentials(object): for attr in list(creds.__dict__): assert getattr(creds, attr) == getattr(unpickled, attr) + def test_pickle_and_unpickle_universe_domain(self): + # old version of auth lib doesn't have _universe_domain, so the pickled + # cred doesn't have such a field. + creds = self.make_credentials() + del creds._universe_domain + + unpickled = pickle.loads(pickle.dumps(creds)) + + # make sure the unpickled cred sets _universe_domain to default. 
+ assert unpickled.universe_domain == "googleapis.com" + def test_pickle_and_unpickle_with_refresh_handler(self): expected_expiry = _helpers.utcnow() + datetime.timedelta(seconds=2800) refresh_handler = mock.Mock(return_value=("TOKEN", expected_expiry)) diff --git a/contrib/python/google-auth/py3/tests/oauth2/test_gdch_credentials.py b/contrib/python/google-auth/py3/tests/oauth2/test_gdch_credentials.py index 1ff61d8683..9a67a07345 100644 --- a/contrib/python/google-auth/py3/tests/oauth2/test_gdch_credentials.py +++ b/contrib/python/google-auth/py3/tests/oauth2/test_gdch_credentials.py @@ -27,7 +27,7 @@ import google.auth.transport.requests from google.oauth2 import gdch_credentials from google.oauth2.gdch_credentials import ServiceAccountCredentials -import yatest.common +import yatest.common as yc class TestServiceAccountCredentials(object): @@ -39,7 +39,7 @@ class TestServiceAccountCredentials(object): TOKEN_URI = "https://service-identity.<Domain>/authenticate" JSON_PATH = os.path.join( - yatest.common.test_source_path(), "data", "gdch_service_account.json" + os.path.dirname(yc.source_path(__file__)), "..", "data", "gdch_service_account.json" ) with open(JSON_PATH, "rb") as fh: INFO = json.load(fh) diff --git a/contrib/python/google-auth/py3/tests/oauth2/test_id_token.py b/contrib/python/google-auth/py3/tests/oauth2/test_id_token.py index 861f76ce4f..8657bdfb7e 100644 --- a/contrib/python/google-auth/py3/tests/oauth2/test_id_token.py +++ b/contrib/python/google-auth/py3/tests/oauth2/test_id_token.py @@ -24,9 +24,9 @@ from google.auth import transport from google.oauth2 import id_token from google.oauth2 import service_account -import yatest.common +import yatest.common as yc SERVICE_ACCOUNT_FILE = os.path.join( - yatest.common.test_source_path(), "data/service_account.json" + os.path.dirname(yc.source_path(__file__)), "../data/service_account.json" ) ID_TOKEN_AUDIENCE = "https://pubsub.googleapis.com" @@ -263,7 +263,7 @@ def 
test_fetch_id_token_credentials_no_cred_exists(monkeypatch): def test_fetch_id_token_credentials_invalid_cred_file_type(monkeypatch): user_credentials_file = os.path.join( - yatest.common.test_source_path(), "data/authorized_user.json" + os.path.dirname(yc.source_path(__file__)), "../data/authorized_user.json" ) monkeypatch.setenv(environment_vars.CREDENTIALS, user_credentials_file) @@ -276,7 +276,7 @@ def test_fetch_id_token_credentials_invalid_cred_file_type(monkeypatch): def test_fetch_id_token_credentials_invalid_json(monkeypatch): - not_json_file = os.path.join(yatest.common.test_source_path(), "data/public_cert.pem") + not_json_file = os.path.join(os.path.dirname(yc.source_path(__file__)), "../data/public_cert.pem") monkeypatch.setenv(environment_vars.CREDENTIALS, not_json_file) with pytest.raises(exceptions.DefaultCredentialsError) as excinfo: @@ -287,7 +287,7 @@ def test_fetch_id_token_credentials_invalid_json(monkeypatch): def test_fetch_id_token_credentials_invalid_cred_path(monkeypatch): - not_json_file = os.path.join(yatest.common.test_source_path(), "data/not_exists.json") + not_json_file = os.path.join(os.path.dirname(yc.source_path(__file__)), "../data/not_exists.json") monkeypatch.setenv(environment_vars.CREDENTIALS, not_json_file) with pytest.raises(exceptions.DefaultCredentialsError) as excinfo: diff --git a/contrib/python/google-auth/py3/tests/oauth2/test_service_account.py b/contrib/python/google-auth/py3/tests/oauth2/test_service_account.py index c474c90e6b..8dd5f219be 100644 --- a/contrib/python/google-auth/py3/tests/oauth2/test_service_account.py +++ b/contrib/python/google-auth/py3/tests/oauth2/test_service_account.py @@ -27,8 +27,8 @@ from google.auth import transport from google.oauth2 import service_account -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "..", "data") with open(os.path.join(DATA_DIR, 
"privatekey.pem"), "rb") as fh: PRIVATE_KEY_BYTES = fh.read() @@ -206,6 +206,17 @@ class TestCredentials(object): creds_with_new_token_uri = credentials.with_token_uri(new_token_uri) assert creds_with_new_token_uri._token_uri == new_token_uri + def test_with_universe_domain(self): + credentials = self.make_credentials() + + new_credentials = credentials.with_universe_domain("dummy_universe.com") + assert new_credentials.universe_domain == "dummy_universe.com" + assert new_credentials._always_use_jwt_access + + new_credentials = credentials.with_universe_domain("googleapis.com") + assert new_credentials.universe_domain == "googleapis.com" + assert not new_credentials._always_use_jwt_access + def test__with_always_use_jwt_access(self): credentials = self.make_credentials() assert not credentials._always_use_jwt_access @@ -558,12 +569,16 @@ class TestCredentials(object): assert jwt_grant.called assert not self_signed_jwt_refresh.called - def test_refresh_non_gdu_missing_jwt_credentials(self): - credentials = self.make_credentials(universe_domain="foo") + def test_refresh_missing_jwt_credentials(self): + credentials = self.make_credentials() + credentials = credentials.with_scopes(["foo", "bar"]) + credentials = credentials.with_always_use_jwt_access(True) + assert not credentials._jwt_credentials - with pytest.raises(exceptions.RefreshError) as excinfo: - credentials.refresh(None) - assert excinfo.match("self._jwt_credentials is missing") + credentials.refresh(mock.Mock()) + + # jwt credentials should have been automatically created with scopes + assert credentials._jwt_credentials is not None def test_refresh_non_gdu_domain_wide_delegation_not_supported(self): credentials = self.make_credentials(universe_domain="foo") diff --git a/contrib/python/google-auth/py3/tests/test__cloud_sdk.py b/contrib/python/google-auth/py3/tests/test__cloud_sdk.py index 18ac18fa35..d46621a7f3 100644 --- a/contrib/python/google-auth/py3/tests/test__cloud_sdk.py +++ 
b/contrib/python/google-auth/py3/tests/test__cloud_sdk.py @@ -26,8 +26,8 @@ from google.auth import environment_vars from google.auth import exceptions -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "data") AUTHORIZED_USER_FILE = os.path.join(DATA_DIR, "authorized_user.json") with io.open(AUTHORIZED_USER_FILE, "rb") as fh: @@ -66,8 +66,7 @@ def test_get_project_id_call_error(check_output): assert check_output.called -@pytest.mark.xfail -def test__run_subprocess_ignore_stderr(): +def _test__run_subprocess_ignore_stderr(): command = [ sys.executable, "-c", diff --git a/contrib/python/google-auth/py3/tests/test__default.py b/contrib/python/google-auth/py3/tests/test__default.py index 29904ec7aa..d619614790 100644 --- a/contrib/python/google-auth/py3/tests/test__default.py +++ b/contrib/python/google-auth/py3/tests/test__default.py @@ -36,8 +36,8 @@ from google.oauth2 import service_account import google.oauth2.credentials -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "data") AUTHORIZED_USER_FILE = os.path.join(DATA_DIR, "authorized_user.json") with open(AUTHORIZED_USER_FILE) as fh: diff --git a/contrib/python/google-auth/py3/tests/test__helpers.py b/contrib/python/google-auth/py3/tests/test__helpers.py index c1f1d812e5..c9a3847ac4 100644 --- a/contrib/python/google-auth/py3/tests/test__helpers.py +++ b/contrib/python/google-auth/py3/tests/test__helpers.py @@ -51,6 +51,32 @@ def test_copy_docstring_non_existing(): _helpers.copy_docstring(SourceClass)(func2) +def test_parse_content_type_plain(): + assert _helpers.parse_content_type("text/html") == "text/html" + assert _helpers.parse_content_type("application/xml") == "application/xml" + assert _helpers.parse_content_type("application/json") == 
"application/json" + + +def test_parse_content_type_with_parameters(): + content_type_html = "text/html; charset=UTF-8" + content_type_xml = "application/xml; charset=UTF-16; version=1.0" + content_type_json = "application/json; charset=UTF-8; indent=2" + assert _helpers.parse_content_type(content_type_html) == "text/html" + assert _helpers.parse_content_type(content_type_xml) == "application/xml" + assert _helpers.parse_content_type(content_type_json) == "application/json" + + +def test_parse_content_type_missing_or_broken(): + content_type_foo = None + content_type_bar = "" + content_type_baz = "1234" + content_type_qux = " ; charset=UTF-8" + assert _helpers.parse_content_type(content_type_foo) == "text/plain" + assert _helpers.parse_content_type(content_type_bar) == "text/plain" + assert _helpers.parse_content_type(content_type_baz) == "text/plain" + assert _helpers.parse_content_type(content_type_qux) == "text/plain" + + def test_utcnow(): assert isinstance(_helpers.utcnow(), datetime.datetime) diff --git a/contrib/python/google-auth/py3/tests/test__oauth2client.py b/contrib/python/google-auth/py3/tests/test__oauth2client.py index 72db6535bc..1db595fd9a 100644 --- a/contrib/python/google-auth/py3/tests/test__oauth2client.py +++ b/contrib/python/google-auth/py3/tests/test__oauth2client.py @@ -33,8 +33,8 @@ except ImportError: # pragma: NO COVER from google.auth import _oauth2client -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "data") SERVICE_ACCOUNT_JSON_FILE = os.path.join(DATA_DIR, "service_account.json") diff --git a/contrib/python/google-auth/py3/tests/test__service_account_info.py b/contrib/python/google-auth/py3/tests/test__service_account_info.py index db8106081c..2335765bb4 100644 --- a/contrib/python/google-auth/py3/tests/test__service_account_info.py +++ 
b/contrib/python/google-auth/py3/tests/test__service_account_info.py @@ -21,8 +21,8 @@ from google.auth import _service_account_info from google.auth import crypt -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "data") SERVICE_ACCOUNT_JSON_FILE = os.path.join(DATA_DIR, "service_account.json") GDCH_SERVICE_ACCOUNT_JSON_FILE = os.path.join(DATA_DIR, "gdch_service_account.json") diff --git a/contrib/python/google-auth/py3/tests/test_aws.py b/contrib/python/google-auth/py3/tests/test_aws.py index 39138ab12e..db2e984100 100644 --- a/contrib/python/google-auth/py3/tests/test_aws.py +++ b/contrib/python/google-auth/py3/tests/test_aws.py @@ -1969,7 +1969,7 @@ class TestCredentials(object): "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), "x-goog-user-project": QUOTA_PROJECT_ID, "x-goog-api-client": IMPERSONATE_ACCESS_TOKEN_REQUEST_METRICS_HEADER_VALUE, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } impersonation_request_data = { "delegates": None, @@ -2066,7 +2066,7 @@ class TestCredentials(object): "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), "x-goog-user-project": QUOTA_PROJECT_ID, "x-goog-api-client": IMPERSONATE_ACCESS_TOKEN_REQUEST_METRICS_HEADER_VALUE, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } impersonation_request_data = { "delegates": None, diff --git a/contrib/python/google-auth/py3/tests/test_credentials.py b/contrib/python/google-auth/py3/tests/test_credentials.py index 99235cda61..d64f3abb50 100644 --- a/contrib/python/google-auth/py3/tests/test_credentials.py +++ b/contrib/python/google-auth/py3/tests/test_credentials.py @@ -55,9 +55,7 @@ def test_expired_and_valid(): # Set the expiration to one second more than now plus the clock skew # accomodation. These credentials should be valid. 
credentials.expiry = ( - datetime.datetime.utcnow() - + _helpers.REFRESH_THRESHOLD - + datetime.timedelta(seconds=1) + _helpers.utcnow() + _helpers.REFRESH_THRESHOLD + datetime.timedelta(seconds=1) ) assert credentials.valid @@ -65,7 +63,7 @@ def test_expired_and_valid(): # Set the credentials expiration to now. Because of the clock skew # accomodation, these credentials should report as expired. - credentials.expiry = datetime.datetime.utcnow() + credentials.expiry = _helpers.utcnow() assert not credentials.valid assert credentials.expired @@ -81,7 +79,7 @@ def test_before_request(): assert credentials.valid assert credentials.token == "token" assert headers["authorization"] == "Bearer token" - assert "x-identity-trust-boundary" not in headers + assert "x-allowed-locations" not in headers request = "token2" headers = {} @@ -91,13 +89,13 @@ def test_before_request(): assert credentials.valid assert credentials.token == "token" assert headers["authorization"] == "Bearer token" - assert "x-identity-trust-boundary" not in headers + assert "x-allowed-locations" not in headers def test_before_request_with_trust_boundary(): - DUMMY_BOUNDARY = "00110101" + DUMMY_BOUNDARY = "0xA30" credentials = CredentialsImpl() - credentials._trust_boundary = DUMMY_BOUNDARY + credentials._trust_boundary = {"locations": [], "encoded_locations": DUMMY_BOUNDARY} request = "token" headers = {} @@ -106,7 +104,7 @@ def test_before_request_with_trust_boundary(): assert credentials.valid assert credentials.token == "token" assert headers["authorization"] == "Bearer token" - assert headers["x-identity-trust-boundary"] == DUMMY_BOUNDARY + assert headers["x-allowed-locations"] == DUMMY_BOUNDARY request = "token2" headers = {} @@ -116,7 +114,7 @@ def test_before_request_with_trust_boundary(): assert credentials.valid assert credentials.token == "token" assert headers["authorization"] == "Bearer token" - assert headers["x-identity-trust-boundary"] == DUMMY_BOUNDARY + assert 
headers["x-allowed-locations"] == DUMMY_BOUNDARY def test_before_request_metrics(): diff --git a/contrib/python/google-auth/py3/tests/test_external_account.py b/contrib/python/google-auth/py3/tests/test_external_account.py index 0b165bc70b..5225dcf342 100644 --- a/contrib/python/google-auth/py3/tests/test_external_account.py +++ b/contrib/python/google-auth/py3/tests/test_external_account.py @@ -505,6 +505,11 @@ class TestCredentials(object): credentials = self.make_credentials() assert credentials.universe_domain == external_account._DEFAULT_UNIVERSE_DOMAIN + def test_with_universe_domain(self): + credentials = self.make_credentials() + new_credentials = credentials.with_universe_domain("dummy_universe.com") + assert new_credentials.universe_domain == "dummy_universe.com" + def test_info_workforce_pool(self): credentials = self.make_workforce_pool_credentials( workforce_pool_user_project=self.WORKFORCE_POOL_USER_PROJECT @@ -833,7 +838,7 @@ class TestCredentials(object): "Content-Type": "application/json", "authorization": "Bearer {}".format(token_response["access_token"]), "x-goog-api-client": IMPERSONATE_ACCESS_TOKEN_REQUEST_METRICS_HEADER_VALUE, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } impersonation_request_data = { "delegates": None, @@ -915,7 +920,7 @@ class TestCredentials(object): "Content-Type": "application/json", "authorization": "Bearer {}".format(token_response["access_token"]), "x-goog-api-client": IMPERSONATE_ACCESS_TOKEN_REQUEST_METRICS_HEADER_VALUE, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } impersonation_request_data = { "delegates": None, @@ -1134,7 +1139,7 @@ class TestCredentials(object): "Content-Type": "application/json", "authorization": "Bearer {}".format(token_response["access_token"]), "x-goog-api-client": IMPERSONATE_ACCESS_TOKEN_REQUEST_METRICS_HEADER_VALUE, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } impersonation_request_data = { "delegates": None, @@ 
-1218,7 +1223,7 @@ class TestCredentials(object): "Content-Type": "application/json", "authorization": "Bearer {}".format(token_response["access_token"]), "x-goog-api-client": IMPERSONATE_ACCESS_TOKEN_REQUEST_METRICS_HEADER_VALUE, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } impersonation_request_data = { "delegates": None, @@ -1274,7 +1279,7 @@ class TestCredentials(object): assert headers == { "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } def test_apply_workforce_without_quota_project_id(self): @@ -1291,7 +1296,7 @@ class TestCredentials(object): assert headers == { "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } def test_apply_impersonation_without_quota_project_id(self): @@ -1323,7 +1328,7 @@ class TestCredentials(object): assert headers == { "authorization": "Bearer {}".format(impersonation_response["accessToken"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } def test_apply_with_quota_project_id(self): @@ -1340,7 +1345,7 @@ class TestCredentials(object): "other": "header-value", "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), "x-goog-user-project": self.QUOTA_PROJECT_ID, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } def test_apply_impersonation_with_quota_project_id(self): @@ -1375,7 +1380,7 @@ class TestCredentials(object): "other": "header-value", "authorization": "Bearer {}".format(impersonation_response["accessToken"]), "x-goog-user-project": self.QUOTA_PROJECT_ID, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } def test_before_request(self): @@ -1391,7 +1396,7 @@ class TestCredentials(object): assert headers == { "other": "header-value", "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), - 
"x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } # Second call shouldn't call refresh. @@ -1400,7 +1405,7 @@ class TestCredentials(object): assert headers == { "other": "header-value", "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } def test_before_request_workforce(self): @@ -1418,7 +1423,7 @@ class TestCredentials(object): assert headers == { "other": "header-value", "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } # Second call shouldn't call refresh. @@ -1427,7 +1432,7 @@ class TestCredentials(object): assert headers == { "other": "header-value", "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } def test_before_request_impersonation(self): @@ -1458,7 +1463,7 @@ class TestCredentials(object): assert headers == { "other": "header-value", "authorization": "Bearer {}".format(impersonation_response["accessToken"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } # Second call shouldn't call refresh. @@ -1467,7 +1472,7 @@ class TestCredentials(object): assert headers == { "other": "header-value", "authorization": "Bearer {}".format(impersonation_response["accessToken"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } @mock.patch("google.auth._helpers.utcnow") @@ -1495,7 +1500,7 @@ class TestCredentials(object): # Cached token should be used. assert headers == { "authorization": "Bearer token", - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } # Next call should simulate 1 second passed. @@ -1509,7 +1514,7 @@ class TestCredentials(object): # New token should be retrieved. 
assert headers == { "authorization": "Bearer {}".format(self.SUCCESS_RESPONSE["access_token"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } @mock.patch("google.auth._helpers.utcnow") @@ -1552,7 +1557,7 @@ class TestCredentials(object): # Cached token should be used. assert headers == { "authorization": "Bearer token", - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } # Next call should simulate 1 second passed. This will trigger the expiration @@ -1567,7 +1572,7 @@ class TestCredentials(object): # New token should be retrieved. assert headers == { "authorization": "Bearer {}".format(impersonation_response["accessToken"]), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } @pytest.mark.parametrize( @@ -1666,7 +1671,7 @@ class TestCredentials(object): "x-goog-user-project": self.QUOTA_PROJECT_ID, "authorization": "Bearer {}".format(token_response["access_token"]), "x-goog-api-client": IMPERSONATE_ACCESS_TOKEN_REQUEST_METRICS_HEADER_VALUE, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } impersonation_request_data = { "delegates": None, @@ -1720,7 +1725,7 @@ class TestCredentials(object): "authorization": "Bearer {}".format( impersonation_response["accessToken"] ), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", }, ) @@ -1792,7 +1797,7 @@ class TestCredentials(object): "authorization": "Bearer {}".format( self.SUCCESS_RESPONSE["access_token"] ), - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", }, ) @@ -1842,7 +1847,7 @@ class TestCredentials(object): "Content-Type": "application/json", "authorization": "Bearer {}".format(token_response["access_token"]), "x-goog-api-client": IMPERSONATE_ACCESS_TOKEN_REQUEST_METRICS_HEADER_VALUE, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } impersonation_request_data = { "delegates": None, diff --git a/contrib/python/google-auth/py3/tests/test_identity_pool.py 
b/contrib/python/google-auth/py3/tests/test_identity_pool.py index d126a579bd..2d10a5d268 100644 --- a/contrib/python/google-auth/py3/tests/test_identity_pool.py +++ b/contrib/python/google-auth/py3/tests/test_identity_pool.py @@ -45,8 +45,8 @@ SERVICE_ACCOUNT_IMPERSONATION_URL = ( QUOTA_PROJECT_ID = "QUOTA_PROJECT_ID" SCOPES = ["scope1", "scope2"] -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "data") SUBJECT_TOKEN_TEXT_FILE = os.path.join(DATA_DIR, "external_subject_token.txt") SUBJECT_TOKEN_JSON_FILE = os.path.join(DATA_DIR, "external_subject_token.json") SUBJECT_TOKEN_FIELD_NAME = "access_token" @@ -320,7 +320,7 @@ class TestCredentials(object): "Content-Type": "application/json", "authorization": "Bearer {}".format(token_response["access_token"]), "x-goog-api-client": metrics_header_value, - "x-identity-trust-boundary": "0", + "x-allowed-locations": "0x0", } impersonation_request_data = { "delegates": None, diff --git a/contrib/python/google-auth/py3/tests/test_impersonated_credentials.py b/contrib/python/google-auth/py3/tests/test_impersonated_credentials.py index d63d2d5d3b..9696e823ff 100644 --- a/contrib/python/google-auth/py3/tests/test_impersonated_credentials.py +++ b/contrib/python/google-auth/py3/tests/test_impersonated_credentials.py @@ -29,8 +29,8 @@ from google.auth.impersonated_credentials import Credentials from google.oauth2 import credentials from google.oauth2 import service_account -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "data") with open(os.path.join(DATA_DIR, "privatekey.pem"), "rb") as fh: PRIVATE_KEY_BYTES = fh.read() diff --git a/contrib/python/google-auth/py3/tests/test_jwt.py b/contrib/python/google-auth/py3/tests/test_jwt.py index 62f310606d..ff8fd67da6 
100644 --- a/contrib/python/google-auth/py3/tests/test_jwt.py +++ b/contrib/python/google-auth/py3/tests/test_jwt.py @@ -26,8 +26,8 @@ from google.auth import exceptions from google.auth import jwt -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "data") with open(os.path.join(DATA_DIR, "privatekey.pem"), "rb") as fh: PRIVATE_KEY_BYTES = fh.read() diff --git a/contrib/python/google-auth/py3/tests/transport/test__custom_tls_signer.py b/contrib/python/google-auth/py3/tests/transport/test__custom_tls_signer.py index 5836b325ad..d2907bad29 100644 --- a/contrib/python/google-auth/py3/tests/transport/test__custom_tls_signer.py +++ b/contrib/python/google-auth/py3/tests/transport/test__custom_tls_signer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import base64 import ctypes import os @@ -30,11 +29,19 @@ FAKE_ENTERPRISE_CERT_FILE_PATH = "/path/to/enterprise/cert/file" ENTERPRISE_CERT_FILE = os.path.join( os.path.dirname(__file__), "../data/enterprise_cert_valid.json" ) +ENTERPRISE_CERT_FILE_PROVIDER = os.path.join( + os.path.dirname(__file__), "../data/enterprise_cert_valid_provider.json" +) INVALID_ENTERPRISE_CERT_FILE = os.path.join( os.path.dirname(__file__), "../data/enterprise_cert_invalid.json" ) +def test_load_provider_lib(): + with mock.patch("ctypes.CDLL", return_value=mock.MagicMock()): + _custom_tls_signer.load_provider_lib("/path/to/provider/lib") + + def test_load_offload_lib(): with mock.patch("ctypes.CDLL", return_value=mock.MagicMock()): lib = _custom_tls_signer.load_offload_lib("/path/to/offload/lib") @@ -173,62 +180,81 @@ def test_custom_tls_signer(): ) as load_offload_lib: load_offload_lib.return_value = offload_lib load_signer_lib.return_value = signer_lib - signer_object = _custom_tls_signer.CustomTlsSigner(ENTERPRISE_CERT_FILE) - signer_object.load_libraries() - assert signer_object._cert is None + with mock.patch( + "google.auth.transport._custom_tls_signer.get_cert" + ) as get_cert: + with mock.patch( + "google.auth.transport._custom_tls_signer.get_sign_callback" + ) as get_sign_callback: + get_cert.return_value = b"mock_cert" + signer_object = _custom_tls_signer.CustomTlsSigner( + ENTERPRISE_CERT_FILE + ) + signer_object.load_libraries() + signer_object.attach_to_ssl_context(create_urllib3_context()) + get_cert.assert_called_once() + get_sign_callback.assert_called_once() + offload_lib.ConfigureSslContext.assert_called_once() assert signer_object._enterprise_cert_file_path == ENTERPRISE_CERT_FILE assert signer_object._offload_lib == offload_lib assert signer_object._signer_lib == signer_lib load_signer_lib.assert_called_with("/path/to/signer/lib") load_offload_lib.assert_called_with("/path/to/offload/lib") - # Test set_up_custom_key and set_up_ssl_context methods - with 
mock.patch("google.auth.transport._custom_tls_signer.get_cert") as get_cert: - with mock.patch( - "google.auth.transport._custom_tls_signer.get_sign_callback" - ) as get_sign_callback: - get_cert.return_value = b"mock_cert" - signer_object.set_up_custom_key() - signer_object.attach_to_ssl_context(create_urllib3_context()) - get_cert.assert_called_once() - get_sign_callback.assert_called_once() - offload_lib.ConfigureSslContext.assert_called_once() +def test_custom_tls_signer_provider(): + provider_lib = mock.MagicMock() -def test_custom_tls_signer_failed_to_load_libraries(): # Test load_libraries method + with mock.patch( + "google.auth.transport._custom_tls_signer.load_provider_lib" + ) as load_provider_lib: + load_provider_lib.return_value = provider_lib + signer_object = _custom_tls_signer.CustomTlsSigner( + ENTERPRISE_CERT_FILE_PROVIDER + ) + signer_object.load_libraries() + signer_object.attach_to_ssl_context(mock.MagicMock()) + + assert signer_object._enterprise_cert_file_path == ENTERPRISE_CERT_FILE_PROVIDER + assert signer_object._provider_lib == provider_lib + load_provider_lib.assert_called_with("/path/to/provider/lib") + + +def test_custom_tls_signer_failed_to_load_libraries(): with pytest.raises(exceptions.MutualTLSChannelError) as excinfo: signer_object = _custom_tls_signer.CustomTlsSigner(INVALID_ENTERPRISE_CERT_FILE) signer_object.load_libraries() assert excinfo.match("enterprise cert file is invalid") -def test_custom_tls_signer_fail_to_offload(): - offload_lib = mock.MagicMock() - signer_lib = mock.MagicMock() +def test_custom_tls_signer_failed_to_attach(): + with pytest.raises(exceptions.MutualTLSChannelError) as excinfo: + signer_object = _custom_tls_signer.CustomTlsSigner(ENTERPRISE_CERT_FILE) + signer_object._offload_lib = mock.MagicMock() + signer_object._signer_lib = mock.MagicMock() + signer_object._sign_callback = mock.MagicMock() + signer_object._cert = b"mock cert" + signer_object._offload_lib.ConfigureSslContext.return_value = False + 
signer_object.attach_to_ssl_context(mock.MagicMock()) + assert excinfo.match("failed to configure ECP Offload SSL context") - with mock.patch( - "google.auth.transport._custom_tls_signer.load_signer_lib" - ) as load_signer_lib: - with mock.patch( - "google.auth.transport._custom_tls_signer.load_offload_lib" - ) as load_offload_lib: - load_offload_lib.return_value = offload_lib - load_signer_lib.return_value = signer_lib - signer_object = _custom_tls_signer.CustomTlsSigner(ENTERPRISE_CERT_FILE) - signer_object.load_libraries() - # set the return value to be 0 which indicts offload fails - offload_lib.ConfigureSslContext.return_value = 0 +def test_custom_tls_signer_failed_to_attach_provider(): + with pytest.raises(exceptions.MutualTLSChannelError) as excinfo: + signer_object = _custom_tls_signer.CustomTlsSigner( + ENTERPRISE_CERT_FILE_PROVIDER + ) + signer_object._provider_lib = mock.MagicMock() + signer_object._provider_lib.ECP_attach_to_ctx.return_value = False + signer_object.attach_to_ssl_context(mock.MagicMock()) + assert excinfo.match("failed to configure ECP Provider SSL context") + +def test_custom_tls_signer_failed_to_attach_no_libs(): with pytest.raises(exceptions.MutualTLSChannelError) as excinfo: - with mock.patch( - "google.auth.transport._custom_tls_signer.get_cert" - ) as get_cert: - with mock.patch( - "google.auth.transport._custom_tls_signer.get_sign_callback" - ): - get_cert.return_value = b"mock_cert" - signer_object.set_up_custom_key() - signer_object.attach_to_ssl_context(create_urllib3_context()) - assert excinfo.match("failed to configure SSL context") + signer_object = _custom_tls_signer.CustomTlsSigner(ENTERPRISE_CERT_FILE) + signer_object._offload_lib = None + signer_object._signer_lib = None + signer_object.attach_to_ssl_context(mock.MagicMock()) + assert excinfo.match("Invalid ECP configuration.") diff --git a/contrib/python/google-auth/py3/tests/transport/test__mtls_helper.py 
b/contrib/python/google-auth/py3/tests/transport/test__mtls_helper.py index 642283a5c5..1621a05302 100644 --- a/contrib/python/google-auth/py3/tests/transport/test__mtls_helper.py +++ b/contrib/python/google-auth/py3/tests/transport/test__mtls_helper.py @@ -22,9 +22,6 @@ import pytest # type: ignore from google.auth import exceptions from google.auth.transport import _mtls_helper -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") - CONTEXT_AWARE_METADATA = {"cert_provider_command": ["some command"]} ENCRYPTED_EC_PRIVATE_KEY = b"""-----BEGIN ENCRYPTED PRIVATE KEY----- @@ -116,26 +113,26 @@ class TestCertAndKeyRegex(object): class TestCheckaMetadataPath(object): def test_success(self): - metadata_path = os.path.join(DATA_DIR, "context_aware_metadata.json") + metadata_path = os.path.join(pytest.data_dir, "context_aware_metadata.json") returned_path = _mtls_helper._check_dca_metadata_path(metadata_path) assert returned_path is not None def test_failure(self): - metadata_path = os.path.join(DATA_DIR, "not_exists.json") + metadata_path = os.path.join(pytest.data_dir, "not_exists.json") returned_path = _mtls_helper._check_dca_metadata_path(metadata_path) assert returned_path is None class TestReadMetadataFile(object): def test_success(self): - metadata_path = os.path.join(DATA_DIR, "context_aware_metadata.json") + metadata_path = os.path.join(pytest.data_dir, "context_aware_metadata.json") metadata = _mtls_helper._read_dca_metadata_file(metadata_path) assert "cert_provider_command" in metadata def test_file_not_json(self): # read a file which is not json format. 
- metadata_path = os.path.join(DATA_DIR, "privatekey.pem") + metadata_path = os.path.join(pytest.data_dir, "privatekey.pem") with pytest.raises(exceptions.ClientCertError): _mtls_helper._read_dca_metadata_file(metadata_path) diff --git a/contrib/python/google-auth/py3/tests/transport/test_grpc.py b/contrib/python/google-auth/py3/tests/transport/test_grpc.py index 05dc5fad0e..29fae4cdf6 100644 --- a/contrib/python/google-auth/py3/tests/transport/test_grpc.py +++ b/contrib/python/google-auth/py3/tests/transport/test_grpc.py @@ -35,8 +35,8 @@ try: except ImportError: # pragma: NO COVER HAS_GRPC = False -import yatest.common -DATA_DIR = os.path.join(yatest.common.test_source_path(), "data") +import yatest.common as yc +DATA_DIR = os.path.join(os.path.dirname(yc.source_path(__file__)), "..", "data") METADATA_PATH = os.path.join(DATA_DIR, "context_aware_metadata.json") with open(os.path.join(DATA_DIR, "privatekey.pem"), "rb") as fh: PRIVATE_KEY_BYTES = fh.read() diff --git a/contrib/python/google-auth/py3/tests/transport/test_requests.py b/contrib/python/google-auth/py3/tests/transport/test_requests.py index d962814346..aadc1ddbfd 100644 --- a/contrib/python/google-auth/py3/tests/transport/test_requests.py +++ b/contrib/python/google-auth/py3/tests/transport/test_requests.py @@ -545,16 +545,12 @@ class TestMutualTlsOffloadAdapter(object): google.auth.transport._custom_tls_signer.CustomTlsSigner, "load_libraries" ) @mock.patch.object( - google.auth.transport._custom_tls_signer.CustomTlsSigner, "set_up_custom_key" - ) - @mock.patch.object( google.auth.transport._custom_tls_signer.CustomTlsSigner, "attach_to_ssl_context", ) def test_success( self, mock_attach_to_ssl_context, - mock_set_up_custom_key, mock_load_libraries, mock_proxy_manager_for, mock_init_poolmanager, @@ -565,7 +561,6 @@ class TestMutualTlsOffloadAdapter(object): ) mock_load_libraries.assert_called_once() - mock_set_up_custom_key.assert_called_once() assert mock_attach_to_ssl_context.call_count == 2 
adapter.init_poolmanager() diff --git a/contrib/python/google-auth/py3/tests/ya.make b/contrib/python/google-auth/py3/tests/ya.make index e7a1b3b272..dfcabf5bfb 100644 --- a/contrib/python/google-auth/py3/tests/ya.make +++ b/contrib/python/google-auth/py3/tests/ya.make @@ -67,11 +67,6 @@ TEST_SRCS( # transport/test_urllib3.py ) -RESOURCE( - data/privatekey.pem data/privatekey.pem - data/public_cert.pem data/public_cert.pem -) - NO_LINT() END() diff --git a/contrib/python/google-auth/py3/ya.make b/contrib/python/google-auth/py3/ya.make index 77b6e5f741..ec71907cc6 100644 --- a/contrib/python/google-auth/py3/ya.make +++ b/contrib/python/google-auth/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(2.23.0) +VERSION(2.25.2) LICENSE(Apache-2.0) @@ -10,10 +10,10 @@ PEERDIR( contrib/python/cachetools contrib/python/cryptography contrib/python/grpcio + contrib/python/packaging contrib/python/pyasn1-modules contrib/python/requests contrib/python/rsa - contrib/python/urllib3 ) NO_LINT() diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA index 2a7e253367..c243f74513 100644 --- a/contrib/python/hypothesis/py3/.dist-info/METADATA +++ b/contrib/python/hypothesis/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: hypothesis -Version: 6.91.0 +Version: 6.92.0 Summary: A library for property-based testing Home-page: https://hypothesis.works Author: David R. 
MacIver and Zac Hatfield-Dodds @@ -35,7 +35,7 @@ Classifier: Typing :: Typed Requires-Python: >=3.8 Description-Content-Type: text/x-rst License-File: LICENSE.txt -Requires-Dist: attrs >=19.2.0 +Requires-Dist: attrs >=22.2.0 Requires-Dist: sortedcontainers <3.0.0,>=2.1.0 Requires-Dist: exceptiongroup >=1.0.0 ; python_version < "3.11" Provides-Extra: all diff --git a/contrib/python/hypothesis/py3/_hypothesis_pytestplugin.py b/contrib/python/hypothesis/py3/_hypothesis_pytestplugin.py index 3bb2535f3b..9875e067f5 100644 --- a/contrib/python/hypothesis/py3/_hypothesis_pytestplugin.py +++ b/contrib/python/hypothesis/py3/_hypothesis_pytestplugin.py @@ -373,6 +373,13 @@ else: if fex: failing_examples.append(json.loads(fex)) + from hypothesis.internal.observability import _WROTE_TO + + if _WROTE_TO: + terminalreporter.section("Hypothesis") + for fname in sorted(_WROTE_TO): + terminalreporter.write_line(f"observations written to {fname}") + if failing_examples: # This must have been imported already to write the failing examples from hypothesis.extra._patching import gc_patches, make_patch, save_patch @@ -384,7 +391,8 @@ else: except Exception: # fail gracefully if we hit any filesystem or permissions problems return - terminalreporter.section("Hypothesis") + if not _WROTE_TO: + terminalreporter.section("Hypothesis") terminalreporter.write_line( f"`git apply {fname}` to add failing examples to your code." 
) diff --git a/contrib/python/hypothesis/py3/hypothesis/core.py b/contrib/python/hypothesis/py3/hypothesis/core.py index b2298418c8..82e359fc2f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/core.py +++ b/contrib/python/hypothesis/py3/hypothesis/core.py @@ -87,6 +87,11 @@ from hypothesis.internal.escalation import ( get_trimmed_traceback, ) from hypothesis.internal.healthcheck import fail_health_check +from hypothesis.internal.observability import ( + TESTCASE_CALLBACKS, + deliver_json_blob, + make_testcase, +) from hypothesis.internal.reflection import ( convert_positional_arguments, define_function_signature, @@ -99,7 +104,12 @@ from hypothesis.internal.reflection import ( proxies, repr_call, ) -from hypothesis.internal.scrutineer import Tracer, explanatory_lines +from hypothesis.internal.scrutineer import ( + Trace, + Tracer, + explanatory_lines, + tractable_coverage_report, +) from hypothesis.internal.validation import check_type from hypothesis.reporting import ( current_verbosity, @@ -107,13 +117,14 @@ from hypothesis.reporting import ( verbose_report, with_reporter, ) -from hypothesis.statistics import describe_targets, note_statistics +from hypothesis.statistics import describe_statistics, describe_targets, note_statistics from hypothesis.strategies._internal.misc import NOTHING from hypothesis.strategies._internal.strategies import ( Ex, SearchStrategy, check_strategy, ) +from hypothesis.strategies._internal.utils import to_jsonable from hypothesis.vendor.pretty import RepresentationPrinter from hypothesis.version import __version__ @@ -484,13 +495,14 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s with local_settings(state.settings): fragments_reported = [] + empty_data = ConjectureData.for_buffer(b"") try: bits = ", ".join(nicerepr(x) for x in arguments) + ", ".join( f"{k}={nicerepr(v)}" for k, v in example_kwargs.items() ) execute_example = partial( state.execute_once, - ConjectureData.for_buffer(b""), + 
empty_data, is_final=True, print_example=True, example_kwargs=example_kwargs, @@ -544,7 +556,8 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s # development, this is rather useful to replay Hypothesis' part of # a saved failure when other arguments are supplied by e.g. pytest. # See https://github.com/HypothesisWorks/hypothesis/issues/2125 - pass + with contextlib.suppress(StopTest): + empty_data.conclude_test(Status.INVALID) except BaseException as err: # In order to support reporting of multiple failing examples, we yield # each of the (report text, error) pairs we find back to the top-level @@ -567,6 +580,8 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s new.__cause__ = err err = new + with contextlib.suppress(StopTest): + empty_data.conclude_test(Status.INVALID) yield (fragments_reported, err) if ( state.settings.report_multiple_bugs @@ -583,6 +598,15 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s "Falsifying example", "Falsifying explicit example", 1 ) + tc = make_testcase( + start_timestamp=state._start_timestamp, + test_name_or_nodeid=state.test_identifier, + data=empty_data, + how_generated="explicit example", + string_repr=state._string_repr, + ) + deliver_json_blob(tc) + if fragments_reported: verbose_report(fragments_reported[0].replace("Falsifying", "Trying", 1)) for f in fragments_reported[1:]: @@ -738,7 +762,6 @@ class StateForActualGivenExecution: self.last_exception = None self.falsifying_examples = () self.random = random - self.__test_runtime = None self.ever_executed = False self.is_find = getattr(wrapped_test, "_hypothesis_internal_is_find", False) @@ -756,6 +779,16 @@ class StateForActualGivenExecution: self.failed_due_to_deadline = False self.explain_traces = defaultdict(set) + self._start_timestamp = time.time() + self._string_repr = "" + self._jsonable_arguments = {} + self._timing_features = {} + + @property + def 
test_identifier(self): + return getattr( + current_pytest_item.value, "nodeid", None + ) or get_pretty_function_description(self.wrapped_test) def execute_once( self, @@ -780,6 +813,7 @@ class StateForActualGivenExecution: self.ever_executed = True data.is_find = self.is_find + self._string_repr = "" text_repr = None if self.settings.deadline is None: test = self.test @@ -787,16 +821,23 @@ class StateForActualGivenExecution: @proxies(self.test) def test(*args, **kwargs): - self.__test_runtime = None + arg_drawtime = sum(data.draw_times) initial_draws = len(data.draw_times) start = time.perf_counter() - result = self.test(*args, **kwargs) - finish = time.perf_counter() - internal_draw_time = sum(data.draw_times[initial_draws:]) - runtime = datetime.timedelta( - seconds=finish - start - internal_draw_time - ) - self.__test_runtime = runtime + try: + result = self.test(*args, **kwargs) + finally: + finish = time.perf_counter() + internal_draw_time = sum(data.draw_times[initial_draws:]) + runtime = datetime.timedelta( + seconds=finish - start - internal_draw_time + ) + self._timing_features = { + "time_running_test": finish - start - internal_draw_time, + "time_drawing_args": arg_drawtime, + "time_interactive_draws": internal_draw_time, + } + current_deadline = self.settings.deadline if not is_final: current_deadline = (current_deadline // 4) * 5 @@ -855,6 +896,26 @@ class StateForActualGivenExecution: ), ) report(printer.getvalue()) + + if TESTCASE_CALLBACKS: + printer = RepresentationPrinter(context=context) + printer.repr_call( + test.__name__, + args, + kwargs, + force_split=True, + arg_slices=argslices, + leading_comment=( + "# " + context.data.slice_comments[(0, 0)] + if (0, 0) in context.data.slice_comments + else None + ), + ) + self._string_repr = printer.getvalue() + self._jsonable_arguments = { + **dict(enumerate(map(to_jsonable, args))), + **{k: to_jsonable(v) for k, v in kwargs.items()}, + } return test(*args, **kwargs) # self.test_runner can include the 
execute_example method, or setup/teardown @@ -870,9 +931,8 @@ class StateForActualGivenExecution: # instead raise an appropriate diagnostic error. if expected_failure is not None: exception, traceback = expected_failure - if ( - isinstance(exception, DeadlineExceeded) - and self.__test_runtime is not None + if isinstance(exception, DeadlineExceeded) and ( + runtime_secs := self._timing_features.get("time_running_test") ): report( "Unreliable test timings! On an initial run, this " @@ -884,7 +944,7 @@ class StateForActualGivenExecution: % ( exception.runtime.total_seconds() * 1000, self.settings.deadline.total_seconds() * 1000, - self.__test_runtime.total_seconds() * 1000, + runtime_secs * 1000, ) ) else: @@ -895,7 +955,7 @@ class StateForActualGivenExecution: ) from exception return result - def _execute_once_for_engine(self, data): + def _execute_once_for_engine(self, data: ConjectureData) -> None: """Wrapper around ``execute_once`` that intercepts test failure exceptions and single-test control exceptions, and turns them into appropriate method calls to `data` instead. @@ -903,16 +963,18 @@ class StateForActualGivenExecution: This allows the engine to assume that any exception other than ``StopTest`` must be a fatal error, and should stop the entire engine. 
""" + trace: Trace = set() try: - trace = frozenset() - if ( + _can_trace = ( + sys.gettrace() is None or sys.version_info[:2] >= (3, 12) + ) and not PYPY + _trace_obs = TESTCASE_CALLBACKS + _trace_failure = ( self.failed_normally and not self.failed_due_to_deadline - and Phase.shrink in self.settings.phases - and Phase.explain in self.settings.phases - and (sys.gettrace() is None or sys.version_info[:2] >= (3, 12)) - and not PYPY - ): # pragma: no cover + and {Phase.shrink, Phase.explain}.issubset(self.settings.phases) + ) + if _can_trace and (_trace_obs or _trace_failure): # pragma: no cover # This is in fact covered by our *non-coverage* tests, but due to the # settrace() contention *not* by our coverage tests. Ah well. with Tracer() as tracer: @@ -921,7 +983,7 @@ class StateForActualGivenExecution: if data.status == Status.VALID: self.explain_traces[None].add(frozenset(tracer.branches)) finally: - trace = frozenset(tracer.branches) + trace = tracer.branches else: result = self.execute_once(data) if result is not None: @@ -964,20 +1026,40 @@ class StateForActualGivenExecution: tb = get_trimmed_traceback() info = data.extra_information - info.__expected_traceback = format_exception(e, tb) - info.__expected_exception = e - verbose_report(info.__expected_traceback) + info._expected_traceback = format_exception(e, tb) # type: ignore + info._expected_exception = e # type: ignore + verbose_report(info._expected_traceback) # type: ignore self.failed_normally = True interesting_origin = InterestingOrigin.from_exception(e) if trace: # pragma: no cover # Trace collection is explicitly disabled under coverage. - self.explain_traces[interesting_origin].add(trace) + self.explain_traces[interesting_origin].add(frozenset(trace)) if interesting_origin[0] == DeadlineExceeded: self.failed_due_to_deadline = True self.explain_traces.clear() - data.mark_interesting(interesting_origin) + data.mark_interesting(interesting_origin) # type: ignore # mypy bug? 
+ finally: + # Conditional here so we can save some time constructing the payload; in + # other cases (without coverage) it's cheap enough to do that regardless. + if TESTCASE_CALLBACKS: + if self.failed_normally or self.failed_due_to_deadline: + phase = "shrink" + else: + phase = "unknown" + tc = make_testcase( + start_timestamp=self._start_timestamp, + test_name_or_nodeid=self.test_identifier, + data=data, + how_generated=f"generated during {phase} phase", + string_repr=self._string_repr, + arguments={**self._jsonable_arguments, **data._observability_args}, + metadata=self._timing_features, + coverage=tractable_coverage_report(trace) or None, + ) + deliver_json_blob(tc) + self._timing_features.clear() def run_engine(self): """Run the test function many times, on database input and generated @@ -1003,6 +1085,15 @@ class StateForActualGivenExecution: # on different inputs. runner.run() note_statistics(runner.statistics) + deliver_json_blob( + { + "type": "info", + "run_start": self._start_timestamp, + "property": self.test_identifier, + "title": "Hypothesis Statistics", + "content": describe_statistics(runner.statistics), + } + ) if runner.call_count == 0: return @@ -1041,7 +1132,9 @@ class StateForActualGivenExecution: falsifying_example.buffer ) ran_example.slice_comments = falsifying_example.slice_comments - assert info.__expected_exception is not None + tb = None + origin = None + assert info._expected_exception is not None try: with with_reporter(fragments.append): self.execute_once( @@ -1049,8 +1142,8 @@ class StateForActualGivenExecution: print_example=not self.is_find, is_final=True, expected_failure=( - info.__expected_exception, - info.__expected_traceback, + info._expected_exception, + info._expected_traceback, ), ) except (UnsatisfiedAssumption, StopTest) as e: @@ -1066,10 +1159,34 @@ class StateForActualGivenExecution: errors_to_report.append( (fragments, e.with_traceback(get_trimmed_traceback())) ) + tb = format_exception(e, get_trimmed_traceback(e)) 
+ origin = InterestingOrigin.from_exception(e) else: # execute_once() will always raise either the expected error, or Flaky. raise NotImplementedError("This should be unreachable") finally: + # log our observability line for the final failing example + tc = { + "type": "test_case", + "run_start": self._start_timestamp, + "property": self.test_identifier, + "status": "passed" if sys.exc_info()[0] else "failed", + "status_reason": str(origin or "unexpected/flaky pass"), + "representation": self._string_repr, + "how_generated": "minimal failing example", + "features": { + **{ + k: v + for k, v in ran_example.target_observations.items() + if isinstance(k, str) + }, + **ran_example.events, + **self._timing_features, + }, + "coverage": None, # TODO: expose this? + "metadata": {"traceback": tb}, + } + deliver_json_blob(tc) # Whether or not replay actually raised the exception again, we want # to print the reproduce_failure decorator for the failing example. if self.settings.print_blob: diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py index 2f086bf04f..7a5542b0bd 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py @@ -75,7 +75,6 @@ else: ONE_BOUND_INTEGERS_LABEL = calc_label_from_name("trying a one-bound int allowing 0") INTEGER_RANGE_DRAW_LABEL = calc_label_from_name("another draw in integer_range()") BIASED_COIN_LABEL = calc_label_from_name("biased_coin()") -BIASED_COIN_INNER_LABEL = calc_label_from_name("inside biased_coin()") TOP_LABEL = calc_label_from_name("top") DRAW_BYTES_LABEL = calc_label_from_name("draw_bytes() in ConjectureData") @@ -936,18 +935,9 @@ class PrimitiveProvider: else: partial = True - if forced is None: - # We want to get to the point where True is represented by - # 1 and False is represented by 0 as quickly as possible, so - # we use the 
remove_discarded machinery in the shrinker to - # achieve that by discarding any draws that are > 1 and writing - # a suitable draw into the choice sequence at the end of the - # loop. - self._cd.start_example(BIASED_COIN_INNER_LABEL) - i = self._cd.draw_bits(bits) - self._cd.stop_example(discard=i > 1) - else: - i = self._cd.draw_bits(bits, forced=int(forced)) + i = self._cd.draw_bits( + bits, forced=None if forced is None else int(forced) + ) # We always choose the region that causes us to repeat the loop as # the maximum value, so that shrinking the drawn bits never causes @@ -977,8 +967,6 @@ class PrimitiveProvider: # becomes i > falsey. result = i > falsey - if i > 1: - self._cd.draw_bits(bits, forced=int(result)) break self._cd.stop_example() return result @@ -993,6 +981,11 @@ class PrimitiveProvider: shrink_towards: int = 0, forced: Optional[int] = None, ) -> int: + if min_value is not None: + shrink_towards = max(min_value, shrink_towards) + if max_value is not None: + shrink_towards = min(max_value, shrink_towards) + # This is easy to build on top of our existing conjecture utils, # and it's easy to build sampled_from and weighted_coin on this. 
if weights is not None: @@ -1000,45 +993,46 @@ class PrimitiveProvider: assert max_value is not None sampler = Sampler(weights) - idx = sampler.sample(self._cd) + gap = max_value - shrink_towards - if shrink_towards <= min_value: - return min_value + idx - elif max_value <= shrink_towards: - return max_value - idx - else: - # For range -2..2, interpret idx = 0..4 as [0, 1, 2, -1, -2] - if idx <= (gap := max_value - shrink_towards): - return shrink_towards + idx + forced_idx = None + if forced is not None: + if forced >= shrink_towards: + forced_idx = forced - shrink_towards else: - return shrink_towards - (idx - gap) + forced_idx = shrink_towards + gap - forced + idx = sampler.sample(self._cd, forced=forced_idx) + + # For range -2..2, interpret idx = 0..4 as [0, 1, 2, -1, -2] + if idx <= gap: + return shrink_towards + idx + else: + return shrink_towards - (idx - gap) if min_value is None and max_value is None: - return self._draw_unbounded_integer() + return self._draw_unbounded_integer(forced=forced) if min_value is None: assert max_value is not None # make mypy happy - if max_value <= shrink_towards: - return max_value - abs(self._draw_unbounded_integer()) - else: - probe = max_value + 1 - while max_value < probe: - self._cd.start_example(ONE_BOUND_INTEGERS_LABEL) - probe = self._draw_unbounded_integer() + shrink_towards - self._cd.stop_example(discard=max_value < probe) - return probe + probe = max_value + 1 + while max_value < probe: + self._cd.start_example(ONE_BOUND_INTEGERS_LABEL) + probe = shrink_towards + self._draw_unbounded_integer( + forced=None if forced is None else forced - shrink_towards + ) + self._cd.stop_example(discard=max_value < probe) + return probe if max_value is None: assert min_value is not None - if min_value >= shrink_towards: - return min_value + abs(self._draw_unbounded_integer()) - else: - probe = min_value - 1 - while probe < min_value: - self._cd.start_example(ONE_BOUND_INTEGERS_LABEL) - probe = self._draw_unbounded_integer() + 
shrink_towards - self._cd.stop_example(discard=probe < min_value) - return probe + probe = min_value - 1 + while probe < min_value: + self._cd.start_example(ONE_BOUND_INTEGERS_LABEL) + probe = shrink_towards + self._draw_unbounded_integer( + forced=None if forced is None else forced - shrink_towards + ) + self._cd.stop_example(discard=probe < min_value) + return probe return self._draw_bounded_integer( min_value, @@ -1057,7 +1051,8 @@ class PrimitiveProvider: # TODO: consider supporting these float widths at the IR level in the # future. # width: Literal[16, 32, 64] = 64, - # exclude_min and exclude_max handled higher up + # exclude_min and exclude_max handled higher up, + forced: Optional[float] = None, ) -> float: ( sampler, @@ -1074,17 +1069,25 @@ class PrimitiveProvider: while True: self._cd.start_example(FLOAT_STRATEGY_DO_DRAW_LABEL) - i = sampler.sample(self._cd) if sampler else 0 + # If `forced in nasty_floats`, then `forced` was *probably* + # generated by drawing a nonzero index from the sampler. However, we + # have no obligation to generate it that way when forcing. In particular, + # i == 0 is able to produce all possible floats, and the forcing + # logic is simpler if we assume this choice. 
+ forced_i = None if forced is None else 0 + i = sampler.sample(self._cd, forced=forced_i) if sampler else 0 self._cd.start_example(DRAW_FLOAT_LABEL) if i == 0: - result = self._draw_float(forced_sign_bit=forced_sign_bit) + result = self._draw_float( + forced_sign_bit=forced_sign_bit, forced=forced + ) if math.copysign(1.0, result) == -1: assert neg_clamper is not None clamped = -neg_clamper(-result) else: assert pos_clamper is not None clamped = pos_clamper(result) - if clamped != result: + if clamped != result and not (math.isnan(result) and allow_nan): self._cd.stop_example(discard=True) self._cd.start_example(DRAW_FLOAT_LABEL) self._write_float(clamped) @@ -1104,10 +1107,13 @@ class PrimitiveProvider: *, min_size: int = 0, max_size: Optional[int] = None, + forced: Optional[str] = None, ) -> str: if max_size is None: max_size = 10**10 # "arbitrarily large" + assert forced is None or min_size <= len(forced) <= max_size + average_size = min( max(min_size * 2, min_size + 5), 0.5 * (min_size + max_size), @@ -1119,31 +1125,57 @@ class PrimitiveProvider: min_size=min_size, max_size=max_size, average_size=average_size, + forced=None if forced is None else len(forced), ) while elements.more(): + forced_i: Optional[int] = None + if forced is not None: + c = forced[elements.count - 1] + forced_i = intervals.index_from_char_in_shrink_order(c) + if len(intervals) > 256: - if self.draw_boolean(0.2): - i = self._draw_bounded_integer(256, len(intervals) - 1) + if self.draw_boolean( + 0.2, forced=None if forced_i is None else forced_i > 255 + ): + i = self._draw_bounded_integer( + 256, len(intervals) - 1, forced=forced_i + ) else: - i = self._draw_bounded_integer(0, 255) + i = self._draw_bounded_integer(0, 255, forced=forced_i) else: - i = self._draw_bounded_integer(0, len(intervals) - 1) + i = self._draw_bounded_integer(0, len(intervals) - 1, forced=forced_i) chars.append(intervals.char_in_shrink_order(i)) return "".join(chars) - def draw_bytes(self, size: int) -> bytes: - 
return self._cd.draw_bits(8 * size).to_bytes(size, "big") + def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes: + forced_i = None + if forced is not None: + forced_i = int_from_bytes(forced) + size = len(forced) + + return self._cd.draw_bits(8 * size, forced=forced_i).to_bytes(size, "big") - def _draw_float(self, forced_sign_bit: Optional[int] = None) -> float: + def _draw_float( + self, forced_sign_bit: Optional[int] = None, *, forced: Optional[float] = None + ) -> float: """ Helper for draw_float which draws a random 64-bit float. """ + if forced is not None: + # sign_aware_lte(forced, -0.0) does not correctly handle the + # math.nan case here. + forced_sign_bit = math.copysign(1, forced) == -1 + self._cd.start_example(DRAW_FLOAT_LABEL) try: is_negative = self._cd.draw_bits(1, forced=forced_sign_bit) - f = lex_to_float(self._cd.draw_bits(64)) + f = lex_to_float( + self._cd.draw_bits( + 64, forced=None if forced is None else float_to_lex(abs(forced)) + ) + ) return -f if is_negative else f finally: self._cd.stop_example() @@ -1153,14 +1185,37 @@ class PrimitiveProvider: self._cd.draw_bits(1, forced=sign) self._cd.draw_bits(64, forced=float_to_lex(abs(f))) - def _draw_unbounded_integer(self) -> int: - size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)] - r = self._cd.draw_bits(size) + def _draw_unbounded_integer(self, *, forced: Optional[int] = None) -> int: + forced_i = None + if forced is not None: + # Using any bucket large enough to contain this integer would be a + # valid way to force it. This is because an n bit integer could have + # been drawn from a bucket of size n, or from any bucket of size + # m > n. + # We'll always choose the smallest eligible bucket here. + + # We need an extra bit to handle forced signed integers. INT_SIZES + # is interpreted as unsigned sizes. 
+ bit_size = forced.bit_length() + 1 + size = min(size for size in INT_SIZES if bit_size <= size) + forced_i = INT_SIZES.index(size) + + size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd, forced=forced_i)] + + forced_r = None + if forced is not None: + forced_r = forced + forced_r <<= 1 + if forced < 0: + forced_r = -forced_r + forced_r |= 1 + + r = self._cd.draw_bits(size, forced=forced_r) sign = r & 1 r >>= 1 if sign: r = -r - return int(r) + return r def _draw_bounded_integer( self, @@ -1202,7 +1257,9 @@ class PrimitiveProvider: bits = gap.bit_length() probe = gap + 1 - if bits > 24 and self._cd.draw_bits(3, forced=None if forced is None else 0): + if bits > 24 and self.draw_boolean( + 7 / 8, forced=None if forced is None else False + ): # For large ranges, we combine the uniform random distribution from draw_bits # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our # choice of unicode characters is uniform but the 32bit distribution is not. @@ -1399,6 +1456,7 @@ class ConjectureData: # try varying, to report if the minimal example always fails anyway. self.arg_slices: Set[Tuple[int, int]] = set() self.slice_comments: Dict[Tuple[int, int], str] = {} + self._observability_args: Dict[str, Any] = {} self.extra_information = ExtraInformation() @@ -1425,11 +1483,20 @@ class ConjectureData: if weights is not None: assert min_value is not None assert max_value is not None - assert (max_value - min_value) <= 1024 # arbitrary practical limit - - if forced is not None: - assert min_value is not None - assert max_value is not None + width = max_value - min_value + 1 + assert width <= 1024 # arbitrary practical limit + assert len(weights) == width + + if forced is not None and (min_value is None or max_value is None): + # We draw `forced=forced - shrink_towards` here internally. If that + # grows larger than a 128 bit signed integer, we can't represent it. + # Disallow this combination for now. 
+ # Note that bit_length() = 128 -> signed bit size = 129. + assert (forced - shrink_towards).bit_length() < 128 + if forced is not None and min_value is not None: + assert min_value <= forced + if forced is not None and max_value is not None: + assert forced <= max_value return self.provider.draw_integer( min_value=min_value, @@ -1449,14 +1516,23 @@ class ConjectureData: # TODO: consider supporting these float widths at the IR level in the # future. # width: Literal[16, 32, 64] = 64, - # exclude_min and exclude_max handled higher up + # exclude_min and exclude_max handled higher up, + forced: Optional[float] = None, ) -> float: assert smallest_nonzero_magnitude > 0 + assert not math.isnan(min_value) + assert not math.isnan(max_value) + + if forced is not None: + assert allow_nan or not math.isnan(forced) + assert math.isnan(forced) or min_value <= forced <= max_value + return self.provider.draw_float( min_value=min_value, max_value=max_value, allow_nan=allow_nan, smallest_nonzero_magnitude=smallest_nonzero_magnitude, + forced=forced, ) def draw_string( @@ -1465,13 +1541,16 @@ class ConjectureData: *, min_size: int = 0, max_size: Optional[int] = None, + forced: Optional[str] = None, ) -> str: + assert forced is None or min_size <= len(forced) return self.provider.draw_string( - intervals, min_size=min_size, max_size=max_size + intervals, min_size=min_size, max_size=max_size, forced=forced ) - def draw_bytes(self, size: int) -> bytes: - return self.provider.draw_bytes(size) + def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes: + assert forced is None or len(forced) == size + return self.provider.draw_bytes(size, forced=forced) def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool: return self.provider.draw_boolean(p, forced=forced) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py index 48a5ec3f27..0712b2d8c8 
100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py @@ -81,8 +81,12 @@ def check_sample( return tuple(values) -def choice(data: "ConjectureData", values: Sequence[T]) -> T: - return values[data.draw_integer(0, len(values) - 1)] +def choice( + data: "ConjectureData", values: Sequence[T], *, forced: Optional[T] = None +) -> T: + forced_i = None if forced is None else values.index(forced) + i = data.draw_integer(0, len(values) - 1, forced=forced_i) + return values[i] class Sampler: @@ -171,14 +175,25 @@ class Sampler: self.table.append((base, alternate, alternate_chance)) self.table.sort() - def sample(self, data: "ConjectureData") -> int: + def sample(self, data: "ConjectureData", forced: Optional[int] = None) -> int: data.start_example(SAMPLE_IN_SAMPLER_LABEL) - base, alternate, alternate_chance = choice(data, self.table) - use_alternate = data.draw_boolean(alternate_chance) + forced_choice = ( # pragma: no branch # https://github.com/nedbat/coveragepy/issues/1617 + None + if forced is None + else next((b, a, a_c) for (b, a, a_c) in self.table if forced in (b, a)) + ) + base, alternate, alternate_chance = choice( + data, self.table, forced=forced_choice + ) + use_alternate = data.draw_boolean( + alternate_chance, forced=None if forced is None else forced == alternate + ) data.stop_example() if use_alternate: + assert forced is None or alternate == forced, (forced, alternate) return alternate else: + assert forced is None or base == forced, (forced, base) return base @@ -204,11 +219,15 @@ class many: min_size: int, max_size: Union[int, float], average_size: Union[int, float], + *, + forced: Optional[int] = None, ) -> None: assert 0 <= min_size <= average_size <= max_size + assert forced is None or min_size <= forced <= max_size self.min_size = min_size self.max_size = max_size self.data = data + self.forced_size = forced self.p_continue = 
_calc_p_continue(average_size - min_size, max_size - min_size) self.count = 0 self.rejections = 0 @@ -227,15 +246,22 @@ class many: self.data.start_example(ONE_FROM_MANY_LABEL) if self.min_size == self.max_size: + # if we have to hit an exact size, draw unconditionally until that + # point, and no further. should_continue = self.count < self.min_size else: forced_result = None if self.force_stop: + # if our size is forced, we can't reject in a way that would + # cause us to differ from the forced size. + assert self.forced_size is None or self.count == self.forced_size forced_result = False elif self.count < self.min_size: forced_result = True elif self.count >= self.max_size: forced_result = False + elif self.forced_size is not None: + forced_result = self.count < self.forced_size should_continue = self.data.draw_boolean( self.p_continue, forced=forced_result ) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py b/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py index c5e82f6b22..4a143c80b8 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py @@ -8,6 +8,8 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
+from typing import Union + class IntervalSet: @classmethod @@ -61,17 +63,16 @@ class IntervalSet: assert r <= v return r - def __contains__(self, elem): + def __contains__(self, elem: Union[str, int]) -> bool: if isinstance(elem, str): elem = ord(elem) - assert isinstance(elem, int) assert 0 <= elem <= 0x10FFFF return any(start <= elem <= end for start, end in self.intervals) def __repr__(self): return f"IntervalSet({self.intervals!r})" - def index(self, value): + def index(self, value: int) -> int: for offset, (u, v) in zip(self.offsets, self.intervals): if u == value: return offset @@ -81,7 +82,7 @@ class IntervalSet: return offset + (value - u) raise ValueError(f"{value} is not in list") - def index_above(self, value): + def index_above(self, value: int) -> int: for offset, (u, v) in zip(self.offsets, self.intervals): if u >= value: return offset @@ -254,3 +255,24 @@ class IntervalSet: assert 0 <= i <= self._idx_of_Z return chr(self[i]) + + def index_from_char_in_shrink_order(self, c: str) -> int: + """ + Inverse of char_in_shrink_order. + """ + assert len(c) == 1 + i = self.index(ord(c)) + + if i <= self._idx_of_Z: + n = self._idx_of_Z - self._idx_of_zero + # Rewrite [zero_point, Z_point] to [0, n]. + if self._idx_of_zero <= i <= self._idx_of_Z: + i -= self._idx_of_zero + assert 0 <= i <= n + # Rewrite [zero_point, 0] to [n + 1, Z_point]. + else: + i = self._idx_of_zero - i + n + assert n + 1 <= i <= self._idx_of_Z + assert 0 <= i <= self._idx_of_Z + + return i diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/observability.py b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py new file mode 100644 index 0000000000..6752868737 --- /dev/null +++ b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py @@ -0,0 +1,92 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. 
+# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +"""Observability tools to spit out analysis-ready tables, one row per test case.""" + +import json +import os +from datetime import date, timedelta +from typing import Callable, Dict, List, Optional + +from hypothesis.configuration import storage_directory +from hypothesis.internal.conjecture.data import ConjectureData, Status + +TESTCASE_CALLBACKS: List[Callable[[dict], None]] = [] + + +def deliver_json_blob(value: dict) -> None: + for callback in TESTCASE_CALLBACKS: + callback(value) + + +def make_testcase( + *, + start_timestamp: float, + test_name_or_nodeid: str, + data: ConjectureData, + how_generated: str = "unknown", + string_repr: str = "<unknown>", + arguments: Optional[dict] = None, + metadata: Optional[dict] = None, + coverage: Optional[Dict[str, List[int]]] = None, +) -> dict: + if data.interesting_origin: + status_reason = str(data.interesting_origin) + else: + status_reason = str(data.events.pop("invalid because", "")) + + return { + "type": "test_case", + "run_start": start_timestamp, + "property": test_name_or_nodeid, + "status": { + Status.OVERRUN: "gave_up", + Status.INVALID: "gave_up", + Status.VALID: "passed", + Status.INTERESTING: "failed", + }[data.status], + "status_reason": status_reason, + "representation": string_repr, + "arguments": arguments or {}, + "how_generated": how_generated, # iid, mutation, etc. 
+ "features": { + **{ + f"target:{k}".strip(":"): v for k, v in data.target_observations.items() + }, + **data.events, + }, + "metadata": { + **(metadata or {}), + "traceback": getattr(data.extra_information, "_expected_traceback", None), + }, + "coverage": coverage, + } + + +_WROTE_TO = set() + + +def _deliver_to_file(value): # pragma: no cover + kind = "testcases" if value["type"] == "test_case" else "info" + fname = storage_directory("observed", f"{date.today().isoformat()}_{kind}.jsonl") + fname.parent.mkdir(exist_ok=True) + _WROTE_TO.add(fname) + with fname.open(mode="a") as f: + f.write(json.dumps(value) + "\n") + + +if "HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY" in os.environ: # pragma: no cover + TESTCASE_CALLBACKS.append(_deliver_to_file) + + # Remove files more than a week old, to cap the size on disk + max_age = (date.today() - timedelta(days=8)).isoformat() + for f in storage_directory("observed").glob("*.jsonl"): + if f.stem < max_age: # pragma: no branch + f.unlink(missing_ok=True) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/reflection.py b/contrib/python/hypothesis/py3/hypothesis/internal/reflection.py index 31123b61ec..2f0480c987 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/reflection.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/reflection.py @@ -306,8 +306,12 @@ def extract_lambda_source(f): This is not a good function and I am sorry for it. Forgive me my sins, oh lord """ + # You might be wondering how a lambda can have a return-type annotation? + # The answer is that we add this at runtime, in new_given_signature(), + # and we do support strange choices as applying @given() to a lambda. 
sig = inspect.signature(f) - assert sig.return_annotation is inspect.Parameter.empty + assert sig.return_annotation in (inspect.Parameter.empty, None), sig + if sig.parameters: if_confused = f"lambda {str(sig)[1:-1]}: <unknown>" else: diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py b/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py index 5b372ffd65..39352844b4 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py @@ -8,16 +8,29 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +import functools +import os +import subprocess import sys import types from collections import defaultdict from functools import lru_cache, reduce from os import sep from pathlib import Path +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple from hypothesis._settings import Phase, Verbosity from hypothesis.internal.escalation import is_hypothesis_file +if TYPE_CHECKING: + from typing import TypeAlias +else: + TypeAlias = object + +Location: TypeAlias = Tuple[str, int] +Branch: TypeAlias = Tuple[Optional[Location], Location] +Trace: TypeAlias = Set[Branch] + @lru_cache(maxsize=None) def should_trace_file(fname): @@ -41,7 +54,7 @@ class Tracer: __slots__ = ("branches", "_previous_location") def __init__(self): - self.branches = set() + self.branches: Trace = set() self._previous_location = None def trace(self, frame, event, arg): @@ -179,3 +192,50 @@ def explanatory_lines(traces, settings): explanations = get_explaining_locations(traces) max_lines = 5 if settings.verbosity <= Verbosity.normal else float("inf") return make_report(explanations, cap_lines_at=max_lines) + + +# beware the code below; we're using some heuristics to make a nicer report... 
+ + +@functools.lru_cache +def _get_git_repo_root() -> Path: + try: + where = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + check=True, + timeout=10, + capture_output=True, + text=True, + encoding="utf-8", + ).stdout.strip() + except Exception: # pragma: no cover + return Path().absolute().parents[-1] + else: + return Path(where) + + +if sys.version_info[:2] <= (3, 8): + + def is_relative_to(self, other): + return other == self or other in self.parents + +else: + is_relative_to = Path.is_relative_to + + +def tractable_coverage_report(trace: Trace) -> Dict[str, List[int]]: + """Report a simple coverage map which is (probably most) of the user's code.""" + coverage: dict = {} + t = dict(trace) + for file, line in set(t.keys()).union(t.values()) - {None}: # type: ignore + # On Python <= 3.11, we can use coverage.py xor Hypothesis' tracer, + # so the trace will be empty and this line never run under coverage. + coverage.setdefault(file, set()).add(line) # pragma: no cover + stdlib_fragment = f"{os.sep}lib{os.sep}python3.{sys.version_info.minor}{os.sep}" + return { + k: sorted(v) + for k, v in coverage.items() + if stdlib_fragment not in k + and is_relative_to(p := Path(k), _get_git_repo_root()) + and "site-packages" not in p.parts + } diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/attrs.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/attrs.py index d4f56a1f3a..3b08f3a43d 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/attrs.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/attrs.py @@ -21,12 +21,38 @@ from hypothesis.strategies._internal.types import is_a_type, type_sorting_key from hypothesis.utils.conventions import infer +def get_attribute_by_alias(fields, alias, *, target=None): + """ + Get an attrs attribute by its alias, rather than its name (compare + getattr(fields, name)). 
+ + ``target`` is used only to provide a nicer error message, and can be safely + omitted. + """ + # attrs supports defining an alias for a field, which is the name used when + # defining __init__. The init args are what we pull from when determining + # what parameters we need to supply to the class, so it's what we need to + # match against as well, rather than the class-level attribute name. + matched_fields = [f for f in fields if f.alias == alias] + if not matched_fields: + raise TypeError( + f"Unexpected keyword argument {alias} for attrs class" + f"{f' {target}' if target else ''}. Expected one of " + f"{[f.name for f in fields]}" + ) + # alias is used as an arg in __init__, so it is guaranteed to be unique, if + # it exists. + assert len(matched_fields) == 1 + return matched_fields[0] + + def from_attrs(target, args, kwargs, to_infer): """An internal version of builds(), specialised for Attrs classes.""" fields = attr.fields(target) kwargs = {k: v for k, v in kwargs.items() if v is not infer} for name in to_infer: - kwargs[name] = from_attrs_attribute(getattr(fields, name), target) + attrib = get_attribute_by_alias(fields, name, target=target) + kwargs[name] = from_attrs_attribute(attrib, target) # We might make this strategy more efficient if we added a layer here that # retries drawing if validation fails, for improved composition. # The treatment of timezones in datetimes() provides a precedent. 
diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py index b8c5601587..a5a862635a 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py @@ -129,7 +129,11 @@ from hypothesis.strategies._internal.strings import ( OneCharStringStrategy, TextStrategy, ) -from hypothesis.strategies._internal.utils import cacheable, defines_strategy +from hypothesis.strategies._internal.utils import ( + cacheable, + defines_strategy, + to_jsonable, +) from hypothesis.utils.conventions import not_set from hypothesis.vendor.pretty import RepresentationPrinter @@ -2098,8 +2102,9 @@ class DataObject: result = self.conjecture_data.draw(strategy) self.count += 1 printer = RepresentationPrinter(context=current_build_context()) - printer.text(f"Draw {self.count}") - printer.text(": " if label is None else f" ({label}): ") + desc = f"Draw {self.count}{'' if label is None else f' ({label})'}: " + self.conjecture_data._observability_args[desc] = to_jsonable(result) + printer.text(desc) printer.pretty(result) note(printer.getvalue()) return result diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/utils.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/utils.py index b0e0746314..995b179b40 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/utils.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/utils.py @@ -8,13 +8,17 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
+import sys import threading from inspect import signature from typing import TYPE_CHECKING, Callable, Dict +import attr + from hypothesis.internal.cache import LRUReusedCache from hypothesis.internal.floats import float_to_int from hypothesis.internal.reflection import proxies +from hypothesis.vendor.pretty import pretty if TYPE_CHECKING: from hypothesis.strategies._internal.strategies import SearchStrategy, T @@ -144,3 +148,40 @@ def defines_strategy( return accept return decorator + + +def to_jsonable(obj: object) -> object: + """Recursively convert an object to json-encodable form. + + This is not intended to round-trip, but rather provide an analysis-ready + format for observability. To avoid side affects, we pretty-print all but + known types. + """ + if isinstance(obj, (str, int, float, bool, type(None))): + if isinstance(obj, int) and abs(obj) >= 2**63: + return float(obj) + return obj + if isinstance(obj, (list, tuple, set, frozenset)): + if isinstance(obj, tuple) and hasattr(obj, "_asdict"): + return to_jsonable(obj._asdict()) # treat namedtuples as dicts + return [to_jsonable(x) for x in obj] + if isinstance(obj, dict): + return { + k if isinstance(k, str) else pretty(k): to_jsonable(v) + for k, v in obj.items() + } + + # Special handling for dataclasses, attrs, and pydantic classes + if ( + (dcs := sys.modules.get("dataclasses")) + and dcs.is_dataclass(obj) + and not isinstance(obj, type) + ): + return to_jsonable(dcs.asdict(obj)) + if attr.has(type(obj)): + return to_jsonable(attr.asdict(obj, recurse=False)) # type: ignore + if (pyd := sys.modules.get("pydantic")) and isinstance(obj, pyd.BaseModel): + return to_jsonable(obj.model_dump()) + + # If all else fails, we'll just pretty-print as a string. 
+ return pretty(obj) diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py index 9b0a5e2eb1..8357097704 100644 --- a/contrib/python/hypothesis/py3/hypothesis/version.py +++ b/contrib/python/hypothesis/py3/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -__version_info__ = (6, 91, 0) +__version_info__ = (6, 92, 0) __version__ = ".".join(map(str, __version_info__)) diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make index 100509362e..456dabd865 100644 --- a/contrib/python/hypothesis/py3/ya.make +++ b/contrib/python/hypothesis/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(6.91.0) +VERSION(6.92.0) LICENSE(MPL-2.0) @@ -82,6 +82,7 @@ PY_SRCS( hypothesis/internal/floats.py hypothesis/internal/healthcheck.py hypothesis/internal/intervalsets.py + hypothesis/internal/observability.py hypothesis/internal/reflection.py hypothesis/internal/scrutineer.py hypothesis/internal/validation.py diff --git a/contrib/python/jsonschema/py3/jsonschema/tests/test_validators.py b/contrib/python/jsonschema/py3/jsonschema/tests/test_validators.py index 07be4f08bc..7088b0b824 100644 --- a/contrib/python/jsonschema/py3/jsonschema/tests/test_validators.py +++ b/contrib/python/jsonschema/py3/jsonschema/tests/test_validators.py @@ -1525,7 +1525,7 @@ class TestValidate(SynchronousTestCase): def test_validation_error_message(self): with self.assertRaises(exceptions.ValidationError) as e: validators.validate(12, {"type": "string"}) - self.assertRegexpMatches( + self.assertRegex( str(e.exception), "(?s)Failed validating u?'.*' in schema.*On instance", ) @@ -1533,7 +1533,7 @@ class TestValidate(SynchronousTestCase): def test_schema_error_message(self): with self.assertRaises(exceptions.SchemaError) as e: validators.validate(12, {"type": 12}) - self.assertRegexpMatches( + 
self.assertRegex( str(e.exception), "(?s)Failed validating u?'.*' in metaschema.*On schema", ) diff --git a/contrib/python/multidict/multidict/_multidict.c b/contrib/python/multidict/multidict/_multidict.c index 1ba79df304..cbc6179932 100644 --- a/contrib/python/multidict/multidict/_multidict.c +++ b/contrib/python/multidict/multidict/_multidict.c @@ -455,7 +455,11 @@ multidict_getall(MultiDictObject *self, PyObject *const *args, return NULL; } #else - static _PyArg_Parser _parser = {NULL, _keywords, "getall", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "getall", + .kwtuple = NULL, + }; PyObject *argsbuf[2]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, @@ -500,7 +504,11 @@ multidict_getone(MultiDictObject *self, PyObject *const *args, return NULL; } #else - static _PyArg_Parser _parser = {NULL, _keywords, "getone", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "getone", + .kwtuple = NULL, + }; PyObject *argsbuf[2]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, @@ -535,7 +543,11 @@ multidict_get(MultiDictObject *self, PyObject *const *args, return NULL; } #else - static _PyArg_Parser _parser = {NULL, _keywords, "get", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "get", + .kwtuple = NULL, + }; PyObject *argsbuf[2]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, @@ -777,7 +789,11 @@ multidict_add(MultiDictObject *self, PyObject *const *args, return NULL; } #else - static _PyArg_Parser _parser = {NULL, _keywords, "add", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "add", + .kwtuple = NULL, + }; PyObject *argsbuf[2]; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, @@ -836,7 +852,11 @@ multidict_setdefault(MultiDictObject *self, PyObject *const *args, return NULL; } #else - static _PyArg_Parser _parser = {NULL, _keywords, "setdefault", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "setdefault", + .kwtuple = NULL, + }; PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; @@ -872,7 +892,11 @@ multidict_popone(MultiDictObject *self, PyObject *const *args, return NULL; } #else - static _PyArg_Parser _parser = {NULL, _keywords, "popone", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "popone", + .kwtuple = NULL, + }; PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; @@ -919,7 +943,11 @@ multidict_pop(MultiDictObject *self, PyObject *const *args, return NULL; } #else - static _PyArg_Parser _parser = {NULL, _keywords, "pop", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "pop", + .kwtuple = NULL, + }; PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; @@ -967,7 +995,11 @@ multidict_popall(MultiDictObject *self, PyObject *const *args, return NULL; } #else - static _PyArg_Parser _parser = {NULL, _keywords, "popall", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "popall", + .kwtuple = NULL, + }; PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; diff --git a/contrib/python/multidict/tests/test_multidict.py b/contrib/python/multidict/tests/test_multidict.py index 706fc93e75..e2ad71ee34 100644 --- a/contrib/python/multidict/tests/test_multidict.py +++ b/contrib/python/multidict/tests/test_multidict.py @@ -199,6 +199,7 @@ class BaseMultiDictTest: d.getone("key2") assert d.getone("key2", "default") == "default" + assert d.getone(key="key2", default="default") == "default" def test__iter__( self, @@ -532,6 +533,8 @@ class TestMultiDict(BaseMultiDictTest): def test_get(self, cls: Type[MultiDict[int]]) -> None: d = cls([("a", 1), ("a", 2)]) assert d["a"] == 1 + assert d.get("a") == 1 + assert d.get("z", 3) == 3 def test_items__repr__(self, cls: Type[MultiDict[str]]) -> None: d = cls([("key", "value1")], key="value2") diff --git a/contrib/python/multidict/tests/test_mutable_multidict.py b/contrib/python/multidict/tests/test_mutable_multidict.py index 3d4d16ac03..3f66e279ad 100644 --- a/contrib/python/multidict/tests/test_mutable_multidict.py +++ b/contrib/python/multidict/tests/test_mutable_multidict.py @@ -44,6 +44,7 @@ class TestMutableMultiDict: default = object() assert d.getall("some_key", default) is default + assert d.getall(key="some_key", default=default) is default def test_add(self, cls): d = cls() @@ -124,7 +125,7 @@ class TestMutableMultiDict: def test_set_default(self, cls): d = cls([("key", "one"), ("key", "two")], foo="bar") assert "one" == d.setdefault("key", "three") - assert "three" == d.setdefault("otherkey", "three") + assert "three" == d.setdefault(key="otherkey", default="three") assert "otherkey" in d assert "three" == d["otherkey"] @@ -163,6 +164,7 @@ class TestMutableMultiDict: d = cls(other="val") assert "default" == d.pop("key", "default") + assert "default" == d.pop(key="key", default="default") assert "other" in d def test_pop_raises(self, cls): @@ -229,6 +231,7 @@ class TestMutableMultiDict: def test_popall_default(self, cls): d = cls() assert 
"val" == d.popall("key", "val") + assert "val" == d.popall(key="key", default="val") def test_popall_key_error(self, cls): d = cls() diff --git a/contrib/python/multidict/tests/test_version.py b/contrib/python/multidict/tests/test_version.py index 067d6210ce..9b25c0e72d 100644 --- a/contrib/python/multidict/tests/test_version.py +++ b/contrib/python/multidict/tests/test_version.py @@ -95,6 +95,8 @@ class VersionMixin: v = self.getver(m) m.popone("key2", "default") assert self.getver(m) == v + m.popone(key="key2", default="default") + assert self.getver(m) == v def test_popone_key_error(self): m = self.cls() diff --git a/contrib/python/numpy/py3/numpy/tests/test_public_api.py b/contrib/python/numpy/py3/numpy/tests/test_public_api.py index 79d05407e5..3711c2f96b 100644 --- a/contrib/python/numpy/py3/numpy/tests/test_public_api.py +++ b/contrib/python/numpy/py3/numpy/tests/test_public_api.py @@ -339,6 +339,8 @@ SKIP_LIST = [ ] if sys.version_info < (3, 12): SKIP_LIST += ["numpy.distutils.msvc9compiler"] +else: + SKIP_LIST += ["numpy.distutils"] # suppressing warnings from deprecated modules diff --git a/contrib/python/olefile/py3/.dist-info/METADATA b/contrib/python/olefile/py3/.dist-info/METADATA index fe7735d0fd..2521b75751 100644 --- a/contrib/python/olefile/py3/.dist-info/METADATA +++ b/contrib/python/olefile/py3/.dist-info/METADATA @@ -1,13 +1,12 @@ Metadata-Version: 2.1 Name: olefile -Version: 0.46 +Version: 0.47 Summary: Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office) Home-page: https://www.decalage.info/python/olefileio Author: Philippe Lagadec Author-email: nospam@decalage.info License: BSD Download-URL: https://github.com/decalage2/olefile/tarball/master -Description-Content-Type: UNKNOWN Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers @@ -20,74 +19,79 @@ Classifier: Programming Language :: Python Classifier: 
Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Software Development :: Libraries :: Python Modules -Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.* +Description-Content-Type: text/markdown +Provides-Extra: tests +Requires-Dist: pytest ; extra == 'tests' +Requires-Dist: pytest-cov ; extra == 'tests' olefile ======= -|Build Status TravisCI| |Build Status AppVeyor| |Coverage Status| -|Documentation Status| |PyPI| |Can I Use Python 3?| |Say Thanks!| - -`olefile <https://www.decalage.info/olefile>`__ is a Python package to -parse, read and write `Microsoft OLE2 -files <http://en.wikipedia.org/wiki/Compound_File_Binary_Format>`__ -(also called Structured Storage, Compound File Binary Format or Compound -Document File Format), such as Microsoft Office 97-2003 documents, -vbaProject.bin in MS Office 2007+ files, Image Composer and FlashPix -files, Outlook messages, StickyNotes, several Microscopy file formats, -McAfee antivirus quarantine files, etc. 
- -**Quick links:** `Home page <https://www.decalage.info/olefile>`__ - -`Download/Install <http://olefile.readthedocs.io/en/latest/Install.html>`__ -- `Documentation <http://olefile.readthedocs.io/en/latest>`__ - `Report -Issues/Suggestions/Questions <https://github.com/decalage2/olefile/issues>`__ -- `Contact the author <https://www.decalage.info/contact>`__ - -`Repository <https://github.com/decalage2/olefile>`__ - `Updates on -Twitter <https://twitter.com/decalage2>`__ +[![Test](https://github.com/decalage2/olefile/actions/workflows/test.yml/badge.svg)](https://github.com/decalage2/olefile/actions) +[![Build Status AppVeyor](https://ci.appveyor.com/api/projects/status/github/decalage2/olefile?svg=true)](https://ci.appveyor.com/project/decalage2/olefile) +[![codecov](https://codecov.io/gh/decalage2/olefile/branch/main/graph/badge.svg)](https://codecov.io/gh/decalage2/olefile) +[![Documentation Status](http://readthedocs.org/projects/olefile/badge/?version=latest)](http://olefile.readthedocs.io/en/latest/?badge=latest) +[![PyPI](https://img.shields.io/pypi/v/olefile.svg)](https://pypi.org/project/olefile/) +[![Say Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/decalage2) + +[olefile](https://www.decalage.info/olefile) is a Python package to parse, read and write +[Microsoft OLE2 files](http://en.wikipedia.org/wiki/Compound_File_Binary_Format) +(also called Structured Storage, Compound File Binary Format or Compound Document File Format), +such as Microsoft Office 97-2003 documents, vbaProject.bin in MS Office 2007+ files, Image Composer +and FlashPix files, Outlook messages, StickyNotes, several Microscopy file formats, McAfee antivirus quarantine files, +etc. 
+ + +**Quick links:** [Home page](https://www.decalage.info/olefile) - +[Download/Install](http://olefile.readthedocs.io/en/latest/Install.html) - +[Documentation](http://olefile.readthedocs.io/en/latest) - +[Report Issues/Suggestions/Questions](https://github.com/decalage2/olefile/issues) - +[Contact the author](https://www.decalage.info/contact) - +[Repository](https://github.com/decalage2/olefile) - +[Updates on Twitter](https://twitter.com/decalage2) + News ---- -Follow all updates and news on Twitter: https://twitter.com/decalage2 - -- **2018-09-09 v0.46**: OleFileIO can now be used as a context manager - (with...as), to close the file automatically (see - `doc <https://olefile.readthedocs.io/en/latest/Howto.html#open-an-ole-file-from-disk>`__). - Improved handling of malformed files, fixed several bugs. -- 2018-01-24 v0.45: olefile can now overwrite streams of any size, - improved handling of malformed files, fixed several - `bugs <https://github.com/decalage2/olefile/milestone/4?closed=1>`__, - end of support for Python 2.6 and 3.3. -- 2017-01-06 v0.44: several bugfixes, removed support for Python 2.5 - (olefile2), added support for incomplete streams and incorrect - directory entries (to read malformed documents), added getclsid, - improved `documentation <http://olefile.readthedocs.io/en/latest>`__ - with API reference. -- 2017-01-04: moved the documentation to - `ReadTheDocs <http://olefile.readthedocs.io/en/latest>`__ -- 2016-05-20: moved olefile repository to - `GitHub <https://github.com/decalage2/olefile>`__ -- 2016-02-02 v0.43: fixed issues - `#26 <https://github.com/decalage2/olefile/issues/26>`__ and - `#27 <https://github.com/decalage2/olefile/issues/27>`__, better - handling of malformed files, use python logging. -- see - `changelog <https://github.com/decalage2/olefile/blob/master/CHANGELOG.md>`__ - for more detailed information and the latest changes. 
+Follow all updates and news on Twitter: <https://twitter.com/decalage2> + +- **2023-12-01 v0.47**: now distributed as wheel package, added VT_VECTOR support for properties, + added get_userdefined_properties, fixed bugs in isOleFile and write_sect, improved file closure +- 2018-09-09 v0.46: OleFileIO can now be used as a context manager +(with...as), to close the file automatically +(see [doc](https://olefile.readthedocs.io/en/latest/Howto.html#open-an-ole-file-from-disk)). +Improved handling of malformed files, fixed several bugs. +- 2018-01-24 v0.45: olefile can now overwrite streams of any size, improved handling of malformed files, +fixed several [bugs](https://github.com/decalage2/olefile/milestone/4?closed=1), end of support for Python 2.6 and 3.3. +- 2017-01-06 v0.44: several bugfixes, removed support for Python 2.5 (olefile2), +added support for incomplete streams and incorrect directory entries (to read malformed documents), +added getclsid, improved [documentation](http://olefile.readthedocs.io/en/latest) with API reference. +- 2017-01-04: moved the documentation to [ReadTheDocs](http://olefile.readthedocs.io/en/latest) +- 2016-05-20: moved olefile repository to [GitHub](https://github.com/decalage2/olefile) +- 2016-02-02 v0.43: fixed issues [#26](https://github.com/decalage2/olefile/issues/26) + and [#27](https://github.com/decalage2/olefile/issues/27), + better handling of malformed files, use python logging. +- see [changelog](https://github.com/decalage2/olefile/blob/master/CHANGELOG.md) for more detailed information and +the latest changes. Download/Install ---------------- -If you have pip or setuptools installed (pip is included in Python -2.7.9+), you may simply run **pip install olefile** or **easy_install -olefile** for the first installation. +If you have pip or setuptools installed (pip is included in Python 2.7.9+), you may simply run **pip install olefile** +or **easy_install olefile** for the first installation. 
To update olefile, run **pip install -U olefile**. @@ -96,115 +100,86 @@ Otherwise, see http://olefile.readthedocs.io/en/latest/Install.html Features -------- -- Parse, read and write any OLE file such as Microsoft Office 97-2003 - legacy document formats (Word .doc, Excel .xls, PowerPoint .ppt, - Visio .vsd, Project .mpp), Image Composer and FlashPix files, Outlook - messages, StickyNotes, Zeiss AxioVision ZVI files, Olympus FluoView - OIB files, etc -- List all the streams and storages contained in an OLE file -- Open streams as files -- Parse and read property streams, containing metadata of the file -- Portable, pure Python module, no dependency +- Parse, read and write any OLE file such as Microsoft Office 97-2003 legacy document formats (Word .doc, Excel .xls, + PowerPoint .ppt, Visio .vsd, Project .mpp), MSI files, Image Composer and FlashPix files, Outlook messages, StickyNotes, + Zeiss AxioVision ZVI files, Olympus FluoView OIB files, etc +- List all the streams and storages contained in an OLE file +- Open streams as files +- Parse and read property streams, containing metadata of the file +- Portable, pure Python module, no dependency olefile can be used as an independent package or with PIL/Pillow. -olefile is mostly meant for developers. If you are looking for tools to -analyze OLE files or to extract data (especially for security purposes -such as malware analysis and forensics), then please also check my -`python-oletools <https://www.decalage.info/python/oletools>`__, which -are built upon olefile and provide a higher-level interface. +olefile is mostly meant for developers. If you are looking for tools to analyze OLE files or to extract data (especially +for security purposes such as malware analysis and forensics), then please also check my +[python-oletools](https://www.decalage.info/python/oletools), which are built upon olefile and provide a higher-level interface. 
+ Documentation ------------- -Please see the `online -documentation <http://olefile.readthedocs.io/en/latest>`__ for more -information. +Please see the [online documentation](http://olefile.readthedocs.io/en/latest) for more information. + -Real-life examples ------------------- +## Real-life examples ## -A real-life example: `using OleFileIO_PL for malware analysis and -forensics <http://blog.gregback.net/2011/03/using-remnux-for-forensic-puzzle-6/>`__. +A real-life example: [using OleFileIO_PL for malware analysis and forensics](http://blog.gregback.net/2011/03/using-remnux-for-forensic-puzzle-6/). + +See also [this paper](https://computer-forensics.sans.org/community/papers/gcfa/grow-forensic-tools-taxonomy-python-libraries-helpful-forensic-analysis_6879) about python tools for forensics, which features olefile. -See also `this -paper <https://computer-forensics.sans.org/community/papers/gcfa/grow-forensic-tools-taxonomy-python-libraries-helpful-forensic-analysis_6879>`__ -about python tools for forensics, which features olefile. License ------- -olefile (formerly OleFileIO_PL) is copyright (c) 2005-2018 Philippe -Lagadec (https://www.decalage.info) +olefile (formerly OleFileIO_PL) is copyright (c) 2005-2023 Philippe Lagadec +([https://www.decalage.info](https://www.decalage.info)) All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -- Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------- - -olefile is based on source code from the OleFileIO module of the Python -Imaging Library (PIL) published by Fredrik Lundh under the following -license: +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +---------- + +olefile is based on source code from the OleFileIO module of the Python Imaging Library (PIL) published by Fredrik +Lundh under the following license: The Python Imaging Library (PIL) is -- Copyright (c) 1997-2009 by Secret Labs AB -- Copyright (c) 1995-2009 by Fredrik Lundh - -By obtaining, using, and/or copying this software and/or its associated -documentation, you agree that you have read, understood, and will comply -with the following terms and conditions: - -Permission to use, copy, modify, and distribute this software and its -associated documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appears in all copies, -and that both that copyright notice and this permission notice appear in -supporting documentation, and that the name of Secret Labs AB or the -author not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior permission. - -SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO -THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR -ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER -RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF -CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -.. 
|Build Status TravisCI| image:: https://travis-ci.org/decalage2/olefile.svg?branch=master - :target: https://travis-ci.org/decalage2/olefile -.. |Build Status AppVeyor| image:: https://ci.appveyor.com/api/projects/status/github/decalage2/olefile?svg=true - :target: https://ci.appveyor.com/project/decalage2/olefile -.. |Coverage Status| image:: https://coveralls.io/repos/github/decalage2/olefile/badge.svg?branch=master - :target: https://coveralls.io/github/decalage2/olefile?branch=master -.. |Documentation Status| image:: http://readthedocs.org/projects/olefile/badge/?version=latest - :target: http://olefile.readthedocs.io/en/latest/?badge=latest -.. |PyPI| image:: https://img.shields.io/pypi/v/olefile.svg - :target: https://pypi.org/project/olefile/ -.. |Can I Use Python 3?| image:: https://caniusepython3.com/project/olefile.svg - :target: https://caniusepython3.com/project/olefile -.. |Say Thanks!| image:: https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg - :target: https://saythanks.io/to/decalage2 +- Copyright (c) 1997-2009 by Secret Labs AB +- Copyright (c) 1995-2009 by Fredrik Lundh + +By obtaining, using, and/or copying this software and/or its associated documentation, you agree that you have read, +understood, and will comply with the following terms and conditions: + +Permission to use, copy, modify, and distribute this software and its associated documentation for any purpose and +without fee is hereby granted, provided that the above copyright notice appears in all copies, and that both that +copyright notice and this permission notice appear in supporting documentation, and that the name of Secret Labs AB or +the author not be used in advertising or publicity pertaining to distribution of the software without specific, written +prior permission. + +SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS +SOFTWARE. diff --git a/contrib/python/olefile/py3/LICENSE.txt b/contrib/python/olefile/py3/LICENSE.txt index 81dad32622..5f53714a94 100644 --- a/contrib/python/olefile/py3/LICENSE.txt +++ b/contrib/python/olefile/py3/LICENSE.txt @@ -1,6 +1,6 @@ LICENSE for the olefile package: -olefile (formerly OleFileIO_PL) is copyright (c) 2005-2018 Philippe Lagadec +olefile (formerly OleFileIO_PL) is copyright (c) 2005-2023 Philippe Lagadec (https://www.decalage.info) All rights reserved. diff --git a/contrib/python/olefile/py3/OleFileIO_PL.py b/contrib/python/olefile/py3/OleFileIO_PL.py new file mode 100644 index 0000000000..ffa4075e3b --- /dev/null +++ b/contrib/python/olefile/py3/OleFileIO_PL.py @@ -0,0 +1,37 @@ +#!/usr/local/bin/python +# -*- coding: latin-1 -*- +""" +olefile (formerly OleFileIO_PL) + +Module to read/write Microsoft OLE2 files (also called Structured Storage or +Microsoft Compound Document File Format), such as Microsoft Office 97-2003 +documents, Image Composer and FlashPix files, Outlook messages, ... +This version is compatible with Python 2.6+ and 3.x + +Project website: http://www.decalage.info/olefile + +olefile is copyright (c) 2005-2015 Philippe Lagadec (http://www.decalage.info) + +olefile is based on the OleFileIO module from the PIL library v1.1.6 +See: http://www.pythonware.com/products/pil/index.htm + +The Python Imaging Library (PIL) is + Copyright (c) 1997-2005 by Secret Labs AB + Copyright (c) 1995-2005 by Fredrik Lundh + +See source code and LICENSE.txt for information on usage and redistribution. 
+""" + +# The OleFileIO_PL module is for backward compatibility + +try: + # first try to import olefile for Python 2.6+/3.x + from olefile.olefile import * + # import metadata not covered by *: + from olefile.olefile import __version__, __author__, __date__ + +except: + # if it fails, fallback to the old version olefile2 for Python 2.x: + from olefile.olefile2 import * + # import metadata not covered by *: + from olefile.olefile2 import __doc__, __version__, __author__, __date__ diff --git a/contrib/python/olefile/py3/README.md b/contrib/python/olefile/py3/README.md index 8987e1b8a2..f053489a47 100644 --- a/contrib/python/olefile/py3/README.md +++ b/contrib/python/olefile/py3/README.md @@ -1,12 +1,11 @@ olefile ======= -[![Build Status TravisCI](https://travis-ci.org/decalage2/olefile.svg?branch=master)](https://travis-ci.org/decalage2/olefile) +[![Test](https://github.com/decalage2/olefile/actions/workflows/test.yml/badge.svg)](https://github.com/decalage2/olefile/actions) [![Build Status AppVeyor](https://ci.appveyor.com/api/projects/status/github/decalage2/olefile?svg=true)](https://ci.appveyor.com/project/decalage2/olefile) -[![Coverage Status](https://coveralls.io/repos/github/decalage2/olefile/badge.svg?branch=master)](https://coveralls.io/github/decalage2/olefile?branch=master) +[![codecov](https://codecov.io/gh/decalage2/olefile/branch/main/graph/badge.svg)](https://codecov.io/gh/decalage2/olefile) [![Documentation Status](http://readthedocs.org/projects/olefile/badge/?version=latest)](http://olefile.readthedocs.io/en/latest/?badge=latest) [![PyPI](https://img.shields.io/pypi/v/olefile.svg)](https://pypi.org/project/olefile/) -[![Can I Use Python 3?](https://caniusepython3.com/project/olefile.svg)](https://caniusepython3.com/project/olefile) [![Say Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/decalage2) [olefile](https://www.decalage.info/olefile) is a Python package to parse, read and write @@ -31,7 +30,9 @@ 
News Follow all updates and news on Twitter: <https://twitter.com/decalage2> -- **2018-09-09 v0.46**: OleFileIO can now be used as a context manager +- **2023-12-01 v0.47**: now distributed as wheel package, added VT_VECTOR support for properties, + added get_userdefined_properties, fixed bugs in isOleFile and write_sect, improved file closure +- 2018-09-09 v0.46: OleFileIO can now be used as a context manager (with...as), to close the file automatically (see [doc](https://olefile.readthedocs.io/en/latest/Howto.html#open-an-ole-file-from-disk)). Improved handling of malformed files, fixed several bugs. @@ -62,7 +63,7 @@ Features -------- - Parse, read and write any OLE file such as Microsoft Office 97-2003 legacy document formats (Word .doc, Excel .xls, - PowerPoint .ppt, Visio .vsd, Project .mpp), Image Composer and FlashPix files, Outlook messages, StickyNotes, + PowerPoint .ppt, Visio .vsd, Project .mpp), MSI files, Image Composer and FlashPix files, Outlook messages, StickyNotes, Zeiss AxioVision ZVI files, Olympus FluoView OIB files, etc - List all the streams and storages contained in an OLE file - Open streams as files @@ -92,7 +93,7 @@ See also [this paper](https://computer-forensics.sans.org/community/papers/gcfa/ License ------- -olefile (formerly OleFileIO_PL) is copyright (c) 2005-2018 Philippe Lagadec +olefile (formerly OleFileIO_PL) is copyright (c) 2005-2023 Philippe Lagadec ([https://www.decalage.info](https://www.decalage.info)) All rights reserved. 
diff --git a/contrib/python/olefile/py3/README.rst b/contrib/python/olefile/py3/README.rst deleted file mode 100644 index df8fb11462..0000000000 --- a/contrib/python/olefile/py3/README.rst +++ /dev/null @@ -1,177 +0,0 @@ -olefile -======= - -|Build Status TravisCI| |Build Status AppVeyor| |Coverage Status| -|Documentation Status| |PyPI| |Can I Use Python 3?| |Say Thanks!| - -`olefile <https://www.decalage.info/olefile>`__ is a Python package to -parse, read and write `Microsoft OLE2 -files <http://en.wikipedia.org/wiki/Compound_File_Binary_Format>`__ -(also called Structured Storage, Compound File Binary Format or Compound -Document File Format), such as Microsoft Office 97-2003 documents, -vbaProject.bin in MS Office 2007+ files, Image Composer and FlashPix -files, Outlook messages, StickyNotes, several Microscopy file formats, -McAfee antivirus quarantine files, etc. - -**Quick links:** `Home page <https://www.decalage.info/olefile>`__ - -`Download/Install <http://olefile.readthedocs.io/en/latest/Install.html>`__ -- `Documentation <http://olefile.readthedocs.io/en/latest>`__ - `Report -Issues/Suggestions/Questions <https://github.com/decalage2/olefile/issues>`__ -- `Contact the author <https://www.decalage.info/contact>`__ - -`Repository <https://github.com/decalage2/olefile>`__ - `Updates on -Twitter <https://twitter.com/decalage2>`__ - -News ----- - -Follow all updates and news on Twitter: https://twitter.com/decalage2 - -- **2018-09-09 v0.46**: OleFileIO can now be used as a context manager - (with...as), to close the file automatically (see - `doc <https://olefile.readthedocs.io/en/latest/Howto.html#open-an-ole-file-from-disk>`__). - Improved handling of malformed files, fixed several bugs. -- 2018-01-24 v0.45: olefile can now overwrite streams of any size, - improved handling of malformed files, fixed several - `bugs <https://github.com/decalage2/olefile/milestone/4?closed=1>`__, - end of support for Python 2.6 and 3.3. 
-- 2017-01-06 v0.44: several bugfixes, removed support for Python 2.5 - (olefile2), added support for incomplete streams and incorrect - directory entries (to read malformed documents), added getclsid, - improved `documentation <http://olefile.readthedocs.io/en/latest>`__ - with API reference. -- 2017-01-04: moved the documentation to - `ReadTheDocs <http://olefile.readthedocs.io/en/latest>`__ -- 2016-05-20: moved olefile repository to - `GitHub <https://github.com/decalage2/olefile>`__ -- 2016-02-02 v0.43: fixed issues - `#26 <https://github.com/decalage2/olefile/issues/26>`__ and - `#27 <https://github.com/decalage2/olefile/issues/27>`__, better - handling of malformed files, use python logging. -- see - `changelog <https://github.com/decalage2/olefile/blob/master/CHANGELOG.md>`__ - for more detailed information and the latest changes. - -Download/Install ----------------- - -If you have pip or setuptools installed (pip is included in Python -2.7.9+), you may simply run **pip install olefile** or **easy_install -olefile** for the first installation. - -To update olefile, run **pip install -U olefile**. - -Otherwise, see http://olefile.readthedocs.io/en/latest/Install.html - -Features --------- - -- Parse, read and write any OLE file such as Microsoft Office 97-2003 - legacy document formats (Word .doc, Excel .xls, PowerPoint .ppt, - Visio .vsd, Project .mpp), Image Composer and FlashPix files, Outlook - messages, StickyNotes, Zeiss AxioVision ZVI files, Olympus FluoView - OIB files, etc -- List all the streams and storages contained in an OLE file -- Open streams as files -- Parse and read property streams, containing metadata of the file -- Portable, pure Python module, no dependency - -olefile can be used as an independent package or with PIL/Pillow. - -olefile is mostly meant for developers. 
If you are looking for tools to -analyze OLE files or to extract data (especially for security purposes -such as malware analysis and forensics), then please also check my -`python-oletools <https://www.decalage.info/python/oletools>`__, which -are built upon olefile and provide a higher-level interface. - -Documentation -------------- - -Please see the `online -documentation <http://olefile.readthedocs.io/en/latest>`__ for more -information. - -Real-life examples ------------------- - -A real-life example: `using OleFileIO_PL for malware analysis and -forensics <http://blog.gregback.net/2011/03/using-remnux-for-forensic-puzzle-6/>`__. - -See also `this -paper <https://computer-forensics.sans.org/community/papers/gcfa/grow-forensic-tools-taxonomy-python-libraries-helpful-forensic-analysis_6879>`__ -about python tools for forensics, which features olefile. - -License -------- - -olefile (formerly OleFileIO_PL) is copyright (c) 2005-2018 Philippe -Lagadec (https://www.decalage.info) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -- Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------- - -olefile is based on source code from the OleFileIO module of the Python -Imaging Library (PIL) published by Fredrik Lundh under the following -license: - -The Python Imaging Library (PIL) is - -- Copyright (c) 1997-2009 by Secret Labs AB -- Copyright (c) 1995-2009 by Fredrik Lundh - -By obtaining, using, and/or copying this software and/or its associated -documentation, you agree that you have read, understood, and will comply -with the following terms and conditions: - -Permission to use, copy, modify, and distribute this software and its -associated documentation for any purpose and without fee is hereby -granted, provided that the above copyright notice appears in all copies, -and that both that copyright notice and this permission notice appear in -supporting documentation, and that the name of Secret Labs AB or the -author not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior permission. - -SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO -THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR -ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER -RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF -CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- -.. |Build Status TravisCI| image:: https://travis-ci.org/decalage2/olefile.svg?branch=master - :target: https://travis-ci.org/decalage2/olefile -.. |Build Status AppVeyor| image:: https://ci.appveyor.com/api/projects/status/github/decalage2/olefile?svg=true - :target: https://ci.appveyor.com/project/decalage2/olefile -.. |Coverage Status| image:: https://coveralls.io/repos/github/decalage2/olefile/badge.svg?branch=master - :target: https://coveralls.io/github/decalage2/olefile?branch=master -.. |Documentation Status| image:: http://readthedocs.org/projects/olefile/badge/?version=latest - :target: http://olefile.readthedocs.io/en/latest/?badge=latest -.. |PyPI| image:: https://img.shields.io/pypi/v/olefile.svg - :target: https://pypi.org/project/olefile/ -.. |Can I Use Python 3?| image:: https://caniusepython3.com/project/olefile.svg - :target: https://caniusepython3.com/project/olefile -.. |Say Thanks!| image:: https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg - :target: https://saythanks.io/to/decalage2 diff --git a/contrib/python/olefile/py3/olefile/CONTRIBUTORS.txt b/contrib/python/olefile/py3/olefile/CONTRIBUTORS.txt new file mode 100644 index 0000000000..45c56828c3 --- /dev/null +++ b/contrib/python/olefile/py3/olefile/CONTRIBUTORS.txt @@ -0,0 +1,17 @@ +CONTRIBUTORS for the olefile project +==================================== + +This is a non-exhaustive list of all the people who helped me improve the +olefile project (formerly OleFileIO_PL), in approximative chronological order. +Please contact me if I forgot to mention your name. 
+ +A big thank you to all of them: + +- Niko Ehrenfeuchter: added support for Jython +- Niko Ehrenfeuchter, Martijn Berger and Dave Jones: helped fix 4K sector support +- Martin Panter: conversion to Python 3.x/2.6+ +- mete0r_kr: added support for file-like objects +- chuckleberryfinn: fixed bug in getproperties +- Martijn, Ben G.: bug report for 64 bits platforms +- Philippe Lagadec: main author and maintainer since 2005 +- and of course Fredrik Lundh: original author of OleFileIO from 1995 to 2005 diff --git a/contrib/python/olefile/py3/olefile/LICENSE.txt b/contrib/python/olefile/py3/olefile/LICENSE.txt new file mode 100644 index 0000000000..61ecdd96c1 --- /dev/null +++ b/contrib/python/olefile/py3/olefile/LICENSE.txt @@ -0,0 +1,56 @@ +LICENSE for the olefile package: + +olefile (formerly OleFileIO_PL) is copyright (c) 2005-2015 Philippe Lagadec +(http://www.decalage.info) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +---------- + +olefile is based on source code from the OleFileIO module of the Python +Imaging Library (PIL) published by Fredrik Lundh under the following license: + +The Python Imaging Library (PIL) is +- Copyright (c) 1997-2005 by Secret Labs AB +- Copyright (c) 1995-2005 by Fredrik Lundh + +By obtaining, using, and/or copying this software and/or its associated +documentation, you agree that you have read, understood, and will comply with +the following terms and conditions: + +Permission to use, copy, modify, and distribute this software and its +associated documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appears in all copies, and that both +that copyright notice and this permission notice appear in supporting +documentation, and that the name of Secret Labs AB or the author not be used +in advertising or publicity pertaining to distribution of the software without +specific, written prior permission. + +SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS +SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN +NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL, +INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. 
diff --git a/contrib/python/olefile/py3/olefile/README.rst b/contrib/python/olefile/py3/olefile/README.rst new file mode 100644 index 0000000000..4c5fbc4579 --- /dev/null +++ b/contrib/python/olefile/py3/olefile/README.rst @@ -0,0 +1,221 @@ +olefile (formerly OleFileIO\_PL) +================================ + +`olefile <http://www.decalage.info/olefile>`_ is a Python package to +parse, read and write `Microsoft OLE2 +files <http://en.wikipedia.org/wiki/Compound_File_Binary_Format>`_ (also +called Structured Storage, Compound File Binary Format or Compound +Document File Format), such as Microsoft Office 97-2003 documents, +vbaProject.bin in MS Office 2007+ files, Image Composer and FlashPix +files, Outlook messages, StickyNotes, several Microscopy file formats, +McAfee antivirus quarantine files, etc. + +**Quick links:** `Home page <http://www.decalage.info/olefile>`_ - +`Download/Install <https://bitbucket.org/decalage/olefileio_pl/wiki/Install>`_ +- `Documentation <https://bitbucket.org/decalage/olefileio_pl/wiki>`_ - +`Report +Issues/Suggestions/Questions <https://bitbucket.org/decalage/olefileio_pl/issues?status=new&status=open>`_ +- `Contact the author <http://decalage.info/contact>`_ - +`Repository <https://bitbucket.org/decalage/olefileio_pl>`_ - `Updates +on Twitter <https://twitter.com/decalage2>`_ + +News +---- + +Follow all updates and news on Twitter: https://twitter.com/decalage2 + +- **2015-01-25 v0.42**: improved handling of special characters in + stream/storage names on Python 2.x (using UTF-8 instead of Latin-1), + fixed bug in listdir with empty storages. +- 2014-11-25 v0.41: OleFileIO.open and isOleFile now support OLE files + stored in byte strings, fixed installer for python 3, added support + for Jython (Niko Ehrenfeuchter) +- 2014-10-01 v0.40: renamed OleFileIO\_PL to olefile, added initial + write support for streams >4K, updated doc and license, improved the + setup script. 
+- 2014-07-27 v0.31: fixed support for large files with 4K sectors, + thanks to Niko Ehrenfeuchter, Martijn Berger and Dave Jones. Added + test scripts from Pillow (by hugovk). Fixed setup for Python 3 + (Martin Panter) +- 2014-02-04 v0.30: now compatible with Python 3.x, thanks to Martin + Panter who did most of the hard work. +- 2013-07-24 v0.26: added methods to parse stream/storage timestamps, + improved listdir to include storages, fixed parsing of direntry + timestamps +- 2013-05-27 v0.25: improved metadata extraction, properties parsing + and exception handling, fixed `issue + #12 <https://bitbucket.org/decalage/olefileio_pl/issue/12/error-when-converting-timestamps-in-ole>`_ +- 2013-05-07 v0.24: new features to extract metadata (get\_metadata + method and OleMetadata class), improved getproperties to convert + timestamps to Python datetime +- 2012-10-09: published + `python-oletools <http://www.decalage.info/python/oletools>`_, a + package of analysis tools based on OleFileIO\_PL +- 2012-09-11 v0.23: added support for file-like objects, fixed `issue + #8 <https://bitbucket.org/decalage/olefileio_pl/issue/8/bug-with-file-object>`_ +- 2012-02-17 v0.22: fixed issues #7 (bug in getproperties) and #2 + (added close method) +- 2011-10-20: code hosted on bitbucket to ease contributions and bug + tracking +- 2010-01-24 v0.21: fixed support for big-endian CPUs, such as PowerPC + Macs. +- 2009-12-11 v0.20: small bugfix in OleFileIO.open when filename is not + plain str. +- 2009-12-10 v0.19: fixed support for 64 bits platforms (thanks to Ben + G. and Martijn for reporting the bug) +- see changelog in source code for more info. + +Download/Install +---------------- + +If you have pip or setuptools installed (pip is included in Python +2.7.9+), you may simply run **pip install olefile** or **easy\_install +olefile** for the first installation. + +To update olefile, run **pip install -U olefile**. 
+ +Otherwise, see https://bitbucket.org/decalage/olefileio\_pl/wiki/Install + +Features +-------- + +- Parse, read and write any OLE file such as Microsoft Office 97-2003 + legacy document formats (Word .doc, Excel .xls, PowerPoint .ppt, + Visio .vsd, Project .mpp), Image Composer and FlashPix files, Outlook + messages, StickyNotes, Zeiss AxioVision ZVI files, Olympus FluoView + OIB files, etc +- List all the streams and storages contained in an OLE file +- Open streams as files +- Parse and read property streams, containing metadata of the file +- Portable, pure Python module, no dependency + +olefile can be used as an independent package or with PIL/Pillow. + +olefile is mostly meant for developers. If you are looking for tools to +analyze OLE files or to extract data (especially for security purposes +such as malware analysis and forensics), then please also check my +`python-oletools <http://www.decalage.info/python/oletools>`_, which are +built upon olefile and provide a higher-level interface. + +History +------- + +olefile is based on the OleFileIO module from +`PIL <http://www.pythonware.com/products/pil/index.htm>`_, the excellent +Python Imaging Library, created and maintained by Fredrik Lundh. The +olefile API is still compatible with PIL, but since 2005 I have improved +the internal implementation significantly, with new features, bugfixes +and a more robust design. From 2005 to 2014 the project was called +OleFileIO\_PL, and in 2014 I changed its name to olefile to celebrate +its 9 years and its new write features. + +As far as I know, olefile is the most complete and robust Python +implementation to read MS OLE2 files, portable on several operating +systems. (please tell me if you know other similar Python modules) + +Since 2014 olefile/OleFileIO\_PL has been integrated into +`Pillow <http://python-imaging.github.io/>`_, the friendly fork of PIL. 
+olefile will continue to be improved as a separate project, and new +versions will be merged into Pillow regularly. + +Main improvements over the original version of OleFileIO in PIL: +---------------------------------------------------------------- + +- Compatible with Python 3.x and 2.6+ +- Many bug fixes +- Support for files larger than 6.8MB +- Support for 64 bits platforms and big-endian CPUs +- Robust: many checks to detect malformed files +- Runtime option to choose if malformed files should be parsed or raise + exceptions +- Improved API +- Metadata extraction, stream/storage timestamps (e.g. for document + forensics) +- Can open file-like objects +- Added setup.py and install.bat to ease installation +- More convenient slash-based syntax for stream paths +- Write features + +Documentation +------------- + +Please see the `online +documentation <https://bitbucket.org/decalage/olefileio_pl/wiki>`_ for +more information, especially the `OLE +overview <https://bitbucket.org/decalage/olefileio_pl/wiki/OLE_Overview>`_ +and the `API +page <https://bitbucket.org/decalage/olefileio_pl/wiki/API>`_ which +describe how to use olefile in Python applications. A copy of the same +documentation is also provided in the doc subfolder of the olefile +package. + +Real-life examples +------------------ + +A real-life example: `using OleFileIO\_PL for malware analysis and +forensics <http://blog.gregback.net/2011/03/using-remnux-for-forensic-puzzle-6/>`_. + +See also `this +paper <https://computer-forensics.sans.org/community/papers/gcfa/grow-forensic-tools-taxonomy-python-libraries-helpful-forensic-analysis_6879>`_ +about python tools for forensics, which features olefile. + +License +------- + +olefile (formerly OleFileIO\_PL) is copyright (c) 2005-2015 Philippe +Lagadec (`http://www.decalage.info <http://www.decalage.info>`_) + +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +-------------- + +olefile is based on source code from the OleFileIO module of the Python +Imaging Library (PIL) published by Fredrik Lundh under the following +license: + +The Python Imaging Library (PIL) is + +- Copyright (c) 1997-2005 by Secret Labs AB +- Copyright (c) 1995-2005 by Fredrik Lundh + +By obtaining, using, and/or copying this software and/or its associated +documentation, you agree that you have read, understood, and will comply +with the following terms and conditions: + +Permission to use, copy, modify, and distribute this software and its +associated documentation for any purpose and without fee is hereby +granted, provided that the above copyright notice appears in all copies, +and that both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Secret Labs AB or the +author not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior permission. + +SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR +ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/contrib/python/olefile/py3/olefile/__init__.py b/contrib/python/olefile/py3/olefile/__init__.py index 52247f3e01..72c91df7fa 100644 --- a/contrib/python/olefile/py3/olefile/__init__.py +++ b/contrib/python/olefile/py3/olefile/__init__.py @@ -4,11 +4,11 @@ olefile (formerly OleFileIO_PL) Module to read/write Microsoft OLE2 files (also called Structured Storage or Microsoft Compound Document File Format), such as Microsoft Office 97-2003 documents, Image Composer and FlashPix files, Outlook messages, ... 
-This version is compatible with Python 2.7 and 3.4+ +This version is compatible with Python 2.7 and 3.5+ Project website: https://www.decalage.info/olefile -olefile is copyright (c) 2005-2018 Philippe Lagadec (https://www.decalage.info) +olefile is copyright (c) 2005-2023 Philippe Lagadec (https://www.decalage.info) olefile is based on the OleFileIO module from the PIL library v1.1.7 See: http://www.pythonware.com/products/pil/index.htm diff --git a/contrib/python/olefile/py3/olefile/olefile.py b/contrib/python/olefile/py3/olefile/olefile.py index 1bfa2a6a65..60f8963c19 100644 --- a/contrib/python/olefile/py3/olefile/olefile.py +++ b/contrib/python/olefile/py3/olefile/olefile.py @@ -4,11 +4,11 @@ olefile (formerly OleFileIO_PL) Module to read/write Microsoft OLE2 files (also called Structured Storage or Microsoft Compound Document File Format), such as Microsoft Office 97-2003 documents, Image Composer and FlashPix files, Outlook messages, ... -This version is compatible with Python 2.7 and 3.4+ +This version is compatible with Python 2.7 and 3.5+ Project website: https://www.decalage.info/olefile -olefile is copyright (c) 2005-2018 Philippe Lagadec +olefile is copyright (c) 2005-2023 Philippe Lagadec (https://www.decalage.info) olefile is based on the OleFileIO module from the PIL library v1.1.7 @@ -22,16 +22,16 @@ Copyright (c) 1995-2009 by Fredrik Lundh See source code and LICENSE.txt for information on usage and redistribution. """ -# Since OleFileIO_PL v0.45, only Python 2.7 and 3.4+ are supported +# Since olefile v0.47, only Python 2.7 and 3.5+ are supported # This import enables print() as a function rather than a keyword # (main requirement to be compatible with Python 3.x) # The comment on the line below should be printed on Python 2.5 or older: -from __future__ import print_function # This version of olefile requires Python 2.7 or 3.4+. +from __future__ import print_function # This version of olefile requires Python 2.7 or 3.5+. 
#--- LICENSE ------------------------------------------------------------------ -# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2018 Philippe Lagadec +# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2023 Philippe Lagadec # (https://www.decalage.info) # # All rights reserved. @@ -86,23 +86,24 @@ from __future__ import print_function # This version of olefile requires Pytho # OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. -__date__ = "2018-09-09" -__version__ = '0.46' +__date__ = "2023-12-01" +__version__ = '0.47' __author__ = "Philippe Lagadec" __all__ = ['isOleFile', 'OleFileIO', 'OleMetadata', 'enable_logging', - 'MAGIC', 'STGTY_EMPTY', 'KEEP_UNICODE_NAMES', + 'MAGIC', 'STGTY_EMPTY', 'STGTY_STREAM', 'STGTY_STORAGE', 'STGTY_ROOT', 'STGTY_PROPERTY', 'STGTY_LOCKBYTES', 'MINIMAL_OLEFILE_SIZE', 'DEFECT_UNSURE', 'DEFECT_POTENTIAL', 'DEFECT_INCORRECT', 'DEFECT_FATAL', 'DEFAULT_PATH_ENCODING', 'MAXREGSECT', 'DIFSECT', 'FATSECT', 'ENDOFCHAIN', 'FREESECT', - 'MAXREGSID', 'NOSTREAM', 'UNKNOWN_SIZE', 'WORD_CLSID' + 'MAXREGSID', 'NOSTREAM', 'UNKNOWN_SIZE', 'WORD_CLSID', + 'OleFileIONotClosed' ] import io import sys -import struct, array, os.path, datetime, logging +import struct, array, os.path, datetime, logging, warnings, traceback #=== COMPATIBILITY WORKAROUNDS ================================================ @@ -114,11 +115,11 @@ if str is not bytes: try: # on Python 2 we need xrange: iterrange = xrange -except: +except Exception: # no xrange, for Python 3 it was renamed as range: iterrange = range -#[PL] workaround to fix an issue with array item size on 64 bits systems: +# [PL] workaround to fix an issue with array item size on 64 bits systems: if array.array('L').itemsize == 4: # on 32 bits platforms, long integers in an array are 32 bits: UINT32 = 'L' @@ -142,17 +143,14 @@ else: raise ValueError('Need to fix a bug with 32 bit arrays, please contact author...') -#[PL] These workarounds were 
inspired from the Path module +# [PL] These workarounds were inspired from the Path module # (see http://www.jorendorff.com/articles/python/path/) +# TODO: remove the use of basestring, as it was removed in Python 3 try: basestring except NameError: basestring = str -#[PL] Experimental setting: if True, OLE filenames will be kept in Unicode -# if False (default PIL behaviour), all filenames are converted to Latin-1. -KEEP_UNICODE_NAMES = True - if sys.version_info[0] < 3: # On Python 2.x, the default encoding for path names is UTF-8: DEFAULT_PATH_ENCODING = 'utf-8' @@ -208,18 +206,18 @@ def enable_logging(): #: magic bytes that should be at the beginning of every OLE file: MAGIC = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1' -#[PL]: added constants for Sector IDs (from AAF specifications) +# [PL]: added constants for Sector IDs (from AAF specifications) MAXREGSECT = 0xFFFFFFFA #: (-6) maximum SECT DIFSECT = 0xFFFFFFFC #: (-4) denotes a DIFAT sector in a FAT FATSECT = 0xFFFFFFFD #: (-3) denotes a FAT sector in a FAT ENDOFCHAIN = 0xFFFFFFFE #: (-2) end of a virtual stream chain FREESECT = 0xFFFFFFFF #: (-1) unallocated sector -#[PL]: added constants for Directory Entry IDs (from AAF specifications) +# [PL]: added constants for Directory Entry IDs (from AAF specifications) MAXREGSID = 0xFFFFFFFA #: (-6) maximum directory entry ID NOSTREAM = 0xFFFFFFFF #: (-1) unallocated directory entry -#[PL] object types in storage (from AAF specifications) +# [PL] object types in storage (from AAF specifications) STGTY_EMPTY = 0 #: empty directory entry STGTY_STORAGE = 1 #: element is a storage object STGTY_STREAM = 2 #: element is a stream object @@ -245,19 +243,19 @@ VT_STORED_OBJECT=69; VT_BLOB_OBJECT=70; VT_CF=71; VT_CLSID=72; VT_VECTOR=0x1000; # map property id to name (for debugging purposes) -# VT = {} -# for keyword, var in list(vars().items()): -# if keyword[:3] == "VT_": -# VT[var] = keyword +VT = {} +for keyword, var in list(vars().items()): + if keyword[:3] == "VT_": + VT[var] = 
keyword # # -------------------------------------------------------------------- # Some common document types (root.clsid fields) WORD_CLSID = "00020900-0000-0000-C000-000000000046" -#TODO: check Excel, PPT, ... +# TODO: check Excel, PPT, ... -#[PL]: Defect levels to classify parsing errors - see OleFileIO._raise_defect() +# [PL]: Defect levels to classify parsing errors - see OleFileIO._raise_defect() DEFECT_UNSURE = 10 # a case which looks weird, but not sure it's a defect DEFECT_POTENTIAL = 20 # a potential defect DEFECT_INCORRECT = 30 # an error according to specifications, but parsing @@ -266,37 +264,55 @@ DEFECT_FATAL = 40 # an error which cannot be ignored, parsing is # impossible # Minimal size of an empty OLE file, with 512-bytes sectors = 1536 bytes -# (this is used in isOleFile and OleFile.open) +# (this is used in isOleFile and OleFileIO.open) MINIMAL_OLEFILE_SIZE = 1536 - #=== FUNCTIONS =============================================================== -def isOleFile (filename): +def isOleFile (filename=None, data=None): """ Test if a file is an OLE container (according to the magic bytes in its header). .. note:: This function only checks the first 8 bytes of the file, not the rest of the OLE structure. + If data is provided, it also checks if the file size is above + the minimal size of an OLE file (1536 bytes). + If filename is provided with the path of the file on disk, the file is + open only to read the first 8 bytes, then closed. .. versionadded:: 0.16 :param filename: filename, contents or file-like object of the OLE file (string-like or file-like object) - - if filename is a string smaller than 1536 bytes, it is the path - of the file to open. (bytes or unicode string) - - if filename is a string longer than 1535 bytes, it is parsed + - if data is provided, filename is ignored. + - if filename is a unicode string, it is used as path of the file to open on disk. 
+ - if filename is a bytes string smaller than 1536 bytes, it is used as path + of the file to open on disk. + - [deprecated] if filename is a bytes string longer than 1535 bytes, it is parsed as the content of an OLE file in memory. (bytes type only) + Note that this use case is deprecated and should be replaced by the new data parameter - if filename is a file-like object (with read and seek methods), it is parsed as-is. + :type filename: bytes, str, unicode or file-like object + + :param data: bytes string with the contents of the file to be checked, when the file is in memory + (added in olefile 0.47) + :type data: bytes - :type filename: bytes or str or unicode or file :returns: True if OLE, False otherwise. :rtype: bool """ + header = None + # first check if data is provided and large enough + if data is not None: + if len(data) >= MINIMAL_OLEFILE_SIZE: + header = data[:len(MAGIC)] + else: + # the file is too small, cannot be OLE + return False # check if filename is a string-like or file-like object: - if hasattr(filename, 'read'): + elif hasattr(filename, 'read') and hasattr(filename, 'seek'): # file-like object: use it directly header = filename.read(len(MAGIC)) # just in case, seek back to start of file: @@ -362,22 +378,34 @@ def _clsid(clsid): def filetime2datetime(filetime): - """ - convert FILETIME (64 bits int) to Python datetime.datetime - """ - # TODO: manage exception when microseconds is too large - # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/ - _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) - #log.debug('timedelta days=%d' % (filetime//(10*1000000*3600*24))) - return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10) + """ + convert FILETIME (64 bits int) to Python datetime.datetime + """ + # TODO: manage exception when microseconds is too large + # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/ + _FILETIME_null_date = datetime.datetime(1601, 1, 
1, 0, 0, 0) + # log.debug('timedelta days=%d' % (filetime//(10*1000000*3600*24))) + return _FILETIME_null_date + datetime.timedelta(microseconds=filetime//10) #=== CLASSES ================================================================== +class OleFileError(IOError): + """ + Generic base error for this module. + """ + pass + +class NotOleFileError(OleFileError): + """ + Error raised when the opened file is not an OLE file. + """ + pass + class OleMetadata: """ - class to parse and store metadata from standard properties of OLE files. + Class to parse and store metadata from standard properties of OLE files. Available attributes: codepage, title, subject, author, keywords, comments, template, @@ -407,7 +435,7 @@ class OleMetadata: - https://msdn.microsoft.com/en-us/library/windows/desktop/aa380374%28v=vs.85%29.aspx - https://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html - new in version 0.25 + New in version 0.25 """ # attribute names for SummaryInformation stream properties: @@ -482,33 +510,34 @@ class OleMetadata: self.language = None self.doc_version = None - - def parse_properties(self, olefile): + def parse_properties(self, ole_file): """ Parse standard properties of an OLE file, from the streams ``\\x05SummaryInformation`` and ``\\x05DocumentSummaryInformation``, if present. Properties are converted to strings, integers or python datetime objects. If a property is not present, its value is set to None. 
+ + :param ole_file: OleFileIO object from which to parse properties """ # first set all attributes to None: for attrib in (self.SUMMARY_ATTRIBS + self.DOCSUM_ATTRIBS): setattr(self, attrib, None) - if olefile.exists("\x05SummaryInformation"): + if ole_file.exists("\x05SummaryInformation"): # get properties from the stream: # (converting timestamps to python datetime, except total_edit_time, # which is property #10) - props = olefile.getproperties("\x05SummaryInformation", - convert_time=True, no_conversion=[10]) + props = ole_file.getproperties("\x05SummaryInformation", + convert_time=True, no_conversion=[10]) # store them into this object's attributes: for i in range(len(self.SUMMARY_ATTRIBS)): # ids for standards properties start at 0x01, until 0x13 value = props.get(i+1, None) setattr(self, self.SUMMARY_ATTRIBS[i], value) - if olefile.exists("\x05DocumentSummaryInformation"): + if ole_file.exists("\x05DocumentSummaryInformation"): # get properties from the stream: - props = olefile.getproperties("\x05DocumentSummaryInformation", - convert_time=True) + props = ole_file.getproperties("\x05DocumentSummaryInformation", + convert_time=True) # store them into this object's attributes: for i in range(len(self.DOCSUM_ATTRIBS)): # ids for standards properties start at 0x01, until 0x13 @@ -522,14 +551,33 @@ class OleMetadata: print('Properties from SummaryInformation stream:') for prop in self.SUMMARY_ATTRIBS: value = getattr(self, prop) - print('- %s: %s' % (prop, repr(value))) + print('- {}: {}'.format(prop, repr(value))) print('Properties from DocumentSummaryInformation stream:') for prop in self.DOCSUM_ATTRIBS: value = getattr(self, prop) - print('- %s: %s' % (prop, repr(value))) + print('- {}: {}'.format(prop, repr(value))) +class OleFileIONotClosed(RuntimeWarning): + """ + Warning type used when OleFileIO is destructed but has open file handle. 
+ """ + def __init__(self, stack_of_open=None): + super(OleFileIONotClosed, self).__init__() + self.stack_of_open = stack_of_open + + def __str__(self): + msg = 'Deleting OleFileIO instance with open file handle. ' \ + 'You should ensure that OleFileIO is never deleted ' \ + 'without calling close() first. Consider using '\ + '"with OleFileIO(...) as ole: ...".' + if self.stack_of_open: + return ''.join([msg, '\n', 'Stacktrace of open() call:\n'] + + self.stack_of_open.format()) + else: + return msg -#--- OleStream --------------------------------------------------------------- + +# --- OleStream --------------------------------------------------------------- class OleStream(io.BytesIO): """ @@ -537,7 +585,7 @@ class OleStream(io.BytesIO): Returns a read-only file object which can be used to read the contents of a OLE stream (instance of the BytesIO class). - To open a stream, use the openstream method in the OleFile class. + To open a stream, use the openstream method in the OleFileIO class. This function can be used with either ordinary streams, or ministreams, depending on the offset, sectorsize, and @@ -570,11 +618,11 @@ class OleStream(io.BytesIO): %(sect,sect,size,offset,sectorsize,len(fat), repr(fp))) self.ole = olefileio # this check is necessary, otherwise when attempting to open a stream - # from a closed OleFile, a stream of size zero is returned without + # from a closed OleFileIO, a stream of size zero is returned without # raising an exception. 
(see issue #81) if self.ole.fp.closed: raise OSError('Attempting to open a stream from a closed OLE File') - #[PL] To detect malformed documents with FAT loops, we compute the + # [PL] To detect malformed documents with FAT loops, we compute the # expected number of sectors in the stream: unknown_size = False if size == UNKNOWN_SIZE: @@ -599,7 +647,7 @@ class OleStream(io.BytesIO): if size == 0 and sect != ENDOFCHAIN: log.debug('size == 0 and sect != ENDOFCHAIN:') self.ole._raise_defect(DEFECT_INCORRECT, 'incorrect OLE sector index for empty stream') - #[PL] A fixed-length for loop is used instead of an undefined while + # [PL] A fixed-length for loop is used instead of an undefined while # loop to avoid DoS attacks: for i in range(nb_sectors): log.debug('Reading stream sector[%d] = %Xh' % (i, sect)) @@ -624,11 +672,11 @@ class OleStream(io.BytesIO): self.ole._raise_defect(DEFECT_INCORRECT, 'incorrect OLE FAT, sector index out of range') # stop reading here if the exception is ignored: break - #TODO: merge this code with OleFileIO.getsect() ? - #TODO: check if this works with 4K sectors: + # TODO: merge this code with OleFileIO.getsect() ? + # TODO: check if this works with 4K sectors: try: fp.seek(offset + sectorsize * sect) - except: + except Exception: log.debug('sect=%d, seek=%d, filesize=%d' % (sect, offset+sectorsize*sect, filesize)) self.ole._raise_defect(DEFECT_INCORRECT, 'OLE sector index out of range') @@ -653,7 +701,7 @@ class OleStream(io.BytesIO): self.ole._raise_defect(DEFECT_INCORRECT, 'incorrect OLE FAT, sector index out of range') # stop reading here if the exception is ignored: break - #[PL] Last sector should be a "end of chain" marker: + # [PL] Last sector should be a "end of chain" marker: # if sect != ENDOFCHAIN: # raise IOError('incorrect last sector index in OLE stream') data = b"".join(data) @@ -679,15 +727,12 @@ class OleStream(io.BytesIO): # Then the OleStream object can be used as a read-only file object. 
-#--- OleDirectoryEntry ------------------------------------------------------- +# --- OleDirectoryEntry ------------------------------------------------------- class OleDirectoryEntry: - """ - OLE2 Directory Entry + OLE2 Directory Entry pointing to a stream or a storage """ - #[PL] parsing code moved from OleFileIO.loaddirectory - # struct to parse directory entries: # <: little-endian byte order, standard sizes # (note: this should guarantee that Q returns a 64 bits int) @@ -711,19 +756,18 @@ class OleDirectoryEntry: DIRENTRY_SIZE = 128 assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE - - def __init__(self, entry, sid, olefile): + def __init__(self, entry, sid, ole_file): """ Constructor for an OleDirectoryEntry object. Parses a 128-bytes entry from the OLE Directory stream. - :param entry : string (must be 128 bytes long) - :param sid : index of this directory entry in the OLE file directory - :param olefile: OleFileIO containing this directory entry + :param bytes entry: bytes string (must be 128 bytes long) + :param int sid: index of this directory entry in the OLE file directory + :param OleFileIO ole_file: OleFileIO object containing this directory entry """ self.sid = sid - # ref to olefile is stored for future use - self.olefile = olefile + # ref to ole_file is stored for future use + self.olefile = ole_file # kids is a list of children entries, if this entry is a storage: # (list of OleDirectoryEntry objects) self.kids = [] @@ -752,26 +796,26 @@ class OleDirectoryEntry: self.sizeHigh ) = struct.unpack(OleDirectoryEntry.STRUCT_DIRENTRY, entry) if self.entry_type not in [STGTY_ROOT, STGTY_STORAGE, STGTY_STREAM, STGTY_EMPTY]: - olefile._raise_defect(DEFECT_INCORRECT, 'unhandled OLE storage type') + ole_file._raise_defect(DEFECT_INCORRECT, 'unhandled OLE storage type') # only first directory entry can (and should) be root: if self.entry_type == STGTY_ROOT and sid != 0: - olefile._raise_defect(DEFECT_INCORRECT, 'duplicate OLE root entry') + 
ole_file._raise_defect(DEFECT_INCORRECT, 'duplicate OLE root entry') if sid == 0 and self.entry_type != STGTY_ROOT: - olefile._raise_defect(DEFECT_INCORRECT, 'incorrect OLE root entry') - #log.debug(struct.unpack(fmt_entry, entry[:len_entry])) + ole_file._raise_defect(DEFECT_INCORRECT, 'incorrect OLE root entry') + # log.debug(struct.unpack(fmt_entry, entry[:len_entry])) # name should be at most 31 unicode characters + null character, # so 64 bytes in total (31*2 + 2): - if self.namelength>64: - olefile._raise_defect(DEFECT_INCORRECT, 'incorrect DirEntry name length >64 bytes') + if self.namelength > 64: + ole_file._raise_defect(DEFECT_INCORRECT, 'incorrect DirEntry name length >64 bytes') # if exception not raised, namelength is set to the maximum value: self.namelength = 64 # only characters without ending null char are kept: self.name_utf16 = self.name_raw[:(self.namelength-2)] - #TODO: check if the name is actually followed by a null unicode character ([MS-CFB] 2.6.1) - #TODO: check if the name does not contain forbidden characters: + # TODO: check if the name is actually followed by a null unicode character ([MS-CFB] 2.6.1) + # TODO: check if the name does not contain forbidden characters: # [MS-CFB] 2.6.1: "The following characters are illegal and MUST NOT be part of the name: '/', '\', ':', '!'." 
# name is converted from UTF-16LE to the path encoding specified in the OleFileIO: - self.name = olefile._decode_utf16_str(self.name_utf16) + self.name = ole_file._decode_utf16_str(self.name_utf16) log.debug('DirEntry SID=%d: %s' % (self.sid, repr(self.name))) log.debug(' - type: %d' % self.entry_type) @@ -782,11 +826,11 @@ class OleDirectoryEntry: # sizeHigh is only used for 4K sectors, it should be zero for 512 bytes # sectors, BUT apparently some implementations set it as 0xFFFFFFFF, 1 # or some other value so it cannot be raised as a defect in general: - if olefile.sectorsize == 512: + if ole_file.sectorsize == 512: if self.sizeHigh != 0 and self.sizeHigh != 0xFFFFFFFF: log.debug('sectorsize=%d, sizeLow=%d, sizeHigh=%d (%X)' % - (olefile.sectorsize, self.sizeLow, self.sizeHigh, self.sizeHigh)) - olefile._raise_defect(DEFECT_UNSURE, 'incorrect OLE stream size') + (ole_file.sectorsize, self.sizeLow, self.sizeHigh, self.sizeHigh)) + ole_file._raise_defect(DEFECT_UNSURE, 'incorrect OLE stream size') self.size = self.sizeLow else: self.size = self.sizeLow + (long(self.sizeHigh)<<32) @@ -796,21 +840,28 @@ class OleDirectoryEntry: # a storage should have a null size, BUT some implementations such as # Word 8 for Mac seem to allow non-null values => Potential defect: if self.entry_type == STGTY_STORAGE and self.size != 0: - olefile._raise_defect(DEFECT_POTENTIAL, 'OLE storage with size>0') + ole_file._raise_defect(DEFECT_POTENTIAL, 'OLE storage with size>0') # check if stream is not already referenced elsewhere: self.is_minifat = False if self.entry_type in (STGTY_ROOT, STGTY_STREAM) and self.size>0: - if self.size < olefile.minisectorcutoff \ + if self.size < ole_file.minisectorcutoff \ and self.entry_type==STGTY_STREAM: # only streams can be in MiniFAT # ministream object self.is_minifat = True else: self.is_minifat = False - olefile._check_duplicate_stream(self.isectStart, self.is_minifat) + ole_file._check_duplicate_stream(self.isectStart, self.is_minifat) 
self.sect_chain = None + def build_sect_chain(self, ole_file): + """ + Build the sector chain for a stream (from the FAT or the MiniFAT) - def build_sect_chain(self, olefile): + :param OleFileIO ole_file: OleFileIO object containing this directory entry + :return: nothing + """ + # TODO: seems to be used only from _write_mini_stream, is it useful? + # TODO: use self.olefile instead of ole_file if self.sect_chain: return if self.entry_type not in (STGTY_ROOT, STGTY_STREAM) or self.size == 0: @@ -818,16 +869,16 @@ class OleDirectoryEntry: self.sect_chain = list() - if self.is_minifat and not olefile.minifat: - olefile.loadminifat() + if self.is_minifat and not ole_file.minifat: + ole_file.loadminifat() next_sect = self.isectStart while next_sect != ENDOFCHAIN: self.sect_chain.append(next_sect) if self.is_minifat: - next_sect = olefile.minifat[next_sect] + next_sect = ole_file.minifat[next_sect] else: - next_sect = olefile.fat[next_sect] + next_sect = ole_file.fat[next_sect] def build_storage_tree(self): """ @@ -852,7 +903,6 @@ class OleDirectoryEntry: # (see rich comparison methods in this class) self.kids.sort() - def append_kids(self, child_sid): """ Walk through red-black tree of children of this directory entry to add @@ -862,7 +912,7 @@ class OleDirectoryEntry: first time for the root. (only used during recursion) """ log.debug('append_kids: child_sid=%d' % child_sid) - #[PL] this method was added to use simple recursion instead of a complex + # [PL] this method was added to use simple recursion instead of a complex # algorithm. 
# if this is not a storage or a leaf of the tree, nothing to do: if child_sid == NOSTREAM: @@ -899,7 +949,6 @@ class OleDirectoryEntry: # Afterwards build kid's own tree if it's also a storage: child.build_storage_tree() - def __eq__(self, other): "Compare entries by name" return self.name == other.name @@ -916,10 +965,9 @@ class OleDirectoryEntry: # Reflected __lt__() and __le__() will be used for __gt__() and __ge__() - #TODO: replace by the same function as MS implementation ? + # TODO: replace by the same function as MS implementation ? # (order by name length first, then case-insensitive order) - def dump(self, tab = 0): "Dump this entry, and all its subentries (for debug purposes only)" TYPES = ["(invalid)", "(storage)", "(stream)", "(lockbytes)", @@ -938,7 +986,6 @@ class OleDirectoryEntry: for kid in self.kids: kid.dump(tab + 2) - def getmtime(self): """ Return modification time of a directory entry. @@ -981,17 +1028,17 @@ class OleFileIO: level. The root entry should be omitted. For example, the following code extracts all image streams from a Microsoft Image Composer file:: - ole = OleFileIO("fan.mic") + with OleFileIO("fan.mic") as ole: - for entry in ole.listdir(): - if entry[1:2] == "Image": - fin = ole.openstream(entry) - fout = open(entry[0:1], "wb") - while True: - s = fin.read(8192) - if not s: - break - fout.write(s) + for entry in ole.listdir(): + if entry[1:2] == "Image": + fin = ole.openstream(entry) + fout = open(entry[0:1], "wb") + while True: + s = fin.read(8192) + if not s: + break + fout.write(s) You can use the viewer application provided with the Python Imaging Library to view the resulting files (which happens to be standard @@ -1010,7 +1057,7 @@ class OleFileIO: - if filename is a string longer than 1535 bytes, it is parsed as the content of an OLE file in memory. (bytes type only) - if filename is a file-like object (with read, seek and tell methods), - it is parsed as-is. + it is parsed as-is. 
The caller is responsible for closing it when done. :param raise_defects: minimal level for defects to be raised as exceptions. (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a @@ -1071,8 +1118,22 @@ class OleFileIO: self.sector_shift = None self.sector_size = None self.transaction_signature_number = None + self.warn_if_not_closed = False + self._we_opened_fp = False + self._open_stack = None if filename: - self.open(filename, write_mode=write_mode) + # try opening, ensure fp is closed if that fails + try: + self.open(filename, write_mode=write_mode) + except Exception: + # caller has no chance of calling close() now + self._close(warn=False) + raise + + def __del__(self): + """Destructor, ensures all file handles are closed that we opened.""" + self._close(warn=True) + # super(OleFileIO, self).__del__() # there's no super-class destructor def __enter__(self): @@ -1080,13 +1141,13 @@ class OleFileIO: def __exit__(self, *args): - self.close() + self._close(warn=False) - def _raise_defect(self, defect_level, message, exception_type=IOError): + def _raise_defect(self, defect_level, message, exception_type=OleFileError): """ This method should be called for any defect found during file parsing. - It may raise an IOError exception according to the minimal level chosen + It may raise an OleFileError exception according to the minimal level chosen for the OleFileIO object. :param defect_level: defect level, possible values are: @@ -1097,7 +1158,7 @@ class OleFileIO: - DEFECT_FATAL : an error which cannot be ignored, parsing is impossible :param message: string describing the defect, used with raised exception. - :param exception_type: exception class to be raised, IOError by default + :param exception_type: exception class to be raised, OleFileError by default """ # added by [PL] if defect_level >= self._raise_defects_level: @@ -1115,9 +1176,10 @@ class OleFileIO: directory or in property streams. 
Return a string encoded according to the path_encoding specified for the OleFileIO object. - :param utf16_str: bytes string encoded in UTF-16 LE format - :param errors: str, see python documentation for str.decode() + :param bytes utf16_str: bytes string encoded in UTF-16 LE format + :param str errors: str, see python documentation for str.decode() :return: str, encoded according to path_encoding + :rtype: str """ unicode_str = utf16_str.decode('UTF-16LE', errors) if self.path_encoding: @@ -1140,16 +1202,16 @@ class OleFileIO: - if filename is a string longer than 1535 bytes, it is parsed as the content of an OLE file in memory. (bytes type only) - if filename is a file-like object (with read, seek and tell methods), - it is parsed as-is. + it is parsed as-is. The caller is responsible for closing it when done :param write_mode: bool, if True the file is opened in read/write mode instead of read-only by default. (ignored if filename is not a path) """ self.write_mode = write_mode - #[PL] check if filename is a string-like or file-like object: + # [PL] check if filename is a string-like or file-like object: # (it is better to check for a read() method) if hasattr(filename, 'read'): - #TODO: also check seek and tell methods? + # TODO: also check seek and tell methods? 
# file-like object: use it directly self.fp = filename elif isinstance(filename, bytes) and len(filename) >= MINIMAL_OLEFILE_SIZE: @@ -1160,18 +1222,20 @@ class OleFileIO: # string-like object: filename of file on disk if self.write_mode: # open file in mode 'read with update, binary' - # According to https://docs.python.org/2/library/functions.html#open + # According to https://docs.python.org/library/functions.html#open # 'w' would truncate the file, 'a' may only append on some Unixes mode = 'r+b' else: # read-only mode by default mode = 'rb' self.fp = open(filename, mode) + self._we_opened_fp = True + self._open_stack = traceback.extract_stack() # remember for warning # obtain the filesize by using seek and tell, which should work on most # file-like objects: - #TODO: do it above, using getsize with filename when possible? - #TODO: fix code to fail with clear exception when filesize cannot be obtained - filesize=0 + # TODO: do it above, using getsize with filename when possible? + # TODO: fix code to fail with clear exception when filesize cannot be obtained + filesize = 0 self.fp.seek(0, os.SEEK_END) try: filesize = self.fp.tell() @@ -1188,8 +1252,8 @@ class OleFileIO: header = self.fp.read(512) if len(header) != 512 or header[:8] != MAGIC: - log.debug('Magic = %r instead of %r' % (header[:8], MAGIC)) - self._raise_defect(DEFECT_FATAL, "not an OLE2 structured storage file") + log.debug('Magic = {!r} instead of {!r}'.format(header[:8], MAGIC)) + self._raise_defect(DEFECT_FATAL, "not an OLE2 structured storage file", NotOleFileError) # [PL] header structure according to AAF specifications: ##Header @@ -1323,13 +1387,13 @@ class OleFileIO: # (-1 because header doesn't count) self.nb_sect = ( (filesize + self.sector_size-1) // self.sector_size) - 1 log.debug( "Maximum number of sectors in the file: %d (%Xh)" % (self.nb_sect, self.nb_sect)) - #TODO: change this test, because an OLE file MAY contain other data + # TODO: change this test, because an OLE file MAY 
contain other data # after the last sector. # file clsid self.header_clsid = _clsid(header[8:24]) - #TODO: remove redundant attributes, and fix the code which uses them? + # TODO: remove redundant attributes, and fix the code which uses them? self.sectorsize = self.sector_size #1 << i16(header, 30) self.minisectorsize = self.mini_sector_size #1 << i16(header, 32) self.minisectorcutoff = self.mini_stream_cutoff_size # i32(header, 56) @@ -1351,13 +1415,23 @@ class OleFileIO: self.loaddirectory(self.first_dir_sector) self.minifatsect = self.first_mini_fat_sector - def close(self): """ - close the OLE file, to release the file object + close the OLE file, release the file object if we created it ourselves. + + Leaves the file handle open if it was provided by the caller. """ - self.fp.close() + self._close(warn=False) + def _close(self, warn=False): + """Implementation of close() with internal arg `warn`.""" + if self._we_opened_fp: + if warn and self.warn_if_not_closed: + # we only raise a warning if the file was not explicitly closed, + # and if the option warn_if_not_closed is enabled + warnings.warn(OleFileIONotClosed(self._open_stack)) + self.fp.close() + self._we_opened_fp = False def _check_duplicate_stream(self, first_sect, minifat=False): """ @@ -1377,14 +1451,13 @@ class OleFileIO: if first_sect in (DIFSECT,FATSECT,ENDOFCHAIN,FREESECT): return used_streams = self._used_streams_fat - #TODO: would it be more efficient using a dict or hash values, instead + # TODO: would it be more efficient using a dict or hash values, instead # of a list of long ? 
if first_sect in used_streams: self._raise_defect(DEFECT_INCORRECT, 'Stream referenced twice') else: used_streams.append(first_sect) - def dumpfat(self, fat, firstindex=0): """ Display a part of FAT in human-readable form for debugging purposes @@ -1421,7 +1494,6 @@ class OleFileIO: print(name, end=" ") print() - def dumpsect(self, sector, firstindex=0): """ Display a sector in a human-readable form, for debugging purposes @@ -1452,13 +1524,13 @@ class OleFileIO: convert a sector to an array of 32 bits unsigned integers, swapping bytes on big endian CPUs such as PowerPC (old Macs) """ + # TODO: make this a static function a = array.array(UINT32, sect) # if CPU is big endian, swap bytes: if sys.byteorder == 'big': a.byteswap() return a - def loadfat_sect(self, sect): """ Adds the indexes of the given sector to the FAT @@ -1494,7 +1566,6 @@ class OleFileIO: self.fat = self.fat + nextfat return isect - def loadfat(self, header): """ Load the FAT table. @@ -1507,21 +1578,21 @@ class OleFileIO: log.debug('Loading the FAT table, starting with the 1st sector after the header') sect = header[76:512] log.debug( "len(sect)=%d, so %d integers" % (len(sect), len(sect)//4) ) - #fat = [] - # [PL] FAT is an array of 32 bits unsigned ints, it's more effective + # fat = [] + # FAT is an array of 32 bits unsigned ints, it's more effective # to use an array than a list in Python. 
# It's initialized as empty first: self.fat = array.array(UINT32) self.loadfat_sect(sect) - #self.dumpfat(self.fat) -## for i in range(0, len(sect), 4): -## ix = i32(sect, i) -## #[PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFE or ix == 0xFFFFFFFF: -## if ix == 0xFFFFFFFE or ix == 0xFFFFFFFF: -## break -## s = self.getsect(ix) -## #fat = fat + [i32(s, i) for i in range(0, len(s), 4)] -## fat = fat + array.array(UINT32, s) + # self.dumpfat(self.fat) + # for i in range(0, len(sect), 4): + # ix = i32(sect, i) + # # [PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFE or ix == 0xFFFFFFFF: + # if ix == 0xFFFFFFFE or ix == 0xFFFFFFFF: + # break + # s = self.getsect(ix) + # # fat = fat + [i32(s, i) for i in range(0, len(s), 4)] + # fat = fat + array.array(UINT32, s) if self.num_difat_sectors != 0: log.debug('DIFAT is used, because file size > 6.8MB.') # [PL] There's a DIFAT because file is larger than 6.8MB @@ -1546,7 +1617,7 @@ class OleFileIO: isect_difat = self.first_difat_sector for i in iterrange(nb_difat): log.debug( "DIFAT block %d, sector %X" % (i, isect_difat) ) - #TODO: check if corresponding FAT SID = DIFSECT + # TODO: check if corresponding FAT SID = DIFSECT sector_difat = self.getsect(isect_difat) difat = self.sect2array(sector_difat) # Display the sector contents only if the logging level is debug: @@ -1560,10 +1631,10 @@ class OleFileIO: if isect_difat not in [ENDOFCHAIN, FREESECT]: # last DIFAT pointer value must be ENDOFCHAIN or FREESECT raise IOError('incorrect end of DIFAT') -## if len(self.fat) != self.num_fat_sectors: -## # FAT should contain num_fat_sectors blocks -## print("FAT length: %d instead of %d" % (len(self.fat), self.num_fat_sectors)) -## raise IOError('incorrect DIFAT') + # if len(self.fat) != self.num_fat_sectors: + # # FAT should contain num_fat_sectors blocks + # print("FAT length: %d instead of %d" % (len(self.fat), self.num_fat_sectors)) + # raise IOError('incorrect DIFAT') else: log.debug('No DIFAT, because file size < 6.8MB.') # since 
FAT is read from fixed-size sectors, it may contain more values @@ -1578,7 +1649,6 @@ class OleFileIO: log.debug('\nFAT:') self.dumpfat(self.fat) - def loadminifat(self): """ Load the MiniFAT table. @@ -1603,7 +1673,7 @@ class OleFileIO: self._raise_defect(DEFECT_INCORRECT, 'OLE MiniStream is larger than MiniFAT') # In any case, first read stream_size: s = self._open(self.minifatsect, stream_size, force_FAT=True).read() - #[PL] Old code replaced by an array: + # [PL] Old code replaced by an array: #self.minifat = [i32(s, i) for i in range(0, len(s), 4)] self.minifat = self.sect2array(s) # Then shrink the array to used size, to avoid indexes out of MiniStream: @@ -1631,11 +1701,11 @@ class OleFileIO: # [PL] the original code in PIL was wrong when sectors are 4KB instead of # 512 bytes: #self.fp.seek(512 + self.sectorsize * sect) - #[PL]: added safety checks: + # [PL]: added safety checks: #print("getsect(%X)" % sect) try: self.fp.seek(self.sectorsize * (sect+1)) - except: + except Exception: log.debug('getsect(): sect=%X, seek=%d, filesize=%d' % (sect, self.sectorsize*(sect+1), self._filesize)) self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') @@ -1646,7 +1716,6 @@ class OleFileIO: self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector') return sector - def write_sect(self, sect, data, padding=b'\x00'): """ Write given sector to file on disk. 
@@ -1659,17 +1728,17 @@ class OleFileIO: raise TypeError("write_sect: data must be a bytes string") if not isinstance(padding, bytes) or len(padding)!=1: raise TypeError("write_sect: padding must be a bytes string of 1 char") - #TODO: we could allow padding=None for no padding at all + # TODO: we could allow padding=None for no padding at all try: self.fp.seek(self.sectorsize * (sect+1)) - except: + except Exception: log.debug('write_sect(): sect=%X, seek=%d, filesize=%d' % (sect, self.sectorsize*(sect+1), self._filesize)) self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') if len(data) < self.sectorsize: # add padding data += padding * (self.sectorsize - len(data)) - elif len(data) < self.sectorsize: + elif len(data) > self.sectorsize: raise ValueError("Data is larger than sector size") self.fp.write(data) @@ -1688,7 +1757,7 @@ class OleFileIO: try: self.fp.seek(fp_pos) - except: + except Exception: log.debug('write_mini_sect(): fp_pos=%d, filesize=%d' % (fp_pos, self._filesize)) self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') @@ -1713,21 +1782,21 @@ class OleFileIO: # (stream size is not known in advance) self.directory_fp = self._open(sect, force_FAT=True) - #[PL] to detect malformed documents and avoid DoS attacks, the maximum + # [PL] to detect malformed documents and avoid DoS attacks, the maximum # number of directory entries can be calculated: max_entries = self.directory_fp.size // 128 log.debug('loaddirectory: size=%d, max_entries=%d' % (self.directory_fp.size, max_entries)) # Create list of directory entries - #self.direntries = [] + # self.direntries = [] # We start with a list of "None" object self.direntries = [None] * max_entries -## for sid in iterrange(max_entries): -## entry = fp.read(128) -## if not entry: -## break -## self.direntries.append(OleDirectoryEntry(entry, sid, self)) + # for sid in iterrange(max_entries): + # entry = fp.read(128) + # if not entry: + # break + # 
self.direntries.append(OleDirectoryEntry(entry, sid, self)) # load root entry: root_entry = self._load_direntry(0) # Root entry is the first entry: @@ -1739,7 +1808,6 @@ class OleFileIO: # read and build all storage trees, starting from the root: self.root.build_storage_tree() - def _load_direntry (self, sid): """ Load a directory entry from the directory. @@ -1749,7 +1817,7 @@ class OleFileIO: :param sid: index of storage/stream in the directory. :returns: a OleDirectoryEntry object - :exception IOError: if the entry has always been referenced. + :exception OleFileError: if the entry has always been referenced. """ # check if SID is OK: if sid<0 or sid>=len(self.direntries): @@ -1765,14 +1833,12 @@ class OleFileIO: self.direntries[sid] = OleDirectoryEntry(entry, sid, self) return self.direntries[sid] - def dumpdirectory(self): """ Dump directory (for debugging only) """ self.root.dump() - def _open(self, start, size = UNKNOWN_SIZE, force_FAT=False): """ Open a stream, either in FAT or MiniFAT according to its size. @@ -1810,7 +1876,6 @@ class OleFileIO: filesize=self._filesize, olefileio=self) - def _list(self, files, prefix, node, streams=True, storages=False): """ listdir helper @@ -1839,7 +1904,6 @@ class OleFileIO: else: self._raise_defect(DEFECT_INCORRECT, 'The directory tree contains an entry which is not a stream nor a storage.') - def listdir(self, streams=True, storages=False): """ Return a list of streams and/or storages stored in this file @@ -1853,7 +1917,6 @@ class OleFileIO: self._list(files, [], self.root, streams, storages) return files - def _find(self, filename): """ Returns directory entry of given filename. (openstream helper) @@ -1885,7 +1948,6 @@ class OleFileIO: node = kid return node.sid - def openstream(self, filename): """ Open a stream as a read-only file object (BytesIO). 
@@ -1957,36 +2019,35 @@ class OleFileIO: nb_sectors = (size + (self.sectorsize-1)) // self.sectorsize log.debug('nb_sectors = %d' % nb_sectors) for i in range(nb_sectors): -## try: -## self.fp.seek(offset + self.sectorsize * sect) -## except: -## log.debug('sect=%d, seek=%d' % -## (sect, offset+self.sectorsize*sect)) -## raise IOError('OLE sector index out of range') + # try: + # self.fp.seek(offset + self.sectorsize * sect) + # except Exception: + # log.debug('sect=%d, seek=%d' % + # (sect, offset+self.sectorsize*sect)) + # raise IOError('OLE sector index out of range') # extract one sector from data, the last one being smaller: if i<(nb_sectors-1): data_sector = data [i*self.sectorsize : (i+1)*self.sectorsize] - #TODO: comment this if it works + # TODO: comment this if it works assert(len(data_sector)==self.sectorsize) else: data_sector = data [i*self.sectorsize:] - #TODO: comment this if it works + # TODO: comment this if it works log.debug('write_stream: size=%d sectorsize=%d data_sector=%Xh size%%sectorsize=%d' % (size, self.sectorsize, len(data_sector), size % self.sectorsize)) assert(len(data_sector) % self.sectorsize==size % self.sectorsize) self.write_sect(sect, data_sector) -## self.fp.write(data_sector) + # self.fp.write(data_sector) # jump to next sector in the FAT: try: sect = self.fat[sect] except IndexError: # [PL] if pointer is out of the FAT an exception is raised raise IOError('incorrect OLE FAT, sector index out of range') - #[PL] Last sector should be a "end of chain" marker: + # [PL] Last sector should be a "end of chain" marker: if sect != ENDOFCHAIN: raise IOError('incorrect last sector index in OLE stream') - def get_type(self, filename): """ Test if given filename exists as a stream or a storage in the OLE @@ -2003,10 +2064,9 @@ class OleFileIO: sid = self._find(filename) entry = self.direntries[sid] return entry.entry_type - except: + except Exception: return False - def getclsid(self, filename): """ Return clsid of a stream/storage. 
@@ -2021,7 +2081,6 @@ class OleFileIO: entry = self.direntries[sid] return entry.clsid - def getmtime(self, filename): """ Return modification time of a stream/storage. @@ -2037,7 +2096,6 @@ class OleFileIO: entry = self.direntries[sid] return entry.getmtime() - def getctime(self, filename): """ Return creation time of a stream/storage. @@ -2053,7 +2111,6 @@ class OleFileIO: entry = self.direntries[sid] return entry.getctime() - def exists(self, filename): """ Test if given filename exists as a stream or a storage in the OLE @@ -2066,10 +2123,9 @@ class OleFileIO: try: sid = self._find(filename) return True - except: + except Exception: return False - def get_size(self, filename): """ Return size of a stream in the OLE container, in bytes. @@ -2082,11 +2138,10 @@ class OleFileIO: sid = self._find(filename) entry = self.direntries[sid] if entry.entry_type != STGTY_STREAM: - #TODO: Should it return zero instead of raising an exception ? + # TODO: Should it return zero instead of raising an exception ? raise TypeError('object is not an OLE stream') return entry.size - def get_rootentry_name(self): """ Return root entry name. Should usually be 'Root Entry' or 'R' in most @@ -2094,7 +2149,6 @@ class OleFileIO: """ return self.root.name - def getproperties(self, filename, convert_time=False, no_conversion=None): """ Return properties described in substream. 
@@ -2114,21 +2168,16 @@ class OleFileIO: streampath = filename if not isinstance(streampath, str): streampath = '/'.join(streampath) - fp = self.openstream(filename) - data = {} - try: # header s = fp.read(28) clsid = _clsid(s[8:24]) - # format id s = fp.read(20) fmtid = _clsid(s[:16]) fp.seek(i32(s, 16)) - # get section s = b"****" + fp.read(i32(fp.read(4))-4) # number of properties: @@ -2137,14 +2186,12 @@ class OleFileIO: # catch exception while parsing property header, and only raise # a DEFECT_INCORRECT then return an empty dict, because this is not # a fatal error when parsing the whole file - msg = 'Error while parsing properties header in stream %s: %s' % ( + msg = 'Error while parsing properties header in stream {}: {}'.format( repr(streampath), exc) self._raise_defect(DEFECT_INCORRECT, msg, type(exc)) return data - # clamp num_props based on the data length num_props = min(num_props, int(len(s) / 8)) - for i in iterrange(num_props): property_id = 0 # just in case of an exception try: @@ -2152,91 +2199,10 @@ class OleFileIO: offset = i32(s, 12+i*8) property_type = i32(s, offset) - log.debug('property id=%d: type=%d offset=%X' % (property_id, property_type, offset)) - - # test for common types first (should perhaps use - # a dictionary instead?) 
- - if property_type == VT_I2: # 16-bit signed integer - value = i16(s, offset+4) - if value >= 32768: - value = value - 65536 - elif property_type == VT_UI2: # 2-byte unsigned integer - value = i16(s, offset+4) - elif property_type in (VT_I4, VT_INT, VT_ERROR): - # VT_I4: 32-bit signed integer - # VT_ERROR: HRESULT, similar to 32-bit signed integer, - # see https://msdn.microsoft.com/en-us/library/cc230330.aspx - value = i32(s, offset+4) - elif property_type in (VT_UI4, VT_UINT): # 4-byte unsigned integer - value = i32(s, offset+4) # FIXME - elif property_type in (VT_BSTR, VT_LPSTR): - # CodePageString, see https://msdn.microsoft.com/en-us/library/dd942354.aspx - # size is a 32 bits integer, including the null terminator, and - # possibly trailing or embedded null chars - #TODO: if codepage is unicode, the string should be converted as such - count = i32(s, offset+4) - value = s[offset+8:offset+8+count-1] - # remove all null chars: - value = value.replace(b'\x00', b'') - elif property_type == VT_BLOB: - # binary large object (BLOB) - # see https://msdn.microsoft.com/en-us/library/dd942282.aspx - count = i32(s, offset+4) - value = s[offset+8:offset+8+count] - elif property_type == VT_LPWSTR: - # UnicodeString - # see https://msdn.microsoft.com/en-us/library/dd942313.aspx - # "the string should NOT contain embedded or additional trailing - # null characters." - count = i32(s, offset+4) - value = self._decode_utf16_str(s[offset+8:offset+8+count*2]) - elif property_type == VT_FILETIME: - value = long(i32(s, offset+4)) + (long(i32(s, offset+8))<<32) - # FILETIME is a 64-bit int: "number of 100ns periods - # since Jan 1,1601". 
- if convert_time and property_id not in no_conversion: - log.debug('Converting property #%d to python datetime, value=%d=%fs' - %(property_id, value, float(value)/10000000)) - # convert FILETIME to Python datetime.datetime - # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/ - _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) - log.debug('timedelta days=%d' % (value//(10*1000000*3600*24))) - value = _FILETIME_null_date + datetime.timedelta(microseconds=value//10) - else: - # legacy code kept for backward compatibility: returns a - # number of seconds since Jan 1,1601 - value = value // 10000000 # seconds - elif property_type == VT_UI1: # 1-byte unsigned integer - value = i8(s[offset+4]) - elif property_type == VT_CLSID: - value = _clsid(s[offset+4:offset+20]) - elif property_type == VT_CF: - # PropertyIdentifier or ClipboardData?? - # see https://msdn.microsoft.com/en-us/library/dd941945.aspx - count = i32(s, offset+4) - value = s[offset+8:offset+8+count] - elif property_type == VT_BOOL: - # VARIANT_BOOL, 16 bits bool, 0x0000=Fals, 0xFFFF=True - # see https://msdn.microsoft.com/en-us/library/cc237864.aspx - value = bool(i16(s, offset+4)) - else: - value = None # everything else yields "None" - log.debug('property id=%d: type=%d not implemented in parser yet' % (property_id, property_type)) - - # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, - # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, - # see https://msdn.microsoft.com/en-us/library/dd942033.aspx - - # FIXME: add support for VT_VECTOR - # VT_VECTOR is a 32 uint giving the number of items, followed by - # the items in sequence. The VT_VECTOR value is combined with the - # type of items, e.g. 
VT_VECTOR|VT_BSTR - # see https://msdn.microsoft.com/en-us/library/dd942011.aspx - - #print("%08x" % property_id, repr(value), end=" ") - #print("(%s)" % VT[i32(s, offset) & 0xFFF]) + vt_name = VT.get(property_type, 'UNKNOWN') + log.debug('property id=%d: type=%d/%s offset=%X' % (property_id, property_type, vt_name, offset)) + value = self._parse_property(s, offset+4, property_id, property_type, convert_time, no_conversion) data[property_id] = value except BaseException as exc: # catch exception while parsing each property, and only raise @@ -2247,6 +2213,131 @@ class OleFileIO: return data + def _parse_property(self, s, offset, property_id, property_type, convert_time, no_conversion): + v = None + if property_type <= VT_BLOB or property_type in (VT_CLSID, VT_CF): + v, _ = self._parse_property_basic(s, offset, property_id, property_type, convert_time, no_conversion) + elif property_type == VT_VECTOR | VT_VARIANT: + log.debug('property_type == VT_VECTOR | VT_VARIANT') + off = 4 + count = i32(s, offset) + values = [] + for _ in range(count): + property_type = i32(s, offset + off) + v, sz = self._parse_property_basic(s, offset + off + 4, property_id, property_type, convert_time, no_conversion) + values.append(v) + off += sz + 4 + v = values + + elif property_type & VT_VECTOR: + property_type_base = property_type & ~VT_VECTOR + log.debug('property_type == VT_VECTOR | %s' % VT.get(property_type_base, 'UNKNOWN')) + off = 4 + count = i32(s, offset) + values = [] + for _ in range(count): + v, sz = self._parse_property_basic(s, offset + off, property_id, property_type & ~VT_VECTOR, convert_time, no_conversion) + values.append(v) + off += sz + v = values + else: + log.debug('property id=%d: type=%d not implemented in parser yet' % (property_id, property_type)) + return v + + def _parse_property_basic(self, s, offset, property_id, property_type, convert_time, no_conversion): + value = None + size = 0 + # test for common types first (should perhaps use + # a dictionary 
instead?) + + if property_type == VT_I2: # 16-bit signed integer + value = i16(s, offset) + if value >= 32768: + value = value - 65536 + size = 2 + elif property_type == VT_UI2: # 2-byte unsigned integer + value = i16(s, offset) + size = 2 + elif property_type in (VT_I4, VT_INT, VT_ERROR): + # VT_I4: 32-bit signed integer + # VT_ERROR: HRESULT, similar to 32-bit signed integer, + # see https://msdn.microsoft.com/en-us/library/cc230330.aspx + value = i32(s, offset) + size = 4 + elif property_type in (VT_UI4, VT_UINT): # 4-byte unsigned integer + value = i32(s, offset) # FIXME + size = 4 + elif property_type in (VT_BSTR, VT_LPSTR): + # CodePageString, see https://msdn.microsoft.com/en-us/library/dd942354.aspx + # size is a 32 bits integer, including the null terminator, and + # possibly trailing or embedded null chars + #TODO: if codepage is unicode, the string should be converted as such + count = i32(s, offset) + value = s[offset+4:offset+4+count-1] + # remove all null chars: + value = value.replace(b'\x00', b'') + size = 4 + count + elif property_type == VT_BLOB: + # binary large object (BLOB) + # see https://msdn.microsoft.com/en-us/library/dd942282.aspx + count = i32(s, offset) + value = s[offset+4:offset+4+count] + size = 4 + count + elif property_type == VT_LPWSTR: + # UnicodeString + # see https://msdn.microsoft.com/en-us/library/dd942313.aspx + # "the string should NOT contain embedded or additional trailing + # null characters." + count = i32(s, offset+4) + value = self._decode_utf16_str(s[offset+4:offset+4+count*2]) + size = 4 + count * 2 + elif property_type == VT_FILETIME: + value = long(i32(s, offset)) + (long(i32(s, offset+4))<<32) + # FILETIME is a 64-bit int: "number of 100ns periods + # since Jan 1,1601". 
+ if convert_time and property_id not in no_conversion: + log.debug('Converting property #%d to python datetime, value=%d=%fs' + %(property_id, value, float(value)/10000000)) + # convert FILETIME to Python datetime.datetime + # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/ + _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) + log.debug('timedelta days=%d' % (value//(10*1000000*3600*24))) + value = _FILETIME_null_date + datetime.timedelta(microseconds=value//10) + else: + # legacy code kept for backward compatibility: returns a + # number of seconds since Jan 1,1601 + value = value // 10000000 # seconds + size = 8 + elif property_type == VT_UI1: # 1-byte unsigned integer + value = i8(s[offset]) + size = 1 + elif property_type == VT_CLSID: + value = _clsid(s[offset:offset+16]) + size = 16 + elif property_type == VT_CF: + # PropertyIdentifier or ClipboardData?? + # see https://msdn.microsoft.com/en-us/library/dd941945.aspx + count = i32(s, offset) + value = s[offset+4:offset+4+count] + size = 4 + count + elif property_type == VT_BOOL: + # VARIANT_BOOL, 16 bits bool, 0x0000=Fals, 0xFFFF=True + # see https://msdn.microsoft.com/en-us/library/cc237864.aspx + value = bool(i16(s, offset)) + size = 2 + else: + value = None # everything else yields "None" + log.debug('property id=%d: type=%d not implemented in parser yet' % (property_id, property_type)) + + # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, + # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, + # see https://msdn.microsoft.com/en-us/library/dd942033.aspx + + #print("%08x" % property_id, repr(value), end=" ") + #print("(%s)" % VT[i32(s, offset) & 0xFFF]) + return value, size + + def get_metadata(self): """ Parse standard properties streams, return an OleMetadata object @@ -2259,7 +2350,196 @@ class OleFileIO: self.metadata.parse_properties(self) return self.metadata -# + def get_userdefined_properties(self, filename, convert_time=False, no_conversion=None): + """ + Return 
properties described in substream. + + :param filename: path of stream in storage tree (see openstream for syntax) + :param convert_time: bool, if True timestamps will be converted to Python datetime + :param no_conversion: None or list of int, timestamps not to be converted + (for example total editing time is not a real timestamp) + + :returns: a dictionary of values indexed by id (integer) + """ + # REFERENCE: [MS-OLEPS] https://msdn.microsoft.com/en-us/library/dd942421.aspx + # REFERENCE: https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-oshared/2ea8be67-a4a0-4e2e-b42f-49a182645562 + #'D5CDD502-2E9C-101B-9397-08002B2CF9AE' + # TODO: testing the code more rigorously + # TODO: adding exception handeling + FMTID_USERDEFINED_PROPERTIES = _clsid(b'\x05\xD5\xCD\xD5\x9C\x2E\x1B\x10\x93\x97\x08\x00\x2B\x2C\xF9\xAE') + + # make sure no_conversion is a list, just to simplify code below: + if no_conversion == None: + no_conversion = [] + # stream path as a string to report exceptions: + streampath = filename + if not isinstance(streampath, str): + streampath = '/'.join(streampath) + + fp = self.openstream(filename) + + data = [] + + # header + s = fp.read(28) + clsid = _clsid(s[8:24]) + + # PropertySetStream.cSections (4 bytes starts at 1c): number of property sets in this stream + sections_count = i32(s, 24) + + section_file_pointers = [] + + try: + for i in range(sections_count): + # format id + s = fp.read(20) + fmtid = _clsid(s[:16]) + + if fmtid == FMTID_USERDEFINED_PROPERTIES: + file_pointer = i32(s, 16) + fp.seek(file_pointer) + # read saved sections + s = b"****" + fp.read(i32(fp.read(4)) - 4) + # number of properties: + num_props = i32(s, 4) + + PropertyIdentifierAndOffset = s[8: 8+8*num_props] + + # property names (dictionary) + # ref: https://docs.microsoft.com/en-us/openspecs/windows_protocols/MS-OLEPS/99127b7f-c440-4697-91a4-c853086d6b33 + index = 8+8*num_props + entry_count = i32(s[index: index+4]) + index += 4 + for i in range(entry_count): 
+ identifier = s[index: index +4] + str_size = i32(s[index+4: index + 8]) + string = s[index+8: index+8+str_size].decode('utf_8').strip('\0') + data.append({'property_name':string, 'value':None}) + index = index+8+str_size + # clamp num_props based on the data length + num_props = min(num_props, int(len(s) / 8)) + + # property values + # ref: https://docs.microsoft.com/en-us/openspecs/windows_protocols/MS-OLEPS/f122b9d7-e5cf-4484-8466-83f6fd94b3cc + for i in iterrange(2, num_props): + property_id = 0 # just in case of an exception + try: + property_id = i32(s, 8 + i * 8) + offset = i32(s, 12 + i * 8) + property_type = i32(s, offset) + + vt_name = VT.get(property_type, 'UNKNOWN') + log.debug('property id=%d: type=%d/%s offset=%X' % (property_id, property_type, vt_name, offset)) + + # test for common types first (should perhaps use + # a dictionary instead?) + + if property_type == VT_I2: # 16-bit signed integer + value = i16(s, offset + 4) + if value >= 32768: + value = value - 65536 + elif property_type == 1: + # supposed to be VT_NULL but seems it is not NULL + str_size = i32(s, offset + 8) + value = s[offset + 12:offset + 12 + str_size - 1] + + elif property_type == VT_UI2: # 2-byte unsigned integer + value = i16(s, offset + 4) + elif property_type in (VT_I4, VT_INT, VT_ERROR): + # VT_I4: 32-bit signed integer + # VT_ERROR: HRESULT, similar to 32-bit signed integer, + # see https://msdn.microsoft.com/en-us/library/cc230330.aspx + value = i32(s, offset + 4) + elif property_type in (VT_UI4, VT_UINT): # 4-byte unsigned integer + value = i32(s, offset + 4) # FIXME + elif property_type in (VT_BSTR, VT_LPSTR): + # CodePageString, see https://msdn.microsoft.com/en-us/library/dd942354.aspx + # size is a 32 bits integer, including the null terminator, and + # possibly trailing or embedded null chars + # TODO: if codepage is unicode, the string should be converted as such + count = i32(s, offset + 4) + value = s[offset + 8:offset + 8 + count - 1] + # remove all null chars: 
+ value = value.replace(b'\x00', b'') + elif property_type == VT_BLOB: + # binary large object (BLOB) + # see https://msdn.microsoft.com/en-us/library/dd942282.aspx + count = i32(s, offset + 4) + value = s[offset + 8:offset + 8 + count] + elif property_type == VT_LPWSTR: + # UnicodeString + # see https://msdn.microsoft.com/en-us/library/dd942313.aspx + # "the string should NOT contain embedded or additional trailing + # null characters." + count = i32(s, offset + 4) + value = self._decode_utf16_str(s[offset + 8:offset + 8 + count * 2]) + elif property_type == VT_FILETIME: + value = long(i32(s, offset + 4)) + (long(i32(s, offset + 8)) << 32) + # FILETIME is a 64-bit int: "number of 100ns periods + # since Jan 1,1601". + if convert_time and property_id not in no_conversion: + log.debug('Converting property #%d to python datetime, value=%d=%fs' + % (property_id, value, float(value) / 10000000)) + # convert FILETIME to Python datetime.datetime + # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/ + _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) + log.debug('timedelta days=%d' % (value // (10 * 1000000 * 3600 * 24))) + value = _FILETIME_null_date + datetime.timedelta(microseconds=value // 10) + else: + # legacy code kept for backward compatibility: returns a + # number of seconds since Jan 1,1601 + value = value // 10000000 # seconds + elif property_type == VT_UI1: # 1-byte unsigned integer + value = i8(s[offset + 4]) + elif property_type == VT_CLSID: + value = _clsid(s[offset + 4:offset + 20]) + elif property_type == VT_CF: + # PropertyIdentifier or ClipboardData?? 
+ # see https://msdn.microsoft.com/en-us/library/dd941945.aspx + count = i32(s, offset + 4) + value = s[offset + 8:offset + 8 + count] + elif property_type == VT_BOOL: + # VARIANT_BOOL, 16 bits bool, 0x0000=Fals, 0xFFFF=True + # see https://msdn.microsoft.com/en-us/library/cc237864.aspx + value = bool(i16(s, offset + 4)) + else: + value = None # everything else yields "None" + log.debug( + 'property id=%d: type=%d not implemented in parser yet' % (property_id, property_type)) + + # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, + # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, + # see https://msdn.microsoft.com/en-us/library/dd942033.aspx + + # FIXME: add support for VT_VECTOR + # VT_VECTOR is a 32 uint giving the number of items, followed by + # the items in sequence. The VT_VECTOR value is combined with the + # type of items, e.g. VT_VECTOR|VT_BSTR + # see https://msdn.microsoft.com/en-us/library/dd942011.aspx + + # print("%08x" % property_id, repr(value), end=" ") + # print("(%s)" % VT[i32(s, offset) & 0xFFF]) + + data[i-2]['value']=value + except BaseException as exc: + # catch exception while parsing each property, and only raise + # a DEFECT_INCORRECT, because parsing can go on + msg = 'Error while parsing property id %d in stream %s: %s' % ( + property_id, repr(streampath), exc) + self._raise_defect(DEFECT_INCORRECT, msg, type(exc)) + + except BaseException as exc: + # catch exception while parsing property header, and only raise + # a DEFECT_INCORRECT then return an empty dict, because this is not + # a fatal error when parsing the whole file + msg = 'Error while parsing properties header in stream %s: %s' % ( + repr(streampath), exc) + self._raise_defect(DEFECT_INCORRECT, msg, type(exc)) + return data + + return data + + # -------------------------------------------------------------------- # This script can be used to dump the directory of any OLE2 structured # storage file. 
@@ -2283,8 +2563,11 @@ def main(): usage = 'usage: %prog [options] <filename> [filename2 ...]' parser = optparse.OptionParser(usage=usage) + parser.add_option("-c", action="store_true", dest="check_streams", help='check all streams (for debugging purposes)') + parser.add_option("-p", action="store_true", dest="extract_customprop", + help='extract all user-defined propertires') parser.add_option("-d", action="store_true", dest="debug_mode", help='debug mode, shortcut for -l debug (displays a lot of debug information, for developers only)') parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL, @@ -2292,7 +2575,7 @@ def main(): (options, args) = parser.parse_args() - print('olefile version %s %s - https://www.decalage.info/en/olefile\n' % (__version__, __date__)) + print('olefile version {} {} - https://www.decalage.info/en/olefile\n'.format(__version__, __date__)) # Print help if no arguments are passed if len(args) == 0: @@ -2323,21 +2606,33 @@ def main(): props = ole.getproperties(streamname, convert_time=True) props = sorted(props.items()) for k, v in props: - #[PL]: avoid to display too large or binary values: + # [PL]: avoid to display too large or binary values: if isinstance(v, (basestring, bytes)): if len(v) > 50: v = v[:50] if isinstance(v, bytes): # quick and dirty binary check: for c in (1,2,3,4,5,6,7,11,12,14,15,16,17,18,19,20, - 21,22,23,24,25,26,27,28,29,30,31): + 21,22,23,24,25,26,27,28,29,30,31): if c in bytearray(v): v = '(binary data)' break print(" ", k, v) - except: + except Exception: log.exception('Error while parsing property stream %r' % streamname) + try: + if options.extract_customprop: + variables = ole.get_userdefined_properties(streamname, convert_time=True) + if len(variables): + print("%r: user-defined properties" % streamname) + for index, variable in enumerate(variables): + print('\t{} {}: {}'.format(index, variable['property_name'],variable['value'])) + + except: + log.exception('Error 
while parsing user-defined property stream %r' % streamname) + + if options.check_streams: # Read all streams to check if there are errors: print('\nChecking streams...') @@ -2353,16 +2648,16 @@ def main(): print('NOT a stream : type=%d' % st_type) print() -## for streamname in ole.listdir(): -## # print name using repr() to convert binary chars to \xNN: -## print('-', repr('/'.join(streamname)),'-', end=' ') -## print(ole.getmtime(streamname)) -## print() + # for streamname in ole.listdir(): + # # print name using repr() to convert binary chars to \xNN: + # print('-', repr('/'.join(streamname)),'-', end=' ') + # print(ole.getmtime(streamname)) + # print() print('Modification/Creation times of all directory entries:') for entry in ole.direntries: if entry is not None: - print('- %s: mtime=%s ctime=%s' % (entry.name, + print('- {}: mtime={} ctime={}'.format(entry.name, entry.getmtime(), entry.getctime())) print() @@ -2370,10 +2665,10 @@ def main(): try: meta = ole.get_metadata() meta.dump() - except: + except Exception: log.exception('Error while parsing metadata') print() - #[PL] Test a few new methods: + # [PL] Test a few new methods: root = ole.get_rootentry_name() print('Root entry name: "%s"' % root) if ole.exists('worddocument'): @@ -2387,10 +2682,11 @@ def main(): print('\nNon-fatal issues raised during parsing:') if ole.parsing_issues: for exctype, msg in ole.parsing_issues: - print('- %s: %s' % (exctype.__name__, msg)) + print('- {}: {}'.format(exctype.__name__, msg)) else: print('None') - except: + ole.close() + except Exception: log.exception('Error while parsing file %r' % filename) diff --git a/contrib/python/olefile/py3/ya.make b/contrib/python/olefile/py3/ya.make index 085f594332..33e0be14d2 100644 --- a/contrib/python/olefile/py3/ya.make +++ b/contrib/python/olefile/py3/ya.make @@ -2,14 +2,15 @@ PY3_LIBRARY() -VERSION(0.46) +VERSION(0.47) -LICENSE(BSD-3-Clause AND PIL) +LICENSE(BSD-2-Clause AND PIL) NO_LINT() PY_SRCS( TOP_LEVEL + OleFileIO_PL.py 
olefile/__init__.py olefile/olefile.py ) diff --git a/contrib/python/pexpect/py2/.dist-info/METADATA b/contrib/python/pexpect/py2/.dist-info/METADATA index 4c417227db..c5ea4a1a59 100644 --- a/contrib/python/pexpect/py2/.dist-info/METADATA +++ b/contrib/python/pexpect/py2/.dist-info/METADATA @@ -1,11 +1,15 @@ Metadata-Version: 2.1 Name: pexpect -Version: 4.8.0 +Version: 4.9.0 Summary: Pexpect allows easy control of interactive console applications. Home-page: https://pexpect.readthedocs.io/ Author: Noah Spurrier; Thomas Kluyver; Jeff Quast Author-email: noah@noah.org, thomas@kluyver.me.uk, contact@jeffquast.com License: ISC license +Project-URL: Bug Tracker, https://github.com/pexpect/pexpect/issues +Project-URL: Documentation, https://pexpect.readthedocs.io/ +Project-URL: Source Code, https://github.com/pexpect/pexpect +Project-URL: History, https://pexpect.readthedocs.io/en/stable/history.html Platform: UNIX Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Console @@ -27,6 +31,7 @@ Classifier: Topic :: System :: Installation/Setup Classifier: Topic :: System :: Shells Classifier: Topic :: System :: Software Distribution Classifier: Topic :: Terminals +License-File: LICENSE Requires-Dist: ptyprocess (>=0.5) @@ -36,7 +41,7 @@ Don Libes' Expect. Pexpect allows your script to spawn a child application and control it as if a human were typing commands. Pexpect can be used for automating interactive applications such as ssh, ftp, -passwd, telnet, etc. It can be used to a automate setup scripts for duplicating +passwd, telnet, etc. It can be used to automate setup scripts for duplicating software package installations on different servers. It can be used for automated software testing. Pexpect is in the spirit of Don Libes' Expect, but Pexpect is pure Python. @@ -45,5 +50,3 @@ The main features of Pexpect require the pty module in the Python standard library, which is only available on Unix-like systems. 
Some features—waiting for patterns from file descriptors or subprocesses—are also available on Windows. - - diff --git a/contrib/python/pexpect/py2/pexpect/__init__.py b/contrib/python/pexpect/py2/pexpect/__init__.py index 7e30453787..86254ee720 100644 --- a/contrib/python/pexpect/py2/pexpect/__init__.py +++ b/contrib/python/pexpect/py2/pexpect/__init__.py @@ -1,6 +1,6 @@ '''Pexpect is a Python module for spawning child applications and controlling them automatically. Pexpect can be used for automating interactive applications -such as ssh, ftp, passwd, telnet, etc. It can be used to a automate setup +such as ssh, ftp, passwd, telnet, etc. It can be used to automate setup scripts for duplicating software package installations on different servers. It can be used for automated software testing. Pexpect is in the spirit of Don Libes' Expect, but Pexpect is pure Python. Other Expect-like modules for Python @@ -29,6 +29,12 @@ For example:: child.expect('Password:') child.sendline(mypassword) +Context manager can be used for the spawn() function:: + + with pexpect.spawn('scp foo user@example.com:.') as child: + child.expect('Password:') + child.sendline(mypassword) + This works even for commands that ask for passwords or other input outside of the normal stdio streams. For example, ssh reads input directly from the TTY device which bypasses stdin. 
@@ -75,7 +81,7 @@ if sys.platform != 'win32': from .pty_spawn import spawn, spawnu from .run import run, runu -__version__ = '4.8.0' +__version__ = '4.9.0' __revision__ = '' __all__ = ['ExceptionPexpect', 'EOF', 'TIMEOUT', 'spawn', 'spawnu', 'run', 'runu', 'which', 'split_command_line', '__version__', '__revision__'] diff --git a/contrib/python/pexpect/py2/pexpect/bashrc.sh b/contrib/python/pexpect/py2/pexpect/bashrc.sh index c734ac90b8..d75d1a5b62 100644 --- a/contrib/python/pexpect/py2/pexpect/bashrc.sh +++ b/contrib/python/pexpect/py2/pexpect/bashrc.sh @@ -14,3 +14,5 @@ PS1="$" # Unset PROMPT_COMMAND, so that it can't change PS1 to something unexpected. unset PROMPT_COMMAND + +bind 'set enable-bracketed-paste off' diff --git a/contrib/python/pexpect/py2/pexpect/fdpexpect.py b/contrib/python/pexpect/py2/pexpect/fdpexpect.py index cddd50e100..140bdfeeda 100644 --- a/contrib/python/pexpect/py2/pexpect/fdpexpect.py +++ b/contrib/python/pexpect/py2/pexpect/fdpexpect.py @@ -1,7 +1,11 @@ -'''This is like pexpect, but it will work with any file descriptor that you +'''This is like :mod:`pexpect`, but it will work with any file descriptor that you pass it. You are responsible for opening and close the file descriptor. This allows you to use Pexpect with sockets and named pipes (FIFOs). +.. note:: + socket.fileno() does not give a readable file descriptor on windows. + Use :mod:`pexpect.socket_pexpect` for cross-platform socket support + PEXPECT LICENSE This license is approved by the OSI and FSF as GPL-compatible. 
diff --git a/contrib/python/pexpect/py2/pexpect/popen_spawn.py b/contrib/python/pexpect/py2/pexpect/popen_spawn.py index 4bb58cfe76..e6bdf07d61 100644 --- a/contrib/python/pexpect/py2/pexpect/popen_spawn.py +++ b/contrib/python/pexpect/py2/pexpect/popen_spawn.py @@ -57,7 +57,7 @@ class PopenSpawn(SpawnBase): self._read_queue = Queue() self._read_thread = threading.Thread(target=self._read_incoming) - self._read_thread.setDaemon(True) + self._read_thread.daemon = True self._read_thread.start() _read_reached_eof = False diff --git a/contrib/python/pexpect/py2/pexpect/pxssh.py b/contrib/python/pexpect/py2/pexpect/pxssh.py index 3d53bd9746..742f59e406 100644 --- a/contrib/python/pexpect/py2/pexpect/pxssh.py +++ b/contrib/python/pexpect/py2/pexpect/pxssh.py @@ -143,8 +143,8 @@ class pxssh (spawn): # used to set shell command-line prompt to UNIQUE_PROMPT. self.PROMPT_SET_SH = r"PS1='[PEXPECT]\$ '" self.PROMPT_SET_CSH = r"set prompt='[PEXPECT]\$ '" - self.SSH_OPTS = ("-o'RSAAuthentication=no'" - + " -o 'PubkeyAuthentication=no'") + self.PROMPT_SET_ZSH = "prompt restore;\nPS1='[PEXPECT]%(!.#.$) '" + self.SSH_OPTS = (" -o 'PubkeyAuthentication=no'") # Disabling host key checking, makes you vulnerable to MITM attacks. # + " -o 'StrictHostKeyChecking=no'" # + " -o 'UserKnownHostsFile /dev/null' ") @@ -152,7 +152,7 @@ class pxssh (spawn): # displaying a GUI password dialog. I have not figured out how to # disable only SSH_ASKPASS without also disabling X11 forwarding. # Unsetting SSH_ASKPASS on the remote side doesn't disable it! Annoying! 
- #self.SSH_OPTS = "-x -o'RSAAuthentication=no' -o 'PubkeyAuthentication=no'" + #self.SSH_OPTS = "-x -o 'PubkeyAuthentication=no'" self.force_password = False self.debug_command_string = debug_command_string @@ -530,8 +530,11 @@ class pxssh (spawn): if i == 0: # csh-style self.sendline(self.PROMPT_SET_CSH) i = self.expect([TIMEOUT, self.PROMPT], timeout=10) - if i == 0: - return False + if i == 0: # zsh-style + self.sendline(self.PROMPT_SET_ZSH) + i = self.expect([TIMEOUT, self.PROMPT], timeout=10) + if i == 0: + return False return True # vi:ts=4:sw=4:expandtab:ft=python: diff --git a/contrib/python/pexpect/py2/pexpect/replwrap.py b/contrib/python/pexpect/py2/pexpect/replwrap.py index c930f1e4fe..08dbd5e869 100644 --- a/contrib/python/pexpect/py2/pexpect/replwrap.py +++ b/contrib/python/pexpect/py2/pexpect/replwrap.py @@ -108,23 +108,29 @@ class REPLWrapper(object): + command) return u''.join(res + [self.child.before]) -def python(command="python"): +def python(command=sys.executable): """Start a Python shell and return a :class:`REPLWrapper` object.""" return REPLWrapper(command, u">>> ", u"import sys; sys.ps1={0!r}; sys.ps2={1!r}") -def bash(command="bash"): - """Start a bash shell and return a :class:`REPLWrapper` object.""" - bashrc = os.path.join(os.path.dirname(__file__), 'bashrc.sh') - child = pexpect.spawn(command, ['--rcfile', bashrc], echo=False, - encoding='utf-8') +def _repl_sh(command, args, non_printable_insert): + child = pexpect.spawn(command, args, echo=False, encoding='utf-8') # If the user runs 'env', the value of PS1 will be in the output. To avoid # replwrap seeing that as the next prompt, we'll embed the marker characters # for invisible characters in the prompt; these show up when inspecting the # environment variable, but not when bash displays the prompt. 
- ps1 = PEXPECT_PROMPT[:5] + u'\\[\\]' + PEXPECT_PROMPT[5:] - ps2 = PEXPECT_CONTINUATION_PROMPT[:5] + u'\\[\\]' + PEXPECT_CONTINUATION_PROMPT[5:] + ps1 = PEXPECT_PROMPT[:5] + non_printable_insert + PEXPECT_PROMPT[5:] + ps2 = PEXPECT_CONTINUATION_PROMPT[:5] + non_printable_insert + PEXPECT_CONTINUATION_PROMPT[5:] prompt_change = u"PS1='{0}' PS2='{1}' PROMPT_COMMAND=''".format(ps1, ps2) return REPLWrapper(child, u'\\$', prompt_change, extra_init_cmd="export PAGER=cat") + +def bash(command="bash"): + """Start a bash shell and return a :class:`REPLWrapper` object.""" + bashrc = os.path.join(os.path.dirname(__file__), 'bashrc.sh') + return _repl_sh(command, ['--rcfile', bashrc], non_printable_insert='\\[\\]') + +def zsh(command="zsh", args=("--no-rcs", "-V", "+Z")): + """Start a zsh shell and return a :class:`REPLWrapper` object.""" + return _repl_sh(command, list(args), non_printable_insert='%(!..)') diff --git a/contrib/python/pexpect/py2/pexpect/run.py b/contrib/python/pexpect/py2/pexpect/run.py index ff288a1246..5695ab7f7b 100644 --- a/contrib/python/pexpect/py2/pexpect/run.py +++ b/contrib/python/pexpect/py2/pexpect/run.py @@ -66,7 +66,7 @@ def run(command, timeout=30, withexitstatus=False, events=None, The 'events' argument should be either a dictionary or a tuple list that contains patterns and responses. Whenever one of the patterns is seen in the command output, run() will send the associated response string. 
- So, run() in the above example can be also written as: + So, run() in the above example can be also written as:: run("mencoder dvd://1 -o video.avi -oac copy -ovc copy", events=[(TIMEOUT,print_ticks)], timeout=5) diff --git a/contrib/python/pexpect/py2/pexpect/spawnbase.py b/contrib/python/pexpect/py2/pexpect/spawnbase.py index 59e905764c..abf8071ec1 100644 --- a/contrib/python/pexpect/py2/pexpect/spawnbase.py +++ b/contrib/python/pexpect/py2/pexpect/spawnbase.py @@ -141,6 +141,16 @@ class SpawnBase(object): return s.encode('ascii') return s + # In bytes mode, regex patterns should also be of bytes type + def _coerce_expect_re(self, r): + p = r.pattern + if self.encoding is None and not isinstance(p, bytes): + return re.compile(p.encode('utf-8')) + # And vice-versa + elif self.encoding is not None and isinstance(p, bytes): + return re.compile(p.decode('utf-8')) + return r + def _coerce_send_string(self, s): if self.encoding is None and not isinstance(s, bytes): return s.encode('utf-8') @@ -153,7 +163,7 @@ class SpawnBase(object): self._buffer = self.buffer_type() self._buffer.write(value) - # This property is provided for backwards compatability (self.buffer used + # This property is provided for backwards compatibility (self.buffer used # to be a string/bytes object) buffer = property(_get_buffer, _set_buffer) @@ -235,6 +245,7 @@ class SpawnBase(object): elif p is TIMEOUT: compiled_pattern_list.append(TIMEOUT) elif isinstance(p, type(re.compile(''))): + p = self._coerce_expect_re(p) compiled_pattern_list.append(p) else: self._pattern_type_err(p) diff --git a/contrib/python/pexpect/py2/ya.make b/contrib/python/pexpect/py2/ya.make index 9dc28a63d0..2498af4417 100644 --- a/contrib/python/pexpect/py2/ya.make +++ b/contrib/python/pexpect/py2/ya.make @@ -2,7 +2,7 @@ PY2_LIBRARY() -VERSION(4.8.0) +VERSION(4.9.0) LICENSE(ISC) diff --git a/contrib/python/prompt-toolkit/py3/.dist-info/METADATA b/contrib/python/prompt-toolkit/py3/.dist-info/METADATA index 
ab2db30bbf..13de29c342 100644 --- a/contrib/python/prompt-toolkit/py3/.dist-info/METADATA +++ b/contrib/python/prompt-toolkit/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: prompt-toolkit -Version: 3.0.41 +Version: 3.0.43 Summary: Library for building powerful interactive command lines in Python Home-page: https://github.com/prompt-toolkit/python-prompt-toolkit Author: Jonathan Slenders diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/__init__.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/__init__.py index 2d408152fe..82324cb815 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/__init__.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/__init__.py @@ -27,7 +27,7 @@ from .formatted_text import ANSI, HTML from .shortcuts import PromptSession, print_formatted_text, prompt # Don't forget to update in `docs/conf.py`! -__version__ = "3.0.41" +__version__ = "3.0.43" assert pep440.match(__version__) diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/application/application.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/application/application.py index 726fc0a067..d4637811ba 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/application/application.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/application/application.py @@ -807,16 +807,19 @@ class Application(Generic[_AppResult]): @contextmanager def set_handle_sigint(loop: AbstractEventLoop) -> Iterator[None]: if handle_sigint: - loop.add_signal_handler( - signal.SIGINT, - lambda *_: loop.call_soon_threadsafe( - self.key_processor.send_sigint - ), - ) - try: - yield - finally: - loop.remove_signal_handler(signal.SIGINT) + with _restore_sigint_from_ctypes(): + # save sigint handlers (python and os level) + # See: https://github.com/prompt-toolkit/python-prompt-toolkit/issues/1576 + loop.add_signal_handler( + signal.SIGINT, + lambda *_: loop.call_soon_threadsafe( + self.key_processor.send_sigint + ), + ) + try: + yield + 
finally: + loop.remove_signal_handler(signal.SIGINT) else: yield @@ -960,7 +963,8 @@ class Application(Generic[_AppResult]): def _called_from_ipython() -> bool: try: return ( - "IPython/terminal/interactiveshell.py" + sys.modules["IPython"].version_info < (8, 18, 0, "") + and "IPython/terminal/interactiveshell.py" in sys._getframe(3).f_code.co_filename ) except BaseException: @@ -1545,7 +1549,7 @@ async def _do_wait_for_enter(wait_text: AnyFormattedText) -> None: @contextmanager def attach_winch_signal_handler( - handler: Callable[[], None] + handler: Callable[[], None], ) -> Generator[None, None, None]: """ Attach the given callback as a WINCH signal handler within the context @@ -1586,3 +1590,36 @@ def attach_winch_signal_handler( previous_winch_handler._callback, *previous_winch_handler._args, ) + + +@contextmanager +def _restore_sigint_from_ctypes() -> Generator[None, None, None]: + # The following functions are part of the stable ABI since python 3.2 + # See: https://docs.python.org/3/c-api/sys.html#c.PyOS_getsig + # Inline import: these are not available on Pypy. + try: + from ctypes import c_int, c_void_p, pythonapi + except ImportError: + # Any of the above imports don't exist? Don't do anything here. 
+ yield + return + + # PyOS_sighandler_t PyOS_getsig(int i) + pythonapi.PyOS_getsig.restype = c_void_p + pythonapi.PyOS_getsig.argtypes = (c_int,) + + # PyOS_sighandler_t PyOS_setsig(int i, PyOS_sighandler_t h) + pythonapi.PyOS_setsig.restype = c_void_p + pythonapi.PyOS_setsig.argtypes = ( + c_int, + c_void_p, + ) + + sigint = signal.getsignal(signal.SIGINT) + sigint_os = pythonapi.PyOS_getsig(signal.SIGINT) + + try: + yield + finally: + signal.signal(signal.SIGINT, sigint) + pythonapi.PyOS_setsig(signal.SIGINT, sigint_os) diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/buffer.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/buffer.py index 7dbc7a6fb3..100ca78d7b 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/buffer.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/buffer.py @@ -15,7 +15,7 @@ import tempfile from collections import deque from enum import Enum from functools import wraps -from typing import Any, Awaitable, Callable, Coroutine, Iterable, TypeVar, cast +from typing import Any, Callable, Coroutine, Iterable, TypeVar, cast from .application.current import get_app from .application.run_in_terminal import run_in_terminal @@ -1891,7 +1891,7 @@ class Buffer: self.reset() -_T = TypeVar("_T", bound=Callable[..., Awaitable[None]]) +_T = TypeVar("_T", bound=Callable[..., Coroutine[Any, Any, None]]) def _only_one_at_a_time(coroutine: _T) -> _T: diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/regular_languages/__init__.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/regular_languages/__init__.py index 1743af4e74..c947fd5337 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/regular_languages/__init__.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/regular_languages/__init__.py @@ -30,7 +30,7 @@ What can we do with this grammar? 
--------------------------------- - Syntax highlighting: We could use this for instance to give file names - different colour. + different color. - Parse the result: .. We can extract the file names and commands by using a regular expression with named groups. - Input validation: .. Don't accept anything that does not match this grammar. diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/ssh/server.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/ssh/server.py index 5f9b5f6b7c..9a5d4022a0 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/ssh/server.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/ssh/server.py @@ -6,7 +6,7 @@ from __future__ import annotations import asyncio import traceback from asyncio import get_running_loop -from typing import Any, Awaitable, Callable, TextIO, cast +from typing import Any, Callable, Coroutine, TextIO, cast import asyncssh @@ -21,7 +21,7 @@ __all__ = ["PromptToolkitSSHSession", "PromptToolkitSSHServer"] class PromptToolkitSSHSession(asyncssh.SSHServerSession): # type: ignore def __init__( self, - interact: Callable[[PromptToolkitSSHSession], Awaitable[None]], + interact: Callable[[PromptToolkitSSHSession], Coroutine[Any, Any, None]], *, enable_cpr: bool, ) -> None: @@ -162,7 +162,7 @@ class PromptToolkitSSHServer(asyncssh.SSHServer): def __init__( self, - interact: Callable[[PromptToolkitSSHSession], Awaitable[None]], + interact: Callable[[PromptToolkitSSHSession], Coroutine[Any, Any, None]], *, enable_cpr: bool = True, ) -> None: diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/telnet/server.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/telnet/server.py index 81c19f6c88..9ebe66c62d 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/telnet/server.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/contrib/telnet/server.py @@ -7,7 +7,7 @@ import asyncio import contextvars import socket from asyncio 
import get_running_loop -from typing import Awaitable, Callable, TextIO, cast +from typing import Any, Callable, Coroutine, TextIO, cast from prompt_toolkit.application.current import create_app_session, get_app from prompt_toolkit.application.run_in_terminal import run_in_terminal @@ -124,7 +124,7 @@ class TelnetConnection: self, conn: socket.socket, addr: tuple[str, int], - interact: Callable[[TelnetConnection], Awaitable[None]], + interact: Callable[[TelnetConnection], Coroutine[Any, Any, None]], server: TelnetServer, encoding: str, style: BaseStyle | None, @@ -283,7 +283,9 @@ class TelnetServer: self, host: str = "127.0.0.1", port: int = 23, - interact: Callable[[TelnetConnection], Awaitable[None]] = _dummy_interact, + interact: Callable[ + [TelnetConnection], Coroutine[Any, Any, None] + ] = _dummy_interact, encoding: str = "utf-8", style: BaseStyle | None = None, enable_cpr: bool = True, diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/eventloop/inputhook.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/eventloop/inputhook.py index 5731573f52..a4c0eee6bb 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/eventloop/inputhook.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/eventloop/inputhook.py @@ -66,7 +66,7 @@ InputHook: TypeAlias = Callable[[InputHookContext], None] def new_eventloop_with_inputhook( - inputhook: Callable[[InputHookContext], None] + inputhook: Callable[[InputHookContext], None], ) -> AbstractEventLoop: """ Create a new event loop with the given inputhook. @@ -77,7 +77,7 @@ def new_eventloop_with_inputhook( def set_eventloop_with_inputhook( - inputhook: Callable[[InputHookContext], None] + inputhook: Callable[[InputHookContext], None], ) -> AbstractEventLoop: """ Create a new event loop with the given inputhook, and activate it. 
diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/filters/app.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/filters/app.py index 303a078c4e..aacb228416 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/filters/app.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/filters/app.py @@ -50,7 +50,7 @@ __all__ = [ # NOTE: `has_focus` below should *not* be `memoized`. It can reference any user -# control. For instance, if we would contiously create new +# control. For instance, if we would continuously create new # `PromptSession` instances, then previous instances won't be released, # because this memoize (which caches results in the global scope) will # still refer to each instance. diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/formatted_text/__init__.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/formatted_text/__init__.py index e34db13d80..db44ab9266 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/formatted_text/__init__.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/formatted_text/__init__.py @@ -16,6 +16,7 @@ from .ansi import ANSI from .base import ( AnyFormattedText, FormattedText, + OneStyleAndTextTuple, StyleAndTextTuples, Template, is_formatted_text, @@ -35,6 +36,7 @@ from .utils import ( __all__ = [ # Base. 
"AnyFormattedText", + "OneStyleAndTextTuple", "to_formatted_text", "is_formatted_text", "Template", diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/formatted_text/utils.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/formatted_text/utils.py index b242c2cc89..c8c37e0946 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/formatted_text/utils.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/formatted_text/utils.py @@ -72,13 +72,15 @@ def fragment_list_to_text(fragments: StyleAndTextTuples) -> str: return "".join(item[1] for item in fragments if ZeroWidthEscape not in item[0]) -def split_lines(fragments: StyleAndTextTuples) -> Iterable[StyleAndTextTuples]: +def split_lines( + fragments: Iterable[OneStyleAndTextTuple], +) -> Iterable[StyleAndTextTuples]: """ Take a single list of (style_str, text) tuples and yield one such list for each line. Just like str.split, this will yield at least one item. - :param fragments: List of (style_str, text) or (style_str, text, mouse_handler) - tuples. + :param fragments: Iterable of ``(style_str, text)`` or + ``(style_str, text, mouse_handler)`` tuples. """ line: StyleAndTextTuples = [] diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/input/posix_utils.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/input/posix_utils.py index e9c73fecc6..4a78dc421b 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/input/posix_utils.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/input/posix_utils.py @@ -26,7 +26,7 @@ class PosixStdinReader: On Python3, this can be 'surrogateescape', which is the default. 'surrogateescape' is preferred, because this allows us to transfer - unrecognised bytes to the key bindings. Some terminals, like lxterminal + unrecognized bytes to the key bindings. Some terminals, like lxterminal and Guake, use the 'Mxx' notation to send mouse events, where each 'x' can be any possible byte. 
""" diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/key_binding/key_bindings.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/key_binding/key_bindings.py index 6abb595daf..62530f2b77 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/key_binding/key_bindings.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/key_binding/key_bindings.py @@ -40,8 +40,9 @@ from abc import ABCMeta, abstractmethod, abstractproperty from inspect import isawaitable from typing import ( TYPE_CHECKING, - Awaitable, + Any, Callable, + Coroutine, Hashable, Sequence, Tuple, @@ -89,7 +90,8 @@ __all__ = [ # This is mainly used in case of mouse move events, to prevent excessive # repainting during mouse move events. KeyHandlerCallable = Callable[ - ["KeyPressEvent"], Union["NotImplementedOrNone", Awaitable["NotImplementedOrNone"]] + ["KeyPressEvent"], + Union["NotImplementedOrNone", Coroutine[Any, Any, "NotImplementedOrNone"]], ] @@ -125,7 +127,7 @@ class Binding: # If the handler is a coroutine, create an asyncio task. if isawaitable(result): - awaitable = cast(Awaitable["NotImplementedOrNone"], result) + awaitable = cast(Coroutine[Any, Any, "NotImplementedOrNone"], result) async def bg_task() -> None: result = await awaitable diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/containers.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/containers.py index 5de87b08a7..100d4aaebc 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/containers.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/containers.py @@ -2050,7 +2050,7 @@ class Window(Container): new_buffer_row[x + xpos] = char # When we print a multi width character, make sure - # to erase the neighbours positions in the screen. + # to erase the neighbors positions in the screen. # (The empty string if different from everything, # so next redraw this cell will repaint anyway.) 
if char_width > 1: @@ -2537,7 +2537,7 @@ class Window(Container): scroll_offset_end=offsets.right, cursor_pos=get_cwidth(current_line_text[: ui_content.cursor_position.x]), window_size=width - current_line_prefix_width, - # We can only analyse the current line. Calculating the width off + # We can only analyze the current line. Calculating the width off # all the lines is too expensive. content_size=max( get_cwidth(current_line_text), self.horizontal_scroll + width diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/controls.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/controls.py index c13960bc43..c30c0effa8 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/controls.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/controls.py @@ -491,7 +491,7 @@ class _ProcessedLine(NamedTuple): class BufferControl(UIControl): """ - Control for visualising the content of a :class:`.Buffer`. + Control for visualizing the content of a :class:`.Buffer`. :param buffer: The :class:`.Buffer` object to be displayed. :param input_processors: A list of @@ -603,7 +603,7 @@ class BufferControl(UIControl): width can be done by calculating the longest line, but this would require applying all the processors to each line. This is unfeasible for a larger document, and doing it for small - documents only would result in inconsistent behaviour. + documents only would result in inconsistent behavior. """ return None diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/menus.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/menus.py index 1a21237a84..2c2ccb6436 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/menus.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/menus.py @@ -679,7 +679,7 @@ class MultiColumnCompletionsMenu(HSplit): filter=full_filter & show_meta & any_completion_has_meta, ) - # Initialise split. + # Initialize split. 
super().__init__([completions_window, meta_window], z_index=z_index) diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/processors.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/processors.py index 90c017ade7..b7376115e4 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/processors.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/layout/processors.py @@ -288,7 +288,7 @@ class HighlightSelectionProcessor(Processor): if from_ == 0 and to == 0 and len(fragments) == 0: # When this is an empty line, insert a space in order to - # visualise the selection. + # visualize the selection. return Transformation([(selected_fragment, " ")]) else: for i in range(from_, to): diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/lexers/pygments.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/lexers/pygments.py index 16ed29a287..4721d730c8 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/lexers/pygments.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/lexers/pygments.py @@ -31,7 +31,7 @@ __all__ = [ class SyntaxSync(metaclass=ABCMeta): """ - Syntax synchroniser. This is a tool that finds a start position for the + Syntax synchronizer. This is a tool that finds a start position for the lexer. This is especially important when editing big documents; we don't want to start the highlighting by running the lexer from the beginning of the file. That is very slow when editing. @@ -67,12 +67,12 @@ class RegexSync(SyntaxSync): Synchronize by starting at a line that matches the given regex pattern. """ - # Never go more than this amount of lines backwards for synchronisation. + # Never go more than this amount of lines backwards for synchronization. # That would be too CPU intensive. MAX_BACKWARDS = 500 # Start lexing at the start, if we are in the first 'n' lines and no - # synchronisation position was found. + # synchronization position was found. 
FROM_START_IF_NO_SYNC_POS_FOUND = 100 def __init__(self, pattern: str) -> None: @@ -88,13 +88,13 @@ class RegexSync(SyntaxSync): lines = document.lines # Scan upwards, until we find a point where we can start the syntax - # synchronisation. + # synchronization. for i in range(lineno, max(-1, lineno - self.MAX_BACKWARDS), -1): match = pattern.match(lines[i]) if match: return i, match.start() - # No synchronisation point found. If we aren't that far from the + # No synchronization point found. If we aren't that far from the # beginning, start at the very beginning, otherwise, just try to start # at the current line. if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND: @@ -228,7 +228,7 @@ class PygmentsLexer(Lexer): line_generators: dict[LineGenerator, int] = {} def get_syntax_sync() -> SyntaxSync: - "The Syntax synchronisation object that we currently use." + "The Syntax synchronization object that we currently use." if self.sync_from_start(): return SyncFromStart() else: @@ -271,7 +271,7 @@ class PygmentsLexer(Lexer): return generator # No generator found. Determine starting point for the syntax - # synchronisation first. + # synchronization first. # Go at least x lines back. (Make scrolling upwards more # efficient.) @@ -291,7 +291,7 @@ class PygmentsLexer(Lexer): generator = create_line_generator(row, column) # If the column is not 0, ignore the first line. (Which is - # incomplete. This happens when the synchronisation algorithm tells + # incomplete. This happens when the synchronization algorithm tells # us to start parsing in the middle of a line.) if column: next(generator) @@ -316,7 +316,7 @@ class PygmentsLexer(Lexer): # Remove the next item from the cache. # (It could happen that it's already there, because of # another generator that started filling these lines, - # but we want to synchronise these lines with the + # but we want to synchronize these lines with the # current lexer's state.) 
if num + 1 in cache: del cache[num + 1] diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/output/base.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/output/base.py index 6b06a50eed..3c38cec86e 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/output/base.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/output/base.py @@ -66,7 +66,7 @@ class Output(metaclass=ABCMeta): @abstractmethod def erase_screen(self) -> None: """ - Erases the screen with the background colour and moves the cursor to + Erases the screen with the background color and moves the cursor to home. """ diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/output/vt100.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/output/vt100.py index 29743db21d..142deab077 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/output/vt100.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/output/vt100.py @@ -529,7 +529,7 @@ class Vt100_Output(Output): def erase_screen(self) -> None: """ - Erases the screen with the background colour and moves the cursor to + Erases the screen with the background color and moves the cursor to home. """ self.write_raw("\x1b[2J") diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/patch_stdout.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/patch_stdout.py index 81a7ccbb44..528bec7ffe 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/patch_stdout.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/patch_stdout.py @@ -43,7 +43,7 @@ def patch_stdout(raw: bool = False) -> Generator[None, None, None]: Writing to this proxy will make sure that the text appears above the prompt, and that it doesn't destroy the output from the renderer. If no - application is curring, the behaviour should be identical to writing to + application is curring, the behavior should be identical to writing to `sys.stdout` directly. 
Warning: If a new event loop is installed using `asyncio.set_event_loop()`, @@ -203,6 +203,13 @@ class StdoutProxy: """ def write_and_flush() -> None: + # Ensure that autowrap is enabled before calling `write`. + # XXX: On Windows, the `Windows10_Output` enables/disables VT + # terminal processing for every flush. It turns out that this + # causes autowrap to be reset (disabled) after each flush. So, + # we have to enable it again before writing text. + self._output.enable_autowrap() + if self.raw: self._output.write_raw(text) else: diff --git a/contrib/python/prompt-toolkit/py3/prompt_toolkit/renderer.py b/contrib/python/prompt-toolkit/py3/prompt_toolkit/renderer.py index 7a3b88a608..5ad1dd68d0 100644 --- a/contrib/python/prompt-toolkit/py3/prompt_toolkit/renderer.py +++ b/contrib/python/prompt-toolkit/py3/prompt_toolkit/renderer.py @@ -233,7 +233,7 @@ def _output_screen_diff( # output. That way, we're sure that the terminal scrolls up, even when the # lower lines of the canvas just contain whitespace. - # The most obvious reason that we actually want this behaviour is the avoid + # The most obvious reason that we actually want this behavior is the avoid # the artifact of the input scrolling when the completion menu is shown. # (If the scrolling is actually wanted, the layout can still be build in a # way to behave that way by setting a dynamic height.) diff --git a/contrib/python/prompt-toolkit/py3/tests/test_async_generator.py b/contrib/python/prompt-toolkit/py3/tests/test_async_generator.py index 4a01c0e3d6..8c95f8c087 100644 --- a/contrib/python/prompt-toolkit/py3/tests/test_async_generator.py +++ b/contrib/python/prompt-toolkit/py3/tests/test_async_generator.py @@ -12,7 +12,7 @@ def _sync_generator(): def test_generator_to_async_generator(): """ - Test conversion of sync to asycn generator. + Test conversion of sync to async generator. This should run the synchronous parts in a background thread. 
""" async_gen = generator_to_async_generator(_sync_generator) diff --git a/contrib/python/prompt-toolkit/py3/tests/test_formatted_text.py b/contrib/python/prompt-toolkit/py3/tests/test_formatted_text.py index 2d8e184ade..843aac1619 100644 --- a/contrib/python/prompt-toolkit/py3/tests/test_formatted_text.py +++ b/contrib/python/prompt-toolkit/py3/tests/test_formatted_text.py @@ -28,7 +28,7 @@ def test_basic_html(): ] # It's important that `to_formatted_text` returns a `FormattedText` - # instance. Otherwise, `print_formatted_text` won't recognise it and will + # instance. Otherwise, `print_formatted_text` won't recognize it and will # print a list literal instead. assert isinstance(to_formatted_text(html), FormattedText) diff --git a/contrib/python/prompt-toolkit/py3/tests/test_widgets.py b/contrib/python/prompt-toolkit/py3/tests/test_widgets.py index 1fc8ae4398..ee7745a2d0 100644 --- a/contrib/python/prompt-toolkit/py3/tests/test_widgets.py +++ b/contrib/python/prompt-toolkit/py3/tests/test_widgets.py @@ -10,7 +10,7 @@ def _to_text(button: Button) -> str: return fragment_list_to_text(control.text()) -def test_defaulf_button(): +def test_default_button(): button = Button("Exit") assert _to_text(button) == "< Exit >" diff --git a/contrib/python/prompt-toolkit/py3/ya.make b/contrib/python/prompt-toolkit/py3/ya.make index 74bf71f6b0..90f446832b 100644 --- a/contrib/python/prompt-toolkit/py3/ya.make +++ b/contrib/python/prompt-toolkit/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(3.0.41) +VERSION(3.0.43) LICENSE(BSD-3-Clause) diff --git a/contrib/python/pyasn1/py2/.dist-info/METADATA b/contrib/python/pyasn1/py2/.dist-info/METADATA index 530fe5bf7b..1a6727cecc 100644 --- a/contrib/python/pyasn1/py2/.dist-info/METADATA +++ b/contrib/python/pyasn1/py2/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: pyasn1 -Version: 0.5.0 +Version: 0.5.1 Summary: Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208) Home-page: 
https://github.com/pyasn1/pyasn1 Author: Ilya Etingof @@ -32,6 +32,7 @@ Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Communications diff --git a/contrib/python/pyasn1/py2/pyasn1/__init__.py b/contrib/python/pyasn1/py2/pyasn1/__init__.py index a979d291f2..73d47f3424 100644 --- a/contrib/python/pyasn1/py2/pyasn1/__init__.py +++ b/contrib/python/pyasn1/py2/pyasn1/__init__.py @@ -1,2 +1,2 @@ # https://www.python.org/dev/peps/pep-0396/ -__version__ = '0.5.0' +__version__ = '0.5.1' diff --git a/contrib/python/pyasn1/py2/pyasn1/codec/ber/decoder.py b/contrib/python/pyasn1/py2/pyasn1/codec/ber/decoder.py index 070733fd28..7cc863d1c7 100644 --- a/contrib/python/pyasn1/py2/pyasn1/codec/ber/decoder.py +++ b/contrib/python/pyasn1/py2/pyasn1/codec/ber/decoder.py @@ -4,7 +4,10 @@ # Copyright (c) 2005-2020, Ilya Etingof <etingof@gmail.com> # License: https://pyasn1.readthedocs.io/en/latest/license.html # +import io import os +import sys + from pyasn1 import debug from pyasn1 import error @@ -1762,7 +1765,14 @@ class SingleItemDecoder(object): if state is stDecodeValue: if not options.get('recursiveFlag', True) and not substrateFun: # deprecate this - substrateFun = lambda a, b, c: (a, b[:c]) + def substrateFun(asn1Object, _substrate, _length, _options): + """Legacy hack to keep the recursiveFlag=False option supported. + + The decode(..., substrateFun=userCallback) option was introduced in 0.1.4 as a generalization + of the old recursiveFlag=False option. Users should pass their callback instead of using + recursiveFlag. 
+ """ + yield asn1Object original_position = substrate.tell() @@ -1783,9 +1793,13 @@ class SingleItemDecoder(object): yield value bytesRead = substrate.tell() - original_position - if bytesRead != length: + if not substrateFun and bytesRead != length: raise PyAsn1Error( "Read %s bytes instead of expected %s." % (bytesRead, length)) + elif substrateFun and bytesRead > length: + # custom substrateFun may be used for partial decoding, reading less is expected there + raise PyAsn1Error( + "Read %s bytes are more than expected %s." % (bytesRead, length)) if LOG: LOG('codec %s yields type %s, value:\n%s\n...' % ( @@ -1959,6 +1973,27 @@ class Decoder(object): may not be required. Most common reason for it to require is that ASN.1 structure is encoded in *IMPLICIT* tagging mode. + substrateFun: :py:class:`Union[ + Callable[[pyasn1.type.base.PyAsn1Item, bytes, int], + Tuple[pyasn1.type.base.PyAsn1Item, bytes]], + Callable[[pyasn1.type.base.PyAsn1Item, io.BytesIO, int, dict], + Generator[Union[pyasn1.type.base.PyAsn1Item, + pyasn1.error.SubstrateUnderrunError], + None, None]] + ]` + User callback meant to generalize special use cases like non-recursive or + partial decoding. A 3-arg non-streaming variant is supported for backwards + compatiblilty in addition to the newer 4-arg streaming variant. + The callback will receive the uninitialized object recovered from substrate + as 1st argument, the uninterpreted payload as 2nd argument, and the length + of the uninterpreted payload as 3rd argument. The streaming variant will + additionally receive the decode(..., **options) kwargs as 4th argument. + The non-streaming variant shall return an object that will be propagated + as decode() return value as 1st item, and the remainig payload for further + decode passes as 2nd item. + The streaming variant shall yield an object that will be propagated as + decode() return value, and leave the remaining payload in the stream. 
+ Returns ------- : :py:class:`tuple` @@ -1997,6 +2032,31 @@ class Decoder(object): """ substrate = asSeekableStream(substrate) + if "substrateFun" in options: + origSubstrateFun = options["substrateFun"] + + def substrateFunWrapper(asn1Object, substrate, length, options=None): + """Support both 0.4 and 0.5 style APIs. + + substrateFun API has changed in 0.5 for use with streaming decoders. To stay backwards compatible, + we first try if we received a streaming user callback. If that fails,we assume we've received a + non-streaming v0.4 user callback and convert it for streaming on the fly + """ + try: + substrate_gen = origSubstrateFun(asn1Object, substrate, length, options) + except TypeError: + _type, _value, traceback = sys.exc_info() + if traceback.tb_next: + # Traceback depth > 1 means TypeError from inside user provided function + raise + # invariant maintained at Decoder.__call__ entry + assert isinstance(substrate, io.BytesIO) # nosec assert_used + substrate_gen = Decoder._callSubstrateFunV4asV5(origSubstrateFun, asn1Object, substrate, length) + for value in substrate_gen: + yield value + + options["substrateFun"] = substrateFunWrapper + streamingDecoder = cls.STREAMING_DECODER( substrate, asn1Spec, **options) @@ -2012,6 +2072,16 @@ class Decoder(object): return asn1Object, tail + @staticmethod + def _callSubstrateFunV4asV5(substrateFunV4, asn1Object, substrate, length): + substrate_bytes = substrate.read() + if length == -1: + length = len(substrate_bytes) + value, nextSubstrate = substrateFunV4(asn1Object, substrate_bytes, length) + nbytes = substrate.write(nextSubstrate) + substrate.truncate() + substrate.seek(-nbytes, os.SEEK_CUR) + yield value #: Turns BER octet stream into an ASN.1 object. 
#: diff --git a/contrib/python/pyasn1/py2/tests/codec/ber/test_decoder.py b/contrib/python/pyasn1/py2/tests/codec/ber/test_decoder.py index 9e238cd458..35d12d0536 100644 --- a/contrib/python/pyasn1/py2/tests/codec/ber/test_decoder.py +++ b/contrib/python/pyasn1/py2/tests/codec/ber/test_decoder.py @@ -141,12 +141,24 @@ class BitStringDecoderTestCase(BaseTestCase): substrateFun=lambda a, b, c, d: streaming.readFromStream(b, c) ) == (ints2octs((3, 2, 0, 169, 3, 2, 1, 138)), str2octs('')) + def testDefModeChunkedSubstV04(self): + assert decoder.decode( + ints2octs((35, 8, 3, 2, 0, 169, 3, 2, 1, 138)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs((3, 2, 0, 169, 3, 2, 1, 138)), str2octs('')) + def testIndefModeChunkedSubst(self): assert decoder.decode( ints2octs((35, 128, 3, 2, 0, 169, 3, 2, 1, 138, 0, 0)), substrateFun=lambda a, b, c, d: streaming.readFromStream(b, c) ) == (ints2octs((3, 2, 0, 169, 3, 2, 1, 138, 0, 0)), str2octs('')) + def testIndefModeChunkedSubstV04(self): + assert decoder.decode( + ints2octs((35, 128, 3, 2, 0, 169, 3, 2, 1, 138, 0, 0)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs((3, 2, 0, 169, 3, 2, 1, 138, 0, 0)), str2octs('')) + def testTypeChecking(self): try: decoder.decode(ints2octs((35, 4, 2, 2, 42, 42))) @@ -185,6 +197,13 @@ class OctetStringDecoderTestCase(BaseTestCase): substrateFun=lambda a, b, c, d: streaming.readFromStream(b, c) ) == (ints2octs((4, 4, 81, 117, 105, 99, 4, 4, 107, 32, 98, 114, 4, 4, 111, 119, 110, 32, 4, 3, 102, 111, 120)), str2octs('')) + def testDefModeChunkedSubstV04(self): + assert decoder.decode( + ints2octs( + (36, 23, 4, 4, 81, 117, 105, 99, 4, 4, 107, 32, 98, 114, 4, 4, 111, 119, 110, 32, 4, 3, 102, 111, 120)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs((4, 4, 81, 117, 105, 99, 4, 4, 107, 32, 98, 114, 4, 4, 111, 119, 110, 32, 4, 3, 102, 111, 120)), str2octs('')) + def testIndefModeChunkedSubst(self): assert decoder.decode( ints2octs((36, 128, 4, 4, 81, 117, 105, 99, 
4, 4, 107, 32, 98, 114, 4, 4, 111, 119, 110, 32, 4, 3, 102, 111, @@ -193,6 +212,14 @@ class OctetStringDecoderTestCase(BaseTestCase): ) == (ints2octs( (4, 4, 81, 117, 105, 99, 4, 4, 107, 32, 98, 114, 4, 4, 111, 119, 110, 32, 4, 3, 102, 111, 120, 0, 0)), str2octs('')) + def testIndefModeChunkedSubstV04(self): + assert decoder.decode( + ints2octs((36, 128, 4, 4, 81, 117, 105, 99, 4, 4, 107, 32, 98, 114, 4, 4, 111, 119, 110, 32, 4, 3, 102, 111, + 120, 0, 0)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs( + (4, 4, 81, 117, 105, 99, 4, 4, 107, 32, 98, 114, 4, 4, 111, 119, 110, 32, 4, 3, 102, 111, 120, 0, 0)), str2octs('')) + class ExpTaggedOctetStringDecoderTestCase(BaseTestCase): def setUp(self): @@ -245,6 +272,12 @@ class ExpTaggedOctetStringDecoderTestCase(BaseTestCase): substrateFun=lambda a, b, c, d: streaming.readFromStream(b, c) ) == (ints2octs((4, 15, 81, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 32, 102, 111, 120)), str2octs('')) + def testDefModeSubstV04(self): + assert decoder.decode( + ints2octs((101, 17, 4, 15, 81, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 32, 102, 111, 120)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs((4, 15, 81, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 32, 102, 111, 120)), str2octs('')) + def testIndefModeSubst(self): assert decoder.decode( ints2octs(( @@ -254,6 +287,15 @@ class ExpTaggedOctetStringDecoderTestCase(BaseTestCase): ) == (ints2octs( (36, 128, 4, 15, 81, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 32, 102, 111, 120, 0, 0, 0, 0)), str2octs('')) + def testIndefModeSubstV04(self): + assert decoder.decode( + ints2octs(( + 101, 128, 36, 128, 4, 15, 81, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 32, 102, 111, 120, 0, + 0, 0, 0)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs( + (36, 128, 4, 15, 81, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 32, 102, 111, 120, 0, 0, 0, 0)), str2octs('')) + class NullDecoderTestCase(BaseTestCase): def testNull(self): @@ -680,6 
+722,12 @@ class SequenceDecoderTestCase(BaseTestCase): substrateFun=lambda a, b, c, d: streaming.readFromStream(b, c) ) == (ints2octs((5, 0, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 2, 1, 1)), str2octs('')) + def testWithOptionalAndDefaultedDefModeSubstV04(self): + assert decoder.decode( + ints2octs((48, 18, 5, 0, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 2, 1, 1)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs((5, 0, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 2, 1, 1)), str2octs('')) + def testWithOptionalAndDefaultedIndefModeSubst(self): assert decoder.decode( ints2octs((48, 128, 5, 0, 36, 128, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 0, 0, 2, 1, 1, 0, 0)), @@ -687,6 +735,13 @@ class SequenceDecoderTestCase(BaseTestCase): ) == (ints2octs( (5, 0, 36, 128, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 0, 0, 2, 1, 1, 0, 0)), str2octs('')) + def testWithOptionalAndDefaultedIndefModeSubstV04(self): + assert decoder.decode( + ints2octs((48, 128, 5, 0, 36, 128, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 0, 0, 2, 1, 1, 0, 0)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs( + (5, 0, 36, 128, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 0, 0, 2, 1, 1, 0, 0)), str2octs('')) + def testTagFormat(self): try: decoder.decode( @@ -1166,6 +1221,12 @@ class SetDecoderTestCase(BaseTestCase): substrateFun=lambda a, b, c, d: streaming.readFromStream(b, c) ) == (ints2octs((5, 0, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 2, 1, 1)), str2octs('')) + def testWithOptionalAndDefaultedDefModeSubstV04(self): + assert decoder.decode( + ints2octs((49, 18, 5, 0, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 2, 1, 1)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs((5, 0, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 2, 1, 1)), str2octs('')) + def testWithOptionalAndDefaultedIndefModeSubst(self): assert 
decoder.decode( ints2octs((49, 128, 5, 0, 36, 128, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 0, 0, 2, 1, 1, 0, 0)), @@ -1173,6 +1234,13 @@ class SetDecoderTestCase(BaseTestCase): ) == (ints2octs( (5, 0, 36, 128, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 0, 0, 2, 1, 1, 0, 0)), str2octs('')) + def testWithOptionalAndDefaultedIndefModeSubstV04(self): + assert decoder.decode( + ints2octs((49, 128, 5, 0, 36, 128, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 0, 0, 2, 1, 1, 0, 0)), + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs( + (5, 0, 36, 128, 4, 11, 113, 117, 105, 99, 107, 32, 98, 114, 111, 119, 110, 0, 0, 2, 1, 1, 0, 0)), str2octs('')) + def testTagFormat(self): try: decoder.decode( @@ -1498,6 +1566,13 @@ class AnyDecoderTestCase(BaseTestCase): substrateFun=lambda a, b, c, d: streaming.readFromStream(b, c) ) == (ints2octs((4, 3, 102, 111, 120)), str2octs('')) + def testByUntaggedSubstV04(self): + assert decoder.decode( + ints2octs((4, 3, 102, 111, 120)), + asn1Spec=self.s, + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs((4, 3, 102, 111, 120)), str2octs('')) + def testTaggedExSubst(self): assert decoder.decode( ints2octs((164, 5, 4, 3, 102, 111, 120)), @@ -1505,6 +1580,13 @@ class AnyDecoderTestCase(BaseTestCase): substrateFun=lambda a, b, c, d: streaming.readFromStream(b, c) ) == (ints2octs((164, 5, 4, 3, 102, 111, 120)), str2octs('')) + def testTaggedExSubstV04(self): + assert decoder.decode( + ints2octs((164, 5, 4, 3, 102, 111, 120)), + asn1Spec=self.s, + substrateFun=lambda a, b, c: (b, b[c:]) + ) == (ints2octs((164, 5, 4, 3, 102, 111, 120)), str2octs('')) + class EndOfOctetsTestCase(BaseTestCase): def testUnexpectedEoo(self): @@ -1841,6 +1923,50 @@ class CompressedFilesTestCase(BaseTestCase): os.remove(path) +class NonStreamingCompatibilityTestCase(BaseTestCase): + def setUp(self): + from pyasn1 import debug + BaseTestCase.setUp(self) + debug.setLogger(None) # undo logger setup from 
BaseTestCase to work around unrelated issue + + def testPartialDecodeWithCustomSubstrateFun(self): + snmp_req_substrate = ints2octs(( + 0x30, 0x22, 0x02, 0x01, 0x01, 0x04, 0x06, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63, 0xa0, 0x15, 0x02, 0x04, 0x69, + 0x30, 0xdb, 0xeb, 0x02, 0x01, 0x00, 0x02, 0x01, 0x00, 0x30, 0x07, 0x30, 0x05, 0x06, 0x01, 0x01, 0x05, 0x00)) + seq, next_substrate = decoder.decode( + snmp_req_substrate, asn1Spec=univ.Sequence(), + recursiveFlag=False, substrateFun=lambda a, b, c: (a, b[:c]) + ) + assert seq.isSameTypeWith(univ.Sequence) + assert next_substrate == snmp_req_substrate[2:] + version, next_substrate = decoder.decode( + next_substrate, asn1Spec=univ.Integer(), recursiveFlag=False, + substrateFun=lambda a, b, c: (a, b[:c]) + ) + assert version == 1 + + def testPartialDecodeWithDefaultSubstrateFun(self): + substrate = ints2octs(( + 0x04, 0x0e, 0x30, 0x0c, 0x06, 0x0a, 0x2b, 0x06, 0x01, 0x04, 0x01, 0x82, 0x37, 0x3c, 0x03, 0x02 + )) + result, rest = decoder.decode(substrate, recursiveFlag=False) + assert result.isSameTypeWith(univ.OctetString) + assert rest == substrate[2:] + + def testPropagateUserException(self): + substrate = io.BytesIO(ints2octs((0x04, 0x00))) + + def userSubstrateFun(_asn1Object, _substrate, _length, _options): + raise TypeError("error inside user function") + + try: + decoder.decode(substrate, asn1Spec=univ.OctetString, substrateFun=userSubstrateFun) + except TypeError as exc: + assert str(exc) == "error inside user function" + else: + raise AssertionError("decode() must not hide TypeError from inside user provided callback") + + suite = unittest.TestLoader().loadTestsFromModule(sys.modules[__name__]) if __name__ == '__main__': diff --git a/contrib/python/pyasn1/py2/ya.make b/contrib/python/pyasn1/py2/ya.make index cd11432c2f..78da352060 100644 --- a/contrib/python/pyasn1/py2/ya.make +++ b/contrib/python/pyasn1/py2/ya.make @@ -2,7 +2,7 @@ PY2_LIBRARY() -VERSION(0.5.0) +VERSION(0.5.1) LICENSE(BSD-3-Clause) diff --git 
a/contrib/python/ruamel.yaml.clib/py2/.dist-info/METADATA b/contrib/python/ruamel.yaml.clib/py2/.dist-info/METADATA index 7b8ca2a332..6c952ac9d6 100644 --- a/contrib/python/ruamel.yaml.clib/py2/.dist-info/METADATA +++ b/contrib/python/ruamel.yaml.clib/py2/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: ruamel.yaml.clib -Version: 0.2.7 +Version: 0.2.2 Summary: C version of reader, parser and emitter for ruamel.yaml derived from libyaml Home-page: https://sourceforge.net/p/ruamel-yaml-clib/code/ci/default/tree Author: Anthon van der Neut @@ -13,17 +13,14 @@ Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 3.10 -Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 -Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Topic :: Software Development :: Libraries :: Python Modules -Requires-Python: >=3.6 Description-Content-Type: text/x-rst -License-File: LICENSE ruamel.yaml.clib @@ -31,8 +28,8 @@ ruamel.yaml.clib ``ruamel.yaml.clib`` is the C based reader/scanner and emitter for ruamel.yaml -:version: 0.2.7 -:updated: 2022-10-19 +:version: 0.2.2 +:updated: 2020-09-04 :documentation: http://yaml.readthedocs.io :repository: https://sourceforge.net/projects/ruamel-yaml-clib/ :pypi: https://pypi.org/project/ruamel.yaml.clib/ @@ -49,7 +46,6 @@ directory) and the Python code for ruamel.yaml under /usr/lib/pythonX.Y. .. 
image:: https://sourceforge.net/p/ruamel-yaml-clib/code/ci/default/tree/_doc/_static/license.svg?format=raw :target: https://opensource.org/licenses/MIT - -This release in loving memory of Johanna Clasina van der Neut-Bandel [1922-10-19 - 2015-11-21] + diff --git a/contrib/python/ruamel.yaml.clib/py2/LICENSE b/contrib/python/ruamel.yaml.clib/py2/LICENSE index 786ebed939..630d5ecbac 100644 --- a/contrib/python/ruamel.yaml.clib/py2/LICENSE +++ b/contrib/python/ruamel.yaml.clib/py2/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) - Copyright (c) 2019-2022 Anthon van der Neut, Ruamel bvba + Copyright (c) 2019-2020 Anthon van der Neut, Ruamel bvba Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/contrib/python/ruamel.yaml.clib/py2/README.rst b/contrib/python/ruamel.yaml.clib/py2/README.rst index c41aca60e6..c8fc5e463c 100644 --- a/contrib/python/ruamel.yaml.clib/py2/README.rst +++ b/contrib/python/ruamel.yaml.clib/py2/README.rst @@ -4,8 +4,8 @@ ruamel.yaml.clib ``ruamel.yaml.clib`` is the C based reader/scanner and emitter for ruamel.yaml -:version: 0.2.6 -:updated: 2021-07-04 +:version: 0.2.2 +:updated: 2020-09-04 :documentation: http://yaml.readthedocs.io :repository: https://sourceforge.net/projects/ruamel-yaml-clib/ :pypi: https://pypi.org/project/ruamel.yaml.clib/ diff --git a/contrib/python/ruamel.yaml.clib/py2/ya.make b/contrib/python/ruamel.yaml.clib/py2/ya.make index 0f53126bd7..b4bf40fbce 100644 --- a/contrib/python/ruamel.yaml.clib/py2/ya.make +++ b/contrib/python/ruamel.yaml.clib/py2/ya.make @@ -1,6 +1,8 @@ +# Generated by devtools/yamaker (pypi). 
+ PY2_LIBRARY() -VERSION(0.2.7) +VERSION(0.2.2) LICENSE(MIT) diff --git a/contrib/python/ruamel.yaml.clib/py3/.dist-info/METADATA b/contrib/python/ruamel.yaml.clib/py3/.dist-info/METADATA index 7b8ca2a332..01aea7bcfa 100644 --- a/contrib/python/ruamel.yaml.clib/py3/.dist-info/METADATA +++ b/contrib/python/ruamel.yaml.clib/py3/.dist-info/METADATA @@ -1,13 +1,12 @@ Metadata-Version: 2.1 Name: ruamel.yaml.clib -Version: 0.2.7 +Version: 0.2.8 Summary: C version of reader, parser and emitter for ruamel.yaml derived from libyaml Home-page: https://sourceforge.net/p/ruamel-yaml-clib/code/ci/default/tree Author: Anthon van der Neut Author-email: a.van.der.neut@ruamel.eu License: MIT Keywords: yaml 1.2 parser c-library config -Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License @@ -51,5 +50,3 @@ directory) and the Python code for ruamel.yaml under /usr/lib/pythonX.Y. :target: https://opensource.org/licenses/MIT This release in loving memory of Johanna Clasina van der Neut-Bandel [1922-10-19 - 2015-11-21] - - diff --git a/contrib/python/ruamel.yaml.clib/py3/LICENSE b/contrib/python/ruamel.yaml.clib/py3/LICENSE index 786ebed939..2383b7c6c4 100644 --- a/contrib/python/ruamel.yaml.clib/py3/LICENSE +++ b/contrib/python/ruamel.yaml.clib/py3/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) - Copyright (c) 2019-2022 Anthon van der Neut, Ruamel bvba + Copyright (c) 2019-2023 Anthon van der Neut, Ruamel bvba Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/contrib/python/ruamel.yaml.clib/py3/README.rst b/contrib/python/ruamel.yaml.clib/py3/README.rst index c41aca60e6..0e83f9e423 100644 --- a/contrib/python/ruamel.yaml.clib/py3/README.rst +++ b/contrib/python/ruamel.yaml.clib/py3/README.rst @@ -4,8 +4,8 @@ ruamel.yaml.clib ``ruamel.yaml.clib`` is the C based 
reader/scanner and emitter for ruamel.yaml -:version: 0.2.6 -:updated: 2021-07-04 +:version: 0.2.7 +:updated: 2022-10-19 :documentation: http://yaml.readthedocs.io :repository: https://sourceforge.net/projects/ruamel-yaml-clib/ :pypi: https://pypi.org/project/ruamel.yaml.clib/ @@ -23,3 +23,4 @@ directory) and the Python code for ruamel.yaml under /usr/lib/pythonX.Y. .. image:: https://sourceforge.net/p/ruamel-yaml-clib/code/ci/default/tree/_doc/_static/license.svg?format=raw :target: https://opensource.org/licenses/MIT +This release in loving memory of Johanna Clasina van der Neut-Bandel [1922-10-19 - 2015-11-21] diff --git a/contrib/python/ruamel.yaml.clib/py3/ya.make b/contrib/python/ruamel.yaml.clib/py3/ya.make index af7562fd1c..1c874c3925 100644 --- a/contrib/python/ruamel.yaml.clib/py3/ya.make +++ b/contrib/python/ruamel.yaml.clib/py3/ya.make @@ -1,6 +1,8 @@ +# Generated by devtools/yamaker (pypi). + PY3_LIBRARY() -VERSION(0.2.7) +VERSION(0.2.8) LICENSE(MIT) diff --git a/contrib/python/typing-extensions/py3/.dist-info/METADATA b/contrib/python/typing-extensions/py3/.dist-info/METADATA index dc7c951a0f..863e977c2f 100644 --- a/contrib/python/typing-extensions/py3/.dist-info/METADATA +++ b/contrib/python/typing-extensions/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: typing_extensions -Version: 4.8.0 +Version: 4.9.0 Summary: Backported and Experimental Type Hints for Python 3.8+ Keywords: annotations,backport,checker,checking,function,hinting,hints,type,typechecking,typehinting,typehints,typing Author-email: "Guido van Rossum, Jukka Lehtosalo, Łukasz Langa, Michael Lee" <levkivskyi@gmail.com> diff --git a/contrib/python/typing-extensions/py3/typing_extensions.py b/contrib/python/typing-extensions/py3/typing_extensions.py index c96bf90fec..1666e96b7e 100644 --- a/contrib/python/typing-extensions/py3/typing_extensions.py +++ b/contrib/python/typing-extensions/py3/typing_extensions.py @@ -86,6 +86,7 @@ __all__ = [ 'TYPE_CHECKING', 'Never', 
'NoReturn', + 'ReadOnly', 'Required', 'NotRequired', @@ -473,6 +474,7 @@ _EXCLUDED_ATTRS = { "__orig_bases__", "__module__", "_MutableMapping__marker", "__doc__", "__subclasshook__", "__orig_class__", "__init__", "__new__", "__protocol_attrs__", "__callable_proto_members_only__", + "__match_args__", } if sys.version_info >= (3, 9): @@ -503,9 +505,9 @@ def _caller(depth=2): return None -# The performance of runtime-checkable protocols is significantly improved on Python 3.12, -# so we backport the 3.12 version of Protocol to Python <=3.11 -if sys.version_info >= (3, 12): +# `__match_args__` attribute was removed from protocol members in 3.13, +# we want to backport this change to older Python versions. +if sys.version_info >= (3, 13): Protocol = typing.Protocol else: def _allow_reckless_class_checks(depth=3): @@ -569,8 +571,13 @@ else: not cls.__callable_proto_members_only__ and cls.__dict__.get("__subclasshook__") is _proto_hook ): + non_method_attrs = sorted( + attr for attr in cls.__protocol_attrs__ + if not callable(getattr(cls, attr, None)) + ) raise TypeError( - "Protocols with non-method members don't support issubclass()" + "Protocols with non-method members don't support issubclass()." + f" Non-method members: {str(non_method_attrs)[1:-1]}." ) if not getattr(cls, '_is_runtime_protocol', False): raise TypeError( @@ -767,7 +774,7 @@ def _ensure_subclassable(mro_entries): return inner -if sys.version_info >= (3, 13): +if hasattr(typing, "ReadOnly"): # The standard library TypedDict in Python 3.8 does not store runtime information # about which (if any) keys are optional. See https://bugs.python.org/issue38834 # The standard library TypedDict in Python 3.9.0/1 does not honour the "total" @@ -778,6 +785,7 @@ if sys.version_info >= (3, 13): # Aaaand on 3.12 we add __orig_bases__ to TypedDict # to enable better runtime introspection. # On 3.13 we deprecate some odd ways of creating TypedDicts. + # PEP 705 proposes adding the ReadOnly[] qualifier. 
TypedDict = typing.TypedDict _TypedDictMeta = typing._TypedDictMeta is_typeddict = typing.is_typeddict @@ -785,8 +793,29 @@ else: # 3.10.0 and later _TAKES_MODULE = "module" in inspect.signature(typing._type_check).parameters + def _get_typeddict_qualifiers(annotation_type): + while True: + annotation_origin = get_origin(annotation_type) + if annotation_origin is Annotated: + annotation_args = get_args(annotation_type) + if annotation_args: + annotation_type = annotation_args[0] + else: + break + elif annotation_origin is Required: + yield Required + annotation_type, = get_args(annotation_type) + elif annotation_origin is NotRequired: + yield NotRequired + annotation_type, = get_args(annotation_type) + elif annotation_origin is ReadOnly: + yield ReadOnly + annotation_type, = get_args(annotation_type) + else: + break + class _TypedDictMeta(type): - def __new__(cls, name, bases, ns, total=True): + def __new__(cls, name, bases, ns, *, total=True): """Create new typed dict class object. This method is called when TypedDict is subclassed, @@ -829,33 +858,46 @@ else: } required_keys = set() optional_keys = set() + readonly_keys = set() + mutable_keys = set() for base in bases: - annotations.update(base.__dict__.get('__annotations__', {})) - required_keys.update(base.__dict__.get('__required_keys__', ())) - optional_keys.update(base.__dict__.get('__optional_keys__', ())) + base_dict = base.__dict__ + + annotations.update(base_dict.get('__annotations__', {})) + required_keys.update(base_dict.get('__required_keys__', ())) + optional_keys.update(base_dict.get('__optional_keys__', ())) + readonly_keys.update(base_dict.get('__readonly_keys__', ())) + mutable_keys.update(base_dict.get('__mutable_keys__', ())) annotations.update(own_annotations) for annotation_key, annotation_type in own_annotations.items(): - annotation_origin = get_origin(annotation_type) - if annotation_origin is Annotated: - annotation_args = get_args(annotation_type) - if annotation_args: - annotation_type 
= annotation_args[0] - annotation_origin = get_origin(annotation_type) - - if annotation_origin is Required: + qualifiers = set(_get_typeddict_qualifiers(annotation_type)) + + if Required in qualifiers: required_keys.add(annotation_key) - elif annotation_origin is NotRequired: + elif NotRequired in qualifiers: optional_keys.add(annotation_key) elif total: required_keys.add(annotation_key) else: optional_keys.add(annotation_key) + if ReadOnly in qualifiers: + if annotation_key in mutable_keys: + raise TypeError( + f"Cannot override mutable key {annotation_key!r}" + " with read-only key" + ) + readonly_keys.add(annotation_key) + else: + mutable_keys.add(annotation_key) + readonly_keys.discard(annotation_key) tp_dict.__annotations__ = annotations tp_dict.__required_keys__ = frozenset(required_keys) tp_dict.__optional_keys__ = frozenset(optional_keys) + tp_dict.__readonly_keys__ = frozenset(readonly_keys) + tp_dict.__mutable_keys__ = frozenset(mutable_keys) if not hasattr(tp_dict, '__total__'): tp_dict.__total__ = total return tp_dict @@ -936,6 +978,8 @@ else: raise TypeError("TypedDict takes either a dict or keyword arguments," " but not both") if kwargs: + if sys.version_info >= (3, 13): + raise TypeError("TypedDict takes no keyword arguments") warnings.warn( "The kwargs-based syntax for TypedDict definitions is deprecated " "in Python 3.11, will be removed in Python 3.13, and may not be " @@ -1924,6 +1968,53 @@ else: # 3.8 """) +if hasattr(typing, 'ReadOnly'): + ReadOnly = typing.ReadOnly +elif sys.version_info[:2] >= (3, 9): # 3.9-3.12 + @_ExtensionsSpecialForm + def ReadOnly(self, parameters): + """A special typing construct to mark an item of a TypedDict as read-only. + + For example: + + class Movie(TypedDict): + title: ReadOnly[str] + year: int + + def mutate_movie(m: Movie) -> None: + m["year"] = 1992 # allowed + m["title"] = "The Matrix" # typechecker error + + There is no runtime checking for this property. 
+ """ + item = typing._type_check(parameters, f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + +else: # 3.8 + class _ReadOnlyForm(_ExtensionsSpecialForm, _root=True): + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + + ReadOnly = _ReadOnlyForm( + 'ReadOnly', + doc="""A special typing construct to mark a key of a TypedDict as read-only. + + For example: + + class Movie(TypedDict): + title: ReadOnly[str] + year: int + + def mutate_movie(m: Movie) -> None: + m["year"] = 1992 # allowed + m["title"] = "The Matrix" # typechecker error + + There is no runtime checking for this propery. + """) + + _UNPACK_DOC = """\ Type unpack operator. @@ -2251,7 +2342,7 @@ else: # <=3.11 Usage: class Base: - def method(self) -> None: ... + def method(self) -> None: pass class Child(Base): @@ -2281,20 +2372,17 @@ else: # <=3.11 return arg -if hasattr(typing, "deprecated"): - deprecated = typing.deprecated +if hasattr(warnings, "deprecated"): + deprecated = warnings.deprecated else: _T = typing.TypeVar("_T") - def deprecated( - msg: str, - /, - *, - category: typing.Optional[typing.Type[Warning]] = DeprecationWarning, - stacklevel: int = 1, - ) -> typing.Callable[[_T], _T]: + class deprecated: """Indicate that a class, function or overload is deprecated. + When this decorator is applied to an object, the type checker + will generate a diagnostic on usage of the deprecated object. + Usage: @deprecated("Use B instead") @@ -2311,49 +2399,100 @@ else: @overload def g(x: str) -> int: ... - When this decorator is applied to an object, the type checker - will generate a diagnostic on usage of the deprecated object. - - The warning specified by ``category`` will be emitted on use - of deprecated objects. For functions, that happens on calls; - for classes, on instantiation. If the ``category`` is ``None``, - no warning is emitted. 
The ``stacklevel`` determines where the + The warning specified by *category* will be emitted at runtime + on use of deprecated objects. For functions, that happens on calls; + for classes, on instantiation and on creation of subclasses. + If the *category* is ``None``, no warning is emitted at runtime. + The *stacklevel* determines where the warning is emitted. If it is ``1`` (the default), the warning is emitted at the direct caller of the deprecated object; if it is higher, it is emitted further up the stack. + Static type checker behavior is not affected by the *category* + and *stacklevel* arguments. - The decorator sets the ``__deprecated__`` - attribute on the decorated object to the deprecation message - passed to the decorator. If applied to an overload, the decorator + The deprecation message passed to the decorator is saved in the + ``__deprecated__`` attribute on the decorated object. + If applied to an overload, the decorator must be after the ``@overload`` decorator for the attribute to exist on the overload as returned by ``get_overloads()``. See PEP 702 for details. """ - def decorator(arg: _T, /) -> _T: + def __init__( + self, + message: str, + /, + *, + category: typing.Optional[typing.Type[Warning]] = DeprecationWarning, + stacklevel: int = 1, + ) -> None: + if not isinstance(message, str): + raise TypeError( + "Expected an object of type str for 'message', not " + f"{type(message).__name__!r}" + ) + self.message = message + self.category = category + self.stacklevel = stacklevel + + def __call__(self, arg: _T, /) -> _T: + # Make sure the inner functions created below don't + # retain a reference to self. 
+ msg = self.message + category = self.category + stacklevel = self.stacklevel if category is None: arg.__deprecated__ = msg return arg elif isinstance(arg, type): + import functools + from types import MethodType + original_new = arg.__new__ - has_init = arg.__init__ is not object.__init__ @functools.wraps(original_new) def __new__(cls, *args, **kwargs): - warnings.warn(msg, category=category, stacklevel=stacklevel + 1) + if cls is arg: + warnings.warn(msg, category=category, stacklevel=stacklevel + 1) if original_new is not object.__new__: return original_new(cls, *args, **kwargs) # Mirrors a similar check in object.__new__. - elif not has_init and (args or kwargs): + elif cls.__init__ is object.__init__ and (args or kwargs): raise TypeError(f"{cls.__name__}() takes no arguments") else: return original_new(cls) arg.__new__ = staticmethod(__new__) + + original_init_subclass = arg.__init_subclass__ + # We need slightly different behavior if __init_subclass__ + # is a bound method (likely if it was implemented in Python) + if isinstance(original_init_subclass, MethodType): + original_init_subclass = original_init_subclass.__func__ + + @functools.wraps(original_init_subclass) + def __init_subclass__(*args, **kwargs): + warnings.warn(msg, category=category, stacklevel=stacklevel + 1) + return original_init_subclass(*args, **kwargs) + + arg.__init_subclass__ = classmethod(__init_subclass__) + # Or otherwise, which likely means it's a builtin such as + # object's implementation of __init_subclass__. 
+ else: + @functools.wraps(original_init_subclass) + def __init_subclass__(*args, **kwargs): + warnings.warn(msg, category=category, stacklevel=stacklevel + 1) + return original_init_subclass(*args, **kwargs) + + arg.__init_subclass__ = __init_subclass__ + arg.__deprecated__ = __new__.__deprecated__ = msg + __init_subclass__.__deprecated__ = msg return arg elif callable(arg): + import functools + @functools.wraps(arg) def wrapper(*args, **kwargs): warnings.warn(msg, category=category, stacklevel=stacklevel + 1) @@ -2367,8 +2506,6 @@ else: f"a class or callable, not {arg!r}" ) - return decorator - # We have to do some monkey patching to deal with the dual nature of # Unpack/TypeVarTuple: @@ -2437,11 +2574,35 @@ else: class_getitem = typing.Generic.__class_getitem__.__func__ nm_tpl.__class_getitem__ = classmethod(class_getitem) # update from user namespace without overriding special namedtuple attributes - for key in ns: + for key, val in ns.items(): if key in _prohibited_namedtuple_fields: raise AttributeError("Cannot overwrite NamedTuple attribute " + key) - elif key not in _special_namedtuple_fields and key not in nm_tpl._fields: - setattr(nm_tpl, key, ns[key]) + elif key not in _special_namedtuple_fields: + if key not in nm_tpl._fields: + setattr(nm_tpl, key, ns[key]) + try: + set_name = type(val).__set_name__ + except AttributeError: + pass + else: + try: + set_name(val, nm_tpl, key) + except BaseException as e: + msg = ( + f"Error calling __set_name__ on {type(val).__name__!r} " + f"instance {key!r} in {typename!r}" + ) + # BaseException.add_note() existed on py311, + # but the __set_name__ machinery didn't start + # using add_note() until py312. + # Making sure exceptions are raised in the same way + # as in "normal" classes seems most important here. 
+ if sys.version_info >= (3, 12): + e.add_note(msg) + raise + else: + raise RuntimeError(msg) from e + if typing.Generic in bases: nm_tpl.__init_subclass__() return nm_tpl @@ -2600,7 +2761,7 @@ else: num = UserId(5) + 1 # type: int """ - def __call__(self, obj): + def __call__(self, obj, /): return obj def __init__(self, name, tp): diff --git a/contrib/python/typing-extensions/py3/ya.make b/contrib/python/typing-extensions/py3/ya.make index d6837aed7b..1e65722a16 100644 --- a/contrib/python/typing-extensions/py3/ya.make +++ b/contrib/python/typing-extensions/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(4.8.0) +VERSION(4.9.0) LICENSE(PSF-2.0) diff --git a/contrib/python/wcwidth/py2/.dist-info/METADATA b/contrib/python/wcwidth/py2/.dist-info/METADATA index ce98bebb24..2539b52f73 100644 --- a/contrib/python/wcwidth/py2/.dist-info/METADATA +++ b/contrib/python/wcwidth/py2/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: wcwidth -Version: 0.2.6 +Version: 0.2.12 Summary: Measures the displayed width of unicode strings in a terminal Home-page: https://github.com/jquast/wcwidth Author: Jeff Quast @@ -18,12 +18,16 @@ Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 Classifier: Topic :: Software Development :: Libraries Classifier: Topic :: Software Development :: Localization Classifier: Topic :: Software Development :: Internationalization Classifier: Topic :: Terminals License-File: LICENSE -Requires-Dist: backports.functools-lru-cache (>=1.2.1) ; python_version < "3.2" +Requires-Dist: backports.functools-lru-cache >=1.2.1 ; python_version < "3.2" |pypi_downloads| |codecov| |license| @@ -122,7 +126,7 
@@ Briefly, return values of function ``wcwidth()`` are: Function ``wcswidth()`` simply returns the sum of all values for each character along a string, or ``-1`` when it occurs anywhere along a string. -Full API Documentation at http://wcwidth.readthedocs.org +Full API Documentation at https://wcwidth.readthedocs.org ========== Developing @@ -130,32 +134,70 @@ Developing Install wcwidth in editable mode:: - pip install -e. + pip install -e . Execute unit tests using tox_:: - tox + tox -e py27,py35,py36,py37,py38,py39,py310,py311,py312 + +Updating Unicode Version +------------------------ Regenerate python code tables from latest Unicode Specification data files:: tox -e update +The script is located at ``bin/update-tables.py``, requires Python 3.9 or +later. It is recommended but not necessary to run this script with the newest +Python, because the newest Python has the latest ``unicodedata`` for generating +comments. + +Building Documentation +---------------------- + +This project is using `sphinx`_ 4.5 to build documentation:: + + tox -e sphinx + +The output will be in ``docs/_build/html/``. + +Updating Requirements +--------------------- + +This project is using `pip-tools`_ to manage requirements. + +To upgrade requirements for updating unicode version, run:: + + tox -e update_requirements_update + +To upgrade requirements for testing, run:: + + tox -e update_requirements37,update_requirements39 + +To upgrade requirements for building documentation, run:: + + tox -e update_requirements_docs + +Utilities +--------- + Supplementary tools for browsing and testing terminals for wide unicode characters are found in the `bin/`_ of this project's source code. Just ensure -to first ``pip install -erequirements-develop.txt`` from this projects main +to first ``pip install -r requirements-develop.txt`` from this projects main folder. 
For example, an interactive browser for testing:: python ./bin/wcwidth-browser.py +==== Uses ----- +==== This library is used in: - `jquast/blessed`_: a thin, practical wrapper around terminal capabilities in Python. -- `jonathanslenders/python-prompt-toolkit`_: a Library for building powerful +- `prompt-toolkit/python-prompt-toolkit`_: a Library for building powerful interactive command lines in Python. - `dbcli/pgcli`_: Postgres CLI with autocompletion and syntax highlighting. @@ -168,7 +210,7 @@ This library is used in: - `astanin/python-tabulate`_: Pretty-print tabular data in Python, a library and a command-line utility. -- `LuminosoInsight/python-ftfy`_: Fixes mojibake and other glitches in Unicode +- `rspeer/python-ftfy`_: Fixes mojibake and other glitches in Unicode text. - `nbedos/termtosvg`_: Terminal recorder that renders sessions as SVG @@ -177,23 +219,57 @@ This library is used in: - `peterbrittain/asciimatics`_: Package to help people create full-screen text UIs. +- `python-cmd2/cmd2`_: A tool for building interactive command line apps + +- `stratis-storage/stratis-cli`_: CLI for the Stratis project + +- `ihabunek/toot`_: A Mastodon CLI/TUI client + +- `saulpw/visidata`_: Terminal spreadsheet multitool for discovering and + arranging data + +=============== Other Languages ---------------- +=============== - `timoxley/wcwidth`_: JavaScript - `janlelis/unicode-display_width`_: Ruby - `alecrabbit/php-wcwidth`_: PHP - `Text::CharWidth`_: Perl -- `bluebear94/Terminal-WCWidth`: Perl 6 +- `bluebear94/Terminal-WCWidth`_: Perl 6 - `mattn/go-runewidth`_: Go -- `emugel/wcwidth`_: Haxe -- `aperezdc/lua-wcwidth`: Lua -- `joachimschmidt557/zig-wcwidth`: Zig -- `fumiyas/wcwidth-cjk`: `LD_PRELOAD` override -- `joshuarubin/wcwidth9`: Unicode version 9 in C +- `grepsuzette/wcwidth`_: Haxe +- `aperezdc/lua-wcwidth`_: Lua +- `joachimschmidt557/zig-wcwidth`_: Zig +- `fumiyas/wcwidth-cjk`_: `LD_PRELOAD` override +- `joshuarubin/wcwidth9`_: Unicode version 9 in C +======= 
History -------- +======= +0.2.12 *2023-11-21* + * re-release to remove .pyi file misplaced in wheel files `Issue #101`_. + +0.2.11 *2023-11-20* + * Include tests files in the source distribution (`PR #98`_, `PR #100`_). + +0.2.10 *2023-11-13* + * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F + Variation Selector 16 (`PR #97`_). + * **Updated** `Specification <Specification_from_pypi_>`_. + +0.2.9 *2023-10-30* + * **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese, + Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_). + * **Updated** to include `Specification <Specification_from_pypi_>`_ of + character measurements. + +0.2.8 *2023-09-30* + * Include requirements files in the source distribution (`PR #82`_). + +0.2.7 *2023-09-28* + * **Updated** tables to include Unicode Specification 15.1.0. + * Include ``bin``, ``docs``, and ``tox.ini`` in the source distribution 0.2.6 *2023-01-14* * **Updated** tables to include Unicode Specification 14.0.0 and 15.0.0. @@ -259,7 +335,7 @@ History This code was originally derived directly from C code of the same name, whose latest version is available at -http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: +https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: * Markus Kuhn -- 2007-05-26 (Unicode 5.0) * @@ -267,11 +343,12 @@ http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: * for any purpose and without fee is hereby granted. The author * disclaims all warranties with regard to this software. -.. _`tox`: https://testrun.org/tox/latest/install.html +.. _`Specification_from_pypi`: https://wcwidth.readthedocs.io/en/latest/specs.html +.. _`tox`: https://tox.wiki/en/latest/ .. _`prospector`: https://github.com/landscapeio/prospector .. _`combining`: https://en.wikipedia.org/wiki/Combining_character .. _`bin/`: https://github.com/jquast/wcwidth/tree/master/bin -.. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/tree/master/bin/wcwidth-browser.py +.. 
_`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/blob/master/bin/wcwidth-browser.py .. _`Thomas Ballinger`: https://github.com/thomasballinger .. _`Leta Montopoli`: https://github.com/lmontopo .. _`Philip Craig`: https://github.com/philipc @@ -282,34 +359,48 @@ http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`PR #18`: https://github.com/jquast/wcwidth/pull/18 .. _`PR #30`: https://github.com/jquast/wcwidth/pull/30 .. _`PR #35`: https://github.com/jquast/wcwidth/pull/35 +.. _`PR #82`: https://github.com/jquast/wcwidth/pull/82 +.. _`PR #91`: https://github.com/jquast/wcwidth/pull/91 +.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97 +.. _`PR #98`: https://github.com/jquast/wcwidth/pull/98 +.. _`PR #100`: https://github.com/jquast/wcwidth/pull/100 +.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101 .. _`jquast/blessed`: https://github.com/jquast/blessed .. _`selectel/pyte`: https://github.com/selectel/pyte .. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies .. _`dbcli/pgcli`: https://github.com/dbcli/pgcli -.. _`jonathanslenders/python-prompt-toolkit`: https://github.com/jonathanslenders/python-prompt-toolkit +.. _`prompt-toolkit/python-prompt-toolkit`: https://github.com/prompt-toolkit/python-prompt-toolkit .. _`timoxley/wcwidth`: https://github.com/timoxley/wcwidth -.. _`wcwidth(3)`: http://man7.org/linux/man-pages/man3/wcwidth.3.html -.. _`wcswidth(3)`: http://man7.org/linux/man-pages/man3/wcswidth.3.html +.. _`wcwidth(3)`: https://man7.org/linux/man-pages/man3/wcwidth.3.html +.. _`wcswidth(3)`: https://man7.org/linux/man-pages/man3/wcswidth.3.html .. _`astanin/python-tabulate`: https://github.com/astanin/python-tabulate .. _`janlelis/unicode-display_width`: https://github.com/janlelis/unicode-display_width -.. _`LuminosoInsight/python-ftfy`: https://github.com/LuminosoInsight/python-ftfy +.. _`rspeer/python-ftfy`: https://github.com/rspeer/python-ftfy .. 
_`alecrabbit/php-wcwidth`: https://github.com/alecrabbit/php-wcwidth .. _`Text::CharWidth`: https://metacpan.org/pod/Text::CharWidth .. _`bluebear94/Terminal-WCWidth`: https://github.com/bluebear94/Terminal-WCWidth .. _`mattn/go-runewidth`: https://github.com/mattn/go-runewidth -.. _`emugel/wcwidth`: https://github.com/emugel/wcwidth +.. _`grepsuzette/wcwidth`: https://github.com/grepsuzette/wcwidth .. _`jquast/ucs-detect`: https://github.com/jquast/ucs-detect .. _`Avram Lubkin`: https://github.com/avylove .. _`nbedos/termtosvg`: https://github.com/nbedos/termtosvg .. _`peterbrittain/asciimatics`: https://github.com/peterbrittain/asciimatics .. _`aperezdc/lua-wcwidth`: https://github.com/aperezdc/lua-wcwidth +.. _`joachimschmidt557/zig-wcwidth`: https://github.com/joachimschmidt557/zig-wcwidth .. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk +.. _`joshuarubin/wcwidth9`: https://github.com/joshuarubin/wcwidth9 +.. _`python-cmd2/cmd2`: https://github.com/python-cmd2/cmd2 +.. _`stratis-storage/stratis-cli`: https://github.com/stratis-storage/stratis-cli +.. _`ihabunek/toot`: https://github.com/ihabunek/toot +.. _`saulpw/visidata`: https://github.com/saulpw/visidata +.. _`pip-tools`: https://pip-tools.readthedocs.io/ +.. _`sphinx`: https://www.sphinx-doc.org/ .. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi :alt: Downloads :target: https://pypi.org/project/wcwidth/ .. |codecov| image:: https://codecov.io/gh/jquast/wcwidth/branch/master/graph/badge.svg :alt: codecov.io Code Coverage - :target: https://codecov.io/gh/jquast/wcwidth/ -.. |license| image:: https://img.shields.io/github/license/jquast/wcwidth.svg - :target: https://pypi.python.org/pypi/wcwidth/ + :target: https://app.codecov.io/gh/jquast/wcwidth/ +.. 
|license| image:: https://img.shields.io/pypi/l/wcwidth.svg + :target: https://pypi.org/project/wcwidth/ :alt: MIT License diff --git a/contrib/python/wcwidth/py2/README.rst b/contrib/python/wcwidth/py2/README.rst index 813e244ba2..a0dd44cb83 100644 --- a/contrib/python/wcwidth/py2/README.rst +++ b/contrib/python/wcwidth/py2/README.rst @@ -95,7 +95,7 @@ Briefly, return values of function ``wcwidth()`` are: Function ``wcswidth()`` simply returns the sum of all values for each character along a string, or ``-1`` when it occurs anywhere along a string. -Full API Documentation at http://wcwidth.readthedocs.org +Full API Documentation at https://wcwidth.readthedocs.org ========== Developing @@ -103,32 +103,70 @@ Developing Install wcwidth in editable mode:: - pip install -e. + pip install -e . Execute unit tests using tox_:: - tox + tox -e py27,py35,py36,py37,py38,py39,py310,py311,py312 + +Updating Unicode Version +------------------------ Regenerate python code tables from latest Unicode Specification data files:: tox -e update +The script is located at ``bin/update-tables.py``, requires Python 3.9 or +later. It is recommended but not necessary to run this script with the newest +Python, because the newest Python has the latest ``unicodedata`` for generating +comments. + +Building Documentation +---------------------- + +This project is using `sphinx`_ 4.5 to build documentation:: + + tox -e sphinx + +The output will be in ``docs/_build/html/``. + +Updating Requirements +--------------------- + +This project is using `pip-tools`_ to manage requirements. 
+ +To upgrade requirements for updating unicode version, run:: + + tox -e update_requirements_update + +To upgrade requirements for testing, run:: + + tox -e update_requirements37,update_requirements39 + +To upgrade requirements for building documentation, run:: + + tox -e update_requirements_docs + +Utilities +--------- + Supplementary tools for browsing and testing terminals for wide unicode characters are found in the `bin/`_ of this project's source code. Just ensure -to first ``pip install -erequirements-develop.txt`` from this projects main +to first ``pip install -r requirements-develop.txt`` from this projects main folder. For example, an interactive browser for testing:: python ./bin/wcwidth-browser.py +==== Uses ----- +==== This library is used in: - `jquast/blessed`_: a thin, practical wrapper around terminal capabilities in Python. -- `jonathanslenders/python-prompt-toolkit`_: a Library for building powerful +- `prompt-toolkit/python-prompt-toolkit`_: a Library for building powerful interactive command lines in Python. - `dbcli/pgcli`_: Postgres CLI with autocompletion and syntax highlighting. @@ -141,7 +179,7 @@ This library is used in: - `astanin/python-tabulate`_: Pretty-print tabular data in Python, a library and a command-line utility. -- `LuminosoInsight/python-ftfy`_: Fixes mojibake and other glitches in Unicode +- `rspeer/python-ftfy`_: Fixes mojibake and other glitches in Unicode text. - `nbedos/termtosvg`_: Terminal recorder that renders sessions as SVG @@ -150,23 +188,57 @@ This library is used in: - `peterbrittain/asciimatics`_: Package to help people create full-screen text UIs. 
+- `python-cmd2/cmd2`_: A tool for building interactive command line apps + +- `stratis-storage/stratis-cli`_: CLI for the Stratis project + +- `ihabunek/toot`_: A Mastodon CLI/TUI client + +- `saulpw/visidata`_: Terminal spreadsheet multitool for discovering and + arranging data + +=============== Other Languages ---------------- +=============== - `timoxley/wcwidth`_: JavaScript - `janlelis/unicode-display_width`_: Ruby - `alecrabbit/php-wcwidth`_: PHP - `Text::CharWidth`_: Perl -- `bluebear94/Terminal-WCWidth`: Perl 6 +- `bluebear94/Terminal-WCWidth`_: Perl 6 - `mattn/go-runewidth`_: Go -- `emugel/wcwidth`_: Haxe -- `aperezdc/lua-wcwidth`: Lua -- `joachimschmidt557/zig-wcwidth`: Zig -- `fumiyas/wcwidth-cjk`: `LD_PRELOAD` override -- `joshuarubin/wcwidth9`: Unicode version 9 in C +- `grepsuzette/wcwidth`_: Haxe +- `aperezdc/lua-wcwidth`_: Lua +- `joachimschmidt557/zig-wcwidth`_: Zig +- `fumiyas/wcwidth-cjk`_: `LD_PRELOAD` override +- `joshuarubin/wcwidth9`_: Unicode version 9 in C +======= History -------- +======= +0.2.12 *2023-11-21* + * re-release to remove .pyi file misplaced in wheel files `Issue #101`_. + +0.2.11 *2023-11-20* + * Include tests files in the source distribution (`PR #98`_, `PR #100`_). + +0.2.10 *2023-11-13* + * **Bugfix** accounting of some kinds of emoji sequences using U+FE0F + Variation Selector 16 (`PR #97`_). + * **Updated** `Specification <Specification_from_pypi_>`_. + +0.2.9 *2023-10-30* + * **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese, + Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_). + * **Updated** to include `Specification <Specification_from_pypi_>`_ of + character measurements. + +0.2.8 *2023-09-30* + * Include requirements files in the source distribution (`PR #82`_). + +0.2.7 *2023-09-28* + * **Updated** tables to include Unicode Specification 15.1.0. 
+ * Include ``bin``, ``docs``, and ``tox.ini`` in the source distribution 0.2.6 *2023-01-14* * **Updated** tables to include Unicode Specification 14.0.0 and 15.0.0. @@ -232,7 +304,7 @@ History This code was originally derived directly from C code of the same name, whose latest version is available at -http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: +https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: * Markus Kuhn -- 2007-05-26 (Unicode 5.0) * @@ -240,11 +312,12 @@ http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: * for any purpose and without fee is hereby granted. The author * disclaims all warranties with regard to this software. -.. _`tox`: https://testrun.org/tox/latest/install.html +.. _`Specification_from_pypi`: https://wcwidth.readthedocs.io/en/latest/specs.html +.. _`tox`: https://tox.wiki/en/latest/ .. _`prospector`: https://github.com/landscapeio/prospector .. _`combining`: https://en.wikipedia.org/wiki/Combining_character .. _`bin/`: https://github.com/jquast/wcwidth/tree/master/bin -.. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/tree/master/bin/wcwidth-browser.py +.. _`bin/wcwidth-browser.py`: https://github.com/jquast/wcwidth/blob/master/bin/wcwidth-browser.py .. _`Thomas Ballinger`: https://github.com/thomasballinger .. _`Leta Montopoli`: https://github.com/lmontopo .. _`Philip Craig`: https://github.com/philipc @@ -255,34 +328,48 @@ http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`PR #18`: https://github.com/jquast/wcwidth/pull/18 .. _`PR #30`: https://github.com/jquast/wcwidth/pull/30 .. _`PR #35`: https://github.com/jquast/wcwidth/pull/35 +.. _`PR #82`: https://github.com/jquast/wcwidth/pull/82 +.. _`PR #91`: https://github.com/jquast/wcwidth/pull/91 +.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97 +.. _`PR #98`: https://github.com/jquast/wcwidth/pull/98 +.. _`PR #100`: https://github.com/jquast/wcwidth/pull/100 +.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101 .. 
_`jquast/blessed`: https://github.com/jquast/blessed .. _`selectel/pyte`: https://github.com/selectel/pyte .. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies .. _`dbcli/pgcli`: https://github.com/dbcli/pgcli -.. _`jonathanslenders/python-prompt-toolkit`: https://github.com/jonathanslenders/python-prompt-toolkit +.. _`prompt-toolkit/python-prompt-toolkit`: https://github.com/prompt-toolkit/python-prompt-toolkit .. _`timoxley/wcwidth`: https://github.com/timoxley/wcwidth -.. _`wcwidth(3)`: http://man7.org/linux/man-pages/man3/wcwidth.3.html -.. _`wcswidth(3)`: http://man7.org/linux/man-pages/man3/wcswidth.3.html +.. _`wcwidth(3)`: https://man7.org/linux/man-pages/man3/wcwidth.3.html +.. _`wcswidth(3)`: https://man7.org/linux/man-pages/man3/wcswidth.3.html .. _`astanin/python-tabulate`: https://github.com/astanin/python-tabulate .. _`janlelis/unicode-display_width`: https://github.com/janlelis/unicode-display_width -.. _`LuminosoInsight/python-ftfy`: https://github.com/LuminosoInsight/python-ftfy +.. _`rspeer/python-ftfy`: https://github.com/rspeer/python-ftfy .. _`alecrabbit/php-wcwidth`: https://github.com/alecrabbit/php-wcwidth .. _`Text::CharWidth`: https://metacpan.org/pod/Text::CharWidth .. _`bluebear94/Terminal-WCWidth`: https://github.com/bluebear94/Terminal-WCWidth .. _`mattn/go-runewidth`: https://github.com/mattn/go-runewidth -.. _`emugel/wcwidth`: https://github.com/emugel/wcwidth +.. _`grepsuzette/wcwidth`: https://github.com/grepsuzette/wcwidth .. _`jquast/ucs-detect`: https://github.com/jquast/ucs-detect .. _`Avram Lubkin`: https://github.com/avylove .. _`nbedos/termtosvg`: https://github.com/nbedos/termtosvg .. _`peterbrittain/asciimatics`: https://github.com/peterbrittain/asciimatics .. _`aperezdc/lua-wcwidth`: https://github.com/aperezdc/lua-wcwidth +.. _`joachimschmidt557/zig-wcwidth`: https://github.com/joachimschmidt557/zig-wcwidth .. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk +.. 
_`joshuarubin/wcwidth9`: https://github.com/joshuarubin/wcwidth9 +.. _`python-cmd2/cmd2`: https://github.com/python-cmd2/cmd2 +.. _`stratis-storage/stratis-cli`: https://github.com/stratis-storage/stratis-cli +.. _`ihabunek/toot`: https://github.com/ihabunek/toot +.. _`saulpw/visidata`: https://github.com/saulpw/visidata +.. _`pip-tools`: https://pip-tools.readthedocs.io/ +.. _`sphinx`: https://www.sphinx-doc.org/ .. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi :alt: Downloads :target: https://pypi.org/project/wcwidth/ .. |codecov| image:: https://codecov.io/gh/jquast/wcwidth/branch/master/graph/badge.svg :alt: codecov.io Code Coverage - :target: https://codecov.io/gh/jquast/wcwidth/ -.. |license| image:: https://img.shields.io/github/license/jquast/wcwidth.svg - :target: https://pypi.python.org/pypi/wcwidth/ + :target: https://app.codecov.io/gh/jquast/wcwidth/ +.. |license| image:: https://img.shields.io/pypi/l/wcwidth.svg + :target: https://pypi.org/project/wcwidth/ :alt: MIT License diff --git a/contrib/python/wcwidth/py2/tests/emoji-variation-sequences.txt b/contrib/python/wcwidth/py2/tests/emoji-variation-sequences.txt new file mode 100644 index 0000000000..d8a3c9f431 --- /dev/null +++ b/contrib/python/wcwidth/py2/tests/emoji-variation-sequences.txt @@ -0,0 +1,757 @@ +# emoji-variation-sequences.txt +# Date: 2023-02-01, 02:22:54 GMT +# © 2023 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
+# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Emoji Variation Sequences for UTS #51 +# Used with Emoji Version 15.1 and subsequent minor revisions (if any) +# +# For documentation and usage, see https://www.unicode.org/reports/tr51 +# +0023 FE0E ; text style; # (1.1) NUMBER SIGN +0023 FE0F ; emoji style; # (1.1) NUMBER SIGN +002A FE0E ; text style; # (1.1) ASTERISK +002A FE0F ; emoji style; # (1.1) ASTERISK +0030 FE0E ; text style; # (1.1) DIGIT ZERO +0030 FE0F ; emoji style; # (1.1) DIGIT ZERO +0031 FE0E ; text style; # (1.1) DIGIT ONE +0031 FE0F ; emoji style; # (1.1) DIGIT ONE +0032 FE0E ; text style; # (1.1) DIGIT TWO +0032 FE0F ; emoji style; # (1.1) DIGIT TWO +0033 FE0E ; text style; # (1.1) DIGIT THREE +0033 FE0F ; emoji style; # (1.1) DIGIT THREE +0034 FE0E ; text style; # (1.1) DIGIT FOUR +0034 FE0F ; emoji style; # (1.1) DIGIT FOUR +0035 FE0E ; text style; # (1.1) DIGIT FIVE +0035 FE0F ; emoji style; # (1.1) DIGIT FIVE +0036 FE0E ; text style; # (1.1) DIGIT SIX +0036 FE0F ; emoji style; # (1.1) DIGIT SIX +0037 FE0E ; text style; # (1.1) DIGIT SEVEN +0037 FE0F ; emoji style; # (1.1) DIGIT SEVEN +0038 FE0E ; text style; # (1.1) DIGIT EIGHT +0038 FE0F ; emoji style; # (1.1) DIGIT EIGHT +0039 FE0E ; text style; # (1.1) DIGIT NINE +0039 FE0F ; emoji style; # (1.1) DIGIT NINE +00A9 FE0E ; text style; # (1.1) COPYRIGHT SIGN +00A9 FE0F ; emoji style; # (1.1) COPYRIGHT SIGN +00AE FE0E ; text style; # (1.1) REGISTERED SIGN +00AE FE0F ; emoji style; # (1.1) REGISTERED SIGN +203C FE0E ; text style; # (1.1) DOUBLE EXCLAMATION MARK +203C FE0F ; emoji style; # (1.1) DOUBLE EXCLAMATION MARK +2049 FE0E ; text style; # (3.0) EXCLAMATION QUESTION MARK +2049 FE0F ; emoji style; # (3.0) EXCLAMATION QUESTION MARK +2122 FE0E ; text style; # (1.1) TRADE MARK SIGN +2122 FE0F ; emoji style; # (1.1) TRADE MARK SIGN +2139 FE0E ; text style; # (3.0) INFORMATION SOURCE +2139 FE0F ; emoji style; # (3.0) INFORMATION SOURCE +2194 FE0E ; text style; # (1.1) LEFT 
RIGHT ARROW +2194 FE0F ; emoji style; # (1.1) LEFT RIGHT ARROW +2195 FE0E ; text style; # (1.1) UP DOWN ARROW +2195 FE0F ; emoji style; # (1.1) UP DOWN ARROW +2196 FE0E ; text style; # (1.1) NORTH WEST ARROW +2196 FE0F ; emoji style; # (1.1) NORTH WEST ARROW +2197 FE0E ; text style; # (1.1) NORTH EAST ARROW +2197 FE0F ; emoji style; # (1.1) NORTH EAST ARROW +2198 FE0E ; text style; # (1.1) SOUTH EAST ARROW +2198 FE0F ; emoji style; # (1.1) SOUTH EAST ARROW +2199 FE0E ; text style; # (1.1) SOUTH WEST ARROW +2199 FE0F ; emoji style; # (1.1) SOUTH WEST ARROW +21A9 FE0E ; text style; # (1.1) LEFTWARDS ARROW WITH HOOK +21A9 FE0F ; emoji style; # (1.1) LEFTWARDS ARROW WITH HOOK +21AA FE0E ; text style; # (1.1) RIGHTWARDS ARROW WITH HOOK +21AA FE0F ; emoji style; # (1.1) RIGHTWARDS ARROW WITH HOOK +231A FE0E ; text style; # (1.1) WATCH +231A FE0F ; emoji style; # (1.1) WATCH +231B FE0E ; text style; # (1.1) HOURGLASS +231B FE0F ; emoji style; # (1.1) HOURGLASS +2328 FE0E ; text style; # (1.1) KEYBOARD +2328 FE0F ; emoji style; # (1.1) KEYBOARD +23CF FE0E ; text style; # (4.0) EJECT SYMBOL +23CF FE0F ; emoji style; # (4.0) EJECT SYMBOL +23E9 FE0E ; text style; # (6.0) BLACK RIGHT-POINTING DOUBLE TRIANGLE +23E9 FE0F ; emoji style; # (6.0) BLACK RIGHT-POINTING DOUBLE TRIANGLE +23EA FE0E ; text style; # (6.0) BLACK LEFT-POINTING DOUBLE TRIANGLE +23EA FE0F ; emoji style; # (6.0) BLACK LEFT-POINTING DOUBLE TRIANGLE +23EB FE0E ; text style; # (6.0) BLACK UP-POINTING DOUBLE TRIANGLE +23EB FE0F ; emoji style; # (6.0) BLACK UP-POINTING DOUBLE TRIANGLE +23EC FE0E ; text style; # (6.0) BLACK DOWN-POINTING DOUBLE TRIANGLE +23EC FE0F ; emoji style; # (6.0) BLACK DOWN-POINTING DOUBLE TRIANGLE +23ED FE0E ; text style; # (6.0) BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR +23ED FE0F ; emoji style; # (6.0) BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR +23EE FE0E ; text style; # (6.0) BLACK LEFT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR +23EE FE0F ; emoji style; # (6.0) 
BLACK LEFT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR +23EF FE0E ; text style; # (6.0) BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR +23EF FE0F ; emoji style; # (6.0) BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR +23F0 FE0E ; text style; # (6.0) ALARM CLOCK +23F0 FE0F ; emoji style; # (6.0) ALARM CLOCK +23F1 FE0E ; text style; # (6.0) STOPWATCH +23F1 FE0F ; emoji style; # (6.0) STOPWATCH +23F2 FE0E ; text style; # (6.0) TIMER CLOCK +23F2 FE0F ; emoji style; # (6.0) TIMER CLOCK +23F3 FE0E ; text style; # (6.0) HOURGLASS WITH FLOWING SAND +23F3 FE0F ; emoji style; # (6.0) HOURGLASS WITH FLOWING SAND +23F8 FE0E ; text style; # (7.0) DOUBLE VERTICAL BAR +23F8 FE0F ; emoji style; # (7.0) DOUBLE VERTICAL BAR +23F9 FE0E ; text style; # (7.0) BLACK SQUARE FOR STOP +23F9 FE0F ; emoji style; # (7.0) BLACK SQUARE FOR STOP +23FA FE0E ; text style; # (7.0) BLACK CIRCLE FOR RECORD +23FA FE0F ; emoji style; # (7.0) BLACK CIRCLE FOR RECORD +24C2 FE0E ; text style; # (1.1) CIRCLED LATIN CAPITAL LETTER M +24C2 FE0F ; emoji style; # (1.1) CIRCLED LATIN CAPITAL LETTER M +25AA FE0E ; text style; # (1.1) BLACK SMALL SQUARE +25AA FE0F ; emoji style; # (1.1) BLACK SMALL SQUARE +25AB FE0E ; text style; # (1.1) WHITE SMALL SQUARE +25AB FE0F ; emoji style; # (1.1) WHITE SMALL SQUARE +25B6 FE0E ; text style; # (1.1) BLACK RIGHT-POINTING TRIANGLE +25B6 FE0F ; emoji style; # (1.1) BLACK RIGHT-POINTING TRIANGLE +25C0 FE0E ; text style; # (1.1) BLACK LEFT-POINTING TRIANGLE +25C0 FE0F ; emoji style; # (1.1) BLACK LEFT-POINTING TRIANGLE +25FB FE0E ; text style; # (3.2) WHITE MEDIUM SQUARE +25FB FE0F ; emoji style; # (3.2) WHITE MEDIUM SQUARE +25FC FE0E ; text style; # (3.2) BLACK MEDIUM SQUARE +25FC FE0F ; emoji style; # (3.2) BLACK MEDIUM SQUARE +25FD FE0E ; text style; # (3.2) WHITE MEDIUM SMALL SQUARE +25FD FE0F ; emoji style; # (3.2) WHITE MEDIUM SMALL SQUARE +25FE FE0E ; text style; # (3.2) BLACK MEDIUM SMALL SQUARE +25FE FE0F ; emoji style; # (3.2) BLACK MEDIUM SMALL SQUARE 
+2600 FE0E ; text style; # (1.1) BLACK SUN WITH RAYS +2600 FE0F ; emoji style; # (1.1) BLACK SUN WITH RAYS +2601 FE0E ; text style; # (1.1) CLOUD +2601 FE0F ; emoji style; # (1.1) CLOUD +2602 FE0E ; text style; # (1.1) UMBRELLA +2602 FE0F ; emoji style; # (1.1) UMBRELLA +2603 FE0E ; text style; # (1.1) SNOWMAN +2603 FE0F ; emoji style; # (1.1) SNOWMAN +2604 FE0E ; text style; # (1.1) COMET +2604 FE0F ; emoji style; # (1.1) COMET +260E FE0E ; text style; # (1.1) BLACK TELEPHONE +260E FE0F ; emoji style; # (1.1) BLACK TELEPHONE +2611 FE0E ; text style; # (1.1) BALLOT BOX WITH CHECK +2611 FE0F ; emoji style; # (1.1) BALLOT BOX WITH CHECK +2614 FE0E ; text style; # (4.0) UMBRELLA WITH RAIN DROPS +2614 FE0F ; emoji style; # (4.0) UMBRELLA WITH RAIN DROPS +2615 FE0E ; text style; # (4.0) HOT BEVERAGE +2615 FE0F ; emoji style; # (4.0) HOT BEVERAGE +2618 FE0E ; text style; # (4.1) SHAMROCK +2618 FE0F ; emoji style; # (4.1) SHAMROCK +261D FE0E ; text style; # (1.1) WHITE UP POINTING INDEX +261D FE0F ; emoji style; # (1.1) WHITE UP POINTING INDEX +2620 FE0E ; text style; # (1.1) SKULL AND CROSSBONES +2620 FE0F ; emoji style; # (1.1) SKULL AND CROSSBONES +2622 FE0E ; text style; # (1.1) RADIOACTIVE SIGN +2622 FE0F ; emoji style; # (1.1) RADIOACTIVE SIGN +2623 FE0E ; text style; # (1.1) BIOHAZARD SIGN +2623 FE0F ; emoji style; # (1.1) BIOHAZARD SIGN +2626 FE0E ; text style; # (1.1) ORTHODOX CROSS +2626 FE0F ; emoji style; # (1.1) ORTHODOX CROSS +262A FE0E ; text style; # (1.1) STAR AND CRESCENT +262A FE0F ; emoji style; # (1.1) STAR AND CRESCENT +262E FE0E ; text style; # (1.1) PEACE SYMBOL +262E FE0F ; emoji style; # (1.1) PEACE SYMBOL +262F FE0E ; text style; # (1.1) YIN YANG +262F FE0F ; emoji style; # (1.1) YIN YANG +2638 FE0E ; text style; # (1.1) WHEEL OF DHARMA +2638 FE0F ; emoji style; # (1.1) WHEEL OF DHARMA +2639 FE0E ; text style; # (1.1) WHITE FROWNING FACE +2639 FE0F ; emoji style; # (1.1) WHITE FROWNING FACE +263A FE0E ; text style; # (1.1) WHITE SMILING FACE 
+263A FE0F ; emoji style; # (1.1) WHITE SMILING FACE +2640 FE0E ; text style; # (1.1) FEMALE SIGN +2640 FE0F ; emoji style; # (1.1) FEMALE SIGN +2642 FE0E ; text style; # (1.1) MALE SIGN +2642 FE0F ; emoji style; # (1.1) MALE SIGN +2648 FE0E ; text style; # (1.1) ARIES +2648 FE0F ; emoji style; # (1.1) ARIES +2649 FE0E ; text style; # (1.1) TAURUS +2649 FE0F ; emoji style; # (1.1) TAURUS +264A FE0E ; text style; # (1.1) GEMINI +264A FE0F ; emoji style; # (1.1) GEMINI +264B FE0E ; text style; # (1.1) CANCER +264B FE0F ; emoji style; # (1.1) CANCER +264C FE0E ; text style; # (1.1) LEO +264C FE0F ; emoji style; # (1.1) LEO +264D FE0E ; text style; # (1.1) VIRGO +264D FE0F ; emoji style; # (1.1) VIRGO +264E FE0E ; text style; # (1.1) LIBRA +264E FE0F ; emoji style; # (1.1) LIBRA +264F FE0E ; text style; # (1.1) SCORPIUS +264F FE0F ; emoji style; # (1.1) SCORPIUS +2650 FE0E ; text style; # (1.1) SAGITTARIUS +2650 FE0F ; emoji style; # (1.1) SAGITTARIUS +2651 FE0E ; text style; # (1.1) CAPRICORN +2651 FE0F ; emoji style; # (1.1) CAPRICORN +2652 FE0E ; text style; # (1.1) AQUARIUS +2652 FE0F ; emoji style; # (1.1) AQUARIUS +2653 FE0E ; text style; # (1.1) PISCES +2653 FE0F ; emoji style; # (1.1) PISCES +265F FE0E ; text style; # (1.1) BLACK CHESS PAWN +265F FE0F ; emoji style; # (1.1) BLACK CHESS PAWN +2660 FE0E ; text style; # (1.1) BLACK SPADE SUIT +2660 FE0F ; emoji style; # (1.1) BLACK SPADE SUIT +2663 FE0E ; text style; # (1.1) BLACK CLUB SUIT +2663 FE0F ; emoji style; # (1.1) BLACK CLUB SUIT +2665 FE0E ; text style; # (1.1) BLACK HEART SUIT +2665 FE0F ; emoji style; # (1.1) BLACK HEART SUIT +2666 FE0E ; text style; # (1.1) BLACK DIAMOND SUIT +2666 FE0F ; emoji style; # (1.1) BLACK DIAMOND SUIT +2668 FE0E ; text style; # (1.1) HOT SPRINGS +2668 FE0F ; emoji style; # (1.1) HOT SPRINGS +267B FE0E ; text style; # (3.2) BLACK UNIVERSAL RECYCLING SYMBOL +267B FE0F ; emoji style; # (3.2) BLACK UNIVERSAL RECYCLING SYMBOL +267E FE0E ; text style; # (4.1) PERMANENT PAPER SIGN 
+267E FE0F ; emoji style; # (4.1) PERMANENT PAPER SIGN +267F FE0E ; text style; # (4.1) WHEELCHAIR SYMBOL +267F FE0F ; emoji style; # (4.1) WHEELCHAIR SYMBOL +2692 FE0E ; text style; # (4.1) HAMMER AND PICK +2692 FE0F ; emoji style; # (4.1) HAMMER AND PICK +2693 FE0E ; text style; # (4.1) ANCHOR +2693 FE0F ; emoji style; # (4.1) ANCHOR +2694 FE0E ; text style; # (4.1) CROSSED SWORDS +2694 FE0F ; emoji style; # (4.1) CROSSED SWORDS +2695 FE0E ; text style; # (4.1) STAFF OF AESCULAPIUS +2695 FE0F ; emoji style; # (4.1) STAFF OF AESCULAPIUS +2696 FE0E ; text style; # (4.1) SCALES +2696 FE0F ; emoji style; # (4.1) SCALES +2697 FE0E ; text style; # (4.1) ALEMBIC +2697 FE0F ; emoji style; # (4.1) ALEMBIC +2699 FE0E ; text style; # (4.1) GEAR +2699 FE0F ; emoji style; # (4.1) GEAR +269B FE0E ; text style; # (4.1) ATOM SYMBOL +269B FE0F ; emoji style; # (4.1) ATOM SYMBOL +269C FE0E ; text style; # (4.1) FLEUR-DE-LIS +269C FE0F ; emoji style; # (4.1) FLEUR-DE-LIS +26A0 FE0E ; text style; # (4.0) WARNING SIGN +26A0 FE0F ; emoji style; # (4.0) WARNING SIGN +26A1 FE0E ; text style; # (4.0) HIGH VOLTAGE SIGN +26A1 FE0F ; emoji style; # (4.0) HIGH VOLTAGE SIGN +26A7 FE0E ; text style; # (4.1) MALE WITH STROKE AND MALE AND FEMALE SIGN +26A7 FE0F ; emoji style; # (4.1) MALE WITH STROKE AND MALE AND FEMALE SIGN +26AA FE0E ; text style; # (4.1) MEDIUM WHITE CIRCLE +26AA FE0F ; emoji style; # (4.1) MEDIUM WHITE CIRCLE +26AB FE0E ; text style; # (4.1) MEDIUM BLACK CIRCLE +26AB FE0F ; emoji style; # (4.1) MEDIUM BLACK CIRCLE +26B0 FE0E ; text style; # (4.1) COFFIN +26B0 FE0F ; emoji style; # (4.1) COFFIN +26B1 FE0E ; text style; # (4.1) FUNERAL URN +26B1 FE0F ; emoji style; # (4.1) FUNERAL URN +26BD FE0E ; text style; # (5.2) SOCCER BALL +26BD FE0F ; emoji style; # (5.2) SOCCER BALL +26BE FE0E ; text style; # (5.2) BASEBALL +26BE FE0F ; emoji style; # (5.2) BASEBALL +26C4 FE0E ; text style; # (5.2) SNOWMAN WITHOUT SNOW +26C4 FE0F ; emoji style; # (5.2) SNOWMAN WITHOUT SNOW +26C5 FE0E ; 
text style; # (5.2) SUN BEHIND CLOUD +26C5 FE0F ; emoji style; # (5.2) SUN BEHIND CLOUD +26C8 FE0E ; text style; # (5.2) THUNDER CLOUD AND RAIN +26C8 FE0F ; emoji style; # (5.2) THUNDER CLOUD AND RAIN +26CE FE0E ; text style; # (6.0) OPHIUCHUS +26CE FE0F ; emoji style; # (6.0) OPHIUCHUS +26CF FE0E ; text style; # (5.2) PICK +26CF FE0F ; emoji style; # (5.2) PICK +26D1 FE0E ; text style; # (5.2) HELMET WITH WHITE CROSS +26D1 FE0F ; emoji style; # (5.2) HELMET WITH WHITE CROSS +26D3 FE0E ; text style; # (5.2) CHAINS +26D3 FE0F ; emoji style; # (5.2) CHAINS +26D4 FE0E ; text style; # (5.2) NO ENTRY +26D4 FE0F ; emoji style; # (5.2) NO ENTRY +26E9 FE0E ; text style; # (5.2) SHINTO SHRINE +26E9 FE0F ; emoji style; # (5.2) SHINTO SHRINE +26EA FE0E ; text style; # (5.2) CHURCH +26EA FE0F ; emoji style; # (5.2) CHURCH +26F0 FE0E ; text style; # (5.2) MOUNTAIN +26F0 FE0F ; emoji style; # (5.2) MOUNTAIN +26F1 FE0E ; text style; # (5.2) UMBRELLA ON GROUND +26F1 FE0F ; emoji style; # (5.2) UMBRELLA ON GROUND +26F2 FE0E ; text style; # (5.2) FOUNTAIN +26F2 FE0F ; emoji style; # (5.2) FOUNTAIN +26F3 FE0E ; text style; # (5.2) FLAG IN HOLE +26F3 FE0F ; emoji style; # (5.2) FLAG IN HOLE +26F4 FE0E ; text style; # (5.2) FERRY +26F4 FE0F ; emoji style; # (5.2) FERRY +26F5 FE0E ; text style; # (5.2) SAILBOAT +26F5 FE0F ; emoji style; # (5.2) SAILBOAT +26F7 FE0E ; text style; # (5.2) SKIER +26F7 FE0F ; emoji style; # (5.2) SKIER +26F8 FE0E ; text style; # (5.2) ICE SKATE +26F8 FE0F ; emoji style; # (5.2) ICE SKATE +26F9 FE0E ; text style; # (5.2) PERSON WITH BALL +26F9 FE0F ; emoji style; # (5.2) PERSON WITH BALL +26FA FE0E ; text style; # (5.2) TENT +26FA FE0F ; emoji style; # (5.2) TENT +26FD FE0E ; text style; # (5.2) FUEL PUMP +26FD FE0F ; emoji style; # (5.2) FUEL PUMP +2702 FE0E ; text style; # (1.1) BLACK SCISSORS +2702 FE0F ; emoji style; # (1.1) BLACK SCISSORS +2705 FE0E ; text style; # (6.0) WHITE HEAVY CHECK MARK +2705 FE0F ; emoji style; # (6.0) WHITE HEAVY CHECK MARK 
+2708 FE0E ; text style; # (1.1) AIRPLANE +2708 FE0F ; emoji style; # (1.1) AIRPLANE +2709 FE0E ; text style; # (1.1) ENVELOPE +2709 FE0F ; emoji style; # (1.1) ENVELOPE +270A FE0E ; text style; # (6.0) RAISED FIST +270A FE0F ; emoji style; # (6.0) RAISED FIST +270B FE0E ; text style; # (6.0) RAISED HAND +270B FE0F ; emoji style; # (6.0) RAISED HAND +270C FE0E ; text style; # (1.1) VICTORY HAND +270C FE0F ; emoji style; # (1.1) VICTORY HAND +270D FE0E ; text style; # (1.1) WRITING HAND +270D FE0F ; emoji style; # (1.1) WRITING HAND +270F FE0E ; text style; # (1.1) PENCIL +270F FE0F ; emoji style; # (1.1) PENCIL +2712 FE0E ; text style; # (1.1) BLACK NIB +2712 FE0F ; emoji style; # (1.1) BLACK NIB +2714 FE0E ; text style; # (1.1) HEAVY CHECK MARK +2714 FE0F ; emoji style; # (1.1) HEAVY CHECK MARK +2716 FE0E ; text style; # (1.1) HEAVY MULTIPLICATION X +2716 FE0F ; emoji style; # (1.1) HEAVY MULTIPLICATION X +271D FE0E ; text style; # (1.1) LATIN CROSS +271D FE0F ; emoji style; # (1.1) LATIN CROSS +2721 FE0E ; text style; # (1.1) STAR OF DAVID +2721 FE0F ; emoji style; # (1.1) STAR OF DAVID +2728 FE0E ; text style; # (6.0) SPARKLES +2728 FE0F ; emoji style; # (6.0) SPARKLES +2733 FE0E ; text style; # (1.1) EIGHT SPOKED ASTERISK +2733 FE0F ; emoji style; # (1.1) EIGHT SPOKED ASTERISK +2734 FE0E ; text style; # (1.1) EIGHT POINTED BLACK STAR +2734 FE0F ; emoji style; # (1.1) EIGHT POINTED BLACK STAR +2744 FE0E ; text style; # (1.1) SNOWFLAKE +2744 FE0F ; emoji style; # (1.1) SNOWFLAKE +2747 FE0E ; text style; # (1.1) SPARKLE +2747 FE0F ; emoji style; # (1.1) SPARKLE +274C FE0E ; text style; # (6.0) CROSS MARK +274C FE0F ; emoji style; # (6.0) CROSS MARK +274E FE0E ; text style; # (6.0) NEGATIVE SQUARED CROSS MARK +274E FE0F ; emoji style; # (6.0) NEGATIVE SQUARED CROSS MARK +2753 FE0E ; text style; # (6.0) BLACK QUESTION MARK ORNAMENT +2753 FE0F ; emoji style; # (6.0) BLACK QUESTION MARK ORNAMENT +2754 FE0E ; text style; # (6.0) WHITE QUESTION MARK ORNAMENT +2754 FE0F 
; emoji style; # (6.0) WHITE QUESTION MARK ORNAMENT +2755 FE0E ; text style; # (6.0) WHITE EXCLAMATION MARK ORNAMENT +2755 FE0F ; emoji style; # (6.0) WHITE EXCLAMATION MARK ORNAMENT +2757 FE0E ; text style; # (5.2) HEAVY EXCLAMATION MARK SYMBOL +2757 FE0F ; emoji style; # (5.2) HEAVY EXCLAMATION MARK SYMBOL +2763 FE0E ; text style; # (1.1) HEAVY HEART EXCLAMATION MARK ORNAMENT +2763 FE0F ; emoji style; # (1.1) HEAVY HEART EXCLAMATION MARK ORNAMENT +2764 FE0E ; text style; # (1.1) HEAVY BLACK HEART +2764 FE0F ; emoji style; # (1.1) HEAVY BLACK HEART +2795 FE0E ; text style; # (6.0) HEAVY PLUS SIGN +2795 FE0F ; emoji style; # (6.0) HEAVY PLUS SIGN +2796 FE0E ; text style; # (6.0) HEAVY MINUS SIGN +2796 FE0F ; emoji style; # (6.0) HEAVY MINUS SIGN +2797 FE0E ; text style; # (6.0) HEAVY DIVISION SIGN +2797 FE0F ; emoji style; # (6.0) HEAVY DIVISION SIGN +27A1 FE0E ; text style; # (1.1) BLACK RIGHTWARDS ARROW +27A1 FE0F ; emoji style; # (1.1) BLACK RIGHTWARDS ARROW +27B0 FE0E ; text style; # (6.0) CURLY LOOP +27B0 FE0F ; emoji style; # (6.0) CURLY LOOP +27BF FE0E ; text style; # (6.0) DOUBLE CURLY LOOP +27BF FE0F ; emoji style; # (6.0) DOUBLE CURLY LOOP +2934 FE0E ; text style; # (3.2) ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS +2934 FE0F ; emoji style; # (3.2) ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS +2935 FE0E ; text style; # (3.2) ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS +2935 FE0F ; emoji style; # (3.2) ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS +2B05 FE0E ; text style; # (4.0) LEFTWARDS BLACK ARROW +2B05 FE0F ; emoji style; # (4.0) LEFTWARDS BLACK ARROW +2B06 FE0E ; text style; # (4.0) UPWARDS BLACK ARROW +2B06 FE0F ; emoji style; # (4.0) UPWARDS BLACK ARROW +2B07 FE0E ; text style; # (4.0) DOWNWARDS BLACK ARROW +2B07 FE0F ; emoji style; # (4.0) DOWNWARDS BLACK ARROW +2B1B FE0E ; text style; # (5.1) BLACK LARGE SQUARE +2B1B FE0F ; emoji style; # (5.1) BLACK LARGE SQUARE +2B1C FE0E ; text style; # (5.1) WHITE LARGE SQUARE +2B1C FE0F ; emoji 
style; # (5.1) WHITE LARGE SQUARE +2B50 FE0E ; text style; # (5.1) WHITE MEDIUM STAR +2B50 FE0F ; emoji style; # (5.1) WHITE MEDIUM STAR +2B55 FE0E ; text style; # (5.2) HEAVY LARGE CIRCLE +2B55 FE0F ; emoji style; # (5.2) HEAVY LARGE CIRCLE +3030 FE0E ; text style; # (1.1) WAVY DASH +3030 FE0F ; emoji style; # (1.1) WAVY DASH +303D FE0E ; text style; # (3.2) PART ALTERNATION MARK +303D FE0F ; emoji style; # (3.2) PART ALTERNATION MARK +3297 FE0E ; text style; # (1.1) CIRCLED IDEOGRAPH CONGRATULATION +3297 FE0F ; emoji style; # (1.1) CIRCLED IDEOGRAPH CONGRATULATION +3299 FE0E ; text style; # (1.1) CIRCLED IDEOGRAPH SECRET +3299 FE0F ; emoji style; # (1.1) CIRCLED IDEOGRAPH SECRET +1F004 FE0E ; text style; # (5.1) MAHJONG TILE RED DRAGON +1F004 FE0F ; emoji style; # (5.1) MAHJONG TILE RED DRAGON +1F170 FE0E ; text style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER A +1F170 FE0F ; emoji style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER A +1F171 FE0E ; text style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER B +1F171 FE0F ; emoji style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER B +1F17E FE0E ; text style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER O +1F17E FE0F ; emoji style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER O +1F17F FE0E ; text style; # (5.2) NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F17F FE0F ; emoji style; # (5.2) NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F202 FE0E ; text style; # (6.0) SQUARED KATAKANA SA +1F202 FE0F ; emoji style; # (6.0) SQUARED KATAKANA SA +1F21A FE0E ; text style; # (5.2) SQUARED CJK UNIFIED IDEOGRAPH-7121 +1F21A FE0F ; emoji style; # (5.2) SQUARED CJK UNIFIED IDEOGRAPH-7121 +1F22F FE0E ; text style; # (5.2) SQUARED CJK UNIFIED IDEOGRAPH-6307 +1F22F FE0F ; emoji style; # (5.2) SQUARED CJK UNIFIED IDEOGRAPH-6307 +1F237 FE0E ; text style; # (6.0) SQUARED CJK UNIFIED IDEOGRAPH-6708 +1F237 FE0F ; emoji style; # (6.0) SQUARED CJK UNIFIED IDEOGRAPH-6708 +1F30D FE0E ; text style; # (6.0) EARTH GLOBE EUROPE-AFRICA +1F30D 
FE0F ; emoji style; # (6.0) EARTH GLOBE EUROPE-AFRICA +1F30E FE0E ; text style; # (6.0) EARTH GLOBE AMERICAS +1F30E FE0F ; emoji style; # (6.0) EARTH GLOBE AMERICAS +1F30F FE0E ; text style; # (6.0) EARTH GLOBE ASIA-AUSTRALIA +1F30F FE0F ; emoji style; # (6.0) EARTH GLOBE ASIA-AUSTRALIA +1F315 FE0E ; text style; # (6.0) FULL MOON SYMBOL +1F315 FE0F ; emoji style; # (6.0) FULL MOON SYMBOL +1F31C FE0E ; text style; # (6.0) LAST QUARTER MOON WITH FACE +1F31C FE0F ; emoji style; # (6.0) LAST QUARTER MOON WITH FACE +1F321 FE0E ; text style; # (7.0) THERMOMETER +1F321 FE0F ; emoji style; # (7.0) THERMOMETER +1F324 FE0E ; text style; # (7.0) WHITE SUN WITH SMALL CLOUD +1F324 FE0F ; emoji style; # (7.0) WHITE SUN WITH SMALL CLOUD +1F325 FE0E ; text style; # (7.0) WHITE SUN BEHIND CLOUD +1F325 FE0F ; emoji style; # (7.0) WHITE SUN BEHIND CLOUD +1F326 FE0E ; text style; # (7.0) WHITE SUN BEHIND CLOUD WITH RAIN +1F326 FE0F ; emoji style; # (7.0) WHITE SUN BEHIND CLOUD WITH RAIN +1F327 FE0E ; text style; # (7.0) CLOUD WITH RAIN +1F327 FE0F ; emoji style; # (7.0) CLOUD WITH RAIN +1F328 FE0E ; text style; # (7.0) CLOUD WITH SNOW +1F328 FE0F ; emoji style; # (7.0) CLOUD WITH SNOW +1F329 FE0E ; text style; # (7.0) CLOUD WITH LIGHTNING +1F329 FE0F ; emoji style; # (7.0) CLOUD WITH LIGHTNING +1F32A FE0E ; text style; # (7.0) CLOUD WITH TORNADO +1F32A FE0F ; emoji style; # (7.0) CLOUD WITH TORNADO +1F32B FE0E ; text style; # (7.0) FOG +1F32B FE0F ; emoji style; # (7.0) FOG +1F32C FE0E ; text style; # (7.0) WIND BLOWING FACE +1F32C FE0F ; emoji style; # (7.0) WIND BLOWING FACE +1F336 FE0E ; text style; # (7.0) HOT PEPPER +1F336 FE0F ; emoji style; # (7.0) HOT PEPPER +1F378 FE0E ; text style; # (6.0) COCKTAIL GLASS +1F378 FE0F ; emoji style; # (6.0) COCKTAIL GLASS +1F37D FE0E ; text style; # (7.0) FORK AND KNIFE WITH PLATE +1F37D FE0F ; emoji style; # (7.0) FORK AND KNIFE WITH PLATE +1F393 FE0E ; text style; # (6.0) GRADUATION CAP +1F393 FE0F ; emoji style; # (6.0) GRADUATION CAP 
+1F396 FE0E ; text style; # (7.0) MILITARY MEDAL +1F396 FE0F ; emoji style; # (7.0) MILITARY MEDAL +1F397 FE0E ; text style; # (7.0) REMINDER RIBBON +1F397 FE0F ; emoji style; # (7.0) REMINDER RIBBON +1F399 FE0E ; text style; # (7.0) STUDIO MICROPHONE +1F399 FE0F ; emoji style; # (7.0) STUDIO MICROPHONE +1F39A FE0E ; text style; # (7.0) LEVEL SLIDER +1F39A FE0F ; emoji style; # (7.0) LEVEL SLIDER +1F39B FE0E ; text style; # (7.0) CONTROL KNOBS +1F39B FE0F ; emoji style; # (7.0) CONTROL KNOBS +1F39E FE0E ; text style; # (7.0) FILM FRAMES +1F39E FE0F ; emoji style; # (7.0) FILM FRAMES +1F39F FE0E ; text style; # (7.0) ADMISSION TICKETS +1F39F FE0F ; emoji style; # (7.0) ADMISSION TICKETS +1F3A7 FE0E ; text style; # (6.0) HEADPHONE +1F3A7 FE0F ; emoji style; # (6.0) HEADPHONE +1F3AC FE0E ; text style; # (6.0) CLAPPER BOARD +1F3AC FE0F ; emoji style; # (6.0) CLAPPER BOARD +1F3AD FE0E ; text style; # (6.0) PERFORMING ARTS +1F3AD FE0F ; emoji style; # (6.0) PERFORMING ARTS +1F3AE FE0E ; text style; # (6.0) VIDEO GAME +1F3AE FE0F ; emoji style; # (6.0) VIDEO GAME +1F3C2 FE0E ; text style; # (6.0) SNOWBOARDER +1F3C2 FE0F ; emoji style; # (6.0) SNOWBOARDER +1F3C4 FE0E ; text style; # (6.0) SURFER +1F3C4 FE0F ; emoji style; # (6.0) SURFER +1F3C6 FE0E ; text style; # (6.0) TROPHY +1F3C6 FE0F ; emoji style; # (6.0) TROPHY +1F3CA FE0E ; text style; # (6.0) SWIMMER +1F3CA FE0F ; emoji style; # (6.0) SWIMMER +1F3CB FE0E ; text style; # (7.0) WEIGHT LIFTER +1F3CB FE0F ; emoji style; # (7.0) WEIGHT LIFTER +1F3CC FE0E ; text style; # (7.0) GOLFER +1F3CC FE0F ; emoji style; # (7.0) GOLFER +1F3CD FE0E ; text style; # (7.0) RACING MOTORCYCLE +1F3CD FE0F ; emoji style; # (7.0) RACING MOTORCYCLE +1F3CE FE0E ; text style; # (7.0) RACING CAR +1F3CE FE0F ; emoji style; # (7.0) RACING CAR +1F3D4 FE0E ; text style; # (7.0) SNOW CAPPED MOUNTAIN +1F3D4 FE0F ; emoji style; # (7.0) SNOW CAPPED MOUNTAIN +1F3D5 FE0E ; text style; # (7.0) CAMPING +1F3D5 FE0F ; emoji style; # (7.0) CAMPING +1F3D6 
FE0E ; text style; # (7.0) BEACH WITH UMBRELLA +1F3D6 FE0F ; emoji style; # (7.0) BEACH WITH UMBRELLA +1F3D7 FE0E ; text style; # (7.0) BUILDING CONSTRUCTION +1F3D7 FE0F ; emoji style; # (7.0) BUILDING CONSTRUCTION +1F3D8 FE0E ; text style; # (7.0) HOUSE BUILDINGS +1F3D8 FE0F ; emoji style; # (7.0) HOUSE BUILDINGS +1F3D9 FE0E ; text style; # (7.0) CITYSCAPE +1F3D9 FE0F ; emoji style; # (7.0) CITYSCAPE +1F3DA FE0E ; text style; # (7.0) DERELICT HOUSE BUILDING +1F3DA FE0F ; emoji style; # (7.0) DERELICT HOUSE BUILDING +1F3DB FE0E ; text style; # (7.0) CLASSICAL BUILDING +1F3DB FE0F ; emoji style; # (7.0) CLASSICAL BUILDING +1F3DC FE0E ; text style; # (7.0) DESERT +1F3DC FE0F ; emoji style; # (7.0) DESERT +1F3DD FE0E ; text style; # (7.0) DESERT ISLAND +1F3DD FE0F ; emoji style; # (7.0) DESERT ISLAND +1F3DE FE0E ; text style; # (7.0) NATIONAL PARK +1F3DE FE0F ; emoji style; # (7.0) NATIONAL PARK +1F3DF FE0E ; text style; # (7.0) STADIUM +1F3DF FE0F ; emoji style; # (7.0) STADIUM +1F3E0 FE0E ; text style; # (6.0) HOUSE BUILDING +1F3E0 FE0F ; emoji style; # (6.0) HOUSE BUILDING +1F3ED FE0E ; text style; # (6.0) FACTORY +1F3ED FE0F ; emoji style; # (6.0) FACTORY +1F3F3 FE0E ; text style; # (7.0) WAVING WHITE FLAG +1F3F3 FE0F ; emoji style; # (7.0) WAVING WHITE FLAG +1F3F5 FE0E ; text style; # (7.0) ROSETTE +1F3F5 FE0F ; emoji style; # (7.0) ROSETTE +1F3F7 FE0E ; text style; # (7.0) LABEL +1F3F7 FE0F ; emoji style; # (7.0) LABEL +1F408 FE0E ; text style; # (6.0) CAT +1F408 FE0F ; emoji style; # (6.0) CAT +1F415 FE0E ; text style; # (6.0) DOG +1F415 FE0F ; emoji style; # (6.0) DOG +1F41F FE0E ; text style; # (6.0) FISH +1F41F FE0F ; emoji style; # (6.0) FISH +1F426 FE0E ; text style; # (6.0) BIRD +1F426 FE0F ; emoji style; # (6.0) BIRD +1F43F FE0E ; text style; # (7.0) CHIPMUNK +1F43F FE0F ; emoji style; # (7.0) CHIPMUNK +1F441 FE0E ; text style; # (7.0) EYE +1F441 FE0F ; emoji style; # (7.0) EYE +1F442 FE0E ; text style; # (6.0) EAR +1F442 FE0F ; emoji style; # (6.0) EAR 
+1F446 FE0E ; text style; # (6.0) WHITE UP POINTING BACKHAND INDEX +1F446 FE0F ; emoji style; # (6.0) WHITE UP POINTING BACKHAND INDEX +1F447 FE0E ; text style; # (6.0) WHITE DOWN POINTING BACKHAND INDEX +1F447 FE0F ; emoji style; # (6.0) WHITE DOWN POINTING BACKHAND INDEX +1F448 FE0E ; text style; # (6.0) WHITE LEFT POINTING BACKHAND INDEX +1F448 FE0F ; emoji style; # (6.0) WHITE LEFT POINTING BACKHAND INDEX +1F449 FE0E ; text style; # (6.0) WHITE RIGHT POINTING BACKHAND INDEX +1F449 FE0F ; emoji style; # (6.0) WHITE RIGHT POINTING BACKHAND INDEX +1F44D FE0E ; text style; # (6.0) THUMBS UP SIGN +1F44D FE0F ; emoji style; # (6.0) THUMBS UP SIGN +1F44E FE0E ; text style; # (6.0) THUMBS DOWN SIGN +1F44E FE0F ; emoji style; # (6.0) THUMBS DOWN SIGN +1F453 FE0E ; text style; # (6.0) EYEGLASSES +1F453 FE0F ; emoji style; # (6.0) EYEGLASSES +1F46A FE0E ; text style; # (6.0) FAMILY +1F46A FE0F ; emoji style; # (6.0) FAMILY +1F47D FE0E ; text style; # (6.0) EXTRATERRESTRIAL ALIEN +1F47D FE0F ; emoji style; # (6.0) EXTRATERRESTRIAL ALIEN +1F4A3 FE0E ; text style; # (6.0) BOMB +1F4A3 FE0F ; emoji style; # (6.0) BOMB +1F4B0 FE0E ; text style; # (6.0) MONEY BAG +1F4B0 FE0F ; emoji style; # (6.0) MONEY BAG +1F4B3 FE0E ; text style; # (6.0) CREDIT CARD +1F4B3 FE0F ; emoji style; # (6.0) CREDIT CARD +1F4BB FE0E ; text style; # (6.0) PERSONAL COMPUTER +1F4BB FE0F ; emoji style; # (6.0) PERSONAL COMPUTER +1F4BF FE0E ; text style; # (6.0) OPTICAL DISC +1F4BF FE0F ; emoji style; # (6.0) OPTICAL DISC +1F4CB FE0E ; text style; # (6.0) CLIPBOARD +1F4CB FE0F ; emoji style; # (6.0) CLIPBOARD +1F4DA FE0E ; text style; # (6.0) BOOKS +1F4DA FE0F ; emoji style; # (6.0) BOOKS +1F4DF FE0E ; text style; # (6.0) PAGER +1F4DF FE0F ; emoji style; # (6.0) PAGER +1F4E4 FE0E ; text style; # (6.0) OUTBOX TRAY +1F4E4 FE0F ; emoji style; # (6.0) OUTBOX TRAY +1F4E5 FE0E ; text style; # (6.0) INBOX TRAY +1F4E5 FE0F ; emoji style; # (6.0) INBOX TRAY +1F4E6 FE0E ; text style; # (6.0) PACKAGE +1F4E6 FE0F ; 
emoji style; # (6.0) PACKAGE +1F4EA FE0E ; text style; # (6.0) CLOSED MAILBOX WITH LOWERED FLAG +1F4EA FE0F ; emoji style; # (6.0) CLOSED MAILBOX WITH LOWERED FLAG +1F4EB FE0E ; text style; # (6.0) CLOSED MAILBOX WITH RAISED FLAG +1F4EB FE0F ; emoji style; # (6.0) CLOSED MAILBOX WITH RAISED FLAG +1F4EC FE0E ; text style; # (6.0) OPEN MAILBOX WITH RAISED FLAG +1F4EC FE0F ; emoji style; # (6.0) OPEN MAILBOX WITH RAISED FLAG +1F4ED FE0E ; text style; # (6.0) OPEN MAILBOX WITH LOWERED FLAG +1F4ED FE0F ; emoji style; # (6.0) OPEN MAILBOX WITH LOWERED FLAG +1F4F7 FE0E ; text style; # (6.0) CAMERA +1F4F7 FE0F ; emoji style; # (6.0) CAMERA +1F4F9 FE0E ; text style; # (6.0) VIDEO CAMERA +1F4F9 FE0F ; emoji style; # (6.0) VIDEO CAMERA +1F4FA FE0E ; text style; # (6.0) TELEVISION +1F4FA FE0F ; emoji style; # (6.0) TELEVISION +1F4FB FE0E ; text style; # (6.0) RADIO +1F4FB FE0F ; emoji style; # (6.0) RADIO +1F4FD FE0E ; text style; # (7.0) FILM PROJECTOR +1F4FD FE0F ; emoji style; # (7.0) FILM PROJECTOR +1F508 FE0E ; text style; # (6.0) SPEAKER +1F508 FE0F ; emoji style; # (6.0) SPEAKER +1F50D FE0E ; text style; # (6.0) LEFT-POINTING MAGNIFYING GLASS +1F50D FE0F ; emoji style; # (6.0) LEFT-POINTING MAGNIFYING GLASS +1F512 FE0E ; text style; # (6.0) LOCK +1F512 FE0F ; emoji style; # (6.0) LOCK +1F513 FE0E ; text style; # (6.0) OPEN LOCK +1F513 FE0F ; emoji style; # (6.0) OPEN LOCK +1F549 FE0E ; text style; # (7.0) OM SYMBOL +1F549 FE0F ; emoji style; # (7.0) OM SYMBOL +1F54A FE0E ; text style; # (7.0) DOVE OF PEACE +1F54A FE0F ; emoji style; # (7.0) DOVE OF PEACE +1F550 FE0E ; text style; # (6.0) CLOCK FACE ONE OCLOCK +1F550 FE0F ; emoji style; # (6.0) CLOCK FACE ONE OCLOCK +1F551 FE0E ; text style; # (6.0) CLOCK FACE TWO OCLOCK +1F551 FE0F ; emoji style; # (6.0) CLOCK FACE TWO OCLOCK +1F552 FE0E ; text style; # (6.0) CLOCK FACE THREE OCLOCK +1F552 FE0F ; emoji style; # (6.0) CLOCK FACE THREE OCLOCK +1F553 FE0E ; text style; # (6.0) CLOCK FACE FOUR OCLOCK +1F553 FE0F ; emoji 
style; # (6.0) CLOCK FACE FOUR OCLOCK +1F554 FE0E ; text style; # (6.0) CLOCK FACE FIVE OCLOCK +1F554 FE0F ; emoji style; # (6.0) CLOCK FACE FIVE OCLOCK +1F555 FE0E ; text style; # (6.0) CLOCK FACE SIX OCLOCK +1F555 FE0F ; emoji style; # (6.0) CLOCK FACE SIX OCLOCK +1F556 FE0E ; text style; # (6.0) CLOCK FACE SEVEN OCLOCK +1F556 FE0F ; emoji style; # (6.0) CLOCK FACE SEVEN OCLOCK +1F557 FE0E ; text style; # (6.0) CLOCK FACE EIGHT OCLOCK +1F557 FE0F ; emoji style; # (6.0) CLOCK FACE EIGHT OCLOCK +1F558 FE0E ; text style; # (6.0) CLOCK FACE NINE OCLOCK +1F558 FE0F ; emoji style; # (6.0) CLOCK FACE NINE OCLOCK +1F559 FE0E ; text style; # (6.0) CLOCK FACE TEN OCLOCK +1F559 FE0F ; emoji style; # (6.0) CLOCK FACE TEN OCLOCK +1F55A FE0E ; text style; # (6.0) CLOCK FACE ELEVEN OCLOCK +1F55A FE0F ; emoji style; # (6.0) CLOCK FACE ELEVEN OCLOCK +1F55B FE0E ; text style; # (6.0) CLOCK FACE TWELVE OCLOCK +1F55B FE0F ; emoji style; # (6.0) CLOCK FACE TWELVE OCLOCK +1F55C FE0E ; text style; # (6.0) CLOCK FACE ONE-THIRTY +1F55C FE0F ; emoji style; # (6.0) CLOCK FACE ONE-THIRTY +1F55D FE0E ; text style; # (6.0) CLOCK FACE TWO-THIRTY +1F55D FE0F ; emoji style; # (6.0) CLOCK FACE TWO-THIRTY +1F55E FE0E ; text style; # (6.0) CLOCK FACE THREE-THIRTY +1F55E FE0F ; emoji style; # (6.0) CLOCK FACE THREE-THIRTY +1F55F FE0E ; text style; # (6.0) CLOCK FACE FOUR-THIRTY +1F55F FE0F ; emoji style; # (6.0) CLOCK FACE FOUR-THIRTY +1F560 FE0E ; text style; # (6.0) CLOCK FACE FIVE-THIRTY +1F560 FE0F ; emoji style; # (6.0) CLOCK FACE FIVE-THIRTY +1F561 FE0E ; text style; # (6.0) CLOCK FACE SIX-THIRTY +1F561 FE0F ; emoji style; # (6.0) CLOCK FACE SIX-THIRTY +1F562 FE0E ; text style; # (6.0) CLOCK FACE SEVEN-THIRTY +1F562 FE0F ; emoji style; # (6.0) CLOCK FACE SEVEN-THIRTY +1F563 FE0E ; text style; # (6.0) CLOCK FACE EIGHT-THIRTY +1F563 FE0F ; emoji style; # (6.0) CLOCK FACE EIGHT-THIRTY +1F564 FE0E ; text style; # (6.0) CLOCK FACE NINE-THIRTY +1F564 FE0F ; emoji style; # (6.0) CLOCK FACE 
NINE-THIRTY +1F565 FE0E ; text style; # (6.0) CLOCK FACE TEN-THIRTY +1F565 FE0F ; emoji style; # (6.0) CLOCK FACE TEN-THIRTY +1F566 FE0E ; text style; # (6.0) CLOCK FACE ELEVEN-THIRTY +1F566 FE0F ; emoji style; # (6.0) CLOCK FACE ELEVEN-THIRTY +1F567 FE0E ; text style; # (6.0) CLOCK FACE TWELVE-THIRTY +1F567 FE0F ; emoji style; # (6.0) CLOCK FACE TWELVE-THIRTY +1F56F FE0E ; text style; # (7.0) CANDLE +1F56F FE0F ; emoji style; # (7.0) CANDLE +1F570 FE0E ; text style; # (7.0) MANTELPIECE CLOCK +1F570 FE0F ; emoji style; # (7.0) MANTELPIECE CLOCK +1F573 FE0E ; text style; # (7.0) HOLE +1F573 FE0F ; emoji style; # (7.0) HOLE +1F574 FE0E ; text style; # (7.0) MAN IN BUSINESS SUIT LEVITATING +1F574 FE0F ; emoji style; # (7.0) MAN IN BUSINESS SUIT LEVITATING +1F575 FE0E ; text style; # (7.0) SLEUTH OR SPY +1F575 FE0F ; emoji style; # (7.0) SLEUTH OR SPY +1F576 FE0E ; text style; # (7.0) DARK SUNGLASSES +1F576 FE0F ; emoji style; # (7.0) DARK SUNGLASSES +1F577 FE0E ; text style; # (7.0) SPIDER +1F577 FE0F ; emoji style; # (7.0) SPIDER +1F578 FE0E ; text style; # (7.0) SPIDER WEB +1F578 FE0F ; emoji style; # (7.0) SPIDER WEB +1F579 FE0E ; text style; # (7.0) JOYSTICK +1F579 FE0F ; emoji style; # (7.0) JOYSTICK +1F587 FE0E ; text style; # (7.0) LINKED PAPERCLIPS +1F587 FE0F ; emoji style; # (7.0) LINKED PAPERCLIPS +1F58A FE0E ; text style; # (7.0) LOWER LEFT BALLPOINT PEN +1F58A FE0F ; emoji style; # (7.0) LOWER LEFT BALLPOINT PEN +1F58B FE0E ; text style; # (7.0) LOWER LEFT FOUNTAIN PEN +1F58B FE0F ; emoji style; # (7.0) LOWER LEFT FOUNTAIN PEN +1F58C FE0E ; text style; # (7.0) LOWER LEFT PAINTBRUSH +1F58C FE0F ; emoji style; # (7.0) LOWER LEFT PAINTBRUSH +1F58D FE0E ; text style; # (7.0) LOWER LEFT CRAYON +1F58D FE0F ; emoji style; # (7.0) LOWER LEFT CRAYON +1F590 FE0E ; text style; # (7.0) RAISED HAND WITH FINGERS SPLAYED +1F590 FE0F ; emoji style; # (7.0) RAISED HAND WITH FINGERS SPLAYED +1F5A5 FE0E ; text style; # (7.0) DESKTOP COMPUTER +1F5A5 FE0F ; emoji style; # 
(7.0) DESKTOP COMPUTER +1F5A8 FE0E ; text style; # (7.0) PRINTER +1F5A8 FE0F ; emoji style; # (7.0) PRINTER +1F5B1 FE0E ; text style; # (7.0) THREE BUTTON MOUSE +1F5B1 FE0F ; emoji style; # (7.0) THREE BUTTON MOUSE +1F5B2 FE0E ; text style; # (7.0) TRACKBALL +1F5B2 FE0F ; emoji style; # (7.0) TRACKBALL +1F5BC FE0E ; text style; # (7.0) FRAME WITH PICTURE +1F5BC FE0F ; emoji style; # (7.0) FRAME WITH PICTURE +1F5C2 FE0E ; text style; # (7.0) CARD INDEX DIVIDERS +1F5C2 FE0F ; emoji style; # (7.0) CARD INDEX DIVIDERS +1F5C3 FE0E ; text style; # (7.0) CARD FILE BOX +1F5C3 FE0F ; emoji style; # (7.0) CARD FILE BOX +1F5C4 FE0E ; text style; # (7.0) FILE CABINET +1F5C4 FE0F ; emoji style; # (7.0) FILE CABINET +1F5D1 FE0E ; text style; # (7.0) WASTEBASKET +1F5D1 FE0F ; emoji style; # (7.0) WASTEBASKET +1F5D2 FE0E ; text style; # (7.0) SPIRAL NOTE PAD +1F5D2 FE0F ; emoji style; # (7.0) SPIRAL NOTE PAD +1F5D3 FE0E ; text style; # (7.0) SPIRAL CALENDAR PAD +1F5D3 FE0F ; emoji style; # (7.0) SPIRAL CALENDAR PAD +1F5DC FE0E ; text style; # (7.0) COMPRESSION +1F5DC FE0F ; emoji style; # (7.0) COMPRESSION +1F5DD FE0E ; text style; # (7.0) OLD KEY +1F5DD FE0F ; emoji style; # (7.0) OLD KEY +1F5DE FE0E ; text style; # (7.0) ROLLED-UP NEWSPAPER +1F5DE FE0F ; emoji style; # (7.0) ROLLED-UP NEWSPAPER +1F5E1 FE0E ; text style; # (7.0) DAGGER KNIFE +1F5E1 FE0F ; emoji style; # (7.0) DAGGER KNIFE +1F5E3 FE0E ; text style; # (7.0) SPEAKING HEAD IN SILHOUETTE +1F5E3 FE0F ; emoji style; # (7.0) SPEAKING HEAD IN SILHOUETTE +1F5E8 FE0E ; text style; # (7.0) LEFT SPEECH BUBBLE +1F5E8 FE0F ; emoji style; # (7.0) LEFT SPEECH BUBBLE +1F5EF FE0E ; text style; # (7.0) RIGHT ANGER BUBBLE +1F5EF FE0F ; emoji style; # (7.0) RIGHT ANGER BUBBLE +1F5F3 FE0E ; text style; # (7.0) BALLOT BOX WITH BALLOT +1F5F3 FE0F ; emoji style; # (7.0) BALLOT BOX WITH BALLOT +1F5FA FE0E ; text style; # (7.0) WORLD MAP +1F5FA FE0F ; emoji style; # (7.0) WORLD MAP +1F610 FE0E ; text style; # (6.0) NEUTRAL FACE +1F610 FE0F 
; emoji style; # (6.0) NEUTRAL FACE +1F687 FE0E ; text style; # (6.0) METRO +1F687 FE0F ; emoji style; # (6.0) METRO +1F68D FE0E ; text style; # (6.0) ONCOMING BUS +1F68D FE0F ; emoji style; # (6.0) ONCOMING BUS +1F691 FE0E ; text style; # (6.0) AMBULANCE +1F691 FE0F ; emoji style; # (6.0) AMBULANCE +1F694 FE0E ; text style; # (6.0) ONCOMING POLICE CAR +1F694 FE0F ; emoji style; # (6.0) ONCOMING POLICE CAR +1F698 FE0E ; text style; # (6.0) ONCOMING AUTOMOBILE +1F698 FE0F ; emoji style; # (6.0) ONCOMING AUTOMOBILE +1F6AD FE0E ; text style; # (6.0) NO SMOKING SYMBOL +1F6AD FE0F ; emoji style; # (6.0) NO SMOKING SYMBOL +1F6B2 FE0E ; text style; # (6.0) BICYCLE +1F6B2 FE0F ; emoji style; # (6.0) BICYCLE +1F6B9 FE0E ; text style; # (6.0) MENS SYMBOL +1F6B9 FE0F ; emoji style; # (6.0) MENS SYMBOL +1F6BA FE0E ; text style; # (6.0) WOMENS SYMBOL +1F6BA FE0F ; emoji style; # (6.0) WOMENS SYMBOL +1F6BC FE0E ; text style; # (6.0) BABY SYMBOL +1F6BC FE0F ; emoji style; # (6.0) BABY SYMBOL +1F6CB FE0E ; text style; # (7.0) COUCH AND LAMP +1F6CB FE0F ; emoji style; # (7.0) COUCH AND LAMP +1F6CD FE0E ; text style; # (7.0) SHOPPING BAGS +1F6CD FE0F ; emoji style; # (7.0) SHOPPING BAGS +1F6CE FE0E ; text style; # (7.0) BELLHOP BELL +1F6CE FE0F ; emoji style; # (7.0) BELLHOP BELL +1F6CF FE0E ; text style; # (7.0) BED +1F6CF FE0F ; emoji style; # (7.0) BED +1F6E0 FE0E ; text style; # (7.0) HAMMER AND WRENCH +1F6E0 FE0F ; emoji style; # (7.0) HAMMER AND WRENCH +1F6E1 FE0E ; text style; # (7.0) SHIELD +1F6E1 FE0F ; emoji style; # (7.0) SHIELD +1F6E2 FE0E ; text style; # (7.0) OIL DRUM +1F6E2 FE0F ; emoji style; # (7.0) OIL DRUM +1F6E3 FE0E ; text style; # (7.0) MOTORWAY +1F6E3 FE0F ; emoji style; # (7.0) MOTORWAY +1F6E4 FE0E ; text style; # (7.0) RAILWAY TRACK +1F6E4 FE0F ; emoji style; # (7.0) RAILWAY TRACK +1F6E5 FE0E ; text style; # (7.0) MOTOR BOAT +1F6E5 FE0F ; emoji style; # (7.0) MOTOR BOAT +1F6E9 FE0E ; text style; # (7.0) SMALL AIRPLANE +1F6E9 FE0F ; emoji style; # (7.0) SMALL 
AIRPLANE +1F6F0 FE0E ; text style; # (7.0) SATELLITE +1F6F0 FE0F ; emoji style; # (7.0) SATELLITE +1F6F3 FE0E ; text style; # (7.0) PASSENGER SHIP +1F6F3 FE0F ; emoji style; # (7.0) PASSENGER SHIP + +#Total sequences: 371 + +#EOF diff --git a/contrib/python/wcwidth/py2/tests/emoji-zwj-sequences.txt b/contrib/python/wcwidth/py2/tests/emoji-zwj-sequences.txt new file mode 100644 index 0000000000..25f8b6154b --- /dev/null +++ b/contrib/python/wcwidth/py2/tests/emoji-zwj-sequences.txt @@ -0,0 +1,1529 @@ +# emoji-zwj-sequences.txt +# Date: 2023-06-05, 20:04:50 GMT +# © 2023 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Emoji ZWJ Sequences for UTS #51 +# Version: 15.1 +# +# For documentation and usage, see https://www.unicode.org/reports/tr51 +# +# Format: +# code_point(s) ; type_field ; description # comments +# Fields: +# code_point(s): one or more code points in hex format, separated by spaces +# type_field :RGI_Emoji_ZWJ_Sequence +# The type_field is a convenience for parsing the emoji sequence files, and is not intended to be maintained as a property. +# short name: CLDR short name of sequence; characters may be escaped with \x{hex}. +# +# For the purpose of regular expressions, the above type field defines the name of +# a binary property of strings. The short name of the property is the same as the long name. +# +# Characters and sequences are listed in code point order. Users should be shown a more natural order. +# See the CLDR collation order for Emoji. 
+ +# ================================================ + +# RGI_Emoji_ZWJ_Sequence: Family + +1F468 200D 2764 FE0F 200D 1F468 ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man # E2.0 [1] (👨❤️👨) +1F468 200D 2764 FE0F 200D 1F48B 200D 1F468 ; RGI_Emoji_ZWJ_Sequence ; kiss: man, man # E2.0 [1] (👨❤️💋👨) +1F468 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: man, boy # E4.0 [1] (👨👦) +1F468 200D 1F466 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: man, boy, boy # E4.0 [1] (👨👦👦) +1F468 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: man, girl # E4.0 [1] (👨👧) +1F468 200D 1F467 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: man, girl, boy # E4.0 [1] (👨👧👦) +1F468 200D 1F467 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: man, girl, girl # E4.0 [1] (👨👧👧) +1F468 200D 1F468 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: man, man, boy # E2.0 [1] (👨👨👦) +1F468 200D 1F468 200D 1F466 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: man, man, boy, boy # E2.0 [1] (👨👨👦👦) +1F468 200D 1F468 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: man, man, girl # E2.0 [1] (👨👨👧) +1F468 200D 1F468 200D 1F467 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: man, man, girl, boy # E2.0 [1] (👨👨👧👦) +1F468 200D 1F468 200D 1F467 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: man, man, girl, girl # E2.0 [1] (👨👨👧👧) +1F468 200D 1F469 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: man, woman, boy # E2.0 [1] (👨👩👦) +1F468 200D 1F469 200D 1F466 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: man, woman, boy, boy # E2.0 [1] (👨👩👦👦) +1F468 200D 1F469 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: man, woman, girl # E2.0 [1] (👨👩👧) +1F468 200D 1F469 200D 1F467 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: man, woman, girl, boy # E2.0 [1] (👨👩👧👦) +1F468 200D 1F469 200D 1F467 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: man, woman, girl, girl # E2.0 [1] (👨👩👧👧) +1F468 1F3FB 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, light skin tone # E13.1 [1] (👨🏻❤️👨🏻) +1F468 1F3FB 200D 2764 FE0F 200D 1F468 
1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, light skin tone, medium-light skin tone #E13.1[1] (👨🏻❤️👨🏼) +1F468 1F3FB 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, light skin tone, medium skin tone # E13.1 [1] (👨🏻❤️👨🏽) +1F468 1F3FB 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, light skin tone, medium-dark skin tone #E13.1[1] (👨🏻❤️👨🏾) +1F468 1F3FB 200D 2764 FE0F 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, light skin tone, dark skin tone # E13.1 [1] (👨🏻❤️👨🏿) +1F468 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: man, man, light skin tone # E13.1 [1] (👨🏻❤️💋👨🏻) +1F468 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: man, man, light skin tone, medium-light skin tone #E13.1 [1] (👨🏻❤️💋👨🏼) +1F468 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: man, man, light skin tone, medium skin tone # E13.1 [1] (👨🏻❤️💋👨🏽) +1F468 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: man, man, light skin tone, medium-dark skin tone # E13.1 [1] (👨🏻❤️💋👨🏾) +1F468 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: man, man, light skin tone, dark skin tone # E13.1 [1] (👨🏻❤️💋👨🏿) +1F468 1F3FB 200D 1F91D 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; men holding hands: light skin tone, medium-light skin tone # E12.1 [1] (👨🏻🤝👨🏼) +1F468 1F3FB 200D 1F91D 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; men holding hands: light skin tone, medium skin tone # E12.1 [1] (👨🏻🤝👨🏽) +1F468 1F3FB 200D 1F91D 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; men holding hands: light skin tone, medium-dark skin tone # E12.1 [1] (👨🏻🤝👨🏾) +1F468 1F3FB 200D 1F91D 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; men holding hands: light skin tone, dark skin tone # E12.1 [1] (👨🏻🤝👨🏿) +1F468 1F3FC 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple 
with heart: man, man, medium-light skin tone, light skin tone #E13.1[1] (👨🏼❤️👨🏻) +1F468 1F3FC 200D 2764 FE0F 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium-light skin tone # E13.1 [1] (👨🏼❤️👨🏼) +1F468 1F3FC 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium-light skin tone, medium skin tone #E13.1[1] (👨🏼❤️👨🏽) +1F468 1F3FC 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium-light skin tone, medium-dark skin tone #E13.1[1] (👨🏼❤️👨🏾) +1F468 1F3FC 200D 2764 FE0F 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium-light skin tone, dark skin tone #E13.1[1] (👨🏼❤️👨🏿) +1F468 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-light skin tone, light skin tone #E13.1 [1] (👨🏼❤️💋👨🏻) +1F468 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-light skin tone # E13.1 [1] (👨🏼❤️💋👨🏼) +1F468 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-light skin tone, medium skin tone #E13.1 [1] (👨🏼❤️💋👨🏽) +1F468 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-light skin tone, medium-dark skin tone #E13.1[1] (👨🏼❤️💋👨🏾) +1F468 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-light skin tone, dark skin tone # E13.1 [1] (👨🏼❤️💋👨🏿) +1F468 1F3FC 200D 1F91D 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; men holding hands: medium-light skin tone, light skin tone # E12.0 [1] (👨🏼🤝👨🏻) +1F468 1F3FC 200D 1F91D 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; men holding hands: medium-light skin tone, medium skin tone # E12.1 [1] (👨🏼🤝👨🏽) +1F468 1F3FC 200D 1F91D 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; men holding hands: medium-light skin tone, medium-dark skin tone #E12.1 [1] (👨🏼🤝👨🏾) +1F468 1F3FC 200D 1F91D 200D 1F468 1F3FF ; 
RGI_Emoji_ZWJ_Sequence ; men holding hands: medium-light skin tone, dark skin tone # E12.1 [1] (👨🏼🤝👨🏿) +1F468 1F3FD 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium skin tone, light skin tone # E13.1 [1] (👨🏽❤️👨🏻) +1F468 1F3FD 200D 2764 FE0F 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium skin tone, medium-light skin tone #E13.1[1] (👨🏽❤️👨🏼) +1F468 1F3FD 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium skin tone # E13.1 [1] (👨🏽❤️👨🏽) +1F468 1F3FD 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium skin tone, medium-dark skin tone #E13.1[1] (👨🏽❤️👨🏾) +1F468 1F3FD 200D 2764 FE0F 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium skin tone, dark skin tone # E13.1 [1] (👨🏽❤️👨🏿) +1F468 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium skin tone, light skin tone # E13.1 [1] (👨🏽❤️💋👨🏻) +1F468 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium skin tone, medium-light skin tone #E13.1 [1] (👨🏽❤️💋👨🏼) +1F468 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium skin tone # E13.1 [1] (👨🏽❤️💋👨🏽) +1F468 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium skin tone, medium-dark skin tone #E13.1 [1] (👨🏽❤️💋👨🏾) +1F468 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium skin tone, dark skin tone # E13.1 [1] (👨🏽❤️💋👨🏿) +1F468 1F3FD 200D 1F91D 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; men holding hands: medium skin tone, light skin tone # E12.0 [1] (👨🏽🤝👨🏻) +1F468 1F3FD 200D 1F91D 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; men holding hands: medium skin tone, medium-light skin tone # E12.0 [1] (👨🏽🤝👨🏼) +1F468 1F3FD 200D 1F91D 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; men 
holding hands: medium skin tone, medium-dark skin tone # E12.1 [1] (👨🏽🤝👨🏾) +1F468 1F3FD 200D 1F91D 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; men holding hands: medium skin tone, dark skin tone # E12.1 [1] (👨🏽🤝👨🏿) +1F468 1F3FE 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium-dark skin tone, light skin tone #E13.1[1] (👨🏾❤️👨🏻) +1F468 1F3FE 200D 2764 FE0F 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium-dark skin tone, medium-light skin tone #E13.1[1] (👨🏾❤️👨🏼) +1F468 1F3FE 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium-dark skin tone, medium skin tone #E13.1[1] (👨🏾❤️👨🏽) +1F468 1F3FE 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium-dark skin tone # E13.1 [1] (👨🏾❤️👨🏾) +1F468 1F3FE 200D 2764 FE0F 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, medium-dark skin tone, dark skin tone #E13.1[1] (👨🏾❤️👨🏿) +1F468 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-dark skin tone, light skin tone # E13.1 [1] (👨🏾❤️💋👨🏻) +1F468 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-dark skin tone, medium-light skin tone #E13.1[1] (👨🏾❤️💋👨🏼) +1F468 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-dark skin tone, medium skin tone #E13.1 [1] (👨🏾❤️💋👨🏽) +1F468 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-dark skin tone # E13.1 [1] (👨🏾❤️💋👨🏾) +1F468 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: man, man, medium-dark skin tone, dark skin tone # E13.1 [1] (👨🏾❤️💋👨🏿) +1F468 1F3FE 200D 1F91D 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; men holding hands: medium-dark skin tone, light skin tone # E12.0 [1] (👨🏾🤝👨🏻) +1F468 1F3FE 200D 1F91D 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; men 
holding hands: medium-dark skin tone, medium-light skin tone #E12.0 [1] (👨🏾🤝👨🏼) +1F468 1F3FE 200D 1F91D 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; men holding hands: medium-dark skin tone, medium skin tone # E12.0 [1] (👨🏾🤝👨🏽) +1F468 1F3FE 200D 1F91D 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; men holding hands: medium-dark skin tone, dark skin tone # E12.1 [1] (👨🏾🤝👨🏿) +1F468 1F3FF 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, dark skin tone, light skin tone # E13.1 [1] (👨🏿❤️👨🏻) +1F468 1F3FF 200D 2764 FE0F 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, dark skin tone, medium-light skin tone #E13.1[1] (👨🏿❤️👨🏼) +1F468 1F3FF 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, dark skin tone, medium skin tone # E13.1 [1] (👨🏿❤️👨🏽) +1F468 1F3FF 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, dark skin tone, medium-dark skin tone #E13.1[1] (👨🏿❤️👨🏾) +1F468 1F3FF 200D 2764 FE0F 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: man, man, dark skin tone # E13.1 [1] (👨🏿❤️👨🏿) +1F468 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: man, man, dark skin tone, light skin tone # E13.1 [1] (👨🏿❤️💋👨🏻) +1F468 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: man, man, dark skin tone, medium-light skin tone # E13.1 [1] (👨🏿❤️💋👨🏼) +1F468 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: man, man, dark skin tone, medium skin tone # E13.1 [1] (👨🏿❤️💋👨🏽) +1F468 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: man, man, dark skin tone, medium-dark skin tone # E13.1 [1] (👨🏿❤️💋👨🏾) +1F468 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: man, man, dark skin tone # E13.1 [1] (👨🏿❤️💋👨🏿) +1F468 1F3FF 200D 1F91D 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; men holding hands: dark skin tone, light 
skin tone # E12.0 [1] (👨🏿🤝👨🏻) +1F468 1F3FF 200D 1F91D 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; men holding hands: dark skin tone, medium-light skin tone # E12.0 [1] (👨🏿🤝👨🏼) +1F468 1F3FF 200D 1F91D 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; men holding hands: dark skin tone, medium skin tone # E12.0 [1] (👨🏿🤝👨🏽) +1F468 1F3FF 200D 1F91D 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; men holding hands: dark skin tone, medium-dark skin tone # E12.0 [1] (👨🏿🤝👨🏾) +1F469 200D 2764 FE0F 200D 1F468 ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man # E2.0 [1] (👩❤️👨) +1F469 200D 2764 FE0F 200D 1F469 ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman # E2.0 [1] (👩❤️👩) +1F469 200D 2764 FE0F 200D 1F48B 200D 1F468 ; RGI_Emoji_ZWJ_Sequence ; kiss: woman, man # E2.0 [1] (👩❤️💋👨) +1F469 200D 2764 FE0F 200D 1F48B 200D 1F469 ; RGI_Emoji_ZWJ_Sequence ; kiss: woman, woman # E2.0 [1] (👩❤️💋👩) +1F469 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: woman, boy # E4.0 [1] (👩👦) +1F469 200D 1F466 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: woman, boy, boy # E4.0 [1] (👩👦👦) +1F469 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: woman, girl # E4.0 [1] (👩👧) +1F469 200D 1F467 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: woman, girl, boy # E4.0 [1] (👩👧👦) +1F469 200D 1F467 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: woman, girl, girl # E4.0 [1] (👩👧👧) +1F469 200D 1F469 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: woman, woman, boy # E2.0 [1] (👩👩👦) +1F469 200D 1F469 200D 1F466 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: woman, woman, boy, boy # E2.0 [1] (👩👩👦👦) +1F469 200D 1F469 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl # E2.0 [1] (👩👩👧) +1F469 200D 1F469 200D 1F467 200D 1F466 ; RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl, boy # E2.0 [1] (👩👩👧👦) +1F469 200D 1F469 200D 1F467 200D 1F467 ; RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl, girl # E2.0 [1] (👩👩👧👧) +1F469 1F3FB 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, 
man, light skin tone # E13.1 [1] (👩🏻❤️👨🏻) +1F469 1F3FB 200D 2764 FE0F 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, light skin tone, medium-light skin tone #E13.1[1] (👩🏻❤️👨🏼) +1F469 1F3FB 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, light skin tone, medium skin tone #E13.1 [1] (👩🏻❤️👨🏽) +1F469 1F3FB 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, light skin tone, medium-dark skin tone #E13.1[1] (👩🏻❤️👨🏾) +1F469 1F3FB 200D 2764 FE0F 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, light skin tone, dark skin tone # E13.1 [1] (👩🏻❤️👨🏿) +1F469 1F3FB 200D 2764 FE0F 200D 1F469 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, light skin tone # E13.1 [1] (👩🏻❤️👩🏻) +1F469 1F3FB 200D 2764 FE0F 200D 1F469 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, light skin tone, medium-light skin tone #E13.1[1] (👩🏻❤️👩🏼) +1F469 1F3FB 200D 2764 FE0F 200D 1F469 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, light skin tone, medium skin tone #E13.1[1] (👩🏻❤️👩🏽) +1F469 1F3FB 200D 2764 FE0F 200D 1F469 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, light skin tone, medium-dark skin tone #E13.1[1] (👩🏻❤️👩🏾) +1F469 1F3FB 200D 2764 FE0F 200D 1F469 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, light skin tone, dark skin tone #E13.1 [1] (👩🏻❤️👩🏿) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, light skin tone # E13.1 [1] (👩🏻❤️💋👨🏻) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, light skin tone, medium-light skin tone #E13.1[1] (👩🏻❤️💋👨🏼) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, light skin tone, medium skin tone # E13.1 [1] (👩🏻❤️💋👨🏽) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: 
woman, man, light skin tone, medium-dark skin tone #E13.1 [1] (👩🏻❤️💋👨🏾) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, light skin tone, dark skin tone # E13.1 [1] (👩🏻❤️💋👨🏿) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, light skin tone # E13.1 [1] (👩🏻❤️💋👩🏻) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, light skin tone, medium-light skin tone #E13.1[1] (👩🏻❤️💋👩🏼) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, light skin tone, medium skin tone # E13.1 [1] (👩🏻❤️💋👩🏽) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, light skin tone, medium-dark skin tone #E13.1[1] (👩🏻❤️💋👩🏾) +1F469 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, light skin tone, dark skin tone # E13.1 [1] (👩🏻❤️💋👩🏿) +1F469 1F3FB 200D 1F91D 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: light skin tone, medium-light skin tone #E12.0[1] (👩🏻🤝👨🏼) +1F469 1F3FB 200D 1F91D 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: light skin tone, medium skin tone # E12.0 [1] (👩🏻🤝👨🏽) +1F469 1F3FB 200D 1F91D 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: light skin tone, medium-dark skin tone #E12.0[1] (👩🏻🤝👨🏾) +1F469 1F3FB 200D 1F91D 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: light skin tone, dark skin tone # E12.0 [1] (👩🏻🤝👨🏿) +1F469 1F3FB 200D 1F91D 200D 1F469 1F3FC ; RGI_Emoji_ZWJ_Sequence ; women holding hands: light skin tone, medium-light skin tone # E12.1 [1] (👩🏻🤝👩🏼) +1F469 1F3FB 200D 1F91D 200D 1F469 1F3FD ; RGI_Emoji_ZWJ_Sequence ; women holding hands: light skin tone, medium skin tone # E12.1 [1] (👩🏻🤝👩🏽) +1F469 1F3FB 200D 1F91D 200D 1F469 1F3FE ; RGI_Emoji_ZWJ_Sequence ; women holding hands: light skin 
tone, medium-dark skin tone # E12.1 [1] (👩🏻🤝👩🏾) +1F469 1F3FB 200D 1F91D 200D 1F469 1F3FF ; RGI_Emoji_ZWJ_Sequence ; women holding hands: light skin tone, dark skin tone # E12.1 [1] (👩🏻🤝👩🏿) +1F469 1F3FC 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-light skin tone, light skin tone #E13.1[1] (👩🏼❤️👨🏻) +1F469 1F3FC 200D 2764 FE0F 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-light skin tone # E13.1 [1] (👩🏼❤️👨🏼) +1F469 1F3FC 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-light skin tone, medium skin tone #E13.1[1] (👩🏼❤️👨🏽) +1F469 1F3FC 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-light skin tone, medium-dark skin tone #E13.1[1] (👩🏼❤️👨🏾) +1F469 1F3FC 200D 2764 FE0F 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-light skin tone, dark skin tone #E13.1[1] (👩🏼❤️👨🏿) +1F469 1F3FC 200D 2764 FE0F 200D 1F469 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium-light skin tone, light skin tone #E13.1[1] (👩🏼❤️👩🏻) +1F469 1F3FC 200D 2764 FE0F 200D 1F469 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium-light skin tone # E13.1 [1] (👩🏼❤️👩🏼) +1F469 1F3FC 200D 2764 FE0F 200D 1F469 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium-light skin tone, medium skin tone #E13.1[1] (👩🏼❤️👩🏽) +1F469 1F3FC 200D 2764 FE0F 200D 1F469 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium-light skin tone, medium-dark skin tone #E13.1[1] (👩🏼❤️👩🏾) +1F469 1F3FC 200D 2764 FE0F 200D 1F469 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium-light skin tone, dark skin tone #E13.1[1] (👩🏼❤️👩🏿) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-light skin tone, light skin tone #E13.1[1] (👩🏼❤️💋👨🏻) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 
200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-light skin tone # E13.1 [1] (👩🏼❤️💋👨🏼) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-light skin tone, medium skin tone #E13.1[1] (👩🏼❤️💋👨🏽) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-light skin tone, medium-dark skin tone #E13.1[1] (👩🏼❤️💋👨🏾) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-light skin tone, dark skin tone #E13.1 [1] (👩🏼❤️💋👨🏿) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-light skin tone, light skin tone #E13.1[1] (👩🏼❤️💋👩🏻) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-light skin tone # E13.1 [1] (👩🏼❤️💋👩🏼) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-light skin tone, medium skin tone #E13.1[1] (👩🏼❤️💋👩🏽) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-light skin tone, medium-dark skin tone #E13.1[1] (👩🏼❤️💋👩🏾) +1F469 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-light skin tone, dark skin tone #E13.1[1] (👩🏼❤️💋👩🏿) +1F469 1F3FC 200D 1F91D 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium-light skin tone, light skin tone #E12.0[1] (👩🏼🤝👨🏻) +1F469 1F3FC 200D 1F91D 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium-light skin tone, medium skin tone #E12.0[1] (👩🏼🤝👨🏽) +1F469 1F3FC 200D 1F91D 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium-light skin tone, medium-dark skin tone #E12.0[1] (👩🏼🤝👨🏾) +1F469 1F3FC 200D 1F91D 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium-light skin tone, dark skin tone 
#E12.0[1] (👩🏼🤝👨🏿) +1F469 1F3FC 200D 1F91D 200D 1F469 1F3FB ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium-light skin tone, light skin tone # E12.0 [1] (👩🏼🤝👩🏻) +1F469 1F3FC 200D 1F91D 200D 1F469 1F3FD ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium-light skin tone, medium skin tone # E12.1 [1] (👩🏼🤝👩🏽) +1F469 1F3FC 200D 1F91D 200D 1F469 1F3FE ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium-light skin tone, medium-dark skin tone #E12.1[1] (👩🏼🤝👩🏾) +1F469 1F3FC 200D 1F91D 200D 1F469 1F3FF ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium-light skin tone, dark skin tone # E12.1 [1] (👩🏼🤝👩🏿) +1F469 1F3FD 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium skin tone, light skin tone #E13.1 [1] (👩🏽❤️👨🏻) +1F469 1F3FD 200D 2764 FE0F 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium skin tone, medium-light skin tone #E13.1[1] (👩🏽❤️👨🏼) +1F469 1F3FD 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium skin tone # E13.1 [1] (👩🏽❤️👨🏽) +1F469 1F3FD 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium skin tone, medium-dark skin tone #E13.1[1] (👩🏽❤️👨🏾) +1F469 1F3FD 200D 2764 FE0F 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium skin tone, dark skin tone #E13.1 [1] (👩🏽❤️👨🏿) +1F469 1F3FD 200D 2764 FE0F 200D 1F469 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium skin tone, light skin tone #E13.1[1] (👩🏽❤️👩🏻) +1F469 1F3FD 200D 2764 FE0F 200D 1F469 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium skin tone, medium-light skin tone #E13.1[1] (👩🏽❤️👩🏼) +1F469 1F3FD 200D 2764 FE0F 200D 1F469 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium skin tone # E13.1 [1] (👩🏽❤️👩🏽) +1F469 1F3FD 200D 2764 FE0F 200D 1F469 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium skin tone, medium-dark 
skin tone #E13.1[1] (👩🏽❤️👩🏾) +1F469 1F3FD 200D 2764 FE0F 200D 1F469 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium skin tone, dark skin tone #E13.1[1] (👩🏽❤️👩🏿) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium skin tone, light skin tone # E13.1 [1] (👩🏽❤️💋👨🏻) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium skin tone, medium-light skin tone #E13.1[1] (👩🏽❤️💋👨🏼) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium skin tone # E13.1 [1] (👩🏽❤️💋👨🏽) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium skin tone, medium-dark skin tone #E13.1[1] (👩🏽❤️💋👨🏾) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium skin tone, dark skin tone # E13.1 [1] (👩🏽❤️💋👨🏿) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium skin tone, light skin tone # E13.1 [1] (👩🏽❤️💋👩🏻) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium skin tone, medium-light skin tone #E13.1[1] (👩🏽❤️💋👩🏼) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium skin tone # E13.1 [1] (👩🏽❤️💋👩🏽) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium skin tone, medium-dark skin tone #E13.1[1] (👩🏽❤️💋👩🏾) +1F469 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium skin tone, dark skin tone # E13.1 [1] (👩🏽❤️💋👩🏿) +1F469 1F3FD 200D 1F91D 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium skin tone, light skin tone # E12.0 [1] (👩🏽🤝👨🏻) +1F469 1F3FD 200D 1F91D 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium skin tone, 
medium-light skin tone #E12.0[1] (👩🏽🤝👨🏼) +1F469 1F3FD 200D 1F91D 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium skin tone, medium-dark skin tone #E12.0[1] (👩🏽🤝👨🏾) +1F469 1F3FD 200D 1F91D 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium skin tone, dark skin tone # E12.0 [1] (👩🏽🤝👨🏿) +1F469 1F3FD 200D 1F91D 200D 1F469 1F3FB ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium skin tone, light skin tone # E12.0 [1] (👩🏽🤝👩🏻) +1F469 1F3FD 200D 1F91D 200D 1F469 1F3FC ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium skin tone, medium-light skin tone # E12.0 [1] (👩🏽🤝👩🏼) +1F469 1F3FD 200D 1F91D 200D 1F469 1F3FE ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium skin tone, medium-dark skin tone # E12.1 [1] (👩🏽🤝👩🏾) +1F469 1F3FD 200D 1F91D 200D 1F469 1F3FF ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium skin tone, dark skin tone # E12.1 [1] (👩🏽🤝👩🏿) +1F469 1F3FE 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-dark skin tone, light skin tone #E13.1[1] (👩🏾❤️👨🏻) +1F469 1F3FE 200D 2764 FE0F 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-dark skin tone, medium-light skin tone #E13.1[1] (👩🏾❤️👨🏼) +1F469 1F3FE 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-dark skin tone, medium skin tone #E13.1[1] (👩🏾❤️👨🏽) +1F469 1F3FE 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-dark skin tone # E13.1 [1] (👩🏾❤️👨🏾) +1F469 1F3FE 200D 2764 FE0F 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, medium-dark skin tone, dark skin tone #E13.1[1] (👩🏾❤️👨🏿) +1F469 1F3FE 200D 2764 FE0F 200D 1F469 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium-dark skin tone, light skin tone #E13.1[1] (👩🏾❤️👩🏻) +1F469 1F3FE 200D 2764 FE0F 200D 1F469 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, 
medium-dark skin tone, medium-light skin tone #E13.1[1] (👩🏾❤️👩🏼) +1F469 1F3FE 200D 2764 FE0F 200D 1F469 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium-dark skin tone, medium skin tone #E13.1[1] (👩🏾❤️👩🏽) +1F469 1F3FE 200D 2764 FE0F 200D 1F469 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium-dark skin tone # E13.1 [1] (👩🏾❤️👩🏾) +1F469 1F3FE 200D 2764 FE0F 200D 1F469 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, medium-dark skin tone, dark skin tone #E13.1[1] (👩🏾❤️👩🏿) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-dark skin tone, light skin tone #E13.1 [1] (👩🏾❤️💋👨🏻) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-dark skin tone, medium-light skin tone #E13.1[1] (👩🏾❤️💋👨🏼) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-dark skin tone, medium skin tone #E13.1[1] (👩🏾❤️💋👨🏽) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-dark skin tone # E13.1 [1] (👩🏾❤️💋👨🏾) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, medium-dark skin tone, dark skin tone #E13.1 [1] (👩🏾❤️💋👨🏿) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-dark skin tone, light skin tone #E13.1[1] (👩🏾❤️💋👩🏻) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-dark skin tone, medium-light skin tone #E13.1[1] (👩🏾❤️💋👩🏼) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-dark skin tone, medium skin tone #E13.1[1] (👩🏾❤️💋👩🏽) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-dark skin tone # E13.1 [1] (👩🏾❤️💋👩🏾) +1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 
1F469 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, medium-dark skin tone, dark skin tone #E13.1[1] (👩🏾❤️💋👩🏿) +1F469 1F3FE 200D 1F91D 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium-dark skin tone, light skin tone #E12.0[1] (👩🏾🤝👨🏻) +1F469 1F3FE 200D 1F91D 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium-dark skin tone, medium-light skin tone #E12.0[1] (👩🏾🤝👨🏼) +1F469 1F3FE 200D 1F91D 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium-dark skin tone, medium skin tone #E12.0[1] (👩🏾🤝👨🏽) +1F469 1F3FE 200D 1F91D 200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: medium-dark skin tone, dark skin tone #E12.0[1] (👩🏾🤝👨🏿) +1F469 1F3FE 200D 1F91D 200D 1F469 1F3FB ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium-dark skin tone, light skin tone # E12.0 [1] (👩🏾🤝👩🏻) +1F469 1F3FE 200D 1F91D 200D 1F469 1F3FC ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium-dark skin tone, medium-light skin tone #E12.0[1] (👩🏾🤝👩🏼) +1F469 1F3FE 200D 1F91D 200D 1F469 1F3FD ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium-dark skin tone, medium skin tone # E12.0 [1] (👩🏾🤝👩🏽) +1F469 1F3FE 200D 1F91D 200D 1F469 1F3FF ; RGI_Emoji_ZWJ_Sequence ; women holding hands: medium-dark skin tone, dark skin tone # E12.1 [1] (👩🏾🤝👩🏿) +1F469 1F3FF 200D 2764 FE0F 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, dark skin tone, light skin tone # E13.1 [1] (👩🏿❤️👨🏻) +1F469 1F3FF 200D 2764 FE0F 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, dark skin tone, medium-light skin tone #E13.1[1] (👩🏿❤️👨🏼) +1F469 1F3FF 200D 2764 FE0F 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, dark skin tone, medium skin tone #E13.1 [1] (👩🏿❤️👨🏽) +1F469 1F3FF 200D 2764 FE0F 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, dark skin tone, medium-dark skin tone #E13.1[1] (👩🏿❤️👨🏾) +1F469 1F3FF 200D 2764 FE0F 
200D 1F468 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, man, dark skin tone # E13.1 [1] (👩🏿❤️👨🏿) +1F469 1F3FF 200D 2764 FE0F 200D 1F469 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, dark skin tone, light skin tone #E13.1 [1] (👩🏿❤️👩🏻) +1F469 1F3FF 200D 2764 FE0F 200D 1F469 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, dark skin tone, medium-light skin tone #E13.1[1] (👩🏿❤️👩🏼) +1F469 1F3FF 200D 2764 FE0F 200D 1F469 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, dark skin tone, medium skin tone #E13.1[1] (👩🏿❤️👩🏽) +1F469 1F3FF 200D 2764 FE0F 200D 1F469 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, dark skin tone, medium-dark skin tone #E13.1[1] (👩🏿❤️👩🏾) +1F469 1F3FF 200D 2764 FE0F 200D 1F469 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: woman, woman, dark skin tone # E13.1 [1] (👩🏿❤️👩🏿) +1F469 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, dark skin tone, light skin tone # E13.1 [1] (👩🏿❤️💋👨🏻) +1F469 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, dark skin tone, medium-light skin tone #E13.1 [1] (👩🏿❤️💋👨🏼) +1F469 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, dark skin tone, medium skin tone # E13.1 [1] (👩🏿❤️💋👨🏽) +1F469 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, dark skin tone, medium-dark skin tone #E13.1 [1] (👩🏿❤️💋👨🏾) +1F469 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F468 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, man, dark skin tone # E13.1 [1] (👩🏿❤️💋👨🏿) +1F469 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, dark skin tone, light skin tone # E13.1 [1] (👩🏿❤️💋👩🏻) +1F469 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, dark skin tone, medium-light skin tone #E13.1[1] (👩🏿❤️💋👩🏼) +1F469 1F3FF 200D 2764 FE0F 200D 
1F48B 200D 1F469 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, dark skin tone, medium skin tone # E13.1 [1] (👩🏿❤️💋👩🏽) +1F469 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, dark skin tone, medium-dark skin tone #E13.1[1] (👩🏿❤️💋👩🏾) +1F469 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: woman, woman, dark skin tone # E13.1 [1] (👩🏿❤️💋👩🏿) +1F469 1F3FF 200D 1F91D 200D 1F468 1F3FB ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: dark skin tone, light skin tone # E12.0 [1] (👩🏿🤝👨🏻) +1F469 1F3FF 200D 1F91D 200D 1F468 1F3FC ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: dark skin tone, medium-light skin tone #E12.0[1] (👩🏿🤝👨🏼) +1F469 1F3FF 200D 1F91D 200D 1F468 1F3FD ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: dark skin tone, medium skin tone # E12.0 [1] (👩🏿🤝👨🏽) +1F469 1F3FF 200D 1F91D 200D 1F468 1F3FE ; RGI_Emoji_ZWJ_Sequence ; woman and man holding hands: dark skin tone, medium-dark skin tone #E12.0[1] (👩🏿🤝👨🏾) +1F469 1F3FF 200D 1F91D 200D 1F469 1F3FB ; RGI_Emoji_ZWJ_Sequence ; women holding hands: dark skin tone, light skin tone # E12.0 [1] (👩🏿🤝👩🏻) +1F469 1F3FF 200D 1F91D 200D 1F469 1F3FC ; RGI_Emoji_ZWJ_Sequence ; women holding hands: dark skin tone, medium-light skin tone # E12.0 [1] (👩🏿🤝👩🏼) +1F469 1F3FF 200D 1F91D 200D 1F469 1F3FD ; RGI_Emoji_ZWJ_Sequence ; women holding hands: dark skin tone, medium skin tone # E12.0 [1] (👩🏿🤝👩🏽) +1F469 1F3FF 200D 1F91D 200D 1F469 1F3FE ; RGI_Emoji_ZWJ_Sequence ; women holding hands: dark skin tone, medium-dark skin tone # E12.0 [1] (👩🏿🤝👩🏾) +1F9D1 200D 1F91D 200D 1F9D1 ; RGI_Emoji_ZWJ_Sequence ; people holding hands # E12.0 [1] (🧑🤝🧑) +1F9D1 200D 1F9D1 200D 1F9D2 ; RGI_Emoji_ZWJ_Sequence ; family: adult, adult, child # E15.1 [1] (🧑🧑🧒) +1F9D1 200D 1F9D1 200D 1F9D2 200D 1F9D2 ; RGI_Emoji_ZWJ_Sequence ; family: adult, adult, child, child # E15.1 [1] (🧑🧑🧒🧒) +1F9D1 200D 1F9D2 ; RGI_Emoji_ZWJ_Sequence ; family: adult, child # E15.1 [1] 
(🧑🧒) +1F9D1 200D 1F9D2 200D 1F9D2 ; RGI_Emoji_ZWJ_Sequence ; family: adult, child, child # E15.1 [1] (🧑🧒🧒) +1F9D1 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: person, person, light skin tone, medium-light skin tone #E13.1[1] (🧑🏻❤️💋🧑🏼) +1F9D1 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: person, person, light skin tone, medium skin tone #E13.1 [1] (🧑🏻❤️💋🧑🏽) +1F9D1 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: person, person, light skin tone, medium-dark skin tone #E13.1[1] (🧑🏻❤️💋🧑🏾) +1F9D1 1F3FB 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: person, person, light skin tone, dark skin tone # E13.1 [1] (🧑🏻❤️💋🧑🏿) +1F9D1 1F3FB 200D 2764 FE0F 200D 1F9D1 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, light skin tone, medium-light skin tone #E13.1[1] (🧑🏻❤️🧑🏼) +1F9D1 1F3FB 200D 2764 FE0F 200D 1F9D1 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, light skin tone, medium skin tone #E13.1[1] (🧑🏻❤️🧑🏽) +1F9D1 1F3FB 200D 2764 FE0F 200D 1F9D1 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, light skin tone, medium-dark skin tone #E13.1[1] (🧑🏻❤️🧑🏾) +1F9D1 1F3FB 200D 2764 FE0F 200D 1F9D1 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, light skin tone, dark skin tone #E13.1[1] (🧑🏻❤️🧑🏿) +1F9D1 1F3FB 200D 1F91D 200D 1F9D1 1F3FB ; RGI_Emoji_ZWJ_Sequence ; people holding hands: light skin tone # E12.0 [1] (🧑🏻🤝🧑🏻) +1F9D1 1F3FB 200D 1F91D 200D 1F9D1 1F3FC ; RGI_Emoji_ZWJ_Sequence ; people holding hands: light skin tone, medium-light skin tone # E12.1 [1] (🧑🏻🤝🧑🏼) +1F9D1 1F3FB 200D 1F91D 200D 1F9D1 1F3FD ; RGI_Emoji_ZWJ_Sequence ; people holding hands: light skin tone, medium skin tone # E12.1 [1] (🧑🏻🤝🧑🏽) +1F9D1 1F3FB 200D 1F91D 200D 1F9D1 1F3FE ; RGI_Emoji_ZWJ_Sequence ; people holding hands: light skin tone, medium-dark skin tone # E12.1 [1] (🧑🏻🤝🧑🏾) +1F9D1 1F3FB 200D 1F91D 200D 1F9D1 
1F3FF ; RGI_Emoji_ZWJ_Sequence ; people holding hands: light skin tone, dark skin tone # E12.1 [1] (🧑🏻🤝🧑🏿) +1F9D1 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium-light skin tone, light skin tone #E13.1[1] (🧑🏼❤️💋🧑🏻) +1F9D1 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium-light skin tone, medium skin tone #E13.1[1] (🧑🏼❤️💋🧑🏽) +1F9D1 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium-light skin tone, medium-dark skin tone #E13.1[1] (🧑🏼❤️💋🧑🏾) +1F9D1 1F3FC 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium-light skin tone, dark skin tone #E13.1[1] (🧑🏼❤️💋🧑🏿) +1F9D1 1F3FC 200D 2764 FE0F 200D 1F9D1 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium-light skin tone, light skin tone #E13.1[1] (🧑🏼❤️🧑🏻) +1F9D1 1F3FC 200D 2764 FE0F 200D 1F9D1 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium-light skin tone, medium skin tone #E13.1[1] (🧑🏼❤️🧑🏽) +1F9D1 1F3FC 200D 2764 FE0F 200D 1F9D1 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium-light skin tone, medium-dark skin tone #E13.1[1] (🧑🏼❤️🧑🏾) +1F9D1 1F3FC 200D 2764 FE0F 200D 1F9D1 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium-light skin tone, dark skin tone #E13.1[1] (🧑🏼❤️🧑🏿) +1F9D1 1F3FC 200D 1F91D 200D 1F9D1 1F3FB ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-light skin tone, light skin tone # E12.0 [1] (🧑🏼🤝🧑🏻) +1F9D1 1F3FC 200D 1F91D 200D 1F9D1 1F3FC ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-light skin tone # E12.0 [1] (🧑🏼🤝🧑🏼) +1F9D1 1F3FC 200D 1F91D 200D 1F9D1 1F3FD ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-light skin tone, medium skin tone # E12.1 [1] (🧑🏼🤝🧑🏽) +1F9D1 1F3FC 200D 1F91D 200D 1F9D1 1F3FE ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-light skin tone, medium-dark skin 
tone #E12.1[1] (🧑🏼🤝🧑🏾) +1F9D1 1F3FC 200D 1F91D 200D 1F9D1 1F3FF ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-light skin tone, dark skin tone # E12.1 [1] (🧑🏼🤝🧑🏿) +1F9D1 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium skin tone, light skin tone #E13.1 [1] (🧑🏽❤️💋🧑🏻) +1F9D1 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium skin tone, medium-light skin tone #E13.1[1] (🧑🏽❤️💋🧑🏼) +1F9D1 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium skin tone, medium-dark skin tone #E13.1[1] (🧑🏽❤️💋🧑🏾) +1F9D1 1F3FD 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium skin tone, dark skin tone # E13.1 [1] (🧑🏽❤️💋🧑🏿) +1F9D1 1F3FD 200D 2764 FE0F 200D 1F9D1 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium skin tone, light skin tone #E13.1[1] (🧑🏽❤️🧑🏻) +1F9D1 1F3FD 200D 2764 FE0F 200D 1F9D1 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium skin tone, medium-light skin tone #E13.1[1] (🧑🏽❤️🧑🏼) +1F9D1 1F3FD 200D 2764 FE0F 200D 1F9D1 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium skin tone, medium-dark skin tone #E13.1[1] (🧑🏽❤️🧑🏾) +1F9D1 1F3FD 200D 2764 FE0F 200D 1F9D1 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium skin tone, dark skin tone #E13.1[1] (🧑🏽❤️🧑🏿) +1F9D1 1F3FD 200D 1F91D 200D 1F9D1 1F3FB ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium skin tone, light skin tone # E12.0 [1] (🧑🏽🤝🧑🏻) +1F9D1 1F3FD 200D 1F91D 200D 1F9D1 1F3FC ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium skin tone, medium-light skin tone # E12.0 [1] (🧑🏽🤝🧑🏼) +1F9D1 1F3FD 200D 1F91D 200D 1F9D1 1F3FD ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium skin tone # E12.0 [1] (🧑🏽🤝🧑🏽) +1F9D1 1F3FD 200D 1F91D 200D 1F9D1 1F3FE ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium skin tone, 
medium-dark skin tone # E12.1 [1] (🧑🏽🤝🧑🏾) +1F9D1 1F3FD 200D 1F91D 200D 1F9D1 1F3FF ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium skin tone, dark skin tone # E12.1 [1] (🧑🏽🤝🧑🏿) +1F9D1 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium-dark skin tone, light skin tone #E13.1[1] (🧑🏾❤️💋🧑🏻) +1F9D1 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium-dark skin tone, medium-light skin tone #E13.1[1] (🧑🏾❤️💋🧑🏼) +1F9D1 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium-dark skin tone, medium skin tone #E13.1[1] (🧑🏾❤️💋🧑🏽) +1F9D1 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FF; RGI_Emoji_ZWJ_Sequence; kiss: person, person, medium-dark skin tone, dark skin tone #E13.1[1] (🧑🏾❤️💋🧑🏿) +1F9D1 1F3FE 200D 2764 FE0F 200D 1F9D1 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium-dark skin tone, light skin tone #E13.1[1] (🧑🏾❤️🧑🏻) +1F9D1 1F3FE 200D 2764 FE0F 200D 1F9D1 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium-dark skin tone, medium-light skin tone #E13.1[1] (🧑🏾❤️🧑🏼) +1F9D1 1F3FE 200D 2764 FE0F 200D 1F9D1 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium-dark skin tone, medium skin tone #E13.1[1] (🧑🏾❤️🧑🏽) +1F9D1 1F3FE 200D 2764 FE0F 200D 1F9D1 1F3FF ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, medium-dark skin tone, dark skin tone #E13.1[1] (🧑🏾❤️🧑🏿) +1F9D1 1F3FE 200D 1F91D 200D 1F9D1 1F3FB ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-dark skin tone, light skin tone # E12.0 [1] (🧑🏾🤝🧑🏻) +1F9D1 1F3FE 200D 1F91D 200D 1F9D1 1F3FC ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-dark skin tone, medium-light skin tone #E12.0[1] (🧑🏾🤝🧑🏼) +1F9D1 1F3FE 200D 1F91D 200D 1F9D1 1F3FD ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-dark skin tone, medium skin tone # E12.0 [1] (🧑🏾🤝🧑🏽) +1F9D1 1F3FE 200D 1F91D 200D 1F9D1 
1F3FE ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-dark skin tone # E12.0 [1] (🧑🏾🤝🧑🏾) +1F9D1 1F3FE 200D 1F91D 200D 1F9D1 1F3FF ; RGI_Emoji_ZWJ_Sequence ; people holding hands: medium-dark skin tone, dark skin tone # E12.1 [1] (🧑🏾🤝🧑🏿) +1F9D1 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FB; RGI_Emoji_ZWJ_Sequence; kiss: person, person, dark skin tone, light skin tone # E13.1 [1] (🧑🏿❤️💋🧑🏻) +1F9D1 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FC; RGI_Emoji_ZWJ_Sequence; kiss: person, person, dark skin tone, medium-light skin tone #E13.1[1] (🧑🏿❤️💋🧑🏼) +1F9D1 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FD; RGI_Emoji_ZWJ_Sequence; kiss: person, person, dark skin tone, medium skin tone # E13.1 [1] (🧑🏿❤️💋🧑🏽) +1F9D1 1F3FF 200D 2764 FE0F 200D 1F48B 200D 1F9D1 1F3FE; RGI_Emoji_ZWJ_Sequence; kiss: person, person, dark skin tone, medium-dark skin tone #E13.1[1] (🧑🏿❤️💋🧑🏾) +1F9D1 1F3FF 200D 2764 FE0F 200D 1F9D1 1F3FB ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, dark skin tone, light skin tone #E13.1[1] (🧑🏿❤️🧑🏻) +1F9D1 1F3FF 200D 2764 FE0F 200D 1F9D1 1F3FC ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, dark skin tone, medium-light skin tone #E13.1[1] (🧑🏿❤️🧑🏼) +1F9D1 1F3FF 200D 2764 FE0F 200D 1F9D1 1F3FD ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, dark skin tone, medium skin tone #E13.1[1] (🧑🏿❤️🧑🏽) +1F9D1 1F3FF 200D 2764 FE0F 200D 1F9D1 1F3FE ; RGI_Emoji_ZWJ_Sequence ; couple with heart: person, person, dark skin tone, medium-dark skin tone #E13.1[1] (🧑🏿❤️🧑🏾) +1F9D1 1F3FF 200D 1F91D 200D 1F9D1 1F3FB ; RGI_Emoji_ZWJ_Sequence ; people holding hands: dark skin tone, light skin tone # E12.0 [1] (🧑🏿🤝🧑🏻) +1F9D1 1F3FF 200D 1F91D 200D 1F9D1 1F3FC ; RGI_Emoji_ZWJ_Sequence ; people holding hands: dark skin tone, medium-light skin tone # E12.0 [1] (🧑🏿🤝🧑🏼) +1F9D1 1F3FF 200D 1F91D 200D 1F9D1 1F3FD ; RGI_Emoji_ZWJ_Sequence ; people holding hands: dark skin tone, medium skin tone # E12.0 [1] (🧑🏿🤝🧑🏽) +1F9D1 1F3FF 200D 1F91D 200D 1F9D1 
1F3FE ; RGI_Emoji_ZWJ_Sequence ; people holding hands: dark skin tone, medium-dark skin tone # E12.0 [1] (🧑🏿🤝🧑🏾) +1F9D1 1F3FF 200D 1F91D 200D 1F9D1 1F3FF ; RGI_Emoji_ZWJ_Sequence ; people holding hands: dark skin tone # E12.0 [1] (🧑🏿🤝🧑🏿) +1FAF1 1F3FB 200D 1FAF2 1F3FC ; RGI_Emoji_ZWJ_Sequence ; handshake: light skin tone, medium-light skin tone # E14.0 [1] (🫱🏻🫲🏼) +1FAF1 1F3FB 200D 1FAF2 1F3FD ; RGI_Emoji_ZWJ_Sequence ; handshake: light skin tone, medium skin tone # E14.0 [1] (🫱🏻🫲🏽) +1FAF1 1F3FB 200D 1FAF2 1F3FE ; RGI_Emoji_ZWJ_Sequence ; handshake: light skin tone, medium-dark skin tone # E14.0 [1] (🫱🏻🫲🏾) +1FAF1 1F3FB 200D 1FAF2 1F3FF ; RGI_Emoji_ZWJ_Sequence ; handshake: light skin tone, dark skin tone # E14.0 [1] (🫱🏻🫲🏿) +1FAF1 1F3FC 200D 1FAF2 1F3FB ; RGI_Emoji_ZWJ_Sequence ; handshake: medium-light skin tone, light skin tone # E14.0 [1] (🫱🏼🫲🏻) +1FAF1 1F3FC 200D 1FAF2 1F3FD ; RGI_Emoji_ZWJ_Sequence ; handshake: medium-light skin tone, medium skin tone # E14.0 [1] (🫱🏼🫲🏽) +1FAF1 1F3FC 200D 1FAF2 1F3FE ; RGI_Emoji_ZWJ_Sequence ; handshake: medium-light skin tone, medium-dark skin tone # E14.0 [1] (🫱🏼🫲🏾) +1FAF1 1F3FC 200D 1FAF2 1F3FF ; RGI_Emoji_ZWJ_Sequence ; handshake: medium-light skin tone, dark skin tone # E14.0 [1] (🫱🏼🫲🏿) +1FAF1 1F3FD 200D 1FAF2 1F3FB ; RGI_Emoji_ZWJ_Sequence ; handshake: medium skin tone, light skin tone # E14.0 [1] (🫱🏽🫲🏻) +1FAF1 1F3FD 200D 1FAF2 1F3FC ; RGI_Emoji_ZWJ_Sequence ; handshake: medium skin tone, medium-light skin tone # E14.0 [1] (🫱🏽🫲🏼) +1FAF1 1F3FD 200D 1FAF2 1F3FE ; RGI_Emoji_ZWJ_Sequence ; handshake: medium skin tone, medium-dark skin tone # E14.0 [1] (🫱🏽🫲🏾) +1FAF1 1F3FD 200D 1FAF2 1F3FF ; RGI_Emoji_ZWJ_Sequence ; handshake: medium skin tone, dark skin tone # E14.0 [1] (🫱🏽🫲🏿) +1FAF1 1F3FE 200D 1FAF2 1F3FB ; RGI_Emoji_ZWJ_Sequence ; handshake: medium-dark skin tone, light skin tone # E14.0 [1] (🫱🏾🫲🏻) +1FAF1 1F3FE 200D 1FAF2 1F3FC ; RGI_Emoji_ZWJ_Sequence ; handshake: medium-dark skin tone, medium-light skin tone # E14.0 [1] (🫱🏾🫲🏼) 
+1FAF1 1F3FE 200D 1FAF2 1F3FD ; RGI_Emoji_ZWJ_Sequence ; handshake: medium-dark skin tone, medium skin tone # E14.0 [1] (🫱🏾🫲🏽) +1FAF1 1F3FE 200D 1FAF2 1F3FF ; RGI_Emoji_ZWJ_Sequence ; handshake: medium-dark skin tone, dark skin tone # E14.0 [1] (🫱🏾🫲🏿) +1FAF1 1F3FF 200D 1FAF2 1F3FB ; RGI_Emoji_ZWJ_Sequence ; handshake: dark skin tone, light skin tone # E14.0 [1] (🫱🏿🫲🏻) +1FAF1 1F3FF 200D 1FAF2 1F3FC ; RGI_Emoji_ZWJ_Sequence ; handshake: dark skin tone, medium-light skin tone # E14.0 [1] (🫱🏿🫲🏼) +1FAF1 1F3FF 200D 1FAF2 1F3FD ; RGI_Emoji_ZWJ_Sequence ; handshake: dark skin tone, medium skin tone # E14.0 [1] (🫱🏿🫲🏽) +1FAF1 1F3FF 200D 1FAF2 1F3FE ; RGI_Emoji_ZWJ_Sequence ; handshake: dark skin tone, medium-dark skin tone # E14.0 [1] (🫱🏿🫲🏾) + +# Total elements: 331 + +# ================================================ + +# RGI_Emoji_ZWJ_Sequence: Role + +1F3C3 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right # E15.1 [1] (🏃➡️) +1F3C3 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: light skin tone # E15.1 [1] (🏃🏻➡️) +1F3C3 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-light skin tone # E15.1 [1] (🏃🏼➡️) +1F3C3 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium skin tone # E15.1 [1] (🏃🏽➡️) +1F3C3 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-dark skin tone # E15.1 [1] (🏃🏾➡️) +1F3C3 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: dark skin tone # E15.1 [1] (🏃🏿➡️) +1F468 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; man health worker # E4.0 [1] (👨⚕️) +1F468 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; man judge # E4.0 [1] (👨⚖️) +1F468 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pilot # E4.0 [1] (👨✈️) +1F468 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; man farmer # E4.0 [1] (👨🌾) +1F468 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; man cook # E4.0 [1] (👨🍳) +1F468 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; man feeding baby 
# E13.0 [1] (👨🍼) +1F468 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; man student # E4.0 [1] (👨🎓) +1F468 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; man singer # E4.0 [1] (👨🎤) +1F468 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; man artist # E4.0 [1] (👨🎨) +1F468 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; man teacher # E4.0 [1] (👨🏫) +1F468 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; man factory worker # E4.0 [1] (👨🏭) +1F468 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; man technologist # E4.0 [1] (👨💻) +1F468 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; man office worker # E4.0 [1] (👨💼) +1F468 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; man mechanic # E4.0 [1] (👨🔧) +1F468 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; man scientist # E4.0 [1] (👨🔬) +1F468 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; man astronaut # E4.0 [1] (👨🚀) +1F468 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; man firefighter # E4.0 [1] (👨🚒) +1F468 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; man with white cane # E12.0 [1] (👨🦯) +1F468 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with white cane facing right # E15.1 [1] (👨🦯➡️) +1F468 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair # E12.0 [1] (👨🦼) +1F468 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair facing right # E15.1 [1] (👨🦼➡️) +1F468 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair # E12.0 [1] (👨🦽) +1F468 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair facing right # E15.1 [1] (👨🦽➡️) +1F468 1F3FB 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; man health worker: light skin tone # E4.0 [1] (👨🏻⚕️) +1F468 1F3FB 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; man judge: light skin tone # E4.0 [1] (👨🏻⚖️) +1F468 1F3FB 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pilot: light skin tone # E4.0 [1] (👨🏻✈️) +1F468 1F3FB 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; man farmer: light skin tone # E4.0 [1] (👨🏻🌾) +1F468 1F3FB 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; man cook: light skin tone # E4.0 [1] (👨🏻🍳) +1F468 1F3FB 200D 1F37C ; RGI_Emoji_ZWJ_Sequence 
; man feeding baby: light skin tone # E13.0 [1] (👨🏻🍼) +1F468 1F3FB 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; man student: light skin tone # E4.0 [1] (👨🏻🎓) +1F468 1F3FB 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; man singer: light skin tone # E4.0 [1] (👨🏻🎤) +1F468 1F3FB 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; man artist: light skin tone # E4.0 [1] (👨🏻🎨) +1F468 1F3FB 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; man teacher: light skin tone # E4.0 [1] (👨🏻🏫) +1F468 1F3FB 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; man factory worker: light skin tone # E4.0 [1] (👨🏻🏭) +1F468 1F3FB 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; man technologist: light skin tone # E4.0 [1] (👨🏻💻) +1F468 1F3FB 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; man office worker: light skin tone # E4.0 [1] (👨🏻💼) +1F468 1F3FB 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; man mechanic: light skin tone # E4.0 [1] (👨🏻🔧) +1F468 1F3FB 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; man scientist: light skin tone # E4.0 [1] (👨🏻🔬) +1F468 1F3FB 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; man astronaut: light skin tone # E4.0 [1] (👨🏻🚀) +1F468 1F3FB 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; man firefighter: light skin tone # E4.0 [1] (👨🏻🚒) +1F468 1F3FB 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; man with white cane: light skin tone # E12.0 [1] (👨🏻🦯) +1F468 1F3FB 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with white cane facing right: light skin tone # E15.1 [1] (👨🏻🦯➡️) +1F468 1F3FB 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair: light skin tone # E12.0 [1] (👨🏻🦼) +1F468 1F3FB 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair facing right: light skin tone # E15.1 [1] (👨🏻🦼➡️) +1F468 1F3FB 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair: light skin tone # E12.0 [1] (👨🏻🦽) +1F468 1F3FB 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair facing right: light skin tone # E15.1 [1] (👨🏻🦽➡️) +1F468 1F3FC 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; man health worker: medium-light skin tone # 
E4.0 [1] (👨🏼⚕️) +1F468 1F3FC 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; man judge: medium-light skin tone # E4.0 [1] (👨🏼⚖️) +1F468 1F3FC 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pilot: medium-light skin tone # E4.0 [1] (👨🏼✈️) +1F468 1F3FC 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; man farmer: medium-light skin tone # E4.0 [1] (👨🏼🌾) +1F468 1F3FC 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; man cook: medium-light skin tone # E4.0 [1] (👨🏼🍳) +1F468 1F3FC 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; man feeding baby: medium-light skin tone # E13.0 [1] (👨🏼🍼) +1F468 1F3FC 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; man student: medium-light skin tone # E4.0 [1] (👨🏼🎓) +1F468 1F3FC 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; man singer: medium-light skin tone # E4.0 [1] (👨🏼🎤) +1F468 1F3FC 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; man artist: medium-light skin tone # E4.0 [1] (👨🏼🎨) +1F468 1F3FC 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; man teacher: medium-light skin tone # E4.0 [1] (👨🏼🏫) +1F468 1F3FC 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; man factory worker: medium-light skin tone # E4.0 [1] (👨🏼🏭) +1F468 1F3FC 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; man technologist: medium-light skin tone # E4.0 [1] (👨🏼💻) +1F468 1F3FC 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; man office worker: medium-light skin tone # E4.0 [1] (👨🏼💼) +1F468 1F3FC 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; man mechanic: medium-light skin tone # E4.0 [1] (👨🏼🔧) +1F468 1F3FC 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; man scientist: medium-light skin tone # E4.0 [1] (👨🏼🔬) +1F468 1F3FC 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; man astronaut: medium-light skin tone # E4.0 [1] (👨🏼🚀) +1F468 1F3FC 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; man firefighter: medium-light skin tone # E4.0 [1] (👨🏼🚒) +1F468 1F3FC 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; man with white cane: medium-light skin tone # E12.0 [1] (👨🏼🦯) +1F468 1F3FC 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with white cane facing right: medium-light skin tone # E15.1 [1] (👨🏼🦯➡️) +1F468 1F3FC 200D 1F9BC ; 
RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair: medium-light skin tone # E12.0 [1] (👨🏼🦼) +1F468 1F3FC 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair facing right: medium-light skin tone #E15.1 [1] (👨🏼🦼➡️) +1F468 1F3FC 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair: medium-light skin tone # E12.0 [1] (👨🏼🦽) +1F468 1F3FC 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair facing right: medium-light skin tone # E15.1 [1] (👨🏼🦽➡️) +1F468 1F3FD 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; man health worker: medium skin tone # E4.0 [1] (👨🏽⚕️) +1F468 1F3FD 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; man judge: medium skin tone # E4.0 [1] (👨🏽⚖️) +1F468 1F3FD 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pilot: medium skin tone # E4.0 [1] (👨🏽✈️) +1F468 1F3FD 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; man farmer: medium skin tone # E4.0 [1] (👨🏽🌾) +1F468 1F3FD 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; man cook: medium skin tone # E4.0 [1] (👨🏽🍳) +1F468 1F3FD 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; man feeding baby: medium skin tone # E13.0 [1] (👨🏽🍼) +1F468 1F3FD 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; man student: medium skin tone # E4.0 [1] (👨🏽🎓) +1F468 1F3FD 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; man singer: medium skin tone # E4.0 [1] (👨🏽🎤) +1F468 1F3FD 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; man artist: medium skin tone # E4.0 [1] (👨🏽🎨) +1F468 1F3FD 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; man teacher: medium skin tone # E4.0 [1] (👨🏽🏫) +1F468 1F3FD 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; man factory worker: medium skin tone # E4.0 [1] (👨🏽🏭) +1F468 1F3FD 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; man technologist: medium skin tone # E4.0 [1] (👨🏽💻) +1F468 1F3FD 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; man office worker: medium skin tone # E4.0 [1] (👨🏽💼) +1F468 1F3FD 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; man mechanic: medium skin tone # E4.0 [1] (👨🏽🔧) +1F468 1F3FD 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; man scientist: medium skin 
tone # E4.0 [1] (👨🏽🔬) +1F468 1F3FD 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; man astronaut: medium skin tone # E4.0 [1] (👨🏽🚀) +1F468 1F3FD 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; man firefighter: medium skin tone # E4.0 [1] (👨🏽🚒) +1F468 1F3FD 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; man with white cane: medium skin tone # E12.0 [1] (👨🏽🦯) +1F468 1F3FD 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with white cane facing right: medium skin tone # E15.1 [1] (👨🏽🦯➡️) +1F468 1F3FD 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair: medium skin tone # E12.0 [1] (👨🏽🦼) +1F468 1F3FD 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair facing right: medium skin tone # E15.1 [1] (👨🏽🦼➡️) +1F468 1F3FD 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair: medium skin tone # E12.0 [1] (👨🏽🦽) +1F468 1F3FD 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair facing right: medium skin tone # E15.1 [1] (👨🏽🦽➡️) +1F468 1F3FE 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; man health worker: medium-dark skin tone # E4.0 [1] (👨🏾⚕️) +1F468 1F3FE 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; man judge: medium-dark skin tone # E4.0 [1] (👨🏾⚖️) +1F468 1F3FE 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pilot: medium-dark skin tone # E4.0 [1] (👨🏾✈️) +1F468 1F3FE 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; man farmer: medium-dark skin tone # E4.0 [1] (👨🏾🌾) +1F468 1F3FE 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; man cook: medium-dark skin tone # E4.0 [1] (👨🏾🍳) +1F468 1F3FE 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; man feeding baby: medium-dark skin tone # E13.0 [1] (👨🏾🍼) +1F468 1F3FE 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; man student: medium-dark skin tone # E4.0 [1] (👨🏾🎓) +1F468 1F3FE 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; man singer: medium-dark skin tone # E4.0 [1] (👨🏾🎤) +1F468 1F3FE 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; man artist: medium-dark skin tone # E4.0 [1] (👨🏾🎨) +1F468 1F3FE 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; man teacher: 
medium-dark skin tone # E4.0 [1] (👨🏾🏫) +1F468 1F3FE 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; man factory worker: medium-dark skin tone # E4.0 [1] (👨🏾🏭) +1F468 1F3FE 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; man technologist: medium-dark skin tone # E4.0 [1] (👨🏾💻) +1F468 1F3FE 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; man office worker: medium-dark skin tone # E4.0 [1] (👨🏾💼) +1F468 1F3FE 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; man mechanic: medium-dark skin tone # E4.0 [1] (👨🏾🔧) +1F468 1F3FE 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; man scientist: medium-dark skin tone # E4.0 [1] (👨🏾🔬) +1F468 1F3FE 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; man astronaut: medium-dark skin tone # E4.0 [1] (👨🏾🚀) +1F468 1F3FE 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; man firefighter: medium-dark skin tone # E4.0 [1] (👨🏾🚒) +1F468 1F3FE 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; man with white cane: medium-dark skin tone # E12.0 [1] (👨🏾🦯) +1F468 1F3FE 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with white cane facing right: medium-dark skin tone # E15.1 [1] (👨🏾🦯➡️) +1F468 1F3FE 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair: medium-dark skin tone # E12.0 [1] (👨🏾🦼) +1F468 1F3FE 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair facing right: medium-dark skin tone #E15.1 [1] (👨🏾🦼➡️) +1F468 1F3FE 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair: medium-dark skin tone # E12.0 [1] (👨🏾🦽) +1F468 1F3FE 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair facing right: medium-dark skin tone # E15.1 [1] (👨🏾🦽➡️) +1F468 1F3FF 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; man health worker: dark skin tone # E4.0 [1] (👨🏿⚕️) +1F468 1F3FF 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; man judge: dark skin tone # E4.0 [1] (👨🏿⚖️) +1F468 1F3FF 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pilot: dark skin tone # E4.0 [1] (👨🏿✈️) +1F468 1F3FF 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; man farmer: dark skin tone # E4.0 [1] (👨🏿🌾) +1F468 1F3FF 200D 1F373 ; 
RGI_Emoji_ZWJ_Sequence ; man cook: dark skin tone # E4.0 [1] (👨🏿🍳) +1F468 1F3FF 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; man feeding baby: dark skin tone # E13.0 [1] (👨🏿🍼) +1F468 1F3FF 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; man student: dark skin tone # E4.0 [1] (👨🏿🎓) +1F468 1F3FF 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; man singer: dark skin tone # E4.0 [1] (👨🏿🎤) +1F468 1F3FF 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; man artist: dark skin tone # E4.0 [1] (👨🏿🎨) +1F468 1F3FF 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; man teacher: dark skin tone # E4.0 [1] (👨🏿🏫) +1F468 1F3FF 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; man factory worker: dark skin tone # E4.0 [1] (👨🏿🏭) +1F468 1F3FF 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; man technologist: dark skin tone # E4.0 [1] (👨🏿💻) +1F468 1F3FF 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; man office worker: dark skin tone # E4.0 [1] (👨🏿💼) +1F468 1F3FF 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; man mechanic: dark skin tone # E4.0 [1] (👨🏿🔧) +1F468 1F3FF 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; man scientist: dark skin tone # E4.0 [1] (👨🏿🔬) +1F468 1F3FF 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; man astronaut: dark skin tone # E4.0 [1] (👨🏿🚀) +1F468 1F3FF 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; man firefighter: dark skin tone # E4.0 [1] (👨🏿🚒) +1F468 1F3FF 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; man with white cane: dark skin tone # E12.0 [1] (👨🏿🦯) +1F468 1F3FF 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with white cane facing right: dark skin tone # E15.1 [1] (👨🏿🦯➡️) +1F468 1F3FF 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair: dark skin tone # E12.0 [1] (👨🏿🦼) +1F468 1F3FF 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in motorized wheelchair facing right: dark skin tone # E15.1 [1] (👨🏿🦼➡️) +1F468 1F3FF 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair: dark skin tone # E12.0 [1] (👨🏿🦽) +1F468 1F3FF 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in manual wheelchair facing right: dark skin tone # E15.1 [1] (👨🏿🦽➡️) 
+1F469 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman health worker # E4.0 [1] (👩⚕️) +1F469 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman judge # E4.0 [1] (👩⚖️) +1F469 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pilot # E4.0 [1] (👩✈️) +1F469 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; woman farmer # E4.0 [1] (👩🌾) +1F469 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; woman cook # E4.0 [1] (👩🍳) +1F469 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; woman feeding baby # E13.0 [1] (👩🍼) +1F469 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; woman student # E4.0 [1] (👩🎓) +1F469 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; woman singer # E4.0 [1] (👩🎤) +1F469 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; woman artist # E4.0 [1] (👩🎨) +1F469 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; woman teacher # E4.0 [1] (👩🏫) +1F469 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; woman factory worker # E4.0 [1] (👩🏭) +1F469 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; woman technologist # E4.0 [1] (👩💻) +1F469 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; woman office worker # E4.0 [1] (👩💼) +1F469 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; woman mechanic # E4.0 [1] (👩🔧) +1F469 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; woman scientist # E4.0 [1] (👩🔬) +1F469 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; woman astronaut # E4.0 [1] (👩🚀) +1F469 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; woman firefighter # E4.0 [1] (👩🚒) +1F469 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; woman with white cane # E12.0 [1] (👩🦯) +1F469 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with white cane facing right # E15.1 [1] (👩🦯➡️) +1F469 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair # E12.0 [1] (👩🦼) +1F469 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair facing right # E15.1 [1] (👩🦼➡️) +1F469 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair # E12.0 [1] (👩🦽) +1F469 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair facing right # E15.1 [1] (👩🦽➡️) +1F469 1F3FB 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman 
health worker: light skin tone # E4.0 [1] (👩🏻⚕️) +1F469 1F3FB 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman judge: light skin tone # E4.0 [1] (👩🏻⚖️) +1F469 1F3FB 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pilot: light skin tone # E4.0 [1] (👩🏻✈️) +1F469 1F3FB 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; woman farmer: light skin tone # E4.0 [1] (👩🏻🌾) +1F469 1F3FB 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; woman cook: light skin tone # E4.0 [1] (👩🏻🍳) +1F469 1F3FB 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; woman feeding baby: light skin tone # E13.0 [1] (👩🏻🍼) +1F469 1F3FB 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; woman student: light skin tone # E4.0 [1] (👩🏻🎓) +1F469 1F3FB 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; woman singer: light skin tone # E4.0 [1] (👩🏻🎤) +1F469 1F3FB 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; woman artist: light skin tone # E4.0 [1] (👩🏻🎨) +1F469 1F3FB 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; woman teacher: light skin tone # E4.0 [1] (👩🏻🏫) +1F469 1F3FB 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; woman factory worker: light skin tone # E4.0 [1] (👩🏻🏭) +1F469 1F3FB 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; woman technologist: light skin tone # E4.0 [1] (👩🏻💻) +1F469 1F3FB 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; woman office worker: light skin tone # E4.0 [1] (👩🏻💼) +1F469 1F3FB 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; woman mechanic: light skin tone # E4.0 [1] (👩🏻🔧) +1F469 1F3FB 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; woman scientist: light skin tone # E4.0 [1] (👩🏻🔬) +1F469 1F3FB 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; woman astronaut: light skin tone # E4.0 [1] (👩🏻🚀) +1F469 1F3FB 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; woman firefighter: light skin tone # E4.0 [1] (👩🏻🚒) +1F469 1F3FB 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; woman with white cane: light skin tone # E12.0 [1] (👩🏻🦯) +1F469 1F3FB 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with white cane facing right: light skin tone # E15.1 [1] (👩🏻🦯➡️) +1F469 1F3FB 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair: light 
skin tone # E12.0 [1] (👩🏻🦼) +1F469 1F3FB 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair facing right: light skin tone # E15.1 [1] (👩🏻🦼➡️) +1F469 1F3FB 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair: light skin tone # E12.0 [1] (👩🏻🦽) +1F469 1F3FB 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair facing right: light skin tone # E15.1 [1] (👩🏻🦽➡️) +1F469 1F3FC 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman health worker: medium-light skin tone # E4.0 [1] (👩🏼⚕️) +1F469 1F3FC 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman judge: medium-light skin tone # E4.0 [1] (👩🏼⚖️) +1F469 1F3FC 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pilot: medium-light skin tone # E4.0 [1] (👩🏼✈️) +1F469 1F3FC 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; woman farmer: medium-light skin tone # E4.0 [1] (👩🏼🌾) +1F469 1F3FC 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; woman cook: medium-light skin tone # E4.0 [1] (👩🏼🍳) +1F469 1F3FC 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; woman feeding baby: medium-light skin tone # E13.0 [1] (👩🏼🍼) +1F469 1F3FC 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; woman student: medium-light skin tone # E4.0 [1] (👩🏼🎓) +1F469 1F3FC 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; woman singer: medium-light skin tone # E4.0 [1] (👩🏼🎤) +1F469 1F3FC 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; woman artist: medium-light skin tone # E4.0 [1] (👩🏼🎨) +1F469 1F3FC 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; woman teacher: medium-light skin tone # E4.0 [1] (👩🏼🏫) +1F469 1F3FC 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; woman factory worker: medium-light skin tone # E4.0 [1] (👩🏼🏭) +1F469 1F3FC 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; woman technologist: medium-light skin tone # E4.0 [1] (👩🏼💻) +1F469 1F3FC 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; woman office worker: medium-light skin tone # E4.0 [1] (👩🏼💼) +1F469 1F3FC 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; woman mechanic: medium-light skin tone # E4.0 [1] (👩🏼🔧) +1F469 1F3FC 200D 1F52C ; RGI_Emoji_ZWJ_Sequence 
; woman scientist: medium-light skin tone # E4.0 [1] (👩🏼🔬) +1F469 1F3FC 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; woman astronaut: medium-light skin tone # E4.0 [1] (👩🏼🚀) +1F469 1F3FC 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; woman firefighter: medium-light skin tone # E4.0 [1] (👩🏼🚒) +1F469 1F3FC 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; woman with white cane: medium-light skin tone # E12.0 [1] (👩🏼🦯) +1F469 1F3FC 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with white cane facing right: medium-light skin tone # E15.1 [1] (👩🏼🦯➡️) +1F469 1F3FC 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair: medium-light skin tone # E12.0 [1] (👩🏼🦼) +1F469 1F3FC 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair facing right: medium-light skin tone #E15.1[1] (👩🏼🦼➡️) +1F469 1F3FC 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair: medium-light skin tone # E12.0 [1] (👩🏼🦽) +1F469 1F3FC 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair facing right: medium-light skin tone #E15.1 [1] (👩🏼🦽➡️) +1F469 1F3FD 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman health worker: medium skin tone # E4.0 [1] (👩🏽⚕️) +1F469 1F3FD 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman judge: medium skin tone # E4.0 [1] (👩🏽⚖️) +1F469 1F3FD 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pilot: medium skin tone # E4.0 [1] (👩🏽✈️) +1F469 1F3FD 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; woman farmer: medium skin tone # E4.0 [1] (👩🏽🌾) +1F469 1F3FD 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; woman cook: medium skin tone # E4.0 [1] (👩🏽🍳) +1F469 1F3FD 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; woman feeding baby: medium skin tone # E13.0 [1] (👩🏽🍼) +1F469 1F3FD 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; woman student: medium skin tone # E4.0 [1] (👩🏽🎓) +1F469 1F3FD 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; woman singer: medium skin tone # E4.0 [1] (👩🏽🎤) +1F469 1F3FD 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; woman artist: medium skin tone # E4.0 [1] (👩🏽🎨) 
+1F469 1F3FD 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; woman teacher: medium skin tone # E4.0 [1] (👩🏽🏫) +1F469 1F3FD 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; woman factory worker: medium skin tone # E4.0 [1] (👩🏽🏭) +1F469 1F3FD 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; woman technologist: medium skin tone # E4.0 [1] (👩🏽💻) +1F469 1F3FD 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; woman office worker: medium skin tone # E4.0 [1] (👩🏽💼) +1F469 1F3FD 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; woman mechanic: medium skin tone # E4.0 [1] (👩🏽🔧) +1F469 1F3FD 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; woman scientist: medium skin tone # E4.0 [1] (👩🏽🔬) +1F469 1F3FD 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; woman astronaut: medium skin tone # E4.0 [1] (👩🏽🚀) +1F469 1F3FD 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; woman firefighter: medium skin tone # E4.0 [1] (👩🏽🚒) +1F469 1F3FD 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; woman with white cane: medium skin tone # E12.0 [1] (👩🏽🦯) +1F469 1F3FD 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with white cane facing right: medium skin tone # E15.1 [1] (👩🏽🦯➡️) +1F469 1F3FD 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair: medium skin tone # E12.0 [1] (👩🏽🦼) +1F469 1F3FD 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair facing right: medium skin tone # E15.1 [1] (👩🏽🦼➡️) +1F469 1F3FD 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair: medium skin tone # E12.0 [1] (👩🏽🦽) +1F469 1F3FD 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair facing right: medium skin tone # E15.1 [1] (👩🏽🦽➡️) +1F469 1F3FE 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman health worker: medium-dark skin tone # E4.0 [1] (👩🏾⚕️) +1F469 1F3FE 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman judge: medium-dark skin tone # E4.0 [1] (👩🏾⚖️) +1F469 1F3FE 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pilot: medium-dark skin tone # E4.0 [1] (👩🏾✈️) +1F469 1F3FE 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; woman farmer: 
medium-dark skin tone # E4.0 [1] (👩🏾🌾) +1F469 1F3FE 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; woman cook: medium-dark skin tone # E4.0 [1] (👩🏾🍳) +1F469 1F3FE 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; woman feeding baby: medium-dark skin tone # E13.0 [1] (👩🏾🍼) +1F469 1F3FE 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; woman student: medium-dark skin tone # E4.0 [1] (👩🏾🎓) +1F469 1F3FE 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; woman singer: medium-dark skin tone # E4.0 [1] (👩🏾🎤) +1F469 1F3FE 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; woman artist: medium-dark skin tone # E4.0 [1] (👩🏾🎨) +1F469 1F3FE 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; woman teacher: medium-dark skin tone # E4.0 [1] (👩🏾🏫) +1F469 1F3FE 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; woman factory worker: medium-dark skin tone # E4.0 [1] (👩🏾🏭) +1F469 1F3FE 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; woman technologist: medium-dark skin tone # E4.0 [1] (👩🏾💻) +1F469 1F3FE 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; woman office worker: medium-dark skin tone # E4.0 [1] (👩🏾💼) +1F469 1F3FE 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; woman mechanic: medium-dark skin tone # E4.0 [1] (👩🏾🔧) +1F469 1F3FE 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; woman scientist: medium-dark skin tone # E4.0 [1] (👩🏾🔬) +1F469 1F3FE 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; woman astronaut: medium-dark skin tone # E4.0 [1] (👩🏾🚀) +1F469 1F3FE 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; woman firefighter: medium-dark skin tone # E4.0 [1] (👩🏾🚒) +1F469 1F3FE 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; woman with white cane: medium-dark skin tone # E12.0 [1] (👩🏾🦯) +1F469 1F3FE 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with white cane facing right: medium-dark skin tone # E15.1 [1] (👩🏾🦯➡️) +1F469 1F3FE 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair: medium-dark skin tone # E12.0 [1] (👩🏾🦼) +1F469 1F3FE 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair facing right: medium-dark skin tone #E15.1[1] (👩🏾🦼➡️) +1F469 1F3FE 200D 1F9BD ; 
RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair: medium-dark skin tone # E12.0 [1] (👩🏾🦽) +1F469 1F3FE 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair facing right: medium-dark skin tone # E15.1 [1] (👩🏾🦽➡️) +1F469 1F3FF 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman health worker: dark skin tone # E4.0 [1] (👩🏿⚕️) +1F469 1F3FF 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman judge: dark skin tone # E4.0 [1] (👩🏿⚖️) +1F469 1F3FF 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pilot: dark skin tone # E4.0 [1] (👩🏿✈️) +1F469 1F3FF 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; woman farmer: dark skin tone # E4.0 [1] (👩🏿🌾) +1F469 1F3FF 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; woman cook: dark skin tone # E4.0 [1] (👩🏿🍳) +1F469 1F3FF 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; woman feeding baby: dark skin tone # E13.0 [1] (👩🏿🍼) +1F469 1F3FF 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; woman student: dark skin tone # E4.0 [1] (👩🏿🎓) +1F469 1F3FF 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; woman singer: dark skin tone # E4.0 [1] (👩🏿🎤) +1F469 1F3FF 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; woman artist: dark skin tone # E4.0 [1] (👩🏿🎨) +1F469 1F3FF 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; woman teacher: dark skin tone # E4.0 [1] (👩🏿🏫) +1F469 1F3FF 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; woman factory worker: dark skin tone # E4.0 [1] (👩🏿🏭) +1F469 1F3FF 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; woman technologist: dark skin tone # E4.0 [1] (👩🏿💻) +1F469 1F3FF 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; woman office worker: dark skin tone # E4.0 [1] (👩🏿💼) +1F469 1F3FF 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; woman mechanic: dark skin tone # E4.0 [1] (👩🏿🔧) +1F469 1F3FF 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; woman scientist: dark skin tone # E4.0 [1] (👩🏿🔬) +1F469 1F3FF 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; woman astronaut: dark skin tone # E4.0 [1] (👩🏿🚀) +1F469 1F3FF 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; woman firefighter: dark skin tone # E4.0 [1] (👩🏿🚒) +1F469 1F3FF 200D 1F9AF ; 
RGI_Emoji_ZWJ_Sequence ; woman with white cane: dark skin tone # E12.0 [1] (👩🏿🦯) +1F469 1F3FF 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with white cane facing right: dark skin tone # E15.1 [1] (👩🏿🦯➡️) +1F469 1F3FF 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair: dark skin tone # E12.0 [1] (👩🏿🦼) +1F469 1F3FF 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair facing right: dark skin tone # E15.1 [1] (👩🏿🦼➡️) +1F469 1F3FF 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair: dark skin tone # E12.0 [1] (👩🏿🦽) +1F469 1F3FF 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair facing right: dark skin tone # E15.1 [1] (👩🏿🦽➡️) +1F6B6 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right # E15.1 [1] (🚶➡️) +1F6B6 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: light skin tone # E15.1 [1] (🚶🏻➡️) +1F6B6 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-light skin tone # E15.1 [1] (🚶🏼➡️) +1F6B6 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium skin tone # E15.1 [1] (🚶🏽➡️) +1F6B6 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-dark skin tone # E15.1 [1] (🚶🏾➡️) +1F6B6 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: dark skin tone # E15.1 [1] (🚶🏿➡️) +1F9CE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right # E15.1 [1] (🧎➡️) +1F9CE 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: light skin tone # E15.1 [1] (🧎🏻➡️) +1F9CE 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-light skin tone # E15.1 [1] (🧎🏼➡️) +1F9CE 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium skin tone # E15.1 [1] (🧎🏽➡️) +1F9CE 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-dark 
skin tone # E15.1 [1] (🧎🏾➡️) +1F9CE 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: dark skin tone # E15.1 [1] (🧎🏿➡️) +1F9D1 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; health worker # E12.1 [1] (🧑⚕️) +1F9D1 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; judge # E12.1 [1] (🧑⚖️) +1F9D1 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; pilot # E12.1 [1] (🧑✈️) +1F9D1 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer # E12.1 [1] (🧑🌾) +1F9D1 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook # E12.1 [1] (🧑🍳) +1F9D1 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby # E13.0 [1] (🧑🍼) +1F9D1 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus # E13.0 [1] (🧑🎄) +1F9D1 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student # E12.1 [1] (🧑🎓) +1F9D1 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer # E12.1 [1] (🧑🎤) +1F9D1 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist # E12.1 [1] (🧑🎨) +1F9D1 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; teacher # E12.1 [1] (🧑🏫) +1F9D1 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; factory worker # E12.1 [1] (🧑🏭) +1F9D1 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; technologist # E12.1 [1] (🧑💻) +1F9D1 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; office worker # E12.1 [1] (🧑💼) +1F9D1 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; mechanic # E12.1 [1] (🧑🔧) +1F9D1 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; scientist # E12.1 [1] (🧑🔬) +1F9D1 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; astronaut # E12.1 [1] (🧑🚀) +1F9D1 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; firefighter # E12.1 [1] (🧑🚒) +1F9D1 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; person with white cane # E12.1 [1] (🧑🦯) +1F9D1 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person with white cane facing right # E15.1 [1] (🧑🦯➡️) +1F9D1 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair # E12.1 [1] (🧑🦼) +1F9D1 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair facing right # E15.1 [1] (🧑🦼➡️) +1F9D1 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair # E12.1 [1] (🧑🦽) +1F9D1 200D 1F9BD 200D 27A1 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair facing right # E15.1 [1] (🧑🦽➡️) +1F9D1 1F3FB 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; health worker: light skin tone # E12.1 [1] (🧑🏻⚕️) +1F9D1 1F3FB 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; judge: light skin tone # E12.1 [1] (🧑🏻⚖️) +1F9D1 1F3FB 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; pilot: light skin tone # E12.1 [1] (🧑🏻✈️) +1F9D1 1F3FB 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: light skin tone # E12.1 [1] (🧑🏻🌾) +1F9D1 1F3FB 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: light skin tone # E12.1 [1] (🧑🏻🍳) +1F9D1 1F3FB 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: light skin tone # E13.0 [1] (🧑🏻🍼) +1F9D1 1F3FB 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: light skin tone # E13.0 [1] (🧑🏻🎄) +1F9D1 1F3FB 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: light skin tone # E12.1 [1] (🧑🏻🎓) +1F9D1 1F3FB 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: light skin tone # E12.1 [1] (🧑🏻🎤) +1F9D1 1F3FB 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: light skin tone # E12.1 [1] (🧑🏻🎨) +1F9D1 1F3FB 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; teacher: light skin tone # E12.1 [1] (🧑🏻🏫) +1F9D1 1F3FB 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; factory worker: light skin tone # E12.1 [1] (🧑🏻🏭) +1F9D1 1F3FB 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; technologist: light skin tone # E12.1 [1] (🧑🏻💻) +1F9D1 1F3FB 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; office worker: light skin tone # E12.1 [1] (🧑🏻💼) +1F9D1 1F3FB 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; mechanic: light skin tone # E12.1 [1] (🧑🏻🔧) +1F9D1 1F3FB 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; scientist: light skin tone # E12.1 [1] (🧑🏻🔬) +1F9D1 1F3FB 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; astronaut: light skin tone # E12.1 [1] (🧑🏻🚀) +1F9D1 1F3FB 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; firefighter: light skin tone # E12.1 [1] (🧑🏻🚒) +1F9D1 1F3FB 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; person with white cane: light skin tone # E12.1 [1] (🧑🏻🦯) +1F9D1 1F3FB 200D 1F9AF 200D 27A1 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; person with white cane facing right: light skin tone # E15.1 [1] (🧑🏻🦯➡️) +1F9D1 1F3FB 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair: light skin tone # E12.1 [1] (🧑🏻🦼) +1F9D1 1F3FB 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair facing right: light skin tone # E15.1 [1] (🧑🏻🦼➡️) +1F9D1 1F3FB 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair: light skin tone # E12.1 [1] (🧑🏻🦽) +1F9D1 1F3FB 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair facing right: light skin tone # E15.1 [1] (🧑🏻🦽➡️) +1F9D1 1F3FC 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; health worker: medium-light skin tone # E12.1 [1] (🧑🏼⚕️) +1F9D1 1F3FC 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; judge: medium-light skin tone # E12.1 [1] (🧑🏼⚖️) +1F9D1 1F3FC 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; pilot: medium-light skin tone # E12.1 [1] (🧑🏼✈️) +1F9D1 1F3FC 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium-light skin tone # E12.1 [1] (🧑🏼🌾) +1F9D1 1F3FC 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium-light skin tone # E12.1 [1] (🧑🏼🍳) +1F9D1 1F3FC 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium-light skin tone # E13.0 [1] (🧑🏼🍼) +1F9D1 1F3FC 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium-light skin tone # E13.0 [1] (🧑🏼🎄) +1F9D1 1F3FC 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium-light skin tone # E12.1 [1] (🧑🏼🎓) +1F9D1 1F3FC 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium-light skin tone # E12.1 [1] (🧑🏼🎤) +1F9D1 1F3FC 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium-light skin tone # E12.1 [1] (🧑🏼🎨) +1F9D1 1F3FC 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; teacher: medium-light skin tone # E12.1 [1] (🧑🏼🏫) +1F9D1 1F3FC 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; factory worker: medium-light skin tone # E12.1 [1] (🧑🏼🏭) +1F9D1 1F3FC 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; technologist: medium-light skin tone # E12.1 [1] (🧑🏼💻) +1F9D1 1F3FC 200D 1F4BC ; 
RGI_Emoji_ZWJ_Sequence ; office worker: medium-light skin tone # E12.1 [1] (🧑🏼💼) +1F9D1 1F3FC 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; mechanic: medium-light skin tone # E12.1 [1] (🧑🏼🔧) +1F9D1 1F3FC 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; scientist: medium-light skin tone # E12.1 [1] (🧑🏼🔬) +1F9D1 1F3FC 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; astronaut: medium-light skin tone # E12.1 [1] (🧑🏼🚀) +1F9D1 1F3FC 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; firefighter: medium-light skin tone # E12.1 [1] (🧑🏼🚒) +1F9D1 1F3FC 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; person with white cane: medium-light skin tone # E12.1 [1] (🧑🏼🦯) +1F9D1 1F3FC 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person with white cane facing right: medium-light skin tone # E15.1 [1] (🧑🏼🦯➡️) +1F9D1 1F3FC 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair: medium-light skin tone # E12.1 [1] (🧑🏼🦼) +1F9D1 1F3FC 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair facing right: medium-light skin tone #E15.1[1] (🧑🏼🦼➡️) +1F9D1 1F3FC 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair: medium-light skin tone # E12.1 [1] (🧑🏼🦽) +1F9D1 1F3FC 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair facing right: medium-light skin tone #E15.1 [1] (🧑🏼🦽➡️) +1F9D1 1F3FD 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; health worker: medium skin tone # E12.1 [1] (🧑🏽⚕️) +1F9D1 1F3FD 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; judge: medium skin tone # E12.1 [1] (🧑🏽⚖️) +1F9D1 1F3FD 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; pilot: medium skin tone # E12.1 [1] (🧑🏽✈️) +1F9D1 1F3FD 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium skin tone # E12.1 [1] (🧑🏽🌾) +1F9D1 1F3FD 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium skin tone # E12.1 [1] (🧑🏽🍳) +1F9D1 1F3FD 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium skin tone # E13.0 [1] (🧑🏽🍼) +1F9D1 1F3FD 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium skin tone # E13.0 [1] (🧑🏽🎄) 
+1F9D1 1F3FD 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium skin tone # E12.1 [1] (🧑🏽🎓) +1F9D1 1F3FD 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium skin tone # E12.1 [1] (🧑🏽🎤) +1F9D1 1F3FD 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium skin tone # E12.1 [1] (🧑🏽🎨) +1F9D1 1F3FD 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; teacher: medium skin tone # E12.1 [1] (🧑🏽🏫) +1F9D1 1F3FD 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; factory worker: medium skin tone # E12.1 [1] (🧑🏽🏭) +1F9D1 1F3FD 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; technologist: medium skin tone # E12.1 [1] (🧑🏽💻) +1F9D1 1F3FD 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; office worker: medium skin tone # E12.1 [1] (🧑🏽💼) +1F9D1 1F3FD 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; mechanic: medium skin tone # E12.1 [1] (🧑🏽🔧) +1F9D1 1F3FD 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; scientist: medium skin tone # E12.1 [1] (🧑🏽🔬) +1F9D1 1F3FD 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; astronaut: medium skin tone # E12.1 [1] (🧑🏽🚀) +1F9D1 1F3FD 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; firefighter: medium skin tone # E12.1 [1] (🧑🏽🚒) +1F9D1 1F3FD 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; person with white cane: medium skin tone # E12.1 [1] (🧑🏽🦯) +1F9D1 1F3FD 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person with white cane facing right: medium skin tone # E15.1 [1] (🧑🏽🦯➡️) +1F9D1 1F3FD 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair: medium skin tone # E12.1 [1] (🧑🏽🦼) +1F9D1 1F3FD 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair facing right: medium skin tone # E15.1 [1] (🧑🏽🦼➡️) +1F9D1 1F3FD 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair: medium skin tone # E12.1 [1] (🧑🏽🦽) +1F9D1 1F3FD 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair facing right: medium skin tone # E15.1 [1] (🧑🏽🦽➡️) +1F9D1 1F3FE 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; health worker: medium-dark skin tone # E12.1 [1] (🧑🏾⚕️) +1F9D1 1F3FE 200D 2696 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; judge: medium-dark skin tone # E12.1 [1] (🧑🏾⚖️) +1F9D1 1F3FE 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; pilot: medium-dark skin tone # E12.1 [1] (🧑🏾✈️) +1F9D1 1F3FE 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: medium-dark skin tone # E12.1 [1] (🧑🏾🌾) +1F9D1 1F3FE 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: medium-dark skin tone # E12.1 [1] (🧑🏾🍳) +1F9D1 1F3FE 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: medium-dark skin tone # E13.0 [1] (🧑🏾🍼) +1F9D1 1F3FE 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: medium-dark skin tone # E13.0 [1] (🧑🏾🎄) +1F9D1 1F3FE 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: medium-dark skin tone # E12.1 [1] (🧑🏾🎓) +1F9D1 1F3FE 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: medium-dark skin tone # E12.1 [1] (🧑🏾🎤) +1F9D1 1F3FE 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: medium-dark skin tone # E12.1 [1] (🧑🏾🎨) +1F9D1 1F3FE 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; teacher: medium-dark skin tone # E12.1 [1] (🧑🏾🏫) +1F9D1 1F3FE 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; factory worker: medium-dark skin tone # E12.1 [1] (🧑🏾🏭) +1F9D1 1F3FE 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; technologist: medium-dark skin tone # E12.1 [1] (🧑🏾💻) +1F9D1 1F3FE 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; office worker: medium-dark skin tone # E12.1 [1] (🧑🏾💼) +1F9D1 1F3FE 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; mechanic: medium-dark skin tone # E12.1 [1] (🧑🏾🔧) +1F9D1 1F3FE 200D 1F52C ; RGI_Emoji_ZWJ_Sequence ; scientist: medium-dark skin tone # E12.1 [1] (🧑🏾🔬) +1F9D1 1F3FE 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; astronaut: medium-dark skin tone # E12.1 [1] (🧑🏾🚀) +1F9D1 1F3FE 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; firefighter: medium-dark skin tone # E12.1 [1] (🧑🏾🚒) +1F9D1 1F3FE 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; person with white cane: medium-dark skin tone # E12.1 [1] (🧑🏾🦯) +1F9D1 1F3FE 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person with white cane facing right: medium-dark skin tone # E15.1 [1] (🧑🏾🦯➡️) +1F9D1 1F3FE 200D 1F9BC ; 
RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair: medium-dark skin tone # E12.1 [1] (🧑🏾🦼) +1F9D1 1F3FE 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair facing right: medium-dark skin tone #E15.1[1] (🧑🏾🦼➡️) +1F9D1 1F3FE 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair: medium-dark skin tone # E12.1 [1] (🧑🏾🦽) +1F9D1 1F3FE 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair facing right: medium-dark skin tone #E15.1 [1] (🧑🏾🦽➡️) +1F9D1 1F3FF 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; health worker: dark skin tone # E12.1 [1] (🧑🏿⚕️) +1F9D1 1F3FF 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; judge: dark skin tone # E12.1 [1] (🧑🏿⚖️) +1F9D1 1F3FF 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; pilot: dark skin tone # E12.1 [1] (🧑🏿✈️) +1F9D1 1F3FF 200D 1F33E ; RGI_Emoji_ZWJ_Sequence ; farmer: dark skin tone # E12.1 [1] (🧑🏿🌾) +1F9D1 1F3FF 200D 1F373 ; RGI_Emoji_ZWJ_Sequence ; cook: dark skin tone # E12.1 [1] (🧑🏿🍳) +1F9D1 1F3FF 200D 1F37C ; RGI_Emoji_ZWJ_Sequence ; person feeding baby: dark skin tone # E13.0 [1] (🧑🏿🍼) +1F9D1 1F3FF 200D 1F384 ; RGI_Emoji_ZWJ_Sequence ; mx claus: dark skin tone # E13.0 [1] (🧑🏿🎄) +1F9D1 1F3FF 200D 1F393 ; RGI_Emoji_ZWJ_Sequence ; student: dark skin tone # E12.1 [1] (🧑🏿🎓) +1F9D1 1F3FF 200D 1F3A4 ; RGI_Emoji_ZWJ_Sequence ; singer: dark skin tone # E12.1 [1] (🧑🏿🎤) +1F9D1 1F3FF 200D 1F3A8 ; RGI_Emoji_ZWJ_Sequence ; artist: dark skin tone # E12.1 [1] (🧑🏿🎨) +1F9D1 1F3FF 200D 1F3EB ; RGI_Emoji_ZWJ_Sequence ; teacher: dark skin tone # E12.1 [1] (🧑🏿🏫) +1F9D1 1F3FF 200D 1F3ED ; RGI_Emoji_ZWJ_Sequence ; factory worker: dark skin tone # E12.1 [1] (🧑🏿🏭) +1F9D1 1F3FF 200D 1F4BB ; RGI_Emoji_ZWJ_Sequence ; technologist: dark skin tone # E12.1 [1] (🧑🏿💻) +1F9D1 1F3FF 200D 1F4BC ; RGI_Emoji_ZWJ_Sequence ; office worker: dark skin tone # E12.1 [1] (🧑🏿💼) +1F9D1 1F3FF 200D 1F527 ; RGI_Emoji_ZWJ_Sequence ; mechanic: dark skin tone # E12.1 [1] (🧑🏿🔧) +1F9D1 1F3FF 200D 1F52C ; 
RGI_Emoji_ZWJ_Sequence ; scientist: dark skin tone # E12.1 [1] (🧑🏿🔬) +1F9D1 1F3FF 200D 1F680 ; RGI_Emoji_ZWJ_Sequence ; astronaut: dark skin tone # E12.1 [1] (🧑🏿🚀) +1F9D1 1F3FF 200D 1F692 ; RGI_Emoji_ZWJ_Sequence ; firefighter: dark skin tone # E12.1 [1] (🧑🏿🚒) +1F9D1 1F3FF 200D 1F9AF ; RGI_Emoji_ZWJ_Sequence ; person with white cane: dark skin tone # E12.1 [1] (🧑🏿🦯) +1F9D1 1F3FF 200D 1F9AF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person with white cane facing right: dark skin tone # E15.1 [1] (🧑🏿🦯➡️) +1F9D1 1F3FF 200D 1F9BC ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair: dark skin tone # E12.1 [1] (🧑🏿🦼) +1F9D1 1F3FF 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in motorized wheelchair facing right: dark skin tone # E15.1 [1] (🧑🏿🦼➡️) +1F9D1 1F3FF 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair: dark skin tone # E12.1 [1] (🧑🏿🦽) +1F9D1 1F3FF 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair facing right: dark skin tone # E15.1 [1] (🧑🏿🦽➡️) + +# Total elements: 438 + +# ================================================ + +# RGI_Emoji_ZWJ_Sequence: Gendered + +26F9 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bouncing ball: light skin tone # E4.0 [1] (⛹🏻♀️) +26F9 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bouncing ball: light skin tone # E4.0 [1] (⛹🏻♂️) +26F9 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bouncing ball: medium-light skin tone # E4.0 [1] (⛹🏼♀️) +26F9 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bouncing ball: medium-light skin tone # E4.0 [1] (⛹🏼♂️) +26F9 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bouncing ball: medium skin tone # E4.0 [1] (⛹🏽♀️) +26F9 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bouncing ball: medium skin tone # E4.0 [1] (⛹🏽♂️) +26F9 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bouncing ball: medium-dark skin tone # E4.0 [1] (⛹🏾♀️) +26F9 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bouncing ball: 
medium-dark skin tone # E4.0 [1] (⛹🏾♂️) +26F9 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bouncing ball: dark skin tone # E4.0 [1] (⛹🏿♀️) +26F9 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bouncing ball: dark skin tone # E4.0 [1] (⛹🏿♂️) +26F9 FE0F 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bouncing ball # E4.0 [1] (⛹️♀️) +26F9 FE0F 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bouncing ball # E4.0 [1] (⛹️♂️) +1F3C3 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running # E4.0 [1] (🏃♀️) +1F3C3 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running facing right # E15.1 [1] (🏃♀️➡️) +1F3C3 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running # E4.0 [1] (🏃♂️) +1F3C3 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running facing right # E15.1 [1] (🏃♂️➡️) +1F3C3 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running: light skin tone # E4.0 [1] (🏃🏻♀️) +1F3C3 1F3FB 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running facing right: light skin tone # E15.1 [1] (🏃🏻♀️➡️) +1F3C3 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running: light skin tone # E4.0 [1] (🏃🏻♂️) +1F3C3 1F3FB 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running facing right: light skin tone # E15.1 [1] (🏃🏻♂️➡️) +1F3C3 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running: medium-light skin tone # E4.0 [1] (🏃🏼♀️) +1F3C3 1F3FC 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running facing right: medium-light skin tone # E15.1 [1] (🏃🏼♀️➡️) +1F3C3 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running: medium-light skin tone # E4.0 [1] (🏃🏼♂️) +1F3C3 1F3FC 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running facing right: medium-light skin tone # E15.1 [1] (🏃🏼♂️➡️) +1F3C3 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running: medium skin tone # E4.0 [1] (🏃🏽♀️) +1F3C3 1F3FD 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running facing right: medium 
skin tone # E15.1 [1] (🏃🏽♀️➡️) +1F3C3 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running: medium skin tone # E4.0 [1] (🏃🏽♂️) +1F3C3 1F3FD 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running facing right: medium skin tone # E15.1 [1] (🏃🏽♂️➡️) +1F3C3 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running: medium-dark skin tone # E4.0 [1] (🏃🏾♀️) +1F3C3 1F3FE 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running facing right: medium-dark skin tone # E15.1 [1] (🏃🏾♀️➡️) +1F3C3 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running: medium-dark skin tone # E4.0 [1] (🏃🏾♂️) +1F3C3 1F3FE 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running facing right: medium-dark skin tone # E15.1 [1] (🏃🏾♂️➡️) +1F3C3 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running: dark skin tone # E4.0 [1] (🏃🏿♀️) +1F3C3 1F3FF 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman running facing right: dark skin tone # E15.1 [1] (🏃🏿♀️➡️) +1F3C3 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running: dark skin tone # E4.0 [1] (🏃🏿♂️) +1F3C3 1F3FF 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man running facing right: dark skin tone # E15.1 [1] (🏃🏿♂️➡️) +1F3C4 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman surfing # E4.0 [1] (🏄♀️) +1F3C4 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man surfing # E4.0 [1] (🏄♂️) +1F3C4 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman surfing: light skin tone # E4.0 [1] (🏄🏻♀️) +1F3C4 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man surfing: light skin tone # E4.0 [1] (🏄🏻♂️) +1F3C4 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman surfing: medium-light skin tone # E4.0 [1] (🏄🏼♀️) +1F3C4 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man surfing: medium-light skin tone # E4.0 [1] (🏄🏼♂️) +1F3C4 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman surfing: medium skin tone # E4.0 [1] (🏄🏽♀️) +1F3C4 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man surfing: medium 
skin tone # E4.0 [1] (🏄🏽♂️) +1F3C4 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman surfing: medium-dark skin tone # E4.0 [1] (🏄🏾♀️) +1F3C4 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man surfing: medium-dark skin tone # E4.0 [1] (🏄🏾♂️) +1F3C4 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman surfing: dark skin tone # E4.0 [1] (🏄🏿♀️) +1F3C4 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man surfing: dark skin tone # E4.0 [1] (🏄🏿♂️) +1F3CA 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman swimming # E4.0 [1] (🏊♀️) +1F3CA 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man swimming # E4.0 [1] (🏊♂️) +1F3CA 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman swimming: light skin tone # E4.0 [1] (🏊🏻♀️) +1F3CA 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man swimming: light skin tone # E4.0 [1] (🏊🏻♂️) +1F3CA 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman swimming: medium-light skin tone # E4.0 [1] (🏊🏼♀️) +1F3CA 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man swimming: medium-light skin tone # E4.0 [1] (🏊🏼♂️) +1F3CA 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman swimming: medium skin tone # E4.0 [1] (🏊🏽♀️) +1F3CA 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man swimming: medium skin tone # E4.0 [1] (🏊🏽♂️) +1F3CA 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman swimming: medium-dark skin tone # E4.0 [1] (🏊🏾♀️) +1F3CA 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man swimming: medium-dark skin tone # E4.0 [1] (🏊🏾♂️) +1F3CA 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman swimming: dark skin tone # E4.0 [1] (🏊🏿♀️) +1F3CA 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man swimming: dark skin tone # E4.0 [1] (🏊🏿♂️) +1F3CB 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman lifting weights: light skin tone # E4.0 [1] (🏋🏻♀️) +1F3CB 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man lifting weights: light skin tone # E4.0 [1] (🏋🏻♂️) +1F3CB 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman lifting weights: medium-light skin tone 
# E4.0 [1] (🏋🏼♀️) +1F3CB 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man lifting weights: medium-light skin tone # E4.0 [1] (🏋🏼♂️) +1F3CB 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman lifting weights: medium skin tone # E4.0 [1] (🏋🏽♀️) +1F3CB 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man lifting weights: medium skin tone # E4.0 [1] (🏋🏽♂️) +1F3CB 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman lifting weights: medium-dark skin tone # E4.0 [1] (🏋🏾♀️) +1F3CB 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man lifting weights: medium-dark skin tone # E4.0 [1] (🏋🏾♂️) +1F3CB 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman lifting weights: dark skin tone # E4.0 [1] (🏋🏿♀️) +1F3CB 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man lifting weights: dark skin tone # E4.0 [1] (🏋🏿♂️) +1F3CB FE0F 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman lifting weights # E4.0 [1] (🏋️♀️) +1F3CB FE0F 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man lifting weights # E4.0 [1] (🏋️♂️) +1F3CC 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman golfing: light skin tone # E4.0 [1] (🏌🏻♀️) +1F3CC 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man golfing: light skin tone # E4.0 [1] (🏌🏻♂️) +1F3CC 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman golfing: medium-light skin tone # E4.0 [1] (🏌🏼♀️) +1F3CC 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man golfing: medium-light skin tone # E4.0 [1] (🏌🏼♂️) +1F3CC 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman golfing: medium skin tone # E4.0 [1] (🏌🏽♀️) +1F3CC 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man golfing: medium skin tone # E4.0 [1] (🏌🏽♂️) +1F3CC 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman golfing: medium-dark skin tone # E4.0 [1] (🏌🏾♀️) +1F3CC 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man golfing: medium-dark skin tone # E4.0 [1] (🏌🏾♂️) +1F3CC 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman golfing: dark skin tone # E4.0 [1] (🏌🏿♀️) +1F3CC 1F3FF 200D 2642 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; man golfing: dark skin tone # E4.0 [1] (🏌🏿♂️) +1F3CC FE0F 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman golfing # E4.0 [1] (🏌️♀️) +1F3CC FE0F 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man golfing # E4.0 [1] (🏌️♂️) +1F46E 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman police officer # E4.0 [1] (👮♀️) +1F46E 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man police officer # E4.0 [1] (👮♂️) +1F46E 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman police officer: light skin tone # E4.0 [1] (👮🏻♀️) +1F46E 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man police officer: light skin tone # E4.0 [1] (👮🏻♂️) +1F46E 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman police officer: medium-light skin tone # E4.0 [1] (👮🏼♀️) +1F46E 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man police officer: medium-light skin tone # E4.0 [1] (👮🏼♂️) +1F46E 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman police officer: medium skin tone # E4.0 [1] (👮🏽♀️) +1F46E 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man police officer: medium skin tone # E4.0 [1] (👮🏽♂️) +1F46E 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman police officer: medium-dark skin tone # E4.0 [1] (👮🏾♀️) +1F46E 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man police officer: medium-dark skin tone # E4.0 [1] (👮🏾♂️) +1F46E 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman police officer: dark skin tone # E4.0 [1] (👮🏿♀️) +1F46E 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man police officer: dark skin tone # E4.0 [1] (👮🏿♂️) +1F46F 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; women with bunny ears # E4.0 [1] (👯♀️) +1F46F 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; men with bunny ears # E4.0 [1] (👯♂️) +1F470 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with veil # E13.0 [1] (👰♀️) +1F470 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with veil # E13.0 [1] (👰♂️) +1F470 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with veil: light skin tone # E13.0 [1] (👰🏻♀️) +1F470 1F3FB 
200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with veil: light skin tone # E13.0 [1] (👰🏻♂️) +1F470 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with veil: medium-light skin tone # E13.0 [1] (👰🏼♀️) +1F470 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with veil: medium-light skin tone # E13.0 [1] (👰🏼♂️) +1F470 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with veil: medium skin tone # E13.0 [1] (👰🏽♀️) +1F470 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with veil: medium skin tone # E13.0 [1] (👰🏽♂️) +1F470 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with veil: medium-dark skin tone # E13.0 [1] (👰🏾♀️) +1F470 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with veil: medium-dark skin tone # E13.0 [1] (👰🏾♂️) +1F470 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman with veil: dark skin tone # E13.0 [1] (👰🏿♀️) +1F470 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man with veil: dark skin tone # E13.0 [1] (👰🏿♂️) +1F471 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: blond hair # E4.0 [1] (👱♀️) +1F471 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: blond hair # E4.0 [1] (👱♂️) +1F471 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: light skin tone, blond hair # E4.0 [1] (👱🏻♀️) +1F471 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: light skin tone, blond hair # E4.0 [1] (👱🏻♂️) +1F471 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: medium-light skin tone, blond hair # E4.0 [1] (👱🏼♀️) +1F471 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: medium-light skin tone, blond hair # E4.0 [1] (👱🏼♂️) +1F471 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: medium skin tone, blond hair # E4.0 [1] (👱🏽♀️) +1F471 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: medium skin tone, blond hair # E4.0 [1] (👱🏽♂️) +1F471 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: medium-dark skin tone, blond hair # E4.0 [1] (👱🏾♀️) +1F471 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: medium-dark skin tone, blond hair # 
E4.0 [1] (👱🏾♂️) +1F471 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: dark skin tone, blond hair # E4.0 [1] (👱🏿♀️) +1F471 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: dark skin tone, blond hair # E4.0 [1] (👱🏿♂️) +1F473 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman wearing turban # E4.0 [1] (👳♀️) +1F473 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man wearing turban # E4.0 [1] (👳♂️) +1F473 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman wearing turban: light skin tone # E4.0 [1] (👳🏻♀️) +1F473 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man wearing turban: light skin tone # E4.0 [1] (👳🏻♂️) +1F473 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman wearing turban: medium-light skin tone # E4.0 [1] (👳🏼♀️) +1F473 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man wearing turban: medium-light skin tone # E4.0 [1] (👳🏼♂️) +1F473 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman wearing turban: medium skin tone # E4.0 [1] (👳🏽♀️) +1F473 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man wearing turban: medium skin tone # E4.0 [1] (👳🏽♂️) +1F473 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman wearing turban: medium-dark skin tone # E4.0 [1] (👳🏾♀️) +1F473 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man wearing turban: medium-dark skin tone # E4.0 [1] (👳🏾♂️) +1F473 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman wearing turban: dark skin tone # E4.0 [1] (👳🏿♀️) +1F473 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man wearing turban: dark skin tone # E4.0 [1] (👳🏿♂️) +1F477 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman construction worker # E4.0 [1] (👷♀️) +1F477 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man construction worker # E4.0 [1] (👷♂️) +1F477 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman construction worker: light skin tone # E4.0 [1] (👷🏻♀️) +1F477 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man construction worker: light skin tone # E4.0 [1] (👷🏻♂️) +1F477 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman 
construction worker: medium-light skin tone # E4.0 [1] (👷🏼♀️) +1F477 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man construction worker: medium-light skin tone # E4.0 [1] (👷🏼♂️) +1F477 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman construction worker: medium skin tone # E4.0 [1] (👷🏽♀️) +1F477 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man construction worker: medium skin tone # E4.0 [1] (👷🏽♂️) +1F477 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman construction worker: medium-dark skin tone # E4.0 [1] (👷🏾♀️) +1F477 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man construction worker: medium-dark skin tone # E4.0 [1] (👷🏾♂️) +1F477 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman construction worker: dark skin tone # E4.0 [1] (👷🏿♀️) +1F477 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man construction worker: dark skin tone # E4.0 [1] (👷🏿♂️) +1F481 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman tipping hand # E4.0 [1] (💁♀️) +1F481 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man tipping hand # E4.0 [1] (💁♂️) +1F481 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman tipping hand: light skin tone # E4.0 [1] (💁🏻♀️) +1F481 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man tipping hand: light skin tone # E4.0 [1] (💁🏻♂️) +1F481 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman tipping hand: medium-light skin tone # E4.0 [1] (💁🏼♀️) +1F481 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man tipping hand: medium-light skin tone # E4.0 [1] (💁🏼♂️) +1F481 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman tipping hand: medium skin tone # E4.0 [1] (💁🏽♀️) +1F481 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man tipping hand: medium skin tone # E4.0 [1] (💁🏽♂️) +1F481 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman tipping hand: medium-dark skin tone # E4.0 [1] (💁🏾♀️) +1F481 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man tipping hand: medium-dark skin tone # E4.0 [1] (💁🏾♂️) +1F481 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; 
woman tipping hand: dark skin tone # E4.0 [1] (💁🏿♀️) +1F481 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man tipping hand: dark skin tone # E4.0 [1] (💁🏿♂️) +1F482 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman guard # E4.0 [1] (💂♀️) +1F482 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man guard # E4.0 [1] (💂♂️) +1F482 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman guard: light skin tone # E4.0 [1] (💂🏻♀️) +1F482 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man guard: light skin tone # E4.0 [1] (💂🏻♂️) +1F482 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman guard: medium-light skin tone # E4.0 [1] (💂🏼♀️) +1F482 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man guard: medium-light skin tone # E4.0 [1] (💂🏼♂️) +1F482 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman guard: medium skin tone # E4.0 [1] (💂🏽♀️) +1F482 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man guard: medium skin tone # E4.0 [1] (💂🏽♂️) +1F482 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman guard: medium-dark skin tone # E4.0 [1] (💂🏾♀️) +1F482 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man guard: medium-dark skin tone # E4.0 [1] (💂🏾♂️) +1F482 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman guard: dark skin tone # E4.0 [1] (💂🏿♀️) +1F482 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man guard: dark skin tone # E4.0 [1] (💂🏿♂️) +1F486 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting massage # E4.0 [1] (💆♀️) +1F486 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting massage # E4.0 [1] (💆♂️) +1F486 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting massage: light skin tone # E4.0 [1] (💆🏻♀️) +1F486 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting massage: light skin tone # E4.0 [1] (💆🏻♂️) +1F486 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting massage: medium-light skin tone # E4.0 [1] (💆🏼♀️) +1F486 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting massage: medium-light skin tone # E4.0 [1] (💆🏼♂️) +1F486 1F3FD 200D 
2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting massage: medium skin tone # E4.0 [1] (💆🏽♀️) +1F486 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting massage: medium skin tone # E4.0 [1] (💆🏽♂️) +1F486 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting massage: medium-dark skin tone # E4.0 [1] (💆🏾♀️) +1F486 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting massage: medium-dark skin tone # E4.0 [1] (💆🏾♂️) +1F486 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting massage: dark skin tone # E4.0 [1] (💆🏿♀️) +1F486 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting massage: dark skin tone # E4.0 [1] (💆🏿♂️) +1F487 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting haircut # E4.0 [1] (💇♀️) +1F487 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting haircut # E4.0 [1] (💇♂️) +1F487 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting haircut: light skin tone # E4.0 [1] (💇🏻♀️) +1F487 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting haircut: light skin tone # E4.0 [1] (💇🏻♂️) +1F487 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting haircut: medium-light skin tone # E4.0 [1] (💇🏼♀️) +1F487 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting haircut: medium-light skin tone # E4.0 [1] (💇🏼♂️) +1F487 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting haircut: medium skin tone # E4.0 [1] (💇🏽♀️) +1F487 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting haircut: medium skin tone # E4.0 [1] (💇🏽♂️) +1F487 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting haircut: medium-dark skin tone # E4.0 [1] (💇🏾♀️) +1F487 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting haircut: medium-dark skin tone # E4.0 [1] (💇🏾♂️) +1F487 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman getting haircut: dark skin tone # E4.0 [1] (💇🏿♀️) +1F487 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man getting haircut: dark skin tone # E4.0 [1] (💇🏿♂️) +1F575 1F3FB 200D 2640 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; woman detective: light skin tone # E4.0 [1] (🕵🏻♀️) +1F575 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man detective: light skin tone # E4.0 [1] (🕵🏻♂️) +1F575 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman detective: medium-light skin tone # E4.0 [1] (🕵🏼♀️) +1F575 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man detective: medium-light skin tone # E4.0 [1] (🕵🏼♂️) +1F575 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman detective: medium skin tone # E4.0 [1] (🕵🏽♀️) +1F575 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man detective: medium skin tone # E4.0 [1] (🕵🏽♂️) +1F575 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman detective: medium-dark skin tone # E4.0 [1] (🕵🏾♀️) +1F575 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man detective: medium-dark skin tone # E4.0 [1] (🕵🏾♂️) +1F575 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman detective: dark skin tone # E4.0 [1] (🕵🏿♀️) +1F575 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man detective: dark skin tone # E4.0 [1] (🕵🏿♂️) +1F575 FE0F 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman detective # E4.0 [1] (🕵️♀️) +1F575 FE0F 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man detective # E4.0 [1] (🕵️♂️) +1F645 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing NO # E4.0 [1] (🙅♀️) +1F645 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing NO # E4.0 [1] (🙅♂️) +1F645 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing NO: light skin tone # E4.0 [1] (🙅🏻♀️) +1F645 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing NO: light skin tone # E4.0 [1] (🙅🏻♂️) +1F645 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing NO: medium-light skin tone # E4.0 [1] (🙅🏼♀️) +1F645 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing NO: medium-light skin tone # E4.0 [1] (🙅🏼♂️) +1F645 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing NO: medium skin tone # E4.0 [1] (🙅🏽♀️) +1F645 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; 
man gesturing NO: medium skin tone # E4.0 [1] (🙅🏽♂️) +1F645 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing NO: medium-dark skin tone # E4.0 [1] (🙅🏾♀️) +1F645 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing NO: medium-dark skin tone # E4.0 [1] (🙅🏾♂️) +1F645 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing NO: dark skin tone # E4.0 [1] (🙅🏿♀️) +1F645 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing NO: dark skin tone # E4.0 [1] (🙅🏿♂️) +1F646 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing OK # E4.0 [1] (🙆♀️) +1F646 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing OK # E4.0 [1] (🙆♂️) +1F646 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing OK: light skin tone # E4.0 [1] (🙆🏻♀️) +1F646 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing OK: light skin tone # E4.0 [1] (🙆🏻♂️) +1F646 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing OK: medium-light skin tone # E4.0 [1] (🙆🏼♀️) +1F646 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing OK: medium-light skin tone # E4.0 [1] (🙆🏼♂️) +1F646 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing OK: medium skin tone # E4.0 [1] (🙆🏽♀️) +1F646 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing OK: medium skin tone # E4.0 [1] (🙆🏽♂️) +1F646 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing OK: medium-dark skin tone # E4.0 [1] (🙆🏾♀️) +1F646 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing OK: medium-dark skin tone # E4.0 [1] (🙆🏾♂️) +1F646 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman gesturing OK: dark skin tone # E4.0 [1] (🙆🏿♀️) +1F646 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man gesturing OK: dark skin tone # E4.0 [1] (🙆🏿♂️) +1F647 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bowing # E4.0 [1] (🙇♀️) +1F647 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bowing # E4.0 [1] (🙇♂️) +1F647 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bowing: light 
skin tone # E4.0 [1] (🙇🏻♀️) +1F647 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bowing: light skin tone # E4.0 [1] (🙇🏻♂️) +1F647 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bowing: medium-light skin tone # E4.0 [1] (🙇🏼♀️) +1F647 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bowing: medium-light skin tone # E4.0 [1] (🙇🏼♂️) +1F647 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bowing: medium skin tone # E4.0 [1] (🙇🏽♀️) +1F647 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bowing: medium skin tone # E4.0 [1] (🙇🏽♂️) +1F647 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bowing: medium-dark skin tone # E4.0 [1] (🙇🏾♀️) +1F647 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bowing: medium-dark skin tone # E4.0 [1] (🙇🏾♂️) +1F647 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman bowing: dark skin tone # E4.0 [1] (🙇🏿♀️) +1F647 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man bowing: dark skin tone # E4.0 [1] (🙇🏿♂️) +1F64B 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman raising hand # E4.0 [1] (🙋♀️) +1F64B 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man raising hand # E4.0 [1] (🙋♂️) +1F64B 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman raising hand: light skin tone # E4.0 [1] (🙋🏻♀️) +1F64B 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man raising hand: light skin tone # E4.0 [1] (🙋🏻♂️) +1F64B 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman raising hand: medium-light skin tone # E4.0 [1] (🙋🏼♀️) +1F64B 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man raising hand: medium-light skin tone # E4.0 [1] (🙋🏼♂️) +1F64B 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman raising hand: medium skin tone # E4.0 [1] (🙋🏽♀️) +1F64B 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man raising hand: medium skin tone # E4.0 [1] (🙋🏽♂️) +1F64B 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman raising hand: medium-dark skin tone # E4.0 [1] (🙋🏾♀️) +1F64B 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man raising hand: 
medium-dark skin tone # E4.0 [1] (🙋🏾♂️) +1F64B 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman raising hand: dark skin tone # E4.0 [1] (🙋🏿♀️) +1F64B 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man raising hand: dark skin tone # E4.0 [1] (🙋🏿♂️) +1F64D 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman frowning # E4.0 [1] (🙍♀️) +1F64D 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man frowning # E4.0 [1] (🙍♂️) +1F64D 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman frowning: light skin tone # E4.0 [1] (🙍🏻♀️) +1F64D 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man frowning: light skin tone # E4.0 [1] (🙍🏻♂️) +1F64D 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman frowning: medium-light skin tone # E4.0 [1] (🙍🏼♀️) +1F64D 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man frowning: medium-light skin tone # E4.0 [1] (🙍🏼♂️) +1F64D 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman frowning: medium skin tone # E4.0 [1] (🙍🏽♀️) +1F64D 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man frowning: medium skin tone # E4.0 [1] (🙍🏽♂️) +1F64D 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman frowning: medium-dark skin tone # E4.0 [1] (🙍🏾♀️) +1F64D 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man frowning: medium-dark skin tone # E4.0 [1] (🙍🏾♂️) +1F64D 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman frowning: dark skin tone # E4.0 [1] (🙍🏿♀️) +1F64D 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man frowning: dark skin tone # E4.0 [1] (🙍🏿♂️) +1F64E 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pouting # E4.0 [1] (🙎♀️) +1F64E 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pouting # E4.0 [1] (🙎♂️) +1F64E 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pouting: light skin tone # E4.0 [1] (🙎🏻♀️) +1F64E 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pouting: light skin tone # E4.0 [1] (🙎🏻♂️) +1F64E 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pouting: medium-light skin tone # E4.0 [1] (🙎🏼♀️) +1F64E 1F3FC 200D 2642 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; man pouting: medium-light skin tone # E4.0 [1] (🙎🏼♂️) +1F64E 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pouting: medium skin tone # E4.0 [1] (🙎🏽♀️) +1F64E 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pouting: medium skin tone # E4.0 [1] (🙎🏽♂️) +1F64E 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pouting: medium-dark skin tone # E4.0 [1] (🙎🏾♀️) +1F64E 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pouting: medium-dark skin tone # E4.0 [1] (🙎🏾♂️) +1F64E 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman pouting: dark skin tone # E4.0 [1] (🙎🏿♀️) +1F64E 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pouting: dark skin tone # E4.0 [1] (🙎🏿♂️) +1F6A3 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman rowing boat # E4.0 [1] (🚣♀️) +1F6A3 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man rowing boat # E4.0 [1] (🚣♂️) +1F6A3 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman rowing boat: light skin tone # E4.0 [1] (🚣🏻♀️) +1F6A3 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man rowing boat: light skin tone # E4.0 [1] (🚣🏻♂️) +1F6A3 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman rowing boat: medium-light skin tone # E4.0 [1] (🚣🏼♀️) +1F6A3 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man rowing boat: medium-light skin tone # E4.0 [1] (🚣🏼♂️) +1F6A3 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman rowing boat: medium skin tone # E4.0 [1] (🚣🏽♀️) +1F6A3 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man rowing boat: medium skin tone # E4.0 [1] (🚣🏽♂️) +1F6A3 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman rowing boat: medium-dark skin tone # E4.0 [1] (🚣🏾♀️) +1F6A3 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man rowing boat: medium-dark skin tone # E4.0 [1] (🚣🏾♂️) +1F6A3 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman rowing boat: dark skin tone # E4.0 [1] (🚣🏿♀️) +1F6A3 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man rowing boat: dark skin tone # E4.0 [1] (🚣🏿♂️) +1F6B4 200D 2640 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; woman biking # E4.0 [1] (🚴♀️) +1F6B4 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man biking # E4.0 [1] (🚴♂️) +1F6B4 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman biking: light skin tone # E4.0 [1] (🚴🏻♀️) +1F6B4 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man biking: light skin tone # E4.0 [1] (🚴🏻♂️) +1F6B4 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman biking: medium-light skin tone # E4.0 [1] (🚴🏼♀️) +1F6B4 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man biking: medium-light skin tone # E4.0 [1] (🚴🏼♂️) +1F6B4 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman biking: medium skin tone # E4.0 [1] (🚴🏽♀️) +1F6B4 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man biking: medium skin tone # E4.0 [1] (🚴🏽♂️) +1F6B4 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman biking: medium-dark skin tone # E4.0 [1] (🚴🏾♀️) +1F6B4 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man biking: medium-dark skin tone # E4.0 [1] (🚴🏾♂️) +1F6B4 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman biking: dark skin tone # E4.0 [1] (🚴🏿♀️) +1F6B4 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man biking: dark skin tone # E4.0 [1] (🚴🏿♂️) +1F6B5 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mountain biking # E4.0 [1] (🚵♀️) +1F6B5 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mountain biking # E4.0 [1] (🚵♂️) +1F6B5 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mountain biking: light skin tone # E4.0 [1] (🚵🏻♀️) +1F6B5 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mountain biking: light skin tone # E4.0 [1] (🚵🏻♂️) +1F6B5 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mountain biking: medium-light skin tone # E4.0 [1] (🚵🏼♀️) +1F6B5 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mountain biking: medium-light skin tone # E4.0 [1] (🚵🏼♂️) +1F6B5 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mountain biking: medium skin tone # E4.0 [1] (🚵🏽♀️) +1F6B5 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mountain biking: 
medium skin tone # E4.0 [1] (🚵🏽♂️) +1F6B5 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mountain biking: medium-dark skin tone # E4.0 [1] (🚵🏾♀️) +1F6B5 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mountain biking: medium-dark skin tone # E4.0 [1] (🚵🏾♂️) +1F6B5 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mountain biking: dark skin tone # E4.0 [1] (🚵🏿♀️) +1F6B5 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mountain biking: dark skin tone # E4.0 [1] (🚵🏿♂️) +1F6B6 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking # E4.0 [1] (🚶♀️) +1F6B6 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking facing right # E15.1 [1] (🚶♀️➡️) +1F6B6 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking # E4.0 [1] (🚶♂️) +1F6B6 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking facing right # E15.1 [1] (🚶♂️➡️) +1F6B6 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking: light skin tone # E4.0 [1] (🚶🏻♀️) +1F6B6 1F3FB 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking facing right: light skin tone # E15.1 [1] (🚶🏻♀️➡️) +1F6B6 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking: light skin tone # E4.0 [1] (🚶🏻♂️) +1F6B6 1F3FB 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking facing right: light skin tone # E15.1 [1] (🚶🏻♂️➡️) +1F6B6 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking: medium-light skin tone # E4.0 [1] (🚶🏼♀️) +1F6B6 1F3FC 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking facing right: medium-light skin tone # E15.1 [1] (🚶🏼♀️➡️) +1F6B6 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking: medium-light skin tone # E4.0 [1] (🚶🏼♂️) +1F6B6 1F3FC 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking facing right: medium-light skin tone # E15.1 [1] (🚶🏼♂️➡️) +1F6B6 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking: medium skin tone # E4.0 [1] (🚶🏽♀️) +1F6B6 1F3FD 200D 2640 FE0F 200D 27A1 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; woman walking facing right: medium skin tone # E15.1 [1] (🚶🏽♀️➡️) +1F6B6 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking: medium skin tone # E4.0 [1] (🚶🏽♂️) +1F6B6 1F3FD 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking facing right: medium skin tone # E15.1 [1] (🚶🏽♂️➡️) +1F6B6 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking: medium-dark skin tone # E4.0 [1] (🚶🏾♀️) +1F6B6 1F3FE 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking facing right: medium-dark skin tone # E15.1 [1] (🚶🏾♀️➡️) +1F6B6 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking: medium-dark skin tone # E4.0 [1] (🚶🏾♂️) +1F6B6 1F3FE 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking facing right: medium-dark skin tone # E15.1 [1] (🚶🏾♂️➡️) +1F6B6 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking: dark skin tone # E4.0 [1] (🚶🏿♀️) +1F6B6 1F3FF 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman walking facing right: dark skin tone # E15.1 [1] (🚶🏿♀️➡️) +1F6B6 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking: dark skin tone # E4.0 [1] (🚶🏿♂️) +1F6B6 1F3FF 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man walking facing right: dark skin tone # E15.1 [1] (🚶🏿♂️➡️) +1F926 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman facepalming # E4.0 [1] (🤦♀️) +1F926 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man facepalming # E4.0 [1] (🤦♂️) +1F926 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman facepalming: light skin tone # E4.0 [1] (🤦🏻♀️) +1F926 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man facepalming: light skin tone # E4.0 [1] (🤦🏻♂️) +1F926 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman facepalming: medium-light skin tone # E4.0 [1] (🤦🏼♀️) +1F926 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man facepalming: medium-light skin tone # E4.0 [1] (🤦🏼♂️) +1F926 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman facepalming: medium skin tone # 
E4.0 [1] (🤦🏽♀️) +1F926 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man facepalming: medium skin tone # E4.0 [1] (🤦🏽♂️) +1F926 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman facepalming: medium-dark skin tone # E4.0 [1] (🤦🏾♀️) +1F926 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man facepalming: medium-dark skin tone # E4.0 [1] (🤦🏾♂️) +1F926 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman facepalming: dark skin tone # E4.0 [1] (🤦🏿♀️) +1F926 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man facepalming: dark skin tone # E4.0 [1] (🤦🏿♂️) +1F935 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in tuxedo # E13.0 [1] (🤵♀️) +1F935 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in tuxedo # E13.0 [1] (🤵♂️) +1F935 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in tuxedo: light skin tone # E13.0 [1] (🤵🏻♀️) +1F935 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in tuxedo: light skin tone # E13.0 [1] (🤵🏻♂️) +1F935 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in tuxedo: medium-light skin tone # E13.0 [1] (🤵🏼♀️) +1F935 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in tuxedo: medium-light skin tone # E13.0 [1] (🤵🏼♂️) +1F935 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in tuxedo: medium skin tone # E13.0 [1] (🤵🏽♀️) +1F935 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in tuxedo: medium skin tone # E13.0 [1] (🤵🏽♂️) +1F935 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in tuxedo: medium-dark skin tone # E13.0 [1] (🤵🏾♀️) +1F935 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in tuxedo: medium-dark skin tone # E13.0 [1] (🤵🏾♂️) +1F935 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in tuxedo: dark skin tone # E13.0 [1] (🤵🏿♀️) +1F935 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in tuxedo: dark skin tone # E13.0 [1] (🤵🏿♂️) +1F937 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman shrugging # E4.0 [1] (🤷♀️) +1F937 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man shrugging # E4.0 [1] (🤷♂️) +1F937 1F3FB 200D 2640 
FE0F ; RGI_Emoji_ZWJ_Sequence ; woman shrugging: light skin tone # E4.0 [1] (🤷🏻♀️) +1F937 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man shrugging: light skin tone # E4.0 [1] (🤷🏻♂️) +1F937 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman shrugging: medium-light skin tone # E4.0 [1] (🤷🏼♀️) +1F937 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man shrugging: medium-light skin tone # E4.0 [1] (🤷🏼♂️) +1F937 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman shrugging: medium skin tone # E4.0 [1] (🤷🏽♀️) +1F937 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man shrugging: medium skin tone # E4.0 [1] (🤷🏽♂️) +1F937 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman shrugging: medium-dark skin tone # E4.0 [1] (🤷🏾♀️) +1F937 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man shrugging: medium-dark skin tone # E4.0 [1] (🤷🏾♂️) +1F937 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman shrugging: dark skin tone # E4.0 [1] (🤷🏿♀️) +1F937 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man shrugging: dark skin tone # E4.0 [1] (🤷🏿♂️) +1F938 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman cartwheeling # E4.0 [1] (🤸♀️) +1F938 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man cartwheeling # E4.0 [1] (🤸♂️) +1F938 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman cartwheeling: light skin tone # E4.0 [1] (🤸🏻♀️) +1F938 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man cartwheeling: light skin tone # E4.0 [1] (🤸🏻♂️) +1F938 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman cartwheeling: medium-light skin tone # E4.0 [1] (🤸🏼♀️) +1F938 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man cartwheeling: medium-light skin tone # E4.0 [1] (🤸🏼♂️) +1F938 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman cartwheeling: medium skin tone # E4.0 [1] (🤸🏽♀️) +1F938 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man cartwheeling: medium skin tone # E4.0 [1] (🤸🏽♂️) +1F938 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman cartwheeling: medium-dark skin tone # E4.0 [1] (🤸🏾♀️) 
+1F938 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man cartwheeling: medium-dark skin tone # E4.0 [1] (🤸🏾♂️) +1F938 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman cartwheeling: dark skin tone # E4.0 [1] (🤸🏿♀️) +1F938 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man cartwheeling: dark skin tone # E4.0 [1] (🤸🏿♂️) +1F939 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman juggling # E4.0 [1] (🤹♀️) +1F939 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man juggling # E4.0 [1] (🤹♂️) +1F939 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman juggling: light skin tone # E4.0 [1] (🤹🏻♀️) +1F939 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man juggling: light skin tone # E4.0 [1] (🤹🏻♂️) +1F939 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman juggling: medium-light skin tone # E4.0 [1] (🤹🏼♀️) +1F939 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man juggling: medium-light skin tone # E4.0 [1] (🤹🏼♂️) +1F939 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman juggling: medium skin tone # E4.0 [1] (🤹🏽♀️) +1F939 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man juggling: medium skin tone # E4.0 [1] (🤹🏽♂️) +1F939 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman juggling: medium-dark skin tone # E4.0 [1] (🤹🏾♀️) +1F939 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man juggling: medium-dark skin tone # E4.0 [1] (🤹🏾♂️) +1F939 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman juggling: dark skin tone # E4.0 [1] (🤹🏿♀️) +1F939 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man juggling: dark skin tone # E4.0 [1] (🤹🏿♂️) +1F93C 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; women wrestling # E4.0 [1] (🤼♀️) +1F93C 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; men wrestling # E4.0 [1] (🤼♂️) +1F93D 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing water polo # E4.0 [1] (🤽♀️) +1F93D 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing water polo # E4.0 [1] (🤽♂️) +1F93D 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing water polo: light skin tone # E4.0 
[1] (🤽🏻♀️) +1F93D 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing water polo: light skin tone # E4.0 [1] (🤽🏻♂️) +1F93D 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing water polo: medium-light skin tone # E4.0 [1] (🤽🏼♀️) +1F93D 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing water polo: medium-light skin tone # E4.0 [1] (🤽🏼♂️) +1F93D 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing water polo: medium skin tone # E4.0 [1] (🤽🏽♀️) +1F93D 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing water polo: medium skin tone # E4.0 [1] (🤽🏽♂️) +1F93D 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing water polo: medium-dark skin tone # E4.0 [1] (🤽🏾♀️) +1F93D 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing water polo: medium-dark skin tone # E4.0 [1] (🤽🏾♂️) +1F93D 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing water polo: dark skin tone # E4.0 [1] (🤽🏿♀️) +1F93D 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing water polo: dark skin tone # E4.0 [1] (🤽🏿♂️) +1F93E 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing handball # E4.0 [1] (🤾♀️) +1F93E 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing handball # E4.0 [1] (🤾♂️) +1F93E 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing handball: light skin tone # E4.0 [1] (🤾🏻♀️) +1F93E 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing handball: light skin tone # E4.0 [1] (🤾🏻♂️) +1F93E 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing handball: medium-light skin tone # E4.0 [1] (🤾🏼♀️) +1F93E 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing handball: medium-light skin tone # E4.0 [1] (🤾🏼♂️) +1F93E 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing handball: medium skin tone # E4.0 [1] (🤾🏽♀️) +1F93E 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing handball: medium skin tone # E4.0 [1] (🤾🏽♂️) +1F93E 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing 
handball: medium-dark skin tone # E4.0 [1] (🤾🏾♀️) +1F93E 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing handball: medium-dark skin tone # E4.0 [1] (🤾🏾♂️) +1F93E 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman playing handball: dark skin tone # E4.0 [1] (🤾🏿♀️) +1F93E 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man playing handball: dark skin tone # E4.0 [1] (🤾🏿♂️) +1F9B8 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman superhero # E11.0 [1] (🦸♀️) +1F9B8 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man superhero # E11.0 [1] (🦸♂️) +1F9B8 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman superhero: light skin tone # E11.0 [1] (🦸🏻♀️) +1F9B8 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man superhero: light skin tone # E11.0 [1] (🦸🏻♂️) +1F9B8 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman superhero: medium-light skin tone # E11.0 [1] (🦸🏼♀️) +1F9B8 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man superhero: medium-light skin tone # E11.0 [1] (🦸🏼♂️) +1F9B8 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman superhero: medium skin tone # E11.0 [1] (🦸🏽♀️) +1F9B8 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man superhero: medium skin tone # E11.0 [1] (🦸🏽♂️) +1F9B8 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman superhero: medium-dark skin tone # E11.0 [1] (🦸🏾♀️) +1F9B8 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man superhero: medium-dark skin tone # E11.0 [1] (🦸🏾♂️) +1F9B8 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman superhero: dark skin tone # E11.0 [1] (🦸🏿♀️) +1F9B8 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man superhero: dark skin tone # E11.0 [1] (🦸🏿♂️) +1F9B9 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman supervillain # E11.0 [1] (🦹♀️) +1F9B9 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man supervillain # E11.0 [1] (🦹♂️) +1F9B9 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman supervillain: light skin tone # E11.0 [1] (🦹🏻♀️) +1F9B9 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man supervillain: light 
skin tone # E11.0 [1] (🦹🏻♂️) +1F9B9 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman supervillain: medium-light skin tone # E11.0 [1] (🦹🏼♀️) +1F9B9 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man supervillain: medium-light skin tone # E11.0 [1] (🦹🏼♂️) +1F9B9 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman supervillain: medium skin tone # E11.0 [1] (🦹🏽♀️) +1F9B9 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man supervillain: medium skin tone # E11.0 [1] (🦹🏽♂️) +1F9B9 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman supervillain: medium-dark skin tone # E11.0 [1] (🦹🏾♀️) +1F9B9 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man supervillain: medium-dark skin tone # E11.0 [1] (🦹🏾♂️) +1F9B9 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman supervillain: dark skin tone # E11.0 [1] (🦹🏿♀️) +1F9B9 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man supervillain: dark skin tone # E11.0 [1] (🦹🏿♂️) +1F9CD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman standing # E12.0 [1] (🧍♀️) +1F9CD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man standing # E12.0 [1] (🧍♂️) +1F9CD 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman standing: light skin tone # E12.0 [1] (🧍🏻♀️) +1F9CD 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man standing: light skin tone # E12.0 [1] (🧍🏻♂️) +1F9CD 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman standing: medium-light skin tone # E12.0 [1] (🧍🏼♀️) +1F9CD 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man standing: medium-light skin tone # E12.0 [1] (🧍🏼♂️) +1F9CD 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman standing: medium skin tone # E12.0 [1] (🧍🏽♀️) +1F9CD 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man standing: medium skin tone # E12.0 [1] (🧍🏽♂️) +1F9CD 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman standing: medium-dark skin tone # E12.0 [1] (🧍🏾♀️) +1F9CD 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man standing: medium-dark skin tone # E12.0 [1] (🧍🏾♂️) +1F9CD 1F3FF 200D 2640 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; woman standing: dark skin tone # E12.0 [1] (🧍🏿♀️) +1F9CD 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man standing: dark skin tone # E12.0 [1] (🧍🏿♂️) +1F9CE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling # E12.0 [1] (🧎♀️) +1F9CE 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling facing right # E15.1 [1] (🧎♀️➡️) +1F9CE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling # E12.0 [1] (🧎♂️) +1F9CE 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling facing right # E15.1 [1] (🧎♂️➡️) +1F9CE 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling: light skin tone # E12.0 [1] (🧎🏻♀️) +1F9CE 1F3FB 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling facing right: light skin tone # E15.1 [1] (🧎🏻♀️➡️) +1F9CE 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling: light skin tone # E12.0 [1] (🧎🏻♂️) +1F9CE 1F3FB 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling facing right: light skin tone # E15.1 [1] (🧎🏻♂️➡️) +1F9CE 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling: medium-light skin tone # E12.0 [1] (🧎🏼♀️) +1F9CE 1F3FC 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling facing right: medium-light skin tone # E15.1 [1] (🧎🏼♀️➡️) +1F9CE 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling: medium-light skin tone # E12.0 [1] (🧎🏼♂️) +1F9CE 1F3FC 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling facing right: medium-light skin tone # E15.1 [1] (🧎🏼♂️➡️) +1F9CE 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling: medium skin tone # E12.0 [1] (🧎🏽♀️) +1F9CE 1F3FD 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling facing right: medium skin tone # E15.1 [1] (🧎🏽♀️➡️) +1F9CE 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling: medium skin tone # E12.0 [1] (🧎🏽♂️) +1F9CE 1F3FD 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling facing right: 
medium skin tone # E15.1 [1] (🧎🏽♂️➡️) +1F9CE 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling: medium-dark skin tone # E12.0 [1] (🧎🏾♀️) +1F9CE 1F3FE 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling facing right: medium-dark skin tone # E15.1 [1] (🧎🏾♀️➡️) +1F9CE 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling: medium-dark skin tone # E12.0 [1] (🧎🏾♂️) +1F9CE 1F3FE 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling facing right: medium-dark skin tone # E15.1 [1] (🧎🏾♂️➡️) +1F9CE 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling: dark skin tone # E12.0 [1] (🧎🏿♀️) +1F9CE 1F3FF 200D 2640 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman kneeling facing right: dark skin tone # E15.1 [1] (🧎🏿♀️➡️) +1F9CE 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling: dark skin tone # E12.0 [1] (🧎🏿♂️) +1F9CE 1F3FF 200D 2642 FE0F 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; man kneeling facing right: dark skin tone # E15.1 [1] (🧎🏿♂️➡️) +1F9CF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf woman # E12.0 [1] (🧏♀️) +1F9CF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf man # E12.0 [1] (🧏♂️) +1F9CF 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf woman: light skin tone # E12.0 [1] (🧏🏻♀️) +1F9CF 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf man: light skin tone # E12.0 [1] (🧏🏻♂️) +1F9CF 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf woman: medium-light skin tone # E12.0 [1] (🧏🏼♀️) +1F9CF 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf man: medium-light skin tone # E12.0 [1] (🧏🏼♂️) +1F9CF 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf woman: medium skin tone # E12.0 [1] (🧏🏽♀️) +1F9CF 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf man: medium skin tone # E12.0 [1] (🧏🏽♂️) +1F9CF 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf woman: medium-dark skin tone # E12.0 [1] (🧏🏾♀️) +1F9CF 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf man: medium-dark skin tone # E12.0 
[1] (🧏🏾♂️) +1F9CF 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf woman: dark skin tone # E12.0 [1] (🧏🏿♀️) +1F9CF 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; deaf man: dark skin tone # E12.0 [1] (🧏🏿♂️) +1F9D4 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: beard # E13.1 [1] (🧔♀️) +1F9D4 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: beard # E13.1 [1] (🧔♂️) +1F9D4 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: light skin tone, beard # E13.1 [1] (🧔🏻♀️) +1F9D4 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: light skin tone, beard # E13.1 [1] (🧔🏻♂️) +1F9D4 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: medium-light skin tone, beard # E13.1 [1] (🧔🏼♀️) +1F9D4 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: medium-light skin tone, beard # E13.1 [1] (🧔🏼♂️) +1F9D4 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: medium skin tone, beard # E13.1 [1] (🧔🏽♀️) +1F9D4 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: medium skin tone, beard # E13.1 [1] (🧔🏽♂️) +1F9D4 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: medium-dark skin tone, beard # E13.1 [1] (🧔🏾♀️) +1F9D4 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: medium-dark skin tone, beard # E13.1 [1] (🧔🏾♂️) +1F9D4 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman: dark skin tone, beard # E13.1 [1] (🧔🏿♀️) +1F9D4 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man: dark skin tone, beard # E13.1 [1] (🧔🏿♂️) +1F9D6 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in steamy room # E5.0 [1] (🧖♀️) +1F9D6 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in steamy room # E5.0 [1] (🧖♂️) +1F9D6 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in steamy room: light skin tone # E5.0 [1] (🧖🏻♀️) +1F9D6 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in steamy room: light skin tone # E5.0 [1] (🧖🏻♂️) +1F9D6 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in steamy room: medium-light skin tone # E5.0 [1] (🧖🏼♀️) +1F9D6 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in 
steamy room: medium-light skin tone # E5.0 [1] (🧖🏼♂️) +1F9D6 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in steamy room: medium skin tone # E5.0 [1] (🧖🏽♀️) +1F9D6 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in steamy room: medium skin tone # E5.0 [1] (🧖🏽♂️) +1F9D6 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in steamy room: medium-dark skin tone # E5.0 [1] (🧖🏾♀️) +1F9D6 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in steamy room: medium-dark skin tone # E5.0 [1] (🧖🏾♂️) +1F9D6 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in steamy room: dark skin tone # E5.0 [1] (🧖🏿♀️) +1F9D6 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in steamy room: dark skin tone # E5.0 [1] (🧖🏿♂️) +1F9D7 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman climbing # E5.0 [1] (🧗♀️) +1F9D7 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man climbing # E5.0 [1] (🧗♂️) +1F9D7 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman climbing: light skin tone # E5.0 [1] (🧗🏻♀️) +1F9D7 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man climbing: light skin tone # E5.0 [1] (🧗🏻♂️) +1F9D7 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman climbing: medium-light skin tone # E5.0 [1] (🧗🏼♀️) +1F9D7 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man climbing: medium-light skin tone # E5.0 [1] (🧗🏼♂️) +1F9D7 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman climbing: medium skin tone # E5.0 [1] (🧗🏽♀️) +1F9D7 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man climbing: medium skin tone # E5.0 [1] (🧗🏽♂️) +1F9D7 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman climbing: medium-dark skin tone # E5.0 [1] (🧗🏾♀️) +1F9D7 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man climbing: medium-dark skin tone # E5.0 [1] (🧗🏾♂️) +1F9D7 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman climbing: dark skin tone # E5.0 [1] (🧗🏿♀️) +1F9D7 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man climbing: dark skin tone # E5.0 [1] (🧗🏿♂️) +1F9D8 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence 
; woman in lotus position # E5.0 [1] (🧘♀️) +1F9D8 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in lotus position # E5.0 [1] (🧘♂️) +1F9D8 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in lotus position: light skin tone # E5.0 [1] (🧘🏻♀️) +1F9D8 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in lotus position: light skin tone # E5.0 [1] (🧘🏻♂️) +1F9D8 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in lotus position: medium-light skin tone # E5.0 [1] (🧘🏼♀️) +1F9D8 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in lotus position: medium-light skin tone # E5.0 [1] (🧘🏼♂️) +1F9D8 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in lotus position: medium skin tone # E5.0 [1] (🧘🏽♀️) +1F9D8 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in lotus position: medium skin tone # E5.0 [1] (🧘🏽♂️) +1F9D8 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in lotus position: medium-dark skin tone # E5.0 [1] (🧘🏾♀️) +1F9D8 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in lotus position: medium-dark skin tone # E5.0 [1] (🧘🏾♂️) +1F9D8 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in lotus position: dark skin tone # E5.0 [1] (🧘🏿♀️) +1F9D8 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man in lotus position: dark skin tone # E5.0 [1] (🧘🏿♂️) +1F9D9 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mage # E5.0 [1] (🧙♀️) +1F9D9 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mage # E5.0 [1] (🧙♂️) +1F9D9 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mage: light skin tone # E5.0 [1] (🧙🏻♀️) +1F9D9 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mage: light skin tone # E5.0 [1] (🧙🏻♂️) +1F9D9 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mage: medium-light skin tone # E5.0 [1] (🧙🏼♀️) +1F9D9 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mage: medium-light skin tone # E5.0 [1] (🧙🏼♂️) +1F9D9 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mage: medium skin tone # E5.0 [1] (🧙🏽♀️) +1F9D9 1F3FD 200D 2642 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; man mage: medium skin tone # E5.0 [1] (🧙🏽♂️) +1F9D9 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mage: medium-dark skin tone # E5.0 [1] (🧙🏾♀️) +1F9D9 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mage: medium-dark skin tone # E5.0 [1] (🧙🏾♂️) +1F9D9 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman mage: dark skin tone # E5.0 [1] (🧙🏿♀️) +1F9D9 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man mage: dark skin tone # E5.0 [1] (🧙🏿♂️) +1F9DA 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman fairy # E5.0 [1] (🧚♀️) +1F9DA 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man fairy # E5.0 [1] (🧚♂️) +1F9DA 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman fairy: light skin tone # E5.0 [1] (🧚🏻♀️) +1F9DA 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man fairy: light skin tone # E5.0 [1] (🧚🏻♂️) +1F9DA 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman fairy: medium-light skin tone # E5.0 [1] (🧚🏼♀️) +1F9DA 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man fairy: medium-light skin tone # E5.0 [1] (🧚🏼♂️) +1F9DA 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman fairy: medium skin tone # E5.0 [1] (🧚🏽♀️) +1F9DA 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man fairy: medium skin tone # E5.0 [1] (🧚🏽♂️) +1F9DA 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman fairy: medium-dark skin tone # E5.0 [1] (🧚🏾♀️) +1F9DA 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man fairy: medium-dark skin tone # E5.0 [1] (🧚🏾♂️) +1F9DA 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman fairy: dark skin tone # E5.0 [1] (🧚🏿♀️) +1F9DA 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man fairy: dark skin tone # E5.0 [1] (🧚🏿♂️) +1F9DB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman vampire # E5.0 [1] (🧛♀️) +1F9DB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man vampire # E5.0 [1] (🧛♂️) +1F9DB 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman vampire: light skin tone # E5.0 [1] (🧛🏻♀️) +1F9DB 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man vampire: 
light skin tone # E5.0 [1] (🧛🏻♂️) +1F9DB 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman vampire: medium-light skin tone # E5.0 [1] (🧛🏼♀️) +1F9DB 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man vampire: medium-light skin tone # E5.0 [1] (🧛🏼♂️) +1F9DB 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman vampire: medium skin tone # E5.0 [1] (🧛🏽♀️) +1F9DB 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man vampire: medium skin tone # E5.0 [1] (🧛🏽♂️) +1F9DB 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman vampire: medium-dark skin tone # E5.0 [1] (🧛🏾♀️) +1F9DB 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man vampire: medium-dark skin tone # E5.0 [1] (🧛🏾♂️) +1F9DB 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman vampire: dark skin tone # E5.0 [1] (🧛🏿♀️) +1F9DB 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man vampire: dark skin tone # E5.0 [1] (🧛🏿♂️) +1F9DC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; mermaid # E5.0 [1] (🧜♀️) +1F9DC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; merman # E5.0 [1] (🧜♂️) +1F9DC 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; mermaid: light skin tone # E5.0 [1] (🧜🏻♀️) +1F9DC 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; merman: light skin tone # E5.0 [1] (🧜🏻♂️) +1F9DC 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; mermaid: medium-light skin tone # E5.0 [1] (🧜🏼♀️) +1F9DC 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; merman: medium-light skin tone # E5.0 [1] (🧜🏼♂️) +1F9DC 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; mermaid: medium skin tone # E5.0 [1] (🧜🏽♀️) +1F9DC 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; merman: medium skin tone # E5.0 [1] (🧜🏽♂️) +1F9DC 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; mermaid: medium-dark skin tone # E5.0 [1] (🧜🏾♀️) +1F9DC 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; merman: medium-dark skin tone # E5.0 [1] (🧜🏾♂️) +1F9DC 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; mermaid: dark skin tone # E5.0 [1] (🧜🏿♀️) +1F9DC 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; merman: 
dark skin tone # E5.0 [1] (🧜🏿♂️) +1F9DD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman elf # E5.0 [1] (🧝♀️) +1F9DD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man elf # E5.0 [1] (🧝♂️) +1F9DD 1F3FB 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman elf: light skin tone # E5.0 [1] (🧝🏻♀️) +1F9DD 1F3FB 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man elf: light skin tone # E5.0 [1] (🧝🏻♂️) +1F9DD 1F3FC 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman elf: medium-light skin tone # E5.0 [1] (🧝🏼♀️) +1F9DD 1F3FC 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man elf: medium-light skin tone # E5.0 [1] (🧝🏼♂️) +1F9DD 1F3FD 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman elf: medium skin tone # E5.0 [1] (🧝🏽♀️) +1F9DD 1F3FD 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man elf: medium skin tone # E5.0 [1] (🧝🏽♂️) +1F9DD 1F3FE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman elf: medium-dark skin tone # E5.0 [1] (🧝🏾♀️) +1F9DD 1F3FE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man elf: medium-dark skin tone # E5.0 [1] (🧝🏾♂️) +1F9DD 1F3FF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman elf: dark skin tone # E5.0 [1] (🧝🏿♀️) +1F9DD 1F3FF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man elf: dark skin tone # E5.0 [1] (🧝🏿♂️) +1F9DE 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman genie # E5.0 [1] (🧞♀️) +1F9DE 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man genie # E5.0 [1] (🧞♂️) +1F9DF 200D 2640 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman zombie # E5.0 [1] (🧟♀️) +1F9DF 200D 2642 FE0F ; RGI_Emoji_ZWJ_Sequence ; man zombie # E5.0 [1] (🧟♂️) + +# Total elements: 608 + +# ================================================ + +# RGI_Emoji_ZWJ_Sequence: Hair + +1F468 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; man: red hair # E11.0 [1] (👨🦰) +1F468 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; man: curly hair # E11.0 [1] (👨🦱) +1F468 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; man: bald # E11.0 [1] (👨🦲) +1F468 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; man: white hair # E11.0 [1] (👨🦳) +1F468 1F3FB 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; man: light skin 
tone, red hair # E11.0 [1] (👨🏻🦰) +1F468 1F3FB 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; man: light skin tone, curly hair # E11.0 [1] (👨🏻🦱) +1F468 1F3FB 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; man: light skin tone, bald # E11.0 [1] (👨🏻🦲) +1F468 1F3FB 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; man: light skin tone, white hair # E11.0 [1] (👨🏻🦳) +1F468 1F3FC 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; man: medium-light skin tone, red hair # E11.0 [1] (👨🏼🦰) +1F468 1F3FC 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; man: medium-light skin tone, curly hair # E11.0 [1] (👨🏼🦱) +1F468 1F3FC 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; man: medium-light skin tone, bald # E11.0 [1] (👨🏼🦲) +1F468 1F3FC 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; man: medium-light skin tone, white hair # E11.0 [1] (👨🏼🦳) +1F468 1F3FD 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; man: medium skin tone, red hair # E11.0 [1] (👨🏽🦰) +1F468 1F3FD 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; man: medium skin tone, curly hair # E11.0 [1] (👨🏽🦱) +1F468 1F3FD 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; man: medium skin tone, bald # E11.0 [1] (👨🏽🦲) +1F468 1F3FD 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; man: medium skin tone, white hair # E11.0 [1] (👨🏽🦳) +1F468 1F3FE 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; man: medium-dark skin tone, red hair # E11.0 [1] (👨🏾🦰) +1F468 1F3FE 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; man: medium-dark skin tone, curly hair # E11.0 [1] (👨🏾🦱) +1F468 1F3FE 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; man: medium-dark skin tone, bald # E11.0 [1] (👨🏾🦲) +1F468 1F3FE 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; man: medium-dark skin tone, white hair # E11.0 [1] (👨🏾🦳) +1F468 1F3FF 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; man: dark skin tone, red hair # E11.0 [1] (👨🏿🦰) +1F468 1F3FF 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; man: dark skin tone, curly hair # E11.0 [1] (👨🏿🦱) +1F468 1F3FF 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; man: dark skin tone, bald # E11.0 [1] (👨🏿🦲) +1F468 1F3FF 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; man: dark skin tone, white hair # E11.0 [1] (👨🏿🦳) +1F469 200D 
1F9B0 ; RGI_Emoji_ZWJ_Sequence ; woman: red hair # E11.0 [1] (👩🦰) +1F469 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; woman: curly hair # E11.0 [1] (👩🦱) +1F469 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; woman: bald # E11.0 [1] (👩🦲) +1F469 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; woman: white hair # E11.0 [1] (👩🦳) +1F469 1F3FB 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; woman: light skin tone, red hair # E11.0 [1] (👩🏻🦰) +1F469 1F3FB 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; woman: light skin tone, curly hair # E11.0 [1] (👩🏻🦱) +1F469 1F3FB 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; woman: light skin tone, bald # E11.0 [1] (👩🏻🦲) +1F469 1F3FB 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; woman: light skin tone, white hair # E11.0 [1] (👩🏻🦳) +1F469 1F3FC 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; woman: medium-light skin tone, red hair # E11.0 [1] (👩🏼🦰) +1F469 1F3FC 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; woman: medium-light skin tone, curly hair # E11.0 [1] (👩🏼🦱) +1F469 1F3FC 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; woman: medium-light skin tone, bald # E11.0 [1] (👩🏼🦲) +1F469 1F3FC 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; woman: medium-light skin tone, white hair # E11.0 [1] (👩🏼🦳) +1F469 1F3FD 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; woman: medium skin tone, red hair # E11.0 [1] (👩🏽🦰) +1F469 1F3FD 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; woman: medium skin tone, curly hair # E11.0 [1] (👩🏽🦱) +1F469 1F3FD 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; woman: medium skin tone, bald # E11.0 [1] (👩🏽🦲) +1F469 1F3FD 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; woman: medium skin tone, white hair # E11.0 [1] (👩🏽🦳) +1F469 1F3FE 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; woman: medium-dark skin tone, red hair # E11.0 [1] (👩🏾🦰) +1F469 1F3FE 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; woman: medium-dark skin tone, curly hair # E11.0 [1] (👩🏾🦱) +1F469 1F3FE 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; woman: medium-dark skin tone, bald # E11.0 [1] (👩🏾🦲) +1F469 1F3FE 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; woman: medium-dark skin tone, white hair # E11.0 [1] (👩🏾🦳) +1F469 
1F3FF 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; woman: dark skin tone, red hair # E11.0 [1] (👩🏿🦰) +1F469 1F3FF 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; woman: dark skin tone, curly hair # E11.0 [1] (👩🏿🦱) +1F469 1F3FF 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; woman: dark skin tone, bald # E11.0 [1] (👩🏿🦲) +1F469 1F3FF 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; woman: dark skin tone, white hair # E11.0 [1] (👩🏿🦳) +1F9D1 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; person: red hair # E12.1 [1] (🧑🦰) +1F9D1 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; person: curly hair # E12.1 [1] (🧑🦱) +1F9D1 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; person: bald # E12.1 [1] (🧑🦲) +1F9D1 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; person: white hair # E12.1 [1] (🧑🦳) +1F9D1 1F3FB 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; person: light skin tone, red hair # E12.1 [1] (🧑🏻🦰) +1F9D1 1F3FB 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; person: light skin tone, curly hair # E12.1 [1] (🧑🏻🦱) +1F9D1 1F3FB 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; person: light skin tone, bald # E12.1 [1] (🧑🏻🦲) +1F9D1 1F3FB 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; person: light skin tone, white hair # E12.1 [1] (🧑🏻🦳) +1F9D1 1F3FC 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; person: medium-light skin tone, red hair # E12.1 [1] (🧑🏼🦰) +1F9D1 1F3FC 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; person: medium-light skin tone, curly hair # E12.1 [1] (🧑🏼🦱) +1F9D1 1F3FC 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; person: medium-light skin tone, bald # E12.1 [1] (🧑🏼🦲) +1F9D1 1F3FC 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; person: medium-light skin tone, white hair # E12.1 [1] (🧑🏼🦳) +1F9D1 1F3FD 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; person: medium skin tone, red hair # E12.1 [1] (🧑🏽🦰) +1F9D1 1F3FD 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; person: medium skin tone, curly hair # E12.1 [1] (🧑🏽🦱) +1F9D1 1F3FD 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; person: medium skin tone, bald # E12.1 [1] (🧑🏽🦲) +1F9D1 1F3FD 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; person: medium skin tone, white hair # E12.1 [1] (🧑🏽🦳) +1F9D1 1F3FE 200D 
1F9B0 ; RGI_Emoji_ZWJ_Sequence ; person: medium-dark skin tone, red hair # E12.1 [1] (🧑🏾🦰) +1F9D1 1F3FE 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; person: medium-dark skin tone, curly hair # E12.1 [1] (🧑🏾🦱) +1F9D1 1F3FE 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; person: medium-dark skin tone, bald # E12.1 [1] (🧑🏾🦲) +1F9D1 1F3FE 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; person: medium-dark skin tone, white hair # E12.1 [1] (🧑🏾🦳) +1F9D1 1F3FF 200D 1F9B0 ; RGI_Emoji_ZWJ_Sequence ; person: dark skin tone, red hair # E12.1 [1] (🧑🏿🦰) +1F9D1 1F3FF 200D 1F9B1 ; RGI_Emoji_ZWJ_Sequence ; person: dark skin tone, curly hair # E12.1 [1] (🧑🏿🦱) +1F9D1 1F3FF 200D 1F9B2 ; RGI_Emoji_ZWJ_Sequence ; person: dark skin tone, bald # E12.1 [1] (🧑🏿🦲) +1F9D1 1F3FF 200D 1F9B3 ; RGI_Emoji_ZWJ_Sequence ; person: dark skin tone, white hair # E12.1 [1] (🧑🏿🦳) + +# Total elements: 72 + +# ================================================ + +# RGI_Emoji_ZWJ_Sequence: Other + +26D3 FE0F 200D 1F4A5 ; RGI_Emoji_ZWJ_Sequence ; broken chain # E15.1 [1] (⛓️💥) +2764 FE0F 200D 1F525 ; RGI_Emoji_ZWJ_Sequence ; heart on fire # E13.1 [1] (❤️🔥) +2764 FE0F 200D 1FA79 ; RGI_Emoji_ZWJ_Sequence ; mending heart # E13.1 [1] (❤️🩹) +1F344 200D 1F7EB ; RGI_Emoji_ZWJ_Sequence ; brown mushroom # E15.1 [1] (🍄🟫) +1F34B 200D 1F7E9 ; RGI_Emoji_ZWJ_Sequence ; lime # E15.1 [1] (🍋🟩) +1F3F3 FE0F 200D 26A7 FE0F ; RGI_Emoji_ZWJ_Sequence ; transgender flag # E13.0 [1] (🏳️⚧️) +1F3F3 FE0F 200D 1F308 ; RGI_Emoji_ZWJ_Sequence ; rainbow flag # E4.0 [1] (🏳️🌈) +1F3F4 200D 2620 FE0F ; RGI_Emoji_ZWJ_Sequence ; pirate flag # E11.0 [1] (🏴☠️) +1F408 200D 2B1B ; RGI_Emoji_ZWJ_Sequence ; black cat # E13.0 [1] (🐈⬛) +1F415 200D 1F9BA ; RGI_Emoji_ZWJ_Sequence ; service dog # E12.0 [1] (🐕🦺) +1F426 200D 2B1B ; RGI_Emoji_ZWJ_Sequence ; black bird # E15.0 [1] (🐦⬛) +1F426 200D 1F525 ; RGI_Emoji_ZWJ_Sequence ; phoenix # E15.1 [1] (🐦🔥) +1F43B 200D 2744 FE0F ; RGI_Emoji_ZWJ_Sequence ; polar bear # E13.0 [1] (🐻❄️) +1F441 FE0F 200D 1F5E8 FE0F ; RGI_Emoji_ZWJ_Sequence ; eye 
in speech bubble # E2.0 [1] (👁️🗨️) +1F62E 200D 1F4A8 ; RGI_Emoji_ZWJ_Sequence ; face exhaling # E13.1 [1] (😮💨) +1F635 200D 1F4AB ; RGI_Emoji_ZWJ_Sequence ; face with spiral eyes # E13.1 [1] (😵💫) +1F636 200D 1F32B FE0F ; RGI_Emoji_ZWJ_Sequence ; face in clouds # E13.1 [1] (😶🌫️) +1F642 200D 2194 FE0F ; RGI_Emoji_ZWJ_Sequence ; head shaking horizontally # E15.1 [1] (🙂↔️) +1F642 200D 2195 FE0F ; RGI_Emoji_ZWJ_Sequence ; head shaking vertically # E15.1 [1] (🙂↕️) + +# Total elements: 19 + +#EOF diff --git a/contrib/python/wcwidth/py2/tests/test_core.py b/contrib/python/wcwidth/py2/tests/test_core.py index c8f791c016..d2776cd992 100644 --- a/contrib/python/wcwidth/py2/tests/test_core.py +++ b/contrib/python/wcwidth/py2/tests/test_core.py @@ -10,6 +10,13 @@ except ImportError: # local import wcwidth +try: + # python 2 + _ = unichr +except NameError: + # python 3 + unichr = chr + def test_package_version(): """wcwidth.__version__ is expected value.""" @@ -23,6 +30,45 @@ def test_package_version(): assert result == expected +def test_empty_string(): + """ + Test empty string is OK. + + https://github.com/jquast/wcwidth/issues/24 + """ + phrase = "" + expect_length_each = 0 + expect_length_phrase = 0 + + # exercise, + length_each = wcwidth.wcwidth(phrase) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def basic_string_type(): + """ + This is a python 2-specific test of the basic "string type" + + Such strings cannot contain anything but ascii in python2. + """ + # given, + phrase = 'hello\x00world' + expect_length_each = (1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) + expect_length_phrase = sum(expect_length_each) + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. 
+ assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + def test_hello_jp(): u""" Width of Japanese phrase: コンニチハ, セカイ! @@ -59,9 +105,11 @@ def test_wcswidth_substr(): expect_length_phrase = sum(expect_length_each) # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase))[:end] length_phrase = wcwidth.wcswidth(phrase, end) # verify. + assert length_each == expect_length_each assert length_phrase == expect_length_phrase @@ -82,7 +130,15 @@ def test_null_width_0(): def test_control_c0_width_negative_1(): - """CSI (Control sequence initiate) reports width -1 for ESC.""" + """How the API reacts to CSI (Control sequence initiate). + + An example of bad fortune, this terminal sequence is a width of 0 + on all terminals, but wcwidth doesn't parse Control-Sequence-Inducer + (CSI) sequences. + + Also the "legacy" posix functions wcwidth and wcswidth return -1 for + any string containing the C1 control character \x1b (ESC). + """ # given, phrase = u'\x1b[0m' expect_length_each = (-1, 1, 1, 1) @@ -90,9 +146,9 @@ def test_control_c0_width_negative_1(): # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) - # verify. + # verify, though this is actually *0* width for a terminal emulator assert length_each == expect_length_each assert length_phrase == expect_length_phrase @@ -106,7 +162,7 @@ def test_combining_width(): # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) # verify. assert length_each == expect_length_each @@ -121,7 +177,7 @@ def test_combining_cafe(): # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) # verify. 
assert length_each == expect_length_each @@ -129,29 +185,172 @@ def test_combining_cafe(): def test_combining_enclosing(): - u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is А҈ of length 1.""" + u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is of length 1.""" phrase = u"\u0410\u0488" expect_length_each = (1, 0) expect_length_phrase = 1 # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) # verify. assert length_each == expect_length_each assert length_phrase == expect_length_phrase -def test_combining_spacing(): - u"""Balinese kapal (ship) is ᬓᬨᬮ᭄ of length 4.""" - phrase = u"\u1B13\u1B28\u1B2E\u1B44" - expect_length_each = (1, 1, 1, 1) - expect_length_phrase = 4 +def test_balinese_script(): + u""" + Balinese kapal (ship) is length 3. + + This may be an example that is not yet correctly rendered by any terminal so + far, like devanagari. + """ + phrase = (u"\u1B13" # Category 'Lo', EAW 'N' -- BALINESE LETTER KA + u"\u1B28" # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL + u"\u1B2E" # Category 'Lo', EAW 'N' -- BALINESE LETTER LA + u"\u1B44") # Category 'Mc', EAW 'N' -- BALINESE ADEG ADEG + expect_length_each = (1, 1, 1, 0) + expect_length_phrase = 3 # exercise, length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase, len(phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_kr_jamo_filler(): + u""" + Jamo filler is 0 width. + + According to https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf this character and others + like it, ``\uffa0``, ``\u1160``, ``\u115f``, ``\u1160``, are not commonly viewed with a terminal, + seems it doesn't matter whether it is implemented or not, they are not typically used ! 
+ """ + phrase = u"\u1100\u1160" + expect_length_each = (2, 1) + expect_length_phrase = 3 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_devanagari_script(): + """ + Attempt to test the measurement width of Devanagari script. + + I believe this 'phrase' should be length 3. + + This is a difficult problem, and this library does not yet get it right, + because we interpret the unicode data files programmatically, but they do + not correctly describe how their terminal width is measured. + + There are very few Terminals that do! + + As of 2023, + + - iTerm2: correct length but individual characters are out of order and + horizaontally misplaced as to be unreadable in its language when + using 'Noto Sans' font. + - mlterm: mixed results, it offers several options in the configuration + dialog, "Xft", "Cario", and "Variable Column Width" have some + effect, but with neither 'Noto Sans' or 'unifont', it is not + recognizable as the Devanagari script it is meant to display. + + Previous testing with Devanagari documented at address https://benizi.com/vim/devanagari/ + + See also, https://askubuntu.com/questions/8437/is-there-a-good-mono-spaced-font-for-devanagari-script-in-the-terminal + """ + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + # please note that document correctly points out that the final width cannot be determined + # as a sum of each individual width, as this library currently performs with exception of + # ZWJ, but I think it incorrectly gestures what a stateless call to wcwidth.wcwidth of + # each codepoint *should* return. 
+ phrase = (u"\u0915" # Akhand, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER KA + u"\u094D" # Joiner, Category 'Mn', East Asian Width property 'N' -- DEVANAGARI SIGN VIRAMA + u"\u0937" # Fused, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER SSA + u"\u093F") # MatraL, Category 'Mc', East Asian Width property 'N' -- DEVANAGARI VOWEL SIGN I + # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1) + expect_length_each = (1, 0, 1, 0) + # I believe the final width *should* be 3. + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_tamil_script(): + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + phrase = (u"\u0b95" # Akhand, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER KA + u"\u0bcd" # Joiner, Category 'Mn', East Asian Width property 'N' -- TAMIL SIGN VIRAMA + u"\u0bb7" # Fused, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER SSA + u"\u0bcc") # MatraLR, Category 'Mc', East Asian Width property 'N' -- TAMIL VOWEL SIGN AU + # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (3, 0, 0, 4) + expect_length_each = (1, 0, 1, 0) + + # I believe the final width should be about 5 or 6. + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. 
+ assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_kannada_script(): + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + # |ರ್ಝೈ| + # |123| + phrase = (u"\u0cb0" # Repha, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA + u"\u0ccd" # Joiner, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN VIRAMA + u"\u0c9d" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER JHA + u"\u0cc8") # MatraUR, Category 'Mc', East Asian Width property 'N' -- KANNADA VOWEL SIGN AI + # 23107-terminal-suppt.pdf suggests should be (2, 0, 3, 1) + expect_length_each = (1, 0, 1, 0) + # I believe the correct final width *should* be 3 or 4. + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def test_kannada_script_2(): + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + # |ರ಼್ಚ| + # |12| + phrase = (u"\u0cb0" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA + u"\u0cbc" # Nukta, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN NUKTA + u"\u0ccd" # Joiner, Category 'Lo', East Asian Width property 'N' -- KANNADA SIGN VIRAMA + u"\u0c9a") # Subjoin, Category 'Mc', East Asian Width property 'N' -- KANNADA LETTER CA + # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1) + expect_length_each = (1, 0, 0, 1) + # I believe the final width is correct, but maybe for the wrong reasons! + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) # verify. 
assert length_each == expect_length_each diff --git a/contrib/python/wcwidth/py2/tests/test_emojis.py b/contrib/python/wcwidth/py2/tests/test_emojis.py new file mode 100644 index 0000000000..4f88e2330e --- /dev/null +++ b/contrib/python/wcwidth/py2/tests/test_emojis.py @@ -0,0 +1,243 @@ +# std imports +import os +import codecs + +# 3rd party +import pytest + +try: + # python 2 + _ = unichr +except NameError: + # python 3 + unichr = chr + +# some tests cannot be done on some builds of python, where the internal +# unicode structure is limited to 0x10000 for memory conservation, +# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)" +try: + unichr(0x2fffe) + NARROW_ONLY = False +except ValueError: + NARROW_ONLY = True + +# local +import wcwidth + + +def make_sequence_from_line(line): + # convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f) + return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split()) + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def emoji_zwj_sequence(): + u""" + Emoji zwj sequence of four codepoints is just 2 cells. + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + expect_length_each = (2, 0, 0, 2) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. 
+ assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_unfinished_zwj_sequence(): + u""" + Ensure index-out-of-bounds does not occur for zero-width joiner without any following character + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + expect_length_each = (2, 0, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_non_recommended_zwj_sequence(): + """ + Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + expect_length_each = (2, 0, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. 
+ assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_another_emoji_zwj_sequence(): + phrase = ( + u"\u26F9" # PERSON WITH BALL + u"\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200D" # ZERO WIDTH JOINER + u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + expect_length_each = (1, 0, 0, 1, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_longer_emoji_zwj_sequence(): + """ + A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells! + + Also test the same sequence in duplicate, verifying multiple VS-16 sequences + in a single function call. + """ + # 'Category Code', 'East Asian Width property' -- 'description' + phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT + u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART + u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16 + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\U0001F48B" # 'So', 'W' -- KISS MARK + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\U0001F9D1" # 'So', 'W' -- ADULT + u"\U0001F3FD" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4 + ) * 2 + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2 + expect_length_phrase = 4 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. 
+ assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def read_sequences_from_file(filename): + fp = codecs.open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8') + lines = [line.strip() + for line in fp.readlines() + if not line.startswith('#') and line.strip()] + fp.close() + sequences = [make_sequence_from_line(line) for line in lines] + return lines, sequences + + +@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds") +def test_recommended_emoji_zwj_sequences(): + """ + Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt + """ + # given, + lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt') + + errors = [] + # Exercise, track by zipping with original text file line, a debugging aide + num = 0 + for sequence, line in zip(sequences, lines): + num += 1 + measured_width = wcwidth.wcswidth(sequence) + if measured_width != 2: + errors.append({ + 'expected_width': 2, + 'line': line, + 'measured_width': measured_width, + 'sequence': sequence, + }) + + # verify + assert errors == [] + assert num >= 1468 + + +def test_recommended_variation_16_sequences(): + """ + Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt + """ + # given, + lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') + + errors = [] + num = 0 + for sequence, line in zip(sequences, lines): + num += 1 + if '\ufe0f' not in sequence: + # filter for only \uFE0F (VS-16) + continue + measured_width = wcwidth.wcswidth(sequence) + if measured_width != 2: + errors.append({ + 'expected_width': 2, + 'line': line, + 'measured_width': wcwidth.wcswidth(sequence), + 'sequence': sequence, + }) + + # verify + assert errors == [] + assert num >= 742 + + +def test_unicode_9_vs16(): + """Verify effect of VS-16 on unicode_version 9.0 and later""" + phrase = (u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION 
SELECTOR-16 + + expect_length_each = (1, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0') + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + +def test_unicode_8_vs16(): + """Verify that VS-16 has no effect on unicode_version 8.0 and earler""" + phrase = (u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + + expect_length_each = (1, 0) + expect_length_phrase = 1 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0') + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase
\ No newline at end of file diff --git a/contrib/python/wcwidth/py2/wcwidth/__init__.py b/contrib/python/wcwidth/py2/wcwidth/__init__.py index ec554c383d..40eedb6d22 100644 --- a/contrib/python/wcwidth/py2/wcwidth/__init__.py +++ b/contrib/python/wcwidth/py2/wcwidth/__init__.py @@ -1,5 +1,5 @@ """ -wcwidth module. +Wcwidth module. https://github.com/jquast/wcwidth """ @@ -11,6 +11,7 @@ https://github.com/jquast/wcwidth # local from .wcwidth import ZERO_WIDTH # noqa from .wcwidth import (WIDE_EASTASIAN, + VS16_NARROW_TO_WIDE, wcwidth, wcswidth, _bisearch, @@ -25,4 +26,4 @@ __all__ = ('wcwidth', 'wcswidth', 'list_versions') # We also used pkg_resources to load unicode version tables from version.json, # generated by bin/update-tables.py, but some environments are unable to # import pkg_resources for one reason or another, yikes! -__version__ = '0.2.6' +__version__ = '0.2.12' diff --git a/contrib/python/wcwidth/py2/wcwidth/table_vs16.py b/contrib/python/wcwidth/py2/wcwidth/table_vs16.py new file mode 100644 index 0000000000..3249262d98 --- /dev/null +++ b/contrib/python/wcwidth/py2/wcwidth/table_vs16.py @@ -0,0 +1,125 @@ +""" +Exports VS16_NARROW_TO_WIDE table keyed by supporting unicode version level. + +This code generated by wcwidth/bin/update-tables.py on 2023-11-07 16:43:49 UTC. 
+""" +VS16_NARROW_TO_WIDE = { + '9.0.0': ( + # Source: 9.0.0 + # Date: 2023-02-01, 02:22:54 GMT + # + (0x00023, 0x00023,), # Number Sign + (0x0002a, 0x0002a,), # Asterisk + (0x00030, 0x00039,), # Digit Zero ..Digit Nine + (0x000a9, 0x000a9,), # Copyright Sign + (0x000ae, 0x000ae,), # Registered Sign + (0x0203c, 0x0203c,), # Double Exclamation Mark + (0x02049, 0x02049,), # Exclamation Question Mark + (0x02122, 0x02122,), # Trade Mark Sign + (0x02139, 0x02139,), # Information Source + (0x02194, 0x02199,), # Left Right Arrow ..South West Arrow + (0x021a9, 0x021aa,), # Leftwards Arrow With Hoo..Rightwards Arrow With Ho + (0x02328, 0x02328,), # Keyboard + (0x023cf, 0x023cf,), # Eject Symbol + (0x023ed, 0x023ef,), # Black Right-pointing Dou..Black Right-pointing Tri + (0x023f1, 0x023f2,), # Stopwatch ..Timer Clock + (0x023f8, 0x023fa,), # Double Vertical Bar ..Black Circle For Record + (0x024c2, 0x024c2,), # Circled Latin Capital Letter M + (0x025aa, 0x025ab,), # Black Small Square ..White Small Square + (0x025b6, 0x025b6,), # Black Right-pointing Triangle + (0x025c0, 0x025c0,), # Black Left-pointing Triangle + (0x025fb, 0x025fc,), # White Medium Square ..Black Medium Square + (0x02600, 0x02604,), # Black Sun With Rays ..Comet + (0x0260e, 0x0260e,), # Black Telephone + (0x02611, 0x02611,), # Ballot Box With Check + (0x02618, 0x02618,), # Shamrock + (0x0261d, 0x0261d,), # White Up Pointing Index + (0x02620, 0x02620,), # Skull And Crossbones + (0x02622, 0x02623,), # Radioactive Sign ..Biohazard Sign + (0x02626, 0x02626,), # Orthodox Cross + (0x0262a, 0x0262a,), # Star And Crescent + (0x0262e, 0x0262f,), # Peace Symbol ..Yin Yang + (0x02638, 0x0263a,), # Wheel Of Dharma ..White Smiling Face + (0x02640, 0x02640,), # Female Sign + (0x02642, 0x02642,), # Male Sign + (0x0265f, 0x02660,), # Black Chess Pawn ..Black Spade Suit + (0x02663, 0x02663,), # Black Club Suit + (0x02665, 0x02666,), # Black Heart Suit ..Black Diamond Suit + (0x02668, 0x02668,), # Hot Springs + (0x0267b, 
0x0267b,), # Black Universal Recycling Symbol + (0x0267e, 0x0267e,), # Permanent Paper Sign + (0x02692, 0x02692,), # Hammer And Pick + (0x02694, 0x02697,), # Crossed Swords ..Alembic + (0x02699, 0x02699,), # Gear + (0x0269b, 0x0269c,), # Atom Symbol ..Fleur-de-lis + (0x026a0, 0x026a0,), # Warning Sign + (0x026a7, 0x026a7,), # Male With Stroke And Male And Female Sign + (0x026b0, 0x026b1,), # Coffin ..Funeral Urn + (0x026c8, 0x026c8,), # Thunder Cloud And Rain + (0x026cf, 0x026cf,), # Pick + (0x026d1, 0x026d1,), # Helmet With White Cross + (0x026d3, 0x026d3,), # Chains + (0x026e9, 0x026e9,), # Shinto Shrine + (0x026f0, 0x026f1,), # Mountain ..Umbrella On Ground + (0x026f4, 0x026f4,), # Ferry + (0x026f7, 0x026f9,), # Skier ..Person With Ball + (0x02702, 0x02702,), # Black Scissors + (0x02708, 0x02709,), # Airplane ..Envelope + (0x0270c, 0x0270d,), # Victory Hand ..Writing Hand + (0x0270f, 0x0270f,), # Pencil + (0x02712, 0x02712,), # Black Nib + (0x02714, 0x02714,), # Heavy Check Mark + (0x02716, 0x02716,), # Heavy Multiplication X + (0x0271d, 0x0271d,), # Latin Cross + (0x02721, 0x02721,), # Star Of David + (0x02733, 0x02734,), # Eight Spoked Asterisk ..Eight Pointed Black Star + (0x02744, 0x02744,), # Snowflake + (0x02747, 0x02747,), # Sparkle + (0x02763, 0x02764,), # Heavy Heart Exclamation ..Heavy Black Heart + (0x027a1, 0x027a1,), # Black Rightwards Arrow + (0x02934, 0x02935,), # Arrow Pointing Rightward..Arrow Pointing Rightward + (0x02b05, 0x02b07,), # Leftwards Black Arrow ..Downwards Black Arrow + (0x1f170, 0x1f171,), # Negative Squared Latin C..Negative Squared Latin C + (0x1f17e, 0x1f17f,), # Negative Squared Latin C..Negative Squared Latin C + (0x1f321, 0x1f321,), # Thermometer + (0x1f324, 0x1f32c,), # White Sun With Small Clo..Wind Blowing Face + (0x1f336, 0x1f336,), # Hot Pepper + (0x1f37d, 0x1f37d,), # Fork And Knife With Plate + (0x1f396, 0x1f397,), # Military Medal ..Reminder Ribbon + (0x1f399, 0x1f39b,), # Studio Microphone ..Control Knobs + 
(0x1f39e, 0x1f39f,), # Film Frames ..Admission Tickets + (0x1f3cb, 0x1f3ce,), # Weight Lifter ..Racing Car + (0x1f3d4, 0x1f3df,), # Snow Capped Mountain ..Stadium + (0x1f3f3, 0x1f3f3,), # Waving White Flag + (0x1f3f5, 0x1f3f5,), # Rosette + (0x1f3f7, 0x1f3f7,), # Label + (0x1f43f, 0x1f43f,), # Chipmunk + (0x1f441, 0x1f441,), # Eye + (0x1f4fd, 0x1f4fd,), # Film Projector + (0x1f549, 0x1f54a,), # Om Symbol ..Dove Of Peace + (0x1f56f, 0x1f570,), # Candle ..Mantelpiece Clock + (0x1f573, 0x1f579,), # Hole ..Joystick + (0x1f587, 0x1f587,), # Linked Paperclips + (0x1f58a, 0x1f58d,), # Lower Left Ballpoint Pen..Lower Left Crayon + (0x1f590, 0x1f590,), # Raised Hand With Fingers Splayed + (0x1f5a5, 0x1f5a5,), # Desktop Computer + (0x1f5a8, 0x1f5a8,), # Printer + (0x1f5b1, 0x1f5b2,), # Three Button Mouse ..Trackball + (0x1f5bc, 0x1f5bc,), # Frame With Picture + (0x1f5c2, 0x1f5c4,), # Card Index Dividers ..File Cabinet + (0x1f5d1, 0x1f5d3,), # Wastebasket ..Spiral Calendar Pad + (0x1f5dc, 0x1f5de,), # Compression ..Rolled-up Newspaper + (0x1f5e1, 0x1f5e1,), # Dagger Knife + (0x1f5e3, 0x1f5e3,), # Speaking Head In Silhouette + (0x1f5e8, 0x1f5e8,), # Left Speech Bubble + (0x1f5ef, 0x1f5ef,), # Right Anger Bubble + (0x1f5f3, 0x1f5f3,), # Ballot Box With Ballot + (0x1f5fa, 0x1f5fa,), # World Map + (0x1f6cb, 0x1f6cb,), # Couch And Lamp + (0x1f6cd, 0x1f6cf,), # Shopping Bags ..Bed + (0x1f6e0, 0x1f6e5,), # Hammer And Wrench ..Motor Boat + (0x1f6e9, 0x1f6e9,), # Small Airplane + (0x1f6f0, 0x1f6f0,), # Satellite + (0x1f6f3, 0x1f6f3,), # Passenger Ship + ), +} diff --git a/contrib/python/wcwidth/py2/wcwidth/table_wide.py b/contrib/python/wcwidth/py2/wcwidth/table_wide.py index 9f8cd0a845..02afd5c2b7 100644 --- a/contrib/python/wcwidth/py2/wcwidth/table_wide.py +++ b/contrib/python/wcwidth/py2/wcwidth/table_wide.py @@ -1,7 +1,7 @@ """ Exports WIDE_EASTASIAN table keyed by supporting unicode version level. -This code generated by wcwidth/bin/update-tables.py on 2023-01-14 03:25:41 UTC. 
+This code generated by wcwidth/bin/update-tables.py on 2023-09-14 15:45:33 UTC. """ WIDE_EASTASIAN = { '4.1.0': ( @@ -1299,9 +1299,9 @@ WIDE_EASTASIAN = { (0x1aff5, 0x1affb,), # Katakana Letter Minnan T..Katakana Letter Minnan N (0x1affd, 0x1affe,), # Katakana Letter Minnan N..Katakana Letter Minnan N (0x1b000, 0x1b122,), # Katakana Letter Archaic ..Katakana Letter Archaic - (0x1b132, 0x1b132,), # (nil) + (0x1b132, 0x1b132,), # Hiragana Letter Small Ko (0x1b150, 0x1b152,), # Hiragana Letter Small Wi..Hiragana Letter Small Wo - (0x1b155, 0x1b155,), # (nil) + (0x1b155, 0x1b155,), # Katakana Letter Small Ko (0x1b164, 0x1b167,), # Katakana Letter Small Wi..Katakana Letter Small N (0x1b170, 0x1b2fb,), # Nushu Character-1b170 ..Nushu Character-1b2fb (0x1f004, 0x1f004,), # Mahjong Tile Red Dragon @@ -1335,7 +1335,7 @@ WIDE_EASTASIAN = { (0x1f6cc, 0x1f6cc,), # Sleeping Accommodation (0x1f6d0, 0x1f6d2,), # Place Of Worship ..Shopping Trolley (0x1f6d5, 0x1f6d7,), # Hindu Temple ..Elevator - (0x1f6dc, 0x1f6df,), # (nil) ..Ring Buoy + (0x1f6dc, 0x1f6df,), # Wireless ..Ring Buoy (0x1f6eb, 0x1f6ec,), # Airplane Departure ..Airplane Arriving (0x1f6f4, 0x1f6fc,), # Scooter ..Roller Skate (0x1f7e0, 0x1f7eb,), # Large Orange Circle ..Large Brown Square @@ -1344,12 +1344,137 @@ WIDE_EASTASIAN = { (0x1f93c, 0x1f945,), # Wrestlers ..Goal Net (0x1f947, 0x1f9ff,), # First Place Medal ..Nazar Amulet (0x1fa70, 0x1fa7c,), # Ballet Shoes ..Crutch - (0x1fa80, 0x1fa88,), # Yo-yo ..(nil) - (0x1fa90, 0x1fabd,), # Ringed Planet ..(nil) - (0x1fabf, 0x1fac5,), # (nil) ..Person With Crown - (0x1face, 0x1fadb,), # (nil) - (0x1fae0, 0x1fae8,), # Melting Face ..(nil) - (0x1faf0, 0x1faf8,), # Hand With Index Finger A..(nil) + (0x1fa80, 0x1fa88,), # Yo-yo ..Flute + (0x1fa90, 0x1fabd,), # Ringed Planet ..Wing + (0x1fabf, 0x1fac5,), # Goose ..Person With Crown + (0x1face, 0x1fadb,), # Moose ..Pea Pod + (0x1fae0, 0x1fae8,), # Melting Face ..Shaking Face + (0x1faf0, 0x1faf8,), # Hand With Index Finger 
A..Rightwards Pushing Hand + (0x20000, 0x2fffd,), # Cjk Unified Ideograph-20..(nil) + (0x30000, 0x3fffd,), # Cjk Unified Ideograph-30..(nil) + ), + '15.1.0': ( + # Source: EastAsianWidth-15.1.0.txt + # Date: 2023-07-28, 23:34:08 GMT + # + (0x01100, 0x0115f,), # Hangul Choseong Kiyeok ..Hangul Choseong Filler + (0x0231a, 0x0231b,), # Watch ..Hourglass + (0x02329, 0x0232a,), # Left-pointing Angle Brac..Right-pointing Angle Bra + (0x023e9, 0x023ec,), # Black Right-pointing Dou..Black Down-pointing Doub + (0x023f0, 0x023f0,), # Alarm Clock + (0x023f3, 0x023f3,), # Hourglass With Flowing Sand + (0x025fd, 0x025fe,), # White Medium Small Squar..Black Medium Small Squar + (0x02614, 0x02615,), # Umbrella With Rain Drops..Hot Beverage + (0x02648, 0x02653,), # Aries ..Pisces + (0x0267f, 0x0267f,), # Wheelchair Symbol + (0x02693, 0x02693,), # Anchor + (0x026a1, 0x026a1,), # High Voltage Sign + (0x026aa, 0x026ab,), # Medium White Circle ..Medium Black Circle + (0x026bd, 0x026be,), # Soccer Ball ..Baseball + (0x026c4, 0x026c5,), # Snowman Without Snow ..Sun Behind Cloud + (0x026ce, 0x026ce,), # Ophiuchus + (0x026d4, 0x026d4,), # No Entry + (0x026ea, 0x026ea,), # Church + (0x026f2, 0x026f3,), # Fountain ..Flag In Hole + (0x026f5, 0x026f5,), # Sailboat + (0x026fa, 0x026fa,), # Tent + (0x026fd, 0x026fd,), # Fuel Pump + (0x02705, 0x02705,), # White Heavy Check Mark + (0x0270a, 0x0270b,), # Raised Fist ..Raised Hand + (0x02728, 0x02728,), # Sparkles + (0x0274c, 0x0274c,), # Cross Mark + (0x0274e, 0x0274e,), # Negative Squared Cross Mark + (0x02753, 0x02755,), # Black Question Mark Orna..White Exclamation Mark O + (0x02757, 0x02757,), # Heavy Exclamation Mark Symbol + (0x02795, 0x02797,), # Heavy Plus Sign ..Heavy Division Sign + (0x027b0, 0x027b0,), # Curly Loop + (0x027bf, 0x027bf,), # Double Curly Loop + (0x02b1b, 0x02b1c,), # Black Large Square ..White Large Square + (0x02b50, 0x02b50,), # White Medium Star + (0x02b55, 0x02b55,), # Heavy Large Circle + (0x02e80, 0x02e99,), # Cjk 
Radical Repeat ..Cjk Radical Rap + (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified + (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute + (0x02ff0, 0x0303e,), # Ideographic Description ..Ideographic Variation In + (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke + (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn + (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae + (0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q + (0x031ef, 0x0321e,), # (nil) ..Parenthesized Korean Cha + (0x03220, 0x03247,), # Parenthesized Ideograph ..Circled Ideograph Koto + (0x03250, 0x04dbf,), # Partnership Sign ..Cjk Unified Ideograph-4d + (0x04e00, 0x0a48c,), # Cjk Unified Ideograph-4e..Yi Syllable Yyr + (0x0a490, 0x0a4c6,), # Yi Radical Qot ..Yi Radical Ke + (0x0a960, 0x0a97c,), # Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo + (0x0ac00, 0x0d7a3,), # Hangul Syllable Ga ..Hangul Syllable Hih + (0x0f900, 0x0faff,), # Cjk Compatibility Ideogr..(nil) + (0x0fe10, 0x0fe19,), # Presentation Form For Ve..Presentation Form For Ve + (0x0fe30, 0x0fe52,), # Presentation Form For Ve..Small Full Stop + (0x0fe54, 0x0fe66,), # Small Semicolon ..Small Equals Sign + (0x0fe68, 0x0fe6b,), # Small Reverse Solidus ..Small Commercial At + (0x0ff01, 0x0ff60,), # Fullwidth Exclamation Ma..Fullwidth Right White Pa + (0x0ffe0, 0x0ffe6,), # Fullwidth Cent Sign ..Fullwidth Won Sign + (0x16fe0, 0x16fe4,), # Tangut Iteration Mark ..Khitan Small Script Fill + (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea + (0x17000, 0x187f7,), # (nil) + (0x18800, 0x18cd5,), # Tangut Component-001 ..Khitan Small Script Char + (0x18d00, 0x18d08,), # (nil) + (0x1aff0, 0x1aff3,), # Katakana Letter Minnan T..Katakana Letter Minnan T + (0x1aff5, 0x1affb,), # Katakana Letter Minnan T..Katakana Letter Minnan N + (0x1affd, 0x1affe,), # 
Katakana Letter Minnan N..Katakana Letter Minnan N + (0x1b000, 0x1b122,), # Katakana Letter Archaic ..Katakana Letter Archaic + (0x1b132, 0x1b132,), # Hiragana Letter Small Ko + (0x1b150, 0x1b152,), # Hiragana Letter Small Wi..Hiragana Letter Small Wo + (0x1b155, 0x1b155,), # Katakana Letter Small Ko + (0x1b164, 0x1b167,), # Katakana Letter Small Wi..Katakana Letter Small N + (0x1b170, 0x1b2fb,), # Nushu Character-1b170 ..Nushu Character-1b2fb + (0x1f004, 0x1f004,), # Mahjong Tile Red Dragon + (0x1f0cf, 0x1f0cf,), # Playing Card Black Joker + (0x1f18e, 0x1f18e,), # Negative Squared Ab + (0x1f191, 0x1f19a,), # Squared Cl ..Squared Vs + (0x1f200, 0x1f202,), # Square Hiragana Hoka ..Squared Katakana Sa + (0x1f210, 0x1f23b,), # Squared Cjk Unified Ideo..Squared Cjk Unified Ideo + (0x1f240, 0x1f248,), # Tortoise Shell Bracketed..Tortoise Shell Bracketed + (0x1f250, 0x1f251,), # Circled Ideograph Advant..Circled Ideograph Accept + (0x1f260, 0x1f265,), # Rounded Symbol For Fu ..Rounded Symbol For Cai + (0x1f300, 0x1f320,), # Cyclone ..Shooting Star + (0x1f32d, 0x1f335,), # Hot Dog ..Cactus + (0x1f337, 0x1f37c,), # Tulip ..Baby Bottle + (0x1f37e, 0x1f393,), # Bottle With Popping Cork..Graduation Cap + (0x1f3a0, 0x1f3ca,), # Carousel Horse ..Swimmer + (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And + (0x1f3e0, 0x1f3f0,), # House Building ..European Castle + (0x1f3f4, 0x1f3f4,), # Waving Black Flag + (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f440, 0x1f440,), # Eyes + (0x1f442, 0x1f4fc,), # Ear ..Videocassette + (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red + (0x1f54b, 0x1f54e,), # Kaaba ..Menorah With Nine Branch + (0x1f550, 0x1f567,), # Clock Face One Oclock ..Clock Face Twelve-thirty + (0x1f57a, 0x1f57a,), # Man Dancing + (0x1f595, 0x1f596,), # Reversed Hand With Middl..Raised Hand With Part Be + (0x1f5a4, 0x1f5a4,), # Black Heart + (0x1f5fb, 0x1f64f,), # Mount Fuji ..Person With Folded Hands + (0x1f680, 
0x1f6c5,), # Rocket ..Left Luggage + (0x1f6cc, 0x1f6cc,), # Sleeping Accommodation + (0x1f6d0, 0x1f6d2,), # Place Of Worship ..Shopping Trolley + (0x1f6d5, 0x1f6d7,), # Hindu Temple ..Elevator + (0x1f6dc, 0x1f6df,), # Wireless ..Ring Buoy + (0x1f6eb, 0x1f6ec,), # Airplane Departure ..Airplane Arriving + (0x1f6f4, 0x1f6fc,), # Scooter ..Roller Skate + (0x1f7e0, 0x1f7eb,), # Large Orange Circle ..Large Brown Square + (0x1f7f0, 0x1f7f0,), # Heavy Equals Sign + (0x1f90c, 0x1f93a,), # Pinched Fingers ..Fencer + (0x1f93c, 0x1f945,), # Wrestlers ..Goal Net + (0x1f947, 0x1f9ff,), # First Place Medal ..Nazar Amulet + (0x1fa70, 0x1fa7c,), # Ballet Shoes ..Crutch + (0x1fa80, 0x1fa88,), # Yo-yo ..Flute + (0x1fa90, 0x1fabd,), # Ringed Planet ..Wing + (0x1fabf, 0x1fac5,), # Goose ..Person With Crown + (0x1face, 0x1fadb,), # Moose ..Pea Pod + (0x1fae0, 0x1fae8,), # Melting Face ..Shaking Face + (0x1faf0, 0x1faf8,), # Hand With Index Finger A..Rightwards Pushing Hand (0x20000, 0x2fffd,), # Cjk Unified Ideograph-20..(nil) (0x30000, 0x3fffd,), # Cjk Unified Ideograph-30..(nil) ), diff --git a/contrib/python/wcwidth/py2/wcwidth/table_zero.py b/contrib/python/wcwidth/py2/wcwidth/table_zero.py index a4abaea192..67261fd659 100644 --- a/contrib/python/wcwidth/py2/wcwidth/table_zero.py +++ b/contrib/python/wcwidth/py2/wcwidth/table_zero.py @@ -1,13 +1,15 @@ """ Exports ZERO_WIDTH table keyed by supporting unicode version level. -This code generated by wcwidth/bin/update-tables.py on 2023-01-14 03:25:41 UTC. +This code generated by wcwidth/bin/update-tables.py on 2023-10-19 20:57:31 UTC. 
""" ZERO_WIDTH = { '4.1.0': ( # Source: DerivedGeneralCategory-4.1.0.txt # Date: 2005-02-26, 02:35:50 GMT [MD] # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili (0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli @@ -17,61 +19,74 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00603,), # Arabic Number Sign ..Arabic Sign Safha (0x00610, 0x00615,), # Arabic Sign Sallallahou ..Arabic Small High Tah (0x0064b, 0x0065e,), # Arabic Fathatan ..Arabic Fatha With Two Do (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen - (0x006de, 0x006e4,), # Arabic Start Of Rub El H..Arabic Small High Madda + (0x006d6, 0x006e4,), # Arabic Small High Ligatu..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun - (0x00901, 0x00902,), # Devanagari Sign Candrabi..Devanagari Sign Anusvara + (0x00901, 0x00903,), # Devanagari Sign Candrabi..Devanagari Sign Visarga (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x0093e, 0x0094d,), # Devanagari Vowel Sign Aa..Devanagari Sign Virama (0x00951, 0x00954,), # Devanagari Stress Sign U..Devanagari Acute Accent (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign 
Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 
0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b43,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b43,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama - (0x00d41, 0x00d43,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark + (0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga + 
(0x00d3e, 0x00d43,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -83,59 +98,66 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f90, 0x00f97,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01032,), # Myanmar Vowel Sign Ai - (0x01036, 0x01037,), # Myanmar Sign Anusvara ..Myanmar Sign Dot Below - (0x01039, 0x01039,), # Myanmar Sign Virama - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign 
Vocal + (0x0102c, 0x01032,), # Myanmar Vowel Sign Aa ..Myanmar Vowel Sign Ai + (0x01036, 0x01039,), # Myanmar Sign Anusvara ..Myanmar Sign Virama + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0135f, 0x0135f,), # Ethiopic Combining Gemination Mark (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x019b0, 0x019c0,), # New Tai Lue Vowel Sign V..New Tai Lue Vowel Sign I + (0x019c8, 0x019c9,), # New Tai Lue Tone Mark-1 ..New Tai Lue Tone Mark-2 + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae (0x01dc0, 0x01dc3,), # Combining Dotted Grave A..Combining Suspension Mar + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02063,), # Word Joiner ..Invisible Separator + (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes (0x020d0, 
0x020eb,), # Combining Left Harpoon A..Combining Long Double So (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe23,), # Combining Ligature Left ..Combining Double Tilde R + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '5.0.0': ( # Source: DerivedGeneralCategory-5.0.0.txt # Date: 2006-02-27, 23:41:27 GMT [MD] # + (0x00000, 0x00000,), # (nil) + (0x000ad, 
0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00486,), # Combining Cyrillic Titlo..Combining Cyrillic Psili (0x00488, 0x00489,), # Combining Cyrillic Hundr..Combining Cyrillic Milli @@ -144,63 +166,76 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00603,), # Arabic Number Sign ..Arabic Sign Safha (0x00610, 0x00615,), # Arabic Sign Sallallahou ..Arabic Small High Tah (0x0064b, 0x0065e,), # Arabic Fathatan ..Arabic Fatha With Two Do (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen - (0x006de, 0x006e4,), # Arabic Start Of Rub El H..Arabic Small High Madda + (0x006d6, 0x006e4,), # Arabic Small High Ligatu..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun (0x007eb, 0x007f3,), # Nko Combining Short High..Nko Combining Double Dot - (0x00901, 0x00902,), # Devanagari Sign Candrabi..Devanagari Sign Anusvara + (0x00901, 0x00903,), # Devanagari Sign Candrabi..Devanagari Sign Visarga (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x0093e, 0x0094d,), # Devanagari Vowel Sign Aa..Devanagari Sign Virama (0x00951, 0x00954,), # Devanagari Stress Sign U..Devanagari Acute Accent (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # 
Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b43,), # Oriya Vowel Sign U ..Oriya 
Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b43,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d41, 0x00d43,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga + (0x00d3e, 
0x00d43,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -212,66 +247,70 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f90, 0x00f97,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01032,), # Myanmar Vowel Sign Ai - (0x01036, 0x01037,), # Myanmar Sign Anusvara ..Myanmar Sign Dot Below - (0x01039, 0x01039,), # Myanmar Sign Virama - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + 
(0x0102c, 0x01032,), # Myanmar Vowel Sign Aa ..Myanmar Vowel Sign Ai + (0x01036, 0x01039,), # Myanmar Sign Anusvara ..Myanmar Sign Virama + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0135f, 0x0135f,), # Ethiopic Combining Gemination Mark (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x019b0, 0x019c0,), # New Tai Lue Vowel Sign V..New Tai Lue Vowel Sign I + (0x019c8, 0x019c9,), # New Tai Lue Tone Mark-1 ..New Tai Lue Tone Mark-2 + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem 
..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol (0x01dc0, 0x01dca,), # Combining Dotted Grave A..Combining Latin Small Le (0x01dfe, 0x01dff,), # Combining Left Arrowhead..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02063,), # Word Joiner ..Invisible Separator + (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes (0x020d0, 0x020ef,), # Combining Left Harpoon A..Combining Right Arrow Be (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe23,), # Combining Ligature Left ..Combining Double Tilde R + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 
0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '5.1.0': ( # Source: DerivedGeneralCategory-5.1.0.txt # Date: 2008-03-20, 17:54:57 GMT [MD] # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -279,68 +318,81 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00603,), # Arabic Number Sign ..Arabic Sign Safha (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0064b, 0x0065e,), # Arabic Fathatan ..Arabic Fatha With Two Do (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen - (0x006de, 0x006e4,), # Arabic Start Of Rub El H..Arabic Small High Madda + (0x006d6, 0x006e4,), # Arabic Small High Ligatu..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun (0x007eb, 0x007f3,), # Nko Combining Short High..Nko Combining Double Dot - (0x00901, 0x00902,), # Devanagari Sign 
Candrabi..Devanagari Sign Anusvara + (0x00901, 0x00903,), # Devanagari Sign Candrabi..Devanagari Sign Visarga (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x0093e, 0x0094d,), # Devanagari Vowel Sign Aa..Devanagari Sign Virama (0x00951, 0x00954,), # Devanagari Stress Sign U..Devanagari Acute Accent (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - 
(0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x00c82, 0x00c83,), # 
Kannada Sign Anusvara ..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -352,51 +404,46 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # 
Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f90, 0x00f97,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 (0x0135f, 0x0135f,), # Ethiopic Combining Gemination Mark (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 
0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x019b0, 0x019c0,), # New Tai Lue Vowel Sign V..New Tai Lue Vowel Sign I + (0x019c8, 0x019c9,), # New Tai Lue Tone Mark-1 ..New Tai Lue Tone Mark-2 + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), 
# Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01baa,), # Sundanese Consonant Sign..Sundanese Sign Pamaaeh + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01dc0, 0x01de6,), # Combining Dotted Grave A..Combining Latin Small Le (0x01dfe, 0x01dff,), # Combining Left Arrowhead..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M @@ -406,35 +453,40 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c4,), # Saurashtra Sign Virama + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M + 
(0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '5.2.0': ( # Source: DerivedGeneralCategory-5.2.0.txt # Date: 2009-08-22, 04:58:21 GMT [MD] # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -442,13 +494,14 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew 
Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00603,), # Arabic Number Sign ..Arabic Sign Safha (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0064b, 0x0065e,), # Arabic Fathatan ..Arabic Fatha With Two Do (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen - (0x006de, 0x006e4,), # Arabic Start Of Rub El H..Arabic Small High Madda + (0x006d6, 0x006e4,), # Arabic Small High Ligatu..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -457,57 +510,69 @@ ZERO_WIDTH = { (0x0081b, 0x00823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa - (0x00900, 0x00902,), # Devanagari Sign Inverted..Devanagari Sign Anusvara + (0x00900, 0x00903,), # Devanagari Sign Inverted..Devanagari Sign Visarga (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x0093e, 0x0094e,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Pr (0x00951, 0x00955,), # Devanagari Stress Sign U..Devanagari Vowel Sign Ca (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign 
Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + 
(0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d02, 0x00d03,), # Malayalam Sign Anusvara 
..Malayalam Sign Visarga + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -519,63 +584,54 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f90, 0x00f97,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar 
Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135f, 0x0135f,), # Ethiopic Combining Gemination Mark (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu 
Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x019b0, 0x019c0,), # New Tai Lue Vowel Sign V..New Tai Lue Vowel Sign I + (0x019c8, 0x019c9,), # New Tai Lue Tone Mark-1 ..New Tai Lue Tone Mark-2 + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # 
Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01baa,), # Sundanese Consonant Sign..Sundanese Sign Pamaaeh + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak + (0x01cf2, 0x01cf2,), # Vedic Sign Ardhavisarga (0x01dc0, 0x01de6,), # Combining Dotted Grave A..Combining Latin Small Le (0x01dfd, 0x01dff,), # Combining Almost Equal T..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette @@ -587,51 +643,54 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c4,), # Saurashtra Sign Virama + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga 
..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7b,), # Myanmar Sign Pao Karen Tone (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E 
..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama - (0x11080, 0x11081,), # Kaithi Sign Candrabindu ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x11080, 0x11082,), # Kaithi Sign Candrabindu ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '6.0.0': ( # Source: DerivedGeneralCategory-6.0.0.txt # Date: 2010-08-19, 00:48:09 GMT [MD] # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -639,13 +698,15 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00603,), # Arabic Number Sign ..Arabic Sign Safha (0x00610, 0x0061a,), # Arabic Sign Sallallahou 
..Arabic Small Kasra (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -655,58 +716,69 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x00900, 0x00902,), # Devanagari Sign Inverted..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x00900, 0x00903,), # Devanagari Sign Inverted..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + 
(0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 
0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa 
..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -718,67 +790,55 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar 
Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign 
Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x019b0, 0x019c0,), # New Tai Lue Vowel Sign V..New Tai Lue Vowel Sign I + (0x019c8, 0x019c9,), # New Tai Lue Tone Mark-1 ..New Tai Lue Tone Mark-2 + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For 
S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01baa,), # Sundanese Consonant Sign..Sundanese Sign Pamaaeh + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak + (0x01cf2, 0x01cf2,), # Vedic Sign Ardhavisarga (0x01dc0, 0x01de6,), # Combining Dotted Grave A..Combining Latin Small Le (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner @@ -791,53 +851,56 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c4,), # Saurashtra Sign Virama + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0x0a8e0, 0x0a8f1,), # Combining Devanagari 
Dig..Combining Devanagari Sig (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7b,), # Myanmar Sign Pao Karen Tone (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation 
A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x11080, 0x11081,), # Kaithi Sign Candrabindu ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x11080, 0x11082,), # Kaithi Sign Candrabindu ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '6.1.0': ( # Source: DerivedGeneralCategory-6.1.0.txt # Date: 2011-11-27, 05:10:22 GMT [MD] # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 
0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -845,13 +908,15 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00604,), # Arabic Number Sign ..Arabic Sign Samvat (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -862,58 +927,69 @@ ZERO_WIDTH = { (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark (0x008e4, 0x008fe,), # Arabic Curly Fatha ..Arabic Damma With Dot - (0x00900, 0x00902,), # Devanagari Sign Inverted..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x00900, 0x00903,), # Devanagari Sign Inverted..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign 
Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 
0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 
0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -925,75 +1001,60 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 
0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 
0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x019b0, 0x019c0,), # New Tai Lue Vowel Sign V..New Tai Lue Vowel Sign I + (0x019c8, 0x019c9,), # New Tai Lue Tone Mark-1 ..New Tai Lue Tone Mark-2 + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese 
Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bab,), # Sundanese Sign Virama - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak - (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above + (0x01cf2, 0x01cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above (0x01dc0, 0x01de6,), # Combining Dotted Grave A..Combining Latin Small Le (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x0206a, 0x0206f,), # Inhibit Symmetric Swappi..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner 
(0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -1002,65 +1063,65 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c4,), # Saurashtra Sign Virama + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham 
Consonant Sign Final M + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7b,), # Myanmar Sign Pao Karen Tone (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x11080, 0x11081,), # Kaithi Sign Candrabindu ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi 
Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x11080, 0x11082,), # Kaithi Sign Candrabindu ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '6.2.0': ( # Source: DerivedGeneralCategory-6.2.0.txt # Date: 2012-05-20, 00:42:34 GMT [MD] # + 
(0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -1068,13 +1129,15 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00604,), # Arabic Number Sign ..Arabic Sign Samvat (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -1085,58 +1148,69 @@ ZERO_WIDTH = { (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark (0x008e4, 0x008fe,), # Arabic Curly Fatha ..Arabic Damma With Dot - (0x00900, 0x00902,), # Devanagari Sign Inverted..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x00900, 0x00903,), # Devanagari Sign Inverted..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel 
Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati 
Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + 
(0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -1148,75 +1222,60 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 
0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq 
..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x019b0, 0x019c0,), # New Tai Lue Vowel Sign V..New Tai Lue Vowel Sign I + (0x019c8, 0x019c9,), # New Tai Lue Tone Mark-1 ..New Tai Lue Tone Mark-2 + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # 
Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bab,), # Sundanese Sign Virama - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak - (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above + (0x01cf2, 0x01cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above (0x01dc0, 0x01de6,), # Combining Dotted Grave A..Combining Latin Small Le (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x0206a, 0x0206f,), # Inhibit Symmetric 
Swappi..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -1225,65 +1284,65 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c4,), # Saurashtra Sign Virama + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign 
Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7b,), # Myanmar Sign Pao Karen Tone (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama - (0x11001, 0x11001,), # Brahmi Sign Anusvara + 
(0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x11080, 0x11081,), # Kaithi Sign Candrabindu ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x11080, 0x11082,), # Kaithi Sign Candrabindu ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0xe0001, 0xe0001,), # 
Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '6.3.0': ( # Source: DerivedGeneralCategory-6.3.0.txt # Date: 2013-07-05, 14:08:45 GMT [MD] # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -1291,13 +1350,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00604,), # Arabic Number Sign ..Arabic Sign Samvat (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -1308,58 +1370,69 @@ ZERO_WIDTH = { (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark (0x008e4, 0x008fe,), # Arabic Curly Fatha ..Arabic Damma With Dot - (0x00900, 0x00902,), # Devanagari Sign Inverted..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 
0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x00900, 0x00903,), # Devanagari Sign Inverted..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U 
..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c01, 0x00c03,), # Telugu Sign Candrabindu ..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign 
Vocali + (0x00c82, 0x00c83,), # Kannada Sign Anusvara ..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d02, 0x00d03,), # Malayalam Sign Anusvara ..Malayalam Sign Visarga + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -1371,76 +1444,60 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor 
Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel 
Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180b, 0x0180e,), # Mongolian Free Variation..Mongolian Vowel Separato (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x019b0, 0x019c0,), # New Tai Lue Vowel Sign V..New Tai Lue Vowel Sign I + (0x019c8, 0x019c9,), # New Tai Lue Tone Mark-1 ..New Tai Lue Tone Mark-2 + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + 
(0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bab,), # Sundanese Sign Virama - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak - (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above + (0x01cf2, 0x01cf4,), # Vedic Sign Ardhavisarga 
..Vedic Tone Candra Above (0x01dc0, 0x01de6,), # Combining Dotted Grave A..Combining Latin Small Le (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -1449,65 +1506,65 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c4,), # Saurashtra Sign Virama + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese 
Vowel Sign Suku - (0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7b,), # Myanmar Sign Pao Karen Tone (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe26,), # Combining Ligature Left ..Combining Conjoining Mac + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique 
Stroke (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x11080, 0x11081,), # Kaithi Sign Candrabindu ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x11080, 0x11082,), # Kaithi Sign Candrabindu ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d165, 
0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '7.0.0': ( # Source: DerivedGeneralCategory-7.0.0.txt # Date: 2014-02-07, 18:42:12 GMT [MD] # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -1515,13 +1572,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun 
@@ -1531,61 +1591,69 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008e4, 0x00902,), # Arabic Curly Fatha ..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008e4, 0x00903,), # Arabic Curly Fatha ..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign 
Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + 
(0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c03,), # Telugu Sign Combining Ca..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d01, 0x00d01,), # Malayalam Sign Candrabindu - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d01, 0x00d03,), # Malayalam Sign Candrabin..Malayalam Sign Visarga + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign 
Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -1597,78 +1665,62 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # 
Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180b, 0x0180e,), # Mongolian Free Variation..Mongolian Vowel Separato (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham 
Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x019b0, 0x019c0,), # New Tai Lue Vowel Sign V..New Tai Lue Vowel Sign I + (0x019c8, 0x019c9,), # New Tai Lue Tone Mark-1 ..New Tai Lue Tone Mark-2 + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01abe,), # Combining Doubled Circum..Combining Parentheses Ov - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese 
Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak - (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above + (0x01cf2, 0x01cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A (0x01dc0, 0x01df5,), # Combining Dotted Grave A..Combining Up Tack Above (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -1677,35 +1729,33 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 
0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c4,), # Saurashtra Sign Virama + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M - (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # 
Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2d,), # Combining Ligature Left ..Combining Conjoining Mac + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -1715,57 +1765,54 @@ ZERO_WIDTH = { (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # 
Chakma Vowel Sign A ..Chakma Maayyaa (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11301, 0x11301,), # Grantha Sign Candrabindu + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11301, 0x11303,), # Grantha Sign Candrabindu..Grantha Sign Visarga (0x1133c, 0x1133c,), # Grantha Sign Nukta - (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta - (0x11633, 0x1163a,), # 
Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '8.0.0': ( # Source: DerivedGeneralCategory-8.0.0.txt # Date: 2015-02-13, 13:47:11 GMT [MD] # + (0x00000, 0x00000,), # (nil) + (0x000ad, 
0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -1773,13 +1820,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -1789,61 +1839,69 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008e3, 0x00903,), # Arabic Turned Damma Belo..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # 
Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel 
Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c03,), # Telugu Sign Combining Ca..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # 
Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d01, 0x00d01,), # Malayalam Sign Candrabindu - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d01, 0x00d03,), # Malayalam Sign Candrabin..Malayalam Sign Visarga + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -1855,78 +1913,60 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel 
Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign 
Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180b, 0x0180e,), # Mongolian Free Variation..Mongolian Vowel Separato (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01abe,), # Combining Doubled Circum..Combining Parentheses Ov - (0x01b00, 0x01b03,), # Balinese Sign 
Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak - (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above + (0x01cf2, 0x01cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A (0x01dc0, 0x01df5,), # Combining Dotted Grave 
A..Combining Up Tack Above (0x01dfc, 0x01dff,), # Combining Double Inverte..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -1935,35 +1975,33 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c4,), # Saurashtra Sign Virama + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c4,), # Saurashtra Consonant Sig..Saurashtra Sign Virama (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet + 
(0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M - (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T 
(0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -1973,52 +2011,43 @@ ZERO_WIDTH = { (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama (0x111ca, 0x111cc,), # Sharada Sign Nukta ..Sharada Extra Short Vowe - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign 
Candrabindu + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga (0x1133c, 0x1133c,), # Grantha Sign Nukta - (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter - (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta - (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign 
Medi - (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu - (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical @@ -2029,12 +2058,17 @@ ZERO_WIDTH = { (0x1da9b, 0x1da9f,), # Signwriting Fill Modifie..Signwriting Fill Modifie (0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting Rotation Mod (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '9.0.0': ( # Source: DerivedGeneralCategory-9.0.0.txt # Date: 2016-06-01, 10:34:26 GMT # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le 
(0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -2042,13 +2076,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -2058,62 +2095,69 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d4, 0x008e1,), # Arabic Small High Word A..Arabic Small High Sign S - (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008d4, 0x00903,), # Arabic Small High Word A..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign 
Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati 
Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c03,), # Telugu Sign Combining Ca..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - 
(0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d01, 0x00d01,), # Malayalam Sign Candrabindu - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d01, 0x00d03,), # Malayalam Sign Candrabin..Malayalam Sign Visarga + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -2125,79 +2169,61 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign 
Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 
0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180b, 0x0180e,), # Mongolian Free Variation..Mongolian Vowel Separato (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01abe,), # Combining Doubled Circum..Combining 
Parentheses Ov - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak - (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above + (0x01cf2, 0x01cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone 
Double Ring A (0x01dc0, 0x01df5,), # Combining Dotted Grave A..Combining Up Tack Above (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -2206,35 +2232,33 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel 
Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M - (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width 
No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -2244,63 +2268,49 @@ ZERO_WIDTH = { (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama (0x111ca, 0x111cc,), # Sharada Sign Nukta ..Sharada Extra Short Vowe - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda (0x1123e, 0x1123e,), # Khojki 
Sign Sukun - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga (0x1133c, 0x1133c,), # Grantha Sign Nukta - (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai - (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara - (0x11446, 0x11446,), # Newa Sign Nukta - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x11435, 0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter - (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 
0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta - (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign Medi - (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu - (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer - (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc - (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara - (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer + (0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter - (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa - (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E - (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1d165, 
0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical @@ -2317,12 +2327,17 @@ ZERO_WIDTH = { (0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '10.0.0': ( # Source: DerivedGeneralCategory-10.0.0.txt # Date: 2017-03-08, 08:41:49 GMT # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -2330,13 +2345,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High 
Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -2346,64 +2364,71 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d4, 0x008e1,), # Arabic Small High Word A..Arabic Small High Sign S - (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008d4, 0x00903,), # Arabic Small High Word A..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign 
Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic 
(0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c03,), # Telugu Sign Combining Ca..Telugu Sign Visarga + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin + (0x00d00, 0x00d03,), # Malayalam Sign Combining..Malayalam Sign Visarga (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa 
..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -2415,79 +2440,61 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar 
Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180b, 0x0180e,), # Mongolian Free Variation..Mongolian Vowel Separato (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali 
Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01abe,), # Combining Doubled Circum..Combining Parentheses Ov - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 
0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak - (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above - (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x01cf2, 0x01cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above + (0x01cf7, 0x01cf9,), # Vedic Sign Atikrama ..Vedic Tone Double Ring A (0x01dc0, 0x01df9,), # Combining Dotted Grave A..Combining Wide Inverted (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 
0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -2496,35 +2503,33 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant 
Sign Final M - (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -2534,66 +2539,47 @@ ZERO_WIDTH = { (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo (0x10a3f, 0x10a3f,), # Kharoshthi Virama (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x1107f, 
0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama (0x111ca, 0x111cc,), # Sharada Sign Nukta ..Sharada Extra Short Vowe - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda (0x1123e, 0x1123e,), # Khojki Sign Sukun - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga (0x1133c, 0x1133c,), # Grantha Sign Nukta - (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length 
Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai - (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara - (0x11446, 0x11446,), # Newa Sign Nukta - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x11435, 0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter - (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta - (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign Medi - (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu - (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer - (0x11a01, 0x11a06,), # Zanabazar Square Vowel S..Zanabazar Square Vowel S - (0x11a09, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L - (0x11a33, 0x11a38,), # Zanabazar Square Final 
C..Zanabazar Square Sign An + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer + (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L + (0x11a33, 0x11a39,), # Zanabazar Square Final C..Zanabazar Square Sign Vi (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner - (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe - (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar - (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara - (0x11a98, 0x11a99,), # Soyombo Gemination Mark ..Soyombo Subjoiner - (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc - (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara - (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11a51, 0x11a5b,), # Soyombo Vowel Sign I ..Soyombo Vowel Length Mar + (0x11a8a, 0x11a99,), # Soyombo Final Consonant ..Soyombo Subjoiner + (0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter - (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa - (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E - (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign @@ -2601,10 +2587,12 @@ ZERO_WIDTH = { (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High 
(0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical @@ -2621,12 +2609,17 @@ ZERO_WIDTH = { (0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '11.0.0': ( # Source: DerivedGeneralCategory-11.0.0.txt # Date: 2018-02-21, 05:34:04 GMT # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -2634,13 +2627,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic 
Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -2651,66 +2647,72 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S - (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 
0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal (0x009fe, 0x009fe,), # Bengali Sandhi Mark - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta 
- (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - (0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c04,), # Telugu Sign Combining Ca..Telugu Sign Combining An + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel 
Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin + (0x00d00, 0x00d03,), # Malayalam Sign Combining..Malayalam Sign Visarga (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -2722,79 +2724,61 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar 
Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel 
Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180b, 0x0180e,), # Mongolian Free Variation..Mongolian Vowel Separato (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01abe,), # Combining Doubled Circum..Combining Parentheses Ov - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # 
Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak - (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above - (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x01cf2, 0x01cf4,), # Vedic Sign Ardhavisarga ..Vedic Tone Candra Above + (0x01cf7, 0x01cf9,), # Vedic Sign Atikrama ..Vedic Tone Double Ring A (0x01dc0, 0x01df9,), # Combining Dotted Grave A..Combining Wide Inverted 
(0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -2803,36 +2787,34 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - 
(0x0a9bc, 0x0a9bc,), # Javanese Vowel Sign Pepet + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M - (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # 
Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -2844,83 +2826,68 @@ ZERO_WIDTH = { (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign + (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta 
..Khojki Sign Shadda + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda (0x1123e, 0x1123e,), # Khojki Sign Sukun - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta - (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai - (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara - (0x11446, 0x11446,), # Newa Sign Nukta + (0x11435, 0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta (0x1145e, 0x1145e,), # Newa Sandhi Mark - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel 
Sign E ..Siddham Sign Nukta (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter - (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta - (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign Medi - (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu - (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer - (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign Anusvara - (0x11839, 0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer + (0x1182c, 0x1183a,), # Dogra Vowel Sign Aa ..Dogra Sign Nukta (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L - (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An + (0x11a33, 0x11a39,), # Zanabazar Square Final C..Zanabazar Square Sign Vi (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner - (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe - (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar - (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara - (0x11a98, 0x11a99,), # Soyombo Gemination Mark ..Soyombo Subjoiner - (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc - (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara - (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11a51, 0x11a5b,), # Soyombo Vowel Sign I ..Soyombo Vowel Length Mar + (0x11a8a, 0x11a99,), 
# Soyombo Final Consonant ..Soyombo Subjoiner + (0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter - (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa - (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E - (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara + (0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign - (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara - (0x11d97, 0x11d97,), # Gunjala Gondi Virama - (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U + (0x11d93, 0x11d97,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Virama + (0x11ef3, 0x11ef6,), # Makasar Vowel Sign I ..Makasar Vowel Sign O (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta + (0x16f51, 0x16f7e,), # Miao Sign Aspiration ..Miao Vowel Sign Ng (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 
0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical @@ -2937,12 +2904,17 @@ ZERO_WIDTH = { (0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '12.0.0': ( # Source: DerivedGeneralCategory-12.0.0.txt # Date: 2019-01-22, 08:18:28 GMT # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -2950,13 +2922,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low 
..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -2967,66 +2942,72 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S - (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal (0x009fe, 0x009fe,), # Bengali Sandhi Mark - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # 
Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil 
Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - (0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c04,), # Telugu Sign Combining Ca..Telugu Sign Combining An + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin + (0x00d00, 0x00d03,), # Malayalam Sign Combining..Malayalam Sign Visarga (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d3e, 
0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -3037,79 +3018,61 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # 
Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180b, 0x0180e,), # Mongolian Free Variation..Mongolian Vowel Separato (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal (0x018a9, 
0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01abe,), # Combining Doubled Circum..Combining Parentheses Ov - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign 
Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above - (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x01cf7, 0x01cf9,), # Vedic Sign Atikrama ..Vedic Tone Double Ring A (0x01dc0, 0x01df9,), # Combining Dotted Grave A..Combining Wide Inverted (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone 
M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -3118,36 +3081,34 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bd,), # Javanese Vowel Sign Pepe..Javanese Consonant Sign + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - 
(0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M - (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -3159,87 +3120,73 @@ ZERO_WIDTH = { (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga 
(0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign + (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda (0x1123e, 0x1123e,), # Khojki Sign Sukun - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta - (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel 
Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai - (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara - (0x11446, 0x11446,), # Newa Sign Nukta + (0x11435, 0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta (0x1145e, 0x1145e,), # Newa Sandhi Mark - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter - (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta - (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign Medi - (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu - (0x11727, 
0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer - (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign Anusvara - (0x11839, 0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta - (0x119d4, 0x119d7,), # Nandinagari Vowel Sign U..Nandinagari Vowel Sign V - (0x119da, 0x119db,), # Nandinagari Vowel Sign E..Nandinagari Vowel Sign A - (0x119e0, 0x119e0,), # Nandinagari Sign Virama + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer + (0x1182c, 0x1183a,), # Dogra Vowel Sign Aa ..Dogra Sign Nukta + (0x119d1, 0x119d7,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign V + (0x119da, 0x119e0,), # Nandinagari Vowel Sign E..Nandinagari Sign Virama + (0x119e4, 0x119e4,), # Nandinagari Vowel Sign Prishthamatra E (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L - (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An + (0x11a33, 0x11a39,), # Zanabazar Square Final C..Zanabazar Square Sign Vi (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner - (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe - (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar - (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara - (0x11a98, 0x11a99,), # Soyombo Gemination Mark ..Soyombo Subjoiner - (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc - (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara - (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11a51, 0x11a5b,), # Soyombo Vowel Sign I ..Soyombo Vowel Length Mar + (0x11a8a, 0x11a99,), # Soyombo Final Consonant ..Soyombo Subjoiner + (0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama (0x11c92, 
0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter - (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa - (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E - (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara + (0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign - (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara - (0x11d97, 0x11d97,), # Gunjala Gondi Virama - (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U + (0x11d93, 0x11d97,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Virama + (0x11ef3, 0x11ef6,), # Makasar Vowel Sign I ..Makasar Vowel Sign O + (0x13430, 0x13438,), # Egyptian Hieroglyph Vert..Egyptian Hieroglyph End (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar + (0x16f51, 0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 
0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical @@ -3258,12 +3205,17 @@ ZERO_WIDTH = { (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '12.1.0': ( # Source: DerivedGeneralCategory-12.1.0.txt # Date: 2019-03-10, 10:53:08 GMT # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -3271,13 +3223,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 
0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -3288,66 +3243,72 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S - (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal (0x009fe, 0x009fe,), # Bengali Sandhi Mark - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta 
- (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b56, 0x00b56,), # Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b56, 0x00b57,), # Oriya Ai Length Mark ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil 
Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - (0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c04,), # Telugu Sign Combining Ca..Telugu Sign Combining An + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin + (0x00d00, 0x00d03,), # Malayalam Sign Combining..Malayalam Sign Visarga (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # 
Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + (0x00d82, 0x00d83,), # Sinhala Sign Anusvaraya ..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -3358,79 +3319,61 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel 
Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180b, 0x0180e,), # Mongolian Free Variation..Mongolian Vowel Separato (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A 
..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01abe,), # Combining Doubled Circum..Combining Parentheses Ov - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant 
Sign - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above - (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x01cf7, 0x01cf9,), # Vedic Sign Atikrama ..Vedic Tone Double Ring A (0x01dc0, 0x01df9,), # Combining Dotted Grave A..Combining Wide Inverted (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot 
Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -3439,36 +3382,34 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bd,), # Javanese Vowel Sign Pepe..Javanese Consonant Sign + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M - (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing 
Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -3480,87 +3421,73 @@ ZERO_WIDTH = { (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama - (0x1107f, 0x11081,), # Brahmi Number Joiner 
..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign + (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda (0x1123e, 0x1123e,), # Khojki Sign Sukun - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta - (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + 
(0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai - (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara - (0x11446, 0x11446,), # Newa Sign Nukta + (0x11435, 0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta (0x1145e, 0x1145e,), # Newa Sandhi Mark - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter - (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta - (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign Medi - (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu - (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer - (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign 
Anusvara - (0x11839, 0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta - (0x119d4, 0x119d7,), # Nandinagari Vowel Sign U..Nandinagari Vowel Sign V - (0x119da, 0x119db,), # Nandinagari Vowel Sign E..Nandinagari Vowel Sign A - (0x119e0, 0x119e0,), # Nandinagari Sign Virama + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer + (0x1182c, 0x1183a,), # Dogra Vowel Sign Aa ..Dogra Sign Nukta + (0x119d1, 0x119d7,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign V + (0x119da, 0x119e0,), # Nandinagari Vowel Sign E..Nandinagari Sign Virama + (0x119e4, 0x119e4,), # Nandinagari Vowel Sign Prishthamatra E (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L - (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An + (0x11a33, 0x11a39,), # Zanabazar Square Final C..Zanabazar Square Sign Vi (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner - (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe - (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar - (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara - (0x11a98, 0x11a99,), # Soyombo Gemination Mark ..Soyombo Subjoiner - (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc - (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara - (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11a51, 0x11a5b,), # Soyombo Vowel Sign I ..Soyombo Vowel Length Mar + (0x11a8a, 0x11a99,), # Soyombo Final Consonant ..Soyombo Subjoiner + (0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter - (0x11caa, 0x11cb0,), # Marchen Subjoined 
Letter..Marchen Vowel Sign Aa - (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E - (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara + (0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign - (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara - (0x11d97, 0x11d97,), # Gunjala Gondi Virama - (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U + (0x11d93, 0x11d97,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Virama + (0x11ef3, 0x11ef6,), # Makasar Vowel Sign I ..Makasar Vowel Sign O + (0x13430, 0x13438,), # Egyptian Hieroglyph Vert..Egyptian Hieroglyph End (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar + (0x16f51, 0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol 
Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical @@ -3579,12 +3506,17 @@ ZERO_WIDTH = { (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '13.0.0': ( # Source: DerivedGeneralCategory-13.0.0.txt # Date: 2019-10-21, 14:30:32 GMT # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -3592,13 +3524,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh 
(0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -3609,67 +3544,72 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark - (0x008d3, 0x008e1,), # Arabic Small Low Waw ..Arabic Small High Sign S - (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008d3, 0x00903,), # Arabic Small Low Waw ..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal (0x009fe, 0x009fe,), # Bengali Sandhi Mark - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi 
Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b55, 0x00b56,), # Oriya Sign Overline ..Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b55, 0x00b57,), # Oriya Sign Overline ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - 
(0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c04,), # Telugu Sign Combining Ca..Telugu Sign Combining An + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin + (0x00d00, 0x00d03,), # Malayalam Sign Combining..Malayalam Sign Visarga (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # 
Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc - (0x00d81, 0x00d81,), # Sinhala Sign Candrabindu + (0x00d81, 0x00d83,), # Sinhala Sign Candrabindu..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -3680,79 +3620,61 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel 
Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation + (0x0180b, 0x0180e,), # Mongolian Free Variation..Mongolian Vowel Separato (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - 
(0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01ac0,), # Combining Doubled Circum..Combining Latin Small Le - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign - (0x01be6, 
0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above - (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x01cf7, 0x01cf9,), # Vedic Sign Atikrama ..Vedic Tone Double Ring A (0x01dc0, 0x01df9,), # Combining Dotted Grave A..Combining Wide Inverted (0x01dfb, 0x01dff,), # Combining Deletion Mark ..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 
0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -3761,37 +3683,35 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign (0x0a82c, 0x0a82c,), # Syloti Nagri Sign Alternate Hasanta - (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bd,), # Javanese Vowel Sign Pepe..Javanese Consonant Sign + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M - 
(0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -3804,92 +3724,81 @@ ZERO_WIDTH = { (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas (0x10eab, 0x10eac,), # Yezidi Combining Hamza M..Yezidi Combining Madda M (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama 
- (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign + (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe - (0x111cf, 0x111cf,), # Sharada Sign Inverted Candrabindu - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda + (0x111ce, 0x111cf,), # Sharada Vowel Sign Prish..Sharada Sign Inverted Ca + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda (0x1123e, 0x1123e,), # Khojki Sign Sukun - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta - (0x11340, 
0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai - (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara - (0x11446, 0x11446,), # Newa Sign Nukta + (0x11435, 0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta (0x1145e, 0x1145e,), # Newa Sandhi Mark - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter - (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta - (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign Medi - 
(0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu - (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer - (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign Anusvara - (0x11839, 0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta - (0x1193b, 0x1193c,), # Dives Akuru Sign Anusvar..Dives Akuru Sign Candrab - (0x1193e, 0x1193e,), # Dives Akuru Virama - (0x11943, 0x11943,), # Dives Akuru Sign Nukta - (0x119d4, 0x119d7,), # Nandinagari Vowel Sign U..Nandinagari Vowel Sign V - (0x119da, 0x119db,), # Nandinagari Vowel Sign E..Nandinagari Vowel Sign A - (0x119e0, 0x119e0,), # Nandinagari Sign Virama + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer + (0x1182c, 0x1183a,), # Dogra Vowel Sign Aa ..Dogra Sign Nukta + (0x11930, 0x11935,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign E + (0x11937, 0x11938,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O + (0x1193b, 0x1193e,), # Dives Akuru Sign Anusvar..Dives Akuru Virama + (0x11940, 0x11940,), # Dives Akuru Medial Ya + (0x11942, 0x11943,), # Dives Akuru Medial Ra ..Dives Akuru Sign Nukta + (0x119d1, 0x119d7,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign V + (0x119da, 0x119e0,), # Nandinagari Vowel Sign E..Nandinagari Sign Virama + (0x119e4, 0x119e4,), # Nandinagari Vowel Sign Prishthamatra E (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L - (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An + (0x11a33, 0x11a39,), # Zanabazar Square Final C..Zanabazar Square Sign Vi (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner - (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe - (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar - (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo 
Sign Anusvara - (0x11a98, 0x11a99,), # Soyombo Gemination Mark ..Soyombo Subjoiner - (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc - (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara - (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11a51, 0x11a5b,), # Soyombo Vowel Sign I ..Soyombo Vowel Length Mar + (0x11a8a, 0x11a99,), # Soyombo Final Consonant ..Soyombo Subjoiner + (0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter - (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa - (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E - (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara + (0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign - (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara - (0x11d97, 0x11d97,), # Gunjala Gondi Virama - (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U + (0x11d93, 0x11d97,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Virama + (0x11ef3, 0x11ef6,), # Makasar Vowel Sign I ..Makasar Vowel Sign O + (0x13430, 0x13438,), # Egyptian Hieroglyph Vert..Egyptian Hieroglyph End (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar + (0x16f51, 
0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below (0x16fe4, 0x16fe4,), # Khitan Small Script Filler + (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical @@ -3908,12 +3817,17 @@ ZERO_WIDTH = { (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '14.0.0': ( # Source: DerivedGeneralCategory-14.0.0.txt # Date: 2021-07-10, 00:35:08 GMT # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -3921,13 +3835,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # 
Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -3938,69 +3855,75 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark + (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov (0x00898, 0x0089f,), # Arabic Small High Word A..Arabic Half Madda Over M - (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S - (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel 
Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal (0x009fe, 0x009fe,), # Bengali Sandhi Mark - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca (0x00afa, 0x00aff,), # Gujarati Sign Sukun 
..Gujarati Sign Two-circle - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b55, 0x00b56,), # Oriya Sign Overline ..Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b55, 0x00b57,), # Oriya Sign Overline ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - (0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c04,), # Telugu Sign Combining Ca..Telugu Sign Combining An (0x00c3c, 0x00c3c,), # Telugu Sign Nukta - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - 
(0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign Candrabin + (0x00d00, 0x00d03,), # Malayalam Sign Combining..Malayalam Sign Visarga (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc - (0x00d81, 0x00d81,), # Sinhala Sign Candrabindu + (0x00d81, 0x00d83,), # Sinhala Sign Candrabindu..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan @@ -4011,79 +3934,60 @@ ZERO_WIDTH = { (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), 
# Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin - 
(0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama - (0x01732, 0x01733,), # Hanunoo Vowel Sign I ..Hanunoo Vowel Sign U + (0x01712, 0x01715,), # Tagalog Vowel Sign I ..Tagalog Sign Pamudpod + (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation - (0x0180f, 0x0180f,), # Mongolian Free Variation Selector Four + (0x0180b, 0x0180f,), # Mongolian Free Variation..Mongolian Free Variation (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu 
Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01ace,), # Combining Doubled Circum..Combining Latin Small Le - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic 
Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above - (0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x01cf7, 0x01cf9,), # Vedic Sign Atikrama ..Vedic Tone Double Ring A (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -4092,37 +3996,35 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign (0x0a82c, 0x0a82c,), # Syloti Nagri Sign Alternate Hasanta - (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi (0x0a8e0, 0x0a8f1,), # Combining Devanagari 
Dig..Combining Devanagari Sig (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bd,), # Javanese Vowel Sign Pepe..Javanese Consonant Sign + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M - (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei 
Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -4136,97 +4038,86 @@ ZERO_WIDTH = { (0x10eab, 0x10eac,), # Yezidi Combining Hamza M..Yezidi Combining Madda M (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke (0x10f82, 0x10f85,), # Old Uyghur Combining Dot..Old Uyghur Combining Two - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama (0x11070, 0x11070,), # Brahmi Sign Old Tamil Virama (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta - (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R + (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + (0x11145, 
0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe - (0x111cf, 0x111cf,), # Sharada Sign Inverted Candrabindu - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda + (0x111ce, 0x111cf,), # Sharada Vowel Sign Prish..Sharada Sign Inverted Ca + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda (0x1123e, 0x1123e,), # Khojki Sign Sukun - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta - (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai - (0x11442, 0x11444,), # Newa Sign Virama ..Newa Sign Anusvara - (0x11446, 0x11446,), # Newa Sign Nukta + (0x11435, 
0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta (0x1145e, 0x1145e,), # Newa Sandhi Mark - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter - (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta - (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign Medi - (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu - (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer - (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign Anusvara - (0x11839, 0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta - (0x1193b, 0x1193c,), # Dives Akuru Sign Anusvar..Dives Akuru Sign Candrab - (0x1193e, 0x1193e,), # Dives Akuru Virama - (0x11943, 0x11943,), # Dives Akuru Sign Nukta - (0x119d4, 0x119d7,), # Nandinagari Vowel Sign U..Nandinagari Vowel Sign V - (0x119da, 0x119db,), # Nandinagari Vowel Sign E..Nandinagari Vowel Sign A - (0x119e0, 0x119e0,), # Nandinagari Sign Virama + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + 
(0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer + (0x1182c, 0x1183a,), # Dogra Vowel Sign Aa ..Dogra Sign Nukta + (0x11930, 0x11935,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign E + (0x11937, 0x11938,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O + (0x1193b, 0x1193e,), # Dives Akuru Sign Anusvar..Dives Akuru Virama + (0x11940, 0x11940,), # Dives Akuru Medial Ya + (0x11942, 0x11943,), # Dives Akuru Medial Ra ..Dives Akuru Sign Nukta + (0x119d1, 0x119d7,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign V + (0x119da, 0x119e0,), # Nandinagari Vowel Sign E..Nandinagari Sign Virama + (0x119e4, 0x119e4,), # Nandinagari Vowel Sign Prishthamatra E (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L - (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An + (0x11a33, 0x11a39,), # Zanabazar Square Final C..Zanabazar Square Sign Vi (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner - (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe - (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar - (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara - (0x11a98, 0x11a99,), # Soyombo Gemination Mark ..Soyombo Subjoiner - (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc - (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara - (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11a51, 0x11a5b,), # Soyombo Vowel Sign I ..Soyombo Vowel Length Mar + (0x11a8a, 0x11a99,), # Soyombo Final Consonant ..Soyombo Subjoiner + (0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter - (0x11caa, 0x11cb0,), # Marchen Subjoined 
Letter..Marchen Vowel Sign Aa - (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E - (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara + (0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign - (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara - (0x11d97, 0x11d97,), # Gunjala Gondi Virama - (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U + (0x11d93, 0x11d97,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Virama + (0x11ef3, 0x11ef6,), # Makasar Vowel Sign I ..Makasar Vowel Sign O + (0x13430, 0x13438,), # Egyptian Hieroglyph Vert..Egyptian Hieroglyph End (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar + (0x16f51, 0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below (0x16fe4, 0x16fe4,), # Khitan Small Script Filler + (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step (0x1cf00, 0x1cf2d,), # Znamenny Combining Mark ..Znamenny Combining Mark (0x1cf30, 0x1cf46,), # Znamenny Combining Tonal..Znamenny Priznak Modifie - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + 
(0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical @@ -4246,12 +4137,17 @@ ZERO_WIDTH = { (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), '15.0.0': ( # Source: DerivedGeneralCategory-15.0.0.txt # Date: 2022-04-26, 23:14:35 GMT # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen (0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg @@ -4259,13 +4155,16 @@ ZERO_WIDTH = { (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below (0x00670, 0x00670,), # Arabic Letter Superscript Alef - (0x006d6, 0x006dc,), # Arabic Small High Ligatu..Arabic Small High Seen + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda (0x006e7, 0x006e8,), # Arabic Small High Yeh 
..Arabic Small High Noon (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark (0x00711, 0x00711,), # Syriac Letter Superscript Alaph (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun @@ -4276,152 +4175,470 @@ ZERO_WIDTH = { (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark + (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov (0x00898, 0x0089f,), # Arabic Small High Word A..Arabic Half Madda Over M - (0x008ca, 0x008e1,), # Arabic Small High Farsi ..Arabic Small High Sign S - (0x008e3, 0x00902,), # Arabic Turned Damma Belo..Devanagari Sign Anusvara - (0x0093a, 0x0093a,), # Devanagari Vowel Sign Oe - (0x0093c, 0x0093c,), # Devanagari Sign Nukta - (0x00941, 0x00948,), # Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - (0x0094d, 0x0094d,), # Devanagari Sign Virama + (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga + (0x0093a, 0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - (0x00981, 0x00981,), # Bengali Sign Candrabindu + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga (0x009bc, 0x009bc,), # Bengali Sign Nukta - (0x009c1, 0x009c4,), # Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - (0x009cd, 0x009cd,), # Bengali Sign Virama + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au 
Length Mark (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal (0x009fe, 0x009fe,), # Bengali Sandhi Mark - (0x00a01, 0x00a02,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta - (0x00a41, 0x00a42,), # Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash - (0x00a81, 0x00a82,), # Gujarati Sign Candrabind..Gujarati Sign Anusvara + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga (0x00abc, 0x00abc,), # Gujarati Sign Nukta - (0x00ac1, 0x00ac5,), # Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - (0x00ac7, 0x00ac8,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - (0x00acd, 0x00acd,), # Gujarati Sign Virama + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle - (0x00b01, 0x00b01,), # Oriya Sign Candrabindu + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga (0x00b3c, 0x00b3c,), # Oriya Sign Nukta - (0x00b3f, 0x00b3f,), # Oriya Vowel Sign I - (0x00b41, 0x00b44,), # Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - (0x00b4d, 0x00b4d,), # Oriya Sign Virama - (0x00b55, 0x00b56,), # Oriya Sign Overline ..Oriya Ai Length Mark + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya 
Vowel Sign Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b55, 0x00b57,), # Oriya Sign Overline ..Oriya Au Length Mark (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic (0x00b82, 0x00b82,), # Tamil Sign Anusvara - (0x00bc0, 0x00bc0,), # Tamil Vowel Sign Ii - (0x00bcd, 0x00bcd,), # Tamil Sign Virama - (0x00c00, 0x00c00,), # Telugu Sign Combining Candrabindu Above - (0x00c04, 0x00c04,), # Telugu Sign Combining Anusvara Above + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c04,), # Telugu Sign Combining Ca..Telugu Sign Combining An (0x00c3c, 0x00c3c,), # Telugu Sign Nukta - (0x00c3e, 0x00c40,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - (0x00c81, 0x00c81,), # Kannada Sign Candrabindu + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga (0x00cbc, 0x00cbc,), # Kannada Sign Nukta - (0x00cbf, 0x00cbf,), # Kannada Vowel Sign I - (0x00cc6, 0x00cc6,), # Kannada Vowel Sign E - (0x00ccc, 0x00ccd,), # Kannada Vowel Sign Au ..Kannada Sign Virama + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - (0x00d00, 0x00d01,), # Malayalam Sign Combining..Malayalam Sign 
Candrabin + (0x00cf3, 0x00cf3,), # Kannada Sign Combining Anusvara Above Right + (0x00d00, 0x00d03,), # Malayalam Sign Combining..Malayalam Sign Visarga (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular - (0x00d41, 0x00d44,), # Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - (0x00d4d, 0x00d4d,), # Malayalam Sign Virama + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc - (0x00d81, 0x00d81,), # Sinhala Sign Candrabindu + (0x00d81, 0x00d83,), # Sinhala Sign Candrabindu..Sinhala Sign Visargaya (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna - (0x00dd2, 0x00dd4,), # Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga (0x00e31, 0x00e31,), # Thai Character Mai Han-akat (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan (0x00eb1, 0x00eb1,), # Lao Vowel Sign Mai Kan (0x00eb4, 0x00ebc,), # Lao Vowel Sign I ..Lao Semivowel Sign Lo - (0x00ec8, 0x00ece,), # Lao Tone Mai Ek ..(nil) + (0x00ec8, 0x00ece,), # Lao Tone Mai Ek ..Lao Yamakkan (0x00f18, 0x00f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru - (0x00f71, 0x00f7e,), # Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - (0x00f80, 0x00f84,), # Tibetan Vowel Sign Rever..Tibetan 
Mark Halanta + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan - (0x0102d, 0x01030,), # Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - (0x01032, 0x01037,), # Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - (0x01039, 0x0103a,), # Myanmar Sign Virama ..Myanmar Sign Asat - (0x0103d, 0x0103e,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M - (0x01058, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - (0x01082, 0x01082,), # Myanmar Consonant Sign Shan Medial Wa - (0x01085, 0x01086,), # Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - (0x0108d, 0x0108d,), # Myanmar Sign Shan Council Emphatic Tone - (0x0109d, 0x0109d,), # Myanmar Vowel Sign Aiton Ai + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin - (0x01712, 0x01714,), # Tagalog Vowel Sign I ..Tagalog Sign Virama - (0x01732, 0x01733,), # Hanunoo Vowel Sign I ..Hanunoo Vowel Sign U + (0x01712, 0x01715,), # Tagalog Vowel Sign I ..Tagalog Sign Pamudpod + (0x01732, 
0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - (0x017b4, 0x017b5,), # Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - (0x017b7, 0x017bd,), # Khmer Vowel Sign I ..Khmer Vowel Sign Ua - (0x017c6, 0x017c6,), # Khmer Sign Nikahit - (0x017c9, 0x017d3,), # Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat (0x017dd, 0x017dd,), # Khmer Sign Atthacan - (0x0180b, 0x0180d,), # Mongolian Free Variation..Mongolian Free Variation - (0x0180f, 0x0180f,), # Mongolian Free Variation Selector Four + (0x0180b, 0x0180f,), # Mongolian Free Variation..Mongolian Free Variation (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga - (0x01920, 0x01922,), # Limbu Vowel Sign A ..Limbu Vowel Sign U - (0x01927, 0x01928,), # Limbu Vowel Sign E ..Limbu Vowel Sign O - (0x01932, 0x01932,), # Limbu Small Letter Anusvara - (0x01939, 0x0193b,), # Limbu Sign Mukphreng ..Limbu Sign Sa-i - (0x01a17, 0x01a18,), # Buginese Vowel Sign I ..Buginese Vowel Sign U - (0x01a1b, 0x01a1b,), # Buginese Vowel Sign Ae - (0x01a56, 0x01a56,), # Tai Tham Consonant Sign Medial La - (0x01a58, 0x01a5e,), # Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - (0x01a60, 0x01a60,), # Tai Tham Sign Sakot - (0x01a62, 0x01a62,), # Tai Tham Vowel Sign Mai Sat - (0x01a65, 0x01a6c,), # Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - (0x01a73, 0x01a7c,), # Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham 
Sign Khuen-lue (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot (0x01ab0, 0x01ace,), # Combining Doubled Circum..Combining Latin Small Le - (0x01b00, 0x01b03,), # Balinese Sign Ulu Ricem ..Balinese Sign Surang - (0x01b34, 0x01b34,), # Balinese Sign Rerekan - (0x01b36, 0x01b3a,), # Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - (0x01b3c, 0x01b3c,), # Balinese Vowel Sign La Lenga - (0x01b42, 0x01b42,), # Balinese Vowel Sign Pepet + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol - (0x01b80, 0x01b81,), # Sundanese Sign Panyecek ..Sundanese Sign Panglayar - (0x01ba2, 0x01ba5,), # Sundanese Consonant Sign..Sundanese Vowel Sign Pan - (0x01ba8, 0x01ba9,), # Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - (0x01bab, 0x01bad,), # Sundanese Sign Virama ..Sundanese Consonant Sign - (0x01be6, 0x01be6,), # Batak Sign Tompi - (0x01be8, 0x01be9,), # Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee - (0x01bed, 0x01bed,), # Batak Vowel Sign Karo O - (0x01bef, 0x01bf1,), # Batak Vowel Sign U For S..Batak Consonant Sign H - (0x01c2c, 0x01c33,), # Lepcha Vowel Sign E ..Lepcha Consonant Sign T - (0x01c36, 0x01c37,), # Lepcha Sign Ran ..Lepcha Sign Nukta + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha - (0x01cd4, 0x01ce0,), # Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - (0x01ce2, 0x01ce8,), # Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda (0x01ced, 0x01ced,), # Vedic Sign Tiryak (0x01cf4, 0x01cf4,), # Vedic Tone Candra Above - 
(0x01cf8, 0x01cf9,), # Vedic Tone Ring Above ..Vedic Tone Double Ring A + (0x01cf7, 0x01cf9,), # Vedic Sign Atikrama ..Vedic Tone Double Ring A + (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes + (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above + (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu + (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner + (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M + (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag + (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous + (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer + (0x0a69e, 0x0a69f,), # Combining Cyrillic Lette..Combining Cyrillic Lette + (0x0a6f0, 0x0a6f1,), # Bamum Combining Mark Koq..Bamum Combining Mark Tuk + (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara + (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta + (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a82c, 0x0a82c,), # Syloti Nagri Sign Alternate Hasanta + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi + (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig + (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay + (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 
0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon + (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa + (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T + (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang + (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U + (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia + (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek + (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika + (0x0fe00, 0x0fe0f,), # Variation Selector-1 ..Variation Selector-16 + (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T + (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke + (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark + (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let + (0x10a01, 0x10a03,), # Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + (0x10a05, 0x10a06,), # Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + (0x10a0c, 0x10a0f,), # Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + (0x10a38, 0x10a3a,), # Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + (0x10a3f, 0x10a3f,), # Kharoshthi Virama + (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation + (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas + (0x10eab, 0x10eac,), 
# Yezidi Combining Hamza M..Yezidi Combining Madda M + (0x10efd, 0x10eff,), # Arabic Small Low Word Sa..Arabic Small Low Word Ma + (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke + (0x10f82, 0x10f85,), # Old Uyghur Combining Dot..Old Uyghur Combining Two + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga + (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama + (0x11070, 0x11070,), # Brahmi Sign Old Tamil Virama + (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign + (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R + (0x110cd, 0x110cd,), # Kaithi Number Sign Above + (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + (0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei + (0x11173, 0x11173,), # Mahajani Sign Nukta + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama + (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe + (0x111ce, 0x111cf,), # Sharada Vowel Sign Prish..Sharada Sign Inverted Ca + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda + (0x1123e, 0x1123e,), # Khojki Sign Sukun + (0x11241, 0x11241,), # Khojki Vowel Sign Vocalic R + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga + (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark 
+ (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal + (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit + (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter + (0x11435, 0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta + (0x1145e, 0x1145e,), # Newa Sandhi Mark + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta + (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer + (0x1182c, 0x1183a,), # Dogra Vowel Sign Aa ..Dogra Sign Nukta + (0x11930, 0x11935,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign E + (0x11937, 0x11938,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O + (0x1193b, 0x1193e,), # Dives Akuru Sign Anusvar..Dives Akuru Virama + (0x11940, 0x11940,), # Dives Akuru Medial Ya + (0x11942, 0x11943,), # Dives Akuru Medial Ra ..Dives Akuru Sign Nukta + (0x119d1, 0x119d7,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign V + (0x119da, 0x119e0,), # Nandinagari Vowel Sign E..Nandinagari Sign Virama + (0x119e4, 0x119e4,), # Nandinagari Vowel Sign Prishthamatra E + (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L + (0x11a33, 0x11a39,), # Zanabazar Square Final C..Zanabazar Square Sign Vi + (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster + (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner + (0x11a51, 0x11a5b,), # Soyombo Vowel Sign I ..Soyombo Vowel Length Mar + (0x11a8a, 0x11a99,), # Soyombo Final Consonant ..Soyombo Subjoiner + (0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign 
Virama + (0x11c92, 0x11ca7,), # Marchen Subjoined Letter..Marchen Subjoined Letter + (0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu + (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign + (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E + (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign + (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama + (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara + (0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign + (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign + (0x11d93, 0x11d97,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Virama + (0x11ef3, 0x11ef6,), # Makasar Vowel Sign I ..Makasar Vowel Sign O + (0x11f00, 0x11f01,), # Kawi Sign Candrabindu ..Kawi Sign Anusvara + (0x11f03, 0x11f03,), # Kawi Sign Visarga + (0x11f34, 0x11f3a,), # Kawi Vowel Sign Aa ..Kawi Vowel Sign Vocalic + (0x11f3e, 0x11f42,), # Kawi Vowel Sign E ..Kawi Conjoiner + (0x13430, 0x13440,), # Egyptian Hieroglyph Vert..Egyptian Hieroglyph Mirr + (0x13447, 0x13455,), # Egyptian Hieroglyph Modi..Egyptian Hieroglyph Modi + (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High + (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta + (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar + (0x16f51, 0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui + (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone Below + (0x16fe4, 0x16fe4,), # Khitan Small Script Filler + (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea + (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step + (0x1cf00, 0x1cf2d,), # Znamenny Combining Mark ..Znamenny Combining Mark + (0x1cf30, 0x1cf46,), # Znamenny Combining Tonal..Znamenny Priznak Modifie + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol 
Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical + (0x1da00, 0x1da36,), # Signwriting Head Rim ..Signwriting Air Sucking + (0x1da3b, 0x1da6c,), # Signwriting Mouth Closed..Signwriting Excitement + (0x1da75, 0x1da75,), # Signwriting Upper Body Tilting From Hip Joints + (0x1da84, 0x1da84,), # Signwriting Location Head Neck + (0x1da9b, 0x1da9f,), # Signwriting Fill Modifie..Signwriting Fill Modifie + (0x1daa1, 0x1daaf,), # Signwriting Rotation Mod..Signwriting Rotation Mod + (0x1e000, 0x1e006,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e008, 0x1e018,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e01b, 0x1e021,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e023, 0x1e024,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let + (0x1e08f, 0x1e08f,), # Combining Cyrillic Small Letter Byelorussian-ukr + (0x1e130, 0x1e136,), # Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T + (0x1e2ae, 0x1e2ae,), # Toto Sign Rising Tone + (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini + (0x1e4ec, 0x1e4ef,), # Nag Mundari Sign Muhor ..Nag Mundari Sign Sutuh + (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining + (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag + (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 + ), + '15.1.0': ( + # Source: DerivedGeneralCategory-15.1.0.txt + # Date: 2023-07-28, 23:34:02 GMT + # + (0x00000, 0x00000,), # (nil) + (0x000ad, 0x000ad,), # Soft Hyphen + 
(0x00300, 0x0036f,), # Combining Grave Accent ..Combining Latin Small Le + (0x00483, 0x00489,), # Combining Cyrillic Titlo..Combining Cyrillic Milli + (0x00591, 0x005bd,), # Hebrew Accent Etnahta ..Hebrew Point Meteg + (0x005bf, 0x005bf,), # Hebrew Point Rafe + (0x005c1, 0x005c2,), # Hebrew Point Shin Dot ..Hebrew Point Sin Dot + (0x005c4, 0x005c5,), # Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot + (0x005c7, 0x005c7,), # Hebrew Point Qamats Qatan + (0x00600, 0x00605,), # Arabic Number Sign ..Arabic Number Mark Above + (0x00610, 0x0061a,), # Arabic Sign Sallallahou ..Arabic Small Kasra + (0x0061c, 0x0061c,), # Arabic Letter Mark + (0x0064b, 0x0065f,), # Arabic Fathatan ..Arabic Wavy Hamza Below + (0x00670, 0x00670,), # Arabic Letter Superscript Alef + (0x006d6, 0x006dd,), # Arabic Small High Ligatu..Arabic End Of Ayah + (0x006df, 0x006e4,), # Arabic Small High Rounde..Arabic Small High Madda + (0x006e7, 0x006e8,), # Arabic Small High Yeh ..Arabic Small High Noon + (0x006ea, 0x006ed,), # Arabic Empty Centre Low ..Arabic Small Low Meem + (0x0070f, 0x0070f,), # Syriac Abbreviation Mark + (0x00711, 0x00711,), # Syriac Letter Superscript Alaph + (0x00730, 0x0074a,), # Syriac Pthaha Above ..Syriac Barrekh + (0x007a6, 0x007b0,), # Thaana Abafili ..Thaana Sukun + (0x007eb, 0x007f3,), # Nko Combining Short High..Nko Combining Double Dot + (0x007fd, 0x007fd,), # Nko Dantayalan + (0x00816, 0x00819,), # Samaritan Mark In ..Samaritan Mark Dagesh + (0x0081b, 0x00823,), # Samaritan Mark Epentheti..Samaritan Vowel Sign A + (0x00825, 0x00827,), # Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + (0x00829, 0x0082d,), # Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + (0x00859, 0x0085b,), # Mandaic Affrication Mark..Mandaic Gemination Mark + (0x00890, 0x00891,), # Arabic Pound Mark Above ..Arabic Piastre Mark Abov + (0x00898, 0x0089f,), # Arabic Small High Word A..Arabic Half Madda Over M + (0x008ca, 0x00903,), # Arabic Small High Farsi ..Devanagari Sign Visarga + (0x0093a, 
0x0093c,), # Devanagari Vowel Sign Oe..Devanagari Sign Nukta + (0x0093e, 0x0094f,), # Devanagari Vowel Sign Aa..Devanagari Vowel Sign Aw + (0x00951, 0x00957,), # Devanagari Stress Sign U..Devanagari Vowel Sign Uu + (0x00962, 0x00963,), # Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + (0x00981, 0x00983,), # Bengali Sign Candrabindu..Bengali Sign Visarga + (0x009bc, 0x009bc,), # Bengali Sign Nukta + (0x009be, 0x009c4,), # Bengali Vowel Sign Aa ..Bengali Vowel Sign Vocal + (0x009c7, 0x009c8,), # Bengali Vowel Sign E ..Bengali Vowel Sign Ai + (0x009cb, 0x009cd,), # Bengali Vowel Sign O ..Bengali Sign Virama + (0x009d7, 0x009d7,), # Bengali Au Length Mark + (0x009e2, 0x009e3,), # Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + (0x009fe, 0x009fe,), # Bengali Sandhi Mark + (0x00a01, 0x00a03,), # Gurmukhi Sign Adak Bindi..Gurmukhi Sign Visarga + (0x00a3c, 0x00a3c,), # Gurmukhi Sign Nukta + (0x00a3e, 0x00a42,), # Gurmukhi Vowel Sign Aa ..Gurmukhi Vowel Sign Uu + (0x00a47, 0x00a48,), # Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + (0x00a4b, 0x00a4d,), # Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + (0x00a51, 0x00a51,), # Gurmukhi Sign Udaat + (0x00a70, 0x00a71,), # Gurmukhi Tippi ..Gurmukhi Addak + (0x00a75, 0x00a75,), # Gurmukhi Sign Yakash + (0x00a81, 0x00a83,), # Gujarati Sign Candrabind..Gujarati Sign Visarga + (0x00abc, 0x00abc,), # Gujarati Sign Nukta + (0x00abe, 0x00ac5,), # Gujarati Vowel Sign Aa ..Gujarati Vowel Sign Cand + (0x00ac7, 0x00ac9,), # Gujarati Vowel Sign E ..Gujarati Vowel Sign Cand + (0x00acb, 0x00acd,), # Gujarati Vowel Sign O ..Gujarati Sign Virama + (0x00ae2, 0x00ae3,), # Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + (0x00afa, 0x00aff,), # Gujarati Sign Sukun ..Gujarati Sign Two-circle + (0x00b01, 0x00b03,), # Oriya Sign Candrabindu ..Oriya Sign Visarga + (0x00b3c, 0x00b3c,), # Oriya Sign Nukta + (0x00b3e, 0x00b44,), # Oriya Vowel Sign Aa ..Oriya Vowel Sign Vocalic + (0x00b47, 0x00b48,), # Oriya Vowel Sign E ..Oriya Vowel Sign 
Ai + (0x00b4b, 0x00b4d,), # Oriya Vowel Sign O ..Oriya Sign Virama + (0x00b55, 0x00b57,), # Oriya Sign Overline ..Oriya Au Length Mark + (0x00b62, 0x00b63,), # Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + (0x00b82, 0x00b82,), # Tamil Sign Anusvara + (0x00bbe, 0x00bc2,), # Tamil Vowel Sign Aa ..Tamil Vowel Sign Uu + (0x00bc6, 0x00bc8,), # Tamil Vowel Sign E ..Tamil Vowel Sign Ai + (0x00bca, 0x00bcd,), # Tamil Vowel Sign O ..Tamil Sign Virama + (0x00bd7, 0x00bd7,), # Tamil Au Length Mark + (0x00c00, 0x00c04,), # Telugu Sign Combining Ca..Telugu Sign Combining An + (0x00c3c, 0x00c3c,), # Telugu Sign Nukta + (0x00c3e, 0x00c44,), # Telugu Vowel Sign Aa ..Telugu Vowel Sign Vocali + (0x00c46, 0x00c48,), # Telugu Vowel Sign E ..Telugu Vowel Sign Ai + (0x00c4a, 0x00c4d,), # Telugu Vowel Sign O ..Telugu Sign Virama + (0x00c55, 0x00c56,), # Telugu Length Mark ..Telugu Ai Length Mark + (0x00c62, 0x00c63,), # Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + (0x00c81, 0x00c83,), # Kannada Sign Candrabindu..Kannada Sign Visarga + (0x00cbc, 0x00cbc,), # Kannada Sign Nukta + (0x00cbe, 0x00cc4,), # Kannada Vowel Sign Aa ..Kannada Vowel Sign Vocal + (0x00cc6, 0x00cc8,), # Kannada Vowel Sign E ..Kannada Vowel Sign Ai + (0x00cca, 0x00ccd,), # Kannada Vowel Sign O ..Kannada Sign Virama + (0x00cd5, 0x00cd6,), # Kannada Length Mark ..Kannada Ai Length Mark + (0x00ce2, 0x00ce3,), # Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + (0x00cf3, 0x00cf3,), # Kannada Sign Combining Anusvara Above Right + (0x00d00, 0x00d03,), # Malayalam Sign Combining..Malayalam Sign Visarga + (0x00d3b, 0x00d3c,), # Malayalam Sign Vertical ..Malayalam Sign Circular + (0x00d3e, 0x00d44,), # Malayalam Vowel Sign Aa ..Malayalam Vowel Sign Voc + (0x00d46, 0x00d48,), # Malayalam Vowel Sign E ..Malayalam Vowel Sign Ai + (0x00d4a, 0x00d4d,), # Malayalam Vowel Sign O ..Malayalam Sign Virama + (0x00d57, 0x00d57,), # Malayalam Au Length Mark + (0x00d62, 0x00d63,), # Malayalam Vowel Sign Voc..Malayalam Vowel 
Sign Voc + (0x00d81, 0x00d83,), # Sinhala Sign Candrabindu..Sinhala Sign Visargaya + (0x00dca, 0x00dca,), # Sinhala Sign Al-lakuna + (0x00dcf, 0x00dd4,), # Sinhala Vowel Sign Aela-..Sinhala Vowel Sign Ketti + (0x00dd6, 0x00dd6,), # Sinhala Vowel Sign Diga Paa-pilla + (0x00dd8, 0x00ddf,), # Sinhala Vowel Sign Gaett..Sinhala Vowel Sign Gayan + (0x00df2, 0x00df3,), # Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + (0x00e31, 0x00e31,), # Thai Character Mai Han-akat + (0x00e34, 0x00e3a,), # Thai Character Sara I ..Thai Character Phinthu + (0x00e47, 0x00e4e,), # Thai Character Maitaikhu..Thai Character Yamakkan + (0x00eb1, 0x00eb1,), # Lao Vowel Sign Mai Kan + (0x00eb4, 0x00ebc,), # Lao Vowel Sign I ..Lao Semivowel Sign Lo + (0x00ec8, 0x00ece,), # Lao Tone Mai Ek ..Lao Yamakkan + (0x00f18, 0x00f19,), # Tibetan Astrological Sig..Tibetan Astrological Sig + (0x00f35, 0x00f35,), # Tibetan Mark Ngas Bzung Nyi Zla + (0x00f37, 0x00f37,), # Tibetan Mark Ngas Bzung Sgor Rtags + (0x00f39, 0x00f39,), # Tibetan Mark Tsa -phru + (0x00f3e, 0x00f3f,), # Tibetan Sign Yar Tshes ..Tibetan Sign Mar Tshes + (0x00f71, 0x00f84,), # Tibetan Vowel Sign Aa ..Tibetan Mark Halanta + (0x00f86, 0x00f87,), # Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + (0x00f8d, 0x00f97,), # Tibetan Subjoined Sign L..Tibetan Subjoined Letter + (0x00f99, 0x00fbc,), # Tibetan Subjoined Letter..Tibetan Subjoined Letter + (0x00fc6, 0x00fc6,), # Tibetan Symbol Padma Gdan + (0x0102b, 0x0103e,), # Myanmar Vowel Sign Tall ..Myanmar Consonant Sign M + (0x01056, 0x01059,), # Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + (0x0105e, 0x01060,), # Myanmar Consonant Sign M..Myanmar Consonant Sign M + (0x01062, 0x01064,), # Myanmar Vowel Sign Sgaw ..Myanmar Tone Mark Sgaw K + (0x01067, 0x0106d,), # Myanmar Vowel Sign Weste..Myanmar Sign Western Pwo + (0x01071, 0x01074,), # Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + (0x01082, 0x0108d,), # Myanmar Consonant Sign S..Myanmar Sign Shan Counci + (0x0108f, 
0x0108f,), # Myanmar Sign Rumai Palaung Tone-5 + (0x0109a, 0x0109d,), # Myanmar Sign Khamti Tone..Myanmar Vowel Sign Aiton + (0x0135d, 0x0135f,), # Ethiopic Combining Gemin..Ethiopic Combining Gemin + (0x01712, 0x01715,), # Tagalog Vowel Sign I ..Tagalog Sign Pamudpod + (0x01732, 0x01734,), # Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + (0x01752, 0x01753,), # Buhid Vowel Sign I ..Buhid Vowel Sign U + (0x01772, 0x01773,), # Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + (0x017b4, 0x017d3,), # Khmer Vowel Inherent Aq ..Khmer Sign Bathamasat + (0x017dd, 0x017dd,), # Khmer Sign Atthacan + (0x0180b, 0x0180f,), # Mongolian Free Variation..Mongolian Free Variation + (0x01885, 0x01886,), # Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + (0x018a9, 0x018a9,), # Mongolian Letter Ali Gali Dagalga + (0x01920, 0x0192b,), # Limbu Vowel Sign A ..Limbu Subjoined Letter W + (0x01930, 0x0193b,), # Limbu Small Letter Ka ..Limbu Sign Sa-i + (0x01a17, 0x01a1b,), # Buginese Vowel Sign I ..Buginese Vowel Sign Ae + (0x01a55, 0x01a5e,), # Tai Tham Consonant Sign ..Tai Tham Consonant Sign + (0x01a60, 0x01a7c,), # Tai Tham Sign Sakot ..Tai Tham Sign Khuen-lue + (0x01a7f, 0x01a7f,), # Tai Tham Combining Cryptogrammic Dot + (0x01ab0, 0x01ace,), # Combining Doubled Circum..Combining Latin Small Le + (0x01b00, 0x01b04,), # Balinese Sign Ulu Ricem ..Balinese Sign Bisah + (0x01b34, 0x01b44,), # Balinese Sign Rerekan ..Balinese Adeg Adeg + (0x01b6b, 0x01b73,), # Balinese Musical Symbol ..Balinese Musical Symbol + (0x01b80, 0x01b82,), # Sundanese Sign Panyecek ..Sundanese Sign Pangwisad + (0x01ba1, 0x01bad,), # Sundanese Consonant Sign..Sundanese Consonant Sign + (0x01be6, 0x01bf3,), # Batak Sign Tompi ..Batak Panongonan + (0x01c24, 0x01c37,), # Lepcha Subjoined Letter ..Lepcha Sign Nukta + (0x01cd0, 0x01cd2,), # Vedic Tone Karshana ..Vedic Tone Prenkha + (0x01cd4, 0x01ce8,), # Vedic Sign Yajurvedic Mi..Vedic Sign Visarga Anuda + (0x01ced, 0x01ced,), # Vedic Sign Tiryak + (0x01cf4, 0x01cf4,), 
# Vedic Tone Candra Above + (0x01cf7, 0x01cf9,), # Vedic Sign Atikrama ..Vedic Tone Double Ring A (0x01dc0, 0x01dff,), # Combining Dotted Grave A..Combining Right Arrowhea + (0x0200b, 0x0200f,), # Zero Width Space ..Right-to-left Mark + (0x02028, 0x0202e,), # Line Separator ..Right-to-left Override + (0x02060, 0x02064,), # Word Joiner ..Invisible Plus + (0x02066, 0x0206f,), # Left-to-right Isolate ..Nominal Digit Shapes (0x020d0, 0x020f0,), # Combining Left Harpoon A..Combining Asterisk Above (0x02cef, 0x02cf1,), # Coptic Combining Ni Abov..Coptic Combining Spiritu (0x02d7f, 0x02d7f,), # Tifinagh Consonant Joiner (0x02de0, 0x02dff,), # Combining Cyrillic Lette..Combining Cyrillic Lette - (0x0302a, 0x0302d,), # Ideographic Level Tone M..Ideographic Entering Ton + (0x0302a, 0x0302f,), # Ideographic Level Tone M..Hangul Double Dot Tone M (0x03099, 0x0309a,), # Combining Katakana-hirag..Combining Katakana-hirag (0x0a66f, 0x0a672,), # Combining Cyrillic Vzmet..Combining Cyrillic Thous (0x0a674, 0x0a67d,), # Combining Cyrillic Lette..Combining Cyrillic Payer @@ -4430,37 +4647,35 @@ ZERO_WIDTH = { (0x0a802, 0x0a802,), # Syloti Nagri Sign Dvisvara (0x0a806, 0x0a806,), # Syloti Nagri Sign Hasanta (0x0a80b, 0x0a80b,), # Syloti Nagri Sign Anusvara - (0x0a825, 0x0a826,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + (0x0a823, 0x0a827,), # Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign (0x0a82c, 0x0a82c,), # Syloti Nagri Sign Alternate Hasanta - (0x0a8c4, 0x0a8c5,), # Saurashtra Sign Virama ..Saurashtra Sign Candrabi + (0x0a880, 0x0a881,), # Saurashtra Sign Anusvara..Saurashtra Sign Visarga + (0x0a8b4, 0x0a8c5,), # Saurashtra Consonant Sig..Saurashtra Sign Candrabi (0x0a8e0, 0x0a8f1,), # Combining Devanagari Dig..Combining Devanagari Sig (0x0a8ff, 0x0a8ff,), # Devanagari Vowel Sign Ay (0x0a926, 0x0a92d,), # Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - (0x0a947, 0x0a951,), # Rejang Vowel Sign I ..Rejang Consonant Sign R - (0x0a980, 0x0a982,), # Javanese Sign 
Panyangga ..Javanese Sign Layar - (0x0a9b3, 0x0a9b3,), # Javanese Sign Cecak Telu - (0x0a9b6, 0x0a9b9,), # Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - (0x0a9bc, 0x0a9bd,), # Javanese Vowel Sign Pepe..Javanese Consonant Sign + (0x0a947, 0x0a953,), # Rejang Vowel Sign I ..Rejang Virama + (0x0a980, 0x0a983,), # Javanese Sign Panyangga ..Javanese Sign Wignyan + (0x0a9b3, 0x0a9c0,), # Javanese Sign Cecak Telu..Javanese Pangkon (0x0a9e5, 0x0a9e5,), # Myanmar Sign Shan Saw - (0x0aa29, 0x0aa2e,), # Cham Vowel Sign Aa ..Cham Vowel Sign Oe - (0x0aa31, 0x0aa32,), # Cham Vowel Sign Au ..Cham Vowel Sign Ue - (0x0aa35, 0x0aa36,), # Cham Consonant Sign La ..Cham Consonant Sign Wa + (0x0aa29, 0x0aa36,), # Cham Vowel Sign Aa ..Cham Consonant Sign Wa (0x0aa43, 0x0aa43,), # Cham Consonant Sign Final Ng - (0x0aa4c, 0x0aa4c,), # Cham Consonant Sign Final M - (0x0aa7c, 0x0aa7c,), # Myanmar Sign Tai Laing Tone-2 + (0x0aa4c, 0x0aa4d,), # Cham Consonant Sign Fina..Cham Consonant Sign Fina + (0x0aa7b, 0x0aa7d,), # Myanmar Sign Pao Karen T..Myanmar Sign Tai Laing T (0x0aab0, 0x0aab0,), # Tai Viet Mai Kang (0x0aab2, 0x0aab4,), # Tai Viet Vowel I ..Tai Viet Vowel U (0x0aab7, 0x0aab8,), # Tai Viet Mai Khit ..Tai Viet Vowel Ia (0x0aabe, 0x0aabf,), # Tai Viet Vowel Am ..Tai Viet Tone Mai Ek (0x0aac1, 0x0aac1,), # Tai Viet Tone Mai Tho - (0x0aaec, 0x0aaed,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - (0x0aaf6, 0x0aaf6,), # Meetei Mayek Virama - (0x0abe5, 0x0abe5,), # Meetei Mayek Vowel Sign Anap - (0x0abe8, 0x0abe8,), # Meetei Mayek Vowel Sign Unap - (0x0abed, 0x0abed,), # Meetei Mayek Apun Iyek + (0x0aaeb, 0x0aaef,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0aaf5, 0x0aaf6,), # Meetei Mayek Vowel Sign ..Meetei Mayek Virama + (0x0abe3, 0x0abea,), # Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + (0x0abec, 0x0abed,), # Meetei Mayek Lum Iyek ..Meetei Mayek Apun Iyek (0x0fb1e, 0x0fb1e,), # Hebrew Point Judeo-spanish Varika (0x0fe00, 0x0fe0f,), # Variation 
Selector-1 ..Variation Selector-16 (0x0fe20, 0x0fe2f,), # Combining Ligature Left ..Combining Cyrillic Titlo + (0x0feff, 0x0feff,), # Zero Width No-break Space + (0x0fff9, 0x0fffb,), # Interlinear Annotation A..Interlinear Annotation T (0x101fd, 0x101fd,), # Phaistos Disc Sign Combining Oblique Stroke (0x102e0, 0x102e0,), # Coptic Epact Thousands Mark (0x10376, 0x1037a,), # Combining Old Permic Let..Combining Old Permic Let @@ -4472,107 +4687,95 @@ ZERO_WIDTH = { (0x10ae5, 0x10ae6,), # Manichaean Abbreviation ..Manichaean Abbreviation (0x10d24, 0x10d27,), # Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas (0x10eab, 0x10eac,), # Yezidi Combining Hamza M..Yezidi Combining Madda M - (0x10efd, 0x10eff,), # (nil) + (0x10efd, 0x10eff,), # Arabic Small Low Word Sa..Arabic Small Low Word Ma (0x10f46, 0x10f50,), # Sogdian Combining Dot Be..Sogdian Combining Stroke (0x10f82, 0x10f85,), # Old Uyghur Combining Dot..Old Uyghur Combining Two - (0x11001, 0x11001,), # Brahmi Sign Anusvara + (0x11000, 0x11002,), # Brahmi Sign Candrabindu ..Brahmi Sign Visarga (0x11038, 0x11046,), # Brahmi Vowel Sign Aa ..Brahmi Virama (0x11070, 0x11070,), # Brahmi Sign Old Tamil Virama (0x11073, 0x11074,), # Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta - (0x1107f, 0x11081,), # Brahmi Number Joiner ..Kaithi Sign Anusvara - (0x110b3, 0x110b6,), # Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - (0x110b9, 0x110ba,), # Kaithi Sign Virama ..Kaithi Sign Nukta + (0x1107f, 0x11082,), # Brahmi Number Joiner ..Kaithi Sign Visarga + (0x110b0, 0x110ba,), # Kaithi Vowel Sign Aa ..Kaithi Sign Nukta + (0x110bd, 0x110bd,), # Kaithi Number Sign (0x110c2, 0x110c2,), # Kaithi Vowel Sign Vocalic R + (0x110cd, 0x110cd,), # Kaithi Number Sign Above (0x11100, 0x11102,), # Chakma Sign Candrabindu ..Chakma Sign Visarga - (0x11127, 0x1112b,), # Chakma Vowel Sign A ..Chakma Vowel Sign Uu - (0x1112d, 0x11134,), # Chakma Vowel Sign Ai ..Chakma Maayyaa + (0x11127, 0x11134,), # Chakma Vowel Sign A ..Chakma Maayyaa + 
(0x11145, 0x11146,), # Chakma Vowel Sign Aa ..Chakma Vowel Sign Ei (0x11173, 0x11173,), # Mahajani Sign Nukta - (0x11180, 0x11181,), # Sharada Sign Candrabindu..Sharada Sign Anusvara - (0x111b6, 0x111be,), # Sharada Vowel Sign U ..Sharada Vowel Sign O + (0x11180, 0x11182,), # Sharada Sign Candrabindu..Sharada Sign Visarga + (0x111b3, 0x111c0,), # Sharada Vowel Sign Aa ..Sharada Sign Virama (0x111c9, 0x111cc,), # Sharada Sandhi Mark ..Sharada Extra Short Vowe - (0x111cf, 0x111cf,), # Sharada Sign Inverted Candrabindu - (0x1122f, 0x11231,), # Khojki Vowel Sign U ..Khojki Vowel Sign Ai - (0x11234, 0x11234,), # Khojki Sign Anusvara - (0x11236, 0x11237,), # Khojki Sign Nukta ..Khojki Sign Shadda + (0x111ce, 0x111cf,), # Sharada Vowel Sign Prish..Sharada Sign Inverted Ca + (0x1122c, 0x11237,), # Khojki Vowel Sign Aa ..Khojki Sign Shadda (0x1123e, 0x1123e,), # Khojki Sign Sukun - (0x11241, 0x11241,), # (nil) - (0x112df, 0x112df,), # Khudawadi Sign Anusvara - (0x112e3, 0x112ea,), # Khudawadi Vowel Sign U ..Khudawadi Sign Virama - (0x11300, 0x11301,), # Grantha Sign Combining A..Grantha Sign Candrabindu + (0x11241, 0x11241,), # Khojki Vowel Sign Vocalic R + (0x112df, 0x112ea,), # Khudawadi Sign Anusvara ..Khudawadi Sign Virama + (0x11300, 0x11303,), # Grantha Sign Combining A..Grantha Sign Visarga (0x1133b, 0x1133c,), # Combining Bindu Below ..Grantha Sign Nukta - (0x11340, 0x11340,), # Grantha Vowel Sign Ii + (0x1133e, 0x11344,), # Grantha Vowel Sign Aa ..Grantha Vowel Sign Vocal + (0x11347, 0x11348,), # Grantha Vowel Sign Ee ..Grantha Vowel Sign Ai + (0x1134b, 0x1134d,), # Grantha Vowel Sign Oo ..Grantha Sign Virama + (0x11357, 0x11357,), # Grantha Au Length Mark + (0x11362, 0x11363,), # Grantha Vowel Sign Vocal..Grantha Vowel Sign Vocal (0x11366, 0x1136c,), # Combining Grantha Digit ..Combining Grantha Digit (0x11370, 0x11374,), # Combining Grantha Letter..Combining Grantha Letter - (0x11438, 0x1143f,), # Newa Vowel Sign U ..Newa Vowel Sign Ai - (0x11442, 0x11444,), # 
Newa Sign Virama ..Newa Sign Anusvara - (0x11446, 0x11446,), # Newa Sign Nukta + (0x11435, 0x11446,), # Newa Vowel Sign Aa ..Newa Sign Nukta (0x1145e, 0x1145e,), # Newa Sandhi Mark - (0x114b3, 0x114b8,), # Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal - (0x114ba, 0x114ba,), # Tirhuta Vowel Sign Short E - (0x114bf, 0x114c0,), # Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara - (0x114c2, 0x114c3,), # Tirhuta Sign Virama ..Tirhuta Sign Nukta - (0x115b2, 0x115b5,), # Siddham Vowel Sign U ..Siddham Vowel Sign Vocal - (0x115bc, 0x115bd,), # Siddham Sign Candrabindu..Siddham Sign Anusvara - (0x115bf, 0x115c0,), # Siddham Sign Virama ..Siddham Sign Nukta + (0x114b0, 0x114c3,), # Tirhuta Vowel Sign Aa ..Tirhuta Sign Nukta + (0x115af, 0x115b5,), # Siddham Vowel Sign Aa ..Siddham Vowel Sign Vocal + (0x115b8, 0x115c0,), # Siddham Vowel Sign E ..Siddham Sign Nukta (0x115dc, 0x115dd,), # Siddham Vowel Sign Alter..Siddham Vowel Sign Alter - (0x11633, 0x1163a,), # Modi Vowel Sign U ..Modi Vowel Sign Ai - (0x1163d, 0x1163d,), # Modi Sign Anusvara - (0x1163f, 0x11640,), # Modi Sign Virama ..Modi Sign Ardhacandra - (0x116ab, 0x116ab,), # Takri Sign Anusvara - (0x116ad, 0x116ad,), # Takri Vowel Sign Aa - (0x116b0, 0x116b5,), # Takri Vowel Sign U ..Takri Vowel Sign Au - (0x116b7, 0x116b7,), # Takri Sign Nukta - (0x1171d, 0x1171f,), # Ahom Consonant Sign Medi..Ahom Consonant Sign Medi - (0x11722, 0x11725,), # Ahom Vowel Sign I ..Ahom Vowel Sign Uu - (0x11727, 0x1172b,), # Ahom Vowel Sign Aw ..Ahom Sign Killer - (0x1182f, 0x11837,), # Dogra Vowel Sign U ..Dogra Sign Anusvara - (0x11839, 0x1183a,), # Dogra Sign Virama ..Dogra Sign Nukta - (0x1193b, 0x1193c,), # Dives Akuru Sign Anusvar..Dives Akuru Sign Candrab - (0x1193e, 0x1193e,), # Dives Akuru Virama - (0x11943, 0x11943,), # Dives Akuru Sign Nukta - (0x119d4, 0x119d7,), # Nandinagari Vowel Sign U..Nandinagari Vowel Sign V - (0x119da, 0x119db,), # Nandinagari Vowel Sign E..Nandinagari Vowel Sign A - (0x119e0, 0x119e0,), # Nandinagari 
Sign Virama + (0x11630, 0x11640,), # Modi Vowel Sign Aa ..Modi Sign Ardhacandra + (0x116ab, 0x116b7,), # Takri Sign Anusvara ..Takri Sign Nukta + (0x1171d, 0x1172b,), # Ahom Consonant Sign Medi..Ahom Sign Killer + (0x1182c, 0x1183a,), # Dogra Vowel Sign Aa ..Dogra Sign Nukta + (0x11930, 0x11935,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign E + (0x11937, 0x11938,), # Dives Akuru Vowel Sign A..Dives Akuru Vowel Sign O + (0x1193b, 0x1193e,), # Dives Akuru Sign Anusvar..Dives Akuru Virama + (0x11940, 0x11940,), # Dives Akuru Medial Ya + (0x11942, 0x11943,), # Dives Akuru Medial Ra ..Dives Akuru Sign Nukta + (0x119d1, 0x119d7,), # Nandinagari Vowel Sign A..Nandinagari Vowel Sign V + (0x119da, 0x119e0,), # Nandinagari Vowel Sign E..Nandinagari Sign Virama + (0x119e4, 0x119e4,), # Nandinagari Vowel Sign Prishthamatra E (0x11a01, 0x11a0a,), # Zanabazar Square Vowel S..Zanabazar Square Vowel L - (0x11a33, 0x11a38,), # Zanabazar Square Final C..Zanabazar Square Sign An + (0x11a33, 0x11a39,), # Zanabazar Square Final C..Zanabazar Square Sign Vi (0x11a3b, 0x11a3e,), # Zanabazar Square Cluster..Zanabazar Square Cluster (0x11a47, 0x11a47,), # Zanabazar Square Subjoiner - (0x11a51, 0x11a56,), # Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe - (0x11a59, 0x11a5b,), # Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar - (0x11a8a, 0x11a96,), # Soyombo Final Consonant ..Soyombo Sign Anusvara - (0x11a98, 0x11a99,), # Soyombo Gemination Mark ..Soyombo Subjoiner - (0x11c30, 0x11c36,), # Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc - (0x11c38, 0x11c3d,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara - (0x11c3f, 0x11c3f,), # Bhaiksuki Sign Virama + (0x11a51, 0x11a5b,), # Soyombo Vowel Sign I ..Soyombo Vowel Length Mar + (0x11a8a, 0x11a99,), # Soyombo Final Consonant ..Soyombo Subjoiner + (0x11c2f, 0x11c36,), # Bhaiksuki Vowel Sign Aa ..Bhaiksuki Vowel Sign Voc + (0x11c38, 0x11c3f,), # Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Virama (0x11c92, 0x11ca7,), # Marchen Subjoined 
Letter..Marchen Subjoined Letter - (0x11caa, 0x11cb0,), # Marchen Subjoined Letter..Marchen Vowel Sign Aa - (0x11cb2, 0x11cb3,), # Marchen Vowel Sign U ..Marchen Vowel Sign E - (0x11cb5, 0x11cb6,), # Marchen Sign Anusvara ..Marchen Sign Candrabindu + (0x11ca9, 0x11cb6,), # Marchen Subjoined Letter..Marchen Sign Candrabindu (0x11d31, 0x11d36,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3a, 0x11d3a,), # Masaram Gondi Vowel Sign E (0x11d3c, 0x11d3d,), # Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign (0x11d3f, 0x11d45,), # Masaram Gondi Vowel Sign..Masaram Gondi Virama (0x11d47, 0x11d47,), # Masaram Gondi Ra-kara + (0x11d8a, 0x11d8e,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign (0x11d90, 0x11d91,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign - (0x11d95, 0x11d95,), # Gunjala Gondi Sign Anusvara - (0x11d97, 0x11d97,), # Gunjala Gondi Virama - (0x11ef3, 0x11ef4,), # Makasar Vowel Sign I ..Makasar Vowel Sign U - (0x11f00, 0x11f01,), # (nil) - (0x11f36, 0x11f3a,), # (nil) - (0x11f40, 0x11f40,), # (nil) - (0x11f42, 0x11f42,), # (nil) - (0x13440, 0x13440,), # (nil) - (0x13447, 0x13455,), # (nil) + (0x11d93, 0x11d97,), # Gunjala Gondi Vowel Sign..Gunjala Gondi Virama + (0x11ef3, 0x11ef6,), # Makasar Vowel Sign I ..Makasar Vowel Sign O + (0x11f00, 0x11f01,), # Kawi Sign Candrabindu ..Kawi Sign Anusvara + (0x11f03, 0x11f03,), # Kawi Sign Visarga + (0x11f34, 0x11f3a,), # Kawi Vowel Sign Aa ..Kawi Vowel Sign Vocalic + (0x11f3e, 0x11f42,), # Kawi Vowel Sign E ..Kawi Conjoiner + (0x13430, 0x13440,), # Egyptian Hieroglyph Vert..Egyptian Hieroglyph Mirr + (0x13447, 0x13455,), # Egyptian Hieroglyph Modi..Egyptian Hieroglyph Modi (0x16af0, 0x16af4,), # Bassa Vah Combining High..Bassa Vah Combining High (0x16b30, 0x16b36,), # Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta (0x16f4f, 0x16f4f,), # Miao Sign Consonant Modifier Bar + (0x16f51, 0x16f87,), # Miao Sign Aspiration ..Miao Vowel Sign Ui (0x16f8f, 0x16f92,), # Miao Tone Right ..Miao Tone 
Below (0x16fe4, 0x16fe4,), # Khitan Small Script Filler + (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea (0x1bc9d, 0x1bc9e,), # Duployan Thick Letter Se..Duployan Double Mark + (0x1bca0, 0x1bca3,), # Shorthand Format Letter ..Shorthand Format Up Step (0x1cf00, 0x1cf2d,), # Znamenny Combining Mark ..Znamenny Combining Mark (0x1cf30, 0x1cf46,), # Znamenny Combining Tonal..Znamenny Priznak Modifie - (0x1d167, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining - (0x1d17b, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d165, 0x1d169,), # Musical Symbol Combining..Musical Symbol Combining + (0x1d16d, 0x1d182,), # Musical Symbol Combining..Musical Symbol Combining (0x1d185, 0x1d18b,), # Musical Symbol Combining..Musical Symbol Combining (0x1d1aa, 0x1d1ad,), # Musical Symbol Combining..Musical Symbol Combining (0x1d242, 0x1d244,), # Combining Greek Musical ..Combining Greek Musical @@ -4587,13 +4790,16 @@ ZERO_WIDTH = { (0x1e01b, 0x1e021,), # Combining Glagolitic Let..Combining Glagolitic Let (0x1e023, 0x1e024,), # Combining Glagolitic Let..Combining Glagolitic Let (0x1e026, 0x1e02a,), # Combining Glagolitic Let..Combining Glagolitic Let - (0x1e08f, 0x1e08f,), # (nil) + (0x1e08f, 0x1e08f,), # Combining Cyrillic Small Letter Byelorussian-ukr (0x1e130, 0x1e136,), # Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T (0x1e2ae, 0x1e2ae,), # Toto Sign Rising Tone (0x1e2ec, 0x1e2ef,), # Wancho Tone Tup ..Wancho Tone Koini - (0x1e4ec, 0x1e4ef,), # (nil) + (0x1e4ec, 0x1e4ef,), # Nag Mundari Sign Muhor ..Nag Mundari Sign Sutuh (0x1e8d0, 0x1e8d6,), # Mende Kikakui Combining ..Mende Kikakui Combining (0x1e944, 0x1e94a,), # Adlam Alif Lengthener ..Adlam Nukta + (0x1f3fb, 0x1f3ff,), # Emoji Modifier Fitzpatri..Emoji Modifier Fitzpatri + (0xe0001, 0xe0001,), # Language Tag + (0xe0020, 0xe007f,), # Tag Space ..Cancel Tag (0xe0100, 0xe01ef,), # Variation Selector-17 ..Variation Selector-256 ), } diff --git 
a/contrib/python/wcwidth/py2/wcwidth/unicode_versions.py b/contrib/python/wcwidth/py2/wcwidth/unicode_versions.py index cd33688b2e..4e9ccbf7a7 100644 --- a/contrib/python/wcwidth/py2/wcwidth/unicode_versions.py +++ b/contrib/python/wcwidth/py2/wcwidth/unicode_versions.py @@ -1,7 +1,7 @@ """ Exports function list_versions() for unicode version level support. -This code generated by wcwidth/bin/update-tables.py on 2023-01-14 00:53:07 UTC. +This code generated by wcwidth/bin/update-tables.py on 2023-09-14 15:45:33 UTC. """ @@ -34,4 +34,5 @@ def list_versions(): "13.0.0", "14.0.0", "15.0.0", + "15.1.0", ) diff --git a/contrib/python/wcwidth/py2/wcwidth/wcwidth.py b/contrib/python/wcwidth/py2/wcwidth/wcwidth.py index 6162cddffb..59eb5c0806 100644 --- a/contrib/python/wcwidth/py2/wcwidth/wcwidth.py +++ b/contrib/python/wcwidth/py2/wcwidth/wcwidth.py @@ -68,6 +68,7 @@ import sys import warnings # local +from .table_vs16 import VS16_NARROW_TO_WIDE from .table_wide import WIDE_EASTASIAN from .table_zero import ZERO_WIDTH from .unicode_versions import list_versions @@ -81,34 +82,7 @@ except ImportError: from backports.functools_lru_cache import lru_cache # global cache -_UNICODE_CMPTABLE = None -_PY3 = (sys.version_info[0] >= 3) - - -# NOTE: created by hand, there isn't anything identifiable other than -# general Cf category code to identify these, and some characters in Cf -# category code are of non-zero width. 
-# Also includes some Cc, Mn, Zl, and Zp characters -ZERO_WIDTH_CF = set([ - 0, # Null (Cc) - 0x034F, # Combining grapheme joiner (Mn) - 0x200B, # Zero width space - 0x200C, # Zero width non-joiner - 0x200D, # Zero width joiner - 0x200E, # Left-to-right mark - 0x200F, # Right-to-left mark - 0x2028, # Line separator (Zl) - 0x2029, # Paragraph separator (Zp) - 0x202A, # Left-to-right embedding - 0x202B, # Right-to-left embedding - 0x202C, # Pop directional formatting - 0x202D, # Left-to-right override - 0x202E, # Right-to-left override - 0x2060, # Word joiner - 0x2061, # Function application - 0x2062, # Invisible times - 0x2063, # Invisible separator -]) +_PY3 = sys.version_info[0] >= 3 def _bisearch(ucs, table): @@ -145,8 +119,8 @@ def wcwidth(wc, unicode_version='auto'): :param str wc: A single Unicode character. :param str unicode_version: A Unicode version number, such as - ``'6.0.0'``, the list of available version levels may be - listed by pairing function :func:`list_versions`. + ``'6.0.0'``. A list of version levels suported by wcwidth + is returned by :func:`list_versions`. Any version string may be specified without error -- the nearest matching version is selected. When ``latest`` (default), the @@ -159,66 +133,27 @@ def wcwidth(wc, unicode_version='auto'): character occupies on a graphic terminal (1 or 2) is returned. :rtype: int - The following have a column width of -1: - - - C0 control characters (U+001 through U+01F). - - - C1 control characters and DEL (U+07F through U+0A0). - - The following have a column width of 0: - - - Non-spacing and enclosing combining characters (general - category code Mn or Me in the Unicode database). - - - NULL (``U+0000``). - - - COMBINING GRAPHEME JOINER (``U+034F``). - - - ZERO WIDTH SPACE (``U+200B``) *through* - RIGHT-TO-LEFT MARK (``U+200F``). - - - LINE SEPARATOR (``U+2028``) *and* - PARAGRAPH SEPARATOR (``U+2029``). - - - LEFT-TO-RIGHT EMBEDDING (``U+202A``) *through* - RIGHT-TO-LEFT OVERRIDE (``U+202E``). 
- - - WORD JOINER (``U+2060``) *through* - INVISIBLE SEPARATOR (``U+2063``). - - The following have a column width of 1: - - - SOFT HYPHEN (``U+00AD``). - - - All remaining characters, including all printable ISO 8859-1 - and WGL4 characters, Unicode control characters, etc. - - The following have a column width of 2: - - - Spacing characters in the East Asian Wide (W) or East Asian - Full-width (F) category as defined in Unicode Technical - Report #11 have a column width of 2. - - - Some kinds of Emoji or symbols. + See :ref:`Specification` for details of cell measurement. """ - # NOTE: created by hand, there isn't anything identifiable other than - # general Cf category code to identify these, and some characters in Cf - # category code are of non-zero width. - ucs = ord(wc) - if ucs in ZERO_WIDTH_CF: - return 0 + ucs = ord(wc) if wc else 0 + + # small optimization: early return of 1 for printable ASCII, this provides + # approximately 40% performance improvement for mostly-ascii documents, with + # less than 1% impact to others. + if 32 <= ucs < 0x7f: + return 1 - # C0/C1 control characters - if ucs < 32 or 0x07F <= ucs < 0x0A0: + # C0/C1 control characters are -1 for compatibility with POSIX-like calls + if ucs and ucs < 32 or 0x07F <= ucs < 0x0A0: return -1 _unicode_version = _wcmatch_version(unicode_version) - # combining characters with zero width + # Zero width if _bisearch(ucs, ZERO_WIDTH[_unicode_version]): return 0 - # "Wide AastAsian" (and emojis) + # 1 or 2 width return 1 + _bisearch(ucs, WIDE_EASTASIAN[_unicode_version]) @@ -234,21 +169,46 @@ def wcswidth(pwcs, n=None, unicode_version='auto'): the Environment Variable, ``UNICODE_VERSION`` if defined, or the latest available unicode version, otherwise. :rtype: int - :returns: The width, in cells, necessary to display the first ``n`` - characters of the unicode string ``pwcs``. Returns ``-1`` if - a non-printable character is encountered. 
- """ - # pylint: disable=C0103 - # Invalid argument name "n" + :returns: The width, in cells, needed to display the first ``n`` characters + of the unicode string ``pwcs``. Returns ``-1`` for C0 and C1 control + characters! + See :ref:`Specification` for details of cell measurement. + """ + # this 'n' argument is a holdover for POSIX function + _unicode_version = None end = len(pwcs) if n is None else n - idx = slice(0, end) width = 0 - for char in pwcs[idx]: + idx = 0 + last_measured_char = None + while idx < end: + char = pwcs[idx] + if char == u'\u200D': + # Zero Width Joiner, do not measure this or next character + idx += 2 + continue + if char == u'\uFE0F' and last_measured_char: + # on variation selector 16 (VS16) following another character, + # conditionally add '1' to the measured width if that character is + # known to be converted from narrow to wide by the VS16 character. + if _unicode_version is None: + _unicode_version = _wcversion_value(_wcmatch_version(unicode_version)) + if _unicode_version >= (9, 0, 0): + width += _bisearch(ord(last_measured_char), VS16_NARROW_TO_WIDE["9.0.0"]) + last_measured_char = None + idx += 1 + continue + # measure character at current index wcw = wcwidth(char, unicode_version) if wcw < 0: - return -1 + # early return -1 on C0 and C1 control characters + return wcw + if wcw > 0: + # track last character measured to contain a cell, so that + # subsequent VS-16 modifiers may be understood + last_measured_char = char width += wcw + idx += 1 return width @@ -292,14 +252,18 @@ def _wcmatch_version(given_version): """ # Design note: the choice to return the same type that is given certainly # complicates it for python 2 str-type, but allows us to define an api that - # to use 'string-type', for unicode version level definitions, so all of our - # example code works with all versions of python. 
That, along with the - # string-to-numeric and comparisons of earliest, latest, matching, or - # nearest, greatly complicates this function. + # uses 'string-type' for unicode version level definitions, so all of our + # example code works with all versions of python. + # + # That, along with the string-to-numeric and comparisons of earliest, + # latest, matching, or nearest, greatly complicates this function. + # Performance is somewhat curbed by memoization. _return_str = not _PY3 and isinstance(given_version, str) if _return_str: - unicode_versions = [ucs.encode() for ucs in list_versions()] + # avoid list-comprehension to work around a coverage issue: + # https://github.com/nedbat/coveragepy/issues/753 + unicode_versions = list(map(lambda ucs: ucs.encode(), list_versions())) else: unicode_versions = list_versions() latest_version = unicode_versions[-1] @@ -375,4 +339,4 @@ def _wcmatch_version(given_version): # is, 4.1 is returned for given 4.9.9, where 4.1 and 5.0 are available. if cmp_next_version > cmp_given: return unicode_version - assert False, ("Code path unreachable", given_version, unicode_versions) + assert False, ("Code path unreachable", given_version, unicode_versions) # pragma: no cover diff --git a/contrib/python/wcwidth/py2/ya.make b/contrib/python/wcwidth/py2/ya.make index fd199c6a95..8453f5ee2f 100644 --- a/contrib/python/wcwidth/py2/ya.make +++ b/contrib/python/wcwidth/py2/ya.make @@ -2,7 +2,7 @@ PY2_LIBRARY() -VERSION(0.2.6) +VERSION(0.2.12) LICENSE(MIT) @@ -15,6 +15,7 @@ NO_LINT() PY_SRCS( TOP_LEVEL wcwidth/__init__.py + wcwidth/table_vs16.py wcwidth/table_wide.py wcwidth/table_zero.py wcwidth/unicode_versions.py |