Sync contrib/tools/python3 layout with upstream

* Move src/ subdir contents to the top of the layout * Rename self-written lib -> lib2 to avoid CaseFolding warning from the VCS * Regenerate contrib/libs/python proxy-headers accordingly 4ccc62ac1511abcf0fed14ccade38e984e088f1e
author: thegeorg <thegeorg@yandex-team.com> 2024-02-19 02:38:52 +0300
committer: thegeorg <thegeorg@yandex-team.com> 2024-02-19 02:50:43 +0300
commit: d96fa07134c06472bfee6718b5cfd1679196fc99 (patch)
tree: 31ec344fa9d3ff8dc038692516b6438dfbdb8a2d /contrib/tools/python3/Lib/locale.py
parent: 452cf9e068aef7110e35e654c5d47eb80111ef89 (diff)
download: ydb-d96fa07134c06472bfee6718b5cfd1679196fc99.tar.gz
1 files changed, 1778 insertions, 0 deletions
diff --git a/contrib/tools/python3/Lib/locale.py b/contrib/tools/python3/Lib/locale.py
new file mode 100644
index 00000000000..4965c973074
--- /dev/null
+++ b/contrib/tools/python3/Lib/locale.py
@@ -0,0 +1,1778 @@
+"""Locale support module.
+
+The module provides low-level access to the C lib's locale APIs and adds high
+level number formatting APIs as well as a locale aliasing engine to complement
+these.
+
+The aliasing engine includes support for many commonly used locale names and
+maps them to values suitable for passing to the C lib's setlocale() function. It
+also includes default encodings for all supported locale names.
+
+"""
+
+import sys
+import encodings
+import encodings.aliases
+import re
+import _collections_abc
+from builtins import str as _builtin_str
+import functools
+
+# Try importing the _locale module.
+#
+# If this fails, fall back on a basic 'C' locale emulation.
+
+# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
+# trying the import.  So __all__ is also fiddled at the end of the file.
+__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
+           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
+           "str", "atof", "atoi", "format_string", "currency",
+           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
+           "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]
+
+def _strcoll(a,b):
+    """ strcoll(string,string) -> int.
+        Compares two strings according to the locale.
+    """
+    return (a > b) - (a < b)
+
+def _strxfrm(s):
+    """ strxfrm(string) -> string.
+        Returns a string that behaves for cmp locale-aware.
+    """
+    return s
+
+try:
+
+    from _locale import *
+
+except ImportError:
+
+    # Locale emulation
+
+    CHAR_MAX = 127
+    LC_ALL = 6
+    LC_COLLATE = 3
+    LC_CTYPE = 0
+    LC_MESSAGES = 5
+    LC_MONETARY = 4
+    LC_NUMERIC = 1
+    LC_TIME = 2
+    Error = ValueError
+
+    def localeconv():
+        """ localeconv() -> dict.
+            Returns numeric and monetary locale-specific parameters.
+        """
+        # 'C' locale default values
+        return {'grouping': [127],
+                'currency_symbol': '',
+                'n_sign_posn': 127,
+                'p_cs_precedes': 127,
+                'n_cs_precedes': 127,
+                'mon_grouping': [],
+                'n_sep_by_space': 127,
+                'decimal_point': '.',
+                'negative_sign': '',
+                'positive_sign': '',
+                'p_sep_by_space': 127,
+                'int_curr_symbol': '',
+                'p_sign_posn': 127,
+                'thousands_sep': '',
+                'mon_thousands_sep': '',
+                'frac_digits': 127,
+                'mon_decimal_point': '',
+                'int_frac_digits': 127}
+
+    def setlocale(category, value=None):
+        """ setlocale(integer,string=None) -> string.
+            Activates/queries locale processing.
+        """
+        if value not in (None, '', 'C'):
+            raise Error('_locale emulation only supports "C" locale')
+        return 'C'
+
+# These may or may not exist in _locale, so be sure to set them.
+if 'strxfrm' not in globals():
+    strxfrm = _strxfrm
+if 'strcoll' not in globals():
+    strcoll = _strcoll
+
+
+_localeconv = localeconv
+
+# With this dict, you can override some items of localeconv's return value.
+# This is useful for testing purposes.
+_override_localeconv = {}
+
+@functools.wraps(_localeconv)
+def localeconv():
+    d = _localeconv()
+    if _override_localeconv:
+        d.update(_override_localeconv)
+    return d
+
+
+### Number formatting APIs
+
+# Author: Martin von Loewis
+# improved by Georg Brandl
+
+# Iterate over grouping intervals
+def _grouping_intervals(grouping):
+    last_interval = None
+    for interval in grouping:
+        # if grouping is -1, we are done
+        if interval == CHAR_MAX:
+            return
+        # 0: re-use last group ad infinitum
+        if interval == 0:
+            if last_interval is None:
+                raise ValueError("invalid grouping")
+            while True:
+                yield last_interval
+        yield interval
+        last_interval = interval
+
+#perform the grouping from right to left
+def _group(s, monetary=False):
+    conv = localeconv()
+    thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
+    grouping = conv[monetary and 'mon_grouping' or 'grouping']
+    if not grouping:
+        return (s, 0)
+    if s[-1] == ' ':
+        stripped = s.rstrip()
+        right_spaces = s[len(stripped):]
+        s = stripped
+    else:
+        right_spaces = ''
+    left_spaces = ''
+    groups = []
+    for interval in _grouping_intervals(grouping):
+        if not s or s[-1] not in "0123456789":
+            # only non-digit characters remain (sign, spaces)
+            left_spaces = s
+            s = ''
+            break
+        groups.append(s[-interval:])
+        s = s[:-interval]
+    if s:
+        groups.append(s)
+    groups.reverse()
+    return (
+        left_spaces + thousands_sep.join(groups) + right_spaces,
+        len(thousands_sep) * (len(groups) - 1)
+    )
+
+# Strip a given amount of excess padding from the given string
+def _strip_padding(s, amount):
+    lpos = 0
+    while amount and s[lpos] == ' ':
+        lpos += 1
+        amount -= 1
+    rpos = len(s) - 1
+    while amount and s[rpos] == ' ':
+        rpos -= 1
+        amount -= 1
+    return s[lpos:rpos+1]
+
+_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
+                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
+
+def _format(percent, value, grouping=False, monetary=False, *additional):
+    if additional:
+        formatted = percent % ((value,) + additional)
+    else:
+        formatted = percent % value
+    if percent[-1] in 'eEfFgGdiu':
+        formatted = _localize(formatted, grouping, monetary)
+    return formatted
+
+# Transform formatted as locale number according to the locale settings
+def _localize(formatted, grouping=False, monetary=False):
+    # floats and decimal ints need special action!
+    if '.' in formatted:
+        seps = 0
+        parts = formatted.split('.')
+        if grouping:
+            parts[0], seps = _group(parts[0], monetary=monetary)
+        decimal_point = localeconv()[monetary and 'mon_decimal_point'
+                                              or 'decimal_point']
+        formatted = decimal_point.join(parts)
+        if seps:
+            formatted = _strip_padding(formatted, seps)
+    else:
+        seps = 0
+        if grouping:
+            formatted, seps = _group(formatted, monetary=monetary)
+        if seps:
+            formatted = _strip_padding(formatted, seps)
+    return formatted
+
+def format_string(f, val, grouping=False, monetary=False):
+    """Formats a string in the same way that the % formatting would use,
+    but takes the current locale into account.
+
+    Grouping is applied if the third parameter is true.
+    Conversion uses monetary thousands separator and grouping strings if
+    forth parameter monetary is true."""
+    percents = list(_percent_re.finditer(f))
+    new_f = _percent_re.sub('%s', f)
+
+    if isinstance(val, _collections_abc.Mapping):
+        new_val = []
+        for perc in percents:
+            if perc.group()[-1]=='%':
+                new_val.append('%')
+            else:
+                new_val.append(_format(perc.group(), val, grouping, monetary))
+    else:
+        if not isinstance(val, tuple):
+            val = (val,)
+        new_val = []
+        i = 0
+        for perc in percents:
+            if perc.group()[-1]=='%':
+                new_val.append('%')
+            else:
+                starcount = perc.group('modifiers').count('*')
+                new_val.append(_format(perc.group(),
+                                      val[i],
+                                      grouping,
+                                      monetary,
+                                      *val[i+1:i+1+starcount]))
+                i += (1 + starcount)
+    val = tuple(new_val)
+
+    return new_f % val
+
+def currency(val, symbol=True, grouping=False, international=False):
+    """Formats val according to the currency settings
+    in the current locale."""
+    conv = localeconv()
+
+    # check for illegal values
+    digits = conv[international and 'int_frac_digits' or 'frac_digits']
+    if digits == 127:
+        raise ValueError("Currency formatting is not possible using "
+                         "the 'C' locale.")
+
+    s = _localize(f'{abs(val):.{digits}f}', grouping, monetary=True)
+    # '<' and '>' are markers if the sign must be inserted between symbol and value
+    s = '<' + s + '>'
+
+    if symbol:
+        smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
+        precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
+        separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
+
+        if precedes:
+            s = smb + (separated and ' ' or '') + s
+        else:
+            if international and smb[-1] == ' ':
+                smb = smb[:-1]
+            s = s + (separated and ' ' or '') + smb
+
+    sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
+    sign = conv[val<0 and 'negative_sign' or 'positive_sign']
+
+    if sign_pos == 0:
+        s = '(' + s + ')'
+    elif sign_pos == 1:
+        s = sign + s
+    elif sign_pos == 2:
+        s = s + sign
+    elif sign_pos == 3:
+        s = s.replace('<', sign)
+    elif sign_pos == 4:
+        s = s.replace('>', sign)
+    else:
+        # the default if nothing specified;
+        # this should be the most fitting sign position
+        s = sign + s
+
+    return s.replace('<', '').replace('>', '')
+
+def str(val):
+    """Convert float to string, taking the locale into account."""
+    return _format("%.12g", val)
+
+def delocalize(string):
+    "Parses a string as a normalized number according to the locale settings."
+
+    conv = localeconv()
+
+    #First, get rid of the grouping
+    ts = conv['thousands_sep']
+    if ts:
+        string = string.replace(ts, '')
+
+    #next, replace the decimal point with a dot
+    dd = conv['decimal_point']
+    if dd:
+        string = string.replace(dd, '.')
+    return string
+
+def localize(string, grouping=False, monetary=False):
+    """Parses a string as locale number according to the locale settings."""
+    return _localize(string, grouping, monetary)
+
+def atof(string, func=float):
+    "Parses a string as a float according to the locale settings."
+    return func(delocalize(string))
+
+def atoi(string):
+    "Converts a string to an integer according to the locale settings."
+    return int(delocalize(string))
+
+def _test():
+    setlocale(LC_ALL, "")
+    #do grouping
+    s1 = format_string("%d", 123456789,1)
+    print(s1, "is", atoi(s1))
+    #standard formatting
+    s1 = str(3.14)
+    print(s1, "is", atof(s1))
+
+### Locale name aliasing engine
+
+# Author: Marc-Andre Lemburg, mal@lemburg.com
+# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
+
+# store away the low-level version of setlocale (it's
+# overridden below)
+_setlocale = setlocale
+
+def _replace_encoding(code, encoding):
+    if '.' in code:
+        langname = code[:code.index('.')]
+    else:
+        langname = code
+    # Convert the encoding to a C lib compatible encoding string
+    norm_encoding = encodings.normalize_encoding(encoding)
+    #print('norm encoding: %r' % norm_encoding)
+    norm_encoding = encodings.aliases.aliases.get(norm_encoding.lower(),
+                                                  norm_encoding)
+    #print('aliased encoding: %r' % norm_encoding)
+    encoding = norm_encoding
+    norm_encoding = norm_encoding.lower()
+    if norm_encoding in locale_encoding_alias:
+        encoding = locale_encoding_alias[norm_encoding]
+    else:
+        norm_encoding = norm_encoding.replace('_', '')
+        norm_encoding = norm_encoding.replace('-', '')
+        if norm_encoding in locale_encoding_alias:
+            encoding = locale_encoding_alias[norm_encoding]
+    #print('found encoding %r' % encoding)
+    return langname + '.' + encoding
+
+def _append_modifier(code, modifier):
+    if modifier == 'euro':
+        if '.' not in code:
+            return code + '.ISO8859-15'
+        _, _, encoding = code.partition('.')
+        if encoding in ('ISO8859-15', 'UTF-8'):
+            return code
+        if encoding == 'ISO8859-1':
+            return _replace_encoding(code, 'ISO8859-15')
+    return code + '@' + modifier
+
+def normalize(localename):
+
+    """ Returns a normalized locale code for the given locale
+        name.
+
+        The returned locale code is formatted for use with
+        setlocale().
+
+        If normalization fails, the original name is returned
+        unchanged.
+
+        If the given encoding is not known, the function defaults to
+        the default encoding for the locale code just like setlocale()
+        does.
+
+    """
+    # Normalize the locale name and extract the encoding and modifier
+    code = localename.lower()
+    if ':' in code:
+        # ':' is sometimes used as encoding delimiter.
+        code = code.replace(':', '.')
+    if '@' in code:
+        code, modifier = code.split('@', 1)
+    else:
+        modifier = ''
+    if '.' in code:
+        langname, encoding = code.split('.')[:2]
+    else:
+        langname = code
+        encoding = ''
+
+    # First lookup: fullname (possibly with encoding and modifier)
+    lang_enc = langname
+    if encoding:
+        norm_encoding = encoding.replace('-', '')
+        norm_encoding = norm_encoding.replace('_', '')
+        lang_enc += '.' + norm_encoding
+    lookup_name = lang_enc
+    if modifier:
+        lookup_name += '@' + modifier
+    code = locale_alias.get(lookup_name, None)
+    if code is not None:
+        return code
+    #print('first lookup failed')
+
+    if modifier:
+        # Second try: fullname without modifier (possibly with encoding)
+        code = locale_alias.get(lang_enc, None)
+        if code is not None:
+            #print('lookup without modifier succeeded')
+            if '@' not in code:
+                return _append_modifier(code, modifier)
+            if code.split('@', 1)[1].lower() == modifier:
+                return code
+        #print('second lookup failed')
+
+    if encoding:
+        # Third try: langname (without encoding, possibly with modifier)
+        lookup_name = langname
+        if modifier:
+            lookup_name += '@' + modifier
+        code = locale_alias.get(lookup_name, None)
+        if code is not None:
+            #print('lookup without encoding succeeded')
+            if '@' not in code:
+                return _replace_encoding(code, encoding)
+            code, modifier = code.split('@', 1)
+            return _replace_encoding(code, encoding) + '@' + modifier
+
+        if modifier:
+            # Fourth try: langname (without encoding and modifier)
+            code = locale_alias.get(langname, None)
+            if code is not None:
+                #print('lookup without modifier and encoding succeeded')
+                if '@' not in code:
+                    code = _replace_encoding(code, encoding)
+                    return _append_modifier(code, modifier)
+                code, defmod = code.split('@', 1)
+                if defmod.lower() == modifier:
+                    return _replace_encoding(code, encoding) + '@' + defmod
+
+    return localename
+
+def _parse_localename(localename):
+
+    """ Parses the locale code for localename and returns the
+        result as tuple (language code, encoding).
+
+        The localename is normalized and passed through the locale
+        alias engine. A ValueError is raised in case the locale name
+        cannot be parsed.
+
+        The language code corresponds to RFC 1766.  code and encoding
+        can be None in case the values cannot be determined or are
+        unknown to this implementation.
+
+    """
+    code = normalize(localename)
+    if '@' in code:
+        # Deal with locale modifiers
+        code, modifier = code.split('@', 1)
+        if modifier == 'euro' and '.' not in code:
+            # Assume Latin-9 for @euro locales. This is bogus,
+            # since some systems may use other encodings for these
+            # locales. Also, we ignore other modifiers.
+            return code, 'iso-8859-15'
+
+    if '.' in code:
+        return tuple(code.split('.')[:2])
+    elif code == 'C':
+        return None, None
+    elif code == 'UTF-8':
+        # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
+        # for getting UTF-8 handling for text.
+        return None, 'UTF-8'
+    raise ValueError('unknown locale: %s' % localename)
+
+def _build_localename(localetuple):
+
+    """ Builds a locale code from the given tuple (language code,
+        encoding).
+
+        No aliasing or normalizing takes place.
+
+    """
+    try:
+        language, encoding = localetuple
+
+        if language is None:
+            language = 'C'
+        if encoding is None:
+            return language
+        else:
+            return language + '.' + encoding
+    except (TypeError, ValueError):
+        raise TypeError('Locale must be None, a string, or an iterable of '
+                        'two strings -- language code, encoding.') from None
+
+def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
+
+    """ Tries to determine the default locale settings and returns
+        them as tuple (language code, encoding).
+
+        According to POSIX, a program which has not called
+        setlocale(LC_ALL, "") runs using the portable 'C' locale.
+        Calling setlocale(LC_ALL, "") lets it use the default locale as
+        defined by the LANG variable. Since we don't want to interfere
+        with the current locale setting we thus emulate the behavior
+        in the way described above.
+
+        To maintain compatibility with other platforms, not only the
+        LANG variable is tested, but a list of variables given as
+        envvars parameter. The first found to be defined will be
+        used. envvars defaults to the search path used in GNU gettext;
+        it must always contain the variable name 'LANG'.
+
+        Except for the code 'C', the language code corresponds to RFC
+        1766.  code and encoding can be None in case the values cannot
+        be determined.
+
+    """
+
+    import warnings
+    warnings._deprecated(
+        "locale.getdefaultlocale",
+        "{name!r} is deprecated and slated for removal in Python {remove}. "
+        "Use setlocale(), getencoding() and getlocale() instead.",
+        remove=(3, 15))
+    return _getdefaultlocale(envvars)
+
+
+def _getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
+    try:
+        # check if it's supported by the _locale module
+        import _locale
+        code, encoding = _locale._getdefaultlocale()
+    except (ImportError, AttributeError):
+        pass
+    else:
+        # make sure the code/encoding values are valid
+        if sys.platform == "win32" and code and code[:2] == "0x":
+            # map windows language identifier to language name
+            code = windows_locale.get(int(code, 0))
+        # ...add other platform-specific processing here, if
+        # necessary...
+        return code, encoding
+
+    # fall back on POSIX behaviour
+    import os
+    lookup = os.environ.get
+    for variable in envvars:
+        localename = lookup(variable,None)
+        if localename:
+            if variable == 'LANGUAGE':
+                localename = localename.split(':')[0]
+            break
+    else:
+        localename = 'C'
+    return _parse_localename(localename)
+
+
+def getlocale(category=LC_CTYPE):
+
+    """ Returns the current setting for the given locale category as
+        tuple (language code, encoding).
+
+        category may be one of the LC_* value except LC_ALL. It
+        defaults to LC_CTYPE.
+
+        Except for the code 'C', the language code corresponds to RFC
+        1766.  code and encoding can be None in case the values cannot
+        be determined.
+
+    """
+    localename = _setlocale(category)
+    if category == LC_ALL and ';' in localename:
+        raise TypeError('category LC_ALL is not supported')
+    return _parse_localename(localename)
+
+def setlocale(category, locale=None):
+
+    """ Set the locale for the given category.  The locale can be
+        a string, an iterable of two strings (language code and encoding),
+        or None.
+
+        Iterables are converted to strings using the locale aliasing
+        engine.  Locale strings are passed directly to the C lib.
+
+        category may be given as one of the LC_* values.
+
+    """
+    if locale and not isinstance(locale, _builtin_str):
+        # convert to string
+        locale = normalize(_build_localename(locale))
+    return _setlocale(category, locale)
+
+def resetlocale(category=LC_ALL):
+
+    """ Sets the locale for category to the default setting.
+
+        The default setting is determined by calling
+        getdefaultlocale(). category defaults to LC_ALL.
+
+    """
+    import warnings
+    warnings.warn(
+        'Use locale.setlocale(locale.LC_ALL, "") instead',
+        DeprecationWarning, stacklevel=2
+    )
+
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore', category=DeprecationWarning)
+        loc = getdefaultlocale()
+
+    _setlocale(category, _build_localename(loc))
+
+
+try:
+    from _locale import getencoding
+except ImportError:
+    def getencoding():
+        if hasattr(sys, 'getandroidapilevel'):
+            # On Android langinfo.h and CODESET are missing, and UTF-8 is
+            # always used in mbstowcs() and wcstombs().
+            return 'utf-8'
+        encoding = _getdefaultlocale()[1]
+        if encoding is None:
+            # LANG not set, default to UTF-8
+            encoding = 'utf-8'
+        return encoding
+
+try:
+    CODESET
+except NameError:
+    def getpreferredencoding(do_setlocale=True):
+        """Return the charset that the user is likely using."""
+        if sys.flags.warn_default_encoding:
+            import warnings
+            warnings.warn(
+                "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
+                EncodingWarning, 2)
+        if sys.flags.utf8_mode:
+            return 'utf-8'
+        return getencoding()
+else:
+    # On Unix, if CODESET is available, use that.
+    def getpreferredencoding(do_setlocale=True):
+        """Return the charset that the user is likely using,
+        according to the system configuration."""
+
+        if sys.flags.warn_default_encoding:
+            import warnings
+            warnings.warn(
+                "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
+                EncodingWarning, 2)
+        if sys.flags.utf8_mode:
+            return 'utf-8'
+
+        if not do_setlocale:
+            return getencoding()
+
+        old_loc = setlocale(LC_CTYPE)
+        try:
+            try:
+                setlocale(LC_CTYPE, "")
+            except Error:
+                pass
+            return getencoding()
+        finally:
+            setlocale(LC_CTYPE, old_loc)
+
+
+### Database
+#
+# The following data was extracted from the locale.alias file which
+# comes with X11 and then hand edited removing the explicit encoding
+# definitions and adding some more aliases. The file is usually
+# available as /usr/lib/X11/locale/locale.alias.
+#
+
+#
+# The local_encoding_alias table maps lowercase encoding alias names
+# to C locale encoding names (case-sensitive). Note that normalize()
+# first looks up the encoding in the encodings.aliases dictionary and
+# then applies this mapping to find the correct C lib name for the
+# encoding.
+#
+locale_encoding_alias = {
+
+    # Mappings for non-standard encoding names used in locale names
+    '437':                          'C',
+    'c':                            'C',
+    'en':                           'ISO8859-1',
+    'jis':                          'JIS7',
+    'jis7':                         'JIS7',
+    'ajec':                         'eucJP',
+    'koi8c':                        'KOI8-C',
+    'microsoftcp1251':              'CP1251',
+    'microsoftcp1255':              'CP1255',
+    'microsoftcp1256':              'CP1256',
+    '88591':                        'ISO8859-1',
+    '88592':                        'ISO8859-2',
+    '88595':                        'ISO8859-5',
+    '885915':                       'ISO8859-15',
+
+    # Mappings from Python codec names to C lib encoding names
+    'ascii':                        'ISO8859-1',
+    'latin_1':                      'ISO8859-1',
+    'iso8859_1':                    'ISO8859-1',
+    'iso8859_10':                   'ISO8859-10',
+    'iso8859_11':                   'ISO8859-11',
+    'iso8859_13':                   'ISO8859-13',
+    'iso8859_14':                   'ISO8859-14',
+    'iso8859_15':                   'ISO8859-15',
+    'iso8859_16':                   'ISO8859-16',
+    'iso8859_2':                    'ISO8859-2',
+    'iso8859_3':                    'ISO8859-3',
+    'iso8859_4':                    'ISO8859-4',
+    'iso8859_5':                    'ISO8859-5',
+    'iso8859_6':                    'ISO8859-6',
+    'iso8859_7':                    'ISO8859-7',
+    'iso8859_8':                    'ISO8859-8',
+    'iso8859_9':                    'ISO8859-9',
+    'iso2022_jp':                   'JIS7',
+    'shift_jis':                    'SJIS',
+    'tactis':                       'TACTIS',
+    'euc_jp':                       'eucJP',
+    'euc_kr':                       'eucKR',
+    'utf_8':                        'UTF-8',
+    'koi8_r':                       'KOI8-R',
+    'koi8_t':                       'KOI8-T',
+    'koi8_u':                       'KOI8-U',
+    'kz1048':                       'RK1048',
+    'cp1251':                       'CP1251',
+    'cp1255':                       'CP1255',
+    'cp1256':                       'CP1256',
+
+    # XXX This list is still incomplete. If you know more
+    # mappings, please file a bug report. Thanks.
+}
+
+for k, v in sorted(locale_encoding_alias.items()):
+    k = k.replace('_', '')
+    locale_encoding_alias.setdefault(k, v)
+del k, v
+
+#
+# The locale_alias table maps lowercase alias names to C locale names
+# (case-sensitive). Encodings are always separated from the locale
+# name using a dot ('.'); they should only be given in case the
+# language name is needed to interpret the given encoding alias
+# correctly (CJK codes often have this need).
+#
+# Note that the normalize() function which uses this tables
+# removes '_' and '-' characters from the encoding part of the
+# locale name before doing the lookup. This saves a lot of
+# space in the table.
+#
+# MAL 2004-12-10:
+# Updated alias mapping to most recent locale.alias file
+# from X.org distribution using makelocalealias.py.
+#
+# These are the differences compared to the old mapping (Python 2.4
+# and older):
+#
+#    updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
+#    updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
+#    updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
+#    updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
+#    updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
+#    updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
+#    updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
+#    updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
+#    updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
+#    updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
+#    updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
+#    updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+#    updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+#    updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
+#    updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
+#    updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
+#    updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
+#    updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
+#    updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
+#    updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
+#    updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
+#    updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
+#
+# MAL 2008-05-30:
+# Updated alias mapping to most recent locale.alias file
+# from X.org distribution using makelocalealias.py.
+#
+# These are the differences compared to the old mapping (Python 2.5
+# and older):
+#
+#    updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
+#    updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
+#    updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
+#    updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
+#    updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
+#    updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
+#    updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
+#    updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
+#    updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
+#    updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
+#    updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
+#    updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
+#    updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
+#    updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
+#    updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
+#    updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
+#    updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
+#    updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
+#    updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
+#
+# AP 2010-04-12:
+# Updated alias mapping to most recent locale.alias file
+# from X.org distribution using makelocalealias.py.
+#
+# These are the differences compared to the old mapping (Python 2.6.5
+# and older):
+#
+#    updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
+#    updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
+#    updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
+#    updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
+#    updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
+#    updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
+#    updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
+#    updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
+#    updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
+#    updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
+#    updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
+#    updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
+#    updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
+#
+# SS 2013-12-20:
+# Updated alias mapping to most recent locale.alias file
+# from X.org distribution using makelocalealias.py.
+#
+# These are the differences compared to the old mapping (Python 3.3.3
+# and older):
+#
+#    updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
+#    updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
+#    updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
+#    updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
+#    updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+#    updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+#    updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
+#    updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
+#    updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
+#    updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
+#    updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
+#
+# SS 2014-10-01:
+# Updated alias mapping with glibc 2.19 supported locales.
+#
+# SS 2018-05-05:
+# Updated alias mapping with glibc 2.27 supported locales.
+#
+# These are the differences compared to the old mapping (Python 3.6.5
+# and older):
+#
+#    updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia'
+#    updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154'
+#    updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R'
+
+locale_alias = {
+    'a3':                                   'az_AZ.KOI8-C',
+    'a3_az':                                'az_AZ.KOI8-C',
+    'a3_az.koic':                           'az_AZ.KOI8-C',
+    'aa_dj':                                'aa_DJ.ISO8859-1',
+    'aa_er':                                'aa_ER.UTF-8',
+    'aa_et':                                'aa_ET.UTF-8',
+    'af':                                   'af_ZA.ISO8859-1',
+    'af_za':                                'af_ZA.ISO8859-1',
+    'agr_pe':                               'agr_PE.UTF-8',
+    'ak_gh':                                'ak_GH.UTF-8',
+    'am':                                   'am_ET.UTF-8',
+    'am_et':                                'am_ET.UTF-8',
+    'american':                             'en_US.ISO8859-1',
+    'an_es':                                'an_ES.ISO8859-15',
+    'anp_in':                               'anp_IN.UTF-8',
+    'ar':                                   'ar_AA.ISO8859-6',
+    'ar_aa':                                'ar_AA.ISO8859-6',
+    'ar_ae':                                'ar_AE.ISO8859-6',
+    'ar_bh':                                'ar_BH.ISO8859-6',
+    'ar_dz':                                'ar_DZ.ISO8859-6',
+    'ar_eg':                                'ar_EG.ISO8859-6',
+    'ar_in':                                'ar_IN.UTF-8',
+    'ar_iq':                                'ar_IQ.ISO8859-6',
+    'ar_jo':                                'ar_JO.ISO8859-6',
+    'ar_kw':                                'ar_KW.ISO8859-6',
+    'ar_lb':                                'ar_LB.ISO8859-6',
+    'ar_ly':                                'ar_LY.ISO8859-6',
+    'ar_ma':                                'ar_MA.ISO8859-6',
+    'ar_om':                                'ar_OM.ISO8859-6',
+    'ar_qa':                                'ar_QA.ISO8859-6',
+    'ar_sa':                                'ar_SA.ISO8859-6',
+    'ar_sd':                                'ar_SD.ISO8859-6',
+    'ar_ss':                                'ar_SS.UTF-8',
+    'ar_sy':                                'ar_SY.ISO8859-6',
+    'ar_tn':                                'ar_TN.ISO8859-6',
+    'ar_ye':                                'ar_YE.ISO8859-6',
+    'arabic':                               'ar_AA.ISO8859-6',
+    'as':                                   'as_IN.UTF-8',
+    'as_in':                                'as_IN.UTF-8',
+    'ast_es':                               'ast_ES.ISO8859-15',
+    'ayc_pe':                               'ayc_PE.UTF-8',
+    'az':                                   'az_AZ.ISO8859-9E',
+    'az_az':                                'az_AZ.ISO8859-9E',
+    'az_az.iso88599e':                      'az_AZ.ISO8859-9E',
+    'az_ir':                                'az_IR.UTF-8',
+    'be':                                   'be_BY.CP1251',
+    'be@latin':                             'be_BY.UTF-8@latin',
+    'be_bg.utf8':                           'bg_BG.UTF-8',
+    'be_by':                                'be_BY.CP1251',
+    'be_by@latin':                          'be_BY.UTF-8@latin',
+    'bem_zm':                               'bem_ZM.UTF-8',
+    'ber_dz':                               'ber_DZ.UTF-8',
+    'ber_ma':                               'ber_MA.UTF-8',
+    'bg':                                   'bg_BG.CP1251',
+    'bg_bg':                                'bg_BG.CP1251',
+    'bhb_in.utf8':                          'bhb_IN.UTF-8',
+    'bho_in':                               'bho_IN.UTF-8',
+    'bho_np':                               'bho_NP.UTF-8',
+    'bi_vu':                                'bi_VU.UTF-8',
+    'bn_bd':                                'bn_BD.UTF-8',
+    'bn_in':                                'bn_IN.UTF-8',
+    'bo_cn':                                'bo_CN.UTF-8',
+    'bo_in':                                'bo_IN.UTF-8',
+    'bokmal':                               'nb_NO.ISO8859-1',
+    'bokm\xe5l':                            'nb_NO.ISO8859-1',
+    'br':                                   'br_FR.ISO8859-1',
+    'br_fr':                                'br_FR.ISO8859-1',
+    'brx_in':                               'brx_IN.UTF-8',
+    'bs':                                   'bs_BA.ISO8859-2',
+    'bs_ba':                                'bs_BA.ISO8859-2',
+    'bulgarian':                            'bg_BG.CP1251',
+    'byn_er':                               'byn_ER.UTF-8',
+    'c':                                    'C',
+    'c-french':                             'fr_CA.ISO8859-1',
+    'c.ascii':                              'C',
+    'c.en':                                 'C',
+    'c.iso88591':                           'en_US.ISO8859-1',
+    'c.utf8':                               'C.UTF-8',
+    'c_c':                                  'C',
+    'c_c.c':                                'C',
+    'ca':                                   'ca_ES.ISO8859-1',
+    'ca_ad':                                'ca_AD.ISO8859-1',
+    'ca_es':                                'ca_ES.ISO8859-1',
+    'ca_es@valencia':                       'ca_ES.UTF-8@valencia',
+    'ca_fr':                                'ca_FR.ISO8859-1',
+    'ca_it':                                'ca_IT.ISO8859-1',
+    'catalan':                              'ca_ES.ISO8859-1',
+    'ce_ru':                                'ce_RU.UTF-8',
+    'cextend':                              'en_US.ISO8859-1',
+    'chinese-s':                            'zh_CN.eucCN',
+    'chinese-t':                            'zh_TW.eucTW',
+    'chr_us':                               'chr_US.UTF-8',
+    'ckb_iq':                               'ckb_IQ.UTF-8',
+    'cmn_tw':                               'cmn_TW.UTF-8',
+    'crh_ua':                               'crh_UA.UTF-8',
+    'croatian':                             'hr_HR.ISO8859-2',
+    'cs':                                   'cs_CZ.ISO8859-2',
+    'cs_cs':                                'cs_CZ.ISO8859-2',
+    'cs_cz':                                'cs_CZ.ISO8859-2',
+    'csb_pl':                               'csb_PL.UTF-8',
+    'cv_ru':                                'cv_RU.UTF-8',
+    'cy':                                   'cy_GB.ISO8859-1',
+    'cy_gb':                                'cy_GB.ISO8859-1',
+    'cz':                                   'cs_CZ.ISO8859-2',
+    'cz_cz':                                'cs_CZ.ISO8859-2',
+    'czech':                                'cs_CZ.ISO8859-2',
+    'da':                                   'da_DK.ISO8859-1',
+    'da_dk':                                'da_DK.ISO8859-1',
+    'danish':                               'da_DK.ISO8859-1',
+    'dansk':                                'da_DK.ISO8859-1',
+    'de':                                   'de_DE.ISO8859-1',
+    'de_at':                                'de_AT.ISO8859-1',
+    'de_be':                                'de_BE.ISO8859-1',
+    'de_ch':                                'de_CH.ISO8859-1',
+    'de_de':                                'de_DE.ISO8859-1',
+    'de_it':                                'de_IT.ISO8859-1',
+    'de_li.utf8':                           'de_LI.UTF-8',
+    'de_lu':                                'de_LU.ISO8859-1',
+    'deutsch':                              'de_DE.ISO8859-1',
+    'doi_in':                               'doi_IN.UTF-8',
+    'dutch':                                'nl_NL.ISO8859-1',
+    'dutch.iso88591':                       'nl_BE.ISO8859-1',
+    'dv_mv':                                'dv_MV.UTF-8',
+    'dz_bt':                                'dz_BT.UTF-8',
+    'ee':                                   'ee_EE.ISO8859-4',
+    'ee_ee':                                'ee_EE.ISO8859-4',
+    'eesti':                                'et_EE.ISO8859-1',
+    'el':                                   'el_GR.ISO8859-7',
+    'el_cy':                                'el_CY.ISO8859-7',
+    'el_gr':                                'el_GR.ISO8859-7',
+    'el_gr@euro':                           'el_GR.ISO8859-15',
+    'en':                                   'en_US.ISO8859-1',
+    'en_ag':                                'en_AG.UTF-8',
+    'en_au':                                'en_AU.ISO8859-1',
+    'en_be':                                'en_BE.ISO8859-1',
+    'en_bw':                                'en_BW.ISO8859-1',
+    'en_ca':                                'en_CA.ISO8859-1',
+    'en_dk':                                'en_DK.ISO8859-1',
+    'en_dl.utf8':                           'en_DL.UTF-8',
+    'en_gb':                                'en_GB.ISO8859-1',
+    'en_hk':                                'en_HK.ISO8859-1',
+    'en_ie':                                'en_IE.ISO8859-1',
+    'en_il':                                'en_IL.UTF-8',
+    'en_in':                                'en_IN.ISO8859-1',
+    'en_ng':                                'en_NG.UTF-8',
+    'en_nz':                                'en_NZ.ISO8859-1',
+    'en_ph':                                'en_PH.ISO8859-1',
+    'en_sc.utf8':                           'en_SC.UTF-8',
+    'en_sg':                                'en_SG.ISO8859-1',
+    'en_uk':                                'en_GB.ISO8859-1',
+    'en_us':                                'en_US.ISO8859-1',
+    'en_us@euro@euro':                      'en_US.ISO8859-15',
+    'en_za':                                'en_ZA.ISO8859-1',
+    'en_zm':                                'en_ZM.UTF-8',
+    'en_zw':                                'en_ZW.ISO8859-1',
+    'en_zw.utf8':                           'en_ZS.UTF-8',
+    'eng_gb':                               'en_GB.ISO8859-1',
+    'english':                              'en_EN.ISO8859-1',
+    'english.iso88591':                     'en_US.ISO8859-1',
+    'english_uk':                           'en_GB.ISO8859-1',
+    'english_united-states':                'en_US.ISO8859-1',
+    'english_united-states.437':            'C',
+    'english_us':                           'en_US.ISO8859-1',
+    'eo':                                   'eo_XX.ISO8859-3',
+    'eo.utf8':                              'eo.UTF-8',
+    'eo_eo':                                'eo_EO.ISO8859-3',
+    'eo_us.utf8':                           'eo_US.UTF-8',
+    'eo_xx':                                'eo_XX.ISO8859-3',
+    'es':                                   'es_ES.ISO8859-1',
+    'es_ar':                                'es_AR.ISO8859-1',
+    'es_bo':                                'es_BO.ISO8859-1',
+    'es_cl':                                'es_CL.ISO8859-1',
+    'es_co':                                'es_CO.ISO8859-1',
+    'es_cr':                                'es_CR.ISO8859-1',
+    'es_cu':                                'es_CU.UTF-8',
+    'es_do':                                'es_DO.ISO8859-1',
+    'es_ec':                                'es_EC.ISO8859-1',
+    'es_es':                                'es_ES.ISO8859-1',
+    'es_gt':                                'es_GT.ISO8859-1',
+    'es_hn':                                'es_HN.ISO8859-1',
+    'es_mx':                                'es_MX.ISO8859-1',
+    'es_ni':                                'es_NI.ISO8859-1',
+    'es_pa':                                'es_PA.ISO8859-1',
+    'es_pe':                                'es_PE.ISO8859-1',
+    'es_pr':                                'es_PR.ISO8859-1',
+    'es_py':                                'es_PY.ISO8859-1',
+    'es_sv':                                'es_SV.ISO8859-1',
+    'es_us':                                'es_US.ISO8859-1',
+    'es_uy':                                'es_UY.ISO8859-1',
+    'es_ve':                                'es_VE.ISO8859-1',
+    'estonian':                             'et_EE.ISO8859-1',
+    'et':                                   'et_EE.ISO8859-15',
+    'et_ee':                                'et_EE.ISO8859-15',
+    'eu':                                   'eu_ES.ISO8859-1',
+    'eu_es':                                'eu_ES.ISO8859-1',
+    'eu_fr':                                'eu_FR.ISO8859-1',
+    'fa':                                   'fa_IR.UTF-8',
+    'fa_ir':                                'fa_IR.UTF-8',
+    'fa_ir.isiri3342':                      'fa_IR.ISIRI-3342',
+    'ff_sn':                                'ff_SN.UTF-8',
+    'fi':                                   'fi_FI.ISO8859-15',
+    'fi_fi':                                'fi_FI.ISO8859-15',
+    'fil_ph':                               'fil_PH.UTF-8',
+    'finnish':                              'fi_FI.ISO8859-1',
+    'fo':                                   'fo_FO.ISO8859-1',
+    'fo_fo':                                'fo_FO.ISO8859-1',
+    'fr':                                   'fr_FR.ISO8859-1',
+    'fr_be':                                'fr_BE.ISO8859-1',
+    'fr_ca':                                'fr_CA.ISO8859-1',
+    'fr_ch':                                'fr_CH.ISO8859-1',
+    'fr_fr':                                'fr_FR.ISO8859-1',
+    'fr_lu':                                'fr_LU.ISO8859-1',
+    'fran\xe7ais':                          'fr_FR.ISO8859-1',
+    'fre_fr':                               'fr_FR.ISO8859-1',
+    'french':                               'fr_FR.ISO8859-1',
+    'french.iso88591':                      'fr_CH.ISO8859-1',
+    'french_france':                        'fr_FR.ISO8859-1',
+    'fur_it':                               'fur_IT.UTF-8',
+    'fy_de':                                'fy_DE.UTF-8',
+    'fy_nl':                                'fy_NL.UTF-8',
+    'ga':                                   'ga_IE.ISO8859-1',
+    'ga_ie':                                'ga_IE.ISO8859-1',
+    'galego':                               'gl_ES.ISO8859-1',
+    'galician':                             'gl_ES.ISO8859-1',
+    'gd':                                   'gd_GB.ISO8859-1',
+    'gd_gb':                                'gd_GB.ISO8859-1',
+    'ger_de':                               'de_DE.ISO8859-1',
+    'german':                               'de_DE.ISO8859-1',
+    'german.iso88591':                      'de_CH.ISO8859-1',
+    'german_germany':                       'de_DE.ISO8859-1',
+    'gez_er':                               'gez_ER.UTF-8',
+    'gez_et':                               'gez_ET.UTF-8',
+    'gl':                                   'gl_ES.ISO8859-1',
+    'gl_es':                                'gl_ES.ISO8859-1',
+    'greek':                                'el_GR.ISO8859-7',
+    'gu_in':                                'gu_IN.UTF-8',
+    'gv':                                   'gv_GB.ISO8859-1',
+    'gv_gb':                                'gv_GB.ISO8859-1',
+    'ha_ng':                                'ha_NG.UTF-8',
+    'hak_tw':                               'hak_TW.UTF-8',
+    'he':                                   'he_IL.ISO8859-8',
+    'he_il':                                'he_IL.ISO8859-8',
+    'hebrew':                               'he_IL.ISO8859-8',
+    'hi':                                   'hi_IN.ISCII-DEV',
+    'hi_in':                                'hi_IN.ISCII-DEV',
+    'hi_in.isciidev':                       'hi_IN.ISCII-DEV',
+    'hif_fj':                               'hif_FJ.UTF-8',
+    'hne':                                  'hne_IN.UTF-8',
+    'hne_in':                               'hne_IN.UTF-8',
+    'hr':                                   'hr_HR.ISO8859-2',
+    'hr_hr':                                'hr_HR.ISO8859-2',
+    'hrvatski':                             'hr_HR.ISO8859-2',
+    'hsb_de':                               'hsb_DE.ISO8859-2',
+    'ht_ht':                                'ht_HT.UTF-8',
+    'hu':                                   'hu_HU.ISO8859-2',
+    'hu_hu':                                'hu_HU.ISO8859-2',
+    'hungarian':                            'hu_HU.ISO8859-2',
+    'hy_am':                                'hy_AM.UTF-8',
+    'hy_am.armscii8':                       'hy_AM.ARMSCII_8',
+    'ia':                                   'ia.UTF-8',
+    'ia_fr':                                'ia_FR.UTF-8',
+    'icelandic':                            'is_IS.ISO8859-1',
+    'id':                                   'id_ID.ISO8859-1',
+    'id_id':                                'id_ID.ISO8859-1',
+    'ig_ng':                                'ig_NG.UTF-8',
+    'ik_ca':                                'ik_CA.UTF-8',
+    'in':                                   'id_ID.ISO8859-1',
+    'in_id':                                'id_ID.ISO8859-1',
+    'is':                                   'is_IS.ISO8859-1',
+    'is_is':                                'is_IS.ISO8859-1',
+    'iso-8859-1':                           'en_US.ISO8859-1',
+    'iso-8859-15':                          'en_US.ISO8859-15',
+    'iso8859-1':                            'en_US.ISO8859-1',
+    'iso8859-15':                           'en_US.ISO8859-15',
+    'iso_8859_1':                           'en_US.ISO8859-1',
+    'iso_8859_15':                          'en_US.ISO8859-15',
+    'it':                                   'it_IT.ISO8859-1',
+    'it_ch':                                'it_CH.ISO8859-1',
+    'it_it':                                'it_IT.ISO8859-1',
+    'italian':                              'it_IT.ISO8859-1',
+    'iu':                                   'iu_CA.NUNACOM-8',
+    'iu_ca':                                'iu_CA.NUNACOM-8',
+    'iu_ca.nunacom8':                       'iu_CA.NUNACOM-8',
+    'iw':                                   'he_IL.ISO8859-8',
+    'iw_il':                                'he_IL.ISO8859-8',
+    'iw_il.utf8':                           'iw_IL.UTF-8',
+    'ja':                                   'ja_JP.eucJP',
+    'ja_jp':                                'ja_JP.eucJP',
+    'ja_jp.euc':                            'ja_JP.eucJP',
+    'ja_jp.mscode':                         'ja_JP.SJIS',
+    'ja_jp.pck':                            'ja_JP.SJIS',
+    'japan':                                'ja_JP.eucJP',
+    'japanese':                             'ja_JP.eucJP',
+    'japanese-euc':                         'ja_JP.eucJP',
+    'japanese.euc':                         'ja_JP.eucJP',
+    'jp_jp':                                'ja_JP.eucJP',
+    'ka':                                   'ka_GE.GEORGIAN-ACADEMY',
+    'ka_ge':                                'ka_GE.GEORGIAN-ACADEMY',
+    'ka_ge.georgianacademy':                'ka_GE.GEORGIAN-ACADEMY',
+    'ka_ge.georgianps':                     'ka_GE.GEORGIAN-PS',
+    'ka_ge.georgianrs':                     'ka_GE.GEORGIAN-ACADEMY',
+    'kab_dz':                               'kab_DZ.UTF-8',
+    'kk_kz':                                'kk_KZ.ptcp154',
+    'kl':                                   'kl_GL.ISO8859-1',
+    'kl_gl':                                'kl_GL.ISO8859-1',
+    'km_kh':                                'km_KH.UTF-8',
+    'kn':                                   'kn_IN.UTF-8',
+    'kn_in':                                'kn_IN.UTF-8',
+    'ko':                                   'ko_KR.eucKR',
+    'ko_kr':                                'ko_KR.eucKR',
+    'ko_kr.euc':                            'ko_KR.eucKR',
+    'kok_in':                               'kok_IN.UTF-8',
+    'korean':                               'ko_KR.eucKR',
+    'korean.euc':                           'ko_KR.eucKR',
+    'ks':                                   'ks_IN.UTF-8',
+    'ks_in':                                'ks_IN.UTF-8',
+    'ks_in@devanagari.utf8':                'ks_IN.UTF-8@devanagari',
+    'ku_tr':                                'ku_TR.ISO8859-9',
+    'kw':                                   'kw_GB.ISO8859-1',
+    'kw_gb':                                'kw_GB.ISO8859-1',
+    'ky':                                   'ky_KG.UTF-8',
+    'ky_kg':                                'ky_KG.UTF-8',
+    'lb_lu':                                'lb_LU.UTF-8',
+    'lg_ug':                                'lg_UG.ISO8859-10',
+    'li_be':                                'li_BE.UTF-8',
+    'li_nl':                                'li_NL.UTF-8',
+    'lij_it':                               'lij_IT.UTF-8',
+    'lithuanian':                           'lt_LT.ISO8859-13',
+    'ln_cd':                                'ln_CD.UTF-8',
+    'lo':                                   'lo_LA.MULELAO-1',
+    'lo_la':                                'lo_LA.MULELAO-1',
+    'lo_la.cp1133':                         'lo_LA.IBM-CP1133',
+    'lo_la.ibmcp1133':                      'lo_LA.IBM-CP1133',
+    'lo_la.mulelao1':                       'lo_LA.MULELAO-1',
+    'lt':                                   'lt_LT.ISO8859-13',
+    'lt_lt':                                'lt_LT.ISO8859-13',
+    'lv':                                   'lv_LV.ISO8859-13',
+    'lv_lv':                                'lv_LV.ISO8859-13',
+    'lzh_tw':                               'lzh_TW.UTF-8',
+    'mag_in':                               'mag_IN.UTF-8',
+    'mai':                                  'mai_IN.UTF-8',
+    'mai_in':                               'mai_IN.UTF-8',
+    'mai_np':                               'mai_NP.UTF-8',
+    'mfe_mu':                               'mfe_MU.UTF-8',
+    'mg_mg':                                'mg_MG.ISO8859-15',
+    'mhr_ru':                               'mhr_RU.UTF-8',
+    'mi':                                   'mi_NZ.ISO8859-1',
+    'mi_nz':                                'mi_NZ.ISO8859-1',
+    'miq_ni':                               'miq_NI.UTF-8',
+    'mjw_in':                               'mjw_IN.UTF-8',
+    'mk':                                   'mk_MK.ISO8859-5',
+    'mk_mk':                                'mk_MK.ISO8859-5',
+    'ml':                                   'ml_IN.UTF-8',
+    'ml_in':                                'ml_IN.UTF-8',
+    'mn_mn':                                'mn_MN.UTF-8',
+    'mni_in':                               'mni_IN.UTF-8',
+    'mr':                                   'mr_IN.UTF-8',
+    'mr_in':                                'mr_IN.UTF-8',
+    'ms':                                   'ms_MY.ISO8859-1',
+    'ms_my':                                'ms_MY.ISO8859-1',
+    'mt':                                   'mt_MT.ISO8859-3',
+    'mt_mt':                                'mt_MT.ISO8859-3',
+    'my_mm':                                'my_MM.UTF-8',
+    'nan_tw':                               'nan_TW.UTF-8',
+    'nb':                                   'nb_NO.ISO8859-1',
+    'nb_no':                                'nb_NO.ISO8859-1',
+    'nds_de':                               'nds_DE.UTF-8',
+    'nds_nl':                               'nds_NL.UTF-8',
+    'ne_np':                                'ne_NP.UTF-8',
+    'nhn_mx':                               'nhn_MX.UTF-8',
+    'niu_nu':                               'niu_NU.UTF-8',
+    'niu_nz':                               'niu_NZ.UTF-8',
+    'nl':                                   'nl_NL.ISO8859-1',
+    'nl_aw':                                'nl_AW.UTF-8',
+    'nl_be':                                'nl_BE.ISO8859-1',
+    'nl_nl':                                'nl_NL.ISO8859-1',
+    'nn':                                   'nn_NO.ISO8859-1',
+    'nn_no':                                'nn_NO.ISO8859-1',
+    'no':                                   'no_NO.ISO8859-1',
+    'no@nynorsk':                           'ny_NO.ISO8859-1',
+    'no_no':                                'no_NO.ISO8859-1',
+    'no_no.iso88591@bokmal':                'no_NO.ISO8859-1',
+    'no_no.iso88591@nynorsk':               'no_NO.ISO8859-1',
+    'norwegian':                            'no_NO.ISO8859-1',
+    'nr':                                   'nr_ZA.ISO8859-1',
+    'nr_za':                                'nr_ZA.ISO8859-1',
+    'nso':                                  'nso_ZA.ISO8859-15',
+    'nso_za':                               'nso_ZA.ISO8859-15',
+    'ny':                                   'ny_NO.ISO8859-1',
+    'ny_no':                                'ny_NO.ISO8859-1',
+    'nynorsk':                              'nn_NO.ISO8859-1',
+    'oc':                                   'oc_FR.ISO8859-1',
+    'oc_fr':                                'oc_FR.ISO8859-1',
+    'om_et':                                'om_ET.UTF-8',
+    'om_ke':                                'om_KE.ISO8859-1',
+    'or':                                   'or_IN.UTF-8',
+    'or_in':                                'or_IN.UTF-8',
+    'os_ru':                                'os_RU.UTF-8',
+    'pa':                                   'pa_IN.UTF-8',
+    'pa_in':                                'pa_IN.UTF-8',
+    'pa_pk':                                'pa_PK.UTF-8',
+    'pap_an':                               'pap_AN.UTF-8',
+    'pap_aw':                               'pap_AW.UTF-8',
+    'pap_cw':                               'pap_CW.UTF-8',
+    'pd':                                   'pd_US.ISO8859-1',
+    'pd_de':                                'pd_DE.ISO8859-1',
+    'pd_us':                                'pd_US.ISO8859-1',
+    'ph':                                   'ph_PH.ISO8859-1',
+    'ph_ph':                                'ph_PH.ISO8859-1',
+    'pl':                                   'pl_PL.ISO8859-2',
+    'pl_pl':                                'pl_PL.ISO8859-2',
+    'polish':                               'pl_PL.ISO8859-2',
+    'portuguese':                           'pt_PT.ISO8859-1',
+    'portuguese_brazil':                    'pt_BR.ISO8859-1',
+    'posix':                                'C',
+    'posix-utf2':                           'C',
+    'pp':                                   'pp_AN.ISO8859-1',
+    'pp_an':                                'pp_AN.ISO8859-1',
+    'ps_af':                                'ps_AF.UTF-8',
+    'pt':                                   'pt_PT.ISO8859-1',
+    'pt_br':                                'pt_BR.ISO8859-1',
+    'pt_pt':                                'pt_PT.ISO8859-1',
+    'quz_pe':                               'quz_PE.UTF-8',
+    'raj_in':                               'raj_IN.UTF-8',
+    'ro':                                   'ro_RO.ISO8859-2',
+    'ro_ro':                                'ro_RO.ISO8859-2',
+    'romanian':                             'ro_RO.ISO8859-2',
+    'ru':                                   'ru_RU.UTF-8',
+    'ru_ru':                                'ru_RU.UTF-8',
+    'ru_ua':                                'ru_UA.KOI8-U',
+    'rumanian':                             'ro_RO.ISO8859-2',
+    'russian':                              'ru_RU.KOI8-R',
+    'rw':                                   'rw_RW.ISO8859-1',
+    'rw_rw':                                'rw_RW.ISO8859-1',
+    'sa_in':                                'sa_IN.UTF-8',
+    'sat_in':                               'sat_IN.UTF-8',
+    'sc_it':                                'sc_IT.UTF-8',
+    'sd':                                   'sd_IN.UTF-8',
+    'sd_in':                                'sd_IN.UTF-8',
+    'sd_in@devanagari.utf8':                'sd_IN.UTF-8@devanagari',
+    'sd_pk':                                'sd_PK.UTF-8',
+    'se_no':                                'se_NO.UTF-8',
+    'serbocroatian':                        'sr_RS.UTF-8@latin',
+    'sgs_lt':                               'sgs_LT.UTF-8',
+    'sh':                                   'sr_RS.UTF-8@latin',
+    'sh_ba.iso88592@bosnia':                'sr_CS.ISO8859-2',
+    'sh_hr':                                'sh_HR.ISO8859-2',
+    'sh_hr.iso88592':                       'hr_HR.ISO8859-2',
+    'sh_sp':                                'sr_CS.ISO8859-2',
+    'sh_yu':                                'sr_RS.UTF-8@latin',
+    'shn_mm':                               'shn_MM.UTF-8',
+    'shs_ca':                               'shs_CA.UTF-8',
+    'si':                                   'si_LK.UTF-8',
+    'si_lk':                                'si_LK.UTF-8',
+    'sid_et':                               'sid_ET.UTF-8',
+    'sinhala':                              'si_LK.UTF-8',
+    'sk':                                   'sk_SK.ISO8859-2',
+    'sk_sk':                                'sk_SK.ISO8859-2',
+    'sl':                                   'sl_SI.ISO8859-2',
+    'sl_cs':                                'sl_CS.ISO8859-2',
+    'sl_si':                                'sl_SI.ISO8859-2',
+    'slovak':                               'sk_SK.ISO8859-2',
+    'slovene':                              'sl_SI.ISO8859-2',
+    'slovenian':                            'sl_SI.ISO8859-2',
+    'sm_ws':                                'sm_WS.UTF-8',
+    'so_dj':                                'so_DJ.ISO8859-1',
+    'so_et':                                'so_ET.UTF-8',
+    'so_ke':                                'so_KE.ISO8859-1',
+    'so_so':                                'so_SO.ISO8859-1',
+    'sp':                                   'sr_CS.ISO8859-5',
+    'sp_yu':                                'sr_CS.ISO8859-5',
+    'spanish':                              'es_ES.ISO8859-1',
+    'spanish_spain':                        'es_ES.ISO8859-1',
+    'sq':                                   'sq_AL.ISO8859-2',
+    'sq_al':                                'sq_AL.ISO8859-2',
+    'sq_mk':                                'sq_MK.UTF-8',
+    'sr':                                   'sr_RS.UTF-8',
+    'sr@cyrillic':                          'sr_RS.UTF-8',
+    'sr@latn':                              'sr_CS.UTF-8@latin',
+    'sr_cs':                                'sr_CS.UTF-8',
+    'sr_cs.iso88592@latn':                  'sr_CS.ISO8859-2',
+    'sr_cs@latn':                           'sr_CS.UTF-8@latin',
+    'sr_me':                                'sr_ME.UTF-8',
+    'sr_rs':                                'sr_RS.UTF-8',
+    'sr_rs@latn':                           'sr_RS.UTF-8@latin',
+    'sr_sp':                                'sr_CS.ISO8859-2',
+    'sr_yu':                                'sr_RS.UTF-8@latin',
+    'sr_yu.cp1251@cyrillic':                'sr_CS.CP1251',
+    'sr_yu.iso88592':                       'sr_CS.ISO8859-2',
+    'sr_yu.iso88595':                       'sr_CS.ISO8859-5',
+    'sr_yu.iso88595@cyrillic':              'sr_CS.ISO8859-5',
+    'sr_yu.microsoftcp1251@cyrillic':       'sr_CS.CP1251',
+    'sr_yu.utf8':                           'sr_RS.UTF-8',
+    'sr_yu.utf8@cyrillic':                  'sr_RS.UTF-8',
+    'sr_yu@cyrillic':                       'sr_RS.UTF-8',
+    'ss':                                   'ss_ZA.ISO8859-1',
+    'ss_za':                                'ss_ZA.ISO8859-1',
+    'st':                                   'st_ZA.ISO8859-1',
+    'st_za':                                'st_ZA.ISO8859-1',
+    'sv':                                   'sv_SE.ISO8859-1',
+    'sv_fi':                                'sv_FI.ISO8859-1',
+    'sv_se':                                'sv_SE.ISO8859-1',
+    'sw_ke':                                'sw_KE.UTF-8',
+    'sw_tz':                                'sw_TZ.UTF-8',
+    'swedish':                              'sv_SE.ISO8859-1',
+    'szl_pl':                               'szl_PL.UTF-8',
+    'ta':                                   'ta_IN.TSCII-0',
+    'ta_in':                                'ta_IN.TSCII-0',
+    'ta_in.tscii':                          'ta_IN.TSCII-0',
+    'ta_in.tscii0':                         'ta_IN.TSCII-0',
+    'ta_lk':                                'ta_LK.UTF-8',
+    'tcy_in.utf8':                          'tcy_IN.UTF-8',
+    'te':                                   'te_IN.UTF-8',
+    'te_in':                                'te_IN.UTF-8',
+    'tg':                                   'tg_TJ.KOI8-C',
+    'tg_tj':                                'tg_TJ.KOI8-C',
+    'th':                                   'th_TH.ISO8859-11',
+    'th_th':                                'th_TH.ISO8859-11',
+    'th_th.tactis':                         'th_TH.TIS620',
+    'th_th.tis620':                         'th_TH.TIS620',
+    'thai':                                 'th_TH.ISO8859-11',
+    'the_np':                               'the_NP.UTF-8',
+    'ti_er':                                'ti_ER.UTF-8',
+    'ti_et':                                'ti_ET.UTF-8',
+    'tig_er':                               'tig_ER.UTF-8',
+    'tk_tm':                                'tk_TM.UTF-8',
+    'tl':                                   'tl_PH.ISO8859-1',
+    'tl_ph':                                'tl_PH.ISO8859-1',
+    'tn':                                   'tn_ZA.ISO8859-15',
+    'tn_za':                                'tn_ZA.ISO8859-15',
+    'to_to':                                'to_TO.UTF-8',
+    'tpi_pg':                               'tpi_PG.UTF-8',
+    'tr':                                   'tr_TR.ISO8859-9',
+    'tr_cy':                                'tr_CY.ISO8859-9',
+    'tr_tr':                                'tr_TR.ISO8859-9',
+    'ts':                                   'ts_ZA.ISO8859-1',
+    'ts_za':                                'ts_ZA.ISO8859-1',
+    'tt':                                   'tt_RU.TATAR-CYR',
+    'tt_ru':                                'tt_RU.TATAR-CYR',
+    'tt_ru.tatarcyr':                       'tt_RU.TATAR-CYR',
+    'tt_ru@iqtelif':                        'tt_RU.UTF-8@iqtelif',
+    'turkish':                              'tr_TR.ISO8859-9',
+    'ug_cn':                                'ug_CN.UTF-8',
+    'uk':                                   'uk_UA.KOI8-U',
+    'uk_ua':                                'uk_UA.KOI8-U',
+    'univ':                                 'en_US.utf',
+    'universal':                            'en_US.utf',
+    'universal.utf8@ucs4':                  'en_US.UTF-8',
+    'unm_us':                               'unm_US.UTF-8',
+    'ur':                                   'ur_PK.CP1256',
+    'ur_in':                                'ur_IN.UTF-8',
+    'ur_pk':                                'ur_PK.CP1256',
+    'uz':                                   'uz_UZ.UTF-8',
+    'uz_uz':                                'uz_UZ.UTF-8',
+    'uz_uz@cyrillic':                       'uz_UZ.UTF-8',
+    've':                                   've_ZA.UTF-8',
+    've_za':                                've_ZA.UTF-8',
+    'vi':                                   'vi_VN.TCVN',
+    'vi_vn':                                'vi_VN.TCVN',
+    'vi_vn.tcvn':                           'vi_VN.TCVN',
+    'vi_vn.tcvn5712':                       'vi_VN.TCVN',
+    'vi_vn.viscii':                         'vi_VN.VISCII',
+    'vi_vn.viscii111':                      'vi_VN.VISCII',
+    'wa':                                   'wa_BE.ISO8859-1',
+    'wa_be':                                'wa_BE.ISO8859-1',
+    'wae_ch':                               'wae_CH.UTF-8',
+    'wal_et':                               'wal_ET.UTF-8',
+    'wo_sn':                                'wo_SN.UTF-8',
+    'xh':                                   'xh_ZA.ISO8859-1',
+    'xh_za':                                'xh_ZA.ISO8859-1',
+    'yi':                                   'yi_US.CP1255',
+    'yi_us':                                'yi_US.CP1255',
+    'yo_ng':                                'yo_NG.UTF-8',
+    'yue_hk':                               'yue_HK.UTF-8',
+    'yuw_pg':                               'yuw_PG.UTF-8',
+    'zh':                                   'zh_CN.eucCN',
+    'zh_cn':                                'zh_CN.gb2312',
+    'zh_cn.big5':                           'zh_TW.big5',
+    'zh_cn.euc':                            'zh_CN.eucCN',
+    'zh_hk':                                'zh_HK.big5hkscs',
+    'zh_hk.big5hk':                         'zh_HK.big5hkscs',
+    'zh_sg':                                'zh_SG.GB2312',
+    'zh_sg.gbk':                            'zh_SG.GBK',
+    'zh_tw':                                'zh_TW.big5',
+    'zh_tw.euc':                            'zh_TW.eucTW',
+    'zh_tw.euctw':                          'zh_TW.eucTW',
+    'zu':                                   'zu_ZA.ISO8859-1',
+    'zu_za':                                'zu_ZA.ISO8859-1',
+}
+
+#
+# This maps Windows language identifiers to locale strings.
+#
+# This list has been updated from
+# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
+# to include every locale up to Windows Vista.
+#
+# NOTE: this mapping is incomplete.  If your language is missing, please
+# submit a bug report to the Python bug tracker at http://bugs.python.org/
+# Make sure you include the missing language identifier and the suggested
+# locale code.
+#
+
+windows_locale = {
+    0x0436: "af_ZA", # Afrikaans
+    0x041c: "sq_AL", # Albanian
+    0x0484: "gsw_FR",# Alsatian - France
+    0x045e: "am_ET", # Amharic - Ethiopia
+    0x0401: "ar_SA", # Arabic - Saudi Arabia
+    0x0801: "ar_IQ", # Arabic - Iraq
+    0x0c01: "ar_EG", # Arabic - Egypt
+    0x1001: "ar_LY", # Arabic - Libya
+    0x1401: "ar_DZ", # Arabic - Algeria
+    0x1801: "ar_MA", # Arabic - Morocco
+    0x1c01: "ar_TN", # Arabic - Tunisia
+    0x2001: "ar_OM", # Arabic - Oman
+    0x2401: "ar_YE", # Arabic - Yemen
+    0x2801: "ar_SY", # Arabic - Syria
+    0x2c01: "ar_JO", # Arabic - Jordan
+    0x3001: "ar_LB", # Arabic - Lebanon
+    0x3401: "ar_KW", # Arabic - Kuwait
+    0x3801: "ar_AE", # Arabic - United Arab Emirates
+    0x3c01: "ar_BH", # Arabic - Bahrain
+    0x4001: "ar_QA", # Arabic - Qatar
+    0x042b: "hy_AM", # Armenian
+    0x044d: "as_IN", # Assamese - India
+    0x042c: "az_AZ", # Azeri - Latin
+    0x082c: "az_AZ", # Azeri - Cyrillic
+    0x046d: "ba_RU", # Bashkir
+    0x042d: "eu_ES", # Basque - Russia
+    0x0423: "be_BY", # Belarusian
+    0x0445: "bn_IN", # Begali
+    0x201a: "bs_BA", # Bosnian - Cyrillic
+    0x141a: "bs_BA", # Bosnian - Latin
+    0x047e: "br_FR", # Breton - France
+    0x0402: "bg_BG", # Bulgarian
+#    0x0455: "my_MM", # Burmese - Not supported
+    0x0403: "ca_ES", # Catalan
+    0x0004: "zh_CHS",# Chinese - Simplified
+    0x0404: "zh_TW", # Chinese - Taiwan
+    0x0804: "zh_CN", # Chinese - PRC
+    0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
+    0x1004: "zh_SG", # Chinese - Singapore
+    0x1404: "zh_MO", # Chinese - Macao S.A.R.
+    0x7c04: "zh_CHT",# Chinese - Traditional
+    0x0483: "co_FR", # Corsican - France
+    0x041a: "hr_HR", # Croatian
+    0x101a: "hr_BA", # Croatian - Bosnia
+    0x0405: "cs_CZ", # Czech
+    0x0406: "da_DK", # Danish
+    0x048c: "gbz_AF",# Dari - Afghanistan
+    0x0465: "div_MV",# Divehi - Maldives
+    0x0413: "nl_NL", # Dutch - The Netherlands
+    0x0813: "nl_BE", # Dutch - Belgium
+    0x0409: "en_US", # English - United States
+    0x0809: "en_GB", # English - United Kingdom
+    0x0c09: "en_AU", # English - Australia
+    0x1009: "en_CA", # English - Canada
+    0x1409: "en_NZ", # English - New Zealand
+    0x1809: "en_IE", # English - Ireland
+    0x1c09: "en_ZA", # English - South Africa
+    0x2009: "en_JA", # English - Jamaica
+    0x2409: "en_CB", # English - Caribbean
+    0x2809: "en_BZ", # English - Belize
+    0x2c09: "en_TT", # English - Trinidad
+    0x3009: "en_ZW", # English - Zimbabwe
+    0x3409: "en_PH", # English - Philippines
+    0x4009: "en_IN", # English - India
+    0x4409: "en_MY", # English - Malaysia
+    0x4809: "en_IN", # English - Singapore
+    0x0425: "et_EE", # Estonian
+    0x0438: "fo_FO", # Faroese
+    0x0464: "fil_PH",# Filipino
+    0x040b: "fi_FI", # Finnish
+    0x040c: "fr_FR", # French - France
+    0x080c: "fr_BE", # French - Belgium
+    0x0c0c: "fr_CA", # French - Canada
+    0x100c: "fr_CH", # French - Switzerland
+    0x140c: "fr_LU", # French - Luxembourg
+    0x180c: "fr_MC", # French - Monaco
+    0x0462: "fy_NL", # Frisian - Netherlands
+    0x0456: "gl_ES", # Galician
+    0x0437: "ka_GE", # Georgian
+    0x0407: "de_DE", # German - Germany
+    0x0807: "de_CH", # German - Switzerland
+    0x0c07: "de_AT", # German - Austria
+    0x1007: "de_LU", # German - Luxembourg
+    0x1407: "de_LI", # German - Liechtenstein
+    0x0408: "el_GR", # Greek
+    0x046f: "kl_GL", # Greenlandic - Greenland
+    0x0447: "gu_IN", # Gujarati
+    0x0468: "ha_NG", # Hausa - Latin
+    0x040d: "he_IL", # Hebrew
+    0x0439: "hi_IN", # Hindi
+    0x040e: "hu_HU", # Hungarian
+    0x040f: "is_IS", # Icelandic
+    0x0421: "id_ID", # Indonesian
+    0x045d: "iu_CA", # Inuktitut - Syllabics
+    0x085d: "iu_CA", # Inuktitut - Latin
+    0x083c: "ga_IE", # Irish - Ireland
+    0x0410: "it_IT", # Italian - Italy
+    0x0810: "it_CH", # Italian - Switzerland
+    0x0411: "ja_JP", # Japanese
+    0x044b: "kn_IN", # Kannada - India
+    0x043f: "kk_KZ", # Kazakh
+    0x0453: "kh_KH", # Khmer - Cambodia
+    0x0486: "qut_GT",# K'iche - Guatemala
+    0x0487: "rw_RW", # Kinyarwanda - Rwanda
+    0x0457: "kok_IN",# Konkani
+    0x0412: "ko_KR", # Korean
+    0x0440: "ky_KG", # Kyrgyz
+    0x0454: "lo_LA", # Lao - Lao PDR
+    0x0426: "lv_LV", # Latvian
+    0x0427: "lt_LT", # Lithuanian
+    0x082e: "dsb_DE",# Lower Sorbian - Germany
+    0x046e: "lb_LU", # Luxembourgish
+    0x042f: "mk_MK", # FYROM Macedonian
+    0x043e: "ms_MY", # Malay - Malaysia
+    0x083e: "ms_BN", # Malay - Brunei Darussalam
+    0x044c: "ml_IN", # Malayalam - India
+    0x043a: "mt_MT", # Maltese
+    0x0481: "mi_NZ", # Maori
+    0x047a: "arn_CL",# Mapudungun
+    0x044e: "mr_IN", # Marathi
+    0x047c: "moh_CA",# Mohawk - Canada
+    0x0450: "mn_MN", # Mongolian - Cyrillic
+    0x0850: "mn_CN", # Mongolian - PRC
+    0x0461: "ne_NP", # Nepali
+    0x0414: "nb_NO", # Norwegian - Bokmal
+    0x0814: "nn_NO", # Norwegian - Nynorsk
+    0x0482: "oc_FR", # Occitan - France
+    0x0448: "or_IN", # Oriya - India
+    0x0463: "ps_AF", # Pashto - Afghanistan
+    0x0429: "fa_IR", # Persian
+    0x0415: "pl_PL", # Polish
+    0x0416: "pt_BR", # Portuguese - Brazil
+    0x0816: "pt_PT", # Portuguese - Portugal
+    0x0446: "pa_IN", # Punjabi
+    0x046b: "quz_BO",# Quechua (Bolivia)
+    0x086b: "quz_EC",# Quechua (Ecuador)
+    0x0c6b: "quz_PE",# Quechua (Peru)
+    0x0418: "ro_RO", # Romanian - Romania
+    0x0417: "rm_CH", # Romansh
+    0x0419: "ru_RU", # Russian
+    0x243b: "smn_FI",# Sami Finland
+    0x103b: "smj_NO",# Sami Norway
+    0x143b: "smj_SE",# Sami Sweden
+    0x043b: "se_NO", # Sami Northern Norway
+    0x083b: "se_SE", # Sami Northern Sweden
+    0x0c3b: "se_FI", # Sami Northern Finland
+    0x203b: "sms_FI",# Sami Skolt
+    0x183b: "sma_NO",# Sami Southern Norway
+    0x1c3b: "sma_SE",# Sami Southern Sweden
+    0x044f: "sa_IN", # Sanskrit
+    0x0c1a: "sr_SP", # Serbian - Cyrillic
+    0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
+    0x081a: "sr_SP", # Serbian - Latin
+    0x181a: "sr_BA", # Serbian - Bosnia Latin
+    0x045b: "si_LK", # Sinhala - Sri Lanka
+    0x046c: "ns_ZA", # Northern Sotho
+    0x0432: "tn_ZA", # Setswana - Southern Africa
+    0x041b: "sk_SK", # Slovak
+    0x0424: "sl_SI", # Slovenian
+    0x040a: "es_ES", # Spanish - Spain
+    0x080a: "es_MX", # Spanish - Mexico
+    0x0c0a: "es_ES", # Spanish - Spain (Modern)
+    0x100a: "es_GT", # Spanish - Guatemala
+    0x140a: "es_CR", # Spanish - Costa Rica
+    0x180a: "es_PA", # Spanish - Panama
+    0x1c0a: "es_DO", # Spanish - Dominican Republic
+    0x200a: "es_VE", # Spanish - Venezuela
+    0x240a: "es_CO", # Spanish - Colombia
+    0x280a: "es_PE", # Spanish - Peru
+    0x2c0a: "es_AR", # Spanish - Argentina
+    0x300a: "es_EC", # Spanish - Ecuador
+    0x340a: "es_CL", # Spanish - Chile
+    0x380a: "es_UR", # Spanish - Uruguay
+    0x3c0a: "es_PY", # Spanish - Paraguay
+    0x400a: "es_BO", # Spanish - Bolivia
+    0x440a: "es_SV", # Spanish - El Salvador
+    0x480a: "es_HN", # Spanish - Honduras
+    0x4c0a: "es_NI", # Spanish - Nicaragua
+    0x500a: "es_PR", # Spanish - Puerto Rico
+    0x540a: "es_US", # Spanish - United States
+#    0x0430: "", # Sutu - Not supported
+    0x0441: "sw_KE", # Swahili
+    0x041d: "sv_SE", # Swedish - Sweden
+    0x081d: "sv_FI", # Swedish - Finland
+    0x045a: "syr_SY",# Syriac
+    0x0428: "tg_TJ", # Tajik - Cyrillic
+    0x085f: "tmz_DZ",# Tamazight - Latin
+    0x0449: "ta_IN", # Tamil
+    0x0444: "tt_RU", # Tatar
+    0x044a: "te_IN", # Telugu
+    0x041e: "th_TH", # Thai
+    0x0851: "bo_BT", # Tibetan - Bhutan
+    0x0451: "bo_CN", # Tibetan - PRC
+    0x041f: "tr_TR", # Turkish
+    0x0442: "tk_TM", # Turkmen - Cyrillic
+    0x0480: "ug_CN", # Uighur - Arabic
+    0x0422: "uk_UA", # Ukrainian
+    0x042e: "wen_DE",# Upper Sorbian - Germany
+    0x0420: "ur_PK", # Urdu
+    0x0820: "ur_IN", # Urdu - India
+    0x0443: "uz_UZ", # Uzbek - Latin
+    0x0843: "uz_UZ", # Uzbek - Cyrillic
+    0x042a: "vi_VN", # Vietnamese
+    0x0452: "cy_GB", # Welsh
+    0x0488: "wo_SN", # Wolof - Senegal
+    0x0434: "xh_ZA", # Xhosa - South Africa
+    0x0485: "sah_RU",# Yakut - Cyrillic
+    0x0478: "ii_CN", # Yi - PRC
+    0x046a: "yo_NG", # Yoruba - Nigeria
+    0x0435: "zu_ZA", # Zulu
+}
+
+def _print_locale():
+
+    """ Test function.
+    """
+    categories = {}
+    def _init_categories(categories=categories):
+        for k,v in globals().items():
+            if k[:3] == 'LC_':
+                categories[k] = v
+    _init_categories()
+    del categories['LC_ALL']
+
+    print('Locale defaults as determined by getdefaultlocale():')
+    print('-'*72)
+    lang, enc = getdefaultlocale()
+    print('Language: ', lang or '(undefined)')
+    print('Encoding: ', enc or '(undefined)')
+    print()
+
+    print('Locale settings on startup:')
+    print('-'*72)
+    for name,category in categories.items():
+        print(name, '...')
+        lang, enc = getlocale(category)
+        print('   Language: ', lang or '(undefined)')
+        print('   Encoding: ', enc or '(undefined)')
+        print()
+
+    print()
+    print('Locale settings after calling resetlocale():')
+    print('-'*72)
+    resetlocale()
+    for name,category in categories.items():
+        print(name, '...')
+        lang, enc = getlocale(category)
+        print('   Language: ', lang or '(undefined)')
+        print('   Encoding: ', enc or '(undefined)')
+        print()
+
+    try:
+        setlocale(LC_ALL, "")
+    except:
+        print('NOTE:')
+        print('setlocale(LC_ALL, "") does not support the default locale')
+        print('given in the OS environment variables.')
+    else:
+        print()
+        print('Locale settings after calling setlocale(LC_ALL, ""):')
+        print('-'*72)
+        for name,category in categories.items():
+            print(name, '...')
+            lang, enc = getlocale(category)
+            print('   Language: ', lang or '(undefined)')
+            print('   Encoding: ', enc or '(undefined)')
+            print()
+
+###
+
+try:
+    LC_MESSAGES
+except NameError:
+    pass
+else:
+    __all__.append("LC_MESSAGES")
+
+if __name__=='__main__':
+    print('Locale aliasing:')
+    print()
+    _print_locale()
+    print()
+    print('Number formatting:')
+    print()
+    _test()
author	thegeorg <thegeorg@yandex-team.com>	2024-02-19 02:38:52 +0300
committer	thegeorg <thegeorg@yandex-team.com>	2024-02-19 02:50:43 +0300
commit	d96fa07134c06472bfee6718b5cfd1679196fc99 (patch)
tree	31ec344fa9d3ff8dc038692516b6438dfbdb8a2d /contrib/tools/python3/Lib/locale.py
parent	452cf9e068aef7110e35e654c5d47eb80111ef89 (diff)
download	ydb-d96fa07134c06472bfee6718b5cfd1679196fc99.tar.gz