1 files changed, 1544 insertions, 0 deletions
diff --git a/contrib/tools/python3/Modules/unicodedata.c b/contrib/tools/python3/Modules/unicodedata.c
new file mode 100644
index 0000000000..41dcd5f8f8
--- /dev/null
+++ b/contrib/tools/python3/Modules/unicodedata.c
@@ -0,0 +1,1544 @@
+/* ------------------------------------------------------------------------
+
+   unicodedata -- Provides access to the Unicode database.
+
+   The current version number is reported in the unidata_version constant.
+
+   Written by Marc-Andre Lemburg (mal@lemburg.com).
+   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
+   Modified by Martin v. Löwis (martin@v.loewis.de)
+
+   Copyright (c) Corporation for National Research Initiatives.
+
+   ------------------------------------------------------------------------ */
+
+#ifndef Py_BUILD_CORE_BUILTIN
+#  define Py_BUILD_CORE_MODULE 1
+#endif
+
+#define PY_SSIZE_T_CLEAN
+
+#include "Python.h"
+#include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
+#include "structmember.h"         // PyMemberDef
+
+#include <stdbool.h>
+
+/*[clinic input]
+module unicodedata
+class unicodedata.UCD 'PreviousDBVersion *' '<not used>'
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e47113e05924be43]*/
+
+/* character properties */
+
+typedef struct {   /* per-code-point properties, as returned by _getrecord_ex() */
+    const unsigned char category;       /* index into
+                                           _PyUnicode_CategoryNames */
+    const unsigned char combining;      /* combining class value 0 - 255 */
+    const unsigned char bidirectional;  /* index into
+                                           _PyUnicode_BidirectionalNames */
+    const unsigned char mirrored;       /* true if mirrored in bidir mode */
+    const unsigned char east_asian_width;       /* index into
+                                                   _PyUnicode_EastAsianWidthNames */
+    const unsigned char normalization_quick_check; /* packed 2-bit values, see is_normalized_quickcheck() */
+} _PyUnicode_DatabaseRecord;
+
+typedef struct change_record {
+    /* sequence of fields should be the same as in merge_old_version */
+    const unsigned char bidir_changed;      /* old bidirectional index; 0xFF = unchanged */
+    const unsigned char category_changed;   /* old category index; 0 is treated as unassigned, 0xFF = unchanged */
+    const unsigned char decimal_changed;    /* old decimal value; 0xFF = unchanged */
+    const unsigned char mirrored_changed;   /* old mirrored flag; 0xFF = unchanged */
+    const unsigned char east_asian_width_changed;   /* old width index; 0xFF = unchanged */
+    const double numeric_changed;           /* NOTE(review): not read by the code visible here -- confirm usage */
+} change_record;
+
+/* data file generated by Tools/unicode/makeunicodedata.py */
+#include "unicodedata_db.h"
+
+static const _PyUnicode_DatabaseRecord*
+_getrecord_ex(Py_UCS4 code)
+{
+    /* Record 0 is returned for anything beyond the Unicode code space. */
+    int slot = 0;
+
+    if (code < 0x110000) {
+        /* Two-stage lookup: index1 selects a page, index2 the record. */
+        const int page = index1[(code>>SHIFT)];
+        slot = index2[(page<<SHIFT)+(code&((1<<SHIFT)-1))];
+    }
+    return &_PyUnicode_Database_Records[slot];
+}
+
+/* ------------- Previous-version API ------------------------------------- */
+typedef struct previous_version {   /* instance layout of a UCD object */
+    PyObject_HEAD
+    const char *name;                           /* exposed read-only as "unidata_version" */
+    const change_record* (*getrecord)(Py_UCS4); /* per-code-point differences from the current data */
+    Py_UCS4 (*normalization)(Py_UCS4);          /* replacement code point for normalization, 0 if none */
+} PreviousDBVersion;
+
+#include "clinic/unicodedata.c.h"
+
+#define get_old_record(self, v)    ((((PreviousDBVersion*)(self))->getrecord)(v))
+/* ^ Fetch the change_record for code point v; self must be a PreviousDBVersion. */
+static PyMemberDef DB_members[] = {
+        {"unidata_version", T_STRING, offsetof(PreviousDBVersion, name), READONLY},
+        {NULL}
+};
+
+// Check if self is a unicodedata.UCD instance.
+// If self is NULL (when the PyCapsule C API is used), return 0.
+// PyModule_Check() is used to avoid having to retrieve the ucd_type.
+// See the unicodedata_functions comment for the rationale of this macro.
+#define UCD_Check(self) ((self) != NULL && !PyModule_Check(self))
+
+static PyObject*
+new_previous_version(PyTypeObject *ucd_type,
+                     const char*name, const change_record* (*getrecord)(Py_UCS4),
+                     Py_UCS4 (*normalization)(Py_UCS4))
+{
+    /* Create a GC-tracked UCD object wrapping the given version's hooks. */
+    PreviousDBVersion *ucd = PyObject_GC_New(PreviousDBVersion, ucd_type);
+    if (ucd != NULL) {
+        ucd->normalization = normalization;
+        ucd->getrecord = getrecord;
+        ucd->name = name;
+        PyObject_GC_Track(ucd);
+    }
+    return (PyObject*)ucd;  /* NULL when the allocation failed */
+}
+
+
+/* --- Module API --------------------------------------------------------- */
+
+/*[clinic input]
+unicodedata.UCD.decimal
+
+    self: self
+    chr: int(accept={str})
+    default: object=NULL
+    /
+
+Converts a Unicode character into its equivalent decimal value.
+
+Returns the decimal value assigned to the character chr as integer.
+If no such value is defined, default is returned, or, if not given,
+ValueError is raised.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_decimal_impl(PyObject *self, int chr,
+                             PyObject *default_value)
+/*[clinic end generated code: output=be23376e1a185231 input=933f8107993f23d0]*/
+{
+    const Py_UCS4 c = (Py_UCS4)chr;
+    long value;
+    int resolved = 0;   /* set once the old database has decided the answer */
+
+    /* A UCD object (previous database version) may override the result. */
+    if (UCD_Check(self)) {
+        const change_record *old = get_old_record(self, c);
+        if (old->category_changed == 0) {
+            /* unassigned in the old version: no decimal value */
+            value = -1;
+            resolved = 1;
+        }
+        else if (old->decimal_changed != 0xFF) {
+            /* 0xFF marks "unchanged"; anything else is the old value */
+            value = old->decimal_changed;
+            resolved = 1;
+        }
+    }
+    if (!resolved)
+        value = Py_UNICODE_TODECIMAL(c);
+
+    if (value >= 0)
+        return PyLong_FromLong(value);
+
+    /* No decimal value: fall back to the default, or fail without one. */
+    if (default_value != NULL)
+        return Py_NewRef(default_value);
+    PyErr_SetString(PyExc_ValueError,
+                    "not a decimal");
+    return NULL;
+}
+
+/*[clinic input]
+unicodedata.UCD.digit
+
+    self: self
+    chr: int(accept={str})
+    default: object=NULL
+    /
+
+Converts a Unicode character into its equivalent digit value.
+
+Returns the digit value assigned to the character chr as integer.
+If no such value is defined, default is returned, or, if not given,
+ValueError is raised.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_digit_impl(PyObject *self, int chr, PyObject *default_value)
+/*[clinic end generated code: output=96e18c950171fd2f input=e27d6e4565cd29f2]*/
+{
+    /* Unlike decimal(), self is not consulted here: no old-version
+       record is looked up for the digit value. */
+    const long digit = Py_UNICODE_TODIGIT((Py_UCS4)chr);
+
+    if (digit >= 0)
+        return PyLong_FromLong(digit);
+
+    /* A negative result means chr has no digit value. */
+    if (default_value != NULL)
+        return Py_NewRef(default_value);
+
+    PyErr_SetString(PyExc_ValueError, "not a digit");
+    return NULL;
+}
+
+/*[clinic input]
+unicodedata.UCD.numeric
+
+    self: self
+    chr: int(accept={str})
+    default: object=NULL
+    /
+
+Converts a Unicode character into its equivalent numeric value.
+
+Returns the numeric value assigned to the character chr as float.
+If no such value is defined, default is returned, or, if not given,
+ValueError is raised.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_numeric_impl(PyObject *self, int chr,
+                             PyObject *default_value)
+/*[clinic end generated code: output=53ce281fe85b10c4 input=fdf5871a5542893c]*/
+{
+    const Py_UCS4 c = (Py_UCS4)chr;
+    double value;
+    int resolved = 0;   /* set once the old database has decided the answer */
+
+    if (UCD_Check(self)) {
+        const change_record *old = get_old_record(self, c);
+        if (old->category_changed == 0) {
+            /* unassigned in the old version: no numeric value */
+            value = -1.0;
+            resolved = 1;
+        }
+        else if (old->decimal_changed != 0xFF) {
+            /* NOTE(review): this reads decimal_changed, not numeric_changed;
+               confirm against makeunicodedata.py that this is intended. */
+            value = old->decimal_changed;
+            resolved = 1;
+        }
+    }
+    if (!resolved)
+        value = Py_UNICODE_TONUMERIC(c);
+
+    /* -1.0 is the "no numeric value" sentinel. */
+    if (value != -1.0)
+        return PyFloat_FromDouble(value);
+
+    if (default_value != NULL)
+        return Py_NewRef(default_value);
+    PyErr_SetString(PyExc_ValueError, "not a numeric character");
+    return NULL;
+}
+
+/*[clinic input]
+unicodedata.UCD.category
+
+    self: self
+    chr: int(accept={str})
+    /
+
+Returns the general category assigned to the character chr as string.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_category_impl(PyObject *self, int chr)
+/*[clinic end generated code: output=8571539ee2e6783a input=27d6f3d85050bc06]*/
+{
+    const Py_UCS4 c = (Py_UCS4)chr;
+    /* Start from the current database ... */
+    int cat = (int) _getrecord_ex(c)->category;
+    /* ... and let an old-version record override it (0xFF = unchanged). */
+    if (UCD_Check(self)) {
+        const unsigned char old_cat = get_old_record(self, c)->category_changed;
+        cat = (old_cat != 0xFF) ? old_cat : cat;
+    }
+    return PyUnicode_FromString(_PyUnicode_CategoryNames[cat]);
+}
+
+/*[clinic input]
+unicodedata.UCD.bidirectional
+
+    self: self
+    chr: int(accept={str})
+    /
+
+Returns the bidirectional class assigned to the character chr as string.
+
+If no such value is defined, an empty string is returned.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_bidirectional_impl(PyObject *self, int chr)
+/*[clinic end generated code: output=d36310ce2039bb92 input=b3d8f42cebfcf475]*/
+{
+    const Py_UCS4 c = (Py_UCS4)chr;
+    int bidi = (int) _getrecord_ex(c)->bidirectional;
+    if (UCD_Check(self)) {
+        /* Old-version view: unassigned wins, then an explicit old value. */
+        const change_record *old = get_old_record(self, c);
+        if (old->category_changed == 0)
+            bidi = 0;
+        else if (old->bidir_changed != 0xFF)
+            bidi = old->bidir_changed;
+    }
+    return PyUnicode_FromString(_PyUnicode_BidirectionalNames[bidi]);
+}
+
+/*[clinic input]
+unicodedata.UCD.combining -> int
+
+    self: self
+    chr: int(accept={str})
+    /
+
+Returns the canonical combining class assigned to the character chr as integer.
+
+Returns 0 if no combining class is defined.
+[clinic start generated code]*/
+
+static int
+unicodedata_UCD_combining_impl(PyObject *self, int chr)
+/*[clinic end generated code: output=cad056d0cb6a5920 input=9f2d6b2a95d0a22a]*/
+{
+    const Py_UCS4 c = (Py_UCS4)chr;
+
+    /* Code points unassigned in the old database report class 0; there
+       is no other per-version override for the combining class. */
+    if (UCD_Check(self) && get_old_record(self, c)->category_changed == 0) {
+        return 0;
+    }
+
+    return (int) _getrecord_ex(c)->combining;
+}
+
+/*[clinic input]
+unicodedata.UCD.mirrored -> int
+
+    self: self
+    chr: int(accept={str})
+    /
+
+Returns the mirrored property assigned to the character chr as integer.
+
+Returns 1 if the character has been identified as a "mirrored"
+character in bidirectional text, 0 otherwise.
+[clinic start generated code]*/
+
+static int
+unicodedata_UCD_mirrored_impl(PyObject *self, int chr)
+/*[clinic end generated code: output=2532dbf8121b50e6 input=5dd400d351ae6f3b]*/
+{
+    const Py_UCS4 c = (Py_UCS4)chr;
+    const change_record *old;
+    /* Without an old-version object, the current database is the answer. */
+    if (!UCD_Check(self))
+        return (int) _getrecord_ex(c)->mirrored;
+    old = get_old_record(self, c);
+    if (old->category_changed == 0)
+        return 0;               /* unassigned in the old version */
+    if (old->mirrored_changed != 0xFF)
+        return old->mirrored_changed;
+    return (int) _getrecord_ex(c)->mirrored;
+}
+
+/*[clinic input]
+unicodedata.UCD.east_asian_width
+
+    self: self
+    chr: int(accept={str})
+    /
+
+Returns the east asian width assigned to the character chr as string.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr)
+/*[clinic end generated code: output=484e8537d9ee8197 input=c4854798aab026e0]*/
+{
+    const Py_UCS4 c = (Py_UCS4)chr;
+    int width = (int) _getrecord_ex(c)->east_asian_width;
+
+    if (UCD_Check(self)) {
+        const change_record *old = get_old_record(self, c);
+        const int unassigned = (old->category_changed == 0);
+        /* unassigned -> entry 0; otherwise 0xFF means "keep current" */
+        if (unassigned || old->east_asian_width_changed != 0xFF)
+            width = unassigned ? 0 : old->east_asian_width_changed;
+    }
+    return PyUnicode_FromString(_PyUnicode_EastAsianWidthNames[width]);
+}
+
+/*[clinic input]
+unicodedata.UCD.decomposition
+
+    self: self
+    chr: int(accept={str})
+    /
+
+Returns the character decomposition mapping assigned to the character chr as string.
+
+An empty string is returned in case no such mapping is defined.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
+/*[clinic end generated code: output=7d699f3ec7565d27 input=e4c12459ad68507b]*/
+{
+    char text[256];             /* formatted "<prefix> XXXX XXXX ..." */
+    size_t used;                /* number of bytes of text filled so far */
+    int slot, remaining;
+    unsigned int prefix_index;
+    const Py_UCS4 c = (Py_UCS4)chr;
+    const int code = (int)c;
+
+    /* Code points unassigned in the old database have no decomposition. */
+    if (UCD_Check(self) && get_old_record(self, c)->category_changed == 0) {
+        return PyUnicode_FromString("");
+    }
+
+    /* Two-stage table lookup; out-of-range values use entry 0. */
+    slot = 0;
+    if (code >= 0 && code < 0x110000) {
+        slot = decomp_index1[(code>>DECOMP_SHIFT)];
+        slot = decomp_index2[(slot<<DECOMP_SHIFT)+
+                             (code&((1<<DECOMP_SHIFT)-1))];
+    }
+
+    /* The header word packs the number of entries that follow it in its
+       high byte and the index into decomp_prefix in its low byte. */
+    remaining = decomp_data[slot] >> 8;
+    prefix_index = decomp_data[slot] & 255;
+
+    /* XXX: could allocate the PyString up front instead
+       (strlen(prefix) + 5 * count + 1 bytes) */
+
+    /* Based on how the slot is calculated above and decomp_data is
+       generated from Tools/unicode/makeunicodedata.py, it should not be
+       possible to overflow decomp_prefix. */
+    assert(prefix_index < Py_ARRAY_LENGTH(decomp_prefix));
+
+    /* Start with the prefix (possibly empty) ... */
+    used = strlen(decomp_prefix[prefix_index]);
+    memcpy(text, decomp_prefix[prefix_index], used);
+
+    /* ... then append each entry as hex, separated by single spaces. */
+    for (; remaining > 0; remaining--) {
+        if (used)
+            text[used++] = ' ';
+        assert(used < sizeof(text));
+        slot++;
+        PyOS_snprintf(text + used, sizeof(text) - used, "%04X",
+                      decomp_data[slot]);
+        used += strlen(text + used);
+    }
+    return PyUnicode_FromStringAndSize(text, used);
+}
+
+static void
+get_decomp_record(PyObject *self, Py_UCS4 code,
+                  int *index, int *prefix, int *count)
+{
+    /* Entry 0 of decomp_data is used when there is nothing to look up:
+       the code point is outside the Unicode range, or self is an
+       old-version UCD object in which the code point is unassigned. */
+    int slot = 0;
+
+    if (code < 0x110000
+        && !(UCD_Check(self)
+             && get_old_record(self, code)->category_changed == 0)) {
+        /* Two-stage table lookup of the decomposition record. */
+        slot = decomp_index1[(code>>DECOMP_SHIFT)];
+        slot = decomp_index2[(slot<<DECOMP_SHIFT)+
+                             (code&((1<<DECOMP_SHIFT)-1))];
+    }
+
+    /* The header word holds the entry count in its high byte and the
+       prefix code in its low byte; the entries themselves follow it. */
+    *count = decomp_data[slot] >> 8;
+    *prefix = decomp_data[slot] & 255;
+
+    *index = slot + 1;      /* first entry after the header */
+}
+
+#define SBase   0xAC00  /* first precomposed Hangul syllable */
+#define LBase   0x1100  /* first leading consonant (L) */
+#define VBase   0x1161  /* first vowel (V) */
+#define TBase   0x11A7  /* one below the first trailing consonant (T); T == TBase means "no T" */
+#define LCount  19      /* number of leading consonants */
+#define VCount  21      /* number of vowels */
+#define TCount  28      /* trailing consonant slots, including the "no T" slot */
+#define NCount  (VCount*TCount)   /* syllables per leading consonant */
+#define SCount  (LCount*NCount)   /* total number of precomposed syllables */
+
+static PyObject*
+nfd_nfkd(PyObject *self, PyObject *input, int k)
+{
+    PyObject *result;
+    Py_UCS4 *output;            /* scratch buffer of decomposed code points */
+    Py_ssize_t i, o, osize;     /* read index, write index, output capacity */
+    int kind;
+    const void *data;
+    /* Longest decomposition in Unicode 3.2: U+FDFA */
+    Py_UCS4 stack[20];          /* code points still waiting to be decomposed */
+    Py_ssize_t space, isize;    /* free slots left in output, input length */
+    int index, prefix, count, stackptr;
+    unsigned char prev, cur;    /* combining classes used by the final sort */
+    /* Build the NFD (k == 0) or NFKD (k != 0) form of input; NULL on error. */
+    stackptr = 0;
+    isize = PyUnicode_GET_LENGTH(input);
+    space = isize;
+    /* Start with isize + 10 slots, or 2 * isize for inputs of at most 10. */
+    if (space > 10) {
+        if (space <= PY_SSIZE_T_MAX - 10)
+            space += 10;
+    }
+    else {
+        space *= 2;
+    }
+    osize = space;
+    output = PyMem_NEW(Py_UCS4, space);
+    if (!output) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    i = o = 0;
+    kind = PyUnicode_KIND(input);
+    data = PyUnicode_DATA(input);
+
+    while (i < isize) {
+        stack[stackptr++] = PyUnicode_READ(kind, data, i++);    /* seed the work stack */
+        while(stackptr) {
+            Py_UCS4 code = stack[--stackptr];
+            /* Hangul Decomposition adds three characters in
+               a single step, so we need at least that much room. */
+            if (space < 3) {
+                Py_UCS4 *new_output;
+                osize += 10;    /* grow by a fixed 10 slots */
+                space += 10;
+                new_output = PyMem_Realloc(output, osize*sizeof(Py_UCS4));
+                if (new_output == NULL) {
+                    PyMem_Free(output);     /* the old buffer is still owned here */
+                    PyErr_NoMemory();
+                    return NULL;
+                }
+                output = new_output;
+            }
+            /* Hangul Decomposition. */
+            if (SBase <= code && code < (SBase+SCount)) {
+                int SIndex = code - SBase;
+                int L = LBase + SIndex / NCount;
+                int V = VBase + (SIndex % NCount) / TCount;
+                int T = TBase + SIndex % TCount;
+                output[o++] = L;
+                output[o++] = V;
+                space -= 2;
+                if (T != TBase) {           /* syllable has a trailing consonant */
+                    output[o++] = T;
+                    space --;
+                }
+                continue;
+            }
+            /* Old-version normalization changes: a non-zero value replaces code and is processed again. */
+            if (UCD_Check(self)) {
+                Py_UCS4 value = ((PreviousDBVersion*)self)->normalization(code);
+                if (value != 0) {
+                    stack[stackptr++] = value;
+                    continue;
+                }
+            }
+
+            /* Other decompositions. */
+            get_decomp_record(self, code, &index, &prefix, &count);
+
+            /* Copy character if it is not decomposable, or has a
+               compatibility decomposition, but we do NFD. */
+            if (!count || (prefix && !k)) {
+                output[o++] = code;
+                space--;
+                continue;
+            }
+            /* Copy decomposition onto the stack, in reverse
+               order, so that its first code point is popped first. */
+            while(count) {
+                code = decomp_data[index + (--count)];
+                stack[stackptr++] = code;
+            }
+        }
+    }
+
+    result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
+                                       output, o);
+    PyMem_Free(output);
+    if (!result)
+        return NULL;
+    /* result is guaranteed to be ready, as it is compact. */
+    kind = PyUnicode_KIND(result);
+    data = PyUnicode_DATA(result);
+
+    /* Sort canonically: move each out-of-order combining mark left, in place, past marks of a higher class. */
+    i = 0;
+    prev = _getrecord_ex(PyUnicode_READ(kind, data, i))->combining;
+    for (i++; i < PyUnicode_GET_LENGTH(result); i++) {
+        cur = _getrecord_ex(PyUnicode_READ(kind, data, i))->combining;
+        if (prev == 0 || cur == 0 || prev <= cur) {
+            prev = cur;     /* already in order relative to its predecessor */
+            continue;
+        }
+        /* Non-canonical order. Need to switch *i with previous. */
+        o = i - 1;
+        while (1) {
+            Py_UCS4 tmp = PyUnicode_READ(kind, data, o+1);
+            PyUnicode_WRITE(kind, data, o+1,
+                            PyUnicode_READ(kind, data, o));
+            PyUnicode_WRITE(kind, data, o, tmp);
+            o--;
+            if (o < 0)
+                break;
+            prev = _getrecord_ex(PyUnicode_READ(kind, data, o))->combining;
+            if (prev == 0 || prev <= cur)
+                break;      /* reached a starter or a mark that sorts before cur */
+        }
+        prev = _getrecord_ex(PyUnicode_READ(kind, data, i))->combining;
+    }
+    return result;
+}
+
+static int
+find_nfc_index(const struct reindex* nfc, Py_UCS4 code)
+{
+    /* Scan the ranges in order up to the zero-start terminator; the scan
+       gives up as soon as a range starts above code. */
+    const struct reindex *entry;
+    for (entry = nfc; entry->start; entry++) {
+        const unsigned int first = entry->start;
+        if (code < first)
+            break;
+        if (code <= first + entry->count)
+            return entry->index + (unsigned int)(code - first);
+    }
+    return -1;
+}
+
+static PyObject*
+nfc_nfkc(PyObject *self, PyObject *input, int k)
+{
+    PyObject *result;
+    int kind;
+    const void *data;
+    Py_UCS4 *output;            /* composed code points */
+    Py_ssize_t i, i1, o, len;   /* i: current base, i1: look-ahead, o: write index */
+    int f,l,index,index1,comb;  /* f/l: nfc_first/nfc_last indices; comb: last uncombined class */
+    Py_UCS4 code;
+    Py_ssize_t skipped[20];     /* positions already merged into an earlier character */
+    int cskipped = 0;           /* number of live entries in skipped[] */
+    /* Decompose input (NFD if k == 0, NFKD otherwise), then compose the result. */
+    result = nfd_nfkd(self, input, k);
+    if (!result)
+        return NULL;
+    /* result will be "ready". */
+    kind = PyUnicode_KIND(result);
+    data = PyUnicode_DATA(result);
+    len = PyUnicode_GET_LENGTH(result);
+
+    /* We allocate a buffer for the output.
+       If we find that we made no changes, we still return
+       the NFD result. */
+    output = PyMem_NEW(Py_UCS4, len);
+    if (!output) {
+        PyErr_NoMemory();
+        Py_DECREF(result);
+        return 0;       /* i.e. NULL */
+    }
+    i = o = 0;
+
+  again:
+    while (i < len) {
+      for (index = 0; index < cskipped; index++) {
+          if (skipped[index] == i) {
+              /* *i character is skipped.
+                 Remove from list. */
+              skipped[index] = skipped[cskipped-1];
+              cskipped--;
+              i++;
+              goto again; /* continue while */
+          }
+      }
+      /* Hangul Composition. We don't need to check for <LV,T>
+         pairs, since we always have decomposed data. */
+      code = PyUnicode_READ(kind, data, i);
+      if (LBase <= code && code < (LBase+LCount) &&
+          i + 1 < len &&
+          VBase <= PyUnicode_READ(kind, data, i+1) &&
+          PyUnicode_READ(kind, data, i+1) < (VBase+VCount)) {
+          /* check L character is a modern leading consonant (0x1100 ~ 0x1112)
+             and V character is a modern vowel (0x1161 ~ 0x1175). */
+          int LIndex, VIndex;
+          LIndex = code - LBase;
+          VIndex = PyUnicode_READ(kind, data, i+1) - VBase;
+          code = SBase + (LIndex*VCount+VIndex)*TCount;
+          i+=2;
+          if (i < len &&
+              TBase < PyUnicode_READ(kind, data, i) &&
+              PyUnicode_READ(kind, data, i) < (TBase+TCount)) {
+              /* check T character is a modern trailing consonant
+                 (0x11A8 ~ 0x11C2). */
+              code += PyUnicode_READ(kind, data, i)-TBase;
+              i++;
+          }
+          output[o++] = code;
+          continue;
+      }
+
+      /* code is still input[i] here */
+      f = find_nfc_index(nfc_first, code);
+      if (f == -1) {
+          output[o++] = code;   /* cannot start a composition: copy as is */
+          i++;
+          continue;
+      }
+      /* Find next unblocked character. */
+      i1 = i+1;
+      comb = 0;
+      /* output base character for now; might be updated later. */
+      output[o] = PyUnicode_READ(kind, data, i);
+      while (i1 < len) {
+          Py_UCS4 code1 = PyUnicode_READ(kind, data, i1);
+          int comb1 = _getrecord_ex(code1)->combining;
+          if (comb) {
+              if (comb1 == 0)
+                  break;
+              if (comb >= comb1) {
+                  /* Character is blocked. */
+                  i1++;
+                  continue;
+              }
+          }
+          l = find_nfc_index(nfc_last, code1);
+          /* i1 cannot be combined with i. If i1
+             is a starter, we don't need to look further.
+             Otherwise, record the combining class. */
+          if (l == -1) {
+            not_combinable:
+              if (comb1 == 0)
+                  break;
+              comb = comb1;
+              i1++;
+              continue;
+          }
+          index = f*TOTAL_LAST + l;     /* pair index into the two-stage composition table */
+          index1 = comp_index[index >> COMP_SHIFT];
+          code = comp_data[(index1<<COMP_SHIFT)+
+                           (index&((1<<COMP_SHIFT)-1))];
+          if (code == 0)                /* 0: this pair has no composite */
+              goto not_combinable;
+
+          /* Replace the original character. */
+          output[o] = code;
+          /* Mark the second character unused. */
+          assert(cskipped < 20);
+          skipped[cskipped++] = i1;
+          i1++;
+          f = find_nfc_index(nfc_first, output[o]);     /* can the composite combine further? */
+          if (f == -1)
+              break;
+      }
+      /* Output character was already written.
+         Just advance the indices. */
+      o++; i++;
+    }
+    if (o == len) {
+        /* No changes. Return original string. */
+        PyMem_Free(output);
+        return result;
+    }
+    Py_DECREF(result);
+    result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
+                                       output, o);
+    PyMem_Free(output);
+    return result;
+}
+
+// This needs to match the logic in makeunicodedata.py
+// which constructs the quickcheck data (two bits per normalization form).
+typedef enum {YES = 0, MAYBE = 1, NO = 2} QuickcheckResult;
+
+/* Run the Unicode normalization "quickcheck" algorithm on `input`.
+ *
+ * The result is YES or NO when the check proves the string is, or is
+ * not, in the requested form (`nfc` selects composed forms, `k` the
+ * compatibility ones), and MAYBE when only a full normalization can tell.
+ * MAYBE is also returned unconditionally when `self` is a UCD object.
+ *
+ * With `yes_only` set, the scan stops and reports MAYBE at the first
+ * character whose quickcheck value is not YES.
+ * For background and details on the algorithm, see UAX #15:
+ *   https://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
+ */
+static QuickcheckResult
+is_normalized_quickcheck(PyObject *self, PyObject *input, bool nfc, bool k,
+                         bool yes_only)
+{
+    /* UCD 3.2.0 is requested, quickchecks must be disabled. */
+    if (UCD_Check(self)) {
+        return MAYBE;
+    }
+
+    /* Pure ASCII text is reported as normalized right away. */
+    if (PyUnicode_IS_ASCII(input)) {
+        return YES;
+    }
+
+    const int kind = PyUnicode_KIND(input);
+    const void *data = PyUnicode_DATA(input);
+    const Py_ssize_t len = PyUnicode_GET_LENGTH(input);
+
+    /* Each record packs 2-bit QuickcheckResult values; this is the bit
+       offset of the one for the requested form. */
+    const int quickcheck_shift = (nfc ? 4 : 0) + (k ? 2 : 0);
+
+    /* Certainly normalized, unless a character says otherwise. */
+    QuickcheckResult verdict = YES;
+    unsigned char last_class = 0;
+
+    for (Py_ssize_t pos = 0; pos < len; pos++) {
+        const _PyUnicode_DatabaseRecord *record =
+            _getrecord_ex(PyUnicode_READ(kind, data, pos));
+        /* A non-zero combining class lower than its predecessor's means
+           the marks are not in canonical order: not normalized. */
+        const unsigned char this_class = record->combining;
+        if (this_class != 0 && this_class < last_class)
+            return NO;
+        last_class = this_class;
+
+        const int bits =
+            (record->normalization_quick_check >> quickcheck_shift) & 3;
+        if (yes_only) {
+            if (bits != YES)
+                return MAYBE;
+        }
+        else if (bits == NO) {
+            return NO;
+        }
+        else if (bits == MAYBE) {
+            verdict = MAYBE; /* this string might need normalization */
+        }
+    }
+    return verdict;
+}
+
+/*[clinic input]
+unicodedata.UCD.is_normalized
+
+    self: self
+    form: unicode
+    unistr as input: unicode
+    /
+
+Return whether the Unicode string unistr is in the normal form 'form'.
+
+Valid values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form,
+                                   PyObject *input)
+/*[clinic end generated code: output=11e5a3694e723ca5 input=a544f14cea79e508]*/
+{
+    bool nfc = false;           /* composed form (NFC/NFKC) requested? */
+    bool k = false;             /* compatibility form (NFKC/NFKD) requested? */
+    QuickcheckResult quick;
+    PyObject *normalized;
+    int order;
+
+    /* Returns a new reference to Py_True or Py_False, or NULL on error
+       (for example an unknown form name). */
+    if (PyUnicode_READY(input) == -1) {
+        return NULL;
+    }
+
+    /* The empty string is normalized in every form.  Note that the form
+       name is not validated on this path. */
+    if (PyUnicode_GET_LENGTH(input) == 0) {
+        Py_RETURN_TRUE;
+    }
+
+    /* Translate the form name into the two flags. */
+    if (PyUnicode_CompareWithASCIIString(form, "NFC") == 0) {
+        nfc = true;
+    }
+    else if (PyUnicode_CompareWithASCIIString(form, "NFKC") == 0) {
+        nfc = true;
+        k = true;
+    }
+    else if (PyUnicode_CompareWithASCIIString(form, "NFKD") == 0) {
+        k = true;
+    }
+    else if (PyUnicode_CompareWithASCIIString(form, "NFD") != 0) {
+        /* "NFD" itself needs no action: both flags keep their defaults. */
+        PyErr_SetString(PyExc_ValueError, "invalid normalization form");
+        return NULL;
+    }
+
+    /* Run the quickcheck first; only an inconclusive (MAYBE) answer
+       requires any further work. */
+    quick = is_normalized_quickcheck(self, input, nfc, k, false);
+    if (quick != MAYBE) {
+        return Py_NewRef((quick == YES) ? Py_True : Py_False);
+    }
+
+    /* Inconclusive: normalize for real and compare with the input. */
+    normalized = (nfc ? nfc_nfkc : nfd_nfkd)(self, input, k);
+    if (normalized == NULL) {
+        return NULL;
+    }
+
+    order = PyUnicode_Compare(input, normalized);
+    Py_DECREF(normalized);
+    return Py_NewRef((order == 0) ? Py_True : Py_False);
+}
+
+
+/*[clinic input]
+unicodedata.UCD.normalize
+
+    self: self
+    form: unicode
+    unistr as input: unicode
+    /
+
+Return the normal form 'form' for the Unicode string unistr.
+
+Valid values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
+                               PyObject *input)
+/*[clinic end generated code: output=05ca4385a2ad6983 input=3a5206c0ad2833fb]*/
+{
+    bool nfc = false;           /* composed form (NFC/NFKC) requested? */
+    bool k = false;             /* compatibility form (NFKC/NFKD) requested? */
+    /* Returns a new reference: either input itself or the string built by
+       nfc_nfkc()/nfd_nfkd(); NULL for an unknown form or on failure. */
+
+    if (PyUnicode_GET_LENGTH(input) == 0) {
+        /* Special case empty input strings, since resizing
+           them  later would cause internal errors. */
+        return Py_NewRef(input);
+    }
+
+    /* Translate the form name into the two flags. */
+    if (PyUnicode_CompareWithASCIIString(form, "NFC") == 0) {
+        nfc = true;
+    }
+    else if (PyUnicode_CompareWithASCIIString(form, "NFKC") == 0) {
+        nfc = true;
+        k = true;
+    }
+    else if (PyUnicode_CompareWithASCIIString(form, "NFKD") == 0) {
+        k = true;
+    }
+    else if (PyUnicode_CompareWithASCIIString(form, "NFD") != 0) {
+        /* "NFD" itself needs no action: both flags keep their defaults. */
+        PyErr_SetString(PyExc_ValueError, "invalid normalization form");
+        return NULL;
+    }
+
+    /* With yes_only set, the quickcheck answers YES only when the input
+       is certainly normalized already; return it unchanged in that case. */
+    if (is_normalized_quickcheck(self, input, nfc, k, true) == YES) {
+        return Py_NewRef(input);
+    }
+
+    /* Otherwise run the full algorithm; k selects the compatibility form. */
+    return (nfc ? nfc_nfkc : nfd_nfkd)(self, input, k);
+}
+
+/* -------------------------------------------------------------------- */
+/* unicode character name tables */
+
+/* data file generated by Tools/unicode/makeunicodedata.py */
+#include "unicodename_db.h"
+
+/* -------------------------------------------------------------------- */
+/* database code (cut and pasted from the unidb package) */
+
+static unsigned long
+_gethash(const char *s, int len, int scale)
+{
+    /* Case-insensitive multiplicative hash of the first len bytes of s;
+       bits 24..31 are XOR-folded into the low byte after each character. */
+    unsigned long hash = 0;
+    for (int pos = 0; pos < len; pos++) {
+        const unsigned char upper = (unsigned char) Py_TOUPPER(s[pos]);
+        hash = (hash * scale) + upper;
+        if (hash & 0xff000000)
+            hash = (hash ^ (((hash & 0xff000000)>>24) & 0xff)) & 0x00ffffff;
+    }
+    return hash;
+}
+
+static const char * const hangul_syllables[][3] = {  /* NOTE(review): columns look like L, V, T jamo name parts -- confirm */
+    { "G",  "A",   ""   },
+    { "GG", "AE",  "G"  },
+    { "N",  "YA",  "GG" },
+    { "D",  "YAE", "GS" },
+    { "DD", "EO",  "N", },
+    { "R",  "E",   "NJ" },
+    { "M",  "YEO", "NH" },
+    { "B",  "YE",  "D"  },
+    { "BB", "O",   "L"  },
+    { "S",  "WA",  "LG" },
+    { "SS", "WAE", "LM" },
+    { "",   "OE",  "LB" },
+    { "J",  "YO",  "LS" },
+    { "JJ", "U",   "LT" },
+    { "C",  "WEO", "LP" },
+    { "K",  "WE",  "LH" },
+    { "T",  "WI",  "M"  },
+    { "P",  "YU",  "B"  },
+    { "H",  "EU",  "BS" },
+    { 0,    "YI",  "S"  },   /* column 0 ends after 19 entries (cf. LCount) */
+    { 0,    "I",   "SS" },
+    { 0,    0,     "NG" },   /* column 1 ends after 21 entries (cf. VCount) */
+    { 0,    0,     "J"  },
+    { 0,    0,     "C"  },
+    { 0,    0,     "K"  },
+    { 0,    0,     "T"  },
+    { 0,    0,     "P"  },
+    { 0,    0,     "H"  }    /* column 2 has 28 entries in total (cf. TCount) */
+};
+
+/* These ranges need to match makeunicodedata.py:cjk_ranges. */
+static int
+is_unified_ideograph(Py_UCS4 code)
+{
+    /* Inclusive bounds, in order: CJK Ideograph Extension A, CJK Ideograph,
+       then CJK Ideograph Extensions B, C, D, E, F, G and H. */
+    static const Py_UCS4 cjk[][2] = {
+        {0x3400,  0x4DBF},  {0x4E00,  0x9FFF},  {0x20000, 0x2A6DF},
+        {0x2A700, 0x2B739}, {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1},
+        {0x2CEB0, 0x2EBE0}, {0x30000, 0x3134A}, {0x31350, 0x323AF} };
+    for (size_t r = 0; r < Py_ARRAY_LENGTH(cjk); r++)
+        if (cjk[r][0] <= code && code <= cjk[r][1])
+            return 1;
+    return 0;
+}
+
+/* macros used to determine if the given code point is in the PUA range that
+ * we are using to store aliases and named sequences.
+ * Each range is half-open: [start, end).  The argument is parenthesized so
+ * that any expression can be passed safely; note that it is evaluated
+ * twice, so it must not have side effects. */
+#define IS_ALIAS(cp) (((cp) >= aliases_start) && ((cp) < aliases_end))
+#define IS_NAMED_SEQ(cp) (((cp) >= named_sequences_start) && \
+                          ((cp) < named_sequences_end))
+
+static int
+_getucname(PyObject *self,
+           Py_UCS4 code, char* buffer, int buflen, int with_alias_and_seq)
+{
+    /* Find the name associated with the given code point and store it,
+     * NUL-terminated, in buffer, which has room for buflen bytes.
+     * Returns 1 on success, and 0 if the code point has no name or the
+     * name does not fit into buflen bytes.
+     * If with_alias_and_seq is 1, check for names in the Private Use Area 15
+     * that we are using for aliases and named sequences. */
+    int offset;
+    int i;
+    int word;
+    const unsigned char* w;
+
+    if (code >= 0x110000)
+        return 0;
+
+    /* XXX should we just skip all the code points in the PUAs here? */
+    if (!with_alias_and_seq && (IS_ALIAS(code) || IS_NAMED_SEQ(code)))
+        return 0;
+
+    if (UCD_Check(self)) {
+        /* in 3.2.0 there are no aliases and named sequences */
+        const change_record *old;
+        if (IS_ALIAS(code) || IS_NAMED_SEQ(code))
+            return 0;
+        old = get_old_record(self, code);
+        if (old->category_changed == 0) {
+            /* unassigned */
+            return 0;
+        }
+    }
+
+    if (SBase <= code && code < SBase+SCount) {
+        /* Hangul syllable: the name is assembled from three fragments of
+           the hangul_syllables table, selected by L, V and T. */
+        int SIndex = code - SBase;
+        int L = SIndex / NCount;
+        int V = (SIndex % NCount) / TCount;
+        int T = SIndex % TCount;
+
+        if (buflen < 27)
+            /* Worst case: HANGUL SYLLABLE <10chars>. */
+            return 0;
+        strcpy(buffer, "HANGUL SYLLABLE ");
+        buffer += 16;
+        strcpy(buffer, hangul_syllables[L][0]);
+        buffer += strlen(hangul_syllables[L][0]);
+        strcpy(buffer, hangul_syllables[V][1]);
+        buffer += strlen(hangul_syllables[V][1]);
+        strcpy(buffer, hangul_syllables[T][2]);
+        buffer += strlen(hangul_syllables[T][2]);
+        *buffer = '\0';
+        return 1;
+    }
+
+    if (is_unified_ideograph(code)) {
+        if (buflen < 28)
+            /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
+            return 0;
+        /* bounded by the buflen check above */
+        sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%X", code);
+        return 1;
+    }
+
+    /* get offset into phrasebook (two-level table lookup) */
+    offset = phrasebook_offset1[(code>>phrasebook_shift)];
+    offset = phrasebook_offset2[(offset<<phrasebook_shift) +
+                               (code&((1<<phrasebook_shift)-1))];
+    if (!offset)
+        return 0;
+
+    i = 0;
+
+    for (;;) {
+        /* get word index: values >= phrasebook_short start a two-byte
+           index, smaller values are a one-byte index */
+        word = phrasebook[offset] - phrasebook_short;
+        if (word >= 0) {
+            word = (word << 8) + phrasebook[offset+1];
+            offset += 2;
+        } else
+            word = phrasebook[offset++];
+        if (i) {
+            /* separate words with a space; buffer[buflen] is already out
+               of bounds, so the check has to be >= like the ones below */
+            if (i >= buflen)
+                return 0; /* buffer overflow */
+            buffer[i++] = ' ';
+        }
+        /* copy word string from lexicon.  the last character in the
+           word has bit 7 set.  the last word in a string ends with
+           0x80 */
+        w = lexicon + lexicon_offset[word];
+        while (*w < 128) {
+            if (i >= buflen)
+                return 0; /* buffer overflow */
+            buffer[i++] = *w++;
+        }
+        if (i >= buflen)
+            return 0; /* buffer overflow */
+        /* for the final 0x80 marker this stores the terminating NUL */
+        buffer[i++] = *w & 127;
+        if (*w == 128)
+            break; /* end of word */
+    }
+
+    return 1;
+}
+
+/* Name-lookup entry point exported through the _PyUnicode_Name_CAPI
+ * structure (see unicodedata_create_capi).  It forwards to _getucname()
+ * with a NULL self and returns its result unchanged. */
+static int
+capi_getucname(Py_UCS4 code,
+               char* buffer, int buflen,
+               int with_alias_and_seq)
+{
+    return _getucname(NULL, code, buffer, buflen, with_alias_and_seq);
+
+}
+
+static int
+_cmpname(PyObject *self, int code, const char* name, int namelen)
+{
+    /* Check if code corresponds to the given name.
+     * The first namelen bytes of name are compared, after upper-casing,
+     * with the name stored for code (aliases and named sequences
+     * included).  Returns 1 on an exact match and 0 otherwise, also when
+     * code has no name. */
+    int i;
+    char buffer[NAME_MAXLEN+1];
+    if (!_getucname(self, code, buffer, NAME_MAXLEN, 1))
+        return 0;
+    for (i = 0; i < namelen; i++) {
+        /* Stop at the end of the stored name.  Only the bytes up to its
+           terminating NUL are initialized, and a name that contains an
+           embedded NUL byte must not be compared against what lies
+           behind the terminator. */
+        if (buffer[i] == '\0' || Py_TOUPPER(name[i]) != buffer[i])
+            return 0;
+    }
+    /* the stored name must end exactly where the given name ends */
+    return buffer[namelen] == '\0';
+}
+
+/* Find the longest entry among the first count rows of the given column of
+ * hangul_syllables that is a prefix of str.
+ *
+ * On a match, *len receives the length of that entry and *pos its row
+ * index.  Without a match, *len is set to 0 and *pos is left untouched.
+ * An empty table entry matches with length 0. */
+static void
+find_syllable(const char *str, int *len, int *pos, int count, int column)
+{
+    int row;
+
+    *len = -1;      /* -1: nothing matched yet, so length 0 can still win */
+    for (row = 0; row < count; row++) {
+        const char *fragment = hangul_syllables[row][column];
+        int fragment_len = Py_SAFE_DOWNCAST(strlen(fragment), size_t, int);
+
+        /* only a strictly longer match replaces the current one */
+        if (fragment_len > *len &&
+            strncmp(str, fragment, fragment_len) == 0)
+        {
+            *len = fragment_len;
+            *pos = row;
+        }
+    }
+    if (*len < 0) {
+        *len = 0;
+    }
+}
+
+/* Post-process a code point cp found through the name hash table.
+ *
+ * Returns 0 if cp stands for a named sequence and with_named_seq is 0.
+ * Otherwise stores the resulting code point in *code and returns 1:
+ * a code point in the alias range is replaced by its name_aliases entry,
+ * any other value is stored unchanged. */
+static int
+_check_alias_and_seq(unsigned int cp, Py_UCS4* code, int with_named_seq)
+{
+    if (IS_NAMED_SEQ(cp) && !with_named_seq) {
+        /* named sequences are not allowed here */
+        return 0;
+    }
+
+    *code = IS_ALIAS(cp) ? name_aliases[cp-aliases_start] : cp;
+    return 1;
+}
+
+static int
+_getcode(PyObject* self,
+         const char* name, int namelen, Py_UCS4* code, int with_named_seq)
+{
+    /* Return the code point associated with the given name in *code.
+     * The result is 1 if the name is known and 0 otherwise.
+     *
+     * Hangul syllable names and CJK unified ideograph names are decoded
+     * algorithmically; every other name is searched in the code_hash
+     * table.  A hit in the alias range is converted to the real code point
+     * by _check_alias_and_seq().  If with_named_seq is 1, the PUA code
+     * point that we are using for a named sequence is returned as is, and
+     * the caller must then convert it; if it is 0 such a hit yields 0.
+     * self is only passed on to _cmpname(). */
+
+    /* Check for hangul syllables. */
+    if (strncmp(name, "HANGUL SYLLABLE ", 16) == 0) {
+        int L = -1, V = -1, T = -1;
+        int matched;
+        const char *cursor = name + 16;
+
+        find_syllable(cursor, &matched, &L, LCount, 0);
+        cursor += matched;
+        find_syllable(cursor, &matched, &V, VCount, 1);
+        cursor += matched;
+        find_syllable(cursor, &matched, &T, TCount, 2);
+        cursor += matched;
+
+        /* all three parts must be found and must use up the whole name */
+        if (L == -1 || V == -1 || T == -1 || cursor-name != namelen) {
+            /* It's an illegal syllable name. */
+            return 0;
+        }
+        *code = SBase + (L*VCount+V)*TCount + T;
+        return 1;
+    }
+
+    /* Check for unified ideographs. */
+    if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
+        /* Four or five hexdigits (upper case only) must follow. */
+        const char *digits = name + 22;
+        const int ndigits = namelen - 22;
+        unsigned int value = 0;
+
+        if (ndigits != 4 && ndigits != 5)
+            return 0;
+        for (int k = 0; k < ndigits; k++) {
+            const char ch = digits[k];
+
+            value *= 16;
+            if (ch >= '0' && ch <= '9')
+                value += ch - '0';
+            else if (ch >= 'A' && ch <= 'F')
+                value += ch - 'A' + 10;
+            else
+                return 0;
+        }
+        if (!is_unified_ideograph(value))
+            return 0;
+        *code = value;
+        return 1;
+    }
+
+    /* the following is the same as python's dictionary lookup, with
+       only minor changes.  see the makeunicodedata script for more
+       details */
+
+    const unsigned int mask = code_size-1;
+    const unsigned int hash = (unsigned int) _gethash(name, namelen,
+                                                      code_magic);
+    unsigned int slot = (~hash) & mask;
+    unsigned int candidate = code_hash[slot];
+
+    /* first probe; an empty slot (0) means the name is unknown */
+    if (candidate == 0)
+        return 0;
+    if (_cmpname(self, candidate, name, namelen)) {
+        return _check_alias_and_seq(candidate, code, with_named_seq);
+    }
+
+    /* collision: keep probing with a step that changes every round */
+    unsigned int step = (hash ^ (hash >> 3)) & mask;
+    if (step == 0)
+        step = mask;
+    while (1) {
+        slot = (slot + step) & mask;
+        candidate = code_hash[slot];
+        if (candidate == 0)
+            return 0;
+        if (_cmpname(self, candidate, name, namelen)) {
+            return _check_alias_and_seq(candidate, code, with_named_seq);
+        }
+        step = step << 1;
+        if (step > mask)
+            step = step ^ code_poly;
+    }
+}
+
+/* Code-lookup entry point exported through the _PyUnicode_Name_CAPI
+ * structure (see unicodedata_create_capi).  It forwards to _getcode() with
+ * a NULL self and returns its result unchanged. */
+static int
+capi_getcode(const char* name, int namelen, Py_UCS4* code,
+             int with_named_seq)
+{
+    return _getcode(NULL, name, namelen, code, with_named_seq);
+
+}
+
+/* Capsule destructor: releases the _PyUnicode_Name_CAPI structure that
+ * unicodedata_create_capi() allocated with PyMem_Malloc(). */
+static void
+unicodedata_destroy_capi(PyObject *capsule)
+{
+    PyMem_Free(PyCapsule_GetPointer(capsule, PyUnicodeData_CAPSULE_NAME));
+}
+
+/* Create the capsule that exports the name lookup functions.
+ *
+ * Allocates a _PyUnicode_Name_CAPI structure, fills in its getname and
+ * getcode members and wraps it in a capsule named
+ * PyUnicodeData_CAPSULE_NAME.  The capsule owns the structure and frees it
+ * through unicodedata_destroy_capi().
+ * Returns a new reference, or NULL with an exception set on failure. */
+static PyObject *
+unicodedata_create_capi(void)
+{
+    _PyUnicode_Name_CAPI *capi = PyMem_Malloc(sizeof(_PyUnicode_Name_CAPI));
+    if (capi == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    capi->getname = capi_getucname;
+    capi->getcode = capi_getcode;
+
+    PyObject *capsule = PyCapsule_New(capi,
+                                      PyUnicodeData_CAPSULE_NAME,
+                                      unicodedata_destroy_capi);
+    if (capsule == NULL) {
+        /* no capsule took ownership, so free the structure here */
+        PyMem_Free(capi);
+    }
+    return capsule;
+}
+
+
+/* -------------------------------------------------------------------- */
+/* Python bindings */
+
+/*[clinic input]
+unicodedata.UCD.name
+
+    self: self
+    chr: int(accept={str})
+    default: object=NULL
+    /
+
+Returns the name assigned to the character chr as a string.
+
+If no name is defined, default is returned, or, if not given,
+ValueError is raised.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value)
+/*[clinic end generated code: output=6bbb37a326407707 input=3e0367f534de56d9]*/
+{
+    char name[NAME_MAXLEN+1];
+
+    /* aliases and named sequences are not reported (last argument is 0) */
+    if (_getucname(self, (Py_UCS4)chr, name, NAME_MAXLEN, 0)) {
+        return PyUnicode_FromString(name);
+    }
+
+    /* no name: fall back to the default if the caller supplied one */
+    if (default_value != NULL) {
+        return Py_NewRef(default_value);
+    }
+    PyErr_SetString(PyExc_ValueError, "no such name");
+    return NULL;
+}
+
+/*[clinic input]
+unicodedata.UCD.lookup
+
+    self: self
+    name: str(accept={str, robuffer}, zeroes=True)
+    /
+
+Look up character by name.
+
+If a character with the given name is found, return the
+corresponding character.  If not found, KeyError is raised.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
+                            Py_ssize_t name_length)
+/*[clinic end generated code: output=7f03fc4959b242f6 input=a557be0f8607a0d6]*/
+{
+    Py_UCS4 code;
+
+    /* rejecting long names first also makes the int cast below safe */
+    if (name_length > NAME_MAXLEN) {
+        PyErr_SetString(PyExc_KeyError, "name too long");
+        return NULL;
+    }
+
+    if (_getcode(self, name, (int)name_length, &code, 1) == 0) {
+        PyErr_Format(PyExc_KeyError, "undefined character name '%s'", name);
+        return NULL;
+    }
+
+    if (!IS_NAMED_SEQ(code)) {
+        /* ordinary character (aliases have been resolved by _getcode) */
+        return PyUnicode_FromOrdinal(code);
+    }
+
+    /* code is in the PUA range that we use for named sequences:
+       convert it to the sequence it stands for */
+    const unsigned int seq_index = code-named_sequences_start;
+    return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND,
+                                     named_sequences[seq_index].seq,
+                                     named_sequences[seq_index].seqlen);
+}
+
+// List of functions used to define module functions *AND* unicodedata.UCD
+// methods. For module functions, self is the module. For UCD methods, self
+// is a UCD instance. The UCD_Check() macro is used to check if self is
+// a UCD instance.
+// The same table is installed twice: as Py_tp_methods in ucd_type_slots and
+// as m_methods in unicodedata_module.
+static PyMethodDef unicodedata_functions[] = {
+    UNICODEDATA_UCD_DECIMAL_METHODDEF
+    UNICODEDATA_UCD_DIGIT_METHODDEF
+    UNICODEDATA_UCD_NUMERIC_METHODDEF
+    UNICODEDATA_UCD_CATEGORY_METHODDEF
+    UNICODEDATA_UCD_BIDIRECTIONAL_METHODDEF
+    UNICODEDATA_UCD_COMBINING_METHODDEF
+    UNICODEDATA_UCD_MIRRORED_METHODDEF
+    UNICODEDATA_UCD_EAST_ASIAN_WIDTH_METHODDEF
+    UNICODEDATA_UCD_DECOMPOSITION_METHODDEF
+    UNICODEDATA_UCD_NAME_METHODDEF
+    UNICODEDATA_UCD_LOOKUP_METHODDEF
+    UNICODEDATA_UCD_IS_NORMALIZED_METHODDEF
+    UNICODEDATA_UCD_NORMALIZE_METHODDEF
+    {NULL, NULL}                /* sentinel */
+};
+
+/* GC traverse function of the UCD type (Py_tp_traverse slot).
+ * The only object reported to the collector is the instance's type, to
+ * which ucd_dealloc() releases a reference.  Returns 0 unless the visit
+ * callback made Py_VISIT return early. */
+static int
+ucd_traverse(PreviousDBVersion *self, visitproc visit, void *arg)
+{
+    Py_VISIT(Py_TYPE(self));
+    return 0;
+}
+
+/* Deallocator of the UCD type (Py_tp_dealloc slot). */
+static void
+ucd_dealloc(PreviousDBVersion *self)
+{
+    /* fetch the type first: self must not be touched after it is freed */
+    PyTypeObject *tp = Py_TYPE(self);
+    PyObject_GC_UnTrack(self);
+    PyObject_GC_Del(self);
+    /* release the reference to the type */
+    Py_DECREF(tp);
+}
+
+/* Slots of the unicodedata.UCD type, referenced by ucd_type_spec.
+ * The methods are the same table that provides the module-level functions
+ * (unicodedata_functions). */
+static PyType_Slot ucd_type_slots[] = {
+    {Py_tp_dealloc, ucd_dealloc},
+    {Py_tp_traverse, ucd_traverse},
+    {Py_tp_getattro, PyObject_GenericGetAttr},
+    {Py_tp_methods, unicodedata_functions},
+    {Py_tp_members, DB_members},
+    {0, 0}
+};
+
+/* Specification of the unicodedata.UCD type; unicodedata_exec() passes it
+ * to PyType_FromSpec().  The flags make the type GC-aware, immutable and
+ * not instantiable via a direct call to the type. */
+static PyType_Spec ucd_type_spec = {
+    .name = "unicodedata.UCD",
+    .basicsize = sizeof(PreviousDBVersion),
+    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION |
+              Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_IMMUTABLETYPE),
+    .slots = ucd_type_slots
+};
+
+/* Module docstring (used as m_doc of unicodedata_module).  UNIDATA_VERSION
+ * is spliced into the text by string literal concatenation. */
+PyDoc_STRVAR(unicodedata_docstring,
+"This module provides access to the Unicode Character Database which\n\
+defines character properties for all Unicode characters. The data in\n\
+this database is based on the UnicodeData.txt file version\n\
+" UNIDATA_VERSION " which is publicly available from ftp://ftp.unicode.org/.\n\
+\n\
+The module uses the same names and symbols as defined by the\n\
+UnicodeData File Format " UNIDATA_VERSION ".");
+
+/* Module exec function (Py_mod_exec slot).
+ *
+ * Adds to the module: the "unidata_version" string constant, the UCD type,
+ * the "ucd_3_2_0" object for the 3.2.0 database and the "_ucnhash_CAPI"
+ * capsule.  Returns 0 on success and -1 on failure. */
+static int
+unicodedata_exec(PyObject *module)
+{
+    if (PyModule_AddStringConstant(module, "unidata_version",
+                                   UNIDATA_VERSION) < 0) {
+        return -1;
+    }
+
+    PyTypeObject *ucd_type = (PyTypeObject *)PyType_FromSpec(&ucd_type_spec);
+    if (ucd_type == NULL) {
+        return -1;
+    }
+    if (PyModule_AddType(module, ucd_type) < 0) {
+        Py_DECREF(ucd_type);
+        return -1;
+    }
+
+    // Unicode database version 3.2.0 used by the IDNA encoding
+    PyObject *old_ucd = new_previous_version(ucd_type, "3.2.0",
+                                             get_change_3_2_0,
+                                             normalization_3_2_0);
+    // our own reference to the type is no longer needed
+    Py_DECREF(ucd_type);
+    if (old_ucd == NULL) {
+        return -1;
+    }
+    // the module takes its own reference; ours is dropped either way
+    int rc = PyModule_AddObjectRef(module, "ucd_3_2_0", old_ucd);
+    Py_DECREF(old_ucd);
+    if (rc < 0) {
+        return -1;
+    }
+
+    /* Export C API */
+    PyObject *capsule = unicodedata_create_capi();
+    if (capsule == NULL) {
+        return -1;
+    }
+    rc = PyModule_AddObjectRef(module, "_ucnhash_CAPI", capsule);
+    Py_DECREF(capsule);
+    return (rc < 0) ? -1 : 0;
+}
+
+/* Module slots, referenced by unicodedata_module: unicodedata_exec() as the
+ * exec function, plus the declaration of per-interpreter GIL support. */
+static PyModuleDef_Slot unicodedata_slots[] = {
+    {Py_mod_exec, unicodedata_exec},
+    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+    {0, NULL}
+};
+
+/* Module definition, passed to PyModuleDef_Init() by PyInit_unicodedata().
+ * m_size is 0: no per-module state is requested. */
+static struct PyModuleDef unicodedata_module = {
+    PyModuleDef_HEAD_INIT,
+    .m_name = "unicodedata",
+    .m_doc = unicodedata_docstring,
+    .m_size = 0,
+    .m_methods = unicodedata_functions,
+    .m_slots = unicodedata_slots,
+};
+
+/* Module initialization function: hands the module definition to
+ * PyModuleDef_Init() and returns its result.  The module contents are
+ * filled in by unicodedata_exec(), listed in unicodedata_slots. */
+PyMODINIT_FUNC
+PyInit_unicodedata(void)
+{
+    return PyModuleDef_Init(&unicodedata_module);
+}
+
+
+/*
+Local variables:
+c-basic-offset: 4
+indent-tabs-mode: nil
+End:
+*/