aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/src/Modules/unicodedata.c
diff options
context:
space:
mode:
authorshadchin <shadchin@yandex-team.ru>2022-02-10 16:44:30 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:44:30 +0300
commit2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree012bb94d777798f1f56ac1cec429509766d05181 /contrib/tools/python3/src/Modules/unicodedata.c
parent6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
downloadydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Modules/unicodedata.c')
-rw-r--r--contrib/tools/python3/src/Modules/unicodedata.c314
1 files changed, 157 insertions, 157 deletions
diff --git a/contrib/tools/python3/src/Modules/unicodedata.c b/contrib/tools/python3/src/Modules/unicodedata.c
index 8a1198a2b7..847c16da72 100644
--- a/contrib/tools/python3/src/Modules/unicodedata.c
+++ b/contrib/tools/python3/src/Modules/unicodedata.c
@@ -17,15 +17,15 @@
#include "Python.h"
#include "ucnhash.h"
-#include "structmember.h" // PyMemberDef
-
-#include <stdbool.h>
-
-_Py_IDENTIFIER(NFC);
-_Py_IDENTIFIER(NFD);
-_Py_IDENTIFIER(NFKC);
-_Py_IDENTIFIER(NFKD);
-
+#include "structmember.h" // PyMemberDef
+
+#include <stdbool.h>
+
+_Py_IDENTIFIER(NFC);
+_Py_IDENTIFIER(NFD);
+_Py_IDENTIFIER(NFKC);
+_Py_IDENTIFIER(NFKD);
+
/*[clinic input]
module unicodedata
class unicodedata.UCD 'PreviousDBVersion *' '&UCD_Type'
@@ -92,7 +92,7 @@ static PyMemberDef DB_members[] = {
/* forward declaration */
static PyTypeObject UCD_Type;
-#define UCD_Check(o) Py_IS_TYPE(o, &UCD_Type)
+#define UCD_Check(o) Py_IS_TYPE(o, &UCD_Type)
static PyObject*
new_previous_version(const char*name, const change_record* (*getrecord)(Py_UCS4),
@@ -496,7 +496,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
Py_UCS4 *output;
Py_ssize_t i, o, osize;
int kind;
- const void *data;
+ const void *data;
/* Longest decomposition in Unicode 3.2: U+FDFA */
Py_UCS4 stack[20];
Py_ssize_t space, isize;
@@ -623,7 +623,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
}
static int
-find_nfc_index(const struct reindex* nfc, Py_UCS4 code)
+find_nfc_index(const struct reindex* nfc, Py_UCS4 code)
{
unsigned int index;
for (index = 0; nfc[index].start; index++) {
@@ -643,7 +643,7 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
{
PyObject *result;
int kind;
- const void *data;
+ const void *data;
Py_UCS4 *output;
Py_ssize_t i, i1, o, len;
int f,l,index,index1,comb;
@@ -709,7 +709,7 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
}
/* code is still input[i] here */
- f = find_nfc_index(nfc_first, code);
+ f = find_nfc_index(nfc_first, code);
if (f == -1) {
output[o++] = code;
i++;
@@ -732,7 +732,7 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
continue;
}
}
- l = find_nfc_index(nfc_last, code1);
+ l = find_nfc_index(nfc_last, code1);
/* i1 cannot be combined with i. If i1
is a starter, we don't need to look further.
Otherwise, record the combining class. */
@@ -757,7 +757,7 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
assert(cskipped < 20);
skipped[cskipped++] = i1;
i1++;
- f = find_nfc_index(nfc_first, output[o]);
+ f = find_nfc_index(nfc_first, output[o]);
if (f == -1)
break;
}
@@ -777,40 +777,40 @@ nfc_nfkc(PyObject *self, PyObject *input, int k)
return result;
}
-// This needs to match the logic in makeunicodedata.py
-// which constructs the quickcheck data.
-typedef enum {YES = 0, MAYBE = 1, NO = 2} QuickcheckResult;
-
-/* Run the Unicode normalization "quickcheck" algorithm.
- *
- * Return YES or NO if quickcheck determines the input is certainly
- * normalized or certainly not, and MAYBE if quickcheck is unable to
- * tell.
- *
- * If `yes_only` is true, then return MAYBE as soon as we determine
- * the answer is not YES.
- *
- * For background and details on the algorithm, see UAX #15:
- * https://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
- */
-static QuickcheckResult
-is_normalized_quickcheck(PyObject *self, PyObject *input,
- bool nfc, bool k, bool yes_only)
+// This needs to match the logic in makeunicodedata.py
+// which constructs the quickcheck data.
+typedef enum {YES = 0, MAYBE = 1, NO = 2} QuickcheckResult;
+
+/* Run the Unicode normalization "quickcheck" algorithm.
+ *
+ * Return YES or NO if quickcheck determines the input is certainly
+ * normalized or certainly not, and MAYBE if quickcheck is unable to
+ * tell.
+ *
+ * If `yes_only` is true, then return MAYBE as soon as we determine
+ * the answer is not YES.
+ *
+ * For background and details on the algorithm, see UAX #15:
+ * https://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
+ */
+static QuickcheckResult
+is_normalized_quickcheck(PyObject *self, PyObject *input,
+ bool nfc, bool k, bool yes_only)
{
- /* An older version of the database is requested, quickchecks must be
- disabled. */
- if (self && UCD_Check(self))
- return NO;
-
+ /* An older version of the database is requested, quickchecks must be
+ disabled. */
+ if (self && UCD_Check(self))
+ return NO;
+
Py_ssize_t i, len;
int kind;
- const void *data;
- unsigned char prev_combining = 0;
+ const void *data;
+ unsigned char prev_combining = 0;
- /* The two quickcheck bits at this shift have type QuickcheckResult. */
- int quickcheck_shift = (nfc ? 4 : 0) + (k ? 2 : 0);
+ /* The two quickcheck bits at this shift have type QuickcheckResult. */
+ int quickcheck_shift = (nfc ? 4 : 0) + (k ? 2 : 0);
- QuickcheckResult result = YES; /* certainly normalized, unless we find something */
+ QuickcheckResult result = YES; /* certainly normalized, unless we find something */
i = 0;
kind = PyUnicode_KIND(input);
@@ -819,106 +819,106 @@ is_normalized_quickcheck(PyObject *self, PyObject *input,
while (i < len) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i++);
const _PyUnicode_DatabaseRecord *record = _getrecord_ex(ch);
-
+
unsigned char combining = record->combining;
if (combining && prev_combining > combining)
- return NO; /* non-canonical sort order, not normalized */
+ return NO; /* non-canonical sort order, not normalized */
prev_combining = combining;
-
- unsigned char quickcheck_whole = record->normalization_quick_check;
- if (yes_only) {
- if (quickcheck_whole & (3 << quickcheck_shift))
- return MAYBE;
- } else {
- switch ((quickcheck_whole >> quickcheck_shift) & 3) {
- case NO:
- return NO;
- case MAYBE:
- result = MAYBE; /* this string might need normalization */
- }
- }
+
+ unsigned char quickcheck_whole = record->normalization_quick_check;
+ if (yes_only) {
+ if (quickcheck_whole & (3 << quickcheck_shift))
+ return MAYBE;
+ } else {
+ switch ((quickcheck_whole >> quickcheck_shift) & 3) {
+ case NO:
+ return NO;
+ case MAYBE:
+ result = MAYBE; /* this string might need normalization */
+ }
+ }
}
- return result;
+ return result;
}
/*[clinic input]
-unicodedata.UCD.is_normalized
-
- self: self
- form: unicode
- unistr as input: unicode
- /
-
-Return whether the Unicode string unistr is in the normal form 'form'.
-
-Valid values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
-[clinic start generated code]*/
-
-static PyObject *
-unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form,
- PyObject *input)
-/*[clinic end generated code: output=11e5a3694e723ca5 input=a544f14cea79e508]*/
-{
- if (PyUnicode_READY(input) == -1) {
- return NULL;
- }
-
- if (PyUnicode_GET_LENGTH(input) == 0) {
- /* special case empty input strings. */
- Py_RETURN_TRUE;
- }
-
- PyObject *result;
- bool nfc = false;
- bool k = false;
- QuickcheckResult m;
-
- PyObject *cmp;
- int match = 0;
-
- if (_PyUnicode_EqualToASCIIId(form, &PyId_NFC)) {
- nfc = true;
- }
- else if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKC)) {
- nfc = true;
- k = true;
- }
- else if (_PyUnicode_EqualToASCIIId(form, &PyId_NFD)) {
- /* matches default values for `nfc` and `k` */
- }
- else if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKD)) {
- k = true;
- }
- else {
- PyErr_SetString(PyExc_ValueError, "invalid normalization form");
- return NULL;
- }
-
- m = is_normalized_quickcheck(self, input, nfc, k, false);
-
- if (m == MAYBE) {
- cmp = (nfc ? nfc_nfkc : nfd_nfkd)(self, input, k);
- if (cmp == NULL) {
- return NULL;
- }
- match = PyUnicode_Compare(input, cmp);
- Py_DECREF(cmp);
- result = (match == 0) ? Py_True : Py_False;
- }
- else {
- result = (m == YES) ? Py_True : Py_False;
- }
-
- Py_INCREF(result);
- return result;
-}
-
-
-/*[clinic input]
+unicodedata.UCD.is_normalized
+
+ self: self
+ form: unicode
+ unistr as input: unicode
+ /
+
+Return whether the Unicode string unistr is in the normal form 'form'.
+
+Valid values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
+[clinic start generated code]*/
+
+static PyObject *
+unicodedata_UCD_is_normalized_impl(PyObject *self, PyObject *form,
+ PyObject *input)
+/*[clinic end generated code: output=11e5a3694e723ca5 input=a544f14cea79e508]*/
+{
+ if (PyUnicode_READY(input) == -1) {
+ return NULL;
+ }
+
+ if (PyUnicode_GET_LENGTH(input) == 0) {
+ /* special case empty input strings. */
+ Py_RETURN_TRUE;
+ }
+
+ PyObject *result;
+ bool nfc = false;
+ bool k = false;
+ QuickcheckResult m;
+
+ PyObject *cmp;
+ int match = 0;
+
+ if (_PyUnicode_EqualToASCIIId(form, &PyId_NFC)) {
+ nfc = true;
+ }
+ else if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKC)) {
+ nfc = true;
+ k = true;
+ }
+ else if (_PyUnicode_EqualToASCIIId(form, &PyId_NFD)) {
+ /* matches default values for `nfc` and `k` */
+ }
+ else if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKD)) {
+ k = true;
+ }
+ else {
+ PyErr_SetString(PyExc_ValueError, "invalid normalization form");
+ return NULL;
+ }
+
+ m = is_normalized_quickcheck(self, input, nfc, k, false);
+
+ if (m == MAYBE) {
+ cmp = (nfc ? nfc_nfkc : nfd_nfkd)(self, input, k);
+ if (cmp == NULL) {
+ return NULL;
+ }
+ match = PyUnicode_Compare(input, cmp);
+ Py_DECREF(cmp);
+ result = (match == 0) ? Py_True : Py_False;
+ }
+ else {
+ result = (m == YES) ? Py_True : Py_False;
+ }
+
+ Py_INCREF(result);
+ return result;
+}
+
+
+/*[clinic input]
unicodedata.UCD.normalize
self: self
- form: unicode
+ form: unicode
unistr as input: unicode
/
@@ -928,9 +928,9 @@ Valid values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
[clinic start generated code]*/
static PyObject *
-unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
+unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
PyObject *input)
-/*[clinic end generated code: output=05ca4385a2ad6983 input=3a5206c0ad2833fb]*/
+/*[clinic end generated code: output=05ca4385a2ad6983 input=3a5206c0ad2833fb]*/
{
if (PyUnicode_GET_LENGTH(input) == 0) {
/* Special case empty input strings, since resizing
@@ -939,29 +939,29 @@ unicodedata_UCD_normalize_impl(PyObject *self, PyObject *form,
return input;
}
- if (_PyUnicode_EqualToASCIIId(form, &PyId_NFC)) {
- if (is_normalized_quickcheck(self, input, true, false, true) == YES) {
+ if (_PyUnicode_EqualToASCIIId(form, &PyId_NFC)) {
+ if (is_normalized_quickcheck(self, input, true, false, true) == YES) {
Py_INCREF(input);
return input;
}
return nfc_nfkc(self, input, 0);
}
- if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKC)) {
- if (is_normalized_quickcheck(self, input, true, true, true) == YES) {
+ if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKC)) {
+ if (is_normalized_quickcheck(self, input, true, true, true) == YES) {
Py_INCREF(input);
return input;
}
return nfc_nfkc(self, input, 1);
}
- if (_PyUnicode_EqualToASCIIId(form, &PyId_NFD)) {
- if (is_normalized_quickcheck(self, input, false, false, true) == YES) {
+ if (_PyUnicode_EqualToASCIIId(form, &PyId_NFD)) {
+ if (is_normalized_quickcheck(self, input, false, false, true) == YES) {
Py_INCREF(input);
return input;
}
return nfd_nfkd(self, input, 0);
}
- if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKD)) {
- if (is_normalized_quickcheck(self, input, false, true, true) == YES) {
+ if (_PyUnicode_EqualToASCIIId(form, &PyId_NFKD)) {
+ if (is_normalized_quickcheck(self, input, false, true, true) == YES) {
Py_INCREF(input);
return input;
}
@@ -987,7 +987,7 @@ _gethash(const char *s, int len, int scale)
unsigned long h = 0;
unsigned long ix;
for (i = 0; i < len; i++) {
- h = (h * scale) + (unsigned char) Py_TOUPPER(s[i]);
+ h = (h * scale) + (unsigned char) Py_TOUPPER(s[i]);
ix = h & 0xff000000;
if (ix)
h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff;
@@ -1031,14 +1031,14 @@ static int
is_unified_ideograph(Py_UCS4 code)
{
return
- (0x3400 <= code && code <= 0x4DBF) || /* CJK Ideograph Extension A */
- (0x4E00 <= code && code <= 0x9FFC) || /* CJK Ideograph */
- (0x20000 <= code && code <= 0x2A6DD) || /* CJK Ideograph Extension B */
+ (0x3400 <= code && code <= 0x4DBF) || /* CJK Ideograph Extension A */
+ (0x4E00 <= code && code <= 0x9FFC) || /* CJK Ideograph */
+ (0x20000 <= code && code <= 0x2A6DD) || /* CJK Ideograph Extension B */
(0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */
(0x2B740 <= code && code <= 0x2B81D) || /* CJK Ideograph Extension D */
(0x2B820 <= code && code <= 0x2CEA1) || /* CJK Ideograph Extension E */
- (0x2CEB0 <= code && code <= 0x2EBE0) || /* CJK Ideograph Extension F */
- (0x30000 <= code && code <= 0x3134A); /* CJK Ideograph Extension G */
+ (0x2CEB0 <= code && code <= 0x2EBE0) || /* CJK Ideograph Extension F */
+ (0x30000 <= code && code <= 0x3134A); /* CJK Ideograph Extension G */
}
/* macros used to determine if the given code point is in the PUA range that
@@ -1057,7 +1057,7 @@ _getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
int offset;
int i;
int word;
- const unsigned char* w;
+ const unsigned char* w;
if (code >= 0x110000)
return 0;
@@ -1158,7 +1158,7 @@ _cmpname(PyObject *self, int code, const char* name, int namelen)
if (!_getucname(self, code, buffer, NAME_MAXLEN, 1))
return 0;
for (i = 0; i < namelen; i++) {
- if (Py_TOUPPER(name[i]) != buffer[i])
+ if (Py_TOUPPER(name[i]) != buffer[i])
return 0;
}
return buffer[namelen] == '\0';
@@ -1379,7 +1379,7 @@ static PyMethodDef unicodedata_functions[] = {
UNICODEDATA_UCD_DECOMPOSITION_METHODDEF
UNICODEDATA_UCD_NAME_METHODDEF
UNICODEDATA_UCD_LOOKUP_METHODDEF
- UNICODEDATA_UCD_IS_NORMALIZED_METHODDEF
+ UNICODEDATA_UCD_IS_NORMALIZED_METHODDEF
UNICODEDATA_UCD_NORMALIZE_METHODDEF
{NULL, NULL} /* sentinel */
};
@@ -1393,10 +1393,10 @@ static PyTypeObject UCD_Type = {
0, /*tp_itemsize*/
/* methods */
(destructor)PyObject_Del, /*tp_dealloc*/
- 0, /*tp_vectorcall_offset*/
+ 0, /*tp_vectorcall_offset*/
0, /*tp_getattr*/
0, /*tp_setattr*/
- 0, /*tp_as_async*/
+ 0, /*tp_as_async*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
@@ -1456,7 +1456,7 @@ PyInit_unicodedata(void)
{
PyObject *m, *v;
- Py_SET_TYPE(&UCD_Type, &PyType_Type);
+ Py_SET_TYPE(&UCD_Type, &PyType_Type);
m = PyModule_Create(&unicodedatamodule);
if (!m)