about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/libs/icu/common/ucase.cpp
diff options
context:
space:
mode:
author: romankoshelev <romankoshelev@yandex-team.com>, 2023-08-14 19:51:50 +0300
committer: romankoshelev <romankoshelev@yandex-team.com>, 2023-08-15 01:24:11 +0300
commit: cfcd865e05c0d0525ea27d1e153a043b32a85138 (patch)
tree: 68d3b3b25271e8a4998505897a269ff7ce119b76 /contrib/libs/icu/common/ucase.cpp
parent: ccb790c507bd5e8ffe2ef9886ce5ee0a7ce22a15 (diff)
download: ydb-cfcd865e05c0d0525ea27d1e153a043b32a85138.tar.gz
Update ICU to 73.2
Diffstat (limited to 'contrib/libs/icu/common/ucase.cpp')
-rw-r--r-- contrib/libs/icu/common/ucase.cpp 397
1 file changed, 264 insertions, 133 deletions
diff --git a/contrib/libs/icu/common/ucase.cpp b/contrib/libs/icu/common/ucase.cpp
index 4aa856507a..392e1266ae 100644
--- a/contrib/libs/icu/common/ucase.cpp
+++ b/contrib/libs/icu/common/ucase.cpp
@@ -22,27 +22,14 @@
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/uset.h"
-#include "unicode/udata.h" /* UDataInfo */
#include "unicode/utf16.h"
-#include "ucmndata.h" /* DataHeader */
-#include "udatamem.h"
-#include "umutex.h"
-#include "uassert.h"
#include "cmemory.h"
-#include "utrie2.h"
+#include "uassert.h"
#include "ucase.h"
+#include "umutex.h"
+#include "utrie2.h"
-struct UCaseProps {
- UDataMemory *mem;
- const int32_t *indexes;
- const uint16_t *exceptions;
- const uint16_t *unfold;
-
- UTrie2 trie;
- uint8_t formatVersion[4];
-};
-
-/* ucase_props_data.h is machine-generated by gencase --csource */
+/* ucase_props_data.h is machine-generated by genprops/casepropsbuilder.cpp */
#define INCLUDED_FROM_UCASE_CPP
#include "ucase_props_data.h"
@@ -53,7 +40,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, ui
/* add the start code point to the USet */
const USetAdder *sa=(const USetAdder *)context;
sa->add(sa->set, start);
- return TRUE;
+ return true;
}
U_CFUNC void U_EXPORT2
@@ -63,7 +50,7 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
}
/* add the start code point of each same-value range of the trie */
- utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
+ utrie2_enum(&ucase_props_singleton.trie, nullptr, _enumPropertyStartsRange, sa);
/* add code points with hardcoded properties, plus the ones following them */
@@ -77,6 +64,13 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
/* data access primitives --------------------------------------------------- */
+U_CAPI const struct UCaseProps * U_EXPORT2
+ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength) {
+ *pExceptionsLength = UPRV_LENGTHOF(ucase_props_exceptions);
+ *pUnfoldLength = UPRV_LENGTHOF(ucase_props_unfold);
+ return &ucase_props_singleton;
+}
+
U_CFUNC const UTrie2 * U_EXPORT2
ucase_getTrie() {
return &ucase_props_singleton.trie;
@@ -201,47 +195,17 @@ ucase_totitle(UChar32 c) {
return c;
}
-static const UChar iDot[2] = { 0x69, 0x307 };
-static const UChar jDot[2] = { 0x6a, 0x307 };
-static const UChar iOgonekDot[3] = { 0x12f, 0x307 };
-static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };
-static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };
-static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
+static const char16_t iDot[2] = { 0x69, 0x307 };
+static const char16_t jDot[2] = { 0x6a, 0x307 };
+static const char16_t iOgonekDot[3] = { 0x12f, 0x307 };
+static const char16_t iDotGrave[3] = { 0x69, 0x307, 0x300 };
+static const char16_t iDotAcute[3] = { 0x69, 0x307, 0x301 };
+static const char16_t iDotTilde[3] = { 0x69, 0x307, 0x303 };
U_CFUNC void U_EXPORT2
ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
- uint16_t props;
-
- /*
- * Hardcode the case closure of i and its relatives and ignore the
- * data file data for these characters.
- * The Turkic dotless i and dotted I with their case mapping conditions
- * and case folding option make the related characters behave specially.
- * This code matches their closure behavior to their case folding behavior.
- */
-
- switch(c) {
- case 0x49:
- /* regular i and I are in one equivalence class */
- sa->add(sa->set, 0x69);
- return;
- case 0x69:
- sa->add(sa->set, 0x49);
- return;
- case 0x130:
- /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
- sa->addString(sa->set, iDot, 2);
- return;
- case 0x131:
- /* dotless i is in a class by itself */
- return;
- default:
- /* otherwise use the data file data */
- break;
- }
-
- props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+ uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
/* add the one simple case mapping, no matter what type it is */
@@ -255,19 +219,42 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
* c has exceptions, so there may be multiple simple and/or
* full case mappings. Add them all.
*/
- const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
- const UChar *closure;
+ const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
uint16_t excWord=*pe++;
- int32_t idx, closureLength, fullLength, length;
-
- pe0=pe;
+ const uint16_t *pe0=pe;
+
+ // Hardcode the case closure of i and its relatives and ignore the
+ // data file data for these characters.
+ // The Turkic dotless i and dotted I with their case mapping conditions
+ // and case folding option make the related characters behave specially.
+ // This code matches their closure behavior to their case folding behavior.
+ if (excWord&UCASE_EXC_CONDITIONAL_FOLD) {
+ // These characters have Turkic case foldings. Hardcode their closure.
+ if (c == 0x49) {
+ // Regular i and I are in one equivalence class.
+ sa->add(sa->set, 0x69);
+ return;
+ } else if (c == 0x130) {
+ // Dotted I is in a class with <0069 0307>
+ // (for canonical equivalence with <0049 0307>).
+ sa->addString(sa->set, iDot, 2);
+ return;
+ }
+ } else if (c == 0x69) {
+ sa->add(sa->set, 0x49);
+ return;
+ } else if (c == 0x131) {
+ // Dotless i is in a class by itself.
+ return;
+ }
/* add all simple case mappings */
- for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
+ for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
if(HAS_SLOT(excWord, idx)) {
pe=pe0;
- GET_SLOT_VALUE(excWord, idx, pe, c);
- sa->add(sa->set, c);
+ UChar32 mapping;
+ GET_SLOT_VALUE(excWord, idx, pe, mapping);
+ sa->add(sa->set, mapping);
}
}
if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
@@ -278,19 +265,22 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
}
/* get the closure string pointer & length */
+ const char16_t *closure;
+ int32_t closureLength;
if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
pe=pe0;
GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
- closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */
+ closure=(const char16_t *)pe+1; /* behind this slot, unless there are full case mappings */
} else {
closureLength=0;
- closure=NULL;
+ closure=nullptr;
}
/* add the full case folding */
if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
pe=pe0;
+ int32_t fullLength;
GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
/* start of full case mapping strings */
@@ -303,9 +293,9 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
fullLength>>=4;
/* add the full case folding string */
- length=fullLength&0xf;
+ int32_t length=fullLength&0xf;
if(length!=0) {
- sa->addString(sa->set, (const UChar *)pe, length);
+ sa->addString(sa->set, (const char16_t *)pe, length);
pe+=length;
}
@@ -315,13 +305,150 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
fullLength>>=4;
pe+=fullLength;
- closure=(const UChar *)pe; /* behind full case mappings */
+ closure=(const char16_t *)pe; /* behind full case mappings */
}
/* add each code point in the closure string */
- for(idx=0; idx<closureLength;) {
- U16_NEXT_UNSAFE(closure, idx, c);
- sa->add(sa->set, c);
+ for(int32_t idx=0; idx<closureLength;) {
+ UChar32 mapping;
+ U16_NEXT_UNSAFE(closure, idx, mapping);
+ sa->add(sa->set, mapping);
+ }
+ }
+}
+
+namespace {
+
+/**
+ * Add the simple case closure mapping,
+ * except if there is not actually an scf relationship between the two characters.
+ * TODO: Unicode should probably add the corresponding scf mappings.
+ * See https://crbug.com/v8/13377 and Unicode-internal PAG issue #23.
+ * If & when those scf mappings are added, we should be able to remove all of these exceptions.
+ */
+void addOneSimpleCaseClosure(UChar32 c, UChar32 t, const USetAdder *sa) {
+ switch (c) {
+ case 0x0390:
+ if (t == 0x1FD3) { return; }
+ break;
+ case 0x03B0:
+ if (t == 0x1FE3) { return; }
+ break;
+ case 0x1FD3:
+ if (t == 0x0390) { return; }
+ break;
+ case 0x1FE3:
+ if (t == 0x03B0) { return; }
+ break;
+ case 0xFB05:
+ if (t == 0xFB06) { return; }
+ break;
+ case 0xFB06:
+ if (t == 0xFB05) { return; }
+ break;
+ default:
+ break;
+ }
+ sa->add(sa->set, t);
+}
+
+} // namespace
+
+U_CFUNC void U_EXPORT2
+ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) {
+ uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+ if(!UCASE_HAS_EXCEPTION(props)) {
+ if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
+ /* add the one simple case mapping, no matter what type it is */
+ int32_t delta=UCASE_GET_DELTA(props);
+ if(delta!=0) {
+ sa->add(sa->set, c+delta);
+ }
+ }
+ } else {
+ // c has exceptions. Add the mappings relevant for scf=Simple_Case_Folding.
+ const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
+ uint16_t excWord=*pe++;
+ const uint16_t *pe0=pe;
+
+ // Hardcode the case closure of i and its relatives and ignore the
+ // data file data for these characters, like in ucase_addCaseClosure().
+ if (excWord&UCASE_EXC_CONDITIONAL_FOLD) {
+ // These characters have Turkic case foldings. Hardcode their closure.
+ if (c == 0x49) {
+ // Regular i and I are in one equivalence class.
+ sa->add(sa->set, 0x69);
+ return;
+ } else if (c == 0x130) {
+ // For scf=Simple_Case_Folding, dotted I is in a class by itself.
+ return;
+ }
+ } else if (c == 0x69) {
+ sa->add(sa->set, 0x49);
+ return;
+ } else if (c == 0x131) {
+ // Dotless i is in a class by itself.
+ return;
+ }
+
+ // Add all simple case mappings.
+ for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
+ if(HAS_SLOT(excWord, idx)) {
+ pe=pe0;
+ UChar32 mapping;
+ GET_SLOT_VALUE(excWord, idx, pe, mapping);
+ addOneSimpleCaseClosure(c, mapping, sa);
+ }
+ }
+ if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
+ pe=pe0;
+ int32_t delta;
+ GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
+ UChar32 mapping = (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+ addOneSimpleCaseClosure(c, mapping, sa);
+ }
+
+ /* get the closure string pointer & length */
+ const char16_t *closure;
+ int32_t closureLength;
+ if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
+ pe=pe0;
+ GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
+ closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
+ closure=(const char16_t *)pe+1; /* behind this slot, unless there are full case mappings */
+ } else {
+ closureLength=0;
+ closure=nullptr;
+ }
+
+ // Skip the full case mappings.
+ if(closureLength > 0 && HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
+ pe=pe0;
+ int32_t fullLength;
+ GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
+
+ /* start of full case mapping strings */
+ ++pe;
+
+ fullLength&=0xffff; /* bits 16 and higher are reserved */
+
+ // Skip all 4 full case mappings.
+ pe+=fullLength&UCASE_FULL_LOWER;
+ fullLength>>=4;
+ pe+=fullLength&0xf;
+ fullLength>>=4;
+ pe+=fullLength&0xf;
+ fullLength>>=4;
+ pe+=fullLength;
+
+ closure=(const char16_t *)pe; /* behind full case mappings */
+ }
+
+ // Add each code point in the closure string whose scf maps back to c.
+ for(int32_t idx=0; idx<closureLength;) {
+ UChar32 mapping;
+ U16_NEXT_UNSAFE(closure, idx, mapping);
+ addOneSimpleCaseClosure(c, mapping, sa);
}
}
}
@@ -331,7 +458,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
* must be length>0 and max>0 and length<=max
*/
static inline int32_t
-strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
+strcmpMax(const char16_t *s, int32_t length, const char16_t *t, int32_t max) {
int32_t c1, c2;
max-=length; /* we require length<=max, so no need to decrement max in the loop */
@@ -356,11 +483,11 @@ strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
}
U_CFUNC UBool U_EXPORT2
-ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) {
+ucase_addStringCaseClosure(const char16_t *s, int32_t length, const USetAdder *sa) {
int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
- if(ucase_props_singleton.unfold==NULL || s==NULL) {
- return FALSE; /* no reverse case folding data, or no string */
+ if(ucase_props_singleton.unfold==nullptr || s==nullptr) {
+ return false; /* no reverse case folding data, or no string */
}
if(length<=1) {
/* the string is too short to find any match */
@@ -370,7 +497,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
* but this does not make much practical difference because
* a single supplementary code point would just not be found
*/
- return FALSE;
+ return false;
}
const uint16_t *unfold=ucase_props_singleton.unfold;
@@ -381,7 +508,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
if(length>unfoldStringWidth) {
/* the string is too long to find any match */
- return FALSE;
+ return false;
}
/* do a binary search for the string */
@@ -389,7 +516,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
limit=unfoldRows;
while(start<limit) {
i=(start+limit)/2;
- const UChar *p=reinterpret_cast<const UChar *>(unfold+(i*unfoldRowWidth));
+ const char16_t *p=reinterpret_cast<const char16_t *>(unfold+(i*unfoldRowWidth));
result=strcmpMax(s, length, p, unfoldStringWidth);
if(result==0) {
@@ -401,7 +528,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
sa->add(sa->set, c);
ucase_addCaseClosure(c, sa);
}
- return TRUE;
+ return true;
} else if(result<0) {
limit=i;
} else /* result>0 */ {
@@ -409,13 +536,13 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
}
}
- return FALSE; /* string not found */
+ return false; /* string not found */
}
U_NAMESPACE_BEGIN
FullCaseFoldingIterator::FullCaseFoldingIterator()
- : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
+ : unfold(reinterpret_cast<const char16_t *>(ucase_props_singleton.unfold)),
unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
@@ -427,7 +554,7 @@ FullCaseFoldingIterator::FullCaseFoldingIterator()
UChar32
FullCaseFoldingIterator::next(UnicodeString &full) {
// Advance past the last-delivered code point.
- const UChar *p=unfold+(currentRow*unfoldRowWidth);
+ const char16_t *p=unfold+(currentRow*unfoldRowWidth);
if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) {
++currentRow;
p+=unfoldRowWidth;
@@ -437,7 +564,7 @@ FullCaseFoldingIterator::next(UnicodeString &full) {
// Set "full" to the NUL-terminated string in the first unfold column.
int32_t length=unfoldStringWidth;
while(length>0 && p[length-1]==0) { --length; }
- full.setTo(FALSE, p, length);
+ full.setTo(false, p, length);
// Return the code point.
UChar32 c;
U16_NEXT_UNSAFE(p, rowCpIndex, c);
@@ -690,7 +817,7 @@ ucase_isCaseSensitive(UChar32 c) {
* - The general category of C is
* Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
* Letter Modifier (Lm), or Symbol Modifier (Sk)
- * - C is one of the following characters
+ * - C is one of the following characters
* U+0027 APOSTROPHE
* U+00AD SOFT HYPHEN (SHY)
* U+2019 RIGHT SINGLE QUOTATION MARK
@@ -714,7 +841,7 @@ ucase_isCaseSensitive(UChar32 c) {
#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
/**
- * Requires non-NULL locale ID but otherwise does the equivalent of
+ * Requires non-nullptr locale ID but otherwise does the equivalent of
* checking for language codes as if uloc_getLanguage() were called:
* Accepts both 2- and 3-letter codes and accepts case variants.
*/
@@ -727,7 +854,7 @@ ucase_getCaseLocale(const char *locale) {
* examined and copied/transformed.
*
* Because this code does not want to depend on uloc, the caller must
- * pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
+ * pass in a non-nullptr locale, i.e., may need to call uloc_getDefault().
*/
char c=*locale++;
// Fastpath for English "en" which is often used for default (=root locale) case mappings,
@@ -910,8 +1037,8 @@ static UBool
isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) {
UChar32 c;
- if(iter==NULL) {
- return FALSE;
+ if(iter==nullptr) {
+ return false;
}
for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
@@ -919,13 +1046,13 @@ isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) {
if(type&4) {
/* case-ignorable, continue with the loop */
} else if(type!=UCASE_NONE) {
- return TRUE; /* followed by cased letter */
+ return true; /* followed by cased letter */
} else {
- return FALSE; /* uncased and not case-ignorable */
+ return false; /* uncased and not case-ignorable */
}
}
- return FALSE; /* not followed by cased letter */
+ return false; /* not followed by cased letter */
}
/* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
@@ -935,20 +1062,20 @@ isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) {
int32_t dotType;
int8_t dir;
- if(iter==NULL) {
- return FALSE;
+ if(iter==nullptr) {
+ return false;
}
for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
dotType=getDotType(c);
if(dotType==UCASE_SOFT_DOTTED) {
- return TRUE; /* preceded by TYPE_i */
+ return true; /* preceded by TYPE_i */
} else if(dotType!=UCASE_OTHER_ACCENT) {
- return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
+ return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
}
}
- return FALSE; /* not preceded by TYPE_i */
+ return false; /* not preceded by TYPE_i */
}
/*
@@ -992,21 +1119,21 @@ isPrecededBy_I(UCaseContextIterator *iter, void *context) {
int32_t dotType;
int8_t dir;
- if(iter==NULL) {
- return FALSE;
+ if(iter==nullptr) {
+ return false;
}
for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
if(c==0x49) {
- return TRUE; /* preceded by I */
+ return true; /* preceded by I */
}
dotType=getDotType(c);
if(dotType!=UCASE_OTHER_ACCENT) {
- return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
+ return false; /* preceded by different base character (not I), or intervening cc==230 */
}
}
- return FALSE; /* not preceded by I */
+ return false; /* not preceded by I */
}
/* Is followed by one or more cc==230 ? */
@@ -1016,20 +1143,20 @@ isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) {
int32_t dotType;
int8_t dir;
- if(iter==NULL) {
- return FALSE;
+ if(iter==nullptr) {
+ return false;
}
for(dir=1; (c=iter(context, dir))>=0; dir=0) {
dotType=getDotType(c);
if(dotType==UCASE_ABOVE) {
- return TRUE; /* at least one cc==230 following */
+ return true; /* at least one cc==230 following */
} else if(dotType!=UCASE_OTHER_ACCENT) {
- return FALSE; /* next base character, no more cc==230 following */
+ return false; /* next base character, no more cc==230 following */
}
}
- return FALSE; /* no more cc==230 following */
+ return false; /* no more cc==230 following */
}
/* Is followed by a dot above (without cc==230 in between) ? */
@@ -1039,31 +1166,33 @@ isFollowedByDotAbove(UCaseContextIterator *iter, void *context) {
int32_t dotType;
int8_t dir;
- if(iter==NULL) {
- return FALSE;
+ if(iter==nullptr) {
+ return false;
}
for(dir=1; (c=iter(context, dir))>=0; dir=0) {
if(c==0x307) {
- return TRUE;
+ return true;
}
dotType=getDotType(c);
if(dotType!=UCASE_OTHER_ACCENT) {
- return FALSE; /* next base character or cc==230 in between */
+ return false; /* next base character or cc==230 in between */
}
}
- return FALSE; /* no dot above following */
+ return false; /* no dot above following */
}
U_CAPI int32_t U_EXPORT2
ucase_toFullLower(UChar32 c,
UCaseContextIterator *iter, void *context,
- const UChar **pString,
+ const char16_t **pString,
int32_t loc) {
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0);
UChar32 result=c;
+ // Reset the output pointer in case it was uninitialized.
+ *pString=nullptr;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
@@ -1148,7 +1277,6 @@ ucase_toFullLower(UChar32 c,
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/
- *pString=nullptr;
return 0; /* remove the dot (continue without output) */
} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
/*
@@ -1185,7 +1313,7 @@ ucase_toFullLower(UChar32 c,
full&=UCASE_FULL_LOWER;
if(full!=0) {
/* set the output pointer to the lowercase mapping */
- *pString=reinterpret_cast<const UChar *>(pe+1);
+ *pString=reinterpret_cast<const char16_t *>(pe+1);
/* return the string length */
return full;
@@ -1209,12 +1337,14 @@ ucase_toFullLower(UChar32 c,
static int32_t
toUpperOrTitle(UChar32 c,
UCaseContextIterator *iter, void *context,
- const UChar **pString,
+ const char16_t **pString,
int32_t loc,
UBool upperNotTitle) {
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0);
UChar32 result=c;
+ // Reset the output pointer in case it was uninitialized.
+ *pString=nullptr;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
@@ -1252,7 +1382,6 @@ toUpperOrTitle(UChar32 c,
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
*/
- *pString=nullptr;
return 0; /* remove the dot (continue without output) */
} else if(c==0x0587) {
// See ICU-13416:
@@ -1290,7 +1419,7 @@ toUpperOrTitle(UChar32 c,
if(full!=0) {
/* set the output pointer to the result string */
- *pString=reinterpret_cast<const UChar *>(pe);
+ *pString=reinterpret_cast<const char16_t *>(pe);
/* return the string length */
return full;
@@ -1319,17 +1448,17 @@ toUpperOrTitle(UChar32 c,
U_CAPI int32_t U_EXPORT2
ucase_toFullUpper(UChar32 c,
UCaseContextIterator *iter, void *context,
- const UChar **pString,
+ const char16_t **pString,
int32_t caseLocale) {
- return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE);
+ return toUpperOrTitle(c, iter, context, pString, caseLocale, true);
}
U_CAPI int32_t U_EXPORT2
ucase_toFullTitle(UChar32 c,
UCaseContextIterator *iter, void *context,
- const UChar **pString,
+ const char16_t **pString,
int32_t caseLocale) {
- return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE);
+ return toUpperOrTitle(c, iter, context, pString, caseLocale, false);
}
/* case folding ------------------------------------------------------------- */
@@ -1444,11 +1573,13 @@ ucase_fold(UChar32 c, uint32_t options) {
U_CAPI int32_t U_EXPORT2
ucase_toFullFolding(UChar32 c,
- const UChar **pString,
+ const char16_t **pString,
uint32_t options) {
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0);
UChar32 result=c;
+ // Reset the output pointer in case it was uninitialized.
+ *pString=nullptr;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
@@ -1495,7 +1626,7 @@ ucase_toFullFolding(UChar32 c,
if(full!=0) {
/* set the output pointer to the result string */
- *pString=reinterpret_cast<const UChar *>(pe);
+ *pString=reinterpret_cast<const char16_t *>(pe);
/* return the string length */
return full;
@@ -1542,7 +1673,7 @@ U_CAPI UChar32 U_EXPORT2
u_tolower(UChar32 c) {
return ucase_tolower(c);
}
-
+
/* Transforms the Unicode character to its upper case equivalent.*/
U_CAPI UChar32 U_EXPORT2
u_toupper(UChar32 c) {
@@ -1564,7 +1695,7 @@ u_foldCase(UChar32 c, uint32_t options) {
U_CFUNC int32_t U_EXPORT2
ucase_hasBinaryProperty(UChar32 c, UProperty which) {
/* case mapping properties */
- const UChar *resultString;
+ const char16_t *resultString;
switch(which) {
case UCHAR_LOWERCASE:
return (UBool)(UCASE_LOWER==ucase_getType(c));
@@ -1591,18 +1722,18 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) {
* start sets for normalization and case mappings.
*/
case UCHAR_CHANGES_WHEN_LOWERCASED:
- return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
+ return (UBool)(ucase_toFullLower(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0);
case UCHAR_CHANGES_WHEN_UPPERCASED:
- return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
+ return (UBool)(ucase_toFullUpper(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0);
case UCHAR_CHANGES_WHEN_TITLECASED:
- return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
+ return (UBool)(ucase_toFullTitle(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0);
/* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
case UCHAR_CHANGES_WHEN_CASEMAPPED:
return (UBool)(
- ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
- ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
- ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
+ ucase_toFullLower(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0 ||
+ ucase_toFullUpper(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0 ||
+ ucase_toFullTitle(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0);
default:
- return FALSE;
+ return false;
}
}