diff options
author | romankoshelev <romankoshelev@yandex-team.com> | 2023-08-14 19:51:50 +0300 |
---|---|---|
committer | romankoshelev <romankoshelev@yandex-team.com> | 2023-08-15 01:24:11 +0300 |
commit | cfcd865e05c0d0525ea27d1e153a043b32a85138 (patch) | |
tree | 68d3b3b25271e8a4998505897a269ff7ce119b76 /contrib/libs/icu/common/ucase.cpp | |
parent | ccb790c507bd5e8ffe2ef9886ce5ee0a7ce22a15 (diff) | |
download | ydb-cfcd865e05c0d0525ea27d1e153a043b32a85138.tar.gz |
Update ICU to 73.2
Diffstat (limited to 'contrib/libs/icu/common/ucase.cpp')
-rw-r--r-- | contrib/libs/icu/common/ucase.cpp | 397 |
1 files changed, 264 insertions, 133 deletions
diff --git a/contrib/libs/icu/common/ucase.cpp b/contrib/libs/icu/common/ucase.cpp index 4aa856507a..392e1266ae 100644 --- a/contrib/libs/icu/common/ucase.cpp +++ b/contrib/libs/icu/common/ucase.cpp @@ -22,27 +22,14 @@ #include "unicode/utypes.h" #include "unicode/unistr.h" #include "unicode/uset.h" -#include "unicode/udata.h" /* UDataInfo */ #include "unicode/utf16.h" -#include "ucmndata.h" /* DataHeader */ -#include "udatamem.h" -#include "umutex.h" -#include "uassert.h" #include "cmemory.h" -#include "utrie2.h" +#include "uassert.h" #include "ucase.h" +#include "umutex.h" +#include "utrie2.h" -struct UCaseProps { - UDataMemory *mem; - const int32_t *indexes; - const uint16_t *exceptions; - const uint16_t *unfold; - - UTrie2 trie; - uint8_t formatVersion[4]; -}; - -/* ucase_props_data.h is machine-generated by gencase --csource */ +/* ucase_props_data.h is machine-generated by genprops/casepropsbuilder.cpp */ #define INCLUDED_FROM_UCASE_CPP #include "ucase_props_data.h" @@ -53,7 +40,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, ui /* add the start code point to the USet */ const USetAdder *sa=(const USetAdder *)context; sa->add(sa->set, start); - return TRUE; + return true; } U_CFUNC void U_EXPORT2 @@ -63,7 +50,7 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { } /* add the start code point of each same-value range of the trie */ - utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); + utrie2_enum(&ucase_props_singleton.trie, nullptr, _enumPropertyStartsRange, sa); /* add code points with hardcoded properties, plus the ones following them */ @@ -77,6 +64,13 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { /* data access primitives --------------------------------------------------- */ +U_CAPI const struct UCaseProps * U_EXPORT2 +ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength) { + *pExceptionsLength = UPRV_LENGTHOF(ucase_props_exceptions); + *pUnfoldLength = UPRV_LENGTHOF(ucase_props_unfold); + return &ucase_props_singleton; +} + U_CFUNC const UTrie2 * U_EXPORT2 ucase_getTrie() { return &ucase_props_singleton.trie; @@ -201,47 +195,17 @@ ucase_totitle(UChar32 c) { return c; } -static const UChar iDot[2] = { 0x69, 0x307 }; -static const UChar jDot[2] = { 0x6a, 0x307 }; -static const UChar iOgonekDot[3] = { 0x12f, 0x307 }; -static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 }; -static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 }; -static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 }; +static const char16_t iDot[2] = { 0x69, 0x307 }; +static const char16_t jDot[2] = { 0x6a, 0x307 }; +static const char16_t iOgonekDot[3] = { 0x12f, 0x307 }; +static const char16_t iDotGrave[3] = { 0x69, 0x307, 0x300 }; +static const char16_t iDotAcute[3] = { 0x69, 0x307, 0x301 }; +static const char16_t iDotTilde[3] = { 0x69, 0x307, 0x303 }; U_CFUNC void U_EXPORT2 ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { - uint16_t props; - - /* - * Hardcode the case closure of i and its relatives and ignore the - * data file data for these characters. - * The Turkic dotless i and dotted I with their case mapping conditions - * and case folding option make the related characters behave specially. - * This code matches their closure behavior to their case folding behavior. - */ - - switch(c) { - case 0x49: - /* regular i and I are in one equivalence class */ - sa->add(sa->set, 0x69); - return; - case 0x69: - sa->add(sa->set, 0x49); - return; - case 0x130: - /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */ - sa->addString(sa->set, iDot, 2); - return; - case 0x131: - /* dotless i is in a class by itself */ - return; - default: - /* otherwise use the data file data */ - break; - } - - props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)!=UCASE_NONE) { /* add the one simple case mapping, no matter what type it is */ @@ -255,19 +219,42 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { * c has exceptions, so there may be multiple simple and/or * full case mappings. Add them all. */ - const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); - const UChar *closure; + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); uint16_t excWord=*pe++; - int32_t idx, closureLength, fullLength, length; - - pe0=pe; + const uint16_t *pe0=pe; + + // Hardcode the case closure of i and its relatives and ignore the + // data file data for these characters. + // The Turkic dotless i and dotted I with their case mapping conditions + // and case folding option make the related characters behave specially. + // This code matches their closure behavior to their case folding behavior. + if (excWord&UCASE_EXC_CONDITIONAL_FOLD) { + // These characters have Turkic case foldings. Hardcode their closure. + if (c == 0x49) { + // Regular i and I are in one equivalence class. + sa->add(sa->set, 0x69); + return; + } else if (c == 0x130) { + // Dotted I is in a class with <0069 0307> + // (for canonical equivalence with <0049 0307>). + sa->addString(sa->set, iDot, 2); + return; + } + } else if (c == 0x69) { + sa->add(sa->set, 0x49); + return; + } else if (c == 0x131) { + // Dotless i is in a class by itself. + return; + } /* add all simple case mappings */ - for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) { + for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) { if(HAS_SLOT(excWord, idx)) { pe=pe0; - GET_SLOT_VALUE(excWord, idx, pe, c); - sa->add(sa->set, c); + UChar32 mapping; + GET_SLOT_VALUE(excWord, idx, pe, mapping); + sa->add(sa->set, mapping); } } if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) { @@ -278,19 +265,22 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { } /* get the closure string pointer & length */ + const char16_t *closure; + int32_t closureLength; if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) { pe=pe0; GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength); closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */ - closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */ + closure=(const char16_t *)pe+1; /* behind this slot, unless there are full case mappings */ } else { closureLength=0; - closure=NULL; + closure=nullptr; } /* add the full case folding */ if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { pe=pe0; + int32_t fullLength; GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength); /* start of full case mapping strings */ @@ -303,9 +293,9 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { fullLength>>=4; /* add the full case folding string */ - length=fullLength&0xf; + int32_t length=fullLength&0xf; if(length!=0) { - sa->addString(sa->set, (const UChar *)pe, length); + sa->addString(sa->set, (const char16_t *)pe, length); pe+=length; } @@ -315,13 +305,150 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { fullLength>>=4; pe+=fullLength; - closure=(const UChar *)pe; /* behind full case mappings */ + closure=(const char16_t *)pe; /* behind full case mappings */ } /* add each code point in the closure string */ - for(idx=0; idx<closureLength;) { - U16_NEXT_UNSAFE(closure, idx, c); - sa->add(sa->set, c); + for(int32_t idx=0; idx<closureLength;) { + UChar32 mapping; + U16_NEXT_UNSAFE(closure, idx, mapping); + sa->add(sa->set, mapping); + } + } +} + +namespace { + +/** + * Add the simple case closure mapping, + * except if there is not actually an scf relationship between the two characters. + * TODO: Unicode should probably add the corresponding scf mappings. + * See https://crbug.com/v8/13377 and Unicode-internal PAG issue #23. + * If & when those scf mappings are added, we should be able to remove all of these exceptions. + */ +void addOneSimpleCaseClosure(UChar32 c, UChar32 t, const USetAdder *sa) { + switch (c) { + case 0x0390: + if (t == 0x1FD3) { return; } + break; + case 0x03B0: + if (t == 0x1FE3) { return; } + break; + case 0x1FD3: + if (t == 0x0390) { return; } + break; + case 0x1FE3: + if (t == 0x03B0) { return; } + break; + case 0xFB05: + if (t == 0xFB06) { return; } + break; + case 0xFB06: + if (t == 0xFB05) { return; } + break; + default: + break; + } + sa->add(sa->set, t); +} + +} // namespace + +U_CFUNC void U_EXPORT2 +ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_GET_TYPE(props)!=UCASE_NONE) { + /* add the one simple case mapping, no matter what type it is */ + int32_t delta=UCASE_GET_DELTA(props); + if(delta!=0) { + sa->add(sa->set, c+delta); + } + } + } else { + // c has exceptions. Add the mappings relevant for scf=Simple_Case_Folding. + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); + uint16_t excWord=*pe++; + const uint16_t *pe0=pe; + + // Hardcode the case closure of i and its relatives and ignore the + // data file data for these characters, like in ucase_addCaseClosure(). + if (excWord&UCASE_EXC_CONDITIONAL_FOLD) { + // These characters have Turkic case foldings. Hardcode their closure. + if (c == 0x49) { + // Regular i and I are in one equivalence class. + sa->add(sa->set, 0x69); + return; + } else if (c == 0x130) { + // For scf=Simple_Case_Folding, dotted I is in a class by itself. + return; + } + } else if (c == 0x69) { + sa->add(sa->set, 0x49); + return; + } else if (c == 0x131) { + // Dotless i is in a class by itself. + return; + } + + // Add all simple case mappings. + for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) { + if(HAS_SLOT(excWord, idx)) { + pe=pe0; + UChar32 mapping; + GET_SLOT_VALUE(excWord, idx, pe, mapping); + addOneSimpleCaseClosure(c, mapping, sa); + } + } + if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) { + pe=pe0; + int32_t delta; + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); + UChar32 mapping = (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; + addOneSimpleCaseClosure(c, mapping, sa); + } + + /* get the closure string pointer & length */ + const char16_t *closure; + int32_t closureLength; + if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) { + pe=pe0; + GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength); + closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */ + closure=(const char16_t *)pe+1; /* behind this slot, unless there are full case mappings */ + } else { + closureLength=0; + closure=nullptr; + } + + // Skip the full case mappings. + if(closureLength > 0 && HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { + pe=pe0; + int32_t fullLength; + GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength); + + /* start of full case mapping strings */ + ++pe; + + fullLength&=0xffff; /* bits 16 and higher are reserved */ + + // Skip all 4 full case mappings. + pe+=fullLength&UCASE_FULL_LOWER; + fullLength>>=4; + pe+=fullLength&0xf; + fullLength>>=4; + pe+=fullLength&0xf; + fullLength>>=4; + pe+=fullLength; + + closure=(const char16_t *)pe; /* behind full case mappings */ + } + + // Add each code point in the closure string whose scf maps back to c. + for(int32_t idx=0; idx<closureLength;) { + UChar32 mapping; + U16_NEXT_UNSAFE(closure, idx, mapping); + addOneSimpleCaseClosure(c, mapping, sa); } } } @@ -331,7 +458,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { * must be length>0 and max>0 and length<=max */ static inline int32_t -strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) { +strcmpMax(const char16_t *s, int32_t length, const char16_t *t, int32_t max) { int32_t c1, c2; max-=length; /* we require length<=max, so no need to decrement max in the loop */ @@ -356,11 +483,11 @@ strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) { } U_CFUNC UBool U_EXPORT2 -ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) { +ucase_addStringCaseClosure(const char16_t *s, int32_t length, const USetAdder *sa) { int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth; - if(ucase_props_singleton.unfold==NULL || s==NULL) { - return FALSE; /* no reverse case folding data, or no string */ + if(ucase_props_singleton.unfold==nullptr || s==nullptr) { + return false; /* no reverse case folding data, or no string */ } if(length<=1) { /* the string is too short to find any match */ @@ -370,7 +497,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) * but this does not make much practical difference because * a single supplementary code point would just not be found */ - return FALSE; + return false; } const uint16_t *unfold=ucase_props_singleton.unfold; @@ -381,7 +508,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) if(length>unfoldStringWidth) { /* the string is too long to find any match */ - return FALSE; + return false; } /* do a binary search for the string */ @@ -389,7 +516,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) limit=unfoldRows; while(start<limit) { i=(start+limit)/2; - const UChar *p=reinterpret_cast<const UChar *>(unfold+(i*unfoldRowWidth)); + const char16_t *p=reinterpret_cast<const char16_t *>(unfold+(i*unfoldRowWidth)); result=strcmpMax(s, length, p, unfoldStringWidth); if(result==0) { @@ -401,7 +528,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) sa->add(sa->set, c); ucase_addCaseClosure(c, sa); } - return TRUE; + return true; } else if(result<0) { limit=i; } else /* result>0 */ { @@ -409,13 +536,13 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) } } - return FALSE; /* string not found */ + return false; /* string not found */ } U_NAMESPACE_BEGIN FullCaseFoldingIterator::FullCaseFoldingIterator() - : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)), + : unfold(reinterpret_cast<const char16_t *>(ucase_props_singleton.unfold)), unfoldRows(unfold[UCASE_UNFOLD_ROWS]), unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]), unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]), @@ -427,7 +554,7 @@ FullCaseFoldingIterator::FullCaseFoldingIterator() UChar32 FullCaseFoldingIterator::next(UnicodeString &full) { // Advance past the last-delivered code point. - const UChar *p=unfold+(currentRow*unfoldRowWidth); + const char16_t *p=unfold+(currentRow*unfoldRowWidth); if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) { ++currentRow; p+=unfoldRowWidth; @@ -437,7 +564,7 @@ FullCaseFoldingIterator::next(UnicodeString &full) { // Set "full" to the NUL-terminated string in the first unfold column. int32_t length=unfoldStringWidth; while(length>0 && p[length-1]==0) { --length; } - full.setTo(FALSE, p, length); + full.setTo(false, p, length); // Return the code point. UChar32 c; U16_NEXT_UNSAFE(p, rowCpIndex, c); @@ -690,7 +817,7 @@ ucase_isCaseSensitive(UChar32 c) { * - The general category of C is * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or * Letter Modifier (Lm), or Symbol Modifier (Sk) - * - C is one of the following characters + * - C is one of the following characters * U+0027 APOSTROPHE * U+00AD SOFT HYPHEN (SHY) * U+2019 RIGHT SINGLE QUOTATION MARK @@ -714,7 +841,7 @@ ucase_isCaseSensitive(UChar32 c) { #define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0) /** - * Requires non-NULL locale ID but otherwise does the equivalent of + * Requires non-nullptr locale ID but otherwise does the equivalent of * checking for language codes as if uloc_getLanguage() were called: * Accepts both 2- and 3-letter codes and accepts case variants. */ @@ -727,7 +854,7 @@ ucase_getCaseLocale(const char *locale) { * examined and copied/transformed. * * Because this code does not want to depend on uloc, the caller must - * pass in a non-NULL locale, i.e., may need to call uloc_getDefault(). + * pass in a non-nullptr locale, i.e., may need to call uloc_getDefault(). */ char c=*locale++; // Fastpath for English "en" which is often used for default (=root locale) case mappings, @@ -910,8 +1037,8 @@ static UBool isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) { UChar32 c; - if(iter==NULL) { - return FALSE; + if(iter==nullptr) { + return false; } for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) { @@ -919,13 +1046,13 @@ isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) { if(type&4) { /* case-ignorable, continue with the loop */ } else if(type!=UCASE_NONE) { - return TRUE; /* followed by cased letter */ + return true; /* followed by cased letter */ } else { - return FALSE; /* uncased and not case-ignorable */ + return false; /* uncased and not case-ignorable */ } } - return FALSE; /* not followed by cased letter */ + return false; /* not followed by cased letter */ } /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */ @@ -935,20 +1062,20 @@ isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) { int32_t dotType; int8_t dir; - if(iter==NULL) { - return FALSE; + if(iter==nullptr) { + return false; } for(dir=-1; (c=iter(context, dir))>=0; dir=0) { dotType=getDotType(c); if(dotType==UCASE_SOFT_DOTTED) { - return TRUE; /* preceded by TYPE_i */ + return true; /* preceded by TYPE_i */ } else if(dotType!=UCASE_OTHER_ACCENT) { - return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */ + return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */ } } - return FALSE; /* not preceded by TYPE_i */ + return false; /* not preceded by TYPE_i */ } /* @@ -992,21 +1119,21 @@ isPrecededBy_I(UCaseContextIterator *iter, void *context) { int32_t dotType; int8_t dir; - if(iter==NULL) { - return FALSE; + if(iter==nullptr) { + return false; } for(dir=-1; (c=iter(context, dir))>=0; dir=0) { if(c==0x49) { - return TRUE; /* preceded by I */ + return true; /* preceded by I */ } dotType=getDotType(c); if(dotType!=UCASE_OTHER_ACCENT) { - return FALSE; /* preceded by different base character (not I), or intervening cc==230 */ + return false; /* preceded by different base character (not I), or intervening cc==230 */ } } - return FALSE; /* not preceded by I */ + return false; /* not preceded by I */ } /* Is followed by one or more cc==230 ? */ @@ -1016,20 +1143,20 @@ isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) { int32_t dotType; int8_t dir; - if(iter==NULL) { - return FALSE; + if(iter==nullptr) { + return false; } for(dir=1; (c=iter(context, dir))>=0; dir=0) { dotType=getDotType(c); if(dotType==UCASE_ABOVE) { - return TRUE; /* at least one cc==230 following */ + return true; /* at least one cc==230 following */ } else if(dotType!=UCASE_OTHER_ACCENT) { - return FALSE; /* next base character, no more cc==230 following */ + return false; /* next base character, no more cc==230 following */ } } - return FALSE; /* no more cc==230 following */ + return false; /* no more cc==230 following */ } /* Is followed by a dot above (without cc==230 in between) ? */ @@ -1039,31 +1166,33 @@ isFollowedByDotAbove(UCaseContextIterator *iter, void *context) { int32_t dotType; int8_t dir; - if(iter==NULL) { - return FALSE; + if(iter==nullptr) { + return false; } for(dir=1; (c=iter(context, dir))>=0; dir=0) { if(c==0x307) { - return TRUE; + return true; } dotType=getDotType(c); if(dotType!=UCASE_OTHER_ACCENT) { - return FALSE; /* next base character or cc==230 in between */ + return false; /* next base character or cc==230 in between */ } } - return FALSE; /* no dot above following */ + return false; /* no dot above following */ } U_CAPI int32_t U_EXPORT2 ucase_toFullLower(UChar32 c, UCaseContextIterator *iter, void *context, - const UChar **pString, + const char16_t **pString, int32_t loc) { // The sign of the result has meaning, input must be non-negative so that it can be returned as is. U_ASSERT(c >= 0); UChar32 result=c; + // Reset the output pointer in case it was uninitialized. + *pString=nullptr; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_IS_UPPER_OR_TITLE(props)) { @@ -1148,7 +1277,6 @@ ucase_toFullLower(UChar32 c, 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE */ - *pString=nullptr; return 0; /* remove the dot (continue without output) */ } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) { /* @@ -1185,7 +1313,7 @@ ucase_toFullLower(UChar32 c, full&=UCASE_FULL_LOWER; if(full!=0) { /* set the output pointer to the lowercase mapping */ - *pString=reinterpret_cast<const UChar *>(pe+1); + *pString=reinterpret_cast<const char16_t *>(pe+1); /* return the string length */ return full; @@ -1209,12 +1337,14 @@ ucase_toFullLower(UChar32 c, static int32_t toUpperOrTitle(UChar32 c, UCaseContextIterator *iter, void *context, - const UChar **pString, + const char16_t **pString, int32_t loc, UBool upperNotTitle) { // The sign of the result has meaning, input must be non-negative so that it can be returned as is. U_ASSERT(c >= 0); UChar32 result=c; + // Reset the output pointer in case it was uninitialized. + *pString=nullptr; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { @@ -1252,7 +1382,6 @@ toUpperOrTitle(UChar32 c, 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE */ - *pString=nullptr; return 0; /* remove the dot (continue without output) */ } else if(c==0x0587) { // See ICU-13416: @@ -1290,7 +1419,7 @@ toUpperOrTitle(UChar32 c, if(full!=0) { /* set the output pointer to the result string */ - *pString=reinterpret_cast<const UChar *>(pe); + *pString=reinterpret_cast<const char16_t *>(pe); /* return the string length */ return full; @@ -1319,17 +1448,17 @@ toUpperOrTitle(UChar32 c, U_CAPI int32_t U_EXPORT2 ucase_toFullUpper(UChar32 c, UCaseContextIterator *iter, void *context, - const UChar **pString, + const char16_t **pString, int32_t caseLocale) { - return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE); + return toUpperOrTitle(c, iter, context, pString, caseLocale, true); } U_CAPI int32_t U_EXPORT2 ucase_toFullTitle(UChar32 c, UCaseContextIterator *iter, void *context, - const UChar **pString, + const char16_t **pString, int32_t caseLocale) { - return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE); + return toUpperOrTitle(c, iter, context, pString, caseLocale, false); } /* case folding ------------------------------------------------------------- */ @@ -1444,11 +1573,13 @@ ucase_fold(UChar32 c, uint32_t options) { U_CAPI int32_t U_EXPORT2 ucase_toFullFolding(UChar32 c, - const UChar **pString, + const char16_t **pString, uint32_t options) { // The sign of the result has meaning, input must be non-negative so that it can be returned as is. U_ASSERT(c >= 0); UChar32 result=c; + // Reset the output pointer in case it was uninitialized. + *pString=nullptr; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_IS_UPPER_OR_TITLE(props)) { @@ -1495,7 +1626,7 @@ ucase_toFullFolding(UChar32 c, if(full!=0) { /* set the output pointer to the result string */ - *pString=reinterpret_cast<const UChar *>(pe); + *pString=reinterpret_cast<const char16_t *>(pe); /* return the string length */ return full; @@ -1542,7 +1673,7 @@ U_CAPI UChar32 U_EXPORT2 u_tolower(UChar32 c) { return ucase_tolower(c); } - + /* Transforms the Unicode character to its upper case equivalent.*/ U_CAPI UChar32 U_EXPORT2 u_toupper(UChar32 c) { @@ -1564,7 +1695,7 @@ u_foldCase(UChar32 c, uint32_t options) { U_CFUNC int32_t U_EXPORT2 ucase_hasBinaryProperty(UChar32 c, UProperty which) { /* case mapping properties */ - const UChar *resultString; + const char16_t *resultString; switch(which) { case UCHAR_LOWERCASE: return (UBool)(UCASE_LOWER==ucase_getType(c)); @@ -1591,18 +1722,18 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) { * start sets for normalization and case mappings. */ case UCHAR_CHANGES_WHEN_LOWERCASED: - return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); + return (UBool)(ucase_toFullLower(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0); case UCHAR_CHANGES_WHEN_UPPERCASED: - return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); + return (UBool)(ucase_toFullUpper(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0); case UCHAR_CHANGES_WHEN_TITLECASED: - return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); + return (UBool)(ucase_toFullTitle(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0); /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */ case UCHAR_CHANGES_WHEN_CASEMAPPED: return (UBool)( - ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 || - ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 || - ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); + ucase_toFullLower(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0 || + ucase_toFullUpper(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0 || + ucase_toFullTitle(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0); default: - return FALSE; + return false; } } |