Update ICU to 73.2

author: romankoshelev <romankoshelev@yandex-team.com> 2023-08-14 19:51:50 +0300
committer: romankoshelev <romankoshelev@yandex-team.com> 2023-08-15 01:24:11 +0300
commit: cfcd865e05c0d0525ea27d1e153a043b32a85138 (patch)
tree: 68d3b3b25271e8a4998505897a269ff7ce119b76 /contrib/libs/icu/common/ucase.cpp
parent: ccb790c507bd5e8ffe2ef9886ce5ee0a7ce22a15 (diff)
download: ydb-cfcd865e05c0d0525ea27d1e153a043b32a85138.tar.gz
1 file changed, 264 insertions, 133 deletions
diff --git a/contrib/libs/icu/common/ucase.cpp b/contrib/libs/icu/common/ucase.cpp
index 4aa856507a..392e1266ae 100644
--- a/contrib/libs/icu/common/ucase.cpp
+++ b/contrib/libs/icu/common/ucase.cpp
@@ -22,27 +22,14 @@
 #include "unicode/utypes.h"
 #include "unicode/unistr.h"
 #include "unicode/uset.h"
-#include "unicode/udata.h" /* UDataInfo */
 #include "unicode/utf16.h"
-#include "ucmndata.h" /* DataHeader */
-#include "udatamem.h"
-#include "umutex.h"
-#include "uassert.h"
 #include "cmemory.h"
-#include "utrie2.h"
+#include "uassert.h"
 #include "ucase.h"
+#include "umutex.h"
+#include "utrie2.h"
 
-struct UCaseProps {
-    UDataMemory *mem;
-    const int32_t *indexes;
-    const uint16_t *exceptions;
-    const uint16_t *unfold;
-
-    UTrie2 trie;
-    uint8_t formatVersion[4];
-};
-
-/* ucase_props_data.h is machine-generated by gencase --csource */
+/* ucase_props_data.h is machine-generated by genprops/casepropsbuilder.cpp */
 #define INCLUDED_FROM_UCASE_CPP
 #include "ucase_props_data.h"
 
@@ -53,7 +40,7 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, ui
     /* add the start code point to the USet */
     const USetAdder *sa=(const USetAdder *)context;
     sa->add(sa->set, start);
-    return TRUE;
+    return true;
 }
 
 U_CFUNC void U_EXPORT2
@@ -63,7 +50,7 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
     }
 
     /* add the start code point of each same-value range of the trie */
-    utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
+    utrie2_enum(&ucase_props_singleton.trie, nullptr, _enumPropertyStartsRange, sa);
 
     /* add code points with hardcoded properties, plus the ones following them */
 
@@ -77,6 +64,13 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
 
 /* data access primitives --------------------------------------------------- */
 
+U_CAPI const struct UCaseProps * U_EXPORT2
+ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength) {
+    *pExceptionsLength = UPRV_LENGTHOF(ucase_props_exceptions);
+    *pUnfoldLength = UPRV_LENGTHOF(ucase_props_unfold);
+    return &ucase_props_singleton;
+}
+
 U_CFUNC const UTrie2 * U_EXPORT2
 ucase_getTrie() {
     return &ucase_props_singleton.trie;
@@ -201,47 +195,17 @@ ucase_totitle(UChar32 c) {
     return c;
 }
 
-static const UChar iDot[2] = { 0x69, 0x307 };
-static const UChar jDot[2] = { 0x6a, 0x307 };
-static const UChar iOgonekDot[3] = { 0x12f, 0x307 };
-static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };
-static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };
-static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
+static const char16_t iDot[2] = { 0x69, 0x307 };
+static const char16_t jDot[2] = { 0x6a, 0x307 };
+static const char16_t iOgonekDot[3] = { 0x12f, 0x307 };
+static const char16_t iDotGrave[3] = { 0x69, 0x307, 0x300 };
+static const char16_t iDotAcute[3] = { 0x69, 0x307, 0x301 };
+static const char16_t iDotTilde[3] = { 0x69, 0x307, 0x303 };
 
 
 U_CFUNC void U_EXPORT2
 ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
-    uint16_t props;
-
-    /*
-     * Hardcode the case closure of i and its relatives and ignore the
-     * data file data for these characters.
-     * The Turkic dotless i and dotted I with their case mapping conditions
-     * and case folding option make the related characters behave specially.
-     * This code matches their closure behavior to their case folding behavior.
-     */
-
-    switch(c) {
-    case 0x49:
-        /* regular i and I are in one equivalence class */
-        sa->add(sa->set, 0x69);
-        return;
-    case 0x69:
-        sa->add(sa->set, 0x49);
-        return;
-    case 0x130:
-        /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
-        sa->addString(sa->set, iDot, 2);
-        return;
-    case 0x131:
-        /* dotless i is in a class by itself */
-        return;
-    default:
-        /* otherwise use the data file data */
-        break;
-    }
-
-    props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
     if(!UCASE_HAS_EXCEPTION(props)) {
         if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
             /* add the one simple case mapping, no matter what type it is */
@@ -255,19 +219,42 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
          * c has exceptions, so there may be multiple simple and/or
          * full case mappings. Add them all.
          */
-        const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
-        const UChar *closure;
+        const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
         uint16_t excWord=*pe++;
-        int32_t idx, closureLength, fullLength, length;
-
-        pe0=pe;
+        const uint16_t *pe0=pe;
+
+        // Hardcode the case closure of i and its relatives and ignore the
+        // data file data for these characters.
+        // The Turkic dotless i and dotted I with their case mapping conditions
+        // and case folding option make the related characters behave specially.
+        // This code matches their closure behavior to their case folding behavior.
+        if (excWord&UCASE_EXC_CONDITIONAL_FOLD) {
+            // These characters have Turkic case foldings. Hardcode their closure.
+            if (c == 0x49) {
+                // Regular i and I are in one equivalence class.
+                sa->add(sa->set, 0x69);
+                return;
+            } else if (c == 0x130) {
+                // Dotted I is in a class with <0069 0307>
+                // (for canonical equivalence with <0049 0307>).
+                sa->addString(sa->set, iDot, 2);
+                return;
+            }
+        } else if (c == 0x69) {
+            sa->add(sa->set, 0x49);
+            return;
+        } else if (c == 0x131) {
+            // Dotless i is in a class by itself.
+            return;
+        }
 
         /* add all simple case mappings */
-        for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
+        for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
             if(HAS_SLOT(excWord, idx)) {
                 pe=pe0;
-                GET_SLOT_VALUE(excWord, idx, pe, c);
-                sa->add(sa->set, c);
+                UChar32 mapping;
+                GET_SLOT_VALUE(excWord, idx, pe, mapping);
+                sa->add(sa->set, mapping);
             }
         }
         if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
@@ -278,19 +265,22 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
         }
 
         /* get the closure string pointer & length */
+        const char16_t *closure;
+        int32_t closureLength;
         if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
             pe=pe0;
             GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
             closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
-            closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */
+            closure=(const char16_t *)pe+1; /* behind this slot, unless there are full case mappings */
         } else {
             closureLength=0;
-            closure=NULL;
+            closure=nullptr;
         }
 
         /* add the full case folding */
         if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
             pe=pe0;
+            int32_t fullLength;
             GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
 
             /* start of full case mapping strings */
@@ -303,9 +293,9 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
             fullLength>>=4;
 
             /* add the full case folding string */
-            length=fullLength&0xf;
+            int32_t length=fullLength&0xf;
             if(length!=0) {
-                sa->addString(sa->set, (const UChar *)pe, length);
+                sa->addString(sa->set, (const char16_t *)pe, length);
                 pe+=length;
             }
 
@@ -315,13 +305,150 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
             fullLength>>=4;
             pe+=fullLength;
 
-            closure=(const UChar *)pe; /* behind full case mappings */
+            closure=(const char16_t *)pe; /* behind full case mappings */
         }
 
         /* add each code point in the closure string */
-        for(idx=0; idx<closureLength;) {
-            U16_NEXT_UNSAFE(closure, idx, c);
-            sa->add(sa->set, c);
+        for(int32_t idx=0; idx<closureLength;) {
+            UChar32 mapping;
+            U16_NEXT_UNSAFE(closure, idx, mapping);
+            sa->add(sa->set, mapping);
+        }
+    }
+}
+
+namespace {
+
+/**
+ * Add the simple case closure mapping,
+ * except if there is not actually an scf relationship between the two characters.
+ * TODO: Unicode should probably add the corresponding scf mappings.
+ * See https://crbug.com/v8/13377 and Unicode-internal PAG issue #23.
+ * If & when those scf mappings are added, we should be able to remove all of these exceptions.
+ */
+void addOneSimpleCaseClosure(UChar32 c, UChar32 t, const USetAdder *sa) {
+    switch (c) {
+    case 0x0390:
+        if (t == 0x1FD3) { return; }
+        break;
+    case 0x03B0:
+        if (t == 0x1FE3) { return; }
+        break;
+    case 0x1FD3:
+        if (t == 0x0390) { return; }
+        break;
+    case 0x1FE3:
+        if (t == 0x03B0) { return; }
+        break;
+    case 0xFB05:
+        if (t == 0xFB06) { return; }
+        break;
+    case 0xFB06:
+        if (t == 0xFB05) { return; }
+        break;
+    default:
+        break;
+    }
+    sa->add(sa->set, t);
+}
+
+}  // namespace
+
+U_CFUNC void U_EXPORT2
+ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) {
+    uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
+    if(!UCASE_HAS_EXCEPTION(props)) {
+        if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
+            /* add the one simple case mapping, no matter what type it is */
+            int32_t delta=UCASE_GET_DELTA(props);
+            if(delta!=0) {
+                sa->add(sa->set, c+delta);
+            }
+        }
+    } else {
+        // c has exceptions. Add the mappings relevant for scf=Simple_Case_Folding.
+        const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
+        uint16_t excWord=*pe++;
+        const uint16_t *pe0=pe;
+
+        // Hardcode the case closure of i and its relatives and ignore the
+        // data file data for these characters, like in ucase_addCaseClosure().
+        if (excWord&UCASE_EXC_CONDITIONAL_FOLD) {
+            // These characters have Turkic case foldings. Hardcode their closure.
+            if (c == 0x49) {
+                // Regular i and I are in one equivalence class.
+                sa->add(sa->set, 0x69);
+                return;
+            } else if (c == 0x130) {
+                // For scf=Simple_Case_Folding, dotted I is in a class by itself.
+                return;
+            }
+        } else if (c == 0x69) {
+            sa->add(sa->set, 0x49);
+            return;
+        } else if (c == 0x131) {
+            // Dotless i is in a class by itself.
+            return;
+        }
+
+        // Add all simple case mappings.
+        for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
+            if(HAS_SLOT(excWord, idx)) {
+                pe=pe0;
+                UChar32 mapping;
+                GET_SLOT_VALUE(excWord, idx, pe, mapping);
+                addOneSimpleCaseClosure(c, mapping, sa);
+            }
+        }
+        if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
+            pe=pe0;
+            int32_t delta;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
+            UChar32 mapping = (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
+            addOneSimpleCaseClosure(c, mapping, sa);
+        }
+
+        /* get the closure string pointer & length */
+        const char16_t *closure;
+        int32_t closureLength;
+        if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
+            pe=pe0;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
+            closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
+            closure=(const char16_t *)pe+1; /* behind this slot, unless there are full case mappings */
+        } else {
+            closureLength=0;
+            closure=nullptr;
+        }
+
+        // Skip the full case mappings.
+        if(closureLength > 0 && HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
+            pe=pe0;
+            int32_t fullLength;
+            GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
+
+            /* start of full case mapping strings */
+            ++pe;
+
+            fullLength&=0xffff; /* bits 16 and higher are reserved */
+
+            // Skip all 4 full case mappings.
+            pe+=fullLength&UCASE_FULL_LOWER;
+            fullLength>>=4;
+            pe+=fullLength&0xf;
+            fullLength>>=4;
+            pe+=fullLength&0xf;
+            fullLength>>=4;
+            pe+=fullLength;
+
+            closure=(const char16_t *)pe; /* behind full case mappings */
+        }
+
+        // Add each code point in the closure string whose scf maps back to c.
+        for(int32_t idx=0; idx<closureLength;) {
+            UChar32 mapping;
+            U16_NEXT_UNSAFE(closure, idx, mapping);
+            addOneSimpleCaseClosure(c, mapping, sa);
         }
     }
 }
@@ -331,7 +458,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
  * must be length>0 and max>0 and length<=max
  */
 static inline int32_t
-strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
+strcmpMax(const char16_t *s, int32_t length, const char16_t *t, int32_t max) {
     int32_t c1, c2;
 
     max-=length; /* we require length<=max, so no need to decrement max in the loop */
@@ -356,11 +483,11 @@ strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
 }
 
 U_CFUNC UBool U_EXPORT2
-ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) {
+ucase_addStringCaseClosure(const char16_t *s, int32_t length, const USetAdder *sa) {
     int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
 
-    if(ucase_props_singleton.unfold==NULL || s==NULL) {
-        return FALSE; /* no reverse case folding data, or no string */
+    if(ucase_props_singleton.unfold==nullptr || s==nullptr) {
+        return false; /* no reverse case folding data, or no string */
     }
     if(length<=1) {
         /* the string is too short to find any match */
@@ -370,7 +497,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
          * but this does not make much practical difference because
          * a single supplementary code point would just not be found
          */
-        return FALSE;
+        return false;
     }
 
     const uint16_t *unfold=ucase_props_singleton.unfold;
@@ -381,7 +508,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
 
     if(length>unfoldStringWidth) {
         /* the string is too long to find any match */
-        return FALSE;
+        return false;
     }
 
     /* do a binary search for the string */
@@ -389,7 +516,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
     limit=unfoldRows;
     while(start<limit) {
         i=(start+limit)/2;
-        const UChar *p=reinterpret_cast<const UChar *>(unfold+(i*unfoldRowWidth));
+        const char16_t *p=reinterpret_cast<const char16_t *>(unfold+(i*unfoldRowWidth));
         result=strcmpMax(s, length, p, unfoldStringWidth);
 
         if(result==0) {
@@ -401,7 +528,7 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
                 sa->add(sa->set, c);
                 ucase_addCaseClosure(c, sa);
             }
-            return TRUE;
+            return true;
         } else if(result<0) {
             limit=i;
         } else /* result>0 */ {
@@ -409,13 +536,13 @@ ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa)
         }
     }
 
-    return FALSE; /* string not found */
+    return false; /* string not found */
 }
 
 U_NAMESPACE_BEGIN
 
 FullCaseFoldingIterator::FullCaseFoldingIterator()
-        : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
+        : unfold(reinterpret_cast<const char16_t *>(ucase_props_singleton.unfold)),
           unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
           unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
           unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
@@ -427,7 +554,7 @@ FullCaseFoldingIterator::FullCaseFoldingIterator()
 UChar32
 FullCaseFoldingIterator::next(UnicodeString &full) {
     // Advance past the last-delivered code point.
-    const UChar *p=unfold+(currentRow*unfoldRowWidth);
+    const char16_t *p=unfold+(currentRow*unfoldRowWidth);
     if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) {
         ++currentRow;
         p+=unfoldRowWidth;
@@ -437,7 +564,7 @@ FullCaseFoldingIterator::next(UnicodeString &full) {
     // Set "full" to the NUL-terminated string in the first unfold column.
     int32_t length=unfoldStringWidth;
     while(length>0 && p[length-1]==0) { --length; }
-    full.setTo(FALSE, p, length);
+    full.setTo(false, p, length);
     // Return the code point.
     UChar32 c;
     U16_NEXT_UNSAFE(p, rowCpIndex, c);
@@ -690,7 +817,7 @@ ucase_isCaseSensitive(UChar32 c) {
  *   - The general category of C is
  *     Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
  *     Letter Modifier (Lm), or Symbol Modifier (Sk)
- *   - C is one of the following characters 
+ *   - C is one of the following characters
  *     U+0027 APOSTROPHE
  *     U+00AD SOFT HYPHEN (SHY)
  *     U+2019 RIGHT SINGLE QUOTATION MARK
@@ -714,7 +841,7 @@ ucase_isCaseSensitive(UChar32 c) {
 #define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
 
 /**
- * Requires non-NULL locale ID but otherwise does the equivalent of
+ * Requires non-null locale ID but otherwise does the equivalent of
  * checking for language codes as if uloc_getLanguage() were called:
  * Accepts both 2- and 3-letter codes and accepts case variants.
  */
@@ -727,7 +854,7 @@ ucase_getCaseLocale(const char *locale) {
      * examined and copied/transformed.
      *
      * Because this code does not want to depend on uloc, the caller must
-     * pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
+     * pass in a non-null locale, i.e., may need to call uloc_getDefault().
      */
     char c=*locale++;
     // Fastpath for English "en" which is often used for default (=root locale) case mappings,
@@ -910,8 +1037,8 @@ static UBool
 isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) {
     UChar32 c;
 
-    if(iter==NULL) {
-        return FALSE;
+    if(iter==nullptr) {
+        return false;
     }
 
     for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
@@ -919,13 +1046,13 @@ isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) {
         if(type&4) {
             /* case-ignorable, continue with the loop */
         } else if(type!=UCASE_NONE) {
-            return TRUE; /* followed by cased letter */
+            return true; /* followed by cased letter */
         } else {
-            return FALSE; /* uncased and not case-ignorable */
+            return false; /* uncased and not case-ignorable */
         }
     }
 
-    return FALSE; /* not followed by cased letter */
+    return false; /* not followed by cased letter */
 }
 
 /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
@@ -935,20 +1062,20 @@ isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) {
     int32_t dotType;
     int8_t dir;
 
-    if(iter==NULL) {
-        return FALSE;
+    if(iter==nullptr) {
+        return false;
     }
 
     for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
         dotType=getDotType(c);
         if(dotType==UCASE_SOFT_DOTTED) {
-            return TRUE; /* preceded by TYPE_i */
+            return true; /* preceded by TYPE_i */
         } else if(dotType!=UCASE_OTHER_ACCENT) {
-            return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
+            return false; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
         }
     }
 
-    return FALSE; /* not preceded by TYPE_i */
+    return false; /* not preceded by TYPE_i */
 }
 
 /*
@@ -992,21 +1119,21 @@ isPrecededBy_I(UCaseContextIterator *iter, void *context) {
     int32_t dotType;
     int8_t dir;
 
-    if(iter==NULL) {
-        return FALSE;
+    if(iter==nullptr) {
+        return false;
     }
 
     for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
         if(c==0x49) {
-            return TRUE; /* preceded by I */
+            return true; /* preceded by I */
         }
         dotType=getDotType(c);
         if(dotType!=UCASE_OTHER_ACCENT) {
-            return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
+            return false; /* preceded by different base character (not I), or intervening cc==230 */
         }
     }
 
-    return FALSE; /* not preceded by I */
+    return false; /* not preceded by I */
 }
 
 /* Is followed by one or more cc==230 ? */
@@ -1016,20 +1143,20 @@ isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) {
     int32_t dotType;
     int8_t dir;
 
-    if(iter==NULL) {
-        return FALSE;
+    if(iter==nullptr) {
+        return false;
     }
 
     for(dir=1; (c=iter(context, dir))>=0; dir=0) {
         dotType=getDotType(c);
         if(dotType==UCASE_ABOVE) {
-            return TRUE; /* at least one cc==230 following */
+            return true; /* at least one cc==230 following */
         } else if(dotType!=UCASE_OTHER_ACCENT) {
-            return FALSE; /* next base character, no more cc==230 following */
+            return false; /* next base character, no more cc==230 following */
         }
     }
 
-    return FALSE; /* no more cc==230 following */
+    return false; /* no more cc==230 following */
 }
 
 /* Is followed by a dot above (without cc==230 in between) ? */
@@ -1039,31 +1166,33 @@ isFollowedByDotAbove(UCaseContextIterator *iter, void *context) {
     int32_t dotType;
     int8_t dir;
 
-    if(iter==NULL) {
-        return FALSE;
+    if(iter==nullptr) {
+        return false;
     }
 
     for(dir=1; (c=iter(context, dir))>=0; dir=0) {
         if(c==0x307) {
-            return TRUE;
+            return true;
         }
         dotType=getDotType(c);
         if(dotType!=UCASE_OTHER_ACCENT) {
-            return FALSE; /* next base character or cc==230 in between */
+            return false; /* next base character or cc==230 in between */
         }
     }
 
-    return FALSE; /* no dot above following */
+    return false; /* no dot above following */
 }
 
 U_CAPI int32_t U_EXPORT2
 ucase_toFullLower(UChar32 c,
                   UCaseContextIterator *iter, void *context,
-                  const UChar **pString,
+                  const char16_t **pString,
                   int32_t loc) {
     // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
     U_ASSERT(c >= 0);
     UChar32 result=c;
+    // Reset the output pointer in case it was uninitialized.
+    *pString=nullptr;
     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
     if(!UCASE_HAS_EXCEPTION(props)) {
         if(UCASE_IS_UPPER_OR_TITLE(props)) {
@@ -1148,7 +1277,6 @@ ucase_toFullLower(UChar32 c,
                     0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
                     0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
                  */
-                *pString=nullptr;
                 return 0; /* remove the dot (continue without output) */
             } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
                 /*
@@ -1185,7 +1313,7 @@ ucase_toFullLower(UChar32 c,
             full&=UCASE_FULL_LOWER;
             if(full!=0) {
                 /* set the output pointer to the lowercase mapping */
-                *pString=reinterpret_cast<const UChar *>(pe+1);
+                *pString=reinterpret_cast<const char16_t *>(pe+1);
 
                 /* return the string length */
                 return full;
@@ -1209,12 +1337,14 @@ ucase_toFullLower(UChar32 c,
 static int32_t
 toUpperOrTitle(UChar32 c,
                UCaseContextIterator *iter, void *context,
-               const UChar **pString,
+               const char16_t **pString,
                int32_t loc,
                UBool upperNotTitle) {
     // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
     U_ASSERT(c >= 0);
     UChar32 result=c;
+    // Reset the output pointer in case it was uninitialized.
+    *pString=nullptr;
     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
     if(!UCASE_HAS_EXCEPTION(props)) {
         if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
@@ -1252,7 +1382,6 @@ toUpperOrTitle(UChar32 c,
 
                     0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
                  */
-                *pString=nullptr;
                 return 0; /* remove the dot (continue without output) */
             } else if(c==0x0587) {
                 // See ICU-13416:
@@ -1290,7 +1419,7 @@ toUpperOrTitle(UChar32 c,
 
             if(full!=0) {
                 /* set the output pointer to the result string */
-                *pString=reinterpret_cast<const UChar *>(pe);
+                *pString=reinterpret_cast<const char16_t *>(pe);
 
                 /* return the string length */
                 return full;
@@ -1319,17 +1448,17 @@ toUpperOrTitle(UChar32 c,
 U_CAPI int32_t U_EXPORT2
 ucase_toFullUpper(UChar32 c,
                   UCaseContextIterator *iter, void *context,
-                  const UChar **pString,
+                  const char16_t **pString,
                   int32_t caseLocale) {
-    return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE);
+    return toUpperOrTitle(c, iter, context, pString, caseLocale, true);
 }
 
 U_CAPI int32_t U_EXPORT2
 ucase_toFullTitle(UChar32 c,
                   UCaseContextIterator *iter, void *context,
-                  const UChar **pString,
+                  const char16_t **pString,
                   int32_t caseLocale) {
-    return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE);
+    return toUpperOrTitle(c, iter, context, pString, caseLocale, false);
 }
 
 /* case folding ------------------------------------------------------------- */
@@ -1444,11 +1573,13 @@ ucase_fold(UChar32 c, uint32_t options) {
 
 U_CAPI int32_t U_EXPORT2
 ucase_toFullFolding(UChar32 c,
-                    const UChar **pString,
+                    const char16_t **pString,
                     uint32_t options) {
     // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
     U_ASSERT(c >= 0);
     UChar32 result=c;
+    // Reset the output pointer in case it was uninitialized.
+    *pString=nullptr;
     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
     if(!UCASE_HAS_EXCEPTION(props)) {
         if(UCASE_IS_UPPER_OR_TITLE(props)) {
@@ -1495,7 +1626,7 @@ ucase_toFullFolding(UChar32 c,
 
             if(full!=0) {
                 /* set the output pointer to the result string */
-                *pString=reinterpret_cast<const UChar *>(pe);
+                *pString=reinterpret_cast<const char16_t *>(pe);
 
                 /* return the string length */
                 return full;
@@ -1542,7 +1673,7 @@ U_CAPI UChar32 U_EXPORT2
 u_tolower(UChar32 c) {
     return ucase_tolower(c);
 }
-    
+
 /* Transforms the Unicode character to its upper case equivalent.*/
 U_CAPI UChar32 U_EXPORT2
 u_toupper(UChar32 c) {
@@ -1564,7 +1695,7 @@ u_foldCase(UChar32 c, uint32_t options) {
 U_CFUNC int32_t U_EXPORT2
 ucase_hasBinaryProperty(UChar32 c, UProperty which) {
     /* case mapping properties */
-    const UChar *resultString;
+    const char16_t *resultString;
     switch(which) {
     case UCHAR_LOWERCASE:
         return (UBool)(UCASE_LOWER==ucase_getType(c));
@@ -1591,18 +1722,18 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) {
      * start sets for normalization and case mappings.
      */
     case UCHAR_CHANGES_WHEN_LOWERCASED:
-        return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
+        return (UBool)(ucase_toFullLower(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0);
     case UCHAR_CHANGES_WHEN_UPPERCASED:
-        return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
+        return (UBool)(ucase_toFullUpper(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0);
     case UCHAR_CHANGES_WHEN_TITLECASED:
-        return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
+        return (UBool)(ucase_toFullTitle(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0);
     /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
     case UCHAR_CHANGES_WHEN_CASEMAPPED:
         return (UBool)(
-            ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
-            ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
-            ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
+            ucase_toFullLower(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0 ||
+            ucase_toFullUpper(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0 ||
+            ucase_toFullTitle(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=0);
     default:
-        return FALSE;
+        return false;
     }
 }
author	romankoshelev <romankoshelev@yandex-team.com>	2023-08-14 19:51:50 +0300
committer	romankoshelev <romankoshelev@yandex-team.com>	2023-08-15 01:24:11 +0300
commit	cfcd865e05c0d0525ea27d1e153a043b32a85138 (patch)
tree	68d3b3b25271e8a4998505897a269ff7ce119b76 /contrib/libs/icu/common/ucase.cpp
parent	ccb790c507bd5e8ffe2ef9886ce5ee0a7ce22a15 (diff)
download	ydb-cfcd865e05c0d0525ea27d1e153a043b32a85138.tar.gz