Update ICU to 75.1

904da4ae1c86fc5542eac7f1cd18d97b72eb8517
author: romankoshelev <romankoshelev@yandex-team.com> 2024-05-13 11:00:27 +0300
committer: romankoshelev <romankoshelev@yandex-team.com> 2024-05-13 11:13:05 +0300
commit: 5b22fadb0f035a3b82c328e0ae710ad2b92f6eac (patch)
tree: e15dc649c79c4fb78f35cd6694dfe9af9bfcc0ad /contrib/libs/icu/common/uniset_props.cpp
parent: 5946aa7d3cbca62f6bcf074e8a2b9346e7a96af4 (diff)
download: ydb-5b22fadb0f035a3b82c328e0ae710ad2b92f6eac.tar.gz
1 file changed, 32 insertions, 17 deletions
diff --git a/contrib/libs/icu/common/uniset_props.cpp b/contrib/libs/icu/common/uniset_props.cpp
index b3dbdf93c8..e6f880c9f2 100644
--- a/contrib/libs/icu/common/uniset_props.cpp
+++ b/contrib/libs/icu/common/uniset_props.cpp
@@ -106,7 +106,7 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) {
 // caseCompare(), but they also make UnicodeSet work for simple patterns when
 // no Unicode properties data is available - when caseCompare() fails
 
-static inline UBool
+inline UBool
 isPerlOpen(const UnicodeString &pattern, int32_t pos) {
     char16_t c;
     return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P');
@@ -117,12 +117,12 @@ isPerlClose(const UnicodeString &pattern, int32_t pos) {
     return pattern.charAt(pos)==u'}';
 }*/
 
-static inline UBool
+inline UBool
 isNameOpen(const UnicodeString &pattern, int32_t pos) {
     return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N';
 }
 
-static inline UBool
+inline UBool
 isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
     return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':';
 }
@@ -226,14 +226,14 @@ namespace {
 class UnicodeSetPointer {
     UnicodeSet* p;
 public:
-    inline UnicodeSetPointer() : p(0) {}
+    inline UnicodeSetPointer() : p(nullptr) {}
     inline ~UnicodeSetPointer() { delete p; }
     inline UnicodeSet* pointer() { return p; }
     inline UBool allocate() {
-        if (p == 0) {
+        if (p == nullptr) {
             p = new UnicodeSet();
         }
-        return p != 0;
+        return p != nullptr;
     }
 };
 
@@ -300,7 +300,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
 
         UChar32 c = 0;
         UBool literal = false;
-        UnicodeSet* nested = 0; // alias - do not delete
+        UnicodeSet* nested = nullptr; // alias - do not delete
 
         // -------- Check for property pattern
 
@@ -352,9 +352,9 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                         continue;
                     }
                 }
-            } else if (symbols != 0) {
+            } else if (symbols != nullptr) {
                 const UnicodeFunctor *m = symbols->lookupMatcher(c);
-                if (m != 0) {
+                if (m != nullptr) {
                     const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m);
                     if (ms == nullptr) {
                         ec = U_MALFORMED_SET;
@@ -390,7 +390,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                 patLocal.append(op);
             }
 
-            if (nested == 0) {
+            if (nested == nullptr) {
                 // lazy allocation
                 if (!scratch.allocate()) {
                     ec = U_MEMORY_ALLOCATION_ERROR;
@@ -549,7 +549,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                     c = chars.next(opts, literal, ec);
                     if (U_FAILURE(ec)) return;
                     UBool anchor = (c == u']' && !literal);
-                    if (symbols == 0 && !anchor) {
+                    if (symbols == nullptr && !anchor) {
                         c = SymbolTable::SYMBOL_REF;
                         chars.setPos(backup);
                         break; // literal '$'
@@ -657,16 +657,16 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
 
 namespace {
 
-static UBool numericValueFilter(UChar32 ch, void* context) {
+UBool numericValueFilter(UChar32 ch, void* context) {
     return u_getNumericValue(ch) == *(double*)context;
 }
 
-static UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
+UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
     int32_t value = *(int32_t*)context;
     return (U_GET_GC_MASK((UChar32) ch) & value) != 0;
 }
 
-static UBool versionFilter(UChar32 ch, void* context) {
+UBool versionFilter(UChar32 ch, void* context) {
     static const UVersionInfo none = { 0, 0, 0, 0 };
     UVersionInfo v;
     u_charAge(ch, v);
@@ -679,15 +679,19 @@ typedef struct {
     int32_t value;
 } IntPropertyContext;
 
-static UBool intPropertyFilter(UChar32 ch, void* context) {
+UBool intPropertyFilter(UChar32 ch, void* context) {
     IntPropertyContext* c = (IntPropertyContext*)context;
     return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value;
 }
 
-static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
+UBool scriptExtensionsFilter(UChar32 ch, void* context) {
     return uscript_hasScript(ch, *(UScriptCode*)context);
 }
 
+UBool idTypeFilter(UChar32 ch, void* context) {
+    return u_hasIDType(ch, *(UIdentifierType*)context);
+}
+
 }  // namespace
 
 /**
@@ -744,7 +748,7 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
 
 namespace {
 
-static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
+UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
     /* Note: we use ' ' in compiler code page */
     int32_t j = 0;
     char ch;
@@ -782,6 +786,10 @@ UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec)
         const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
         UScriptCode script = (UScriptCode)value;
         applyFilter(scriptExtensionsFilter, &script, inclusions, ec);
+    } else if (prop == UCHAR_IDENTIFIER_TYPE) {
+        const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec);
+        UIdentifierType idType = (UIdentifierType)value;
+        applyFilter(idTypeFilter, &idType, inclusions, ec);
     } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) {
         if (value == 0 || value == 1) {
             const USet *set = u_getBinaryPropertySet(prop, &ec);
@@ -915,6 +923,13 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
                 }
                 // fall through to calling applyIntPropertyValue()
                 break;
+            case UCHAR_IDENTIFIER_TYPE:
+                v = u_getPropertyValueEnum(p, vname.data());
+                if (v == UCHAR_INVALID_CODE) {
+                    FAIL(ec);
+                }
+                // fall through to calling applyIntPropertyValue()
+                break;
             default:
                 // p is a non-binary, non-enumerated property that we
                 // don't support (yet).
author	romankoshelev <romankoshelev@yandex-team.com>	2024-05-13 11:00:27 +0300
committer	romankoshelev <romankoshelev@yandex-team.com>	2024-05-13 11:13:05 +0300
commit	5b22fadb0f035a3b82c328e0ae710ad2b92f6eac (patch)
tree	e15dc649c79c4fb78f35cd6694dfe9af9bfcc0ad /contrib/libs/icu/common/uniset_props.cpp
parent	5946aa7d3cbca62f6bcf074e8a2b9346e7a96af4 (diff)
download	ydb-5b22fadb0f035a3b82c328e0ae710ad2b92f6eac.tar.gz