diff options
author | romankoshelev <romankoshelev@yandex-team.com> | 2024-05-13 11:00:27 +0300 |
---|---|---|
committer | romankoshelev <romankoshelev@yandex-team.com> | 2024-05-13 11:13:05 +0300 |
commit | 5b22fadb0f035a3b82c328e0ae710ad2b92f6eac (patch) | |
tree | e15dc649c79c4fb78f35cd6694dfe9af9bfcc0ad /contrib/libs/icu/common/uniset_props.cpp | |
parent | 5946aa7d3cbca62f6bcf074e8a2b9346e7a96af4 (diff) | |
download | ydb-5b22fadb0f035a3b82c328e0ae710ad2b92f6eac.tar.gz |
Update ICU to 75.1
904da4ae1c86fc5542eac7f1cd18d97b72eb8517
Diffstat (limited to 'contrib/libs/icu/common/uniset_props.cpp')
-rw-r--r-- | contrib/libs/icu/common/uniset_props.cpp | 49 |
1 files changed, 32 insertions, 17 deletions
diff --git a/contrib/libs/icu/common/uniset_props.cpp b/contrib/libs/icu/common/uniset_props.cpp index b3dbdf93c8..e6f880c9f2 100644 --- a/contrib/libs/icu/common/uniset_props.cpp +++ b/contrib/libs/icu/common/uniset_props.cpp @@ -106,7 +106,7 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) { // caseCompare(), but they also make UnicodeSet work for simple patterns when // no Unicode properties data is available - when caseCompare() fails -static inline UBool +inline UBool isPerlOpen(const UnicodeString &pattern, int32_t pos) { char16_t c; return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P'); @@ -117,12 +117,12 @@ isPerlClose(const UnicodeString &pattern, int32_t pos) { return pattern.charAt(pos)==u'}'; }*/ -static inline UBool +inline UBool isNameOpen(const UnicodeString &pattern, int32_t pos) { return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N'; } -static inline UBool +inline UBool isPOSIXOpen(const UnicodeString &pattern, int32_t pos) { return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':'; } @@ -226,14 +226,14 @@ namespace { class UnicodeSetPointer { UnicodeSet* p; public: - inline UnicodeSetPointer() : p(0) {} + inline UnicodeSetPointer() : p(nullptr) {} inline ~UnicodeSetPointer() { delete p; } inline UnicodeSet* pointer() { return p; } inline UBool allocate() { - if (p == 0) { + if (p == nullptr) { p = new UnicodeSet(); } - return p != 0; + return p != nullptr; } }; @@ -300,7 +300,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, UChar32 c = 0; UBool literal = false; - UnicodeSet* nested = 0; // alias - do not delete + UnicodeSet* nested = nullptr; // alias - do not delete // -------- Check for property pattern @@ -352,9 +352,9 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, continue; } } - } else if (symbols != 0) { + } else if (symbols != nullptr) { const UnicodeFunctor *m = symbols->lookupMatcher(c); - if (m != 0) { + if (m != nullptr) { const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m); if (ms == nullptr) { ec = U_MALFORMED_SET; @@ -390,7 +390,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, patLocal.append(op); } - if (nested == 0) { + if (nested == nullptr) { // lazy allocation if (!scratch.allocate()) { ec = U_MEMORY_ALLOCATION_ERROR; @@ -549,7 +549,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, c = chars.next(opts, literal, ec); if (U_FAILURE(ec)) return; UBool anchor = (c == u']' && !literal); - if (symbols == 0 && !anchor) { + if (symbols == nullptr && !anchor) { c = SymbolTable::SYMBOL_REF; chars.setPos(backup); break; // literal '$' @@ -657,16 +657,16 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, namespace { -static UBool numericValueFilter(UChar32 ch, void* context) { +UBool numericValueFilter(UChar32 ch, void* context) { return u_getNumericValue(ch) == *(double*)context; } -static UBool generalCategoryMaskFilter(UChar32 ch, void* context) { +UBool generalCategoryMaskFilter(UChar32 ch, void* context) { int32_t value = *(int32_t*)context; return (U_GET_GC_MASK((UChar32) ch) & value) != 0; } -static UBool versionFilter(UChar32 ch, void* context) { +UBool versionFilter(UChar32 ch, void* context) { static const UVersionInfo none = { 0, 0, 0, 0 }; UVersionInfo v; u_charAge(ch, v); @@ -679,15 +679,19 @@ typedef struct { int32_t value; } IntPropertyContext; -static UBool intPropertyFilter(UChar32 ch, void* context) { +UBool intPropertyFilter(UChar32 ch, void* context) { IntPropertyContext* c = (IntPropertyContext*)context; return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value; } -static UBool scriptExtensionsFilter(UChar32 ch, void* context) { +UBool scriptExtensionsFilter(UChar32 ch, void* context) { return uscript_hasScript(ch, *(UScriptCode*)context); } +UBool idTypeFilter(UChar32 ch, void* context) { + return u_hasIDType(ch, *(UIdentifierType*)context); +} + } // namespace /** @@ -744,7 +748,7 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, namespace { -static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { +UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { /* Note: we use ' ' in compiler code page */ int32_t j = 0; char ch; @@ -782,6 +786,10 @@ UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); UScriptCode script = (UScriptCode)value; applyFilter(scriptExtensionsFilter, &script, inclusions, ec); + } else if (prop == UCHAR_IDENTIFIER_TYPE) { + const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); + UIdentifierType idType = (UIdentifierType)value; + applyFilter(idTypeFilter, &idType, inclusions, ec); } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) { if (value == 0 || value == 1) { const USet *set = u_getBinaryPropertySet(prop, &ec); @@ -915,6 +923,13 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop, } // fall through to calling applyIntPropertyValue() break; + case UCHAR_IDENTIFIER_TYPE: + v = u_getPropertyValueEnum(p, vname.data()); + if (v == UCHAR_INVALID_CODE) { + FAIL(ec); + } + // fall through to calling applyIntPropertyValue() + break; default: // p is a non-binary, non-enumerated property that we // don't support (yet). |