diff options
Diffstat (limited to 'contrib/libs/icu/common/characterproperties.cpp')
-rw-r--r-- | contrib/libs/icu/common/characterproperties.cpp | 766 |
1 files changed, 383 insertions, 383 deletions
diff --git a/contrib/libs/icu/common/characterproperties.cpp b/contrib/libs/icu/common/characterproperties.cpp index 7b50a4e205..6cfa722021 100644 --- a/contrib/libs/icu/common/characterproperties.cpp +++ b/contrib/libs/icu/common/characterproperties.cpp @@ -1,383 +1,383 @@ -// © 2018 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// characterproperties.cpp -// created: 2018sep03 Markus W. Scherer - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" -#include "unicode/uchar.h" -#include "unicode/ucpmap.h" -#include "unicode/ucptrie.h" -#include "unicode/umutablecptrie.h" -#include "unicode/uniset.h" -#include "unicode/uscript.h" -#include "unicode/uset.h" -#include "cmemory.h" -#include "mutex.h" -#include "normalizer2impl.h" -#include "uassert.h" -#include "ubidi_props.h" -#include "ucase.h" -#include "ucln_cmn.h" -#include "umutex.h" -#include "uprops.h" - -using icu::LocalPointer; -#if !UCONFIG_NO_NORMALIZATION -using icu::Normalizer2Factory; -using icu::Normalizer2Impl; -#endif -using icu::UInitOnce; -using icu::UnicodeSet; - -namespace { - -UBool U_CALLCONV characterproperties_cleanup(); - -constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START; - -struct Inclusion { - UnicodeSet *fSet = nullptr; - UInitOnce fInitOnce = U_INITONCE_INITIALIZER; -}; -Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions() - -UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {}; - -UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {}; - -icu::UMutex cpMutex; - -//---------------------------------------------------------------- -// Inclusions list -//---------------------------------------------------------------- - -// USetAdder implementation -// Does not use uset.h to reduce code dependencies -void U_CALLCONV -_set_add(USet *set, UChar32 c) { - ((UnicodeSet *)set)->add(c); -} - -void U_CALLCONV -_set_addRange(USet *set, UChar32 start, UChar32 end) { - ((UnicodeSet *)set)->add(start, end); -} - -void U_CALLCONV -_set_addString(USet *set, const UChar *str, int32_t length) { - ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length)); -} - -UBool U_CALLCONV characterproperties_cleanup() { - for (Inclusion &in: gInclusions) { - delete in.fSet; - in.fSet = nullptr; - in.fInitOnce.reset(); - } - for (int32_t i = 0; i < UPRV_LENGTHOF(sets); ++i) { - delete sets[i]; - sets[i] = nullptr; - } - for (int32_t i = 0; i < UPRV_LENGTHOF(maps); ++i) { - ucptrie_close(reinterpret_cast<UCPTrie *>(maps[i])); - maps[i] = nullptr; - } - return TRUE; -} - -void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) { - // This function is invoked only via umtx_initOnce(). - U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT); - if (src == UPROPS_SRC_NONE) { - errorCode = U_INTERNAL_PROGRAM_ERROR; - return; - } - U_ASSERT(gInclusions[src].fSet == nullptr); - - LocalPointer<UnicodeSet> incl(new UnicodeSet()); - if (incl.isNull()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - USetAdder sa = { - (USet *)incl.getAlias(), - _set_add, - _set_addRange, - _set_addString, - nullptr, // don't need remove() - nullptr // don't need removeRange() - }; - - switch(src) { - case UPROPS_SRC_CHAR: - uchar_addPropertyStarts(&sa, &errorCode); - break; - case UPROPS_SRC_PROPSVEC: - upropsvec_addPropertyStarts(&sa, &errorCode); - break; - case UPROPS_SRC_CHAR_AND_PROPSVEC: - uchar_addPropertyStarts(&sa, &errorCode); - upropsvec_addPropertyStarts(&sa, &errorCode); - break; -#if !UCONFIG_NO_NORMALIZATION - case UPROPS_SRC_CASE_AND_NORM: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addPropertyStarts(&sa, errorCode); - } - ucase_addPropertyStarts(&sa, &errorCode); - break; - } - case UPROPS_SRC_NFC: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addPropertyStarts(&sa, errorCode); - } - break; - } - case UPROPS_SRC_NFKC: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addPropertyStarts(&sa, errorCode); - } - break; - } - case UPROPS_SRC_NFKC_CF: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addPropertyStarts(&sa, errorCode); - } - break; - } - case UPROPS_SRC_NFC_CANON_ITER: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addCanonIterPropertyStarts(&sa, errorCode); - } - break; - } -#endif - case UPROPS_SRC_CASE: - ucase_addPropertyStarts(&sa, &errorCode); - break; - case UPROPS_SRC_BIDI: - ubidi_addPropertyStarts(&sa, &errorCode); - break; - case UPROPS_SRC_INPC: - case UPROPS_SRC_INSC: - case UPROPS_SRC_VO: - uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode); - break; - default: - errorCode = U_INTERNAL_PROGRAM_ERROR; - break; - } - - if (U_FAILURE(errorCode)) { - return; - } - if (incl->isBogus()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - // Compact for caching. - incl->compact(); - gInclusions[src].fSet = incl.orphan(); - ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup); -} - -const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return nullptr; } - if (src < 0 || UPROPS_SRC_COUNT <= src) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - Inclusion &i = gInclusions[src]; - umtx_initOnce(i.fInitOnce, &initInclusion, src, errorCode); - return i.fSet; -} - -void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) { - // This function is invoked only via umtx_initOnce(). - U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT); - int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START; - U_ASSERT(gInclusions[inclIndex].fSet == nullptr); - UPropertySource src = uprops_getSource(prop); - const UnicodeSet *incl = getInclusionsForSource(src, errorCode); - if (U_FAILURE(errorCode)) { - return; - } - - LocalPointer<UnicodeSet> intPropIncl(new UnicodeSet(0, 0)); - if (intPropIncl.isNull()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - int32_t numRanges = incl->getRangeCount(); - int32_t prevValue = 0; - for (int32_t i = 0; i < numRanges; ++i) { - UChar32 rangeEnd = incl->getRangeEnd(i); - for (UChar32 c = incl->getRangeStart(i); c <= rangeEnd; ++c) { - // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. - int32_t value = u_getIntPropertyValue(c, prop); - if (value != prevValue) { - intPropIncl->add(c); - prevValue = value; - } - } - } - - if (intPropIncl->isBogus()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - // Compact for caching. - intPropIncl->compact(); - gInclusions[inclIndex].fSet = intPropIncl.orphan(); - ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup); -} - -} // namespace - -U_NAMESPACE_BEGIN - -const UnicodeSet *CharacterProperties::getInclusionsForProperty( - UProperty prop, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return nullptr; } - if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) { - int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START; - Inclusion &i = gInclusions[inclIndex]; - umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode); - return i.fSet; - } else { - UPropertySource src = uprops_getSource(prop); - return getInclusionsForSource(src, errorCode); - } -} - -U_NAMESPACE_END - -namespace { - -UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return nullptr; } - LocalPointer<UnicodeSet> set(new UnicodeSet()); - if (set.isNull()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - const UnicodeSet *inclusions = - icu::CharacterProperties::getInclusionsForProperty(property, errorCode); - if (U_FAILURE(errorCode)) { return nullptr; } - int32_t numRanges = inclusions->getRangeCount(); - UChar32 startHasProperty = -1; - - for (int32_t i = 0; i < numRanges; ++i) { - UChar32 rangeEnd = inclusions->getRangeEnd(i); - for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { - // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch. - if (u_hasBinaryProperty(c, property)) { - if (startHasProperty < 0) { - // Transition from false to true. - startHasProperty = c; - } - } else if (startHasProperty >= 0) { - // Transition from true to false. - set->add(startHasProperty, c - 1); - startHasProperty = -1; - } - } - } - if (startHasProperty >= 0) { - set->add(startHasProperty, 0x10FFFF); - } - set->freeze(); - return set.orphan(); -} - -UCPMap *makeMap(UProperty property, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return nullptr; } - uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0; - icu::LocalUMutableCPTriePointer mutableTrie( - umutablecptrie_open(nullValue, nullValue, &errorCode)); - const UnicodeSet *inclusions = - icu::CharacterProperties::getInclusionsForProperty(property, errorCode); - if (U_FAILURE(errorCode)) { return nullptr; } - int32_t numRanges = inclusions->getRangeCount(); - UChar32 start = 0; - uint32_t value = nullValue; - - for (int32_t i = 0; i < numRanges; ++i) { - UChar32 rangeEnd = inclusions->getRangeEnd(i); - for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { - // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. - uint32_t nextValue = u_getIntPropertyValue(c, property); - if (value != nextValue) { - if (value != nullValue) { - umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode); - } - start = c; - value = nextValue; - } - } - } - if (value != 0) { - umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode); - } - - UCPTrieType type; - if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) { - type = UCPTRIE_TYPE_FAST; - } else { - type = UCPTRIE_TYPE_SMALL; - } - UCPTrieValueWidth valueWidth; - // TODO: UCharacterProperty.IntProperty - int32_t max = u_getIntPropertyMaxValue(property); - if (max <= 0xff) { - valueWidth = UCPTRIE_VALUE_BITS_8; - } else if (max <= 0xffff) { - valueWidth = UCPTRIE_VALUE_BITS_16; - } else { - valueWidth = UCPTRIE_VALUE_BITS_32; - } - return reinterpret_cast<UCPMap *>( - umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode)); -} - -} // namespace - -U_NAMESPACE_USE - -U_CAPI const USet * U_EXPORT2 -u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { return nullptr; } - if (property < 0 || UCHAR_BINARY_LIMIT <= property) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - Mutex m(&cpMutex); - UnicodeSet *set = sets[property]; - if (set == nullptr) { - sets[property] = set = makeSet(property, *pErrorCode); - } - if (U_FAILURE(*pErrorCode)) { return nullptr; } - return set->toUSet(); -} - -U_CAPI const UCPMap * U_EXPORT2 -u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { return nullptr; } - if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - Mutex m(&cpMutex); - UCPMap *map = maps[property - UCHAR_INT_START]; - if (map == nullptr) { - maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode); - } - return map; -} +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// characterproperties.cpp +// created: 2018sep03 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" +#include "unicode/uchar.h" +#include "unicode/ucpmap.h" +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/uniset.h" +#include "unicode/uscript.h" +#include "unicode/uset.h" +#include "cmemory.h" +#include "mutex.h" +#include "normalizer2impl.h" +#include "uassert.h" +#include "ubidi_props.h" +#include "ucase.h" +#include "ucln_cmn.h" +#include "umutex.h" +#include "uprops.h" + +using icu::LocalPointer; +#if !UCONFIG_NO_NORMALIZATION +using icu::Normalizer2Factory; +using icu::Normalizer2Impl; +#endif +using icu::UInitOnce; +using icu::UnicodeSet; + +namespace { + +UBool U_CALLCONV characterproperties_cleanup(); + +constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START; + +struct Inclusion { + UnicodeSet *fSet = nullptr; + UInitOnce fInitOnce = U_INITONCE_INITIALIZER; +}; +Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions() + +UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {}; + +UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {}; + +icu::UMutex cpMutex; + +//---------------------------------------------------------------- +// Inclusions list +//---------------------------------------------------------------- + +// USetAdder implementation +// Does not use uset.h to reduce code dependencies +void U_CALLCONV +_set_add(USet *set, UChar32 c) { + ((UnicodeSet *)set)->add(c); +} + +void U_CALLCONV +_set_addRange(USet *set, UChar32 start, UChar32 end) { + ((UnicodeSet *)set)->add(start, end); +} + +void U_CALLCONV +_set_addString(USet *set, const UChar *str, int32_t length) { + ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length)); +} + +UBool U_CALLCONV characterproperties_cleanup() { + for (Inclusion &in: gInclusions) { + delete in.fSet; + in.fSet = nullptr; + in.fInitOnce.reset(); + } + for (int32_t i = 0; i < UPRV_LENGTHOF(sets); ++i) { + delete sets[i]; + sets[i] = nullptr; + } + for (int32_t i = 0; i < UPRV_LENGTHOF(maps); ++i) { + ucptrie_close(reinterpret_cast<UCPTrie *>(maps[i])); + maps[i] = nullptr; + } + return TRUE; +} + +void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT); + if (src == UPROPS_SRC_NONE) { + errorCode = U_INTERNAL_PROGRAM_ERROR; + return; + } + U_ASSERT(gInclusions[src].fSet == nullptr); + + LocalPointer<UnicodeSet> incl(new UnicodeSet()); + if (incl.isNull()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + USetAdder sa = { + (USet *)incl.getAlias(), + _set_add, + _set_addRange, + _set_addString, + nullptr, // don't need remove() + nullptr // don't need removeRange() + }; + + switch(src) { + case UPROPS_SRC_CHAR: + uchar_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_PROPSVEC: + upropsvec_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_CHAR_AND_PROPSVEC: + uchar_addPropertyStarts(&sa, &errorCode); + upropsvec_addPropertyStarts(&sa, &errorCode); + break; +#if !UCONFIG_NO_NORMALIZATION + case UPROPS_SRC_CASE_AND_NORM: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + ucase_addPropertyStarts(&sa, &errorCode); + break; + } + case UPROPS_SRC_NFC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFKC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFKC_CF: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFC_CANON_ITER: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addCanonIterPropertyStarts(&sa, errorCode); + } + break; + } +#endif + case UPROPS_SRC_CASE: + ucase_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_BIDI: + ubidi_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_INPC: + case UPROPS_SRC_INSC: + case UPROPS_SRC_VO: + uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode); + break; + default: + errorCode = U_INTERNAL_PROGRAM_ERROR; + break; + } + + if (U_FAILURE(errorCode)) { + return; + } + if (incl->isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + // Compact for caching. + incl->compact(); + gInclusions[src].fSet = incl.orphan(); + ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup); +} + +const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + if (src < 0 || UPROPS_SRC_COUNT <= src) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Inclusion &i = gInclusions[src]; + umtx_initOnce(i.fInitOnce, &initInclusion, src, errorCode); + return i.fSet; +} + +void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT); + int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START; + U_ASSERT(gInclusions[inclIndex].fSet == nullptr); + UPropertySource src = uprops_getSource(prop); + const UnicodeSet *incl = getInclusionsForSource(src, errorCode); + if (U_FAILURE(errorCode)) { + return; + } + + LocalPointer<UnicodeSet> intPropIncl(new UnicodeSet(0, 0)); + if (intPropIncl.isNull()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + int32_t numRanges = incl->getRangeCount(); + int32_t prevValue = 0; + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = incl->getRangeEnd(i); + for (UChar32 c = incl->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. + int32_t value = u_getIntPropertyValue(c, prop); + if (value != prevValue) { + intPropIncl->add(c); + prevValue = value; + } + } + } + + if (intPropIncl->isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + // Compact for caching. + intPropIncl->compact(); + gInclusions[inclIndex].fSet = intPropIncl.orphan(); + ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup); +} + +} // namespace + +U_NAMESPACE_BEGIN + +const UnicodeSet *CharacterProperties::getInclusionsForProperty( + UProperty prop, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) { + int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START; + Inclusion &i = gInclusions[inclIndex]; + umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode); + return i.fSet; + } else { + UPropertySource src = uprops_getSource(prop); + return getInclusionsForSource(src, errorCode); + } +} + +U_NAMESPACE_END + +namespace { + +UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + LocalPointer<UnicodeSet> set(new UnicodeSet()); + if (set.isNull()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + const UnicodeSet *inclusions = + icu::CharacterProperties::getInclusionsForProperty(property, errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t numRanges = inclusions->getRangeCount(); + UChar32 startHasProperty = -1; + + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = inclusions->getRangeEnd(i); + for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch. + if (u_hasBinaryProperty(c, property)) { + if (startHasProperty < 0) { + // Transition from false to true. + startHasProperty = c; + } + } else if (startHasProperty >= 0) { + // Transition from true to false. + set->add(startHasProperty, c - 1); + startHasProperty = -1; + } + } + } + if (startHasProperty >= 0) { + set->add(startHasProperty, 0x10FFFF); + } + set->freeze(); + return set.orphan(); +} + +UCPMap *makeMap(UProperty property, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0; + icu::LocalUMutableCPTriePointer mutableTrie( + umutablecptrie_open(nullValue, nullValue, &errorCode)); + const UnicodeSet *inclusions = + icu::CharacterProperties::getInclusionsForProperty(property, errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t numRanges = inclusions->getRangeCount(); + UChar32 start = 0; + uint32_t value = nullValue; + + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = inclusions->getRangeEnd(i); + for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. + uint32_t nextValue = u_getIntPropertyValue(c, property); + if (value != nextValue) { + if (value != nullValue) { + umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode); + } + start = c; + value = nextValue; + } + } + } + if (value != 0) { + umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode); + } + + UCPTrieType type; + if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) { + type = UCPTRIE_TYPE_FAST; + } else { + type = UCPTRIE_TYPE_SMALL; + } + UCPTrieValueWidth valueWidth; + // TODO: UCharacterProperty.IntProperty + int32_t max = u_getIntPropertyMaxValue(property); + if (max <= 0xff) { + valueWidth = UCPTRIE_VALUE_BITS_8; + } else if (max <= 0xffff) { + valueWidth = UCPTRIE_VALUE_BITS_16; + } else { + valueWidth = UCPTRIE_VALUE_BITS_32; + } + return reinterpret_cast<UCPMap *>( + umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode)); +} + +} // namespace + +U_NAMESPACE_USE + +U_CAPI const USet * U_EXPORT2 +u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return nullptr; } + if (property < 0 || UCHAR_BINARY_LIMIT <= property) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Mutex m(&cpMutex); + UnicodeSet *set = sets[property]; + if (set == nullptr) { + sets[property] = set = makeSet(property, *pErrorCode); + } + if (U_FAILURE(*pErrorCode)) { return nullptr; } + return set->toUSet(); +} + +U_CAPI const UCPMap * U_EXPORT2 +u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return nullptr; } + if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Mutex m(&cpMutex); + UCPMap *map = maps[property - UCHAR_INT_START]; + if (map == nullptr) { + maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode); + } + return map; +} |