diff options
author | mcheshkov <mcheshkov@yandex-team.ru> | 2022-02-10 16:46:16 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:16 +0300 |
commit | 1312621288956f199a5bd5342b0133d4395fa725 (patch) | |
tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/libs/icu/common/static_unicode_sets.cpp | |
parent | e9d19cec64684c9c1e6b0c98297e5b895cf904fe (diff) | |
download | ydb-1312621288956f199a5bd5342b0133d4395fa725.tar.gz |
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/icu/common/static_unicode_sets.cpp')
-rw-r--r-- | contrib/libs/icu/common/static_unicode_sets.cpp | 490 |
1 files changed, 245 insertions, 245 deletions
diff --git a/contrib/libs/icu/common/static_unicode_sets.cpp b/contrib/libs/icu/common/static_unicode_sets.cpp index 94bf08dc96..5dab3931a7 100644 --- a/contrib/libs/icu/common/static_unicode_sets.cpp +++ b/contrib/libs/icu/common/static_unicode_sets.cpp @@ -1,245 +1,245 @@ -// © 2018 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -// Allow implicit conversion from char16_t* to UnicodeString for this file: -// Helpful in toString methods and elsewhere. -#define UNISTR_FROM_STRING_EXPLICIT - -#include "static_unicode_sets.h" -#include "umutex.h" -#include "ucln_cmn.h" -#include "unicode/uniset.h" -#include "uresimp.h" -#include "cstring.h" -#include "uassert.h" - -using namespace icu; -using namespace icu::unisets; - - -namespace { - -UnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {}; - -// Save the empty instance in static memory to have well-defined behavior if a -// regular UnicodeSet cannot be allocated. -alignas(UnicodeSet) -char gEmptyUnicodeSet[sizeof(UnicodeSet)]; - -// Whether the gEmptyUnicodeSet is initialized and ready to use. -UBool gEmptyUnicodeSetInitialized = FALSE; - -inline UnicodeSet* getImpl(Key key) { - UnicodeSet* candidate = gUnicodeSets[key]; - if (candidate == nullptr) { - return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet); - } - return candidate; -} - -UnicodeSet* computeUnion(Key k1, Key k2) { - UnicodeSet* result = new UnicodeSet(); - if (result == nullptr) { - return nullptr; - } - result->addAll(*getImpl(k1)); - result->addAll(*getImpl(k2)); - result->freeze(); - return result; -} - -UnicodeSet* computeUnion(Key k1, Key k2, Key k3) { - UnicodeSet* result = new UnicodeSet(); - if (result == nullptr) { - return nullptr; - } - result->addAll(*getImpl(k1)); - result->addAll(*getImpl(k2)); - result->addAll(*getImpl(k3)); - result->freeze(); - return result; -} - - -void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) { - // assert unicodeSets.get(key) == null; - gUnicodeSets[key] = new UnicodeSet(unicodeSetPattern, status); -} - -class ParseDataSink : public ResourceSink { - public: - void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE { - ResourceTable contextsTable = value.getTable(status); - if (U_FAILURE(status)) { return; } - for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) { - if (uprv_strcmp(key, "date") == 0) { - // ignore - } else { - ResourceTable strictnessTable = value.getTable(status); - if (U_FAILURE(status)) { return; } - for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) { - bool isLenient = (uprv_strcmp(key, "lenient") == 0); - ResourceArray array = value.getArray(status); - if (U_FAILURE(status)) { return; } - for (int k = 0; k < array.getSize(); k++) { - array.getValue(k, value); - UnicodeString str = value.getUnicodeString(status); - if (U_FAILURE(status)) { return; } - // There is both lenient and strict data for comma/period, - // but not for any of the other symbols. - if (str.indexOf(u'.') != -1) { - saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status); - } else if (str.indexOf(u',') != -1) { - saveSet(isLenient ? COMMA : STRICT_COMMA, str, status); - } else if (str.indexOf(u'+') != -1) { - saveSet(PLUS_SIGN, str, status); - } else if (str.indexOf(u'-') != -1) { - saveSet(MINUS_SIGN, str, status); - } else if (str.indexOf(u'$') != -1) { - saveSet(DOLLAR_SIGN, str, status); - } else if (str.indexOf(u'£') != -1) { - saveSet(POUND_SIGN, str, status); - } else if (str.indexOf(u'₹') != -1) { - saveSet(RUPEE_SIGN, str, status); - } else if (str.indexOf(u'¥') != -1) { - saveSet(YEN_SIGN, str, status); - } else if (str.indexOf(u'₩') != -1) { - saveSet(WON_SIGN, str, status); - } else if (str.indexOf(u'%') != -1) { - saveSet(PERCENT_SIGN, str, status); - } else if (str.indexOf(u'‰') != -1) { - saveSet(PERMILLE_SIGN, str, status); - } else if (str.indexOf(u'’') != -1) { - saveSet(APOSTROPHE_SIGN, str, status); - } else { - // Unknown class of parse lenients - // TODO(ICU-20428): Make ICU automatically accept new classes? - U_ASSERT(FALSE); - } - if (U_FAILURE(status)) { return; } - } - } - } - } - } -}; - - -icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER; - -UBool U_CALLCONV cleanupNumberParseUniSets() { - if (gEmptyUnicodeSetInitialized) { - reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->~UnicodeSet(); - gEmptyUnicodeSetInitialized = FALSE; - } - for (int32_t i = 0; i < UNISETS_KEY_COUNT; i++) { - delete gUnicodeSets[i]; - gUnicodeSets[i] = nullptr; - } - gNumberParseUniSetsInitOnce.reset(); - return TRUE; -} - -void U_CALLCONV initNumberParseUniSets(UErrorCode& status) { - ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets); - - // Initialize the empty instance for well-defined fallback behavior - new(gEmptyUnicodeSet) UnicodeSet(); - reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze(); - gEmptyUnicodeSetInitialized = TRUE; - - // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309. - // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property). - gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet( - u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status); - gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status); - - LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status)); - if (U_FAILURE(status)) { return; } - ParseDataSink sink; - ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status); - if (U_FAILURE(status)) { return; } - - // NOTE: It is OK for these assertions to fail if there was a no-data build. - U_ASSERT(gUnicodeSets[COMMA] != nullptr); - U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr); - U_ASSERT(gUnicodeSets[PERIOD] != nullptr); - U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr); - U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr); - - LocalPointer<UnicodeSet> otherGrouping(new UnicodeSet( - u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", - status - ), status); - if (U_FAILURE(status)) { return; } - otherGrouping->addAll(*gUnicodeSets[APOSTROPHE_SIGN]); - gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan(); - gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS); - gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion( - STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS); - - U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr); - - gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status); - if (U_FAILURE(status)) { return; } - - U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr); - - gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status); - if (U_FAILURE(status)) { return; } - gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS); - gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS); - - for (auto* uniset : gUnicodeSets) { - if (uniset != nullptr) { - uniset->freeze(); - } - } -} - -} - -const UnicodeSet* unisets::get(Key key) { - UErrorCode localStatus = U_ZERO_ERROR; - umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus); - if (U_FAILURE(localStatus)) { - return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet); - } - return getImpl(key); -} - -Key unisets::chooseFrom(UnicodeString str, Key key1) { - return get(key1)->contains(str) ? key1 : NONE; -} - -Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) { - return get(key1)->contains(str) ? key1 : chooseFrom(str, key2); -} - -//Key unisets::chooseCurrency(UnicodeString str) { -// if (get(DOLLAR_SIGN)->contains(str)) { -// return DOLLAR_SIGN; -// } else if (get(POUND_SIGN)->contains(str)) { -// return POUND_SIGN; -// } else if (get(RUPEE_SIGN)->contains(str)) { -// return RUPEE_SIGN; -// } else if (get(YEN_SIGN)->contains(str)) { -// return YEN_SIGN; -// } else { -// return NONE; -// } -//} - - -#endif /* #if !UCONFIG_NO_FORMATTING */ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "static_unicode_sets.h" +#include "umutex.h" +#include "ucln_cmn.h" +#include "unicode/uniset.h" +#include "uresimp.h" +#include "cstring.h" +#include "uassert.h" + +using namespace icu; +using namespace icu::unisets; + + +namespace { + +UnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {}; + +// Save the empty instance in static memory to have well-defined behavior if a +// regular UnicodeSet cannot be allocated. +alignas(UnicodeSet) +char gEmptyUnicodeSet[sizeof(UnicodeSet)]; + +// Whether the gEmptyUnicodeSet is initialized and ready to use. +UBool gEmptyUnicodeSetInitialized = FALSE; + +inline UnicodeSet* getImpl(Key key) { + UnicodeSet* candidate = gUnicodeSets[key]; + if (candidate == nullptr) { + return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet); + } + return candidate; +} + +UnicodeSet* computeUnion(Key k1, Key k2) { + UnicodeSet* result = new UnicodeSet(); + if (result == nullptr) { + return nullptr; + } + result->addAll(*getImpl(k1)); + result->addAll(*getImpl(k2)); + result->freeze(); + return result; +} + +UnicodeSet* computeUnion(Key k1, Key k2, Key k3) { + UnicodeSet* result = new UnicodeSet(); + if (result == nullptr) { + return nullptr; + } + result->addAll(*getImpl(k1)); + result->addAll(*getImpl(k2)); + result->addAll(*getImpl(k3)); + result->freeze(); + return result; +} + + +void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) { + // assert unicodeSets.get(key) == null; + gUnicodeSets[key] = new UnicodeSet(unicodeSetPattern, status); +} + +class ParseDataSink : public ResourceSink { + public: + void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE { + ResourceTable contextsTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) { + if (uprv_strcmp(key, "date") == 0) { + // ignore + } else { + ResourceTable strictnessTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) { + bool isLenient = (uprv_strcmp(key, "lenient") == 0); + ResourceArray array = value.getArray(status); + if (U_FAILURE(status)) { return; } + for (int k = 0; k < array.getSize(); k++) { + array.getValue(k, value); + UnicodeString str = value.getUnicodeString(status); + if (U_FAILURE(status)) { return; } + // There is both lenient and strict data for comma/period, + // but not for any of the other symbols. + if (str.indexOf(u'.') != -1) { + saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status); + } else if (str.indexOf(u',') != -1) { + saveSet(isLenient ? COMMA : STRICT_COMMA, str, status); + } else if (str.indexOf(u'+') != -1) { + saveSet(PLUS_SIGN, str, status); + } else if (str.indexOf(u'-') != -1) { + saveSet(MINUS_SIGN, str, status); + } else if (str.indexOf(u'$') != -1) { + saveSet(DOLLAR_SIGN, str, status); + } else if (str.indexOf(u'£') != -1) { + saveSet(POUND_SIGN, str, status); + } else if (str.indexOf(u'₹') != -1) { + saveSet(RUPEE_SIGN, str, status); + } else if (str.indexOf(u'¥') != -1) { + saveSet(YEN_SIGN, str, status); + } else if (str.indexOf(u'₩') != -1) { + saveSet(WON_SIGN, str, status); + } else if (str.indexOf(u'%') != -1) { + saveSet(PERCENT_SIGN, str, status); + } else if (str.indexOf(u'‰') != -1) { + saveSet(PERMILLE_SIGN, str, status); + } else if (str.indexOf(u'’') != -1) { + saveSet(APOSTROPHE_SIGN, str, status); + } else { + // Unknown class of parse lenients + // TODO(ICU-20428): Make ICU automatically accept new classes? + U_ASSERT(FALSE); + } + if (U_FAILURE(status)) { return; } + } + } + } + } + } +}; + + +icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER; + +UBool U_CALLCONV cleanupNumberParseUniSets() { + if (gEmptyUnicodeSetInitialized) { + reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->~UnicodeSet(); + gEmptyUnicodeSetInitialized = FALSE; + } + for (int32_t i = 0; i < UNISETS_KEY_COUNT; i++) { + delete gUnicodeSets[i]; + gUnicodeSets[i] = nullptr; + } + gNumberParseUniSetsInitOnce.reset(); + return TRUE; +} + +void U_CALLCONV initNumberParseUniSets(UErrorCode& status) { + ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets); + + // Initialize the empty instance for well-defined fallback behavior + new(gEmptyUnicodeSet) UnicodeSet(); + reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze(); + gEmptyUnicodeSetInitialized = TRUE; + + // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309. + // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property). + gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet( + u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status); + gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status); + + LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status)); + if (U_FAILURE(status)) { return; } + ParseDataSink sink; + ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status); + if (U_FAILURE(status)) { return; } + + // NOTE: It is OK for these assertions to fail if there was a no-data build. + U_ASSERT(gUnicodeSets[COMMA] != nullptr); + U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr); + U_ASSERT(gUnicodeSets[PERIOD] != nullptr); + U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr); + U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr); + + LocalPointer<UnicodeSet> otherGrouping(new UnicodeSet( + u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", + status + ), status); + if (U_FAILURE(status)) { return; } + otherGrouping->addAll(*gUnicodeSets[APOSTROPHE_SIGN]); + gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan(); + gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS); + gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion( + STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS); + + U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr); + + gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status); + if (U_FAILURE(status)) { return; } + + U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr); + + gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status); + if (U_FAILURE(status)) { return; } + gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS); + gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS); + + for (auto* uniset : gUnicodeSets) { + if (uniset != nullptr) { + uniset->freeze(); + } + } +} + +} + +const UnicodeSet* unisets::get(Key key) { + UErrorCode localStatus = U_ZERO_ERROR; + umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus); + if (U_FAILURE(localStatus)) { + return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet); + } + return getImpl(key); +} + +Key unisets::chooseFrom(UnicodeString str, Key key1) { + return get(key1)->contains(str) ? key1 : NONE; +} + +Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) { + return get(key1)->contains(str) ? key1 : chooseFrom(str, key2); +} + +//Key unisets::chooseCurrency(UnicodeString str) { +// if (get(DOLLAR_SIGN)->contains(str)) { +// return DOLLAR_SIGN; +// } else if (get(POUND_SIGN)->contains(str)) { +// return POUND_SIGN; +// } else if (get(RUPEE_SIGN)->contains(str)) { +// return RUPEE_SIGN; +// } else if (get(YEN_SIGN)->contains(str)) { +// return YEN_SIGN; +// } else { +// return NONE; +// } +//} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ |