aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/common/static_unicode_sets.cpp
diff options
context:
space:
mode:
authormcheshkov <mcheshkov@yandex-team.ru>2022-02-10 16:46:16 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:46:16 +0300
commit1312621288956f199a5bd5342b0133d4395fa725 (patch)
tree1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/libs/icu/common/static_unicode_sets.cpp
parente9d19cec64684c9c1e6b0c98297e5b895cf904fe (diff)
downloadydb-1312621288956f199a5bd5342b0133d4395fa725.tar.gz
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/icu/common/static_unicode_sets.cpp')
-rw-r--r--contrib/libs/icu/common/static_unicode_sets.cpp490
1 files changed, 245 insertions, 245 deletions
diff --git a/contrib/libs/icu/common/static_unicode_sets.cpp b/contrib/libs/icu/common/static_unicode_sets.cpp
index 94bf08dc96..5dab3931a7 100644
--- a/contrib/libs/icu/common/static_unicode_sets.cpp
+++ b/contrib/libs/icu/common/static_unicode_sets.cpp
@@ -1,245 +1,245 @@
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-// Allow implicit conversion from char16_t* to UnicodeString for this file:
-// Helpful in toString methods and elsewhere.
-#define UNISTR_FROM_STRING_EXPLICIT
-
-#include "static_unicode_sets.h"
-#include "umutex.h"
-#include "ucln_cmn.h"
-#include "unicode/uniset.h"
-#include "uresimp.h"
-#include "cstring.h"
-#include "uassert.h"
-
-using namespace icu;
-using namespace icu::unisets;
-
-
-namespace {
-
-UnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {};
-
-// Save the empty instance in static memory to have well-defined behavior if a
-// regular UnicodeSet cannot be allocated.
-alignas(UnicodeSet)
-char gEmptyUnicodeSet[sizeof(UnicodeSet)];
-
-// Whether the gEmptyUnicodeSet is initialized and ready to use.
-UBool gEmptyUnicodeSetInitialized = FALSE;
-
-inline UnicodeSet* getImpl(Key key) {
- UnicodeSet* candidate = gUnicodeSets[key];
- if (candidate == nullptr) {
- return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
- }
- return candidate;
-}
-
-UnicodeSet* computeUnion(Key k1, Key k2) {
- UnicodeSet* result = new UnicodeSet();
- if (result == nullptr) {
- return nullptr;
- }
- result->addAll(*getImpl(k1));
- result->addAll(*getImpl(k2));
- result->freeze();
- return result;
-}
-
-UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
- UnicodeSet* result = new UnicodeSet();
- if (result == nullptr) {
- return nullptr;
- }
- result->addAll(*getImpl(k1));
- result->addAll(*getImpl(k2));
- result->addAll(*getImpl(k3));
- result->freeze();
- return result;
-}
-
-
-void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) {
- // assert unicodeSets.get(key) == null;
- gUnicodeSets[key] = new UnicodeSet(unicodeSetPattern, status);
-}
-
-class ParseDataSink : public ResourceSink {
- public:
- void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
- ResourceTable contextsTable = value.getTable(status);
- if (U_FAILURE(status)) { return; }
- for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) {
- if (uprv_strcmp(key, "date") == 0) {
- // ignore
- } else {
- ResourceTable strictnessTable = value.getTable(status);
- if (U_FAILURE(status)) { return; }
- for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) {
- bool isLenient = (uprv_strcmp(key, "lenient") == 0);
- ResourceArray array = value.getArray(status);
- if (U_FAILURE(status)) { return; }
- for (int k = 0; k < array.getSize(); k++) {
- array.getValue(k, value);
- UnicodeString str = value.getUnicodeString(status);
- if (U_FAILURE(status)) { return; }
- // There is both lenient and strict data for comma/period,
- // but not for any of the other symbols.
- if (str.indexOf(u'.') != -1) {
- saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status);
- } else if (str.indexOf(u',') != -1) {
- saveSet(isLenient ? COMMA : STRICT_COMMA, str, status);
- } else if (str.indexOf(u'+') != -1) {
- saveSet(PLUS_SIGN, str, status);
- } else if (str.indexOf(u'-') != -1) {
- saveSet(MINUS_SIGN, str, status);
- } else if (str.indexOf(u'$') != -1) {
- saveSet(DOLLAR_SIGN, str, status);
- } else if (str.indexOf(u'£') != -1) {
- saveSet(POUND_SIGN, str, status);
- } else if (str.indexOf(u'₹') != -1) {
- saveSet(RUPEE_SIGN, str, status);
- } else if (str.indexOf(u'¥') != -1) {
- saveSet(YEN_SIGN, str, status);
- } else if (str.indexOf(u'₩') != -1) {
- saveSet(WON_SIGN, str, status);
- } else if (str.indexOf(u'%') != -1) {
- saveSet(PERCENT_SIGN, str, status);
- } else if (str.indexOf(u'‰') != -1) {
- saveSet(PERMILLE_SIGN, str, status);
- } else if (str.indexOf(u'’') != -1) {
- saveSet(APOSTROPHE_SIGN, str, status);
- } else {
- // Unknown class of parse lenients
- // TODO(ICU-20428): Make ICU automatically accept new classes?
- U_ASSERT(FALSE);
- }
- if (U_FAILURE(status)) { return; }
- }
- }
- }
- }
- }
-};
-
-
-icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;
-
-UBool U_CALLCONV cleanupNumberParseUniSets() {
- if (gEmptyUnicodeSetInitialized) {
- reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->~UnicodeSet();
- gEmptyUnicodeSetInitialized = FALSE;
- }
- for (int32_t i = 0; i < UNISETS_KEY_COUNT; i++) {
- delete gUnicodeSets[i];
- gUnicodeSets[i] = nullptr;
- }
- gNumberParseUniSetsInitOnce.reset();
- return TRUE;
-}
-
-void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
- ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
-
- // Initialize the empty instance for well-defined fallback behavior
- new(gEmptyUnicodeSet) UnicodeSet();
- reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze();
- gEmptyUnicodeSetInitialized = TRUE;
-
- // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
- // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
- gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet(
- u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status);
- gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status);
-
- LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status));
- if (U_FAILURE(status)) { return; }
- ParseDataSink sink;
- ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
- if (U_FAILURE(status)) { return; }
-
- // NOTE: It is OK for these assertions to fail if there was a no-data build.
- U_ASSERT(gUnicodeSets[COMMA] != nullptr);
- U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
- U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
- U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr);
- U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr);
-
- LocalPointer<UnicodeSet> otherGrouping(new UnicodeSet(
- u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]",
- status
- ), status);
- if (U_FAILURE(status)) { return; }
- otherGrouping->addAll(*gUnicodeSets[APOSTROPHE_SIGN]);
- gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan();
- gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
- gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
- STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
-
- U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr);
- U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr);
- U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
- U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
-
- gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
- if (U_FAILURE(status)) { return; }
-
- U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
- U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr);
- U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr);
- U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr);
- U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr);
-
- gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
- if (U_FAILURE(status)) { return; }
- gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
- gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
-
- for (auto* uniset : gUnicodeSets) {
- if (uniset != nullptr) {
- uniset->freeze();
- }
- }
-}
-
-}
-
-const UnicodeSet* unisets::get(Key key) {
- UErrorCode localStatus = U_ZERO_ERROR;
- umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus);
- if (U_FAILURE(localStatus)) {
- return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
- }
- return getImpl(key);
-}
-
-Key unisets::chooseFrom(UnicodeString str, Key key1) {
- return get(key1)->contains(str) ? key1 : NONE;
-}
-
-Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
- return get(key1)->contains(str) ? key1 : chooseFrom(str, key2);
-}
-
-//Key unisets::chooseCurrency(UnicodeString str) {
-// if (get(DOLLAR_SIGN)->contains(str)) {
-// return DOLLAR_SIGN;
-// } else if (get(POUND_SIGN)->contains(str)) {
-// return POUND_SIGN;
-// } else if (get(RUPEE_SIGN)->contains(str)) {
-// return RUPEE_SIGN;
-// } else if (get(YEN_SIGN)->contains(str)) {
-// return YEN_SIGN;
-// } else {
-// return NONE;
-// }
-//}
-
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include "static_unicode_sets.h"
+#include "umutex.h"
+#include "ucln_cmn.h"
+#include "unicode/uniset.h"
+#include "uresimp.h"
+#include "cstring.h"
+#include "uassert.h"
+
+using namespace icu;
+using namespace icu::unisets;
+
+
+namespace {
+
+UnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {};  // Cached sets indexed by Key; getImpl() serves a null slot as the static empty set.
+
+// Save the empty instance in static memory to have well-defined behavior if a
+// regular UnicodeSet cannot be allocated.
+alignas(UnicodeSet)
+char gEmptyUnicodeSet[sizeof(UnicodeSet)];
+
+// Whether the gEmptyUnicodeSet is initialized and ready to use.
+UBool gEmptyUnicodeSetInitialized = FALSE;
+
+// Looks up the cached set stored for `key`. A slot that holds nullptr
+// resolves to the statically stored empty set, so the result is never null.
+inline UnicodeSet* getImpl(Key key) {
+    UnicodeSet* const cached = gUnicodeSets[key];
+    return (cached != nullptr) ? cached : reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
+}
+
+// Allocates a new UnicodeSet containing everything in the sets stored under
+// k1 and k2 (looked up through getImpl, so an empty slot contributes
+// nothing), freezes it and returns it.
+// Returns nullptr if the result set could not be allocated.
+UnicodeSet* computeUnion(Key k1, Key k2) {
+    UnicodeSet* merged = new UnicodeSet();
+    if (merged != nullptr) {
+        const Key members[] = {k1, k2};
+        for (Key member : members) {
+            merged->addAll(*getImpl(member));
+        }
+        merged->freeze();
+    }
+    return merged;
+}
+
+// Three-key variant: allocates a new UnicodeSet containing everything in the
+// sets stored under k1, k2 and k3 (looked up through getImpl, so an empty
+// slot contributes nothing), freezes it and returns it.
+// Returns nullptr if the result set could not be allocated.
+UnicodeSet* computeUnion(Key k1, Key k2, Key k3) {
+    UnicodeSet* merged = new UnicodeSet();
+    if (merged != nullptr) {
+        const Key members[] = {k1, k2, k3};
+        for (Key member : members) {
+            merged->addAll(*getImpl(member));
+        }
+        merged->freeze();
+    }
+    return merged;
+}
+
+
+// Parses `unicodeSetPattern` into a new UnicodeSet and stores it in the
+// gUnicodeSets slot for `key`.
+//
+// @param key               Slot to fill.
+// @param unicodeSetPattern Pattern handed to the UnicodeSet constructor.
+// @param status            Receives any error from parsing the pattern; set to
+//                          U_MEMORY_ALLOCATION_ERROR if the set itself could
+//                          not be allocated, in which case the slot is left
+//                          untouched.
+void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) {
+    // assert unicodeSets.get(key) == null;
+    UnicodeSet* parsed = new UnicodeSet(unicodeSetPattern, status);
+    if (parsed == nullptr) {
+        // Report the failure instead of silently leaving the slot empty.
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // The slot is expected to be empty (see the assertion note above), but if
+    // the data ever yields two patterns for the same key, free the earlier
+    // set rather than leaking it. Deleting nullptr is a no-op.
+    delete gUnicodeSets[key];
+    gUnicodeSets[key] = parsed;
+}
+
+// Resource sink that walks the table handed to put() and registers one
+// UnicodeSet per recognized symbol class through saveSet().
+class ParseDataSink : public ResourceSink {
+  public:
+    // Layout as read below: context table -> strictness table -> array of
+    // UnicodeSet pattern strings. The "date" context is skipped. Processing
+    // stops at the first failure reported through `status`.
+    void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
+        // A pattern is classified by the first marker character, in table
+        // order, that it contains.
+        // There is both lenient and strict data for comma/period,
+        // but not for any of the other symbols.
+        struct SymbolClass {
+            char16_t marker;
+            Key lenientKey;
+            Key strictKey;
+        };
+        static const SymbolClass kSymbolClasses[] = {
+            {u'.', PERIOD, STRICT_PERIOD},
+            {u',', COMMA, STRICT_COMMA},
+            {u'+', PLUS_SIGN, PLUS_SIGN},
+            {u'-', MINUS_SIGN, MINUS_SIGN},
+            {u'$', DOLLAR_SIGN, DOLLAR_SIGN},
+            {u'£', POUND_SIGN, POUND_SIGN},
+            {u'₹', RUPEE_SIGN, RUPEE_SIGN},
+            {u'¥', YEN_SIGN, YEN_SIGN},
+            {u'₩', WON_SIGN, WON_SIGN},
+            {u'%', PERCENT_SIGN, PERCENT_SIGN},
+            {u'‰', PERMILLE_SIGN, PERMILLE_SIGN},
+            {u'’', APOSTROPHE_SIGN, APOSTROPHE_SIGN},
+        };
+
+        ResourceTable contexts = value.getTable(status);
+        if (U_FAILURE(status)) { return; }
+        for (int ctx = 0; contexts.getKeyAndValue(ctx, key, value); ++ctx) {
+            if (uprv_strcmp(key, "date") == 0) {
+                continue;  // ignore
+            }
+            ResourceTable strictnessLevels = value.getTable(status);
+            if (U_FAILURE(status)) { return; }
+            for (int lvl = 0; strictnessLevels.getKeyAndValue(lvl, key, value); ++lvl) {
+                const bool lenient = (uprv_strcmp(key, "lenient") == 0);
+                ResourceArray patterns = value.getArray(status);
+                if (U_FAILURE(status)) { return; }
+                for (int idx = 0; idx < patterns.getSize(); ++idx) {
+                    patterns.getValue(idx, value);
+                    UnicodeString pattern = value.getUnicodeString(status);
+                    if (U_FAILURE(status)) { return; }
+
+                    const SymbolClass* match = nullptr;
+                    for (const SymbolClass& candidate : kSymbolClasses) {
+                        if (pattern.indexOf(candidate.marker) != -1) {
+                            match = &candidate;
+                            break;
+                        }
+                    }
+                    if (match != nullptr) {
+                        saveSet(lenient ? match->lenientKey : match->strictKey, pattern, status);
+                    } else {
+                        // Unknown class of parse lenients
+                        // TODO(ICU-20428): Make ICU automatically accept new classes?
+                        U_ASSERT(FALSE);
+                    }
+                    if (U_FAILURE(status)) { return; }
+                }
+            }
+        }
+    }
+};
+
+
+icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER;  // Passed to umtx_initOnce() in unisets::get(); reset by cleanupNumberParseUniSets().
+
+// Cleanup callback registered by initNumberParseUniSets(). Destroys the
+// placement-constructed empty set if it was built, deletes every cached set
+// and clears its slot, then resets the init-once object. Always returns TRUE.
+UBool U_CALLCONV cleanupNumberParseUniSets() {
+    if (gEmptyUnicodeSetInitialized) {
+        // The empty set lives in static storage, so run its destructor
+        // explicitly instead of using delete.
+        UnicodeSet* emptySet = reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
+        emptySet->~UnicodeSet();
+        gEmptyUnicodeSetInitialized = FALSE;
+    }
+    for (UnicodeSet*& slot : gUnicodeSets) {
+        delete slot;
+        slot = nullptr;
+    }
+    gNumberParseUniSetsInitOnce.reset();
+    return TRUE;
+}
+
+// One-time initializer, run through umtx_initOnce() from unisets::get().
+// Registers the cleanup callback, constructs the static empty fallback set,
+// fills gUnicodeSets from hard-coded patterns and from the "parse" table of
+// the root resource bundle, and finally freezes every populated set.
+//
+// @param status Set on failure, in which case the function returns early.
+//               Sets stored before the failure remain in gUnicodeSets and
+//               are deleted by cleanupNumberParseUniSets().
+void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
+    ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets);
+
+    // Initialize the empty instance for well-defined fallback behavior
+    new(gEmptyUnicodeSet) UnicodeSet();
+    reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze();
+    gEmptyUnicodeSetInitialized = TRUE;
+
+    // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309.
+    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
+    gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet(
+        u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status);
+    gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status);
+
+    LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status));
+    if (U_FAILURE(status)) { return; }
+    ParseDataSink sink;
+    ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status);
+    if (U_FAILURE(status)) { return; }
+
+    // NOTE: It is OK for these assertions to fail if there was a no-data build.
+    U_ASSERT(gUnicodeSets[COMMA] != nullptr);
+    U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr);
+    U_ASSERT(gUnicodeSets[PERIOD] != nullptr);
+    U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr);
+    U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr);
+
+    LocalPointer<UnicodeSet> otherGrouping(new UnicodeSet(
+        u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]",
+        status
+    ), status);
+    if (U_FAILURE(status)) { return; }
+    // The APOSTROPHE_SIGN slot may legitimately be empty (see the NOTE above),
+    // so go through getImpl(), which substitutes the empty set, instead of
+    // dereferencing a possibly-null pointer.
+    otherGrouping->addAll(*getImpl(APOSTROPHE_SIGN));
+    gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan();
+    gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS);
+    gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion(
+        STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS);
+
+    U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
+
+    gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
+    if (gUnicodeSets[INFINITY_SIGN] == nullptr) {
+        // A failed allocation leaves `status` untouched; report it explicitly.
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if (U_FAILURE(status)) { return; }
+
+    U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr);
+    U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr);
+
+    gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status);
+    if (gUnicodeSets[DIGITS] == nullptr) {
+        // Without this check the unions below would be built without any
+        // digits and the failure would go unnoticed.
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if (U_FAILURE(status)) { return; }
+    gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS);
+    gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS);
+
+    // Freeze every set that was populated above.
+    for (auto* uniset : gUnicodeSets) {
+        if (uniset != nullptr) {
+            uniset->freeze();
+        }
+    }
+}
+
+}
+
+// Returns the set stored for `key`, running initNumberParseUniSets() through
+// umtx_initOnce() first. If initialization reports a failure, the static
+// empty set is returned instead; a null slot also resolves to the empty set
+// (via getImpl), so the result is never null.
+const UnicodeSet* unisets::get(Key key) {
+    UErrorCode initStatus = U_ZERO_ERROR;
+    umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, initStatus);
+    return U_SUCCESS(initStatus)
+        ? getImpl(key)
+        : reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet);
+}
+
+// Returns key1 if the set stored under key1 contains `str`, otherwise NONE.
+Key unisets::chooseFrom(UnicodeString str, Key key1) {
+    if (get(key1)->contains(str)) {
+        return key1;
+    }
+    return NONE;
+}
+
+// Returns the first of key1, key2 whose set contains `str`; NONE if neither
+// does. key1 is tried first and the single-key overload handles key2.
+Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) {
+    if (get(key1)->contains(str)) {
+        return key1;
+    }
+    return chooseFrom(str, key2);
+}
+
+//Key unisets::chooseCurrency(UnicodeString str) {
+// if (get(DOLLAR_SIGN)->contains(str)) {
+// return DOLLAR_SIGN;
+// } else if (get(POUND_SIGN)->contains(str)) {
+// return POUND_SIGN;
+// } else if (get(RUPEE_SIGN)->contains(str)) {
+// return RUPEE_SIGN;
+// } else if (get(YEN_SIGN)->contains(str)) {
+// return YEN_SIGN;
+// } else {
+// return NONE;
+// }
+//}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */