about summary refs log tree commit diff stats
path: root/contrib/libs/icu/i18n/numparse_decimal.cpp
diff options
context:
space:
mode:
author mcheshkov <mcheshkov@yandex-team.ru> 2022-02-10 16:46:15 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:15 +0300
commit e9d19cec64684c9c1e6b0c98297e5b895cf904fe (patch)
tree 2768b1223e96a8a0610a93d18425d9647c1123c8 /contrib/libs/icu/i18n/numparse_decimal.cpp
parent 60040c91ffe701a84689b2c6310ff845e65cff42 (diff)
download ydb-e9d19cec64684c9c1e6b0c98297e5b895cf904fe.tar.gz
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/numparse_decimal.cpp')
-rw-r--r-- contrib/libs/icu/i18n/numparse_decimal.cpp 918
1 files changed, 459 insertions, 459 deletions
diff --git a/contrib/libs/icu/i18n/numparse_decimal.cpp b/contrib/libs/icu/i18n/numparse_decimal.cpp
index cf1e815672..813512c06b 100644
--- a/contrib/libs/icu/i18n/numparse_decimal.cpp
+++ b/contrib/libs/icu/i18n/numparse_decimal.cpp
@@ -1,459 +1,459 @@
-// © 2018 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-// Allow implicit conversion from char16_t* to UnicodeString for this file:
-// Helpful in toString methods and elsewhere.
-#define UNISTR_FROM_STRING_EXPLICIT
-
-#include "numparse_types.h"
-#include "numparse_decimal.h"
-#include "static_unicode_sets.h"
-#include "numparse_utils.h"
-#include "unicode/uchar.h"
-#include "putilimp.h"
-#include "number_decimalquantity.h"
-#include "string_segment.h"
-
-using namespace icu;
-using namespace icu::numparse;
-using namespace icu::numparse::impl;
-
-
-DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
- parse_flags_t parseFlags) {
- if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) {
- groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol);
- decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
- } else {
- groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
- decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
- }
- bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS);
- unisets::Key groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS
- : unisets::ALL_SEPARATORS;
-
- // Attempt to find separators in the static cache
-
- groupingUniSet = unisets::get(groupingKey);
- unisets::Key decimalKey = unisets::chooseFrom(
- decimalSeparator,
- strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA,
- strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
- if (decimalKey >= 0) {
- decimalUniSet = unisets::get(decimalKey);
- } else if (!decimalSeparator.isEmpty()) {
- auto* set = new UnicodeSet();
- set->add(decimalSeparator.char32At(0));
- set->freeze();
- decimalUniSet = set;
- fLocalDecimalUniSet.adoptInstead(set);
- } else {
- decimalUniSet = unisets::get(unisets::EMPTY);
- }
-
- if (groupingKey >= 0 && decimalKey >= 0) {
- // Everything is available in the static cache
- separatorSet = groupingUniSet;
- leadSet = unisets::get(
- strictSeparators ? unisets::DIGITS_OR_ALL_SEPARATORS
- : unisets::DIGITS_OR_STRICT_ALL_SEPARATORS);
- } else {
- auto* set = new UnicodeSet();
- set->addAll(*groupingUniSet);
- set->addAll(*decimalUniSet);
- set->freeze();
- separatorSet = set;
- fLocalSeparatorSet.adoptInstead(set);
- leadSet = nullptr;
- }
-
- UChar32 cpZero = symbols.getCodePointZero();
- if (cpZero == -1 || !u_isdigit(cpZero) || u_digit(cpZero, 10) != 0) {
- // Uncommon case: okay to allocate.
- auto digitStrings = new UnicodeString[10];
- fLocalDigitStrings.adoptInstead(digitStrings);
- for (int32_t i = 0; i <= 9; i++) {
- digitStrings[i] = symbols.getConstDigitSymbol(i);
- }
- }
-
- requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE);
- groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED);
- integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY);
- grouping1 = grouper.getPrimary();
- grouping2 = grouper.getSecondary();
-
- // Fraction grouping parsing is disabled for now but could be enabled later.
- // See http://bugs.icu-project.org/trac/ticket/10794
- // fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED);
-}
-
-bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
- return match(segment, result, 0, status);
-}
-
-bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
- UErrorCode&) const {
- if (result.seenNumber() && exponentSign == 0) {
- // A number has already been consumed.
- return false;
- } else if (exponentSign != 0) {
- // scientific notation always comes after the number
- U_ASSERT(!result.quantity.bogus);
- }
-
- // Initial offset before any character consumption.
- int32_t initialOffset = segment.getOffset();
-
- // Return value: whether to ask for more characters.
- bool maybeMore = false;
-
- // All digits consumed so far.
- number::impl::DecimalQuantity digitsConsumed;
- digitsConsumed.bogus = true;
-
- // The total number of digits after the decimal place, used for scaling the result.
- int32_t digitsAfterDecimalPlace = 0;
-
- // The actual grouping and decimal separators used in the string.
- // If non-null, we have seen that token.
- UnicodeString actualGroupingString;
- UnicodeString actualDecimalString;
- actualGroupingString.setToBogus();
- actualDecimalString.setToBogus();
-
- // Information for two groups: the previous group and the current group.
- //
- // Each group has three pieces of information:
- //
- // Offset: the string position of the beginning of the group, including a leading separator
- // if there was a leading separator. This is needed in case we need to rewind the parse to
- // that position.
- //
- // Separator type:
- // 0 => beginning of string
- // 1 => lead separator is a grouping separator
- // 2 => lead separator is a decimal separator
- //
- // Count: the number of digits in the group. If -1, the group has been validated.
- int32_t currGroupOffset = 0;
- int32_t currGroupSepType = 0;
- int32_t currGroupCount = 0;
- int32_t prevGroupOffset = -1;
- int32_t prevGroupSepType = -1;
- int32_t prevGroupCount = -1;
-
- while (segment.length() > 0) {
- maybeMore = false;
-
- // Attempt to match a digit.
- int8_t digit = -1;
-
- // Try by code point digit value.
- UChar32 cp = segment.getCodePoint();
- if (u_isdigit(cp)) {
- segment.adjustOffset(U16_LENGTH(cp));
- digit = static_cast<int8_t>(u_digit(cp, 10));
- }
-
- // Try by digit string.
- if (digit == -1 && !fLocalDigitStrings.isNull()) {
- for (int32_t i = 0; i < 10; i++) {
- const UnicodeString& str = fLocalDigitStrings[i];
- if (str.isEmpty()) {
- continue;
- }
- int32_t overlap = segment.getCommonPrefixLength(str);
- if (overlap == str.length()) {
- segment.adjustOffset(overlap);
- digit = static_cast<int8_t>(i);
- break;
- }
- maybeMore = maybeMore || (overlap == segment.length());
- }
- }
-
- if (digit >= 0) {
- // Digit was found.
- if (digitsConsumed.bogus) {
- digitsConsumed.bogus = false;
- digitsConsumed.clear();
- }
- digitsConsumed.appendDigit(digit, 0, true);
- currGroupCount++;
- if (!actualDecimalString.isBogus()) {
- digitsAfterDecimalPlace++;
- }
- continue;
- }
-
- // Attempt to match a literal grouping or decimal separator.
- bool isDecimal = false;
- bool isGrouping = false;
-
- // 1) Attempt the decimal separator string literal.
- // if (we have not seen a decimal separator yet) { ... }
- if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
- int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
- maybeMore = maybeMore || (overlap == segment.length());
- if (overlap == decimalSeparator.length()) {
- isDecimal = true;
- actualDecimalString = decimalSeparator;
- }
- }
-
- // 2) Attempt to match the actual grouping string literal.
- if (!actualGroupingString.isBogus()) {
- int32_t overlap = segment.getCommonPrefixLength(actualGroupingString);
- maybeMore = maybeMore || (overlap == segment.length());
- if (overlap == actualGroupingString.length()) {
- isGrouping = true;
- }
- }
-
- // 2.5) Attempt to match a new the grouping separator string literal.
- // if (we have not seen a grouping or decimal separator yet) { ... }
- if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
- !groupingSeparator.isEmpty()) {
- int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
- maybeMore = maybeMore || (overlap == segment.length());
- if (overlap == groupingSeparator.length()) {
- isGrouping = true;
- actualGroupingString = groupingSeparator;
- }
- }
-
- // 3) Attempt to match a decimal separator from the equivalence set.
- // if (we have not seen a decimal separator yet) { ... }
- // The !isGrouping is to confirm that we haven't yet matched the current character.
- if (!isGrouping && actualDecimalString.isBogus()) {
- if (decimalUniSet->contains(cp)) {
- isDecimal = true;
- actualDecimalString = UnicodeString(cp);
- }
- }
-
- // 4) Attempt to match a grouping separator from the equivalence set.
- // if (we have not seen a grouping or decimal separator yet) { ... }
- if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
- if (groupingUniSet->contains(cp)) {
- isGrouping = true;
- actualGroupingString = UnicodeString(cp);
- }
- }
-
- // Leave if we failed to match this as a separator.
- if (!isDecimal && !isGrouping) {
- break;
- }
-
- // Check for conditions when we don't want to accept the separator.
- if (isDecimal && integerOnly) {
- break;
- } else if (currGroupSepType == 2 && isGrouping) {
- // Fraction grouping
- break;
- }
-
- // Validate intermediate grouping sizes.
- bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
- bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
- if (!prevValidSecondary || (isDecimal && !currValidPrimary)) {
- // Invalid grouping sizes.
- if (isGrouping && currGroupCount == 0) {
- // Trailing grouping separators: these are taken care of below
- U_ASSERT(currGroupSepType == 1);
- } else if (requireGroupingMatch) {
- // Strict mode: reject the parse
- digitsConsumed.clear();
- digitsConsumed.bogus = true;
- }
- break;
- } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
- break;
- } else {
- // Grouping sizes OK so far.
- prevGroupOffset = currGroupOffset;
- prevGroupCount = currGroupCount;
- if (isDecimal) {
- // Do not validate this group any more.
- prevGroupSepType = -1;
- } else {
- prevGroupSepType = currGroupSepType;
- }
- }
-
- // OK to accept the separator.
- // Special case: don't update currGroup if it is empty; this allows two grouping
- // separators in a row in lenient mode.
- if (currGroupCount != 0) {
- currGroupOffset = segment.getOffset();
- }
- currGroupSepType = isGrouping ? 1 : 2;
- currGroupCount = 0;
- if (isGrouping) {
- segment.adjustOffset(actualGroupingString.length());
- } else {
- segment.adjustOffset(actualDecimalString.length());
- }
- }
-
- // End of main loop.
- // Back up if there was a trailing grouping separator.
- // Shift prev -> curr so we can check it as a final group.
- if (currGroupSepType != 2 && currGroupCount == 0) {
- maybeMore = true;
- segment.setOffset(currGroupOffset);
- currGroupOffset = prevGroupOffset;
- currGroupSepType = prevGroupSepType;
- currGroupCount = prevGroupCount;
- prevGroupOffset = -1;
- prevGroupSepType = 0;
- prevGroupCount = 1;
- }
-
- // Validate final grouping sizes.
- bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
- bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
- if (!requireGroupingMatch) {
- // The cases we need to handle here are lone digits.
- // Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1)
- // See more examples in numberformattestspecification.txt
- int32_t digitsToRemove = 0;
- if (!prevValidSecondary) {
- segment.setOffset(prevGroupOffset);
- digitsToRemove += prevGroupCount;
- digitsToRemove += currGroupCount;
- } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) {
- maybeMore = true;
- segment.setOffset(currGroupOffset);
- digitsToRemove += currGroupCount;
- }
- if (digitsToRemove != 0) {
- digitsConsumed.adjustMagnitude(-digitsToRemove);
- digitsConsumed.truncate();
- }
- prevValidSecondary = true;
- currValidPrimary = true;
- }
- if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) {
- // Grouping failure.
- digitsConsumed.bogus = true;
- }
-
- // Strings that start with a separator but have no digits,
- // or strings that failed a grouping size check.
- if (digitsConsumed.bogus) {
- maybeMore = maybeMore || (segment.length() == 0);
- segment.setOffset(initialOffset);
- return maybeMore;
- }
-
- // We passed all inspections. Start post-processing.
-
- // Adjust for fraction part.
- digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);
-
- // Set the digits, either normal or exponent.
- if (exponentSign != 0 && segment.getOffset() != initialOffset) {
- bool overflow = false;
- if (digitsConsumed.fitsInLong()) {
- int64_t exponentLong = digitsConsumed.toLong(false);
- U_ASSERT(exponentLong >= 0);
- if (exponentLong <= INT32_MAX) {
- auto exponentInt = static_cast<int32_t>(exponentLong);
- if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) {
- overflow = true;
- }
- } else {
- overflow = true;
- }
- } else {
- overflow = true;
- }
- if (overflow) {
- if (exponentSign == -1) {
- // Set to zero
- result.quantity.clear();
- } else {
- // Set to infinity
- result.quantity.bogus = true;
- result.flags |= FLAG_INFINITY;
- }
- }
- } else {
- result.quantity = digitsConsumed;
- }
-
- // Set other information into the result and return.
- if (!actualDecimalString.isBogus()) {
- result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
- }
- result.setCharsConsumed(segment);
- return segment.length() == 0 || maybeMore;
-}
-
-bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const {
- if (requireGroupingMatch) {
- if (sepType == -1) {
- // No such group (prevGroup before first shift).
- return true;
- } else if (sepType == 0) {
- // First group.
- if (isPrimary) {
- // No grouping separators is OK.
- return true;
- } else {
- return count != 0 && count <= grouping2;
- }
- } else if (sepType == 1) {
- // Middle group.
- if (isPrimary) {
- return count == grouping1;
- } else {
- return count == grouping2;
- }
- } else {
- U_ASSERT(sepType == 2);
- // After the decimal separator.
- return true;
- }
- } else {
- if (sepType == 1) {
- // #11230: don't accept middle groups with only 1 digit.
- return count != 1;
- } else {
- return true;
- }
- }
-}
-
-bool DecimalMatcher::smokeTest(const StringSegment& segment) const {
- // The common case uses a static leadSet for efficiency.
- if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
- return segment.startsWith(*leadSet);
- }
- if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) {
- return true;
- }
- if (fLocalDigitStrings.isNull()) {
- return false;
- }
- for (int32_t i = 0; i < 10; i++) {
- if (segment.startsWith(fLocalDigitStrings[i])) {
- return true;
- }
- }
- return false;
-}
-
-UnicodeString DecimalMatcher::toString() const {
- return u"<Decimal>";
-}
-
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include "numparse_types.h"
+#include "numparse_decimal.h"
+#include "static_unicode_sets.h"
+#include "numparse_utils.h"
+#include "unicode/uchar.h"
+#include "putilimp.h"
+#include "number_decimalquantity.h"
+#include "string_segment.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
+ parse_flags_t parseFlags) {
+ if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) {
+ groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol);
+ decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
+ } else {
+ groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
+ decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
+ }
+ bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS);
+ unisets::Key groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS
+ : unisets::ALL_SEPARATORS;
+
+ // Attempt to find separators in the static cache
+
+ groupingUniSet = unisets::get(groupingKey);
+ unisets::Key decimalKey = unisets::chooseFrom(
+ decimalSeparator,
+ strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA,
+ strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
+ if (decimalKey >= 0) {
+ decimalUniSet = unisets::get(decimalKey);
+ } else if (!decimalSeparator.isEmpty()) {
+ auto* set = new UnicodeSet();
+ set->add(decimalSeparator.char32At(0));
+ set->freeze();
+ decimalUniSet = set;
+ fLocalDecimalUniSet.adoptInstead(set);
+ } else {
+ decimalUniSet = unisets::get(unisets::EMPTY);
+ }
+
+ if (groupingKey >= 0 && decimalKey >= 0) {
+ // Everything is available in the static cache
+ separatorSet = groupingUniSet;
+ leadSet = unisets::get(
+ strictSeparators ? unisets::DIGITS_OR_ALL_SEPARATORS
+ : unisets::DIGITS_OR_STRICT_ALL_SEPARATORS);
+ } else {
+ auto* set = new UnicodeSet();
+ set->addAll(*groupingUniSet);
+ set->addAll(*decimalUniSet);
+ set->freeze();
+ separatorSet = set;
+ fLocalSeparatorSet.adoptInstead(set);
+ leadSet = nullptr;
+ }
+
+ UChar32 cpZero = symbols.getCodePointZero();
+ if (cpZero == -1 || !u_isdigit(cpZero) || u_digit(cpZero, 10) != 0) {
+ // Uncommon case: okay to allocate.
+ auto digitStrings = new UnicodeString[10];
+ fLocalDigitStrings.adoptInstead(digitStrings);
+ for (int32_t i = 0; i <= 9; i++) {
+ digitStrings[i] = symbols.getConstDigitSymbol(i);
+ }
+ }
+
+ requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE);
+ groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED);
+ integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY);
+ grouping1 = grouper.getPrimary();
+ grouping2 = grouper.getSecondary();
+
+ // Fraction grouping parsing is disabled for now but could be enabled later.
+ // See http://bugs.icu-project.org/trac/ticket/10794
+ // fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED);
+}
+
+bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+ return match(segment, result, 0, status);
+}
+
+bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
+ UErrorCode&) const {
+ if (result.seenNumber() && exponentSign == 0) {
+ // A number has already been consumed.
+ return false;
+ } else if (exponentSign != 0) {
+ // scientific notation always comes after the number
+ U_ASSERT(!result.quantity.bogus);
+ }
+
+ // Initial offset before any character consumption.
+ int32_t initialOffset = segment.getOffset();
+
+ // Return value: whether to ask for more characters.
+ bool maybeMore = false;
+
+ // All digits consumed so far.
+ number::impl::DecimalQuantity digitsConsumed;
+ digitsConsumed.bogus = true;
+
+ // The total number of digits after the decimal place, used for scaling the result.
+ int32_t digitsAfterDecimalPlace = 0;
+
+ // The actual grouping and decimal separators used in the string.
+ // If non-null, we have seen that token.
+ UnicodeString actualGroupingString;
+ UnicodeString actualDecimalString;
+ actualGroupingString.setToBogus();
+ actualDecimalString.setToBogus();
+
+ // Information for two groups: the previous group and the current group.
+ //
+ // Each group has three pieces of information:
+ //
+ // Offset: the string position of the beginning of the group, including a leading separator
+ // if there was a leading separator. This is needed in case we need to rewind the parse to
+ // that position.
+ //
+ // Separator type:
+ // 0 => beginning of string
+ // 1 => lead separator is a grouping separator
+ // 2 => lead separator is a decimal separator
+ //
+ // Count: the number of digits in the group. If -1, the group has been validated.
+ int32_t currGroupOffset = 0;
+ int32_t currGroupSepType = 0;
+ int32_t currGroupCount = 0;
+ int32_t prevGroupOffset = -1;
+ int32_t prevGroupSepType = -1;
+ int32_t prevGroupCount = -1;
+
+ while (segment.length() > 0) {
+ maybeMore = false;
+
+ // Attempt to match a digit.
+ int8_t digit = -1;
+
+ // Try by code point digit value.
+ UChar32 cp = segment.getCodePoint();
+ if (u_isdigit(cp)) {
+ segment.adjustOffset(U16_LENGTH(cp));
+ digit = static_cast<int8_t>(u_digit(cp, 10));
+ }
+
+ // Try by digit string.
+ if (digit == -1 && !fLocalDigitStrings.isNull()) {
+ for (int32_t i = 0; i < 10; i++) {
+ const UnicodeString& str = fLocalDigitStrings[i];
+ if (str.isEmpty()) {
+ continue;
+ }
+ int32_t overlap = segment.getCommonPrefixLength(str);
+ if (overlap == str.length()) {
+ segment.adjustOffset(overlap);
+ digit = static_cast<int8_t>(i);
+ break;
+ }
+ maybeMore = maybeMore || (overlap == segment.length());
+ }
+ }
+
+ if (digit >= 0) {
+ // Digit was found.
+ if (digitsConsumed.bogus) {
+ digitsConsumed.bogus = false;
+ digitsConsumed.clear();
+ }
+ digitsConsumed.appendDigit(digit, 0, true);
+ currGroupCount++;
+ if (!actualDecimalString.isBogus()) {
+ digitsAfterDecimalPlace++;
+ }
+ continue;
+ }
+
+ // Attempt to match a literal grouping or decimal separator.
+ bool isDecimal = false;
+ bool isGrouping = false;
+
+ // 1) Attempt the decimal separator string literal.
+ // if (we have not seen a decimal separator yet) { ... }
+ if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
+ int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
+ maybeMore = maybeMore || (overlap == segment.length());
+ if (overlap == decimalSeparator.length()) {
+ isDecimal = true;
+ actualDecimalString = decimalSeparator;
+ }
+ }
+
+ // 2) Attempt to match the actual grouping string literal.
+ if (!actualGroupingString.isBogus()) {
+ int32_t overlap = segment.getCommonPrefixLength(actualGroupingString);
+ maybeMore = maybeMore || (overlap == segment.length());
+ if (overlap == actualGroupingString.length()) {
+ isGrouping = true;
+ }
+ }
+
+ // 2.5) Attempt to match a new the grouping separator string literal.
+ // if (we have not seen a grouping or decimal separator yet) { ... }
+ if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
+ !groupingSeparator.isEmpty()) {
+ int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
+ maybeMore = maybeMore || (overlap == segment.length());
+ if (overlap == groupingSeparator.length()) {
+ isGrouping = true;
+ actualGroupingString = groupingSeparator;
+ }
+ }
+
+ // 3) Attempt to match a decimal separator from the equivalence set.
+ // if (we have not seen a decimal separator yet) { ... }
+ // The !isGrouping is to confirm that we haven't yet matched the current character.
+ if (!isGrouping && actualDecimalString.isBogus()) {
+ if (decimalUniSet->contains(cp)) {
+ isDecimal = true;
+ actualDecimalString = UnicodeString(cp);
+ }
+ }
+
+ // 4) Attempt to match a grouping separator from the equivalence set.
+ // if (we have not seen a grouping or decimal separator yet) { ... }
+ if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
+ if (groupingUniSet->contains(cp)) {
+ isGrouping = true;
+ actualGroupingString = UnicodeString(cp);
+ }
+ }
+
+ // Leave if we failed to match this as a separator.
+ if (!isDecimal && !isGrouping) {
+ break;
+ }
+
+ // Check for conditions when we don't want to accept the separator.
+ if (isDecimal && integerOnly) {
+ break;
+ } else if (currGroupSepType == 2 && isGrouping) {
+ // Fraction grouping
+ break;
+ }
+
+ // Validate intermediate grouping sizes.
+ bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
+ bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
+ if (!prevValidSecondary || (isDecimal && !currValidPrimary)) {
+ // Invalid grouping sizes.
+ if (isGrouping && currGroupCount == 0) {
+ // Trailing grouping separators: these are taken care of below
+ U_ASSERT(currGroupSepType == 1);
+ } else if (requireGroupingMatch) {
+ // Strict mode: reject the parse
+ digitsConsumed.clear();
+ digitsConsumed.bogus = true;
+ }
+ break;
+ } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
+ break;
+ } else {
+ // Grouping sizes OK so far.
+ prevGroupOffset = currGroupOffset;
+ prevGroupCount = currGroupCount;
+ if (isDecimal) {
+ // Do not validate this group any more.
+ prevGroupSepType = -1;
+ } else {
+ prevGroupSepType = currGroupSepType;
+ }
+ }
+
+ // OK to accept the separator.
+ // Special case: don't update currGroup if it is empty; this allows two grouping
+ // separators in a row in lenient mode.
+ if (currGroupCount != 0) {
+ currGroupOffset = segment.getOffset();
+ }
+ currGroupSepType = isGrouping ? 1 : 2;
+ currGroupCount = 0;
+ if (isGrouping) {
+ segment.adjustOffset(actualGroupingString.length());
+ } else {
+ segment.adjustOffset(actualDecimalString.length());
+ }
+ }
+
+ // End of main loop.
+ // Back up if there was a trailing grouping separator.
+ // Shift prev -> curr so we can check it as a final group.
+ if (currGroupSepType != 2 && currGroupCount == 0) {
+ maybeMore = true;
+ segment.setOffset(currGroupOffset);
+ currGroupOffset = prevGroupOffset;
+ currGroupSepType = prevGroupSepType;
+ currGroupCount = prevGroupCount;
+ prevGroupOffset = -1;
+ prevGroupSepType = 0;
+ prevGroupCount = 1;
+ }
+
+ // Validate final grouping sizes.
+ bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
+ bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
+ if (!requireGroupingMatch) {
+ // The cases we need to handle here are lone digits.
+ // Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1)
+ // See more examples in numberformattestspecification.txt
+ int32_t digitsToRemove = 0;
+ if (!prevValidSecondary) {
+ segment.setOffset(prevGroupOffset);
+ digitsToRemove += prevGroupCount;
+ digitsToRemove += currGroupCount;
+ } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) {
+ maybeMore = true;
+ segment.setOffset(currGroupOffset);
+ digitsToRemove += currGroupCount;
+ }
+ if (digitsToRemove != 0) {
+ digitsConsumed.adjustMagnitude(-digitsToRemove);
+ digitsConsumed.truncate();
+ }
+ prevValidSecondary = true;
+ currValidPrimary = true;
+ }
+ if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) {
+ // Grouping failure.
+ digitsConsumed.bogus = true;
+ }
+
+ // Strings that start with a separator but have no digits,
+ // or strings that failed a grouping size check.
+ if (digitsConsumed.bogus) {
+ maybeMore = maybeMore || (segment.length() == 0);
+ segment.setOffset(initialOffset);
+ return maybeMore;
+ }
+
+ // We passed all inspections. Start post-processing.
+
+ // Adjust for fraction part.
+ digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);
+
+ // Set the digits, either normal or exponent.
+ if (exponentSign != 0 && segment.getOffset() != initialOffset) {
+ bool overflow = false;
+ if (digitsConsumed.fitsInLong()) {
+ int64_t exponentLong = digitsConsumed.toLong(false);
+ U_ASSERT(exponentLong >= 0);
+ if (exponentLong <= INT32_MAX) {
+ auto exponentInt = static_cast<int32_t>(exponentLong);
+ if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) {
+ overflow = true;
+ }
+ } else {
+ overflow = true;
+ }
+ } else {
+ overflow = true;
+ }
+ if (overflow) {
+ if (exponentSign == -1) {
+ // Set to zero
+ result.quantity.clear();
+ } else {
+ // Set to infinity
+ result.quantity.bogus = true;
+ result.flags |= FLAG_INFINITY;
+ }
+ }
+ } else {
+ result.quantity = digitsConsumed;
+ }
+
+ // Set other information into the result and return.
+ if (!actualDecimalString.isBogus()) {
+ result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
+ }
+ result.setCharsConsumed(segment);
+ return segment.length() == 0 || maybeMore;
+}
+
+bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const {
+ if (requireGroupingMatch) {
+ if (sepType == -1) {
+ // No such group (prevGroup before first shift).
+ return true;
+ } else if (sepType == 0) {
+ // First group.
+ if (isPrimary) {
+ // No grouping separators is OK.
+ return true;
+ } else {
+ return count != 0 && count <= grouping2;
+ }
+ } else if (sepType == 1) {
+ // Middle group.
+ if (isPrimary) {
+ return count == grouping1;
+ } else {
+ return count == grouping2;
+ }
+ } else {
+ U_ASSERT(sepType == 2);
+ // After the decimal separator.
+ return true;
+ }
+ } else {
+ if (sepType == 1) {
+ // #11230: don't accept middle groups with only 1 digit.
+ return count != 1;
+ } else {
+ return true;
+ }
+ }
+}
+
+bool DecimalMatcher::smokeTest(const StringSegment& segment) const {
+ // The common case uses a static leadSet for efficiency.
+ if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
+ return segment.startsWith(*leadSet);
+ }
+ if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) {
+ return true;
+ }
+ if (fLocalDigitStrings.isNull()) {
+ return false;
+ }
+ for (int32_t i = 0; i < 10; i++) {
+ if (segment.startsWith(fLocalDigitStrings[i])) {
+ return true;
+ }
+ }
+ return false;
+}
+
+UnicodeString DecimalMatcher::toString() const {
+ return u"<Decimal>";
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */