diff options
author | mcheshkov <mcheshkov@yandex-team.ru> | 2022-02-10 16:46:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:15 +0300 |
commit | e9d19cec64684c9c1e6b0c98297e5b895cf904fe (patch) | |
tree | 2768b1223e96a8a0610a93d18425d9647c1123c8 /contrib/libs/icu/i18n/numparse_decimal.cpp | |
parent | 60040c91ffe701a84689b2c6310ff845e65cff42 (diff) | |
download | ydb-e9d19cec64684c9c1e6b0c98297e5b895cf904fe.tar.gz |
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/numparse_decimal.cpp')
-rw-r--r-- | contrib/libs/icu/i18n/numparse_decimal.cpp | 918 |
1 files changed, 459 insertions, 459 deletions
diff --git a/contrib/libs/icu/i18n/numparse_decimal.cpp b/contrib/libs/icu/i18n/numparse_decimal.cpp index cf1e815672..813512c06b 100644 --- a/contrib/libs/icu/i18n/numparse_decimal.cpp +++ b/contrib/libs/icu/i18n/numparse_decimal.cpp @@ -1,459 +1,459 @@ -// © 2018 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -// Allow implicit conversion from char16_t* to UnicodeString for this file: -// Helpful in toString methods and elsewhere. -#define UNISTR_FROM_STRING_EXPLICIT - -#include "numparse_types.h" -#include "numparse_decimal.h" -#include "static_unicode_sets.h" -#include "numparse_utils.h" -#include "unicode/uchar.h" -#include "putilimp.h" -#include "number_decimalquantity.h" -#include "string_segment.h" - -using namespace icu; -using namespace icu::numparse; -using namespace icu::numparse::impl; - - -DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper, - parse_flags_t parseFlags) { - if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) { - groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol); - decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol); - } else { - groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); - decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); - } - bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS); - unisets::Key groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS - : unisets::ALL_SEPARATORS; - - // Attempt to find separators in the static cache - - groupingUniSet = unisets::get(groupingKey); - unisets::Key decimalKey = unisets::chooseFrom( - decimalSeparator, - strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA, - strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD); - if (decimalKey >= 0) { - decimalUniSet = unisets::get(decimalKey); - } else if (!decimalSeparator.isEmpty()) { - auto* set = new UnicodeSet(); - set->add(decimalSeparator.char32At(0)); - set->freeze(); - decimalUniSet = set; - fLocalDecimalUniSet.adoptInstead(set); - } else { - decimalUniSet = unisets::get(unisets::EMPTY); - } - - if (groupingKey >= 0 && decimalKey >= 0) { - // Everything is available in the static cache - separatorSet = groupingUniSet; - leadSet = unisets::get( - strictSeparators ? unisets::DIGITS_OR_ALL_SEPARATORS - : unisets::DIGITS_OR_STRICT_ALL_SEPARATORS); - } else { - auto* set = new UnicodeSet(); - set->addAll(*groupingUniSet); - set->addAll(*decimalUniSet); - set->freeze(); - separatorSet = set; - fLocalSeparatorSet.adoptInstead(set); - leadSet = nullptr; - } - - UChar32 cpZero = symbols.getCodePointZero(); - if (cpZero == -1 || !u_isdigit(cpZero) || u_digit(cpZero, 10) != 0) { - // Uncommon case: okay to allocate. - auto digitStrings = new UnicodeString[10]; - fLocalDigitStrings.adoptInstead(digitStrings); - for (int32_t i = 0; i <= 9; i++) { - digitStrings[i] = symbols.getConstDigitSymbol(i); - } - } - - requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE); - groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED); - integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY); - grouping1 = grouper.getPrimary(); - grouping2 = grouper.getSecondary(); - - // Fraction grouping parsing is disabled for now but could be enabled later. - // See http://bugs.icu-project.org/trac/ticket/10794 - // fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED); -} - -bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { - return match(segment, result, 0, status); -} - -bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, - UErrorCode&) const { - if (result.seenNumber() && exponentSign == 0) { - // A number has already been consumed. - return false; - } else if (exponentSign != 0) { - // scientific notation always comes after the number - U_ASSERT(!result.quantity.bogus); - } - - // Initial offset before any character consumption. - int32_t initialOffset = segment.getOffset(); - - // Return value: whether to ask for more characters. - bool maybeMore = false; - - // All digits consumed so far. - number::impl::DecimalQuantity digitsConsumed; - digitsConsumed.bogus = true; - - // The total number of digits after the decimal place, used for scaling the result. - int32_t digitsAfterDecimalPlace = 0; - - // The actual grouping and decimal separators used in the string. - // If non-null, we have seen that token. - UnicodeString actualGroupingString; - UnicodeString actualDecimalString; - actualGroupingString.setToBogus(); - actualDecimalString.setToBogus(); - - // Information for two groups: the previous group and the current group. - // - // Each group has three pieces of information: - // - // Offset: the string position of the beginning of the group, including a leading separator - // if there was a leading separator. This is needed in case we need to rewind the parse to - // that position. - // - // Separator type: - // 0 => beginning of string - // 1 => lead separator is a grouping separator - // 2 => lead separator is a decimal separator - // - // Count: the number of digits in the group. If -1, the group has been validated. - int32_t currGroupOffset = 0; - int32_t currGroupSepType = 0; - int32_t currGroupCount = 0; - int32_t prevGroupOffset = -1; - int32_t prevGroupSepType = -1; - int32_t prevGroupCount = -1; - - while (segment.length() > 0) { - maybeMore = false; - - // Attempt to match a digit. - int8_t digit = -1; - - // Try by code point digit value. - UChar32 cp = segment.getCodePoint(); - if (u_isdigit(cp)) { - segment.adjustOffset(U16_LENGTH(cp)); - digit = static_cast<int8_t>(u_digit(cp, 10)); - } - - // Try by digit string. - if (digit == -1 && !fLocalDigitStrings.isNull()) { - for (int32_t i = 0; i < 10; i++) { - const UnicodeString& str = fLocalDigitStrings[i]; - if (str.isEmpty()) { - continue; - } - int32_t overlap = segment.getCommonPrefixLength(str); - if (overlap == str.length()) { - segment.adjustOffset(overlap); - digit = static_cast<int8_t>(i); - break; - } - maybeMore = maybeMore || (overlap == segment.length()); - } - } - - if (digit >= 0) { - // Digit was found. - if (digitsConsumed.bogus) { - digitsConsumed.bogus = false; - digitsConsumed.clear(); - } - digitsConsumed.appendDigit(digit, 0, true); - currGroupCount++; - if (!actualDecimalString.isBogus()) { - digitsAfterDecimalPlace++; - } - continue; - } - - // Attempt to match a literal grouping or decimal separator. - bool isDecimal = false; - bool isGrouping = false; - - // 1) Attempt the decimal separator string literal. - // if (we have not seen a decimal separator yet) { ... } - if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) { - int32_t overlap = segment.getCommonPrefixLength(decimalSeparator); - maybeMore = maybeMore || (overlap == segment.length()); - if (overlap == decimalSeparator.length()) { - isDecimal = true; - actualDecimalString = decimalSeparator; - } - } - - // 2) Attempt to match the actual grouping string literal. - if (!actualGroupingString.isBogus()) { - int32_t overlap = segment.getCommonPrefixLength(actualGroupingString); - maybeMore = maybeMore || (overlap == segment.length()); - if (overlap == actualGroupingString.length()) { - isGrouping = true; - } - } - - // 2.5) Attempt to match a new the grouping separator string literal. - // if (we have not seen a grouping or decimal separator yet) { ... } - if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() && - !groupingSeparator.isEmpty()) { - int32_t overlap = segment.getCommonPrefixLength(groupingSeparator); - maybeMore = maybeMore || (overlap == segment.length()); - if (overlap == groupingSeparator.length()) { - isGrouping = true; - actualGroupingString = groupingSeparator; - } - } - - // 3) Attempt to match a decimal separator from the equivalence set. - // if (we have not seen a decimal separator yet) { ... } - // The !isGrouping is to confirm that we haven't yet matched the current character. - if (!isGrouping && actualDecimalString.isBogus()) { - if (decimalUniSet->contains(cp)) { - isDecimal = true; - actualDecimalString = UnicodeString(cp); - } - } - - // 4) Attempt to match a grouping separator from the equivalence set. - // if (we have not seen a grouping or decimal separator yet) { ... } - if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) { - if (groupingUniSet->contains(cp)) { - isGrouping = true; - actualGroupingString = UnicodeString(cp); - } - } - - // Leave if we failed to match this as a separator. - if (!isDecimal && !isGrouping) { - break; - } - - // Check for conditions when we don't want to accept the separator. - if (isDecimal && integerOnly) { - break; - } else if (currGroupSepType == 2 && isGrouping) { - // Fraction grouping - break; - } - - // Validate intermediate grouping sizes. - bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false); - bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true); - if (!prevValidSecondary || (isDecimal && !currValidPrimary)) { - // Invalid grouping sizes. - if (isGrouping && currGroupCount == 0) { - // Trailing grouping separators: these are taken care of below - U_ASSERT(currGroupSepType == 1); - } else if (requireGroupingMatch) { - // Strict mode: reject the parse - digitsConsumed.clear(); - digitsConsumed.bogus = true; - } - break; - } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) { - break; - } else { - // Grouping sizes OK so far. - prevGroupOffset = currGroupOffset; - prevGroupCount = currGroupCount; - if (isDecimal) { - // Do not validate this group any more. - prevGroupSepType = -1; - } else { - prevGroupSepType = currGroupSepType; - } - } - - // OK to accept the separator. - // Special case: don't update currGroup if it is empty; this allows two grouping - // separators in a row in lenient mode. - if (currGroupCount != 0) { - currGroupOffset = segment.getOffset(); - } - currGroupSepType = isGrouping ? 1 : 2; - currGroupCount = 0; - if (isGrouping) { - segment.adjustOffset(actualGroupingString.length()); - } else { - segment.adjustOffset(actualDecimalString.length()); - } - } - - // End of main loop. - // Back up if there was a trailing grouping separator. - // Shift prev -> curr so we can check it as a final group. - if (currGroupSepType != 2 && currGroupCount == 0) { - maybeMore = true; - segment.setOffset(currGroupOffset); - currGroupOffset = prevGroupOffset; - currGroupSepType = prevGroupSepType; - currGroupCount = prevGroupCount; - prevGroupOffset = -1; - prevGroupSepType = 0; - prevGroupCount = 1; - } - - // Validate final grouping sizes. - bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false); - bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true); - if (!requireGroupingMatch) { - // The cases we need to handle here are lone digits. - // Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1) - // See more examples in numberformattestspecification.txt - int32_t digitsToRemove = 0; - if (!prevValidSecondary) { - segment.setOffset(prevGroupOffset); - digitsToRemove += prevGroupCount; - digitsToRemove += currGroupCount; - } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) { - maybeMore = true; - segment.setOffset(currGroupOffset); - digitsToRemove += currGroupCount; - } - if (digitsToRemove != 0) { - digitsConsumed.adjustMagnitude(-digitsToRemove); - digitsConsumed.truncate(); - } - prevValidSecondary = true; - currValidPrimary = true; - } - if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) { - // Grouping failure. - digitsConsumed.bogus = true; - } - - // Strings that start with a separator but have no digits, - // or strings that failed a grouping size check. - if (digitsConsumed.bogus) { - maybeMore = maybeMore || (segment.length() == 0); - segment.setOffset(initialOffset); - return maybeMore; - } - - // We passed all inspections. Start post-processing. - - // Adjust for fraction part. - digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace); - - // Set the digits, either normal or exponent. - if (exponentSign != 0 && segment.getOffset() != initialOffset) { - bool overflow = false; - if (digitsConsumed.fitsInLong()) { - int64_t exponentLong = digitsConsumed.toLong(false); - U_ASSERT(exponentLong >= 0); - if (exponentLong <= INT32_MAX) { - auto exponentInt = static_cast<int32_t>(exponentLong); - if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) { - overflow = true; - } - } else { - overflow = true; - } - } else { - overflow = true; - } - if (overflow) { - if (exponentSign == -1) { - // Set to zero - result.quantity.clear(); - } else { - // Set to infinity - result.quantity.bogus = true; - result.flags |= FLAG_INFINITY; - } - } - } else { - result.quantity = digitsConsumed; - } - - // Set other information into the result and return. - if (!actualDecimalString.isBogus()) { - result.flags |= FLAG_HAS_DECIMAL_SEPARATOR; - } - result.setCharsConsumed(segment); - return segment.length() == 0 || maybeMore; -} - -bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const { - if (requireGroupingMatch) { - if (sepType == -1) { - // No such group (prevGroup before first shift). - return true; - } else if (sepType == 0) { - // First group. - if (isPrimary) { - // No grouping separators is OK. - return true; - } else { - return count != 0 && count <= grouping2; - } - } else if (sepType == 1) { - // Middle group. - if (isPrimary) { - return count == grouping1; - } else { - return count == grouping2; - } - } else { - U_ASSERT(sepType == 2); - // After the decimal separator. - return true; - } - } else { - if (sepType == 1) { - // #11230: don't accept middle groups with only 1 digit. - return count != 1; - } else { - return true; - } - } -} - -bool DecimalMatcher::smokeTest(const StringSegment& segment) const { - // The common case uses a static leadSet for efficiency. - if (fLocalDigitStrings.isNull() && leadSet != nullptr) { - return segment.startsWith(*leadSet); - } - if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) { - return true; - } - if (fLocalDigitStrings.isNull()) { - return false; - } - for (int32_t i = 0; i < 10; i++) { - if (segment.startsWith(fLocalDigitStrings[i])) { - return true; - } - } - return false; -} - -UnicodeString DecimalMatcher::toString() const { - return u"<Decimal>"; -} - - -#endif /* #if !UCONFIG_NO_FORMATTING */ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "numparse_types.h" +#include "numparse_decimal.h" +#include "static_unicode_sets.h" +#include "numparse_utils.h" +#include "unicode/uchar.h" +#include "putilimp.h" +#include "number_decimalquantity.h" +#include "string_segment.h" + +using namespace icu; +using namespace icu::numparse; +using namespace icu::numparse::impl; + + +DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper, + parse_flags_t parseFlags) { + if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) { + groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol); + decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol); + } else { + groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); + decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); + } + bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS); + unisets::Key groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS + : unisets::ALL_SEPARATORS; + + // Attempt to find separators in the static cache + + groupingUniSet = unisets::get(groupingKey); + unisets::Key decimalKey = unisets::chooseFrom( + decimalSeparator, + strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA, + strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD); + if (decimalKey >= 0) { + decimalUniSet = unisets::get(decimalKey); + } else if (!decimalSeparator.isEmpty()) { + auto* set = new UnicodeSet(); + set->add(decimalSeparator.char32At(0)); + set->freeze(); + decimalUniSet = set; + fLocalDecimalUniSet.adoptInstead(set); + } else { + decimalUniSet = unisets::get(unisets::EMPTY); + } + + if (groupingKey >= 0 && decimalKey >= 0) { + // Everything is available in the static cache + separatorSet = groupingUniSet; + leadSet = unisets::get( + strictSeparators ? unisets::DIGITS_OR_ALL_SEPARATORS + : unisets::DIGITS_OR_STRICT_ALL_SEPARATORS); + } else { + auto* set = new UnicodeSet(); + set->addAll(*groupingUniSet); + set->addAll(*decimalUniSet); + set->freeze(); + separatorSet = set; + fLocalSeparatorSet.adoptInstead(set); + leadSet = nullptr; + } + + UChar32 cpZero = symbols.getCodePointZero(); + if (cpZero == -1 || !u_isdigit(cpZero) || u_digit(cpZero, 10) != 0) { + // Uncommon case: okay to allocate. + auto digitStrings = new UnicodeString[10]; + fLocalDigitStrings.adoptInstead(digitStrings); + for (int32_t i = 0; i <= 9; i++) { + digitStrings[i] = symbols.getConstDigitSymbol(i); + } + } + + requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE); + groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED); + integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY); + grouping1 = grouper.getPrimary(); + grouping2 = grouper.getSecondary(); + + // Fraction grouping parsing is disabled for now but could be enabled later. + // See http://bugs.icu-project.org/trac/ticket/10794 + // fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED); +} + +bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { + return match(segment, result, 0, status); +} + +bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, + UErrorCode&) const { + if (result.seenNumber() && exponentSign == 0) { + // A number has already been consumed. + return false; + } else if (exponentSign != 0) { + // scientific notation always comes after the number + U_ASSERT(!result.quantity.bogus); + } + + // Initial offset before any character consumption. + int32_t initialOffset = segment.getOffset(); + + // Return value: whether to ask for more characters. + bool maybeMore = false; + + // All digits consumed so far. + number::impl::DecimalQuantity digitsConsumed; + digitsConsumed.bogus = true; + + // The total number of digits after the decimal place, used for scaling the result. + int32_t digitsAfterDecimalPlace = 0; + + // The actual grouping and decimal separators used in the string. + // If non-null, we have seen that token. + UnicodeString actualGroupingString; + UnicodeString actualDecimalString; + actualGroupingString.setToBogus(); + actualDecimalString.setToBogus(); + + // Information for two groups: the previous group and the current group. + // + // Each group has three pieces of information: + // + // Offset: the string position of the beginning of the group, including a leading separator + // if there was a leading separator. This is needed in case we need to rewind the parse to + // that position. + // + // Separator type: + // 0 => beginning of string + // 1 => lead separator is a grouping separator + // 2 => lead separator is a decimal separator + // + // Count: the number of digits in the group. If -1, the group has been validated. + int32_t currGroupOffset = 0; + int32_t currGroupSepType = 0; + int32_t currGroupCount = 0; + int32_t prevGroupOffset = -1; + int32_t prevGroupSepType = -1; + int32_t prevGroupCount = -1; + + while (segment.length() > 0) { + maybeMore = false; + + // Attempt to match a digit. + int8_t digit = -1; + + // Try by code point digit value. + UChar32 cp = segment.getCodePoint(); + if (u_isdigit(cp)) { + segment.adjustOffset(U16_LENGTH(cp)); + digit = static_cast<int8_t>(u_digit(cp, 10)); + } + + // Try by digit string. + if (digit == -1 && !fLocalDigitStrings.isNull()) { + for (int32_t i = 0; i < 10; i++) { + const UnicodeString& str = fLocalDigitStrings[i]; + if (str.isEmpty()) { + continue; + } + int32_t overlap = segment.getCommonPrefixLength(str); + if (overlap == str.length()) { + segment.adjustOffset(overlap); + digit = static_cast<int8_t>(i); + break; + } + maybeMore = maybeMore || (overlap == segment.length()); + } + } + + if (digit >= 0) { + // Digit was found. + if (digitsConsumed.bogus) { + digitsConsumed.bogus = false; + digitsConsumed.clear(); + } + digitsConsumed.appendDigit(digit, 0, true); + currGroupCount++; + if (!actualDecimalString.isBogus()) { + digitsAfterDecimalPlace++; + } + continue; + } + + // Attempt to match a literal grouping or decimal separator. + bool isDecimal = false; + bool isGrouping = false; + + // 1) Attempt the decimal separator string literal. + // if (we have not seen a decimal separator yet) { ... } + if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) { + int32_t overlap = segment.getCommonPrefixLength(decimalSeparator); + maybeMore = maybeMore || (overlap == segment.length()); + if (overlap == decimalSeparator.length()) { + isDecimal = true; + actualDecimalString = decimalSeparator; + } + } + + // 2) Attempt to match the actual grouping string literal. + if (!actualGroupingString.isBogus()) { + int32_t overlap = segment.getCommonPrefixLength(actualGroupingString); + maybeMore = maybeMore || (overlap == segment.length()); + if (overlap == actualGroupingString.length()) { + isGrouping = true; + } + } + + // 2.5) Attempt to match a new the grouping separator string literal. + // if (we have not seen a grouping or decimal separator yet) { ... } + if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() && + !groupingSeparator.isEmpty()) { + int32_t overlap = segment.getCommonPrefixLength(groupingSeparator); + maybeMore = maybeMore || (overlap == segment.length()); + if (overlap == groupingSeparator.length()) { + isGrouping = true; + actualGroupingString = groupingSeparator; + } + } + + // 3) Attempt to match a decimal separator from the equivalence set. + // if (we have not seen a decimal separator yet) { ... } + // The !isGrouping is to confirm that we haven't yet matched the current character. + if (!isGrouping && actualDecimalString.isBogus()) { + if (decimalUniSet->contains(cp)) { + isDecimal = true; + actualDecimalString = UnicodeString(cp); + } + } + + // 4) Attempt to match a grouping separator from the equivalence set. + // if (we have not seen a grouping or decimal separator yet) { ... } + if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) { + if (groupingUniSet->contains(cp)) { + isGrouping = true; + actualGroupingString = UnicodeString(cp); + } + } + + // Leave if we failed to match this as a separator. + if (!isDecimal && !isGrouping) { + break; + } + + // Check for conditions when we don't want to accept the separator. + if (isDecimal && integerOnly) { + break; + } else if (currGroupSepType == 2 && isGrouping) { + // Fraction grouping + break; + } + + // Validate intermediate grouping sizes. + bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false); + bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true); + if (!prevValidSecondary || (isDecimal && !currValidPrimary)) { + // Invalid grouping sizes. + if (isGrouping && currGroupCount == 0) { + // Trailing grouping separators: these are taken care of below + U_ASSERT(currGroupSepType == 1); + } else if (requireGroupingMatch) { + // Strict mode: reject the parse + digitsConsumed.clear(); + digitsConsumed.bogus = true; + } + break; + } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) { + break; + } else { + // Grouping sizes OK so far. + prevGroupOffset = currGroupOffset; + prevGroupCount = currGroupCount; + if (isDecimal) { + // Do not validate this group any more. + prevGroupSepType = -1; + } else { + prevGroupSepType = currGroupSepType; + } + } + + // OK to accept the separator. + // Special case: don't update currGroup if it is empty; this allows two grouping + // separators in a row in lenient mode. + if (currGroupCount != 0) { + currGroupOffset = segment.getOffset(); + } + currGroupSepType = isGrouping ? 1 : 2; + currGroupCount = 0; + if (isGrouping) { + segment.adjustOffset(actualGroupingString.length()); + } else { + segment.adjustOffset(actualDecimalString.length()); + } + } + + // End of main loop. + // Back up if there was a trailing grouping separator. + // Shift prev -> curr so we can check it as a final group. + if (currGroupSepType != 2 && currGroupCount == 0) { + maybeMore = true; + segment.setOffset(currGroupOffset); + currGroupOffset = prevGroupOffset; + currGroupSepType = prevGroupSepType; + currGroupCount = prevGroupCount; + prevGroupOffset = -1; + prevGroupSepType = 0; + prevGroupCount = 1; + } + + // Validate final grouping sizes. + bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false); + bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true); + if (!requireGroupingMatch) { + // The cases we need to handle here are lone digits. + // Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1) + // See more examples in numberformattestspecification.txt + int32_t digitsToRemove = 0; + if (!prevValidSecondary) { + segment.setOffset(prevGroupOffset); + digitsToRemove += prevGroupCount; + digitsToRemove += currGroupCount; + } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) { + maybeMore = true; + segment.setOffset(currGroupOffset); + digitsToRemove += currGroupCount; + } + if (digitsToRemove != 0) { + digitsConsumed.adjustMagnitude(-digitsToRemove); + digitsConsumed.truncate(); + } + prevValidSecondary = true; + currValidPrimary = true; + } + if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) { + // Grouping failure. + digitsConsumed.bogus = true; + } + + // Strings that start with a separator but have no digits, + // or strings that failed a grouping size check. + if (digitsConsumed.bogus) { + maybeMore = maybeMore || (segment.length() == 0); + segment.setOffset(initialOffset); + return maybeMore; + } + + // We passed all inspections. Start post-processing. + + // Adjust for fraction part. + digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace); + + // Set the digits, either normal or exponent. + if (exponentSign != 0 && segment.getOffset() != initialOffset) { + bool overflow = false; + if (digitsConsumed.fitsInLong()) { + int64_t exponentLong = digitsConsumed.toLong(false); + U_ASSERT(exponentLong >= 0); + if (exponentLong <= INT32_MAX) { + auto exponentInt = static_cast<int32_t>(exponentLong); + if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) { + overflow = true; + } + } else { + overflow = true; + } + } else { + overflow = true; + } + if (overflow) { + if (exponentSign == -1) { + // Set to zero + result.quantity.clear(); + } else { + // Set to infinity + result.quantity.bogus = true; + result.flags |= FLAG_INFINITY; + } + } + } else { + result.quantity = digitsConsumed; + } + + // Set other information into the result and return. + if (!actualDecimalString.isBogus()) { + result.flags |= FLAG_HAS_DECIMAL_SEPARATOR; + } + result.setCharsConsumed(segment); + return segment.length() == 0 || maybeMore; +} + +bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const { + if (requireGroupingMatch) { + if (sepType == -1) { + // No such group (prevGroup before first shift). + return true; + } else if (sepType == 0) { + // First group. + if (isPrimary) { + // No grouping separators is OK. + return true; + } else { + return count != 0 && count <= grouping2; + } + } else if (sepType == 1) { + // Middle group. + if (isPrimary) { + return count == grouping1; + } else { + return count == grouping2; + } + } else { + U_ASSERT(sepType == 2); + // After the decimal separator. + return true; + } + } else { + if (sepType == 1) { + // #11230: don't accept middle groups with only 1 digit. + return count != 1; + } else { + return true; + } + } +} + +bool DecimalMatcher::smokeTest(const StringSegment& segment) const { + // The common case uses a static leadSet for efficiency. + if (fLocalDigitStrings.isNull() && leadSet != nullptr) { + return segment.startsWith(*leadSet); + } + if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) { + return true; + } + if (fLocalDigitStrings.isNull()) { + return false; + } + for (int32_t i = 0; i < 10; i++) { + if (segment.startsWith(fLocalDigitStrings[i])) { + return true; + } + } + return false; +} + +UnicodeString DecimalMatcher::toString() const { + return u"<Decimal>"; +} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ |