aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/i18n/numparse_scientific.cpp
diff options
context:
space:
mode:
authorSergey Uzhakov <uzhastik@gmail.com>2022-06-22 21:23:19 +0300
committerSergey Uzhakov <uzhastik@gmail.com>2022-06-22 21:23:19 +0300
commitb86c509aaa5b19d5659a4548395a47c9e87826e9 (patch)
tree98025d7bd574b788f6679b24f91711ea86bf07ae /contrib/libs/icu/i18n/numparse_scientific.cpp
parentf55ada30d924b55d15fad9001944df1323a9598a (diff)
downloadydb-b86c509aaa5b19d5659a4548395a47c9e87826e9.tar.gz
YQ-1154: allow pg translator in OSS
ref:bc7b8dcc7b45e5f527a87a1bed622dff6f06d41a
Diffstat (limited to 'contrib/libs/icu/i18n/numparse_scientific.cpp')
-rw-r--r--contrib/libs/icu/i18n/numparse_scientific.cpp163
1 files changed, 163 insertions, 0 deletions
diff --git a/contrib/libs/icu/i18n/numparse_scientific.cpp b/contrib/libs/icu/i18n/numparse_scientific.cpp
new file mode 100644
index 0000000000..4b88cd998f
--- /dev/null
+++ b/contrib/libs/icu/i18n/numparse_scientific.cpp
@@ -0,0 +1,163 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include "numparse_types.h"
+#include "numparse_scientific.h"
+#include "static_unicode_sets.h"
+#include "string_segment.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+namespace {
+
+inline const UnicodeSet& minusSignSet() {
+ return *unisets::get(unisets::MINUS_SIGN);
+}
+
+inline const UnicodeSet& plusSignSet() {
+ return *unisets::get(unisets::PLUS_SIGN);
+}
+
+} // namespace
+
+
+ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
+ : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
+ fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
+ fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
+
+ const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
+ if (minusSignSet().contains(minusSign)) {
+ fCustomMinusSign.setToBogus();
+ } else {
+ fCustomMinusSign = minusSign;
+ }
+
+ const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
+ if (plusSignSet().contains(plusSign)) {
+ fCustomPlusSign.setToBogus();
+ } else {
+ fCustomPlusSign = plusSign;
+ }
+}
+
+bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+ // Only accept scientific notation after the mantissa.
+ if (!result.seenNumber()) {
+ return false;
+ }
+
+ // Only accept one exponent per string.
+ if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
+ return false;
+ }
+
+ // First match the scientific separator, and then match another number after it.
+ // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
+ int32_t initialOffset = segment.getOffset();
+ int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
+ if (overlap == fExponentSeparatorString.length()) {
+ // Full exponent separator match.
+
+ // First attempt to get a code point, returning true if we can't get one.
+ if (segment.length() == overlap) {
+ return true;
+ }
+ segment.adjustOffset(overlap);
+
+ // Allow ignorables before the sign.
+ // Note: call site is guarded by the segment.length() check above.
+ // Note: the ignorables matcher should not touch the result.
+ fIgnorablesMatcher.match(segment, result, status);
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
+ }
+
+ // Allow a sign, and then try to match digits.
+ int8_t exponentSign = 1;
+ if (segment.startsWith(minusSignSet())) {
+ exponentSign = -1;
+ segment.adjustOffsetByCodePoint();
+ } else if (segment.startsWith(plusSignSet())) {
+ segment.adjustOffsetByCodePoint();
+ } else if (segment.startsWith(fCustomMinusSign)) {
+ overlap = segment.getCommonPrefixLength(fCustomMinusSign);
+ if (overlap != fCustomMinusSign.length()) {
+ // Partial custom sign match
+ segment.setOffset(initialOffset);
+ return true;
+ }
+ exponentSign = -1;
+ segment.adjustOffset(overlap);
+ } else if (segment.startsWith(fCustomPlusSign)) {
+ overlap = segment.getCommonPrefixLength(fCustomPlusSign);
+ if (overlap != fCustomPlusSign.length()) {
+ // Partial custom sign match
+ segment.setOffset(initialOffset);
+ return true;
+ }
+ segment.adjustOffset(overlap);
+ }
+
+ // Return true if the segment is empty.
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
+ }
+
+ // Allow ignorables after the sign.
+ // Note: call site is guarded by the segment.length() check above.
+ // Note: the ignorables matcher should not touch the result.
+ fIgnorablesMatcher.match(segment, result, status);
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
+ }
+
+ // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
+ bool wasBogus = result.quantity.bogus;
+ result.quantity.bogus = false;
+ int digitsOffset = segment.getOffset();
+ bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
+ result.quantity.bogus = wasBogus;
+
+ if (segment.getOffset() != digitsOffset) {
+ // At least one exponent digit was matched.
+ result.flags |= FLAG_HAS_EXPONENT;
+ } else {
+ // No exponent digits were matched
+ segment.setOffset(initialOffset);
+ }
+ return digitsReturnValue;
+
+ } else if (overlap == segment.length()) {
+ // Partial exponent separator match
+ return true;
+ }
+
+ // No match
+ return false;
+}
+
+bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
+ return segment.startsWith(fExponentSeparatorString);
+}
+
+UnicodeString ScientificMatcher::toString() const {
+ return u"<Scientific>";
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */