aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/i18n/measunit_extra.cpp
diff options
context:
space:
mode:
authormcheshkov <mcheshkov@yandex-team.ru>2022-02-10 16:46:15 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:46:15 +0300
commite9d19cec64684c9c1e6b0c98297e5b895cf904fe (patch)
tree2768b1223e96a8a0610a93d18425d9647c1123c8 /contrib/libs/icu/i18n/measunit_extra.cpp
parent60040c91ffe701a84689b2c6310ff845e65cff42 (diff)
downloadydb-e9d19cec64684c9c1e6b0c98297e5b895cf904fe.tar.gz
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/measunit_extra.cpp')
-rw-r--r--contrib/libs/icu/i18n/measunit_extra.cpp1786
1 files changed, 893 insertions, 893 deletions
diff --git a/contrib/libs/icu/i18n/measunit_extra.cpp b/contrib/libs/icu/i18n/measunit_extra.cpp
index aeb60017a1..ab2b40d9b7 100644
--- a/contrib/libs/icu/i18n/measunit_extra.cpp
+++ b/contrib/libs/icu/i18n/measunit_extra.cpp
@@ -1,893 +1,893 @@
-// © 2020 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// Extra functions for MeasureUnit not needed for all clients.
-// Separate .o file so that it can be removed for modularity.
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_FORMATTING
-
-// Allow implicit conversion from char16_t* to UnicodeString for this file:
-// Helpful in toString methods and elsewhere.
-#define UNISTR_FROM_STRING_EXPLICIT
-
-#include <cstdlib>
-#include "cstring.h"
-#include "measunit_impl.h"
-#include "uarrsort.h"
-#include "uassert.h"
-#include "ucln_in.h"
-#include "umutex.h"
-#include "unicode/errorcode.h"
-#include "unicode/localpointer.h"
-#include "unicode/measunit.h"
-#include "unicode/ucharstrie.h"
-#include "unicode/ucharstriebuilder.h"
-
-#include "cstr.h"
-
-U_NAMESPACE_BEGIN
-
-
-namespace {
-
-// TODO: Propose a new error code for this?
-constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
-
-// Trie value offset for SI Prefixes. This is big enough to ensure we only
-// insert positive integers into the trie.
-constexpr int32_t kSIPrefixOffset = 64;
-
-// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
-constexpr int32_t kCompoundPartOffset = 128;
-
-enum CompoundPart {
- // Represents "-per-"
- COMPOUND_PART_PER = kCompoundPartOffset,
- // Represents "-"
- COMPOUND_PART_TIMES,
- // Represents "-and-"
- COMPOUND_PART_AND,
-};
-
-// Trie value offset for "per-".
-constexpr int32_t kInitialCompoundPartOffset = 192;
-
-enum InitialCompoundPart {
- // Represents "per-", the only compound part that can appear at the start of
- // an identifier.
- INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
-};
-
-// Trie value offset for powers like "square-", "cubic-", "p2-" etc.
-constexpr int32_t kPowerPartOffset = 256;
-
-enum PowerPart {
- POWER_PART_P2 = kPowerPartOffset + 2,
- POWER_PART_P3,
- POWER_PART_P4,
- POWER_PART_P5,
- POWER_PART_P6,
- POWER_PART_P7,
- POWER_PART_P8,
- POWER_PART_P9,
- POWER_PART_P10,
- POWER_PART_P11,
- POWER_PART_P12,
- POWER_PART_P13,
- POWER_PART_P14,
- POWER_PART_P15,
-};
-
-// Trie value offset for simple units, e.g. "gram", "nautical-mile",
-// "fluid-ounce-imperial".
-constexpr int32_t kSimpleUnitOffset = 512;
-
-const struct SIPrefixStrings {
- const char* const string;
- UMeasureSIPrefix value;
-} gSIPrefixStrings[] = {
- { "yotta", UMEASURE_SI_PREFIX_YOTTA },
- { "zetta", UMEASURE_SI_PREFIX_ZETTA },
- { "exa", UMEASURE_SI_PREFIX_EXA },
- { "peta", UMEASURE_SI_PREFIX_PETA },
- { "tera", UMEASURE_SI_PREFIX_TERA },
- { "giga", UMEASURE_SI_PREFIX_GIGA },
- { "mega", UMEASURE_SI_PREFIX_MEGA },
- { "kilo", UMEASURE_SI_PREFIX_KILO },
- { "hecto", UMEASURE_SI_PREFIX_HECTO },
- { "deka", UMEASURE_SI_PREFIX_DEKA },
- { "deci", UMEASURE_SI_PREFIX_DECI },
- { "centi", UMEASURE_SI_PREFIX_CENTI },
- { "milli", UMEASURE_SI_PREFIX_MILLI },
- { "micro", UMEASURE_SI_PREFIX_MICRO },
- { "nano", UMEASURE_SI_PREFIX_NANO },
- { "pico", UMEASURE_SI_PREFIX_PICO },
- { "femto", UMEASURE_SI_PREFIX_FEMTO },
- { "atto", UMEASURE_SI_PREFIX_ATTO },
- { "zepto", UMEASURE_SI_PREFIX_ZEPTO },
- { "yocto", UMEASURE_SI_PREFIX_YOCTO },
-};
-
-// TODO(ICU-21059): Get this list from data
-const char16_t* const gSimpleUnits[] = {
- u"candela",
- u"carat",
- u"gram",
- u"ounce",
- u"ounce-troy",
- u"pound",
- u"kilogram",
- u"stone",
- u"ton",
- u"metric-ton",
- u"earth-mass",
- u"solar-mass",
- u"point",
- u"inch",
- u"foot",
- u"yard",
- u"meter",
- u"fathom",
- u"furlong",
- u"mile",
- u"nautical-mile",
- u"mile-scandinavian",
- u"100-kilometer",
- u"earth-radius",
- u"solar-radius",
- u"astronomical-unit",
- u"light-year",
- u"parsec",
- u"second",
- u"minute",
- u"hour",
- u"day",
- u"day-person",
- u"week",
- u"week-person",
- u"month",
- u"month-person",
- u"year",
- u"year-person",
- u"decade",
- u"century",
- u"ampere",
- u"fahrenheit",
- u"kelvin",
- u"celsius",
- u"arc-second",
- u"arc-minute",
- u"degree",
- u"radian",
- u"revolution",
- u"item",
- u"mole",
- u"permillion",
- u"permyriad",
- u"permille",
- u"percent",
- u"karat",
- u"portion",
- u"bit",
- u"byte",
- u"dot",
- u"pixel",
- u"em",
- u"hertz",
- u"newton",
- u"pound-force",
- u"pascal",
- u"bar",
- u"atmosphere",
- u"ofhg",
- u"electronvolt",
- u"dalton",
- u"joule",
- u"calorie",
- u"british-thermal-unit",
- u"foodcalorie",
- u"therm-us",
- u"watt",
- u"horsepower",
- u"solar-luminosity",
- u"volt",
- u"ohm",
- u"dunam",
- u"acre",
- u"hectare",
- u"teaspoon",
- u"tablespoon",
- u"fluid-ounce-imperial",
- u"fluid-ounce",
- u"cup",
- u"cup-metric",
- u"pint",
- u"pint-metric",
- u"quart",
- u"liter",
- u"gallon",
- u"gallon-imperial",
- u"bushel",
- u"barrel",
- u"knot",
- u"g-force",
- u"lux",
-};
-
-icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;
-
-char16_t* kSerializedUnitExtrasStemTrie = nullptr;
-
-UBool U_CALLCONV cleanupUnitExtras() {
- uprv_free(kSerializedUnitExtrasStemTrie);
- kSerializedUnitExtrasStemTrie = nullptr;
- gUnitExtrasInitOnce.reset();
- return TRUE;
-}
-
-void U_CALLCONV initUnitExtras(UErrorCode& status) {
- ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
-
- UCharsTrieBuilder b(status);
- if (U_FAILURE(status)) { return; }
-
- // Add SI prefixes
- for (const auto& siPrefixInfo : gSIPrefixStrings) {
- UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV);
- b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status);
- }
- if (U_FAILURE(status)) { return; }
-
- // Add syntax parts (compound, power prefixes)
- b.add(u"-per-", COMPOUND_PART_PER, status);
- b.add(u"-", COMPOUND_PART_TIMES, status);
- b.add(u"-and-", COMPOUND_PART_AND, status);
- b.add(u"per-", INITIAL_COMPOUND_PART_PER, status);
- b.add(u"square-", POWER_PART_P2, status);
- b.add(u"cubic-", POWER_PART_P3, status);
- b.add(u"p2-", POWER_PART_P2, status);
- b.add(u"p3-", POWER_PART_P3, status);
- b.add(u"p4-", POWER_PART_P4, status);
- b.add(u"p5-", POWER_PART_P5, status);
- b.add(u"p6-", POWER_PART_P6, status);
- b.add(u"p7-", POWER_PART_P7, status);
- b.add(u"p8-", POWER_PART_P8, status);
- b.add(u"p9-", POWER_PART_P9, status);
- b.add(u"p10-", POWER_PART_P10, status);
- b.add(u"p11-", POWER_PART_P11, status);
- b.add(u"p12-", POWER_PART_P12, status);
- b.add(u"p13-", POWER_PART_P13, status);
- b.add(u"p14-", POWER_PART_P14, status);
- b.add(u"p15-", POWER_PART_P15, status);
- if (U_FAILURE(status)) { return; }
-
- // Add sanctioned simple units by offset
- int32_t simpleUnitOffset = kSimpleUnitOffset;
- for (auto simpleUnit : gSimpleUnits) {
- b.add(simpleUnit, simpleUnitOffset++, status);
- }
-
- // Build the CharsTrie
- // TODO: Use SLOW or FAST here?
- UnicodeString result;
- b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status);
- if (U_FAILURE(status)) { return; }
-
- // Copy the result into the global constant pointer
- size_t numBytes = result.length() * sizeof(char16_t);
- kSerializedUnitExtrasStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes));
- uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes);
-}
-
-class Token {
-public:
- Token(int32_t match) : fMatch(match) {}
-
- enum Type {
- TYPE_UNDEFINED,
- TYPE_SI_PREFIX,
- // Token type for "-per-", "-", and "-and-".
- TYPE_COMPOUND_PART,
- // Token type for "per-".
- TYPE_INITIAL_COMPOUND_PART,
- TYPE_POWER_PART,
- TYPE_SIMPLE_UNIT,
- };
-
- // Calling getType() is invalid, resulting in an assertion failure, if Token
- // value isn't positive.
- Type getType() const {
- U_ASSERT(fMatch > 0);
- if (fMatch < kCompoundPartOffset) {
- return TYPE_SI_PREFIX;
- }
- if (fMatch < kInitialCompoundPartOffset) {
- return TYPE_COMPOUND_PART;
- }
- if (fMatch < kPowerPartOffset) {
- return TYPE_INITIAL_COMPOUND_PART;
- }
- if (fMatch < kSimpleUnitOffset) {
- return TYPE_POWER_PART;
- }
- return TYPE_SIMPLE_UNIT;
- }
-
- UMeasureSIPrefix getSIPrefix() const {
- U_ASSERT(getType() == TYPE_SI_PREFIX);
- return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset);
- }
-
- // Valid only for tokens with type TYPE_COMPOUND_PART.
- int32_t getMatch() const {
- U_ASSERT(getType() == TYPE_COMPOUND_PART);
- return fMatch;
- }
-
- int32_t getInitialCompoundPart() const {
- // Even if there is only one InitialCompoundPart value, we have this
- // function for the simplicity of code consistency.
- U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
- // Defensive: if this assert fails, code using this function also needs
- // to change.
- U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
- return fMatch;
- }
-
- int8_t getPower() const {
- U_ASSERT(getType() == TYPE_POWER_PART);
- return static_cast<int8_t>(fMatch - kPowerPartOffset);
- }
-
- int32_t getSimpleUnitIndex() const {
- U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
- return fMatch - kSimpleUnitOffset;
- }
-
-private:
- int32_t fMatch;
-};
-
-class Parser {
-public:
- /**
- * Factory function for parsing the given identifier.
- *
- * @param source The identifier to parse. This function does not make a copy
- * of source: the underlying string that source points at, must outlive the
- * parser.
- * @param status ICU error code.
- */
- static Parser from(StringPiece source, UErrorCode& status) {
- if (U_FAILURE(status)) {
- return Parser();
- }
- umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
- if (U_FAILURE(status)) {
- return Parser();
- }
- return Parser(source);
- }
-
- MeasureUnitImpl parse(UErrorCode& status) {
- MeasureUnitImpl result;
- parseImpl(result, status);
- return result;
- }
-
-private:
- // Tracks parser progress: the offset into fSource.
- int32_t fIndex = 0;
-
- // Since we're not owning this memory, whatever is passed to the constructor
- // should live longer than this Parser - and the parser shouldn't return any
- // references to that string.
- StringPiece fSource;
- UCharsTrie fTrie;
-
- // Set to true when we've seen a "-per-" or a "per-", after which all units
- // are in the denominator. Until we find an "-and-", at which point the
- // identifier is invalid pending TODO(CLDR-13700).
- bool fAfterPer = false;
-
- Parser() : fSource(""), fTrie(u"") {}
-
- Parser(StringPiece source)
- : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {}
-
- inline bool hasNext() const {
- return fIndex < fSource.length();
- }
-
- // Returns the next Token parsed from fSource, advancing fIndex to the end
- // of that token in fSource. In case of U_FAILURE(status), the token
- // returned will cause an abort if getType() is called on it.
- Token nextToken(UErrorCode& status) {
- fTrie.reset();
- int32_t match = -1;
- // Saves the position in the fSource string for the end of the most
- // recent matching token.
- int32_t previ = -1;
- // Find the longest token that matches a value in the trie:
- while (fIndex < fSource.length()) {
- auto result = fTrie.next(fSource.data()[fIndex++]);
- if (result == USTRINGTRIE_NO_MATCH) {
- break;
- } else if (result == USTRINGTRIE_NO_VALUE) {
- continue;
- }
- U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
- match = fTrie.getValue();
- previ = fIndex;
- if (result == USTRINGTRIE_FINAL_VALUE) {
- break;
- }
- U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
- // continue;
- }
-
- if (match < 0) {
- status = kUnitIdentifierSyntaxError;
- } else {
- fIndex = previ;
- }
- return Token(match);
- }
-
- /**
- * Returns the next "single unit" via result.
- *
- * If a "-per-" was parsed, the result will have appropriate negative
- * dimensionality.
- *
- * Returns an error if we parse both compound units and "-and-", since mixed
- * compound units are not yet supported - TODO(CLDR-13700).
- *
- * @param result Will be overwritten by the result, if status shows success.
- * @param sawAnd If an "-and-" was parsed prior to finding the "single
- * unit", sawAnd is set to true. If not, it is left as is.
- * @param status ICU error code.
- */
- void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
-
- // state:
- // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit)
- // 1 = power token seen (will not accept another power token)
- // 2 = SI prefix token seen (will not accept a power or SI prefix token)
- int32_t state = 0;
-
- bool atStart = fIndex == 0;
- Token token = nextToken(status);
- if (U_FAILURE(status)) { return; }
-
- if (atStart) {
- // Identifiers optionally start with "per-".
- if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
- U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
- fAfterPer = true;
- result.dimensionality = -1;
-
- token = nextToken(status);
- if (U_FAILURE(status)) { return; }
- }
- } else {
- // All other SingleUnit's are separated from previous SingleUnit's
- // via a compound part:
- if (token.getType() != Token::TYPE_COMPOUND_PART) {
- status = kUnitIdentifierSyntaxError;
- return;
- }
-
- switch (token.getMatch()) {
- case COMPOUND_PART_PER:
- if (sawAnd) {
- // Mixed compound units not yet supported,
- // TODO(CLDR-13700).
- status = kUnitIdentifierSyntaxError;
- return;
- }
- fAfterPer = true;
- result.dimensionality = -1;
- break;
-
- case COMPOUND_PART_TIMES:
- if (fAfterPer) {
- result.dimensionality = -1;
- }
- break;
-
- case COMPOUND_PART_AND:
- if (fAfterPer) {
- // Can't start with "-and-", and mixed compound units
- // not yet supported, TODO(CLDR-13700).
- status = kUnitIdentifierSyntaxError;
- return;
- }
- sawAnd = true;
- break;
- }
-
- token = nextToken(status);
- if (U_FAILURE(status)) { return; }
- }
-
- // Read tokens until we have a complete SingleUnit or we reach the end.
- while (true) {
- switch (token.getType()) {
- case Token::TYPE_POWER_PART:
- if (state > 0) {
- status = kUnitIdentifierSyntaxError;
- return;
- }
- result.dimensionality *= token.getPower();
- state = 1;
- break;
-
- case Token::TYPE_SI_PREFIX:
- if (state > 1) {
- status = kUnitIdentifierSyntaxError;
- return;
- }
- result.siPrefix = token.getSIPrefix();
- state = 2;
- break;
-
- case Token::TYPE_SIMPLE_UNIT:
- result.index = token.getSimpleUnitIndex();
- return;
-
- default:
- status = kUnitIdentifierSyntaxError;
- return;
- }
-
- if (!hasNext()) {
- // We ran out of tokens before finding a complete single unit.
- status = kUnitIdentifierSyntaxError;
- return;
- }
- token = nextToken(status);
- if (U_FAILURE(status)) {
- return;
- }
- }
- }
-
- /// @param result is modified, not overridden. Caller must pass in a
- /// default-constructed (empty) MeasureUnitImpl instance.
- void parseImpl(MeasureUnitImpl& result, UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
- if (fSource.empty()) {
- // The dimenionless unit: nothing to parse. leave result as is.
- return;
- }
- int32_t unitNum = 0;
- while (hasNext()) {
- bool sawAnd = false;
- SingleUnitImpl singleUnit;
- nextSingleUnit(singleUnit, sawAnd, status);
- if (U_FAILURE(status)) {
- return;
- }
- U_ASSERT(!singleUnit.isDimensionless());
- bool added = result.append(singleUnit, status);
- if (sawAnd && !added) {
- // Two similar units are not allowed in a mixed unit
- status = kUnitIdentifierSyntaxError;
- return;
- }
- if ((++unitNum) >= 2) {
- // nextSingleUnit fails appropriately for "per" and "and" in the
- // same identifier. It doesn't fail for other compound units
- // (COMPOUND_PART_TIMES). Consequently we take care of that
- // here.
- UMeasureUnitComplexity complexity =
- sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
- if (unitNum == 2) {
- U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE);
- result.complexity = complexity;
- } else if (result.complexity != complexity) {
- // Can't have mixed compound units
- status = kUnitIdentifierSyntaxError;
- return;
- }
- }
- }
- }
-};
-
-int32_t U_CALLCONV
-compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
- auto realLeft = static_cast<const SingleUnitImpl* const*>(left);
- auto realRight = static_cast<const SingleUnitImpl* const*>(right);
- return (*realLeft)->compareTo(**realRight);
-}
-
-/**
- * Generate the identifier string for a single unit in place.
- *
- * Does not support the dimensionless SingleUnitImpl: calling serializeSingle
- * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
- *
- * @param first If singleUnit is part of a compound unit, and not its first
- * single unit, set this to false. Otherwise: set to true.
- */
-void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
- if (first && singleUnit.dimensionality < 0) {
- // Essentially the "unary per". For compound units with a numerator, the
- // caller takes care of the "binary per".
- output.append("per-", status);
- }
-
- if (singleUnit.isDimensionless()) {
- status = U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- int8_t posPower = std::abs(singleUnit.dimensionality);
- if (posPower == 0) {
- status = U_INTERNAL_PROGRAM_ERROR;
- } else if (posPower == 1) {
- // no-op
- } else if (posPower == 2) {
- output.append("square-", status);
- } else if (posPower == 3) {
- output.append("cubic-", status);
- } else if (posPower < 10) {
- output.append('p', status);
- output.append(posPower + '0', status);
- output.append('-', status);
- } else if (posPower <= 15) {
- output.append("p1", status);
- output.append('0' + (posPower % 10), status);
- output.append('-', status);
- } else {
- status = kUnitIdentifierSyntaxError;
- }
- if (U_FAILURE(status)) {
- return;
- }
-
- if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) {
- for (const auto& siPrefixInfo : gSIPrefixStrings) {
- if (siPrefixInfo.value == singleUnit.siPrefix) {
- output.append(siPrefixInfo.string, status);
- break;
- }
- }
- }
- if (U_FAILURE(status)) {
- return;
- }
-
- output.appendInvariantChars(gSimpleUnits[singleUnit.index], status);
-}
-
-/**
- * Normalize a MeasureUnitImpl and generate the identifier string in place.
- */
-void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
- if (U_FAILURE(status)) {
- return;
- }
- U_ASSERT(impl.identifier.isEmpty());
- if (impl.units.length() == 0) {
- // Dimensionless, constructed by the default constructor: no appending
- // to impl.identifier, we wish it to contain the zero-length string.
- return;
- }
- if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
- // Note: don't sort a MIXED unit
- uprv_sortArray(
- impl.units.getAlias(),
- impl.units.length(),
- sizeof(impl.units[0]),
- compareSingleUnits,
- nullptr,
- false,
- &status);
- if (U_FAILURE(status)) {
- return;
- }
- }
- serializeSingle(*impl.units[0], true, impl.identifier, status);
- if (impl.units.length() == 1) {
- return;
- }
- for (int32_t i = 1; i < impl.units.length(); i++) {
- const SingleUnitImpl& prev = *impl.units[i-1];
- const SingleUnitImpl& curr = *impl.units[i];
- if (impl.complexity == UMEASURE_UNIT_MIXED) {
- impl.identifier.append("-and-", status);
- serializeSingle(curr, true, impl.identifier, status);
- } else {
- if (prev.dimensionality > 0 && curr.dimensionality < 0) {
- impl.identifier.append("-per-", status);
- } else {
- impl.identifier.append('-', status);
- }
- serializeSingle(curr, false, impl.identifier, status);
- }
- }
-
-}
-
-/**
- * Appends a SingleUnitImpl to a MeasureUnitImpl.
- *
- * @return true if a new item was added. If unit is the dimensionless unit, it
- * is never added: the return value will always be false.
- */
-bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) {
- if (unit.isDimensionless()) {
- // We don't append dimensionless units.
- return false;
- }
- // Find a similar unit that already exists, to attempt to coalesce
- SingleUnitImpl* oldUnit = nullptr;
- for (int32_t i = 0; i < impl.units.length(); i++) {
- auto* candidate = impl.units[i];
- if (candidate->isCompatibleWith(unit)) {
- oldUnit = candidate;
- }
- }
- if (oldUnit) {
- // Both dimensionalities will be positive, or both will be negative, by
- // virtue of isCompatibleWith().
- oldUnit->dimensionality += unit.dimensionality;
- } else {
- SingleUnitImpl* destination = impl.units.emplaceBack();
- if (!destination) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return false;
- }
- *destination = unit;
- }
- return (oldUnit == nullptr);
-}
-
-} // namespace
-
-
-SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
- MeasureUnitImpl temp;
- const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
- if (U_FAILURE(status)) {
- return {};
- }
- if (impl.units.length() == 0) {
- return {};
- }
- if (impl.units.length() == 1) {
- return *impl.units[0];
- }
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return {};
-}
-
-MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
- MeasureUnitImpl temp;
- temp.append(*this, status);
- return std::move(temp).build(status);
-}
-
-
-MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
- return Parser::from(identifier, status).parse(status);
-}
-
-const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
- const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
- if (measureUnit.fImpl) {
- return *measureUnit.fImpl;
- } else {
- memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
- return memory;
- }
-}
-
-MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
- const MeasureUnit& measureUnit, UErrorCode& status) {
- if (measureUnit.fImpl) {
- return measureUnit.fImpl->copy(status);
- } else {
- return Parser::from(measureUnit.getIdentifier(), status).parse(status);
- }
-}
-
-void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
- identifier.clear();
- for (int32_t i = 0; i < units.length(); i++) {
- units[i]->dimensionality *= -1;
- }
-}
-
-bool MeasureUnitImpl::append(const SingleUnitImpl& singleUnit, UErrorCode& status) {
- identifier.clear();
- return appendImpl(*this, singleUnit, status);
-}
-
-MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
- serialize(*this, status);
- return MeasureUnit(std::move(*this));
-}
-
-
-MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
- return Parser::from(identifier, status).parse(status).build(status);
-}
-
-UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
- MeasureUnitImpl temp;
- return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;
-}
-
-UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const {
- return SingleUnitImpl::forMeasureUnit(*this, status).siPrefix;
-}
-
-MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const {
- SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
- singleUnit.siPrefix = prefix;
- return singleUnit.build(status);
-}
-
-int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
- SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
- if (U_FAILURE(status)) { return 0; }
- if (singleUnit.isDimensionless()) {
- return 0;
- }
- return singleUnit.dimensionality;
-}
-
-MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
- SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
- singleUnit.dimensionality = dimensionality;
- return singleUnit.build(status);
-}
-
-MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
- MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
- impl.takeReciprocal(status);
- return std::move(impl).build(status);
-}
-
-MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
- MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
- MeasureUnitImpl temp;
- const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status);
- if (impl.complexity == UMEASURE_UNIT_MIXED || otherImpl.complexity == UMEASURE_UNIT_MIXED) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return {};
- }
- for (int32_t i = 0; i < otherImpl.units.length(); i++) {
- impl.append(*otherImpl.units[i], status);
- }
- if (impl.units.length() > 1) {
- impl.complexity = UMEASURE_UNIT_COMPOUND;
- }
- return std::move(impl).build(status);
-}
-
-LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnits(int32_t& outCount, UErrorCode& status) const {
- MeasureUnitImpl temp;
- const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);
- outCount = impl.units.length();
- MeasureUnit* arr = new MeasureUnit[outCount];
- for (int32_t i = 0; i < outCount; i++) {
- arr[i] = impl.units[i]->build(status);
- }
- return LocalArray<MeasureUnit>(arr, status);
-}
-
-
-U_NAMESPACE_END
-
-#endif /* !UNCONFIG_NO_FORMATTING */
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// Extra functions for MeasureUnit not needed for all clients.
+// Separate .o file so that it can be removed for modularity.
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+// Allow implicit conversion from char16_t* to UnicodeString for this file:
+// Helpful in toString methods and elsewhere.
+#define UNISTR_FROM_STRING_EXPLICIT
+
+#include <cstdlib>
+#include "cstring.h"
+#include "measunit_impl.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "ucln_in.h"
+#include "umutex.h"
+#include "unicode/errorcode.h"
+#include "unicode/localpointer.h"
+#include "unicode/measunit.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/ucharstriebuilder.h"
+
+#include "cstr.h"
+
+U_NAMESPACE_BEGIN
+
+
+namespace {
+
+// TODO: Propose a new error code for this?
+constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
+
+// Trie value offset for SI Prefixes. This is big enough to ensure we only
+// insert positive integers into the trie.
+constexpr int32_t kSIPrefixOffset = 64;
+
+// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
+constexpr int32_t kCompoundPartOffset = 128;
+
+enum CompoundPart {
+ // Represents "-per-"
+ COMPOUND_PART_PER = kCompoundPartOffset,
+ // Represents "-"
+ COMPOUND_PART_TIMES,
+ // Represents "-and-"
+ COMPOUND_PART_AND,
+};
+
+// Trie value offset for "per-".
+constexpr int32_t kInitialCompoundPartOffset = 192;
+
+enum InitialCompoundPart {
+ // Represents "per-", the only compound part that can appear at the start of
+ // an identifier.
+ INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
+};
+
+// Trie value offset for powers like "square-", "cubic-", "p2-" etc.
+constexpr int32_t kPowerPartOffset = 256;
+
+enum PowerPart {
+ POWER_PART_P2 = kPowerPartOffset + 2,
+ POWER_PART_P3,
+ POWER_PART_P4,
+ POWER_PART_P5,
+ POWER_PART_P6,
+ POWER_PART_P7,
+ POWER_PART_P8,
+ POWER_PART_P9,
+ POWER_PART_P10,
+ POWER_PART_P11,
+ POWER_PART_P12,
+ POWER_PART_P13,
+ POWER_PART_P14,
+ POWER_PART_P15,
+};
+
+// Trie value offset for simple units, e.g. "gram", "nautical-mile",
+// "fluid-ounce-imperial".
+constexpr int32_t kSimpleUnitOffset = 512;
+
+const struct SIPrefixStrings {
+ const char* const string;
+ UMeasureSIPrefix value;
+} gSIPrefixStrings[] = {
+ { "yotta", UMEASURE_SI_PREFIX_YOTTA },
+ { "zetta", UMEASURE_SI_PREFIX_ZETTA },
+ { "exa", UMEASURE_SI_PREFIX_EXA },
+ { "peta", UMEASURE_SI_PREFIX_PETA },
+ { "tera", UMEASURE_SI_PREFIX_TERA },
+ { "giga", UMEASURE_SI_PREFIX_GIGA },
+ { "mega", UMEASURE_SI_PREFIX_MEGA },
+ { "kilo", UMEASURE_SI_PREFIX_KILO },
+ { "hecto", UMEASURE_SI_PREFIX_HECTO },
+ { "deka", UMEASURE_SI_PREFIX_DEKA },
+ { "deci", UMEASURE_SI_PREFIX_DECI },
+ { "centi", UMEASURE_SI_PREFIX_CENTI },
+ { "milli", UMEASURE_SI_PREFIX_MILLI },
+ { "micro", UMEASURE_SI_PREFIX_MICRO },
+ { "nano", UMEASURE_SI_PREFIX_NANO },
+ { "pico", UMEASURE_SI_PREFIX_PICO },
+ { "femto", UMEASURE_SI_PREFIX_FEMTO },
+ { "atto", UMEASURE_SI_PREFIX_ATTO },
+ { "zepto", UMEASURE_SI_PREFIX_ZEPTO },
+ { "yocto", UMEASURE_SI_PREFIX_YOCTO },
+};
+
+// TODO(ICU-21059): Get this list from data
+const char16_t* const gSimpleUnits[] = {
+ u"candela",
+ u"carat",
+ u"gram",
+ u"ounce",
+ u"ounce-troy",
+ u"pound",
+ u"kilogram",
+ u"stone",
+ u"ton",
+ u"metric-ton",
+ u"earth-mass",
+ u"solar-mass",
+ u"point",
+ u"inch",
+ u"foot",
+ u"yard",
+ u"meter",
+ u"fathom",
+ u"furlong",
+ u"mile",
+ u"nautical-mile",
+ u"mile-scandinavian",
+ u"100-kilometer",
+ u"earth-radius",
+ u"solar-radius",
+ u"astronomical-unit",
+ u"light-year",
+ u"parsec",
+ u"second",
+ u"minute",
+ u"hour",
+ u"day",
+ u"day-person",
+ u"week",
+ u"week-person",
+ u"month",
+ u"month-person",
+ u"year",
+ u"year-person",
+ u"decade",
+ u"century",
+ u"ampere",
+ u"fahrenheit",
+ u"kelvin",
+ u"celsius",
+ u"arc-second",
+ u"arc-minute",
+ u"degree",
+ u"radian",
+ u"revolution",
+ u"item",
+ u"mole",
+ u"permillion",
+ u"permyriad",
+ u"permille",
+ u"percent",
+ u"karat",
+ u"portion",
+ u"bit",
+ u"byte",
+ u"dot",
+ u"pixel",
+ u"em",
+ u"hertz",
+ u"newton",
+ u"pound-force",
+ u"pascal",
+ u"bar",
+ u"atmosphere",
+ u"ofhg",
+ u"electronvolt",
+ u"dalton",
+ u"joule",
+ u"calorie",
+ u"british-thermal-unit",
+ u"foodcalorie",
+ u"therm-us",
+ u"watt",
+ u"horsepower",
+ u"solar-luminosity",
+ u"volt",
+ u"ohm",
+ u"dunam",
+ u"acre",
+ u"hectare",
+ u"teaspoon",
+ u"tablespoon",
+ u"fluid-ounce-imperial",
+ u"fluid-ounce",
+ u"cup",
+ u"cup-metric",
+ u"pint",
+ u"pint-metric",
+ u"quart",
+ u"liter",
+ u"gallon",
+ u"gallon-imperial",
+ u"bushel",
+ u"barrel",
+ u"knot",
+ u"g-force",
+ u"lux",
+};
+
+icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;
+
+char16_t* kSerializedUnitExtrasStemTrie = nullptr;
+
+UBool U_CALLCONV cleanupUnitExtras() {
+ uprv_free(kSerializedUnitExtrasStemTrie);
+ kSerializedUnitExtrasStemTrie = nullptr;
+ gUnitExtrasInitOnce.reset();
+ return TRUE;
+}
+
+void U_CALLCONV initUnitExtras(UErrorCode& status) {
+ ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
+
+ UCharsTrieBuilder b(status);
+ if (U_FAILURE(status)) { return; }
+
+ // Add SI prefixes
+ for (const auto& siPrefixInfo : gSIPrefixStrings) {
+ UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV);
+ b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status);
+ }
+ if (U_FAILURE(status)) { return; }
+
+ // Add syntax parts (compound, power prefixes)
+ b.add(u"-per-", COMPOUND_PART_PER, status);
+ b.add(u"-", COMPOUND_PART_TIMES, status);
+ b.add(u"-and-", COMPOUND_PART_AND, status);
+ b.add(u"per-", INITIAL_COMPOUND_PART_PER, status);
+ b.add(u"square-", POWER_PART_P2, status);
+ b.add(u"cubic-", POWER_PART_P3, status);
+ b.add(u"p2-", POWER_PART_P2, status);
+ b.add(u"p3-", POWER_PART_P3, status);
+ b.add(u"p4-", POWER_PART_P4, status);
+ b.add(u"p5-", POWER_PART_P5, status);
+ b.add(u"p6-", POWER_PART_P6, status);
+ b.add(u"p7-", POWER_PART_P7, status);
+ b.add(u"p8-", POWER_PART_P8, status);
+ b.add(u"p9-", POWER_PART_P9, status);
+ b.add(u"p10-", POWER_PART_P10, status);
+ b.add(u"p11-", POWER_PART_P11, status);
+ b.add(u"p12-", POWER_PART_P12, status);
+ b.add(u"p13-", POWER_PART_P13, status);
+ b.add(u"p14-", POWER_PART_P14, status);
+ b.add(u"p15-", POWER_PART_P15, status);
+ if (U_FAILURE(status)) { return; }
+
+ // Add sanctioned simple units by offset
+ int32_t simpleUnitOffset = kSimpleUnitOffset;
+ for (auto simpleUnit : gSimpleUnits) {
+ b.add(simpleUnit, simpleUnitOffset++, status);
+ }
+
+ // Build the CharsTrie
+ // TODO: Use SLOW or FAST here?
+ UnicodeString result;
+ b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status);
+ if (U_FAILURE(status)) { return; }
+
+ // Copy the result into the global constant pointer
+ size_t numBytes = result.length() * sizeof(char16_t);
+ kSerializedUnitExtrasStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes));
+ uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes);
+}
+
+class Token {
+public:
+ Token(int32_t match) : fMatch(match) {}
+
+ enum Type {
+ TYPE_UNDEFINED,
+ TYPE_SI_PREFIX,
+ // Token type for "-per-", "-", and "-and-".
+ TYPE_COMPOUND_PART,
+ // Token type for "per-".
+ TYPE_INITIAL_COMPOUND_PART,
+ TYPE_POWER_PART,
+ TYPE_SIMPLE_UNIT,
+ };
+
+ // Calling getType() is invalid, resulting in an assertion failure, if Token
+ // value isn't positive.
+ Type getType() const {
+ U_ASSERT(fMatch > 0);
+ if (fMatch < kCompoundPartOffset) {
+ return TYPE_SI_PREFIX;
+ }
+ if (fMatch < kInitialCompoundPartOffset) {
+ return TYPE_COMPOUND_PART;
+ }
+ if (fMatch < kPowerPartOffset) {
+ return TYPE_INITIAL_COMPOUND_PART;
+ }
+ if (fMatch < kSimpleUnitOffset) {
+ return TYPE_POWER_PART;
+ }
+ return TYPE_SIMPLE_UNIT;
+ }
+
+ UMeasureSIPrefix getSIPrefix() const {
+ U_ASSERT(getType() == TYPE_SI_PREFIX);
+ return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset);
+ }
+
+ // Valid only for tokens with type TYPE_COMPOUND_PART.
+ int32_t getMatch() const {
+ U_ASSERT(getType() == TYPE_COMPOUND_PART);
+ return fMatch;
+ }
+
+ int32_t getInitialCompoundPart() const {
+ // Even if there is only one InitialCompoundPart value, we have this
+ // function for the simplicity of code consistency.
+ U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
+ // Defensive: if this assert fails, code using this function also needs
+ // to change.
+ U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
+ return fMatch;
+ }
+
+ int8_t getPower() const {
+ U_ASSERT(getType() == TYPE_POWER_PART);
+ return static_cast<int8_t>(fMatch - kPowerPartOffset);
+ }
+
+ int32_t getSimpleUnitIndex() const {
+ U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
+ return fMatch - kSimpleUnitOffset;
+ }
+
+private:
+ int32_t fMatch;
+};
+
+class Parser {
+public:
+ /**
+ * Factory function for parsing the given identifier.
+ *
+ * @param source The identifier to parse. This function does not make a copy
+ * of source: the underlying string that source points at, must outlive the
+ * parser.
+ * @param status ICU error code.
+ */
+ static Parser from(StringPiece source, UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return Parser();
+ }
+ umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
+ if (U_FAILURE(status)) {
+ return Parser();
+ }
+ return Parser(source);
+ }
+
+ MeasureUnitImpl parse(UErrorCode& status) {
+ MeasureUnitImpl result;
+ parseImpl(result, status);
+ return result;
+ }
+
+private:
+ // Tracks parser progress: the offset into fSource.
+ int32_t fIndex = 0;
+
+ // Since we're not owning this memory, whatever is passed to the constructor
+ // should live longer than this Parser - and the parser shouldn't return any
+ // references to that string.
+ StringPiece fSource;
+ UCharsTrie fTrie;
+
+ // Set to true when we've seen a "-per-" or a "per-", after which all units
+ // are in the denominator. Until we find an "-and-", at which point the
+ // identifier is invalid pending TODO(CLDR-13700).
+ bool fAfterPer = false;
+
+ Parser() : fSource(""), fTrie(u"") {}
+
+ Parser(StringPiece source)
+ : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {}
+
+ inline bool hasNext() const {
+ return fIndex < fSource.length();
+ }
+
+ // Returns the next Token parsed from fSource, advancing fIndex to the end
+ // of that token in fSource. In case of U_FAILURE(status), the token
+ // returned will cause an abort if getType() is called on it.
+ Token nextToken(UErrorCode& status) {
+ fTrie.reset();
+ int32_t match = -1;
+ // Saves the position in the fSource string for the end of the most
+ // recent matching token.
+ int32_t previ = -1;
+ // Find the longest token that matches a value in the trie:
+ while (fIndex < fSource.length()) {
+ auto result = fTrie.next(fSource.data()[fIndex++]);
+ if (result == USTRINGTRIE_NO_MATCH) {
+ break;
+ } else if (result == USTRINGTRIE_NO_VALUE) {
+ continue;
+ }
+ U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+ match = fTrie.getValue();
+ previ = fIndex;
+ if (result == USTRINGTRIE_FINAL_VALUE) {
+ break;
+ }
+ U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
+ // continue;
+ }
+
+ if (match < 0) {
+ status = kUnitIdentifierSyntaxError;
+ } else {
+ fIndex = previ;
+ }
+ return Token(match);
+ }
+
+ /**
+ * Returns the next "single unit" via result.
+ *
+ * If a "-per-" was parsed, the result will have appropriate negative
+ * dimensionality.
+ *
+ * Returns an error if we parse both compound units and "-and-", since mixed
+ * compound units are not yet supported - TODO(CLDR-13700).
+ *
+ * @param result Will be overwritten by the result, if status shows success.
+ * @param sawAnd If an "-and-" was parsed prior to finding the "single
+ * unit", sawAnd is set to true. If not, it is left as is.
+ * @param status ICU error code.
+ */
+ void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ // state:
+ // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit)
+ // 1 = power token seen (will not accept another power token)
+ // 2 = SI prefix token seen (will not accept a power or SI prefix token)
+ int32_t state = 0;
+
+ bool atStart = fIndex == 0;
+ Token token = nextToken(status);
+ if (U_FAILURE(status)) { return; }
+
+ if (atStart) {
+ // Identifiers optionally start with "per-".
+ if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
+ U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
+ fAfterPer = true;
+ result.dimensionality = -1;
+
+ token = nextToken(status);
+ if (U_FAILURE(status)) { return; }
+ }
+ } else {
+ // All other SingleUnit's are separated from previous SingleUnit's
+ // via a compound part:
+ if (token.getType() != Token::TYPE_COMPOUND_PART) {
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+
+ switch (token.getMatch()) {
+ case COMPOUND_PART_PER:
+ if (sawAnd) {
+ // Mixed compound units not yet supported,
+ // TODO(CLDR-13700).
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ fAfterPer = true;
+ result.dimensionality = -1;
+ break;
+
+ case COMPOUND_PART_TIMES:
+ if (fAfterPer) {
+ result.dimensionality = -1;
+ }
+ break;
+
+ case COMPOUND_PART_AND:
+ if (fAfterPer) {
+ // Can't start with "-and-", and mixed compound units
+ // not yet supported, TODO(CLDR-13700).
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ sawAnd = true;
+ break;
+ }
+
+ token = nextToken(status);
+ if (U_FAILURE(status)) { return; }
+ }
+
+ // Read tokens until we have a complete SingleUnit or we reach the end.
+ while (true) {
+ switch (token.getType()) {
+ case Token::TYPE_POWER_PART:
+ if (state > 0) {
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ result.dimensionality *= token.getPower();
+ state = 1;
+ break;
+
+ case Token::TYPE_SI_PREFIX:
+ if (state > 1) {
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ result.siPrefix = token.getSIPrefix();
+ state = 2;
+ break;
+
+ case Token::TYPE_SIMPLE_UNIT:
+ result.index = token.getSimpleUnitIndex();
+ return;
+
+ default:
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+
+ if (!hasNext()) {
+ // We ran out of tokens before finding a complete single unit.
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ token = nextToken(status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ }
+ }
+
+ /// @param result is modified, not overridden. Caller must pass in a
+ /// default-constructed (empty) MeasureUnitImpl instance.
+ void parseImpl(MeasureUnitImpl& result, UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (fSource.empty()) {
+ // The dimenionless unit: nothing to parse. leave result as is.
+ return;
+ }
+ int32_t unitNum = 0;
+ while (hasNext()) {
+ bool sawAnd = false;
+ SingleUnitImpl singleUnit;
+ nextSingleUnit(singleUnit, sawAnd, status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ U_ASSERT(!singleUnit.isDimensionless());
+ bool added = result.append(singleUnit, status);
+ if (sawAnd && !added) {
+ // Two similar units are not allowed in a mixed unit
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ if ((++unitNum) >= 2) {
+ // nextSingleUnit fails appropriately for "per" and "and" in the
+ // same identifier. It doesn't fail for other compound units
+ // (COMPOUND_PART_TIMES). Consequently we take care of that
+ // here.
+ UMeasureUnitComplexity complexity =
+ sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
+ if (unitNum == 2) {
+ U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE);
+ result.complexity = complexity;
+ } else if (result.complexity != complexity) {
+ // Can't have mixed compound units
+ status = kUnitIdentifierSyntaxError;
+ return;
+ }
+ }
+ }
+ }
+};
+
+int32_t U_CALLCONV
+compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
+ auto realLeft = static_cast<const SingleUnitImpl* const*>(left);
+ auto realRight = static_cast<const SingleUnitImpl* const*>(right);
+ return (*realLeft)->compareTo(**realRight);
+}
+
+/**
+ * Generate the identifier string for a single unit in place.
+ *
+ * Does not support the dimensionless SingleUnitImpl: calling serializeSingle
+ * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
+ *
+ * @param first If singleUnit is part of a compound unit, and not its first
+ * single unit, set this to false. Otherwise: set to true.
+ */
+void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
+ if (first && singleUnit.dimensionality < 0) {
+ // Essentially the "unary per". For compound units with a numerator, the
+ // caller takes care of the "binary per".
+ output.append("per-", status);
+ }
+
+ if (singleUnit.isDimensionless()) {
+ status = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+ int8_t posPower = std::abs(singleUnit.dimensionality);
+ if (posPower == 0) {
+ status = U_INTERNAL_PROGRAM_ERROR;
+ } else if (posPower == 1) {
+ // no-op
+ } else if (posPower == 2) {
+ output.append("square-", status);
+ } else if (posPower == 3) {
+ output.append("cubic-", status);
+ } else if (posPower < 10) {
+ output.append('p', status);
+ output.append(posPower + '0', status);
+ output.append('-', status);
+ } else if (posPower <= 15) {
+ output.append("p1", status);
+ output.append('0' + (posPower % 10), status);
+ output.append('-', status);
+ } else {
+ status = kUnitIdentifierSyntaxError;
+ }
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) {
+ for (const auto& siPrefixInfo : gSIPrefixStrings) {
+ if (siPrefixInfo.value == singleUnit.siPrefix) {
+ output.append(siPrefixInfo.string, status);
+ break;
+ }
+ }
+ }
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ output.appendInvariantChars(gSimpleUnits[singleUnit.index], status);
+}
+
+/**
+ * Normalize a MeasureUnitImpl and generate the identifier string in place.
+ */
+void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ U_ASSERT(impl.identifier.isEmpty());
+ if (impl.units.length() == 0) {
+ // Dimensionless, constructed by the default constructor: no appending
+ // to impl.identifier, we wish it to contain the zero-length string.
+ return;
+ }
+ if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
+ // Note: don't sort a MIXED unit
+ uprv_sortArray(
+ impl.units.getAlias(),
+ impl.units.length(),
+ sizeof(impl.units[0]),
+ compareSingleUnits,
+ nullptr,
+ false,
+ &status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+ }
+ serializeSingle(*impl.units[0], true, impl.identifier, status);
+ if (impl.units.length() == 1) {
+ return;
+ }
+ for (int32_t i = 1; i < impl.units.length(); i++) {
+ const SingleUnitImpl& prev = *impl.units[i-1];
+ const SingleUnitImpl& curr = *impl.units[i];
+ if (impl.complexity == UMEASURE_UNIT_MIXED) {
+ impl.identifier.append("-and-", status);
+ serializeSingle(curr, true, impl.identifier, status);
+ } else {
+ if (prev.dimensionality > 0 && curr.dimensionality < 0) {
+ impl.identifier.append("-per-", status);
+ } else {
+ impl.identifier.append('-', status);
+ }
+ serializeSingle(curr, false, impl.identifier, status);
+ }
+ }
+
+}
+
+/**
+ * Appends a SingleUnitImpl to a MeasureUnitImpl.
+ *
+ * @return true if a new item was added. If unit is the dimensionless unit, it
+ * is never added: the return value will always be false.
+ */
+bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) {
+ if (unit.isDimensionless()) {
+ // We don't append dimensionless units.
+ return false;
+ }
+ // Find a similar unit that already exists, to attempt to coalesce
+ SingleUnitImpl* oldUnit = nullptr;
+ for (int32_t i = 0; i < impl.units.length(); i++) {
+ auto* candidate = impl.units[i];
+ if (candidate->isCompatibleWith(unit)) {
+ oldUnit = candidate;
+ }
+ }
+ if (oldUnit) {
+ // Both dimensionalities will be positive, or both will be negative, by
+ // virtue of isCompatibleWith().
+ oldUnit->dimensionality += unit.dimensionality;
+ } else {
+ SingleUnitImpl* destination = impl.units.emplaceBack();
+ if (!destination) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return false;
+ }
+ *destination = unit;
+ }
+ return (oldUnit == nullptr);
+}
+
+} // namespace
+
+
+SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
+ MeasureUnitImpl temp;
+ const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
+ if (U_FAILURE(status)) {
+ return {};
+ }
+ if (impl.units.length() == 0) {
+ return {};
+ }
+ if (impl.units.length() == 1) {
+ return *impl.units[0];
+ }
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return {};
+}
+
+MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
+ MeasureUnitImpl temp;
+ temp.append(*this, status);
+ return std::move(temp).build(status);
+}
+
+
+MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
+ return Parser::from(identifier, status).parse(status);
+}
+
+const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
+ const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
+ if (measureUnit.fImpl) {
+ return *measureUnit.fImpl;
+ } else {
+ memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
+ return memory;
+ }
+}
+
+MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
+ const MeasureUnit& measureUnit, UErrorCode& status) {
+ if (measureUnit.fImpl) {
+ return measureUnit.fImpl->copy(status);
+ } else {
+ return Parser::from(measureUnit.getIdentifier(), status).parse(status);
+ }
+}
+
+void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
+ identifier.clear();
+ for (int32_t i = 0; i < units.length(); i++) {
+ units[i]->dimensionality *= -1;
+ }
+}
+
+bool MeasureUnitImpl::append(const SingleUnitImpl& singleUnit, UErrorCode& status) {
+ identifier.clear();
+ return appendImpl(*this, singleUnit, status);
+}
+
+MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
+ serialize(*this, status);
+ return MeasureUnit(std::move(*this));
+}
+
+
+MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
+ return Parser::from(identifier, status).parse(status).build(status);
+}
+
+UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
+ MeasureUnitImpl temp;
+ return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;
+}
+
+UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const {
+ return SingleUnitImpl::forMeasureUnit(*this, status).siPrefix;
+}
+
+MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const {
+ SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
+ singleUnit.siPrefix = prefix;
+ return singleUnit.build(status);
+}
+
+int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
+ SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
+ if (U_FAILURE(status)) { return 0; }
+ if (singleUnit.isDimensionless()) {
+ return 0;
+ }
+ return singleUnit.dimensionality;
+}
+
+MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
+ SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
+ singleUnit.dimensionality = dimensionality;
+ return singleUnit.build(status);
+}
+
+MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
+ MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
+ impl.takeReciprocal(status);
+ return std::move(impl).build(status);
+}
+
+MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
+ MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
+ MeasureUnitImpl temp;
+ const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status);
+ if (impl.complexity == UMEASURE_UNIT_MIXED || otherImpl.complexity == UMEASURE_UNIT_MIXED) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return {};
+ }
+ for (int32_t i = 0; i < otherImpl.units.length(); i++) {
+ impl.append(*otherImpl.units[i], status);
+ }
+ if (impl.units.length() > 1) {
+ impl.complexity = UMEASURE_UNIT_COMPOUND;
+ }
+ return std::move(impl).build(status);
+}
+
+LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnits(int32_t& outCount, UErrorCode& status) const {
+ MeasureUnitImpl temp;
+ const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);
+ outCount = impl.units.length();
+ MeasureUnit* arr = new MeasureUnit[outCount];
+ for (int32_t i = 0; i < outCount; i++) {
+ arr[i] = impl.units[i]->build(status);
+ }
+ return LocalArray<MeasureUnit>(arr, status);
+}
+
+
+U_NAMESPACE_END
+
+#endif /* !UNCONFIG_NO_FORMATTING */