diff options
author | romankoshelev <romankoshelev@yandex-team.com> | 2023-08-09 20:07:20 +0300 |
---|---|---|
committer | romankoshelev <romankoshelev@yandex-team.com> | 2023-08-09 20:59:13 +0300 |
commit | fd82fb12fb45e71a02c628e45b12c50c0dd0d308 (patch) | |
tree | f582b79f9002ab1d083e9acda600dfb3551c47b6 /contrib/libs/icu/i18n | |
parent | bf862ddf5c6178e1bb5e4fb3f7c61015deebe284 (diff) | |
download | ydb-fd82fb12fb45e71a02c628e45b12c50c0dd0d308.tar.gz |
Update ICU to 70.1
Diffstat (limited to 'contrib/libs/icu/i18n')
284 files changed, 11360 insertions, 6892 deletions
diff --git a/contrib/libs/icu/i18n/alphaindex.cpp b/contrib/libs/icu/i18n/alphaindex.cpp index 9c312bd8e6..34407f677a 100644 --- a/contrib/libs/icu/i18n/alphaindex.cpp +++ b/contrib/libs/icu/i18n/alphaindex.cpp @@ -293,6 +293,7 @@ int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) { } void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const { + U_ASSERT(indexCharacters.hasDeleter()); const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode); if (U_FAILURE(errorCode)) { return; } @@ -305,7 +306,7 @@ void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode // That is, we might have c, ch, d, where "ch" sorts just like "c", "h". // We filter out those cases. UnicodeSetIterator iter(*initialLabels_); - while (iter.next()) { + while (U_SUCCESS(errorCode) && iter.next()) { const UnicodeString *item = &iter.getString(); LocalPointer<UnicodeString> ownedItem; UBool checkDistinct; @@ -455,7 +456,7 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { errorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; } - bucketList->addElement(bucket, errorCode); + bucketList->addElementX(bucket, errorCode); if (U_FAILURE(errorCode)) { return NULL; } UnicodeString temp; @@ -485,7 +486,7 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { errorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; } - bucketList->addElement(bucket, errorCode); + bucketList->addElementX(bucket, errorCode); } } // Add a bucket with the current label. @@ -494,7 +495,7 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { errorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; } - bucketList->addElement(bucket, errorCode); + bucketList->addElementX(bucket, errorCode); // Remember ASCII and Pinyin buckets for Pinyin redirects. 
UChar c; if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z @@ -533,7 +534,7 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { return NULL; } bucket->displayBucket_ = singleBucket; - bucketList->addElement(bucket, errorCode); + bucketList->addElementX(bucket, errorCode); hasInvisibleBuckets = TRUE; break; } @@ -557,7 +558,7 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { errorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; } - bucketList->addElement(bucket, errorCode); // final + bucketList->addElementX(bucket, errorCode); // final if (hasPinyin) { // Redirect Pinyin buckets. @@ -610,7 +611,7 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { for (int32_t j = 0; j < bucketList->size(); ++j) { bucket = getBucket(*bucketList, j); if (bucket->displayBucket_ == NULL) { - publicBucketList->addElement(bucket, errorCode); + publicBucketList->addElementX(bucket, errorCode); } } if (U_FAILURE(errorCode)) { return NULL; } @@ -684,7 +685,7 @@ void AlphabeticIndex::initBuckets(UErrorCode &errorCode) { return; } } - bucket->records_->addElement(r, errorCode); + bucket->records_->addElementX(r, errorCode); } } @@ -799,13 +800,13 @@ UnicodeString AlphabeticIndex::separated(const UnicodeString &item) { } -UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const { - return FALSE; +bool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const { + return false; } -UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const { - return FALSE; +bool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const { + return false; } @@ -1015,7 +1016,7 @@ UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } - dest->addElement(s, status); + dest->addElementX(s, status); } return dest.orphan(); } @@ -1078,7 +1079,7 @@ AlphabeticIndex & 
AlphabeticIndex::addRecord(const UnicodeString &name, const vo status = U_MEMORY_ALLOCATION_ERROR; return *this; } - inputList_->addElement(r, status); + inputList_->addElementX(r, status); clearBuckets(); //std::string ss; //std::string ss2; diff --git a/contrib/libs/icu/i18n/anytrans.h b/contrib/libs/icu/i18n/anytrans.h index 627dee3c81..67ebb2e7d2 100644 --- a/contrib/libs/icu/i18n/anytrans.h +++ b/contrib/libs/icu/i18n/anytrans.h @@ -66,18 +66,18 @@ public: /** * Transliterator API. */ - virtual AnyTransliterator* clone() const; + virtual AnyTransliterator* clone() const override; /** * Implements {@link Transliterator#handleTransliterate}. */ virtual void handleTransliterate(Replaceable& text, UTransPosition& index, - UBool incremental) const; + UBool incremental) const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. diff --git a/contrib/libs/icu/i18n/astro.cpp b/contrib/libs/icu/i18n/astro.cpp index f17b6db912..6dc463b4ca 100644 --- a/contrib/libs/icu/i18n/astro.cpp +++ b/contrib/libs/icu/i18n/astro.cpp @@ -379,7 +379,7 @@ double CalendarAstronomer::getJulianCentury() { */ double CalendarAstronomer::getGreenwichSidereal() { if (isINVALID(siderealTime)) { - // See page 86 of "Practial Astronomy with your Calculator", + // See page 86 of "Practical Astronomy with your Calculator", // by Peter Duffet-Smith, for details on the algorithm. 
double UT = normalize(fTime/(double)HOUR_MS, 24.); @@ -460,7 +460,7 @@ CalendarAstronomer::Equatorial& CalendarAstronomer::eclipticToEquatorial(Calenda */ CalendarAstronomer::Equatorial& CalendarAstronomer::eclipticToEquatorial(CalendarAstronomer::Equatorial& result, double eclipLong, double eclipLat) { - // See page 42 of "Practial Astronomy with your Calculator", + // See page 42 of "Practical Astronomy with your Calculator", // by Peter Duffet-Smith, for details on the algorithm. double obliq = eclipticObliquity(); @@ -623,7 +623,7 @@ static double trueAnomaly(double meanAnomaly, double eccentricity) */ double CalendarAstronomer::getSunLongitude() { - // See page 86 of "Practial Astronomy with your Calculator", + // See page 86 of "Practical Astronomy with your Calculator", // by Peter Duffet-Smith, for details on the algorithm. if (isINVALID(sunLongitude)) { @@ -637,7 +637,7 @@ double CalendarAstronomer::getSunLongitude() */ /*public*/ void CalendarAstronomer::getSunLongitude(double jDay, double &longitude, double &meanAnomaly) { - // See page 86 of "Practial Astronomy with your Calculator", + // See page 86 of "Practical Astronomy with your Calculator", // by Peter Duffet-Smith, for details on the algorithm. 
double day = jDay - JD_EPOCH; // Days since epoch @@ -723,7 +723,7 @@ CalendarAstronomer::AngleFunc::~AngleFunc() {} class SunTimeAngleFunc : public CalendarAstronomer::AngleFunc { public: virtual ~SunTimeAngleFunc(); - virtual double eval(CalendarAstronomer& a) { return a.getSunLongitude(); } + virtual double eval(CalendarAstronomer& a) override { return a.getSunLongitude(); } }; SunTimeAngleFunc::~SunTimeAngleFunc() {} @@ -743,7 +743,7 @@ CalendarAstronomer::CoordFunc::~CoordFunc() {} class RiseSetCoordFunc : public CalendarAstronomer::CoordFunc { public: virtual ~RiseSetCoordFunc(); - virtual void eval(CalendarAstronomer::Equatorial& result, CalendarAstronomer&a) { a.getSunPosition(result); } + virtual void eval(CalendarAstronomer::Equatorial& result, CalendarAstronomer& a) override { a.getSunPosition(result); } }; RiseSetCoordFunc::~RiseSetCoordFunc() {} @@ -1066,7 +1066,7 @@ UDate CalendarAstronomer::getSunRiseSet(UBool rise) const CalendarAstronomer::Equatorial& CalendarAstronomer::getMoonPosition() { // - // See page 142 of "Practial Astronomy with your Calculator", + // See page 142 of "Practical Astronomy with your Calculator", // by Peter Duffet-Smith, for details on the algorithm. // if (moonPositionSet == FALSE) { @@ -1154,7 +1154,7 @@ const CalendarAstronomer::Equatorial& CalendarAstronomer::getMoonPosition() * @deprecated ICU 2.4. This class may be removed or modified. */ double CalendarAstronomer::getMoonAge() { - // See page 147 of "Practial Astronomy with your Calculator", + // See page 147 of "Practical Astronomy with your Calculator", // by Peter Duffet-Smith, for details on the algorithm. // // Force the moon's position to be calculated. We're going to use @@ -1181,7 +1181,7 @@ double CalendarAstronomer::getMoonAge() { * @deprecated ICU 2.4. This class may be removed or modified. 
*/ double CalendarAstronomer::getMoonPhase() { - // See page 147 of "Practial Astronomy with your Calculator", + // See page 147 of "Practical Astronomy with your Calculator", // by Peter Duffet-Smith, for details on the algorithm. return 0.5 * (1 - cos(getMoonAge())); } @@ -1225,7 +1225,7 @@ const CalendarAstronomer::MoonAge CalendarAstronomer::FULL_MOON() { class MoonTimeAngleFunc : public CalendarAstronomer::AngleFunc { public: virtual ~MoonTimeAngleFunc(); - virtual double eval(CalendarAstronomer&a) { return a.getMoonAge(); } + virtual double eval(CalendarAstronomer& a) override { return a.getMoonAge(); } }; MoonTimeAngleFunc::~MoonTimeAngleFunc() {} @@ -1239,8 +1239,8 @@ MoonTimeAngleFunc::~MoonTimeAngleFunc() {} * longitude will have the desired value. * <p> * @param desired The desired longitude. - * @param next <tt>true</tt> if the next occurrance of the phase - * is desired, <tt>false</tt> for the previous occurrance. + * @param next <tt>true</tt> if the next occurrence of the phase + * is desired, <tt>false</tt> for the previous occurrence. * @internal * @deprecated ICU 2.4. This class may be removed or modified. */ @@ -1259,8 +1259,8 @@ UDate CalendarAstronomer::getMoonTime(double desired, UBool next) * desired phase. * <p> * @param desired The desired phase of the moon. - * @param next <tt>true</tt> if the next occurrance of the phase - * is desired, <tt>false</tt> for the previous occurrance. + * @param next <tt>true</tt> if the next occurrence of the phase + * is desired, <tt>false</tt> for the previous occurrence. * @internal * @deprecated ICU 2.4. This class may be removed or modified. 
*/ @@ -1271,7 +1271,7 @@ UDate CalendarAstronomer::getMoonTime(const CalendarAstronomer::MoonAge& desired class MoonRiseSetCoordFunc : public CalendarAstronomer::CoordFunc { public: virtual ~MoonRiseSetCoordFunc(); - virtual void eval(CalendarAstronomer::Equatorial& result, CalendarAstronomer&a) { result = a.getMoonPosition(); } + virtual void eval(CalendarAstronomer::Equatorial& result, CalendarAstronomer& a) override { result = a.getMoonPosition(); } }; MoonRiseSetCoordFunc::~MoonRiseSetCoordFunc() {} diff --git a/contrib/libs/icu/i18n/astro.h b/contrib/libs/icu/i18n/astro.h index a246489005..372a79ac67 100644 --- a/contrib/libs/icu/i18n/astro.h +++ b/contrib/libs/icu/i18n/astro.h @@ -626,8 +626,8 @@ public: * longitude will have the desired value. * <p> * @param desired The desired longitude. - * @param next <tt>true</tt> if the next occurrance of the phase - * is desired, <tt>false</tt> for the previous occurrance. + * @param next <tt>true</tt> if the next occurrence of the phase + * is desired, <tt>false</tt> for the previous occurrence. 
* @internal */ UDate getMoonTime(double desired, UBool next); diff --git a/contrib/libs/icu/i18n/basictz.cpp b/contrib/libs/icu/i18n/basictz.cpp index 54ee5a1a2b..7b5449f416 100644 --- a/contrib/libs/icu/i18n/basictz.cpp +++ b/contrib/libs/icu/i18n/basictz.cpp @@ -293,71 +293,77 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, } const InitialTimeZoneRule *orgini; - const TimeZoneRule **orgtrs = NULL; TimeZoneTransition tzt; - UBool avail; - UVector *orgRules = NULL; + bool avail; int32_t ruleCount; - TimeZoneRule *r = NULL; - UBool *done = NULL; - InitialTimeZoneRule *res_initial = NULL; - UVector *filteredRules = NULL; + TimeZoneRule *r = nullptr; UnicodeString name; int32_t i; UDate time, t; - UDate *newTimes = NULL; UDate firstStart; - UBool bFinalStd = FALSE, bFinalDst = FALSE; + UBool bFinalStd = false, bFinalDst = false; + + initial = nullptr; + transitionRules = nullptr; // Original transition rules ruleCount = countTransitionRules(status); if (U_FAILURE(status)) { return; } - orgRules = new UVector(ruleCount, status); + LocalPointer<UVector> orgRules( + new UVector(uprv_deleteUObject, nullptr, ruleCount, status), status); if (U_FAILURE(status)) { return; } - orgtrs = (const TimeZoneRule**)uprv_malloc(sizeof(TimeZoneRule*)*ruleCount); - if (orgtrs == NULL) { + LocalMemory<const TimeZoneRule *> orgtrs( + static_cast<const TimeZoneRule **>(uprv_malloc(sizeof(TimeZoneRule*)*ruleCount))); + if (orgtrs.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; - goto error; + return; } - getTimeZoneRules(orgini, orgtrs, ruleCount, status); + getTimeZoneRules(orgini, &orgtrs[0], ruleCount, status); if (U_FAILURE(status)) { - goto error; + return; } for (i = 0; i < ruleCount; i++) { - orgRules->addElement(orgtrs[i]->clone(), status); + LocalPointer<TimeZoneRule> lpRule(orgtrs[i]->clone(), status); + orgRules->adoptElement(lpRule.orphan(), status); if (U_FAILURE(status)) { - goto error; + return; } } - uprv_free(orgtrs); - orgtrs = NULL; 
avail = getPreviousTransition(start, TRUE, tzt); if (!avail) { // No need to filter out rules only applicable to time before the start initial = orgini->clone(); - transitionRules = orgRules; + if (initial == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + transitionRules = orgRules.orphan(); return; } - done = (UBool*)uprv_malloc(sizeof(UBool)*ruleCount); - if (done == NULL) { + LocalMemory<bool> done(static_cast<bool *>(uprv_malloc(sizeof(bool)*ruleCount))); + if (done.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; - goto error; + return; } - filteredRules = new UVector(status); + LocalPointer<UVector> filteredRules( + new UVector(uprv_deleteUObject, nullptr, status), status); if (U_FAILURE(status)) { - goto error; + return; } // Create initial rule tzt.getTo()->getName(name); - res_initial = new InitialTimeZoneRule(name, tzt.getTo()->getRawOffset(), - tzt.getTo()->getDSTSavings()); + LocalPointer<InitialTimeZoneRule> res_initial( + new InitialTimeZoneRule(name, tzt.getTo()->getRawOffset(), tzt.getTo()->getDSTSavings()), status); + if (U_FAILURE(status)) { + return; + } // Mark rules which does not need to be processed for (i = 0; i < ruleCount; i++) { @@ -378,7 +384,7 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, // the same time. // TODO: fix getNextTransition() to prevent it? 
status = U_INVALID_STATE_ERROR; - goto error; + return; } time = updatedTime; @@ -392,7 +398,7 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, if (i >= ruleCount) { // This case should never happen status = U_INVALID_STATE_ERROR; - goto error; + return; } if (done[i]) { continue; @@ -418,12 +424,13 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, tar->getFirstStart(tzt.getFrom()->getRawOffset(), tzt.getFrom()->getDSTSavings(), firstStart); if (firstStart > start) { // Just add the rule as is - filteredRules->addElement(tar->clone(), status); + LocalPointer<TimeArrayTimeZoneRule> lpTar(tar->clone(), status); + filteredRules->adoptElement(lpTar.orphan(), status); if (U_FAILURE(status)) { - goto error; + return; } } else { - // Colllect transitions after the start time + // Collect transitions after the start time int32_t startTimes; DateTimeRule::TimeRuleType timeType; int32_t idx; @@ -442,28 +449,25 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, break; } } + if (U_FAILURE(status)) { + return; + } int32_t asize = startTimes - idx; if (asize > 0) { - newTimes = (UDate*)uprv_malloc(sizeof(UDate) * asize); - if (newTimes == NULL) { + LocalMemory<UDate> newTimes(static_cast<UDate *>(uprv_malloc(sizeof(UDate) * asize))); + if (newTimes.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; - goto error; + return; } for (int32_t newidx = 0; newidx < asize; newidx++) { tar->getStartTimeAt(idx + newidx, newTimes[newidx]); - if (U_FAILURE(status)) { - uprv_free(newTimes); - newTimes = NULL; - goto error; - } } tar->getName(name); - TimeArrayTimeZoneRule *newTar = new TimeArrayTimeZoneRule(name, - tar->getRawOffset(), tar->getDSTSavings(), newTimes, asize, timeType); - uprv_free(newTimes); - filteredRules->addElement(newTar, status); + LocalPointer<TimeArrayTimeZoneRule> newTar(new TimeArrayTimeZoneRule( + name, tar->getRawOffset(), tar->getDSTSavings(), &newTimes[0], asize, 
timeType), status); + filteredRules->adoptElement(newTar.orphan(), status); if (U_FAILURE(status)) { - goto error; + return; } } } @@ -472,9 +476,10 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, ar->getFirstStart(tzt.getFrom()->getRawOffset(), tzt.getFrom()->getDSTSavings(), firstStart); if (firstStart == tzt.getTime()) { // Just add the rule as is - filteredRules->addElement(ar->clone(), status); + LocalPointer<AnnualTimeZoneRule> arClone(ar->clone(), status); + filteredRules->adoptElement(arClone.orphan(), status); if (U_FAILURE(status)) { - goto error; + return; } } else { // Calculate the transition year @@ -482,11 +487,11 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, Grego::timeToFields(tzt.getTime(), year, month, dom, dow, doy, mid); // Re-create the rule ar->getName(name); - AnnualTimeZoneRule *newAr = new AnnualTimeZoneRule(name, ar->getRawOffset(), ar->getDSTSavings(), - *(ar->getRule()), year, ar->getEndYear()); - filteredRules->addElement(newAr, status); + LocalPointer<AnnualTimeZoneRule> newAr(new AnnualTimeZoneRule(name, ar->getRawOffset(), ar->getDSTSavings(), + *(ar->getRule()), year, ar->getEndYear()), status); + filteredRules->adoptElement(newAr.orphan(), status); if (U_FAILURE(status)) { - goto error; + return; } } // check if this is a final rule @@ -500,61 +505,33 @@ BasicTimeZone::getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, } } } - done[i] = TRUE; + done[i] = true; } // Set the results - if (orgRules != NULL) { - while (!orgRules->isEmpty()) { - r = (TimeZoneRule*)orgRules->orphanElementAt(0); - delete r; - } - delete orgRules; - } - if (done != NULL) { - uprv_free(done); - } - - initial = res_initial; - transitionRules = filteredRules; + initial = res_initial.orphan(); + transitionRules = filteredRules.orphan(); return; - -error: - if (orgtrs != NULL) { - uprv_free(orgtrs); - } - if (orgRules != NULL) { - while (!orgRules->isEmpty()) { - r = 
(TimeZoneRule*)orgRules->orphanElementAt(0); - delete r; - } - delete orgRules; - } - if (done != NULL) { - if (filteredRules != NULL) { - while (!filteredRules->isEmpty()) { - r = (TimeZoneRule*)filteredRules->orphanElementAt(0); - delete r; - } - delete filteredRules; - } - delete res_initial; - uprv_free(done); - } - - initial = NULL; - transitionRules = NULL; } void -BasicTimeZone::getOffsetFromLocal(UDate /*date*/, int32_t /*nonExistingTimeOpt*/, int32_t /*duplicatedTimeOpt*/, - int32_t& /*rawOffset*/, int32_t& /*dstOffset*/, UErrorCode& status) const { +BasicTimeZone::getOffsetFromLocal(UDate /*date*/, UTimeZoneLocalOption /*nonExistingTimeOpt*/, + UTimeZoneLocalOption /*duplicatedTimeOpt*/, + int32_t& /*rawOffset*/, int32_t& /*dstOffset*/, + UErrorCode& status) const { if (U_FAILURE(status)) { return; } status = U_UNSUPPORTED_ERROR; } +void BasicTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, + UErrorCode& status) const { + getOffsetFromLocal(date, (UTimeZoneLocalOption)nonExistingTimeOpt, + (UTimeZoneLocalOption)duplicatedTimeOpt, rawOffset, dstOffset, status); +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/brktrans.cpp b/contrib/libs/icu/i18n/brktrans.cpp index 46b0e345da..f0ec8407db 100644 --- a/contrib/libs/icu/i18n/brktrans.cpp +++ b/contrib/libs/icu/i18n/brktrans.cpp @@ -106,7 +106,7 @@ void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& int32_t boundary; for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) { if (boundary == 0) continue; - // HACK: Check to see that preceeding item was a letter + // HACK: Check to see that preceding item was a letter UChar32 cp = sText.char32At(boundary-1); int type = u_charType(cp); diff --git a/contrib/libs/icu/i18n/brktrans.h b/contrib/libs/icu/i18n/brktrans.h index cb3def9e92..5dcc8c50c0 100644 --- 
a/contrib/libs/icu/i18n/brktrans.h +++ b/contrib/libs/icu/i18n/brktrans.h @@ -54,7 +54,7 @@ public: * Transliterator API. * @return A copy of the object. */ - virtual BreakTransliterator* clone() const; + virtual BreakTransliterator* clone() const override; virtual const UnicodeString &getInsertion() const; @@ -63,7 +63,7 @@ public: /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -82,7 +82,7 @@ public: * pos.contextLimit. Otherwise, assume the text is complete. */ virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, - UBool isIncremental) const; + UBool isIncremental) const override; private: LocalPointer<BreakIterator> cachedBI; diff --git a/contrib/libs/icu/i18n/buddhcal.h b/contrib/libs/icu/i18n/buddhcal.h index 0ad0886df1..2ef5c52493 100644 --- a/contrib/libs/icu/i18n/buddhcal.h +++ b/contrib/libs/icu/i18n/buddhcal.h @@ -94,7 +94,7 @@ public: * @return return a polymorphic copy of this calendar. * @internal */ - virtual BuddhistCalendar* clone() const; + virtual BuddhistCalendar* clone() const override; public: /** @@ -107,7 +107,7 @@ public: * same class ID. Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. 
This is useful only for comparing to a return @@ -128,7 +128,7 @@ public: * @return calendar type * @internal */ - virtual const char * getType() const; + virtual const char * getType() const override; private: BuddhistCalendar(); // default constructor not implemented @@ -142,13 +142,13 @@ private: * @return the extended year * @internal */ - virtual int32_t handleGetExtendedYear(); + virtual int32_t handleGetExtendedYear() override; /** * Subclasses may override this method to compute several fields * specific to each calendar system. * @internal */ - virtual void handleComputeFields(int32_t julianDay, UErrorCode& status); + virtual void handleComputeFields(int32_t julianDay, UErrorCode& status) override; /** * Subclass API for defining limits of different types. * @param field one of the field numbers @@ -156,7 +156,7 @@ private: * <code>LEAST_MAXIMUM</code>, or <code>MAXIMUM</code> * @internal */ - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; /** * Return the Julian day number of day before the first day of the * given month in the given extended year. 
Subclasses should override @@ -171,26 +171,26 @@ private: * @internal */ virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, - UBool useMonth) const; + UBool useMonth) const override; /** - * Returns TRUE because the Buddhist Calendar does have a default century + * Returns true because the Buddhist Calendar does have a default century * @internal */ - virtual UBool haveDefaultCentury() const; + virtual UBool haveDefaultCentury() const override; /** * Returns the date of the start of the default century * @return start of century - in milliseconds since epoch, 1970 * @internal */ - virtual UDate defaultCenturyStart() const; + virtual UDate defaultCenturyStart() const override; /** * Returns the year in which the default century begins * @internal */ - virtual int32_t defaultCenturyStartYear() const; + virtual int32_t defaultCenturyStartYear() const override; }; U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/calendar.cpp b/contrib/libs/icu/i18n/calendar.cpp index 981f09c574..8405d08d3c 100644 --- a/contrib/libs/icu/i18n/calendar.cpp +++ b/contrib/libs/icu/i18n/calendar.cpp @@ -266,7 +266,7 @@ static ECalType getCalendarTypeForLocale(const char *locid) { //TODO: ULOC_FULL_NAME is out of date and too small.. char canonicalName[256]; - // canonicalize, so grandfathered variant will be transformed to keywords + // Canonicalize, so that an old-style variant will be transformed to keywords. // e.g ja_JP_TRADITIONAL -> ja_JP@calendar=japanese // NOTE: Since ICU-20187, ja_JP_TRADITIONAL no longer canonicalizes, and // the Gregorian calendar is returned instead. 
@@ -308,7 +308,7 @@ static ECalType getCalendarTypeForLocale(const char *locid) { calTypeBuf[0] = 0; if (U_SUCCESS(status) && order != NULL) { - // the first calender type is the default for the region + // the first calendar type is the default for the region int32_t len = 0; const UChar *uCalType = ures_getStringByIndex(order, 0, &len, &status); if (len < (int32_t)sizeof(calTypeBuf)) { @@ -427,7 +427,7 @@ protected: // return isStandardSupportedKeyword(keyword, status); //} - virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const + virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const override { if (U_SUCCESS(status)) { for(int32_t i=0;gCalTypes[i] != NULL;i++) { @@ -439,7 +439,7 @@ protected: } } - virtual UObject* create(const ICUServiceKey& key, const ICUService* /*service*/, UErrorCode& status) const { + virtual UObject* create(const ICUServiceKey& key, const ICUService* /*service*/, UErrorCode& status) const override { #ifdef U_DEBUG_CALSVC if(dynamic_cast<const LocaleKey*>(&key) == NULL) { fprintf(stderr, "::create - not a LocaleKey!\n"); @@ -485,7 +485,7 @@ public: DefaultCalendarFactory() : ICUResourceBundleFactory() { } virtual ~DefaultCalendarFactory(); protected: - virtual UObject* create(const ICUServiceKey& key, const ICUService* /*service*/, UErrorCode& status) const { + virtual UObject* create(const ICUServiceKey& key, const ICUService* /*service*/, UErrorCode& status) const override { LocaleKey &lkey = (LocaleKey&)key; Locale loc; @@ -517,7 +517,7 @@ public: virtual ~CalendarService(); - virtual UObject* cloneInstance(UObject* instance) const { + virtual UObject* cloneInstance(UObject* instance) const override { UnicodeString *s = dynamic_cast<UnicodeString *>(instance); if(s != NULL) { return s->clone(); @@ -530,7 +530,7 @@ public: } } - virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { + virtual UObject* handleDefault(const ICUServiceKey& 
key, UnicodeString* /*actualID*/, UErrorCode& status) const override { LocaleKey& lkey = (LocaleKey&)key; //int32_t kind = lkey.kind(); @@ -555,7 +555,7 @@ public: return nc; } - virtual UBool isDefault() const { + virtual UBool isDefault() const override { return countFactories() == 1; } }; @@ -870,7 +870,7 @@ Calendar::createInstance(const TimeZone& zone, UErrorCode& success) Calendar* U_EXPORT2 Calendar::createInstance(const Locale& aLocale, UErrorCode& success) { - return createInstance(TimeZone::createDefault(), aLocale, success); + return createInstance(TimeZone::forLocaleOrDefault(aLocale), aLocale, success); } // ------------------------------------- Adopting @@ -956,7 +956,7 @@ Calendar::makeInstance(const Locale& aLocale, UErrorCode& success) { #ifdef U_DEBUG_CALSVC fprintf(stderr, "%p: setting week count data to locale %s, actual locale %s\n", c, (const char*)aLocale.getName(), (const char *)actualLoc.getName()); #endif - c->setWeekData(aLocale, c->getType(), success); // set the correct locale (this was an indirected calendar) + c->setWeekData(aLocale, c->getType(), success); // set the correct locale (this was an indirect calendar) char keyword[ULOC_FULLNAME_CAPACITY] = ""; UErrorCode tmpStatus = U_ZERO_ERROR; @@ -1031,7 +1031,7 @@ Calendar::getCalendarTypeFromLocale( } } -UBool +bool Calendar::operator==(const Calendar& that) const { UErrorCode status = U_ZERO_ERROR; @@ -1558,7 +1558,7 @@ void Calendar::computeFields(UErrorCode &ec) // fields computed by handleComputeFields(). computeWeekFields(ec); - // Compute time-related fields. These are indepent of the date and + // Compute time-related fields. These are independent of the date and // of the subclass algorithm. They depend only on the local zone // wall milliseconds in day. 
int32_t millisInDay = (int32_t) (localMillis - (days * kOneDay)); @@ -2291,7 +2291,7 @@ int32_t Calendar::fieldDifference(UDate targetMs, UCalendarDateFields field, UEr if (U_FAILURE(ec)) return 0; int32_t min = 0; double startMs = getTimeInMillis(ec); - // Always add from the start millis. This accomodates + // Always add from the start millis. This accommodates // operations like adding years from February 29, 2000 up to // February 29, 2004. If 1, 1, 1, 1 is added to the year // field, the DOM gets pinned to 28 and stays there, giving an @@ -3083,7 +3083,7 @@ void Calendar::computeTime(UErrorCode& status) { } /** - * Find the previous zone transtion near the given time. + * Find the previous zone transition near the given time. */ UBool Calendar::getImmediatePreviousZoneTransition(UDate base, UDate *transitionTime, UErrorCode& status) const { BasicTimeZone *btz = getBasicTimeZone(); @@ -3163,8 +3163,8 @@ int32_t Calendar::computeZoneOffset(double millis, double millisInDay, UErrorCod UDate wall = millis + millisInDay; BasicTimeZone* btz = getBasicTimeZone(); if (btz) { - int duplicatedTimeOpt = (fRepeatedWallTime == UCAL_WALLTIME_FIRST) ? BasicTimeZone::kFormer : BasicTimeZone::kLatter; - int nonExistingTimeOpt = (fSkippedWallTime == UCAL_WALLTIME_FIRST) ? BasicTimeZone::kLatter : BasicTimeZone::kFormer; + UTimeZoneLocalOption duplicatedTimeOpt = (fRepeatedWallTime == UCAL_WALLTIME_FIRST) ? UCAL_TZ_LOCAL_FORMER : UCAL_TZ_LOCAL_LATTER; + UTimeZoneLocalOption nonExistingTimeOpt = (fSkippedWallTime == UCAL_WALLTIME_FIRST) ? UCAL_TZ_LOCAL_LATTER : UCAL_TZ_LOCAL_FORMER; btz->getOffsetFromLocal(wall, nonExistingTimeOpt, duplicatedTimeOpt, rawOffset, dstOffset, ec); } else { const TimeZone& tz = getTimeZone(); @@ -3197,7 +3197,7 @@ int32_t Calendar::computeZoneOffset(double millis, double millisInDay, UErrorCod // recalculate offsets from the resolved time (non-wall). 
// When the given wall time falls into skipped wall time, // the offsets will be based on the zone offsets AFTER - // the transition (which means, earliest possibe interpretation). + // the transition (which means, earliest possible interpretation). UDate tgmt = wall - (rawOffset + dstOffset); tz.getOffset(tgmt, FALSE, rawOffset, dstOffset, ec); } diff --git a/contrib/libs/icu/i18n/casetrn.h b/contrib/libs/icu/i18n/casetrn.h index 2ec3e736de..a00480db60 100644 --- a/contrib/libs/icu/i18n/casetrn.h +++ b/contrib/libs/icu/i18n/casetrn.h @@ -58,7 +58,7 @@ public: * Transliterator API. * @return a copy of the object. */ - virtual CaseMapTransliterator* clone() const = 0; + virtual CaseMapTransliterator* clone() const override = 0; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. @@ -82,7 +82,7 @@ protected: */ virtual void handleTransliterate(Replaceable& text, UTransPosition& offsets, - UBool isIncremental) const; + UBool isIncremental) const override; UCaseMapFull *fMap; diff --git a/contrib/libs/icu/i18n/cecal.cpp b/contrib/libs/icu/i18n/cecal.cpp index 00faa8ac07..cb97c40a3c 100644 --- a/contrib/libs/icu/i18n/cecal.cpp +++ b/contrib/libs/icu/i18n/cecal.cpp @@ -49,7 +49,7 @@ static const int32_t LIMITS[UCAL_FIELD_COUNT][4] = { //------------------------------------------------------------------------- CECalendar::CECalendar(const Locale& aLocale, UErrorCode& success) -: Calendar(TimeZone::createDefault(), aLocale, success) +: Calendar(TimeZone::forLocaleOrDefault(aLocale), aLocale, success) { setTimeInMillis(getNow(), success); } diff --git a/contrib/libs/icu/i18n/cecal.h b/contrib/libs/icu/i18n/cecal.h index c380f0bea3..9ac71f6ba3 100644 --- a/contrib/libs/icu/i18n/cecal.h +++ b/contrib/libs/icu/i18n/cecal.h @@ -68,13 +68,13 @@ protected: * Return JD of start of given month/extended year * @internal */ - virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, UBool useMonth) const; + virtual int32_t handleComputeMonthStart(int32_t 
eyear, int32_t month, UBool useMonth) const override; /** * Calculate the limit for a specified type of limit and field * @internal */ - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; /** * (Overrides Calendar) Return true if the current date for this Calendar is in @@ -85,13 +85,13 @@ protected: * false, otherwise. * @internal */ - virtual UBool inDaylightTime(UErrorCode&) const; + virtual UBool inDaylightTime(UErrorCode&) const override; /** - * Returns TRUE because Coptic/Ethiopic Calendar does have a default century + * Returns true because Coptic/Ethiopic Calendar does have a default century * @internal */ - virtual UBool haveDefaultCentury() const; + virtual UBool haveDefaultCentury() const override; protected: /** diff --git a/contrib/libs/icu/i18n/chnsecal.cpp b/contrib/libs/icu/i18n/chnsecal.cpp index 4c03812715..f8fb4a40f1 100644 --- a/contrib/libs/icu/i18n/chnsecal.cpp +++ b/contrib/libs/icu/i18n/chnsecal.cpp @@ -123,7 +123,7 @@ ChineseCalendar* ChineseCalendar::clone() const { } ChineseCalendar::ChineseCalendar(const Locale& aLocale, UErrorCode& success) -: Calendar(TimeZone::createDefault(), aLocale, success), +: Calendar(TimeZone::forLocaleOrDefault(aLocale), aLocale, success), isLeapYear(FALSE), fEpochYear(CHINESE_EPOCH_YEAR), fZoneAstroCalc(getChineseCalZoneAstroCalc()) @@ -133,7 +133,7 @@ ChineseCalendar::ChineseCalendar(const Locale& aLocale, UErrorCode& success) ChineseCalendar::ChineseCalendar(const Locale& aLocale, int32_t epochYear, const TimeZone* zoneAstroCalc, UErrorCode &success) -: Calendar(TimeZone::createDefault(), aLocale, success), +: Calendar(TimeZone::forLocaleOrDefault(aLocale), aLocale, success), isLeapYear(FALSE), fEpochYear(epochYear), fZoneAstroCalc(zoneAstroCalc) diff --git a/contrib/libs/icu/i18n/chnsecal.h b/contrib/libs/icu/i18n/chnsecal.h index a0c21b6b5c..61ef2d3cad 100644 --- 
a/contrib/libs/icu/i18n/chnsecal.h +++ b/contrib/libs/icu/i18n/chnsecal.h @@ -144,7 +144,7 @@ class U_I18N_API ChineseCalendar : public Calendar { virtual ~ChineseCalendar(); // clone - virtual ChineseCalendar* clone() const; + virtual ChineseCalendar* clone() const override; private: @@ -162,18 +162,18 @@ class U_I18N_API ChineseCalendar : public Calendar { //---------------------------------------------------------------------- protected: - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; - virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const; - virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, UBool useMonth) const; - virtual int32_t handleGetExtendedYear(); - virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); - virtual const UFieldResolutionTable* getFieldResolutionTable() const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; + virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const override; + virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, UBool useMonth) const override; + virtual int32_t handleGetExtendedYear() override; + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status) override; + virtual const UFieldResolutionTable* getFieldResolutionTable() const override; public: - virtual void add(UCalendarDateFields field, int32_t amount, UErrorCode &status); - virtual void add(EDateFields field, int32_t amount, UErrorCode &status); - virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode &status); - virtual void roll(EDateFields field, int32_t amount, UErrorCode &status); + virtual void add(UCalendarDateFields field, int32_t amount, UErrorCode &status) override; + virtual void add(EDateFields field, int32_t amount, UErrorCode &status) override; + virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode &status) 
override; + virtual void roll(EDateFields field, int32_t amount, UErrorCode &status) override; //---------------------------------------------------------------------- // Internal methods & astronomical calculations @@ -204,7 +204,7 @@ class U_I18N_API ChineseCalendar : public Calendar { * same class ID. Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. This is useful only for comparing to a return @@ -225,7 +225,7 @@ class U_I18N_API ChineseCalendar : public Calendar { * @return calendar type * @internal */ - virtual const char * getType() const; + virtual const char * getType() const override; protected: @@ -238,27 +238,27 @@ class U_I18N_API ChineseCalendar : public Calendar { * false, otherwise. * @internal */ - virtual UBool inDaylightTime(UErrorCode& status) const; + virtual UBool inDaylightTime(UErrorCode& status) const override; /** - * Returns TRUE because the Islamic Calendar does have a default century + * Returns true because the Islamic Calendar does have a default century * @internal */ - virtual UBool haveDefaultCentury() const; + virtual UBool haveDefaultCentury() const override; /** * Returns the date of the start of the default century * @return start of century - in milliseconds since epoch, 1970 * @internal */ - virtual UDate defaultCenturyStart() const; + virtual UDate defaultCenturyStart() const override; /** * Returns the year in which the default century begins * @internal */ - virtual int32_t defaultCenturyStartYear() const; + virtual int32_t defaultCenturyStartYear() const override; private: // default century stuff. 
diff --git a/contrib/libs/icu/i18n/choicfmt.cpp b/contrib/libs/icu/i18n/choicfmt.cpp index 7e26bb7a1f..d06eec35fa 100644 --- a/contrib/libs/icu/i18n/choicfmt.cpp +++ b/contrib/libs/icu/i18n/choicfmt.cpp @@ -132,11 +132,11 @@ ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, } // ------------------------------------- -UBool +bool ChoiceFormat::operator==(const Format& that) const { - if (this == &that) return TRUE; - if (!NumberFormat::operator==(that)) return FALSE; + if (this == &that) return true; + if (!NumberFormat::operator==(that)) return false; ChoiceFormat& thatAlias = (ChoiceFormat&)that; return msgPattern == thatAlias.msgPattern; } diff --git a/contrib/libs/icu/i18n/coleitr.cpp b/contrib/libs/icu/i18n/coleitr.cpp index 64d3ab4d2b..48c1da9015 100644 --- a/contrib/libs/icu/i18n/coleitr.cpp +++ b/contrib/libs/icu/i18n/coleitr.cpp @@ -99,7 +99,7 @@ int32_t CollationElementIterator::getOffset() const /** * Get the ordering priority of the next character in the string. * @return the next character's ordering. Returns NULLORDER if an error has -* occured or if the end of string has been reached +* occurred or if the end of string has been reached */ int32_t CollationElementIterator::next(UErrorCode& status) { @@ -137,17 +137,17 @@ int32_t CollationElementIterator::next(UErrorCode& status) return firstHalf; } -UBool CollationElementIterator::operator!=( +bool CollationElementIterator::operator!=( const CollationElementIterator& other) const { return !(*this == other); } -UBool CollationElementIterator::operator==( +bool CollationElementIterator::operator==( const CollationElementIterator& that) const { if (this == &that) { - return TRUE; + return true; } return @@ -162,7 +162,7 @@ UBool CollationElementIterator::operator==( * Get the ordering priority of the previous collation element in the string. * @param status the error code status. * @return the previous element's ordering. 
Returns NULLORDER if an error has -* occured or if the start of string has been reached. +* occurred or if the start of string has been reached. */ int32_t CollationElementIterator::previous(UErrorCode& status) { @@ -398,8 +398,8 @@ class MaxExpSink : public ContractionsAndExpansions::CESink { public: MaxExpSink(UHashtable *h, UErrorCode &ec) : maxExpansions(h), errorCode(ec) {} virtual ~MaxExpSink(); - virtual void handleCE(int64_t /*ce*/) {} - virtual void handleExpansion(const int64_t ces[], int32_t length) { + virtual void handleCE(int64_t /*ce*/) override {} + virtual void handleExpansion(const int64_t ces[], int32_t length) override { if (length <= 1) { // We do not need to add single CEs into the map. return; diff --git a/contrib/libs/icu/i18n/coll.cpp b/contrib/libs/icu/i18n/coll.cpp index 7b87b41dff..fe73118da0 100644 --- a/contrib/libs/icu/i18n/coll.cpp +++ b/contrib/libs/icu/i18n/coll.cpp @@ -129,7 +129,7 @@ class ICUCollatorFactory : public ICUResourceBundleFactory { ICUCollatorFactory() : ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL, -1, US_INV)) { } virtual ~ICUCollatorFactory(); protected: - virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const; + virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const override; }; ICUCollatorFactory::~ICUCollatorFactory() {} @@ -162,11 +162,11 @@ public: virtual ~ICUCollatorService(); - virtual UObject* cloneInstance(UObject* instance) const { + virtual UObject* cloneInstance(UObject* instance) const override { return ((Collator*)instance)->clone(); } - virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const { + virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const override { LocaleKey& lkey = (LocaleKey&)key; if (actualID) { // Ugly Hack Alert! 
We return an empty actualID to signal @@ -179,7 +179,7 @@ public: return Collator::makeInstance(loc, status); } - virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const { + virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const override { UnicodeString ar; if (actualReturn == NULL) { actualReturn = &ar; @@ -187,7 +187,7 @@ public: return (Collator*)ICULocaleService::getKey(key, actualReturn, status); } - virtual UBool isDefault() const { + virtual UBool isDefault() const override { return countFactories() == 1; } }; @@ -604,7 +604,7 @@ UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale, /** * Default constructor. * Constructor is different from the old default Collator constructor. -* The task for determing the default collation strength and normalization mode +* The task for determining the default collation strength and normalization mode * is left to the child class. */ Collator::Collator() @@ -636,15 +636,15 @@ Collator::Collator(const Collator &other) { } -UBool Collator::operator==(const Collator& other) const +bool Collator::operator==(const Collator& other) const { // Subclasses: Call this method and then add more specific checks. 
return typeid(*this) == typeid(other); } -UBool Collator::operator!=(const Collator& other) const +bool Collator::operator!=(const Collator& other) const { - return (UBool)!(*this == other); + return !operator==(other); } int32_t U_EXPORT2 Collator::getBound(const uint8_t *source, @@ -721,10 +721,10 @@ public: virtual ~CFactory(); - virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const; + virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const override; protected: - virtual const Hashtable* getSupportedIDs(UErrorCode& status) const + virtual const Hashtable* getSupportedIDs(UErrorCode& status) const override { if (U_SUCCESS(status)) { return _ids; @@ -733,7 +733,7 @@ protected: } virtual UnicodeString& - getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const; + getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const override; }; CFactory::~CFactory() @@ -803,7 +803,7 @@ private: int32_t index; public: static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; public: CollationLocaleListEnumeration() : index(0) @@ -814,7 +814,7 @@ public: virtual ~CollationLocaleListEnumeration(); - virtual StringEnumeration * clone() const + virtual StringEnumeration * clone() const override { CollationLocaleListEnumeration *result = new CollationLocaleListEnumeration(); if (result) { @@ -823,11 +823,11 @@ public: return result; } - virtual int32_t count(UErrorCode &/*status*/) const { + virtual int32_t count(UErrorCode &/*status*/) const override { return availableLocaleListCount; } - virtual const char* next(int32_t* resultLength, UErrorCode& /*status*/) { + virtual const char* next(int32_t* resultLength, UErrorCode& /*status*/) override { const char* result; if(index < availableLocaleListCount) { result = 
availableLocaleList[index++].getName(); @@ -843,13 +843,13 @@ public: return result; } - virtual const UnicodeString* snext(UErrorCode& status) { + virtual const UnicodeString* snext(UErrorCode& status) override { int32_t resultLength = 0; const char *s = next(&resultLength, status); return setChars(s, resultLength, status); } - virtual void reset(UErrorCode& /*status*/) { + virtual void reset(UErrorCode& /*status*/) override { index = 0; } }; diff --git a/contrib/libs/icu/i18n/collation.h b/contrib/libs/icu/i18n/collation.h index e9256c9c12..6a449a3eb6 100644 --- a/contrib/libs/icu/i18n/collation.h +++ b/contrib/libs/icu/i18n/collation.h @@ -250,7 +250,7 @@ public: * Tag for a lead surrogate code unit. * Optional optimization for UTF-16 string processing. * Bits 31..10: Unused, 0. - * 9.. 8: =0: All associated supplementary code points are unassigned-implict. + * 9.. 8: =0: All associated supplementary code points are unassigned-implicit. * =1: All associated supplementary code points fall back to the base data. * else: (Normally 2) Look up the data for the supplementary code point. 
*/ @@ -356,7 +356,7 @@ public: } /** - * @return TRUE if the ce32 yields one or more CEs without further data lookups + * @return true if the ce32 yields one or more CEs without further data lookups */ static UBool isSelfContainedCE32(uint32_t ce32) { return !isSpecialCE32(ce32) || diff --git a/contrib/libs/icu/i18n/collationbuilder.cpp b/contrib/libs/icu/i18n/collationbuilder.cpp index 45ac6ddcd5..5d4611b851 100644 --- a/contrib/libs/icu/i18n/collationbuilder.cpp +++ b/contrib/libs/icu/i18n/collationbuilder.cpp @@ -59,7 +59,7 @@ public: virtual void getRules( const char *localeID, const char *collationType, UnicodeString &rules, - const char *&errorReason, UErrorCode &errorCode); + const char *&errorReason, UErrorCode &errorCode) override; }; BundleImporter::~BundleImporter() {} @@ -577,7 +577,7 @@ CollationBuilder::getSpecialResetPosition(const UnicodeString &str, parserErrorReason = "LDML forbids tailoring to U+FFFF"; return 0; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } int32_t index = findOrInsertNodeForRootCE(ce, strength, errorCode); @@ -688,7 +688,7 @@ CollationBuilder::addRelation(int32_t strength, const UnicodeString &prefix, // A Hangul syllable completely inside a contraction is ok. } // Note: If there is a prefix, then the parser checked that - // both the prefix and the string beging with NFC boundaries (not Jamo V or T). + // both the prefix and the string begin with NFC boundaries (not Jamo V or T). // Therefore: prefix.isEmpty() || !isJamoVOrT(nfdString.charAt(0)) // (While handling a Hangul syllable, prefixes on Jamo V or T // would not see the previous Jamo of that syllable.) 
@@ -1581,7 +1581,7 @@ class CEFinalizer : public CollationDataBuilder::CEModifier { public: CEFinalizer(const int64_t *ces) : finalCEs(ces) {} virtual ~CEFinalizer(); - virtual int64_t modifyCE32(uint32_t ce32) const { + virtual int64_t modifyCE32(uint32_t ce32) const override { U_ASSERT(!Collation::isSpecialCE32(ce32)); if(CollationBuilder::isTempCE32(ce32)) { // retain case bits @@ -1590,7 +1590,7 @@ public: return Collation::NO_CE; } } - virtual int64_t modifyCE(int64_t ce) const { + virtual int64_t modifyCE(int64_t ce) const override { if(CollationBuilder::isTempCE(ce)) { // retain case bits return finalCEs[CollationBuilder::indexFromTempCE(ce)] | (ce & 0xc000); diff --git a/contrib/libs/icu/i18n/collationbuilder.h b/contrib/libs/icu/i18n/collationbuilder.h index 2f20050f93..59d3c5d24b 100644 --- a/contrib/libs/icu/i18n/collationbuilder.h +++ b/contrib/libs/icu/i18n/collationbuilder.h @@ -42,7 +42,7 @@ public: CollationBuilder(const CollationTailoring *base, UErrorCode &errorCode); virtual ~CollationBuilder(); - void disableFastLatin() { fastLatinEnabled = FALSE; } + void disableFastLatin() { fastLatinEnabled = false; } CollationTailoring *parseAndBuild(const UnicodeString &ruleString, const UVersionInfo rulesVersion, @@ -57,7 +57,7 @@ private: /** Implements CollationRuleParser::Sink. */ virtual void addReset(int32_t strength, const UnicodeString &str, - const char *&errorReason, UErrorCode &errorCode); + const char *&errorReason, UErrorCode &errorCode) override; /** * Returns the secondary or tertiary weight preceding the current node's weight. * node=nodes[index]. @@ -70,7 +70,7 @@ private: /** Implements CollationRuleParser::Sink. 
*/ virtual void addRelation(int32_t strength, const UnicodeString &prefix, const UnicodeString &str, const UnicodeString &extension, - const char *&errorReason, UErrorCode &errorCode); + const char *&errorReason, UErrorCode &errorCode) override; /** * Picks one of the current CEs and finds or inserts a node in the graph @@ -115,11 +115,11 @@ private: /** Implements CollationRuleParser::Sink. */ virtual void suppressContractions(const UnicodeSet &set, const char *&parserErrorReason, - UErrorCode &errorCode); + UErrorCode &errorCode) override; /** Implements CollationRuleParser::Sink. */ virtual void optimize(const UnicodeSet &set, const char *&parserErrorReason, - UErrorCode &errorCode); + UErrorCode &errorCode) override; /** * Adds the mapping and its canonical closure. diff --git a/contrib/libs/icu/i18n/collationdatabuilder.cpp b/contrib/libs/icu/i18n/collationdatabuilder.cpp index 53361b86c7..25050aa777 100644 --- a/contrib/libs/icu/i18n/collationdatabuilder.cpp +++ b/contrib/libs/icu/i18n/collationdatabuilder.cpp @@ -131,18 +131,18 @@ public: int32_t fetchCEs(const UnicodeString &str, int32_t start, int64_t ces[], int32_t cesLength); - virtual void resetToOffset(int32_t newOffset); - virtual int32_t getOffset() const; + virtual void resetToOffset(int32_t newOffset) override; + virtual int32_t getOffset() const override; - virtual UChar32 nextCodePoint(UErrorCode &errorCode); - virtual UChar32 previousCodePoint(UErrorCode &errorCode); + virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; + virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; protected: - virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); - virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; + virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; - virtual uint32_t getDataCE32(UChar32 c) const; - virtual uint32_t 
getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode); + virtual uint32_t getDataCE32(UChar32 c) const override; + virtual uint32_t getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode) override; CollationDataBuilder &builder; CollationData builderData; @@ -255,12 +255,18 @@ DataBuilderCollationIterator::getDataCE32(UChar32 c) const { uint32_t DataBuilderCollationIterator::getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return 0; } U_ASSERT(Collation::hasCE32Tag(ce32, Collation::BUILDER_DATA_TAG)); if((ce32 & CollationDataBuilder::IS_BUILDER_JAMO_CE32) != 0) { UChar32 jamo = Collation::indexFromCE32(ce32); return utrie2_get32(builder.trie, jamo); } else { ConditionalCE32 *cond = builder.getConditionalCE32ForCE32(ce32); + if (cond == nullptr) { + errorCode = U_INTERNAL_PROGRAM_ERROR; + // TODO: ICU-21531 figure out why this happens. + return 0; + } if(cond->builtCE32 == Collation::NO_CE32) { // Build the context-sensitive mappings into their runtime form and cache the result. 
cond->builtCE32 = builder.buildContext(cond, errorCode); @@ -521,7 +527,7 @@ CollationDataBuilder::addConditionalCE32(const UnicodeString &context, uint32_t errorCode = U_MEMORY_ALLOCATION_ERROR; return -1; } - conditionalCE32s.addElement(cond, errorCode); + conditionalCE32s.addElementX(cond, errorCode); return index; } @@ -852,7 +858,7 @@ CollationDataBuilder::copyFromBaseCE32(UChar32 c, uint32_t ce32, UBool withConte ce32 = encodeOneCE(Collation::unassignedCEFromCodePoint(c), errorCode); break; default: - UPRV_UNREACHABLE; // require ce32 == base->getFinalCE32(ce32) + UPRV_UNREACHABLE_EXIT; // require ce32 == base->getFinalCE32(ce32) } return ce32; } diff --git a/contrib/libs/icu/i18n/collationdatabuilder.h b/contrib/libs/icu/i18n/collationdatabuilder.h index fee444deee..6ae77772fd 100644 --- a/contrib/libs/icu/i18n/collationdatabuilder.h +++ b/contrib/libs/icu/i18n/collationdatabuilder.h @@ -73,12 +73,12 @@ public: } /** - * @return TRUE if this builder has mappings (e.g., add() has been called) + * @return true if this builder has mappings (e.g., add() has been called) */ UBool hasMappings() const { return modified; } /** - * @return TRUE if c has CEs in this builder + * @return true if c has CEs in this builder */ UBool isAssigned(UChar32 c) const; @@ -118,7 +118,7 @@ public: * @param primary primary weight for 'start' * @param step per-code point primary-weight increment * @param errorCode ICU in/out error code - * @return TRUE if an OFFSET_TAG range was used for start..end + * @return true if an OFFSET_TAG range was used for start..end */ UBool maybeSetPrimaryRange(UChar32 start, UChar32 end, uint32_t primary, int32_t step, @@ -150,7 +150,7 @@ public: void optimize(const UnicodeSet &set, UErrorCode &errorCode); void suppressContractions(const UnicodeSet &set, UErrorCode &errorCode); - void enableFastLatin() { fastLatinEnabled = TRUE; } + void enableFastLatin() { fastLatinEnabled = true; } virtual void build(CollationData &data, UErrorCode &errorCode); /** 
diff --git a/contrib/libs/icu/i18n/collationfcd.cpp b/contrib/libs/icu/i18n/collationfcd.cpp index 9f73ff3898..0be4150b09 100644 --- a/contrib/libs/icu/i18n/collationfcd.cpp +++ b/contrib/libs/icu/i18n/collationfcd.cpp @@ -1,6 +1,5 @@ -// © 2016 and later: Unicode, Inc. and others. +// Copyright (C) 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html -// // Copyright (C) 1999-2016, International Business Machines // Corporation and others. All Rights Reserved. // @@ -8,7 +7,6 @@ // // machine-generated by: icu/tools/unicode/c/genuca/genuca.cpp - #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION @@ -22,27 +20,27 @@ const uint8_t CollationFCD::lcccIndex[2048]={ 0,0,0,0,0,0,0,0,1,1,2,3,0,0,0,0, 0,0,0,0,4,0,0,0,0,0,0,0,5,6,7,0, 8,0,9,0xa,0,0,0xb,0xc,0xd,0xe,0xf,0,0,0,0,0x10, -0x11,0x12,0x13,0,0,0,0x14,0x15,0,0x16,0x17,0,0,0x16,0x18,0x19, -0,0x16,0x18,0,0,0x16,0x18,0,0,0x16,0x18,0,0,0,0x18,0, -0,0,0x1a,0,0,0x16,0x18,0,0,0x1b,0x18,0,0,0,0x1c,0, -0,0x1d,0x1e,0,0,0x1d,0x1e,0,0x1f,0x20,0,0x21,0x22,0,0x23,0, -0,0x24,0,0,0x18,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0x25,0,0,0,0,0, +0x11,0x12,0x13,0,0x14,0,0x15,0x16,0,0x17,0x18,0,0,0x17,0x19,0x1a, +0,0x17,0x19,0,0,0x17,0x19,0,0,0x17,0x19,0,0,0,0x19,0, +0,0x17,0x1b,0,0,0x17,0x19,0,0,0x1c,0x19,0,0,0,0x1d,0, +0,0x1e,0x1f,0,0,0x1e,0x1f,0,0x20,0x21,0,0x22,0x23,0,0x24,0, +0,0x25,0,0,0x19,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x26,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x26,0x26,0,0,0,0,0x27,0, -0,0,0,0,0,0x28,0,0,0,0x13,0,0,0,0,0,0, -0x29,0,0,0x2a,0,0x2b,0x2c,0,0,0x26,0x2d,0x2e,0,0x2f,0,0x30, -0,0x31,0,0,0,0,0x32,0x33,0,0,0,0,0,0,1,0x34, +0,0,0,0,0,0,0,0,0x27,0x28,0,0,0,0,0x29,0, +0,0,0,0,0,0x2a,0,0,0,0x13,0,0,0,0,0,0, +0x2b,0,0,0x2c,0,0x2d,0x2e,0,0,0x28,0x2f,0x30,0,0x31,0,0x32, +0,0x33,0,0,0,0,0x34,0x35,0,0,0,0,0,0,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0x35,0x36,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x36,0x37,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0x37,0,0,0,0x38,0,0,0,1, +0,0,0,0,0,0,0,0x38,0,0,0,0x39,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0x39,0,0,0x3a,0,0,0,0,0,0,0,0,0,0,0, +0,0x3a,0,0,0x3b,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -101,9 +99,9 @@ const uint8_t CollationFCD::lcccIndex[2048]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0x3b,0x3c,0,0,0x3d,0,0,0,0,0,0,0,0, -0x23,0x3e,0,0,0,0,0x2d,0x3f,0,0x40,0x41,0,0,0x41,0x2c,0, -0,0,0,0,0,0x42,0x43,0x44,0,0,0,0,0,0,0,0x18, +0,0,0,0x3c,0x3d,0,0,0x3e,0,0,0,0,0,0,0,0, +0x24,0x3f,0,0,0,0,0x2f,0x40,0,0x41,0x42,0,0,0x42,0x43,0, +0,0,0,0,0,0x44,0x45,0x46,0,0,0,0,0,0,0,0x19, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -126,7 +124,7 @@ const uint8_t CollationFCD::lcccIndex[2048]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x45,0x46,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x47,0x48,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -143,17 +141,17 @@ const uint8_t CollationFCD::lcccIndex[2048]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x19,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1a,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; -const uint32_t CollationFCD::lcccBits[71]={ +const uint32_t CollationFCD::lcccBits[73]={ 0,0xffffffff,0xffff7fff,0xffff,0xf8,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0xfffff800,0x10000,0x9fc00000,0x3d9f,0x20000,0xffff0000,0x7ff, 
-0x200ff800,0xfbc00000,0x3eef,0xe000000,0xfff80000,0xfffffffb,0x10000000,0x1e2000,0x2000,0x40000000,0x602000,0x18000000,0x400,0x7000000,0xf00,0x3000000, -0x2a00000,0x3c3e0000,0xdf,0x40,0x6800000,0xe0000000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0xbfff0000,1,0x10,0xff800,0xc00, -0xc0040,0x800000,0xfff70000,0x31021fd,0xfbffffff,0x1fff0000,0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0xc0000000,0x30000,0x1000,0x3ffff, -0x3800,0x80000,0xc19d0000,2,0x400000,0xc0000fd,0x5108000 +0x200ff800,0xfbc00000,0x3eef,0xe000000,0xff000000,0xfffffc00,0xfffffffb,0x10000000,0x1e2000,0x2000,0x40000000,0x602000,0x18000000,0x400,0x7000000,0xf00, +0x3000000,0x2a00000,0x3c3e0000,0xdf,0x40,0x6800000,0xe0000000,0x300000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0xbfff0000,0x7fff,0x10, +0xff800,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0x1fff0000,0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0xc0000000,0x30000,0x1000, +0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,0xc0000fd,0x5108000 }; const uint8_t CollationFCD::tcccIndex[2048]={ @@ -161,27 +159,27 @@ const uint8_t CollationFCD::tcccIndex[2048]={ 0xb,0xc,0,0,0,0,0,0,1,1,0xd,0xe,0xf,0x10,0x11,0, 0x12,0x13,0x14,0x15,0x16,0,0x17,0x18,0,0,0,0,0x19,0x1a,0x1b,0, 0x1c,0x1d,0x1e,0x1f,0,0,0x20,0x21,0x22,0x23,0x24,0,0,0,0,0x25, -0x26,0x27,0x28,0,0,0,0x29,0x2a,0,0x2b,0x2c,0,0,0x2d,0x2e,0x2f, -0,0x30,0x31,0,0,0x2d,0x32,0,0,0x2d,0x33,0,0,0,0x32,0, -0,0,0x34,0,0,0x2d,0x32,0,0,0x35,0x32,0,0,0,0x36,0, -0,0x37,0x38,0,0,0x37,0x38,0,0x39,0x3a,0,0x3b,0x3c,0,0x3d,0, -0,0x3e,0,0,0x32,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0x3f,0,0,0,0,0, +0x26,0x27,0x28,0,0x29,0,0x2a,0x2b,0,0x2c,0x2d,0,0,0x2e,0x2f,0x30, +0,0x31,0x32,0,0,0x2e,0x33,0,0,0x2e,0x34,0,0,0,0x33,0, +0,0x2e,0x35,0,0,0x2e,0x33,0,0,0x36,0x33,0,0,0,0x37,0, +0,0x38,0x39,0,0,0x38,0x39,0,0x3a,0x3b,0,0x3c,0x3d,0,0x3e,0, +0,0x3f,0,0,0x33,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x40,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x40,0x40,0,0,0,0,0x41,0, 
-0,0,0,0,0,0x42,0,0,0,0x28,0,0,0,0,0,0, -0x43,0,0,0x44,0,0x45,0x46,0,0,0x40,0x47,0x48,0,0x49,0,0x4a, -0,0x4b,0,0,0,0,0x4c,0x4d,0,0,0,0,0,0,1,0x4e, -1,1,1,1,0x4f,1,1,0x50,0x51,1,0x52,0x53,1,0x54,0x55,0x56, -0,0,0,0,0,0,0x57,0x58,0,0x59,0,0,0x5a,0x5b,0x5c,0, -0x5d,0x5e,0x5f,0x60,0x61,0x62,0,0x63,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x41,0x42,0,0,0,0,0x43,0, +0,0,0,0,0,0x44,0,0,0,0x28,0,0,0,0,0,0, +0x45,0,0,0x46,0,0x47,0x48,0,0,0x42,0x49,0x4a,0,0x4b,0,0x4c, +0,0x4d,0,0,0,0,0x4e,0x4f,0,0,0,0,0,0,1,1, +1,1,1,1,0x50,1,1,0x51,0x52,1,0x53,0x54,1,0x55,0x56,0x57, +0,0,0,0,0,0,0x58,0x59,0,0x5a,0,0,0x5b,0x5c,0x5d,0, +0x5e,0x5f,0x60,0x61,0x62,0x63,0,0x64,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0x2d,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0x64,0,0,0,0x65,0,0,0,1, +0,0,0,0,0,0,0x2e,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x65,0,0,0,0x66,0,0,0,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0x66,0x67,0x68,0x69,0x67,0x68,0x6a,0,0,0,0,0,0,0,0, +0,0x67,0x68,0x69,0x6a,0x68,0x69,0x6b,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -240,9 +238,9 @@ const uint8_t CollationFCD::tcccIndex[2048]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0x6b,0x6c,0,0,0x6d,0,0,0,0,0,0,0,0, -0x3d,0x6e,0,0,0,0,0x47,0x6f,0,0x70,0x71,0,0,0x71,0x46,0, -0,0,0,0,0,0x72,0x73,0x74,0,0,0,0,0,0,0,0x32, +0,0,0,0x6c,0x6d,0,0,0x6e,0,0,0,0,0,0,0,0, +0x3e,0x6f,0,0,0,0,0x49,0x70,0,0x71,0x72,0,0,0x72,0x73,0, +0,0,0,0,0,0x74,0x75,0x76,0,0,0,0,0,0,0,0x33, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -265,7 +263,7 @@ const uint8_t CollationFCD::tcccIndex[2048]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x75,0x76,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x77,0x78,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -282,20 +280,20 @@ const uint8_t CollationFCD::tcccIndex[2048]={ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x3f,0x77,0x78,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x40,0x79,0x7a,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0xe,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; -const uint32_t CollationFCD::tcccBits[121]={ +const uint32_t CollationFCD::tcccBits[123]={ 0,0xffffffff,0x3e7effbf,0xbe7effbf,0xfffcffff,0x7ef1ff3f,0xfff3f1f8,0x7fffff3f,0x18003,0xdfffe000,0xff31ffcf,0xcfffffff,0xfffc0,0xffff7fff,0xffff,0x1d760, 0x1fc00,0x187c00,0x200708b,0x2000000,0x708b0000,0xc00000,0xf8,0xfccf0006,0x33ffcfc,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0x7c,0xfffff800,0x10000, -0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0x200ff800,0xfbc00000,0x3eef,0xe000000,0xfff80000,0xfffffffb,0x10120200,0xff1e2000,0x10000000,0xb0002000,0x40000000, -0x10480000,0x4e002000,0x2000,0x30002000,0x602100,0x18000000,0x24000400,0x7000000,0xf00,0x3000000,0x2a00000,0x3d7e0000,0xdf,0x40,0x6800000,0xe0000000, -0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0xbfff0000,1,0x10,0xff800,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0xfbffffff,0xbffffff, -0x3ffffff,0x3f3fffff,0xaaff3f3f,0x3fffffff,0x1fdfffff,0xefcfffde,0x1fdc7fff,0x1fff0000,0x1ffe2,0x800,0xc000000,0x4000,0xe000,0x1210,0x50,0x292, -0x333e005,0x333,0xf000,0x3c0f,0x38000,0x80000000,0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0xc0000000,0x30000,0x1000,0x3ffff, -0x3800,0x80000,0xc19d0000,2,0x400000,0xc0000fd,0x5108000,0x5f7ffc00,0x7fdb +0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0x200ff800,0xfbc00000,0x3eef,0xe000000,0xff000000,0xfffffc00,0xfffffffb,0x10120200,0xff1e2000,0x10000000,0xb0002000, +0x40000000,0x10480000,0x4e002000,0x2000,0x30002000,0x602100,0x18000000,0x24000400,0x7000000,0xf00,0x3000000,0x2a00000,0x3d7e0000,0xdf,0x40,0x6800000, 
+0xe0000000,0x300000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0xbfff0000,0x7fff,0x10,0xff800,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd, +0xbffffff,0x3ffffff,0x3f3fffff,0xaaff3f3f,0x3fffffff,0x1fdfffff,0xefcfffde,0x1fdc7fff,0x1fff0000,0x1ffe2,0x800,0xc000000,0x4000,0xe000,0x1210,0x50, +0x292,0x333e005,0x333,0xf000,0x3c0f,0x38000,0x80000000,0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0xc0000000,0x30000,0x1000, +0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,0xc0000fd,0x5108000,0x5f7ffc00,0x7fdb }; U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/collationfcd.h b/contrib/libs/icu/i18n/collationfcd.h index ec7167d76b..3a5738efb2 100644 --- a/contrib/libs/icu/i18n/collationfcd.h +++ b/contrib/libs/icu/i18n/collationfcd.h @@ -84,7 +84,7 @@ public: // Handles all of Unicode 0..10FFFF. // c can be negative, e.g., U_SENTINEL. // U+0300 is the first character with lccc!=0. - if(c < 0x300) { return FALSE; } + if(c < 0x300) { return false; } if(c > 0xffff) { c = U16_LEAD(c); } int32_t i; return @@ -101,7 +101,7 @@ public: * This is a fast and imprecise test. * * @param c a code point - * @return TRUE if c is U+0F73, U+0F75 or U+0F81 or one of several other Tibetan characters + * @return true if c is U+0F73, U+0F75 or U+0F81 or one of several other Tibetan characters */ static inline UBool maybeTibetanCompositeVowel(UChar32 c) { return (c & 0x1fff01) == 0xf01; @@ -116,7 +116,7 @@ public: * They have distinct lccc/tccc combinations: 129/130 or 129/132. 
* * @param fcd16 the FCD value (lccc/tccc combination) of a code point - * @return TRUE if fcd16 is from U+0F73, U+0F75 or U+0F81 + * @return true if fcd16 is from U+0F73, U+0F75 or U+0F81 */ static inline UBool isFCD16OfTibetanCompositeVowel(uint16_t fcd16) { return fcd16 == 0x8182 || fcd16 == 0x8184; diff --git a/contrib/libs/icu/i18n/collationiterator.cpp b/contrib/libs/icu/i18n/collationiterator.cpp index 18ccf014f0..6bfdfbe7c7 100644 --- a/contrib/libs/icu/i18n/collationiterator.cpp +++ b/contrib/libs/icu/i18n/collationiterator.cpp @@ -168,7 +168,7 @@ CollationIterator::~CollationIterator() { delete skipped; } -UBool +bool CollationIterator::operator==(const CollationIterator &other) const { // Subclasses: Call this method and then add more specific checks. // Compare the iterator state but not the collation data (trie & data fields): @@ -180,12 +180,12 @@ CollationIterator::operator==(const CollationIterator &other) const { cesIndex == other.cesIndex && numCpFwd == other.numCpFwd && isNumeric == other.isNumeric)) { - return FALSE; + return false; } for(int32_t i = 0; i < ceBuffer.length; ++i) { - if(ceBuffer.get(i) != other.ceBuffer.get(i)) { return FALSE; } + if(ceBuffer.get(i) != other.ceBuffer.get(i)) { return false; } } - return TRUE; + return true; } void diff --git a/contrib/libs/icu/i18n/collationiterator.h b/contrib/libs/icu/i18n/collationiterator.h index 12e05b4482..73cb93b934 100644 --- a/contrib/libs/icu/i18n/collationiterator.h +++ b/contrib/libs/icu/i18n/collationiterator.h @@ -76,9 +76,9 @@ private: // (Rather than buffer.getCapacity().) 
if(length < INITIAL_CAPACITY || ensureAppendCapacity(1, errorCode)) { ++length; - return TRUE; + return true; } else { - return FALSE; + return false; } } @@ -109,8 +109,8 @@ public: virtual ~CollationIterator(); - virtual UBool operator==(const CollationIterator &other) const; - inline UBool operator!=(const CollationIterator &other) const { + virtual bool operator==(const CollationIterator &other) const; + inline bool operator!=(const CollationIterator &other) const { return !operator==(other); } @@ -251,9 +251,9 @@ protected: virtual UBool foundNULTerminator(); /** - * @return FALSE if surrogate code points U+D800..U+DFFF + * @return false if surrogate code points U+D800..U+DFFF * map to their own implicit primary weights (for UTF-16), - * or TRUE if they map to CE(U+FFFD) (for UTF-8) + * or true if they map to CE(U+FFFD) (for UTF-8) */ virtual UBool forbidSurrogateCodePoints() const; diff --git a/contrib/libs/icu/i18n/collationkeys.h b/contrib/libs/icu/i18n/collationkeys.h index 60d9e50c0d..5b41d14c0b 100644 --- a/contrib/libs/icu/i18n/collationkeys.h +++ b/contrib/libs/icu/i18n/collationkeys.h @@ -38,7 +38,7 @@ public: void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; } - virtual void Append(const char *bytes, int32_t n); + virtual void Append(const char *bytes, int32_t n) override; void Append(uint32_t b) { if (ignore_ > 0) { --ignore_; @@ -52,7 +52,7 @@ public: virtual char *GetAppendBuffer(int32_t min_capacity, int32_t desired_capacity_hint, char *scratch, int32_t scratch_capacity, - int32_t *result_capacity); + int32_t *result_capacity) override; int32_t NumberOfBytesAppended() const { return appended_; } /** @@ -65,7 +65,7 @@ public: } UBool Overflowed() const { return appended_ > capacity_; } - /** @return FALSE if memory allocation failed */ + /** @return false if memory allocation failed */ UBool IsOk() const { return buffer_ != NULL; } protected: @@ -94,8 +94,8 @@ public: virtual ~LevelCallback(); /** * @param level The next level about to be 
written to the ByteSink. - * @return TRUE if the level is to be written - * (the base class implementation always returns TRUE) + * @return true if the level is to be written + * (the base class implementation always returns true) */ virtual UBool needToWrite(Collation::Level level); }; @@ -103,7 +103,7 @@ public: /** * Writes the sort key bytes for minLevel up to the iterator data's strength. * Optionally writes the case level. - * Stops writing levels when callback.needToWrite(level) returns FALSE. + * Stops writing levels when callback.needToWrite(level) returns false. * Separates levels with the LEVEL_SEPARATOR_BYTE * but does not write a TERMINATOR_BYTE. */ diff --git a/contrib/libs/icu/i18n/collationsettings.cpp b/contrib/libs/icu/i18n/collationsettings.cpp index 534e20df3e..9eeab48331 100644 --- a/contrib/libs/icu/i18n/collationsettings.cpp +++ b/contrib/libs/icu/i18n/collationsettings.cpp @@ -48,15 +48,15 @@ CollationSettings::~CollationSettings() { } } -UBool +bool CollationSettings::operator==(const CollationSettings &other) const { - if(options != other.options) { return FALSE; } - if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; } - if(reorderCodesLength != other.reorderCodesLength) { return FALSE; } + if(options != other.options) { return false; } + if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return false; } + if(reorderCodesLength != other.reorderCodesLength) { return false; } for(int32_t i = 0; i < reorderCodesLength; ++i) { - if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; } + if(reorderCodes[i] != other.reorderCodes[i]) { return false; } } - return TRUE; + return true; } int32_t diff --git a/contrib/libs/icu/i18n/collationsettings.h b/contrib/libs/icu/i18n/collationsettings.h index 83e775d443..3da8f6214f 100644 --- a/contrib/libs/icu/i18n/collationsettings.h +++ b/contrib/libs/icu/i18n/collationsettings.h @@ -115,9 +115,9 @@ struct U_I18N_API CollationSettings : 
public SharedObject { CollationSettings(const CollationSettings &other); virtual ~CollationSettings(); - UBool operator==(const CollationSettings &other) const; + bool operator==(const CollationSettings &other) const; - inline UBool operator!=(const CollationSettings &other) const { + inline bool operator!=(const CollationSettings &other) const { return !operator==(other); } diff --git a/contrib/libs/icu/i18n/collationtailoring.h b/contrib/libs/icu/i18n/collationtailoring.h index e1bc34c7d7..5fc2bac2d3 100644 --- a/contrib/libs/icu/i18n/collationtailoring.h +++ b/contrib/libs/icu/i18n/collationtailoring.h @@ -50,7 +50,7 @@ struct U_I18N_API CollationTailoring : public SharedObject { virtual ~CollationTailoring(); /** - * Returns TRUE if the constructor could not initialize properly. + * Returns true if the constructor could not initialize properly. */ UBool isBogus() { return settings == NULL; } diff --git a/contrib/libs/icu/i18n/collationweights.h b/contrib/libs/icu/i18n/collationweights.h index b415882184..0d20b927b2 100644 --- a/contrib/libs/icu/i18n/collationweights.h +++ b/contrib/libs/icu/i18n/collationweights.h @@ -62,7 +62,7 @@ public: * weights less than this one. * @param n The number of collation element weights w necessary such that * lowerLimit<w<upperLimit in lexical order. - * @return TRUE if it is possible to fit n elements between the limits + * @return true if it is possible to fit n elements between the limits */ UBool allocWeights(uint32_t lowerLimit, uint32_t upperLimit, int32_t n); diff --git a/contrib/libs/icu/i18n/coptccal.h b/contrib/libs/icu/i18n/coptccal.h index e9e812dbc7..5c51af04ca 100644 --- a/contrib/libs/icu/i18n/coptccal.h +++ b/contrib/libs/icu/i18n/coptccal.h @@ -145,14 +145,14 @@ public: * @return return a polymorphic copy of this calendar. 
* @internal */ - virtual CopticCalendar* clone() const; + virtual CopticCalendar* clone() const override; /** * return the calendar type, "coptic" * @return calendar type * @internal */ - const char * getType() const; + const char * getType() const override; protected: //------------------------------------------------------------------------- @@ -163,32 +163,32 @@ protected: * Return the extended year defined by the current fields. * @internal */ - virtual int32_t handleGetExtendedYear(); + virtual int32_t handleGetExtendedYear() override; /** * Compute fields from the JD * @internal */ - virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status) override; /** * Returns the date of the start of the default century * @return start of century - in milliseconds since epoch, 1970 * @internal */ - virtual UDate defaultCenturyStart() const; + virtual UDate defaultCenturyStart() const override; /** * Returns the year in which the default century begins * @internal */ - virtual int32_t defaultCenturyStartYear() const; + virtual int32_t defaultCenturyStartYear() const override; /** * Return the date offset from Julian * @internal */ - virtual int32_t getJDEpochOffset() const; + virtual int32_t getJDEpochOffset() const override; public: @@ -202,7 +202,7 @@ public: * same class ID. Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. 
This is useful only for comparing to a return diff --git a/contrib/libs/icu/i18n/cpdtrans.cpp b/contrib/libs/icu/i18n/cpdtrans.cpp index 82ee54a77b..dc0217ba61 100644 --- a/contrib/libs/icu/i18n/cpdtrans.cpp +++ b/contrib/libs/icu/i18n/cpdtrans.cpp @@ -282,6 +282,7 @@ void CompoundTransliterator::freeTransliterators(void) { CompoundTransliterator& CompoundTransliterator::operator=( const CompoundTransliterator& t) { + if (this == &t) { return *this; } // self-assignment: no-op Transliterator::operator=(t); int32_t i = 0; UBool failed = FALSE; diff --git a/contrib/libs/icu/i18n/cpdtrans.h b/contrib/libs/icu/i18n/cpdtrans.h index a2c7abbd69..af60cb827e 100644 --- a/contrib/libs/icu/i18n/cpdtrans.h +++ b/contrib/libs/icu/i18n/cpdtrans.h @@ -98,7 +98,7 @@ public: /** * Transliterator API. */ - virtual CompoundTransliterator* clone() const; + virtual CompoundTransliterator* clone() const override; /** * Returns the number of transliterators in this chain. @@ -131,39 +131,39 @@ public: * to recreate this transliterator. * @param result the string to receive the rules. Previous * contents will be deleted. - * @param escapeUnprintable if TRUE then convert unprintable + * @param escapeUnprintable if true then convert unprintable * character to their hex escape representations, \uxxxx or * \Uxxxxxxxx. Unprintable characters are those other than * U+000A, U+0020..U+007E. */ virtual UnicodeString& toRules(UnicodeString& result, - UBool escapeUnprintable) const; + UBool escapeUnprintable) const override; protected: /** * Implement Transliterator framework */ - virtual void handleGetSourceSet(UnicodeSet& result) const; + virtual void handleGetSourceSet(UnicodeSet& result) const override; public: /** * Override Transliterator framework */ - virtual UnicodeSet& getTargetSet(UnicodeSet& result) const; + virtual UnicodeSet& getTargetSet(UnicodeSet& result) const override; protected: /** * Implements {@link Transliterator#handleTransliterate}. 
*/ virtual void handleTransliterate(Replaceable& text, UTransPosition& idx, - UBool incremental) const; + UBool incremental) const override; public: /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. diff --git a/contrib/libs/icu/i18n/csdetect.cpp b/contrib/libs/icu/i18n/csdetect.cpp index babb308430..84f0776542 100644 --- a/contrib/libs/icu/i18n/csdetect.cpp +++ b/contrib/libs/icu/i18n/csdetect.cpp @@ -385,7 +385,7 @@ enumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) { // ucsdet_getDetectableCharsets UBool *enabledArray = ((Context *)en->context)->enabledRecognizers; if (enabledArray != NULL) { - // custome set + // custom set while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) { if (enabledArray[((Context *)en->context)->currIndex]) { currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName(); diff --git a/contrib/libs/icu/i18n/csr2022.cpp b/contrib/libs/icu/i18n/csr2022.cpp index ff26e5e911..e064c426a2 100644 --- a/contrib/libs/icu/i18n/csr2022.cpp +++ b/contrib/libs/icu/i18n/csr2022.cpp @@ -83,7 +83,7 @@ scanInput: } // - // Initial quality is based on relative proportion of recongized vs. + // Initial quality is based on relative proportion of recognized vs. // unrecognized escape sequences. // All good: quality = 100; // half or less good: quality = 0; diff --git a/contrib/libs/icu/i18n/csr2022.h b/contrib/libs/icu/i18n/csr2022.h index cde9019b46..4418728f0e 100644 --- a/contrib/libs/icu/i18n/csr2022.h +++ b/contrib/libs/icu/i18n/csr2022.h @@ -21,7 +21,7 @@ U_NAMESPACE_BEGIN class CharsetMatch; /** - * class CharsetRecog_2022 part of the ICU charset detection imlementation. + * class CharsetRecog_2022 part of the ICU charset detection implementation. 
* This is a superclass for the individual detectors for * each of the detectable members of the ISO 2022 family * of encodings. @@ -62,9 +62,9 @@ class CharsetRecog_2022JP :public CharsetRecog_2022 public: virtual ~CharsetRecog_2022JP(); - const char *getName() const; + const char *getName() const override; - UBool match(InputText *textIn, CharsetMatch *results) const; + UBool match(InputText *textIn, CharsetMatch *results) const override; }; #if !UCONFIG_ONLY_HTML_CONVERSION @@ -72,9 +72,9 @@ class CharsetRecog_2022KR :public CharsetRecog_2022 { public: virtual ~CharsetRecog_2022KR(); - const char *getName() const; + const char *getName() const override; - UBool match(InputText *textIn, CharsetMatch *results) const; + UBool match(InputText *textIn, CharsetMatch *results) const override; }; @@ -83,9 +83,9 @@ class CharsetRecog_2022CN :public CharsetRecog_2022 public: virtual ~CharsetRecog_2022CN(); - const char* getName() const; + const char* getName() const override; - UBool match(InputText *textIn, CharsetMatch *results) const; + UBool match(InputText *textIn, CharsetMatch *results) const override; }; #endif diff --git a/contrib/libs/icu/i18n/csrecog.h b/contrib/libs/icu/i18n/csrecog.h index 713fd4e86b..944a5007fe 100644 --- a/contrib/libs/icu/i18n/csrecog.h +++ b/contrib/libs/icu/i18n/csrecog.h @@ -43,8 +43,8 @@ class CharsetRecognizer : public UMemory * Try the given input text against this Charset, and fill in the results object * with the quality of the match plus other information related to the match. * - * Return TRUE if the the input bytes are a potential match, and - * FALSE if the input data is not compatible with, or illegal in this charset. + * Return true if the the input bytes are a potential match, and + * false if the input data is not compatible with, or illegal in this charset. 
*/ virtual UBool match(InputText *textIn, CharsetMatch *results) const = 0; diff --git a/contrib/libs/icu/i18n/csrmbcs.cpp b/contrib/libs/icu/i18n/csrmbcs.cpp index 5579ba8ec3..4c5bdfa560 100644 --- a/contrib/libs/icu/i18n/csrmbcs.cpp +++ b/contrib/libs/icu/i18n/csrmbcs.cpp @@ -23,7 +23,7 @@ U_NAMESPACE_BEGIN static const uint16_t commonChars_sjis [] = { // TODO: This set of data comes from the character frequency- -// of-occurence analysis tool. The data needs to be moved +// of-occurrence analysis tool. The data needs to be moved // into a resource and loaded from there. 0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0, 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5, @@ -34,7 +34,7 @@ static const uint16_t commonChars_sjis [] = { static const uint16_t commonChars_euc_jp[] = { // TODO: This set of data comes from the character frequency- -// of-occurence analysis tool. The data needs to be moved +// of-occurrence analysis tool. The data needs to be moved // into a resource and loaded from there. 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2, 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3, @@ -49,7 +49,7 @@ static const uint16_t commonChars_euc_jp[] = { static const uint16_t commonChars_euc_kr[] = { // TODO: This set of data comes from the character frequency- -// of-occurence analysis tool. The data needs to be moved +// of-occurrence analysis tool. The data needs to be moved // into a resource and loaded from there. 0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc, 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9, @@ -64,7 +64,7 @@ static const uint16_t commonChars_euc_kr[] = { static const uint16_t commonChars_big5[] = { // TODO: This set of data comes from the character frequency- -// of-occurence analysis tool. The data needs to be moved +// of-occurrence analysis tool. 
The data needs to be moved // into a resource and loaded from there. 0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446, 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3, @@ -79,7 +79,7 @@ static const uint16_t commonChars_big5[] = { static const uint16_t commonChars_gb_18030[] = { // TODO: This set of data comes from the character frequency- -// of-occurence analysis tool. The data needs to be moved +// of-occurrence analysis tool. The data needs to be moved // into a resource and loaded from there. 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac, 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4, @@ -186,7 +186,7 @@ int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars if (doubleByteCharCount == 0 && totalCharCount < 10) { // There weren't any multibyte sequences, and there was a low density of non-ASCII single bytes. // We don't have enough data to have any confidence. - // Statistical analysis of single byte non-ASCII charcters would probably help here. + // Statistical analysis of single byte non-ASCII characters would probably help here. confidence = 0; } else { @@ -209,7 +209,7 @@ int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars } if (commonChars == 0) { - // We have no statistics on frequently occuring characters. + // We have no statistics on frequently occurring characters. // Assess confidence purely on having a reasonable number of // multi-byte characters (the more the better) confidence = 30 + doubleByteCharCount - 20*badCharCount; @@ -219,7 +219,7 @@ int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars } } else { // - // Frequency of occurence statistics exist. + // Frequency of occurrence statistics exist. 
// double maxVal = log((double)doubleByteCharCount / 4); /*(float)?*/ diff --git a/contrib/libs/icu/i18n/csrmbcs.h b/contrib/libs/icu/i18n/csrmbcs.h index 8ccf1d56a9..ff7fc4e2a7 100644 --- a/contrib/libs/icu/i18n/csrmbcs.h +++ b/contrib/libs/icu/i18n/csrmbcs.h @@ -71,9 +71,9 @@ public: * @return the charset name. */ - const char *getName() const = 0; - const char *getLanguage() const = 0; - UBool match(InputText* input, CharsetMatch *results) const = 0; + const char *getName() const override = 0; + const char *getLanguage() const override = 0; + UBool match(InputText* input, CharsetMatch *results) const override = 0; /** * Get the next character (however many bytes it is) from the input data @@ -100,12 +100,12 @@ class CharsetRecog_sjis : public CharsetRecog_mbcs { public: virtual ~CharsetRecog_sjis(); - UBool nextChar(IteratedChar *it, InputText *det) const; + UBool nextChar(IteratedChar *it, InputText *det) const override; - UBool match(InputText* input, CharsetMatch *results) const; + UBool match(InputText* input, CharsetMatch *results) const override; - const char *getName() const; - const char *getLanguage() const; + const char *getName() const override; + const char *getLanguage() const override; }; @@ -121,17 +121,17 @@ class CharsetRecog_euc : public CharsetRecog_mbcs public: virtual ~CharsetRecog_euc(); - const char *getName() const = 0; - const char *getLanguage() const = 0; + const char *getName() const override = 0; + const char *getLanguage() const override = 0; - UBool match(InputText* input, CharsetMatch *results) const = 0; + UBool match(InputText* input, CharsetMatch *results) const override = 0; /* * (non-Javadoc) * Get the next character value for EUC based encodings. * Character "value" is simply the raw bytes that make up the character * packed into an int. 
*/ - UBool nextChar(IteratedChar *it, InputText *det) const; + UBool nextChar(IteratedChar *it, InputText *det) const override; }; /** @@ -143,10 +143,10 @@ class CharsetRecog_euc_jp : public CharsetRecog_euc public: virtual ~CharsetRecog_euc_jp(); - const char *getName() const; - const char *getLanguage() const; + const char *getName() const override; + const char *getLanguage() const override; - UBool match(InputText* input, CharsetMatch *results) const; + UBool match(InputText* input, CharsetMatch *results) const override; }; /** @@ -158,10 +158,10 @@ class CharsetRecog_euc_kr : public CharsetRecog_euc public: virtual ~CharsetRecog_euc_kr(); - const char *getName() const; - const char *getLanguage() const; + const char *getName() const override; + const char *getLanguage() const override; - UBool match(InputText* input, CharsetMatch *results) const; + UBool match(InputText* input, CharsetMatch *results) const override; }; /** @@ -174,12 +174,12 @@ class CharsetRecog_big5 : public CharsetRecog_mbcs public: virtual ~CharsetRecog_big5(); - UBool nextChar(IteratedChar* it, InputText* det) const; + UBool nextChar(IteratedChar* it, InputText* det) const override; - const char *getName() const; - const char *getLanguage() const; + const char *getName() const override; + const char *getLanguage() const override; - UBool match(InputText* input, CharsetMatch *results) const; + UBool match(InputText* input, CharsetMatch *results) const override; }; @@ -193,12 +193,12 @@ class CharsetRecog_gb_18030 : public CharsetRecog_mbcs public: virtual ~CharsetRecog_gb_18030(); - UBool nextChar(IteratedChar* it, InputText* det) const; + UBool nextChar(IteratedChar* it, InputText* det) const override; - const char *getName() const; - const char *getLanguage() const; + const char *getName() const override; + const char *getLanguage() const override; - UBool match(InputText* input, CharsetMatch *results) const; + UBool match(InputText* input, CharsetMatch *results) const override; }; 
U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/csrsbcs.h b/contrib/libs/icu/i18n/csrsbcs.h index 9768783679..96f982c59b 100644 --- a/contrib/libs/icu/i18n/csrsbcs.h +++ b/contrib/libs/icu/i18n/csrsbcs.h @@ -63,8 +63,8 @@ public: private: int32_t alef; int32_t isLamAlef(int32_t b); - int32_t nextByte(InputText *det); - void parseCharacters(InputText *det); + int32_t nextByte(InputText *det) override; + void parseCharacters(InputText *det) override; }; #endif @@ -74,8 +74,8 @@ class CharsetRecog_sbcs : public CharsetRecognizer public: CharsetRecog_sbcs(); virtual ~CharsetRecog_sbcs(); - virtual const char *getName() const = 0; - virtual UBool match(InputText *det, CharsetMatch *results) const = 0; + virtual const char *getName() const override = 0; + virtual UBool match(InputText *det, CharsetMatch *results) const override = 0; virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; }; @@ -83,23 +83,23 @@ class CharsetRecog_8859_1 : public CharsetRecog_sbcs { public: virtual ~CharsetRecog_8859_1(); - const char *getName() const; - virtual UBool match(InputText *det, CharsetMatch *results) const; + const char *getName() const override; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_8859_2 : public CharsetRecog_sbcs { public: virtual ~CharsetRecog_8859_2(); - const char *getName() const; - virtual UBool match(InputText *det, CharsetMatch *results) const; + const char *getName() const override; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_8859_5 : public CharsetRecog_sbcs { public: virtual ~CharsetRecog_8859_5(); - const char *getName() const; + const char *getName() const override; }; class CharsetRecog_8859_6 : public CharsetRecog_sbcs @@ -107,7 +107,7 @@ class CharsetRecog_8859_6 : public CharsetRecog_sbcs public: virtual ~CharsetRecog_8859_6(); - const char *getName() const; + const char *getName() const override; 
}; class CharsetRecog_8859_7 : public CharsetRecog_sbcs @@ -115,7 +115,7 @@ class CharsetRecog_8859_7 : public CharsetRecog_sbcs public: virtual ~CharsetRecog_8859_7(); - const char *getName() const; + const char *getName() const override; }; class CharsetRecog_8859_8 : public CharsetRecog_sbcs @@ -123,7 +123,7 @@ class CharsetRecog_8859_8 : public CharsetRecog_sbcs public: virtual ~CharsetRecog_8859_8(); - virtual const char *getName() const; + virtual const char *getName() const override; }; class CharsetRecog_8859_9 : public CharsetRecog_sbcs @@ -131,7 +131,7 @@ class CharsetRecog_8859_9 : public CharsetRecog_sbcs public: virtual ~CharsetRecog_8859_9(); - const char *getName() const; + const char *getName() const override; }; @@ -141,9 +141,9 @@ class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5 public: virtual ~CharsetRecog_8859_5_ru(); - const char *getLanguage() const; + const char *getLanguage() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6 @@ -151,9 +151,9 @@ class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6 public: virtual ~CharsetRecog_8859_6_ar(); - const char *getLanguage() const; + const char *getLanguage() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_8859_7_el : public CharsetRecog_8859_7 @@ -161,9 +161,9 @@ class CharsetRecog_8859_7_el : public CharsetRecog_8859_7 public: virtual ~CharsetRecog_8859_7_el(); - const char *getLanguage() const; + const char *getLanguage() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8 @@ -171,11 +171,11 @@ class 
CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8 public: virtual ~CharsetRecog_8859_8_I_he(); - const char *getName() const; + const char *getName() const override; - const char *getLanguage() const; + const char *getLanguage() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_8859_8_he : public CharsetRecog_8859_8 @@ -183,9 +183,9 @@ class CharsetRecog_8859_8_he : public CharsetRecog_8859_8 public: virtual ~CharsetRecog_8859_8_he (); - const char *getLanguage() const; + const char *getLanguage() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9 @@ -193,9 +193,9 @@ class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9 public: virtual ~CharsetRecog_8859_9_tr (); - const char *getLanguage() const; + const char *getLanguage() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_windows_1256 : public CharsetRecog_sbcs @@ -203,11 +203,11 @@ class CharsetRecog_windows_1256 : public CharsetRecog_sbcs public: virtual ~CharsetRecog_windows_1256(); - const char *getName() const; + const char *getName() const override; - const char *getLanguage() const; + const char *getLanguage() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_windows_1251 : public CharsetRecog_sbcs @@ -215,11 +215,11 @@ class CharsetRecog_windows_1251 : public CharsetRecog_sbcs public: virtual ~CharsetRecog_windows_1251(); - const char *getName() const; + const char *getName() const override; - const char *getLanguage() const; + const char 
*getLanguage() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; @@ -228,11 +228,11 @@ class CharsetRecog_KOI8_R : public CharsetRecog_sbcs public: virtual ~CharsetRecog_KOI8_R(); - const char *getName() const; + const char *getName() const override; - const char *getLanguage() const; + const char *getLanguage() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; #if !UCONFIG_ONLY_HTML_CONVERSION @@ -241,24 +241,24 @@ class CharsetRecog_IBM424_he : public CharsetRecog_sbcs public: virtual ~CharsetRecog_IBM424_he(); - const char *getLanguage() const; + const char *getLanguage() const override; }; class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he { public: virtual ~CharsetRecog_IBM424_he_rtl(); - const char *getName() const; + const char *getName() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he { virtual ~CharsetRecog_IBM424_he_ltr(); - const char *getName() const; + const char *getName() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs @@ -266,8 +266,8 @@ class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs public: virtual ~CharsetRecog_IBM420_ar(); - const char *getLanguage() const; - int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; + const char *getLanguage() const override; + int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const override; }; @@ -275,17 +275,17 @@ class CharsetRecog_IBM420_ar_rtl : 
public CharsetRecog_IBM420_ar { public: virtual ~CharsetRecog_IBM420_ar_rtl(); - const char *getName() const; + const char *getName() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar { virtual ~CharsetRecog_IBM420_ar_ltr(); - const char *getName() const; + const char *getName() const override; - virtual UBool match(InputText *det, CharsetMatch *results) const; + virtual UBool match(InputText *det, CharsetMatch *results) const override; }; #endif diff --git a/contrib/libs/icu/i18n/csrucode.cpp b/contrib/libs/icu/i18n/csrucode.cpp index 59f2dbe284..480dae1400 100644 --- a/contrib/libs/icu/i18n/csrucode.cpp +++ b/contrib/libs/icu/i18n/csrucode.cpp @@ -144,7 +144,7 @@ UBool CharsetRecog_UTF_32::match(InputText* textIn, CharsetMatch *results) const } - // Cook up some sort of confidence score, based on presense of a BOM + // Cook up some sort of confidence score, based on presence of a BOM // and the existence of valid and/or invalid multi-byte sequences. if (hasBOM && numInvalid==0) { confidence = 100; @@ -155,7 +155,7 @@ UBool CharsetRecog_UTF_32::match(InputText* textIn, CharsetMatch *results) const } else if (numValid > 0 && numInvalid == 0) { confidence = 80; } else if (numValid > numInvalid*10) { - // Probably corruput UTF-32BE data. Valid sequences aren't likely by chance. + // Probably corrupt UTF-32BE data. Valid sequences aren't likely by chance. 
confidence = 25; } diff --git a/contrib/libs/icu/i18n/csrucode.h b/contrib/libs/icu/i18n/csrucode.h index cef35447fb..78e08d22f1 100644 --- a/contrib/libs/icu/i18n/csrucode.h +++ b/contrib/libs/icu/i18n/csrucode.h @@ -33,12 +33,12 @@ public: /* (non-Javadoc) * @see com.ibm.icu.text.CharsetRecognizer#getName() */ - const char* getName() const = 0; + const char* getName() const override = 0; /* (non-Javadoc) * @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector) */ - UBool match(InputText* textIn, CharsetMatch *results) const = 0; + UBool match(InputText* textIn, CharsetMatch *results) const override = 0; }; @@ -48,9 +48,9 @@ public: virtual ~CharsetRecog_UTF_16_BE(); - const char *getName() const; + const char *getName() const override; - UBool match(InputText* textIn, CharsetMatch *results) const; + UBool match(InputText* textIn, CharsetMatch *results) const override; }; class CharsetRecog_UTF_16_LE : public CharsetRecog_Unicode @@ -59,9 +59,9 @@ public: virtual ~CharsetRecog_UTF_16_LE(); - const char *getName() const; + const char *getName() const override; - UBool match(InputText* textIn, CharsetMatch *results) const; + UBool match(InputText* textIn, CharsetMatch *results) const override; }; class CharsetRecog_UTF_32 : public CharsetRecog_Unicode @@ -72,34 +72,34 @@ public: virtual ~CharsetRecog_UTF_32(); - const char* getName() const = 0; + const char* getName() const override = 0; - UBool match(InputText* textIn, CharsetMatch *results) const; + UBool match(InputText* textIn, CharsetMatch *results) const override; }; class CharsetRecog_UTF_32_BE : public CharsetRecog_UTF_32 { protected: - int32_t getChar(const uint8_t *input, int32_t index) const; + int32_t getChar(const uint8_t *input, int32_t index) const override; public: virtual ~CharsetRecog_UTF_32_BE(); - const char *getName() const; + const char *getName() const override; }; class CharsetRecog_UTF_32_LE : public CharsetRecog_UTF_32 { protected: - int32_t getChar(const uint8_t 
*input, int32_t index) const; + int32_t getChar(const uint8_t *input, int32_t index) const override; public: virtual ~CharsetRecog_UTF_32_LE(); - const char* getName() const; + const char* getName() const override; }; U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/csrutf8.cpp b/contrib/libs/icu/i18n/csrutf8.cpp index b42bd8b39e..3f16224ea6 100644 --- a/contrib/libs/icu/i18n/csrutf8.cpp +++ b/contrib/libs/icu/i18n/csrutf8.cpp @@ -99,7 +99,7 @@ UBool CharsetRecog_UTF8::match(InputText* input, CharsetMatch *results) const { // accepts ASCII with confidence = 10. confidence = 15; } else if (numValid > numInvalid*10) { - // Probably corruput utf-8 data. Valid sequences aren't likely by chance. + // Probably corrupt utf-8 data. Valid sequences aren't likely by chance. confidence = 25; } diff --git a/contrib/libs/icu/i18n/csrutf8.h b/contrib/libs/icu/i18n/csrutf8.h index 6089eb6f75..bcfb38ac95 100644 --- a/contrib/libs/icu/i18n/csrutf8.h +++ b/contrib/libs/icu/i18n/csrutf8.h @@ -29,12 +29,12 @@ class CharsetRecog_UTF8: public CharsetRecognizer { virtual ~CharsetRecog_UTF8(); - const char *getName() const; + const char *getName() const override; /* (non-Javadoc) * @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector) */ - UBool match(InputText *input, CharsetMatch *results) const; + UBool match(InputText *input, CharsetMatch *results) const override; }; diff --git a/contrib/libs/icu/i18n/currfmt.h b/contrib/libs/icu/i18n/currfmt.h index 69a031957b..2a75cae2bb 100644 --- a/contrib/libs/icu/i18n/currfmt.h +++ b/contrib/libs/icu/i18n/currfmt.h @@ -57,7 +57,7 @@ class CurrencyFormat : public MeasureFormat { /** * Override Format API. 
*/ - virtual CurrencyFormat* clone() const; + virtual CurrencyFormat* clone() const override; using MeasureFormat::format; @@ -68,19 +68,19 @@ class CurrencyFormat : public MeasureFormat { virtual UnicodeString& format(const Formattable& obj, UnicodeString& appendTo, FieldPosition& pos, - UErrorCode& ec) const; + UErrorCode& ec) const override; /** * Override Format API. */ virtual void parseObject(const UnicodeString& source, Formattable& result, - ParsePosition& pos) const; + ParsePosition& pos) const override; /** * Override Format API. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * Returns the class ID for this class. diff --git a/contrib/libs/icu/i18n/currpinf.cpp b/contrib/libs/icu/i18n/currpinf.cpp index af9c837af8..a2676ab5a1 100644 --- a/contrib/libs/icu/i18n/currpinf.cpp +++ b/contrib/libs/icu/i18n/currpinf.cpp @@ -145,7 +145,7 @@ CurrencyPluralInfo::~CurrencyPluralInfo() { fLocale = nullptr; } -UBool +bool CurrencyPluralInfo::operator==(const CurrencyPluralInfo& info) const { #ifdef CURRENCY_PLURAL_INFO_DEBUG if (*fPluralRules == *info.fPluralRules) { diff --git a/contrib/libs/icu/i18n/dangical.cpp b/contrib/libs/icu/i18n/dangical.cpp index 02db40368e..57fe80220b 100644 --- a/contrib/libs/icu/i18n/dangical.cpp +++ b/contrib/libs/icu/i18n/dangical.cpp @@ -52,7 +52,7 @@ U_NAMESPACE_BEGIN //------------------------------------------------------------------------- DangiCalendar::DangiCalendar(const Locale& aLocale, UErrorCode& success) -: ChineseCalendar(aLocale, DANGI_EPOCH_YEAR, getDangiCalZoneAstroCalc(), success) +: ChineseCalendar(aLocale, DANGI_EPOCH_YEAR, getDangiCalZoneAstroCalc(success), success) { } @@ -103,32 +103,41 @@ const char *DangiCalendar::getType() const { * 1898-1911: GMT+8 * 1912- : GMT+9 */ -static void U_CALLCONV initDangiCalZoneAstroCalc(void) { - U_ASSERT(gDangiCalendarZoneAstroCalc == NULL); +static void U_CALLCONV initDangiCalZoneAstroCalc(UErrorCode &status) { + 
U_ASSERT(gDangiCalendarZoneAstroCalc == nullptr); const UDate millis1897[] = { (UDate)((1897 - 1970) * 365 * kOneDay) }; // some days of error is not a problem here const UDate millis1898[] = { (UDate)((1898 - 1970) * 365 * kOneDay) }; // some days of error is not a problem here const UDate millis1912[] = { (UDate)((1912 - 1970) * 365 * kOneDay) }; // this doesn't create an issue for 1911/12/20 - InitialTimeZoneRule* initialTimeZone = new InitialTimeZoneRule(UNICODE_STRING_SIMPLE("GMT+8"), 8*kOneHour, 0); - TimeZoneRule* rule1897 = new TimeArrayTimeZoneRule(UNICODE_STRING_SIMPLE("Korean 1897"), 7*kOneHour, 0, millis1897, 1, DateTimeRule::STANDARD_TIME); - TimeZoneRule* rule1898to1911 = new TimeArrayTimeZoneRule(UNICODE_STRING_SIMPLE("Korean 1898-1911"), 8*kOneHour, 0, millis1898, 1, DateTimeRule::STANDARD_TIME); - TimeZoneRule* ruleFrom1912 = new TimeArrayTimeZoneRule(UNICODE_STRING_SIMPLE("Korean 1912-"), 9*kOneHour, 0, millis1912, 1, DateTimeRule::STANDARD_TIME); - UErrorCode status = U_ZERO_ERROR; - RuleBasedTimeZone* dangiCalZoneAstroCalc = new RuleBasedTimeZone(UNICODE_STRING_SIMPLE("KOREA_ZONE"), initialTimeZone); // adopts initialTimeZone - dangiCalZoneAstroCalc->addTransitionRule(rule1897, status); // adopts rule1897 - dangiCalZoneAstroCalc->addTransitionRule(rule1898to1911, status); - dangiCalZoneAstroCalc->addTransitionRule(ruleFrom1912, status); + LocalPointer<InitialTimeZoneRule> initialTimeZone(new InitialTimeZoneRule( + UnicodeString(u"GMT+8"), 8*kOneHour, 0), status); + + LocalPointer<TimeZoneRule> rule1897(new TimeArrayTimeZoneRule( + UnicodeString(u"Korean 1897"), 7*kOneHour, 0, millis1897, 1, DateTimeRule::STANDARD_TIME), status); + + LocalPointer<TimeZoneRule> rule1898to1911(new TimeArrayTimeZoneRule( + UnicodeString(u"Korean 1898-1911"), 8*kOneHour, 0, millis1898, 1, DateTimeRule::STANDARD_TIME), status); + + LocalPointer<TimeZoneRule> ruleFrom1912(new TimeArrayTimeZoneRule( + UnicodeString(u"Korean 1912-"), 9*kOneHour, 0, millis1912, 1, 
DateTimeRule::STANDARD_TIME), status); + + LocalPointer<RuleBasedTimeZone> dangiCalZoneAstroCalc(new RuleBasedTimeZone( + UnicodeString(u"KOREA_ZONE"), initialTimeZone.orphan()), status); // adopts initialTimeZone + + if (U_FAILURE(status)) { + return; + } + dangiCalZoneAstroCalc->addTransitionRule(rule1897.orphan(), status); // adopts rule1897 + dangiCalZoneAstroCalc->addTransitionRule(rule1898to1911.orphan(), status); + dangiCalZoneAstroCalc->addTransitionRule(ruleFrom1912.orphan(), status); dangiCalZoneAstroCalc->complete(status); if (U_SUCCESS(status)) { - gDangiCalendarZoneAstroCalc = dangiCalZoneAstroCalc; - } else { - delete dangiCalZoneAstroCalc; - gDangiCalendarZoneAstroCalc = NULL; + gDangiCalendarZoneAstroCalc = dangiCalZoneAstroCalc.orphan(); } ucln_i18n_registerCleanup(UCLN_I18N_DANGI_CALENDAR, calendar_dangi_cleanup); } -const TimeZone* DangiCalendar::getDangiCalZoneAstroCalc(void) const { - umtx_initOnce(gDangiCalendarInitOnce, &initDangiCalZoneAstroCalc); +const TimeZone* DangiCalendar::getDangiCalZoneAstroCalc(UErrorCode &status) const { + umtx_initOnce(gDangiCalendarInitOnce, &initDangiCalZoneAstroCalc, status); return gDangiCalendarZoneAstroCalc; } diff --git a/contrib/libs/icu/i18n/dangical.h b/contrib/libs/icu/i18n/dangical.h index ece805e36d..9d0437264e 100644 --- a/contrib/libs/icu/i18n/dangical.h +++ b/contrib/libs/icu/i18n/dangical.h @@ -66,7 +66,7 @@ class DangiCalendar : public ChineseCalendar { * Clone. * @internal */ - virtual DangiCalendar* clone() const; + virtual DangiCalendar* clone() const override; //---------------------------------------------------------------------- // Internal methods & astronomical calculations @@ -74,7 +74,7 @@ class DangiCalendar : public ChineseCalendar { private: - const TimeZone* getDangiCalZoneAstroCalc(void) const; + const TimeZone* getDangiCalZoneAstroCalc(UErrorCode &status) const; // UObject stuff public: @@ -83,7 +83,7 @@ class DangiCalendar : public ChineseCalendar { * same class ID. 
Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. This is useful only for comparing to a return @@ -104,7 +104,7 @@ class DangiCalendar : public ChineseCalendar { * @return calendar type * @internal */ - const char * getType() const; + const char * getType() const override; private: diff --git a/contrib/libs/icu/i18n/datefmt.cpp b/contrib/libs/icu/i18n/datefmt.cpp index a0e039cd50..fed8f79aa0 100644 --- a/contrib/libs/icu/i18n/datefmt.cpp +++ b/contrib/libs/icu/i18n/datefmt.cpp @@ -68,6 +68,14 @@ const DateFmtBestPattern *LocaleCacheKey<DateFmtBestPattern>::createObject( class U_I18N_API DateFmtBestPatternKey : public LocaleCacheKey<DateFmtBestPattern> { private: UnicodeString fSkeleton; +protected: + virtual bool equals(const CacheKeyBase &other) const override { + if (!LocaleCacheKey<DateFmtBestPattern>::equals(other)) { + return false; + } + // We know that this and other are of same class if we get this far. + return operator==(static_cast<const DateFmtBestPatternKey &>(other)); + } public: DateFmtBestPatternKey( const Locale &loc, @@ -79,27 +87,17 @@ public: LocaleCacheKey<DateFmtBestPattern>(other), fSkeleton(other.fSkeleton) { } virtual ~DateFmtBestPatternKey(); - virtual int32_t hashCode() const { + virtual int32_t hashCode() const override { return (int32_t)(37u * (uint32_t)LocaleCacheKey<DateFmtBestPattern>::hashCode() + (uint32_t)fSkeleton.hashCode()); } - virtual UBool operator==(const CacheKeyBase &other) const { - // reflexive - if (this == &other) { - return TRUE; - } - if (!LocaleCacheKey<DateFmtBestPattern>::operator==(other)) { - return FALSE; - } - // We know that this and other are of same class if we get this far. 
- const DateFmtBestPatternKey &realOther = - static_cast<const DateFmtBestPatternKey &>(other); - return (realOther.fSkeleton == fSkeleton); + inline bool operator==(const DateFmtBestPatternKey &other) const { + return fSkeleton == other.fSkeleton; } - virtual CacheKeyBase *clone() const { + virtual CacheKeyBase *clone() const override { return new DateFmtBestPatternKey(*this); } virtual const DateFmtBestPattern *createObject( - const void * /*unused*/, UErrorCode &status) const { + const void * /*unused*/, UErrorCode &status) const override { LocalPointer<DateTimePatternGenerator> dtpg( DateTimePatternGenerator::createInstance(fLoc, status)); if (U_FAILURE(status)) { @@ -174,7 +172,7 @@ DateFormat::~DateFormat() //---------------------------------------------------------------------- -UBool +bool DateFormat::operator==(const Format& other) const { // This protected comparison operator should only be called by subclasses diff --git a/contrib/libs/icu/i18n/dayperiodrules.cpp b/contrib/libs/icu/i18n/dayperiodrules.cpp index e364ecb708..efe92dd794 100644 --- a/contrib/libs/icu/i18n/dayperiodrules.cpp +++ b/contrib/libs/icu/i18n/dayperiodrules.cpp @@ -50,7 +50,7 @@ struct DayPeriodRulesDataSink : public ResourceSink { } virtual ~DayPeriodRulesDataSink(); - virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) { + virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) override { ResourceTable dayPeriodData = value.getTable(errorCode); if (U_FAILURE(errorCode)) { return; } @@ -282,7 +282,7 @@ struct DayPeriodRulesDataSink : public ResourceSink { struct DayPeriodRulesCountSink : public ResourceSink { virtual ~DayPeriodRulesCountSink(); - virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) { + virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) override { ResourceTable rules = value.getTable(errorCode); if (U_FAILURE(errorCode)) { return; } 
diff --git a/contrib/libs/icu/i18n/dayperiodrules.h b/contrib/libs/icu/i18n/dayperiodrules.h index 610c6175bf..4bfca762b8 100644 --- a/contrib/libs/icu/i18n/dayperiodrules.h +++ b/contrib/libs/icu/i18n/dayperiodrules.h @@ -66,7 +66,7 @@ private: // Sets period type for all hours in [startHour, limitHour). void add(int32_t startHour, int32_t limitHour, DayPeriod period); - // Returns TRUE if for all i, DayPeriodForHour[i] has a type other than UNKNOWN. + // Returns true if for all i, DayPeriodForHour[i] has a type other than UNKNOWN. // Values of HasNoon and HasMidnight do not affect the return value. UBool allHoursAreSet(); diff --git a/contrib/libs/icu/i18n/dcfmtsym.cpp b/contrib/libs/icu/i18n/dcfmtsym.cpp index 15418bfe65..4f5bae4e11 100644 --- a/contrib/libs/icu/i18n/dcfmtsym.cpp +++ b/contrib/libs/icu/i18n/dcfmtsym.cpp @@ -92,6 +92,7 @@ static const char *gNumberElementKeys[DecimalFormatSymbols::kFormatSymbolCount] NULL, /* eight digit - get it from the numbering system */ NULL, /* nine digit - get it from the numbering system */ "superscriptingExponent", /* Multiplication (x) symbol for exponents */ + "approximatelySign" /* Approximately sign symbol */ }; // ------------------------------------- @@ -174,29 +175,29 @@ DecimalFormatSymbols::operator=(const DecimalFormatSymbols& rhs) // ------------------------------------- -UBool +bool DecimalFormatSymbols::operator==(const DecimalFormatSymbols& that) const { if (this == &that) { - return TRUE; + return true; } if (fIsCustomCurrencySymbol != that.fIsCustomCurrencySymbol) { - return FALSE; + return false; } if (fIsCustomIntlCurrencySymbol != that.fIsCustomIntlCurrencySymbol) { - return FALSE; + return false; } for(int32_t i = 0; i < (int32_t)kFormatSymbolCount; ++i) { if(fSymbols[(ENumberFormatSymbol)i] != that.fSymbols[(ENumberFormatSymbol)i]) { - return FALSE; + return false; } } for(int32_t i = 0; i < (int32_t)UNUM_CURRENCY_SPACING_COUNT; ++i) { if(currencySpcBeforeSym[i] != that.currencySpcBeforeSym[i]) { - 
return FALSE; + return false; } if(currencySpcAfterSym[i] != that.currencySpcAfterSym[i]) { - return FALSE; + return false; } } // No need to check fCodePointZero since it is based on fSymbols @@ -221,7 +222,7 @@ struct DecFmtSymDataSink : public ResourceSink { // Destination for data, modified via setters. DecimalFormatSymbols& dfs; // Boolean array of whether or not we have seen a particular symbol yet. - // Can't simpy check fSymbols because it is pre-populated with defaults. + // Can't simply check fSymbols because it is pre-populated with defaults. UBool seenSymbol[DecimalFormatSymbols::kFormatSymbolCount]; // Constructor/Destructor @@ -231,7 +232,7 @@ struct DecFmtSymDataSink : public ResourceSink { virtual ~DecFmtSymDataSink(); virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { + UErrorCode &errorCode) override { ResourceTable symbolsTable = value.getTable(errorCode); if (U_FAILURE(errorCode)) { return; } for (int32_t j = 0; symbolsTable.getKeyAndValue(j, key, value); ++j) { @@ -286,7 +287,7 @@ struct CurrencySpacingSink : public ResourceSink { virtual ~CurrencySpacingSink(); virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { + UErrorCode &errorCode) override { ResourceTable spacingTypesTable = value.getTable(errorCode); for (int32_t i = 0; spacingTypesTable.getKeyAndValue(i, key, value); ++i) { UBool beforeCurrency; @@ -508,6 +509,7 @@ DecimalFormatSymbols::initialize() { fSymbols[kSignificantDigitSymbol] = (UChar)0x0040; // '@' significant digit fSymbols[kMonetaryGroupingSeparatorSymbol].remove(); // fSymbols[kExponentMultiplicationSymbol] = (UChar)0xd7; // 'x' multiplication symbol for exponents + fSymbols[kApproximatelySignSymbol] = u'~'; // '~' approximately sign fIsCustomCurrencySymbol = FALSE; fIsCustomIntlCurrencySymbol = FALSE; fCodePointZero = 0x30; diff --git a/contrib/libs/icu/i18n/decContext.cpp b/contrib/libs/icu/i18n/decContext.cpp index 
6ec6d32afb..421d65b43f 100644 --- a/contrib/libs/icu/i18n/decContext.cpp +++ b/contrib/libs/icu/i18n/decContext.cpp @@ -150,7 +150,7 @@ U_CAPI uInt U_EXPORT2 uprv_decContextGetStatus(decContext *context) { /* newstatus is the source for the bits to be restored */ /* mask indicates the bits to be restored (the status bit that */ /* corresponds to each 1 bit in the mask is set to the value of */ -/* the correspnding bit in newstatus) */ +/* the corresponding bit in newstatus) */ /* returns context */ /* */ /* No error is possible. */ diff --git a/contrib/libs/icu/i18n/decContext.h b/contrib/libs/icu/i18n/decContext.h index e145777d1e..59ab65e592 100644 --- a/contrib/libs/icu/i18n/decContext.h +++ b/contrib/libs/icu/i18n/decContext.h @@ -250,21 +250,21 @@ #define DEC_INIT_DECQUAD DEC_INIT_DECIMAL128 /* decContext routines */ - U_INTERNAL decContext * U_EXPORT2 uprv_decContextClearStatus(decContext *, uint32_t); - U_INTERNAL decContext * U_EXPORT2 uprv_decContextDefault(decContext *, int32_t); - U_INTERNAL enum rounding U_EXPORT2 uprv_decContextGetRounding(decContext *); - U_INTERNAL uint32_t U_EXPORT2 uprv_decContextGetStatus(decContext *); - U_INTERNAL decContext * U_EXPORT2 uprv_decContextRestoreStatus(decContext *, uint32_t, uint32_t); - U_INTERNAL uint32_t U_EXPORT2 uprv_decContextSaveStatus(decContext *, uint32_t); - U_INTERNAL decContext * U_EXPORT2 uprv_decContextSetRounding(decContext *, enum rounding); - U_INTERNAL decContext * U_EXPORT2 uprv_decContextSetStatus(decContext *, uint32_t); - U_INTERNAL decContext * U_EXPORT2 uprv_decContextSetStatusFromString(decContext *, const char *); - U_INTERNAL decContext * U_EXPORT2 uprv_decContextSetStatusFromStringQuiet(decContext *, const char *); - U_INTERNAL decContext * U_EXPORT2 uprv_decContextSetStatusQuiet(decContext *, uint32_t); - U_INTERNAL const char * U_EXPORT2 uprv_decContextStatusToString(const decContext *); - U_INTERNAL int32_t U_EXPORT2 uprv_decContextTestEndian(uint8_t); - U_INTERNAL uint32_t U_EXPORT2 
uprv_decContextTestSavedStatus(uint32_t, uint32_t); - U_INTERNAL uint32_t U_EXPORT2 uprv_decContextTestStatus(decContext *, uint32_t); - U_INTERNAL decContext * U_EXPORT2 uprv_decContextZeroStatus(decContext *); + U_CAPI decContext * U_EXPORT2 uprv_decContextClearStatus(decContext *, uint32_t); + U_CAPI decContext * U_EXPORT2 uprv_decContextDefault(decContext *, int32_t); + U_CAPI enum rounding U_EXPORT2 uprv_decContextGetRounding(decContext *); + U_CAPI uint32_t U_EXPORT2 uprv_decContextGetStatus(decContext *); + U_CAPI decContext * U_EXPORT2 uprv_decContextRestoreStatus(decContext *, uint32_t, uint32_t); + U_CAPI uint32_t U_EXPORT2 uprv_decContextSaveStatus(decContext *, uint32_t); + U_CAPI decContext * U_EXPORT2 uprv_decContextSetRounding(decContext *, enum rounding); + U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatus(decContext *, uint32_t); + U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatusFromString(decContext *, const char *); + U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatusFromStringQuiet(decContext *, const char *); + U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatusQuiet(decContext *, uint32_t); + U_CAPI const char * U_EXPORT2 uprv_decContextStatusToString(const decContext *); + U_CAPI int32_t U_EXPORT2 uprv_decContextTestEndian(uint8_t); + U_CAPI uint32_t U_EXPORT2 uprv_decContextTestSavedStatus(uint32_t, uint32_t); + U_CAPI uint32_t U_EXPORT2 uprv_decContextTestStatus(decContext *, uint32_t); + U_CAPI decContext * U_EXPORT2 uprv_decContextZeroStatus(decContext *); #endif diff --git a/contrib/libs/icu/i18n/decNumber.cpp b/contrib/libs/icu/i18n/decNumber.cpp index cee2f8e949..71477d8202 100644 --- a/contrib/libs/icu/i18n/decNumber.cpp +++ b/contrib/libs/icu/i18n/decNumber.cpp @@ -2203,7 +2203,7 @@ U_CAPI decNumber * U_EXPORT2 uprv_decNumberPower(decNumber *res, const decNumber /* if a negative power the constant 1 is needed, and if not subset */ /* invert the lhs now rather than inverting the result later */ if 
(decNumberIsNegative(rhs)) { /* was a **-n [hence digits>0] */ - decNumber *inv=invbuff; /* asssume use fixed buffer */ + decNumber *inv=invbuff; /* assume use fixed buffer */ uprv_decNumberCopy(&dnOne, dac); /* dnOne=1; [needed now or later] */ #if DECSUBSET if (set->extended) { /* need to calculate 1/lhs */ @@ -3776,7 +3776,7 @@ static void decToString(const decNumber *dn, char *string, Flag eng) { /* Finally add the E-part, if needed. It will never be 0, has a base maximum and minimum of +999999999 through -999999999, but - could range down to -1999999998 for anormal numbers */ + could range down to -1999999998 for abnormal numbers */ if (e!=0) { Flag had=0; /* 1=had non-zero */ *c='E'; c++; @@ -3831,7 +3831,7 @@ static void decToString(const decNumber *dn, char *string, Flag eng) { /* */ /* Addition, especially x=x+1, is speed-critical. */ /* The static buffer is larger than might be expected to allow for */ -/* calls from higher-level funtions (notable exp). */ +/* calls from higher-level functions (notable exp). */ /* ------------------------------------------------------------------ */ static decNumber * decAddOp(decNumber *res, const decNumber *lhs, const decNumber *rhs, decContext *set, @@ -4247,7 +4247,7 @@ static decNumber * decAddOp(decNumber *res, const decNumber *lhs, /* long subtractions. These are acc and var1 respectively. */ /* var1 is a copy of the lhs coefficient, var2 is the rhs coefficient.*/ /* The static buffers may be larger than might be expected to allow */ -/* for calls from higher-level funtions (notable exp). */ +/* for calls from higher-level functions (notable exp). */ /* ------------------------------------------------------------------ */ static decNumber * decDivideOp(decNumber *res, const decNumber *lhs, const decNumber *rhs, @@ -5242,7 +5242,7 @@ static decNumber * decMultiplyOp(decNumber *res, const decNumber *lhs, /* exp(-x) where x can be the tiniest number (Ntiny). */ /* */ /* 2. 
Normalizing x to be <=0.1 (instead of <=1) reduces loop */ -/* iterations by appoximately a third with additional (although */ +/* iterations by approximately a third with additional (although */ /* diminishing) returns as the range is reduced to even smaller */ /* fractions. However, h (the power of 10 used to correct the */ /* result at the end, see below) must be kept <=8 as otherwise */ @@ -5616,7 +5616,7 @@ static const uShort LNnn[90]={9016, 8652, 8316, 8008, 7724, 7456, 7208, /* would certainly save at least one if it were made ten times */ /* bigger, too (for truncated fractions 0.100 through 0.999). */ /* However, for most practical evaluations, at least four or five */ -/* iterations will be neede -- so this would only speed up by */ +/* iterations will be needed -- so this would only speed up by */ /* 20-25% and that probably does not justify increasing the table */ /* size. */ /* */ diff --git a/contrib/libs/icu/i18n/decNumber.h b/contrib/libs/icu/i18n/decNumber.h index 7182e789e5..ddcc50e2ef 100644 --- a/contrib/libs/icu/i18n/decNumber.h +++ b/contrib/libs/icu/i18n/decNumber.h @@ -112,74 +112,74 @@ /* decNumber public functions and macros */ /* ---------------------------------------------------------------- */ /* Conversions */ - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberFromInt32(decNumber *, int32_t); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberFromUInt32(decNumber *, uint32_t); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberFromString(decNumber *, const char *, decContext *); - U_INTERNAL char * U_EXPORT2 uprv_decNumberToString(const decNumber *, char *); - U_INTERNAL char * U_EXPORT2 uprv_decNumberToEngString(const decNumber *, char *); - U_INTERNAL uint32_t U_EXPORT2 uprv_decNumberToUInt32(const decNumber *, decContext *); - U_INTERNAL int32_t U_EXPORT2 uprv_decNumberToInt32(const decNumber *, decContext *); - U_INTERNAL uint8_t * U_EXPORT2 uprv_decNumberGetBCD(const decNumber *, uint8_t *); - U_INTERNAL decNumber * U_EXPORT2 
uprv_decNumberSetBCD(decNumber *, const uint8_t *, uint32_t); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromInt32(decNumber *, int32_t); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromUInt32(decNumber *, uint32_t); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberFromString(decNumber *, const char *, decContext *); + U_CAPI char * U_EXPORT2 uprv_decNumberToString(const decNumber *, char *); + U_CAPI char * U_EXPORT2 uprv_decNumberToEngString(const decNumber *, char *); + U_CAPI uint32_t U_EXPORT2 uprv_decNumberToUInt32(const decNumber *, decContext *); + U_CAPI int32_t U_EXPORT2 uprv_decNumberToInt32(const decNumber *, decContext *); + U_CAPI uint8_t * U_EXPORT2 uprv_decNumberGetBCD(const decNumber *, uint8_t *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberSetBCD(decNumber *, const uint8_t *, uint32_t); /* Operators and elementary functions */ - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberAbs(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberAdd(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberAnd(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberCompare(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberCompareSignal(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberCompareTotal(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberCompareTotalMag(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberDivide(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberDivideInteger(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 
uprv_decNumberExp(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberFMA(decNumber *, const decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberInvert(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberLn(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberLogB(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberLog10(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberMax(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberMaxMag(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberMin(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberMinMag(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberMinus(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberMultiply(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberNormalize(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberOr(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberPlus(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberPower(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberQuantize(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberReduce(decNumber *, const decNumber *, decContext *); - 
U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberRemainder(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberRemainderNear(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberRescale(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberRotate(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberSameQuantum(decNumber *, const decNumber *, const decNumber *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberScaleB(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberShift(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberSquareRoot(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberSubtract(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberToIntegralExact(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberToIntegralValue(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberXor(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberAbs(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberAdd(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberAnd(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompare(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompareSignal(decNumber *, const decNumber *, const decNumber *, decContext *); + 
U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompareTotal(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberCompareTotalMag(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberDivide(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberDivideInteger(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberExp(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberFMA(decNumber *, const decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberInvert(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberLn(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberLogB(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberLog10(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberMax(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberMaxMag(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberMin(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberMinMag(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberMinus(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberMultiply(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberNormalize(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberOr(decNumber *, const decNumber *, const decNumber *, 
decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberPlus(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberPower(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberQuantize(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberReduce(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberRemainder(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberRemainderNear(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberRescale(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberRotate(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberSameQuantum(decNumber *, const decNumber *, const decNumber *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberScaleB(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberShift(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberSquareRoot(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberSubtract(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberToIntegralExact(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberToIntegralValue(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberXor(decNumber *, const decNumber *, const decNumber *, decContext *); /* Utilities */ enum decClass uprv_decNumberClass(const decNumber *, decContext *); - U_INTERNAL const char * U_EXPORT2 uprv_decNumberClassToString(enum decClass); - 
U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberCopy(decNumber *, const decNumber *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberCopyAbs(decNumber *, const decNumber *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberCopyNegate(decNumber *, const decNumber *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberCopySign(decNumber *, const decNumber *, const decNumber *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberNextMinus(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberNextPlus(decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberNextToward(decNumber *, const decNumber *, const decNumber *, decContext *); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberTrim(decNumber *); - U_INTERNAL const char * U_EXPORT2 uprv_decNumberVersion(void); - U_INTERNAL decNumber * U_EXPORT2 uprv_decNumberZero(decNumber *); + U_CAPI const char * U_EXPORT2 uprv_decNumberClassToString(enum decClass); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopy(decNumber *, const decNumber *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopyAbs(decNumber *, const decNumber *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopyNegate(decNumber *, const decNumber *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberCopySign(decNumber *, const decNumber *, const decNumber *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberNextMinus(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberNextPlus(decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberNextToward(decNumber *, const decNumber *, const decNumber *, decContext *); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberTrim(decNumber *); + U_CAPI const char * U_EXPORT2 uprv_decNumberVersion(void); + U_CAPI decNumber * U_EXPORT2 uprv_decNumberZero(decNumber *); /* Functions for testing decNumbers (normality depends on context) */ - U_INTERNAL int32_t U_EXPORT2 uprv_decNumberIsNormal(const 
decNumber *, decContext *); - U_INTERNAL int32_t U_EXPORT2 uprv_decNumberIsSubnormal(const decNumber *, decContext *); + U_CAPI int32_t U_EXPORT2 uprv_decNumberIsNormal(const decNumber *, decContext *); + U_CAPI int32_t U_EXPORT2 uprv_decNumberIsSubnormal(const decNumber *, decContext *); /* Macros for testing decNumber *dn */ #define decNumberIsCanonical(dn) (1) /* All decNumbers are saintly */ diff --git a/contrib/libs/icu/i18n/decNumberLocal.h b/contrib/libs/icu/i18n/decNumberLocal.h index e8d1b38653..1c5a79b702 100644 --- a/contrib/libs/icu/i18n/decNumberLocal.h +++ b/contrib/libs/icu/i18n/decNumberLocal.h @@ -146,7 +146,7 @@ /* ---------------------------------------------------------------- */ - /* Definitions for arbitary-precision modules (only valid after */ + /* Definitions for arbitrary-precision modules (only valid after */ /* decNumber.h has been included) */ /* ---------------------------------------------------------------- */ diff --git a/contrib/libs/icu/i18n/decimfmt.cpp b/contrib/libs/icu/i18n/decimfmt.cpp index daa1129a6a..bca3336679 100644 --- a/contrib/libs/icu/i18n/decimfmt.cpp +++ b/contrib/libs/icu/i18n/decimfmt.cpp @@ -137,7 +137,7 @@ DecimalFormat::setAttribute(UNumberFormatAttribute attr, int32_t newValue, UErro if (U_FAILURE(status)) { return *this; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; return *this; } @@ -271,7 +271,7 @@ int32_t DecimalFormat::getAttribute(UNumberFormatAttribute attr, UErrorCode& sta if (U_FAILURE(status)) { return -1; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. 
+ // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; return -1; } @@ -439,7 +439,7 @@ DecimalFormat::DecimalFormat(const DecimalFormat& source) : NumberFormat(source) return; // no way to report an error. } UErrorCode status = U_ZERO_ERROR; - fields->symbols.adoptInsteadAndCheckErrorCode(new DecimalFormatSymbols(*source.fields->symbols), status); + fields->symbols.adoptInsteadAndCheckErrorCode(new DecimalFormatSymbols(*source.getDecimalFormatSymbols()), status); // In order to simplify error handling logic in the various getters/setters/etc, we do not allow // any partially populated DecimalFormatFields object. We must have a fully complete fields object // or else we set it to nullptr. @@ -463,7 +463,7 @@ DecimalFormat& DecimalFormat::operator=(const DecimalFormat& rhs) { fields->properties = rhs.fields->properties; fields->exportedProperties.clear(); UErrorCode status = U_ZERO_ERROR; - LocalPointer<DecimalFormatSymbols> dfs(new DecimalFormatSymbols(*rhs.fields->symbols), status); + LocalPointer<DecimalFormatSymbols> dfs(new DecimalFormatSymbols(*rhs.getDecimalFormatSymbols()), status); if (U_FAILURE(status)) { // We failed to allocate DecimalFormatSymbols, release fields and its members. // We must have a fully complete fields object, we cannot have partially populated members. 
@@ -497,7 +497,7 @@ DecimalFormat* DecimalFormat::clone() const { return nullptr; } -UBool DecimalFormat::operator==(const Format& other) const { +bool DecimalFormat::operator==(const Format& other) const { auto* otherDF = dynamic_cast<const DecimalFormat*>(&other); if (otherDF == nullptr) { return false; @@ -507,7 +507,7 @@ UBool DecimalFormat::operator==(const Format& other) const { if (fields == nullptr || otherDF->fields == nullptr) { return false; } - return fields->properties == otherDF->fields->properties && *fields->symbols == *otherDF->fields->symbols; + return fields->properties == otherDF->fields->properties && *getDecimalFormatSymbols() == *otherDF->getDecimalFormatSymbols(); } UnicodeString& DecimalFormat::format(double number, UnicodeString& appendTo, FieldPosition& pos) const { @@ -534,7 +534,7 @@ UnicodeString& DecimalFormat::format(double number, UnicodeString& appendTo, Fie return appendTo; // don't overwrite status if it's already a failure. } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; appendTo.setToBogus(); return appendTo; @@ -558,7 +558,7 @@ DecimalFormat::format(double number, UnicodeString& appendTo, FieldPositionItera return appendTo; // don't overwrite status if it's already a failure. } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; appendTo.setToBogus(); return appendTo; @@ -614,7 +614,7 @@ UnicodeString& DecimalFormat::format(int64_t number, UnicodeString& appendTo, Fi return appendTo; // don't overwrite status if it's already a failure. 
} if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; appendTo.setToBogus(); return appendTo; @@ -638,7 +638,7 @@ DecimalFormat::format(int64_t number, UnicodeString& appendTo, FieldPositionIter return appendTo; // don't overwrite status if it's already a failure. } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; appendTo.setToBogus(); return appendTo; @@ -662,7 +662,7 @@ DecimalFormat::format(StringPiece number, UnicodeString& appendTo, FieldPosition return appendTo; // don't overwrite status if it's already a failure. } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; appendTo.setToBogus(); return appendTo; @@ -682,7 +682,7 @@ UnicodeString& DecimalFormat::format(const DecimalQuantity& number, UnicodeStrin return appendTo; // don't overwrite status if it's already a failure. } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. 
status = U_MEMORY_ALLOCATION_ERROR; appendTo.setToBogus(); return appendTo; @@ -703,7 +703,7 @@ DecimalFormat::format(const DecimalQuantity& number, UnicodeString& appendTo, Fi return appendTo; // don't overwrite status if it's already a failure. } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; appendTo.setToBogus(); return appendTo; @@ -794,7 +794,11 @@ const DecimalFormatSymbols* DecimalFormat::getDecimalFormatSymbols(void) const { if (fields == nullptr) { return nullptr; } - return fields->symbols.getAlias(); + if (!fields->symbols.isNull()) { + return fields->symbols.getAlias(); + } else { + return fields->formatter.getDecimalFormatSymbols(); + } } void DecimalFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) { @@ -1073,7 +1077,7 @@ void DecimalFormat::setFormatWidth(int32_t width) { UnicodeString DecimalFormat::getPadCharacterString() const { if (fields == nullptr || fields->properties.padString.isBogus()) { // Readonly-alias the static string kFallbackPaddingString - return {TRUE, kFallbackPaddingString, -1}; + return {true, kFallbackPaddingString, -1}; } else { return fields->properties.padString; } @@ -1304,7 +1308,7 @@ void DecimalFormat::setFormatFailIfMoreThanMaxDigits(UBool value) { UnicodeString& DecimalFormat::toPattern(UnicodeString& result) const { if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. 
result.setToBogus(); return result; } @@ -1318,6 +1322,7 @@ UnicodeString& DecimalFormat::toPattern(UnicodeString& result) const { !tprops.currency.isNull() || !tprops.currencyPluralInfo.fPtr.isNull() || !tprops.currencyUsage.isNull() || + tprops.currencyAsDecimal || AffixUtils::hasCurrencySymbols(tprops.positivePrefixPattern, localStatus) || AffixUtils::hasCurrencySymbols(tprops.positiveSuffixPattern, localStatus) || AffixUtils::hasCurrencySymbols(tprops.negativePrefixPattern, localStatus) || @@ -1333,13 +1338,13 @@ UnicodeString& DecimalFormat::toPattern(UnicodeString& result) const { UnicodeString& DecimalFormat::toLocalizedPattern(UnicodeString& result) const { if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. result.setToBogus(); return result; } ErrorCode localStatus; result = toPattern(result); - result = PatternStringUtils::convertLocalized(result, *fields->symbols, true, localStatus); + result = PatternStringUtils::convertLocalized(result, *getDecimalFormatSymbols(), true, localStatus); return result; } @@ -1352,7 +1357,7 @@ void DecimalFormat::applyPattern(const UnicodeString& pattern, UErrorCode& statu // don't overwrite status if it's already a failure. if (U_FAILURE(status)) { return; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1370,12 +1375,12 @@ void DecimalFormat::applyLocalizedPattern(const UnicodeString& localizedPattern, // don't overwrite status if it's already a failure. 
if (U_FAILURE(status)) { return; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; return; } UnicodeString pattern = PatternStringUtils::convertLocalized( - localizedPattern, *fields->symbols, false, status); + localizedPattern, *getDecimalFormatSymbols(), false, status); applyPattern(pattern, status); } @@ -1509,7 +1514,7 @@ void DecimalFormat::setCurrency(const char16_t* theCurrency, UErrorCode& ec) { // don't overwrite ec if it's already a failure. if (U_FAILURE(ec)) { return; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. ec = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1521,7 +1526,7 @@ void DecimalFormat::setCurrency(const char16_t* theCurrency, UErrorCode& ec) { NumberFormat::setCurrency(theCurrency, ec); // to set field for compatibility fields->properties.currency = currencyUnit; // In Java, the DecimalFormatSymbols is mutable. Why not in C++? - LocalPointer<DecimalFormatSymbols> newSymbols(new DecimalFormatSymbols(*fields->symbols), ec); + LocalPointer<DecimalFormatSymbols> newSymbols(new DecimalFormatSymbols(*getDecimalFormatSymbols()), ec); newSymbols->setCurrency(currencyUnit.getISOCurrency(), ec); fields->symbols.adoptInsteadAndCheckErrorCode(newSymbols.orphan(), ec); touch(ec); @@ -1536,7 +1541,7 @@ void DecimalFormat::setCurrencyUsage(UCurrencyUsage newUsage, UErrorCode* ec) { // don't overwrite ec if it's already a failure. if (U_FAILURE(*ec)) { return; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. 
+ // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. *ec = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1561,7 +1566,7 @@ DecimalFormat::formatToDecimalQuantity(double number, DecimalQuantity& output, U // don't overwrite status if it's already a failure. if (U_FAILURE(status)) { return; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1573,7 +1578,7 @@ void DecimalFormat::formatToDecimalQuantity(const Formattable& number, DecimalQu // don't overwrite status if it's already a failure. if (U_FAILURE(status)) { return; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1587,7 +1592,7 @@ const number::LocalizedNumberFormatter* DecimalFormat::toNumberFormatter(UErrorC // We sometimes need to return nullptr here (see ICU-20380) if (U_FAILURE(status)) { return nullptr; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. status = U_MEMORY_ALLOCATION_ERROR; return nullptr; } @@ -1600,7 +1605,7 @@ void DecimalFormat::touch(UErrorCode& status) { return; } if (fields == nullptr) { - // We only get here if an OOM error happend during construction, copy construction, assignment, or modification. + // We only get here if an OOM error happened during construction, copy construction, assignment, or modification. 
// For regular construction, the caller should have checked the status variable for errors. // For copy construction, there is unfortunately nothing to report the error, so we need to guard against // this possible bad state here and set the status to an error. @@ -1608,9 +1613,11 @@ void DecimalFormat::touch(UErrorCode& status) { return; } - // In C++, fields->symbols is the source of truth for the locale. - Locale locale = fields->symbols->getLocale(); - + // In C++, fields->symbols (or, if it's null, the DecimalFormatSymbols owned by the underlying LocalizedNumberFormatter) + // is the source of truth for the locale. + const DecimalFormatSymbols* symbols = getDecimalFormatSymbols(); + Locale locale = symbols->getLocale(); + // Note: The formatter is relatively cheap to create, and we need it to populate fields->exportedProperties, // so automatically recompute it here. The parser is a bit more expensive and is not needed until the // parse method is called, so defer that until needed. @@ -1618,10 +1625,14 @@ void DecimalFormat::touch(UErrorCode& status) { // Since memory has already been allocated for the formatter, we can move assign a stack-allocated object // and don't need to call new. (Which is slower and could possibly fail). + // [Note that "symbols" above might point to the DecimalFormatSymbols object owned by fields->formatter. + // That's okay, because NumberPropertyMapper::create() will clone it before fields->formatter's assignment + // operator deletes it. But it does mean that "symbols" can't be counted on to be good after this line.] 
fields->formatter = NumberPropertyMapper::create( - fields->properties, *fields->symbols, fields->warehouse, fields->exportedProperties, status + fields->properties, *symbols, fields->warehouse, fields->exportedProperties, status ).locale(locale); - + fields->symbols.adoptInstead(nullptr); // the fields->symbols property is only temporary, until we can copy it into a new LocalizedNumberFormatter + // Do this after fields->exportedProperties are set up setupFastFormat(); @@ -1668,7 +1679,7 @@ const numparse::impl::NumberParserImpl* DecimalFormat::getParser(UErrorCode& sta } // Try computing the parser on our own - auto* temp = NumberParserImpl::createParserFromProperties(fields->properties, *fields->symbols, false, status); + auto* temp = NumberParserImpl::createParserFromProperties(fields->properties, *getDecimalFormatSymbols(), false, status); if (U_FAILURE(status)) { return nullptr; } @@ -1701,7 +1712,7 @@ const numparse::impl::NumberParserImpl* DecimalFormat::getCurrencyParser(UErrorC } // Try computing the parser on our own - auto* temp = NumberParserImpl::createParserFromProperties(fields->properties, *fields->symbols, true, status); + auto* temp = NumberParserImpl::createParserFromProperties(fields->properties, *getDecimalFormatSymbols(), true, status); if (temp == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; // although we may still dereference, call sites should be guarded @@ -1775,11 +1786,13 @@ void DecimalFormat::setupFastFormat() { return; } + const DecimalFormatSymbols* symbols = getDecimalFormatSymbols(); + // Grouping (secondary grouping is forbidden in equalsDefaultExceptFastFormat): bool groupingUsed = fields->properties.groupingUsed; int32_t groupingSize = fields->properties.groupingSize; bool unusualGroupingSize = groupingSize > 0 && groupingSize != 3; - const UnicodeString& groupingString = fields->symbols->getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); + const UnicodeString& groupingString = 
symbols->getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); if (groupingUsed && (unusualGroupingSize || groupingString.length() != 1)) { trace("no fast format: grouping\n"); fields->canUseFastFormat = false; @@ -1805,8 +1818,8 @@ void DecimalFormat::setupFastFormat() { } // Other symbols: - const UnicodeString& minusSignString = fields->symbols->getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); - UChar32 codePointZero = fields->symbols->getCodePointZero(); + const UnicodeString& minusSignString = symbols->getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); + UChar32 codePointZero = symbols->getCodePointZero(); if (minusSignString.length() != 1 || U16_LENGTH(codePointZero) != 1) { trace("no fast format: symbols\n"); fields->canUseFastFormat = false; diff --git a/contrib/libs/icu/i18n/double-conversion-bignum-dtoa.cpp b/contrib/libs/icu/i18n/double-conversion-bignum-dtoa.cpp index a95910df04..638e9cb046 100644 --- a/contrib/libs/icu/i18n/double-conversion-bignum-dtoa.cpp +++ b/contrib/libs/icu/i18n/double-conversion-bignum-dtoa.cpp @@ -290,7 +290,7 @@ static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, // Let v = numerator / denominator < 10. // Then we generate 'count' digits of d = x.xxxxx... (without the decimal point) -// from left to right. Once 'count' digits have been produced we decide wether +// from left to right. Once 'count' digits have been produced we decide whether // to round up or down. Remainders of exactly .5 round upwards. Numbers such // as 9.999999 propagate a carry all the way, and change the // exponent (decimal_point), when rounding upwards. @@ -384,7 +384,7 @@ static void BignumToFixed(int requested_digits, int* decimal_point, // Returns an estimation of k such that 10^(k-1) <= v < 10^k where // v = f * 2^exponent and 2^52 <= f < 2^53. // v is hence a normalized double with the given exponent. The output is an -// approximation for the exponent of the decimal approimation .digits * 10^k. 
+// approximation for the exponent of the decimal approximation .digits * 10^k. // // The result might undershoot by 1 in which case 10^k <= v < 10^k+1. // Note: this property holds for v's upper boundary m+ too. @@ -562,7 +562,7 @@ static void InitialScaledStartValuesNegativeExponentNegativePower( // // Let ep == estimated_power, then the returned values will satisfy: // v / 10^ep = numerator / denominator. -// v's boundarys m- and m+: +// v's boundaries m- and m+: // m- / 10^ep == v / 10^ep - delta_minus / denominator // m+ / 10^ep == v / 10^ep + delta_plus / denominator // Or in other words: diff --git a/contrib/libs/icu/i18n/double-conversion-double-to-string.cpp b/contrib/libs/icu/i18n/double-conversion-double-to-string.cpp index 44c176f4f9..90ba436060 100644 --- a/contrib/libs/icu/i18n/double-conversion-double-to-string.cpp +++ b/contrib/libs/icu/i18n/double-conversion-double-to-string.cpp @@ -107,19 +107,19 @@ void DoubleToStringConverter::CreateExponentialRepresentation( result_builder->AddCharacter('+'); } } - if (exponent == 0) { - result_builder->AddCharacter('0'); - return; - } DOUBLE_CONVERSION_ASSERT(exponent < 1e4); // Changing this constant requires updating the comment of DoubleToStringConverter constructor const int kMaxExponentLength = 5; char buffer[kMaxExponentLength + 1]; buffer[kMaxExponentLength] = '\0'; int first_char_pos = kMaxExponentLength; - while (exponent > 0) { - buffer[--first_char_pos] = '0' + (exponent % 10); - exponent /= 10; + if (exponent == 0) { + buffer[--first_char_pos] = '0'; + } else { + while (exponent > 0) { + buffer[--first_char_pos] = '0' + (exponent % 10); + exponent /= 10; + } } // Add prefix '0' to make exponent width >= min(min_exponent_with_, kMaxExponentLength) // For example: convert 1e+9 -> 1e+09, if min_exponent_with_ is set to 2 @@ -342,9 +342,21 @@ bool DoubleToStringConverter::ToPrecision(double value, int exponent = decimal_point - 1; int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 
1 : 0; - if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || + bool as_exponential = + (-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || (decimal_point - precision + extra_zero > - max_trailing_padding_zeroes_in_precision_mode_)) { + max_trailing_padding_zeroes_in_precision_mode_); + if ((flags_ & NO_TRAILING_ZERO) != 0) { + // Truncate trailing zeros that occur after the decimal point (if exponential, + // that is everything after the first digit). + int stop = as_exponential ? 1 : std::max(1, decimal_point); + while (decimal_rep_length > stop && decimal_rep[decimal_rep_length - 1] == '0') { + --decimal_rep_length; + } + // Clamp precision to avoid the code below re-adding the zeros. + precision = std::min(precision, decimal_rep_length); + } + if (as_exponential) { // Fill buffer to contain 'precision' digits. // Usually the buffer is already at the correct length, but 'DoubleToAscii' // is allowed to return less characters. diff --git a/contrib/libs/icu/i18n/double-conversion-double-to-string.h b/contrib/libs/icu/i18n/double-conversion-double-to-string.h index 27bd867848..1fae2e8771 100644 --- a/contrib/libs/icu/i18n/double-conversion-double-to-string.h +++ b/contrib/libs/icu/i18n/double-conversion-double-to-string.h @@ -48,12 +48,11 @@ namespace double_conversion { class DoubleToStringConverter { public: -#if 0 // not needed for ICU // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the // function returns false. static const int kMaxFixedDigitsBeforePoint = 60; - static const int kMaxFixedDigitsAfterPoint = 60; + static const int kMaxFixedDigitsAfterPoint = 100; // When calling ToExponential with a requested_digits // parameter > kMaxExponentialDigits then the function returns false. 
@@ -65,12 +64,36 @@ class DoubleToStringConverter { static const int kMinPrecisionDigits = 1; static const int kMaxPrecisionDigits = 120; + // The maximal number of digits that are needed to emit a double in base 10. + // A higher precision can be achieved by using more digits, but the shortest + // accurate representation of any double will never use more digits than + // kBase10MaximalLength. + // Note that DoubleToAscii null-terminates its input. So the given buffer + // should be at least kBase10MaximalLength + 1 characters long. + static const int kBase10MaximalLength = 17; + + // The maximal number of digits that are needed to emit a single in base 10. + // A higher precision can be achieved by using more digits, but the shortest + // accurate representation of any single will never use more digits than + // kBase10MaximalLengthSingle. + static const int kBase10MaximalLengthSingle = 9; + + // The length of the longest string that 'ToShortest' can produce when the + // converter is instantiated with EcmaScript defaults (see + // 'EcmaScriptConverter') + // This value does not include the trailing '\0' character. + // This amount of characters is needed for negative values that hit the + // 'decimal_in_shortest_low' limit. For example: "-0.0000033333333333333333" + static const int kMaxCharsEcmaScriptShortest = 25; + +#if 0 // not needed for ICU enum Flags { NO_FLAGS = 0, EMIT_POSITIVE_EXPONENT_SIGN = 1, EMIT_TRAILING_DECIMAL_POINT = 2, EMIT_TRAILING_ZERO_AFTER_POINT = 4, - UNIQUE_ZERO = 8 + UNIQUE_ZERO = 8, + NO_TRAILING_ZERO = 16 }; // Flags should be a bit-or combination of the possible Flags-enum. @@ -82,9 +105,13 @@ class DoubleToStringConverter { // Example: 2345.0 is converted to "2345.". // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point // emits a trailing '0'-character. This flag requires the - // EXMIT_TRAILING_DECIMAL_POINT flag. + // EMIT_TRAILING_DECIMAL_POINT flag. // Example: 2345.0 is converted to "2345.0". 
// - UNIQUE_ZERO: "-0.0" is converted to "0.0". + // - NO_TRAILING_ZERO: Trailing zeros are removed from the fractional portion + // of the result in precision mode. Matches printf's %g. + // When EMIT_TRAILING_ZERO_AFTER_POINT is also given, one trailing zero is + // preserved. // // Infinity symbol and nan_symbol provide the string representation for these // special values. If the string is NULL and the special value is encountered @@ -111,7 +138,7 @@ class DoubleToStringConverter { // Example with max_leading_padding_zeroes_in_precision_mode = 6. // ToPrecision(0.0000012345, 2) -> "0.0000012" // ToPrecision(0.00000012345, 2) -> "1.2e-7" - // Similarily the converter may add up to + // Similarly the converter may add up to // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid // returning an exponential representation. A zero added by the // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. @@ -152,6 +179,14 @@ class DoubleToStringConverter { } // Returns a converter following the EcmaScript specification. + // + // Flags: UNIQUE_ZERO and EMIT_POSITIVE_EXPONENT_SIGN. + // Special values: "Infinity" and "NaN". + // Lower case 'e' for exponential values. 
+ // decimal_in_shortest_low: -6 + // decimal_in_shortest_high: 21 + // max_leading_padding_zeroes_in_precision_mode: 6 + // max_trailing_padding_zeroes_in_precision_mode: 0 static const DoubleToStringConverter& EcmaScriptConverter(); // Computes the shortest string of digits that correctly represent the input @@ -161,7 +196,7 @@ class DoubleToStringConverter { // Example with decimal_in_shortest_low = -6, // decimal_in_shortest_high = 21, // EMIT_POSITIVE_EXPONENT_SIGN activated, and - // EMIT_TRAILING_DECIMAL_POINT deactived: + // EMIT_TRAILING_DECIMAL_POINT deactivated: // ToShortest(0.000001) -> "0.000001" // ToShortest(0.0000001) -> "1e-7" // ToShortest(111111111111111111111.0) -> "111111111111111110000" @@ -177,6 +212,21 @@ class DoubleToStringConverter { // Returns true if the conversion succeeds. The conversion always succeeds // except when the input value is special and no infinity_symbol or // nan_symbol has been given to the constructor. + // + // The length of the longest result is the maximum of the length of the + // following string representations (each with possible examples): + // - NaN and negative infinity: "NaN", "-Infinity", "-inf". + // - -10^(decimal_in_shortest_high - 1): + // "-100000000000000000000", "-1000000000000000.0" + // - the longest string in range [0; -10^decimal_in_shortest_low]. Generally, + // this string is 3 + kBase10MaximalLength - decimal_in_shortest_low. + // (Sign, '0', decimal point, padding zeroes for decimal_in_shortest_low, + // and the significant digits). + // "-0.0000033333333333333333", "-0.0012345678901234567" + // - the longest exponential representation. (A negative number with + // kBase10MaximalLength significant digits). + // "-1.7976931348623157e+308", "-1.7976931348623157E308" + // In addition, the buffer must be able to hold the trailing '\0' character. 
bool ToShortest(double value, StringBuilder* result_builder) const { return ToShortestIeeeNumber(value, result_builder, SHORTEST); } @@ -217,9 +267,11 @@ class DoubleToStringConverter { // been provided to the constructor, // - 'value' > 10^kMaxFixedDigitsBeforePoint, or // - 'requested_digits' > kMaxFixedDigitsAfterPoint. - // The last two conditions imply that the result will never contain more than - // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters + // The last two conditions imply that the result for non-special values never + // contains more than + // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters // (one additional character for the sign, and one for the decimal point). + // In addition, the buffer must be able to hold the trailing '\0' character. bool ToFixed(double value, int requested_digits, StringBuilder* result_builder) const; @@ -248,14 +300,17 @@ class DoubleToStringConverter { // - the input value is special and no infinity_symbol or nan_symbol has // been provided to the constructor, // - 'requested_digits' > kMaxExponentialDigits. - // The last condition implies that the result will never contain more than + // + // The last condition implies that the result never contains more than // kMaxExponentialDigits + 8 characters (the sign, the digit before the // decimal point, the decimal point, the exponent character, the // exponent's sign, and at most 3 exponent digits). + // In addition, the buffer must be able to hold the trailing '\0' character. bool ToExponential(double value, int requested_digits, StringBuilder* result_builder) const; + // Computes 'precision' leading digits of the given 'value' and returns them // either in exponential or decimal format, depending on // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the @@ -265,7 +320,7 @@ class DoubleToStringConverter { // Example with max_leading_padding_zeroes_in_precision_mode = 6. 
// ToPrecision(0.0000012345, 2) -> "0.0000012" // ToPrecision(0.00000012345, 2) -> "1.2e-7" - // Similarily the converter may add up to + // Similarly the converter may add up to // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid // returning an exponential representation. A zero added by the // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. @@ -287,9 +342,11 @@ class DoubleToStringConverter { // been provided to the constructor, // - precision < kMinPrecisionDigits // - precision > kMaxPrecisionDigits - // The last condition implies that the result will never contain more than + // + // The last condition implies that the result never contains more than // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the // exponent character, the exponent's sign, and at most 3 exponent digits). + // In addition, the buffer must be able to hold the trailing '\0' character. bool ToPrecision(double value, int precision, StringBuilder* result_builder) const; @@ -310,14 +367,6 @@ class DoubleToStringConverter { PRECISION }; - // The maximal number of digits that are needed to emit a double in base 10. - // A higher precision can be achieved by using more digits, but the shortest - // accurate representation of any double will never use more digits than - // kBase10MaximalLength. - // Note that DoubleToAscii null-terminates its input. So the given buffer - // should be at least kBase10MaximalLength + 1 characters long. - static const int kBase10MaximalLength = 17; - // Converts the given double 'v' to digit characters. 'v' must not be NaN, // +Infinity, or -Infinity. In SHORTEST_SINGLE-mode this restriction also // applies to 'v' after it has been casted to a single-precision float. 
That diff --git a/contrib/libs/icu/i18n/double-conversion-fast-dtoa.cpp b/contrib/libs/icu/i18n/double-conversion-fast-dtoa.cpp index 87a3d536bf..06e4cf1255 100644 --- a/contrib/libs/icu/i18n/double-conversion-fast-dtoa.cpp +++ b/contrib/libs/icu/i18n/double-conversion-fast-dtoa.cpp @@ -579,7 +579,7 @@ static bool Grisu3(double v, // the difference between w and boundary_minus/plus (a power of 2) and to // compute scaled_boundary_minus/plus by subtracting/adding from // scaled_w. However the code becomes much less readable and the speed - // enhancements are not terriffic. + // enhancements are not terrific. DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk); DiyFp scaled_boundary_plus = DiyFp::Times(boundary_plus, ten_mk); @@ -587,7 +587,7 @@ static bool Grisu3(double v, // v == (double) (scaled_w * 10^-mk). // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an // integer than it will be updated. For instance if scaled_w == 1.23 then - // the buffer will be filled with "123" und the decimal_exponent will be + // the buffer will be filled with "123" and the decimal_exponent will be // decreased by 2. int kappa; bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus, diff --git a/contrib/libs/icu/i18n/double-conversion-string-to-double.cpp b/contrib/libs/icu/i18n/double-conversion-string-to-double.cpp index 548cad1f30..c72bb26277 100644 --- a/contrib/libs/icu/i18n/double-conversion-string-to-double.cpp +++ b/contrib/libs/icu/i18n/double-conversion-string-to-double.cpp @@ -51,6 +51,18 @@ // ICU PATCH: Wrap in ICU namespace U_NAMESPACE_BEGIN +#ifdef _MSC_VER +# if _MSC_VER >= 1900 +// Fix MSVC >= 2015 (_MSC_VER == 1900) warning +// C4244: 'argument': conversion from 'const uc16' to 'char', possible loss of data +// against Advance and friends, when instantiated with **it as char, not uc16. 
+ __pragma(warning(disable: 4244)) +# endif +# if _MSC_VER <= 1700 // VS2012, see IsDecimalDigitForRadix warning fix, below +# define VS2012_RADIXWARN +# endif +#endif + namespace double_conversion { namespace { @@ -170,9 +182,9 @@ static double SignedZero(bool sign) { // // The function is small and could be inlined, but VS2012 emitted a warning // because it constant-propagated the radix and concluded that the last -// condition was always true. By moving it into a separate function the -// compiler wouldn't warn anymore. -#ifdef _MSC_VER +// condition was always true. Moving it into a separate function and +// suppressing optimisation keeps the compiler from warning. +#ifdef VS2012_RADIXWARN #pragma optimize("",off) static bool IsDecimalDigitForRadix(int c, int radix) { return '0' <= c && c <= '9' && (c - '0') < radix; @@ -738,11 +750,17 @@ double StringToDoubleConverter::StringToIeee( DOUBLE_CONVERSION_ASSERT(buffer_pos < kBufferSize); buffer[buffer_pos] = '\0'; + // Code above ensures there are no leading zeros and the buffer has fewer than + // kMaxSignificantDecimalDigits characters. Trim trailing zeros. + Vector<const char> chars(buffer, buffer_pos); + chars = TrimTrailingZeros(chars); + exponent += buffer_pos - chars.length(); + double converted; if (read_as_double) { - converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent); + converted = StrtodTrimmed(chars, exponent); } else { - converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent); + converted = StrtofTrimmed(chars, exponent); } *processed_characters_count = static_cast<int>(current - input); return sign? 
-converted: converted; @@ -782,6 +800,42 @@ float StringToDoubleConverter::StringToFloat( processed_characters_count)); } + +template<> +double StringToDoubleConverter::StringTo<double>( + const char* buffer, + int length, + int* processed_characters_count) const { + return StringToDouble(buffer, length, processed_characters_count); +} + + +template<> +float StringToDoubleConverter::StringTo<float>( + const char* buffer, + int length, + int* processed_characters_count) const { + return StringToFloat(buffer, length, processed_characters_count); +} + + +template<> +double StringToDoubleConverter::StringTo<double>( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return StringToDouble(buffer, length, processed_characters_count); +} + + +template<> +float StringToDoubleConverter::StringTo<float>( + const uc16* buffer, + int length, + int* processed_characters_count) const { + return StringToFloat(buffer, length, processed_characters_count); +} + } // namespace double_conversion // ICU PATCH: Close ICU namespace diff --git a/contrib/libs/icu/i18n/double-conversion-string-to-double.h b/contrib/libs/icu/i18n/double-conversion-string-to-double.h index 2eb0c1f897..9f6f530711 100644 --- a/contrib/libs/icu/i18n/double-conversion-string-to-double.h +++ b/contrib/libs/icu/i18n/double-conversion-string-to-double.h @@ -100,7 +100,7 @@ class StringToDoubleConverter { // This *must* start with "0x" and separate the exponent with "p". // Examples: 0x1.2p3 == 9.0 // 0x10.1p0 == 16.0625 - // ALLOW_HEX and ALLOW_HEX_FLOATS are indendent. + // ALLOW_HEX and ALLOW_HEX_FLOATS are independent. // // empty_string_value is returned when an empty string is given as input. // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string @@ -218,6 +218,18 @@ class StringToDoubleConverter { int length, int* processed_characters_count) const; + // Same as StringToDouble for T = double, and StringToFloat for T = float. 
+ template <typename T> + T StringTo(const char* buffer, + int length, + int* processed_characters_count) const; + + // Same as StringTo above but for 16 bit characters. + template <typename T> + T StringTo(const uc16* buffer, + int length, + int* processed_characters_count) const; + private: const int flags_; const double empty_string_value_; diff --git a/contrib/libs/icu/i18n/double-conversion-strtod.cpp b/contrib/libs/icu/i18n/double-conversion-strtod.cpp index ee6377782b..eea8203281 100644 --- a/contrib/libs/icu/i18n/double-conversion-strtod.cpp +++ b/contrib/libs/icu/i18n/double-conversion-strtod.cpp @@ -115,17 +115,6 @@ static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) { return Vector<const char>(buffer.start(), 0); } - -static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) { - for (int i = buffer.length() - 1; i >= 0; --i) { - if (buffer[i] != '0') { - return buffer.SubVector(0, i + 1); - } - } - return Vector<const char>(buffer.start(), 0); -} - - static void CutToMaxSignificantDigits(Vector<const char> buffer, int exponent, char* significant_buffer, @@ -216,12 +205,14 @@ static bool DoubleStrtod(Vector<const char> trimmed, int exponent, double* result) { #if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS) + // Avoid "unused parameter" warnings + (void) trimmed; + (void) exponent; + (void) result; // On x86 the floating-point stack can be 64 or 80 bits wide. If it is // 80 bits wide (as is the case on Linux) then double-rounding occurs and the // result is not accurate. // We know that Windows32 uses 64 bits and is therefore accurate. - // Note that the ARM simulator is compiled for 32bits. It therefore exhibits - // the same problem. 
return false; #else if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) { @@ -473,6 +464,11 @@ static bool IsNonZeroDigit(const char d) { return ('1' <= d) && (d <= '9'); } +#ifdef __has_cpp_attribute +#if __has_cpp_attribute(maybe_unused) +[[maybe_unused]] +#endif +#endif static bool AssertTrimmedDigits(const Vector<const char>& buffer) { for(int i = 0; i < buffer.length(); ++i) { if(!IsDigit(buffer[i])) { @@ -545,6 +541,12 @@ float Strtof(Vector<const char> buffer, int exponent) { TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, &trimmed, &updated_exponent); exponent = updated_exponent; + return StrtofTrimmed(trimmed, exponent); +} + +float StrtofTrimmed(Vector<const char> trimmed, int exponent) { + DOUBLE_CONVERSION_ASSERT(trimmed.length() <= kMaxSignificantDecimalDigits); + DOUBLE_CONVERSION_ASSERT(AssertTrimmedDigits(trimmed)); double double_guess; bool is_correct = ComputeGuess(trimmed, exponent, &double_guess); @@ -564,7 +566,7 @@ float Strtof(Vector<const char> buffer, int exponent) { // low-precision (3 digits): // when read from input: 123 // when rounded from high precision: 124. - // To do this we simply look at the neigbors of the correct result and see + // To do this we simply look at the neighbors of the correct result and see // if they would round to the same float. If the guess is not correct we have // to look at four values (since two different doubles could be the correct // double). diff --git a/contrib/libs/icu/i18n/double-conversion-strtod.h b/contrib/libs/icu/i18n/double-conversion-strtod.h index 50ef746401..abfe00a333 100644 --- a/contrib/libs/icu/i18n/double-conversion-strtod.h +++ b/contrib/libs/icu/i18n/double-conversion-strtod.h @@ -54,11 +54,25 @@ double Strtod(Vector<const char> buffer, int exponent); // contain a dot or a sign. It must not start with '0', and must not be empty. 
float Strtof(Vector<const char> buffer, int exponent); -// For special use cases, the heart of the Strtod() function is also available -// separately, it assumes that 'trimmed' is as produced by TrimAndCut(), i.e. -// no leading or trailing zeros, also no lone zero, and not 'too many' digits. +// Same as Strtod, but assumes that 'trimmed' is already trimmed, as if run +// through TrimAndCut. That is, 'trimmed' must have no leading or trailing +// zeros, must not be a lone zero, and must not have 'too many' digits. double StrtodTrimmed(Vector<const char> trimmed, int exponent); +// Same as Strtof, but assumes that 'trimmed' is already trimmed, as if run +// through TrimAndCut. That is, 'trimmed' must have no leading or trailing +// zeros, must not be a lone zero, and must not have 'too many' digits. +float StrtofTrimmed(Vector<const char> trimmed, int exponent); + +inline Vector<const char> TrimTrailingZeros(Vector<const char> buffer) { + for (int i = buffer.length() - 1; i >= 0; --i) { + if (buffer[i] != '0') { + return buffer.SubVector(0, i + 1); + } + } + return Vector<const char>(buffer.start(), 0); +} + } // namespace double_conversion // ICU PATCH: Close ICU namespace diff --git a/contrib/libs/icu/i18n/double-conversion-utils.h b/contrib/libs/icu/i18n/double-conversion-utils.h index 8c6a0e16e0..7f23e0a825 100644 --- a/contrib/libs/icu/i18n/double-conversion-utils.h +++ b/contrib/libs/icu/i18n/double-conversion-utils.h @@ -44,7 +44,7 @@ #include "uassert.h" #ifndef DOUBLE_CONVERSION_ASSERT #define DOUBLE_CONVERSION_ASSERT(condition) \ - U_ASSERT(condition); + U_ASSERT(condition) #endif #ifndef DOUBLE_CONVERSION_UNIMPLEMENTED #define DOUBLE_CONVERSION_UNIMPLEMENTED() (abort()) @@ -118,7 +118,8 @@ int main(int argc, char** argv) { defined(__ARMEL__) || defined(__avr32__) || defined(_M_ARM) || defined(_M_ARM64) || \ defined(__hppa__) || defined(__ia64__) || \ defined(__mips__) || \ - defined(__nios2__) || \ + defined(__loongarch__) || \ + defined(__nios2__) || 
defined(__ghs) || \ defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \ defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ defined(__sparc__) || defined(__sparc) || defined(__s390__) || \ diff --git a/contrib/libs/icu/i18n/dtfmtsym.cpp b/contrib/libs/icu/i18n/dtfmtsym.cpp index 690f6a4cae..ab5f2b612c 100644 --- a/contrib/libs/icu/i18n/dtfmtsym.cpp +++ b/contrib/libs/icu/i18n/dtfmtsym.cpp @@ -392,8 +392,10 @@ DateFormatSymbols::copyData(const DateFormatSymbols& other) { fTimeSeparator.fastCopyFrom(other.fTimeSeparator); // fastCopyFrom() - see assignArray comments assignArray(fQuarters, fQuartersCount, other.fQuarters, other.fQuartersCount); assignArray(fShortQuarters, fShortQuartersCount, other.fShortQuarters, other.fShortQuartersCount); + assignArray(fNarrowQuarters, fNarrowQuartersCount, other.fNarrowQuarters, other.fNarrowQuartersCount); assignArray(fStandaloneQuarters, fStandaloneQuartersCount, other.fStandaloneQuarters, other.fStandaloneQuartersCount); assignArray(fStandaloneShortQuarters, fStandaloneShortQuartersCount, other.fStandaloneShortQuarters, other.fStandaloneShortQuartersCount); + assignArray(fStandaloneNarrowQuarters, fStandaloneNarrowQuartersCount, other.fStandaloneNarrowQuarters, other.fStandaloneNarrowQuartersCount); assignArray(fWideDayPeriods, fWideDayPeriodsCount, other.fWideDayPeriods, other.fWideDayPeriodsCount); assignArray(fNarrowDayPeriods, fNarrowDayPeriodsCount, @@ -450,6 +452,7 @@ DateFormatSymbols::copyData(const DateFormatSymbols& other) { */ DateFormatSymbols& DateFormatSymbols::operator=(const DateFormatSymbols& other) { + if (this == &other) { return *this; } // self-assignment: no-op dispose(); copyData(other); @@ -484,8 +487,10 @@ void DateFormatSymbols::dispose() delete[] fNarrowAmPms; delete[] fQuarters; delete[] fShortQuarters; + delete[] fNarrowQuarters; delete[] fStandaloneQuarters; delete[] fStandaloneShortQuarters; + delete[] fStandaloneNarrowQuarters; delete[] fLeapMonthPatterns; delete[] 
fShortYearNames; delete[] fShortZodiacNames; @@ -534,12 +539,12 @@ DateFormatSymbols::arrayCompare(const UnicodeString* array1, return TRUE; } -UBool +bool DateFormatSymbols::operator==(const DateFormatSymbols& other) const { // First do cheap comparisons if (this == &other) { - return TRUE; + return true; } if (fErasCount == other.fErasCount && fEraNamesCount == other.fEraNamesCount && @@ -562,8 +567,10 @@ DateFormatSymbols::operator==(const DateFormatSymbols& other) const fNarrowAmPmsCount == other.fNarrowAmPmsCount && fQuartersCount == other.fQuartersCount && fShortQuartersCount == other.fShortQuartersCount && + fNarrowQuartersCount == other.fNarrowQuartersCount && fStandaloneQuartersCount == other.fStandaloneQuartersCount && fStandaloneShortQuartersCount == other.fStandaloneShortQuartersCount && + fStandaloneNarrowQuartersCount == other.fStandaloneNarrowQuartersCount && fLeapMonthPatternsCount == other.fLeapMonthPatternsCount && fShortYearNamesCount == other.fShortYearNamesCount && fShortZodiacNamesCount == other.fShortZodiacNamesCount && @@ -598,8 +605,10 @@ DateFormatSymbols::operator==(const DateFormatSymbols& other) const fTimeSeparator == other.fTimeSeparator && arrayCompare(fQuarters, other.fQuarters, fQuartersCount) && arrayCompare(fShortQuarters, other.fShortQuarters, fShortQuartersCount) && + arrayCompare(fNarrowQuarters, other.fNarrowQuarters, fNarrowQuartersCount) && arrayCompare(fStandaloneQuarters, other.fStandaloneQuarters, fStandaloneQuartersCount) && arrayCompare(fStandaloneShortQuarters, other.fStandaloneShortQuarters, fStandaloneShortQuartersCount) && + arrayCompare(fStandaloneNarrowQuarters, other.fStandaloneNarrowQuarters, fStandaloneNarrowQuartersCount) && arrayCompare(fLeapMonthPatterns, other.fLeapMonthPatterns, fLeapMonthPatternsCount) && arrayCompare(fShortYearNames, other.fShortYearNames, fShortYearNamesCount) && arrayCompare(fShortZodiacNames, other.fShortZodiacNames, fShortZodiacNamesCount) && @@ -616,22 +625,22 @@ 
DateFormatSymbols::operator==(const DateFormatSymbols& other) const // Compare the contents of fZoneStrings if (fZoneStrings == NULL && other.fZoneStrings == NULL) { if (fZSFLocale == other.fZSFLocale) { - return TRUE; + return true; } } else if (fZoneStrings != NULL && other.fZoneStrings != NULL) { if (fZoneStringsRowCount == other.fZoneStringsRowCount && fZoneStringsColCount == other.fZoneStringsColCount) { - UBool cmpres = TRUE; + bool cmpres = true; for (int32_t i = 0; (i < fZoneStringsRowCount) && cmpres; i++) { cmpres = arrayCompare(fZoneStrings[i], other.fZoneStrings[i], fZoneStringsColCount); } return cmpres; } } - return FALSE; + return false; } } - return FALSE; + return false; } //------------------------------------------------------ @@ -808,8 +817,8 @@ DateFormatSymbols::getQuarters(int32_t &count, DtContextType context, DtWidthTyp returnValue = fShortQuarters; break; case NARROW : - count = 0; - returnValue = NULL; + count = fNarrowQuartersCount; + returnValue = fNarrowQuarters; break; case DT_WIDTH_COUNT : break; @@ -827,8 +836,8 @@ DateFormatSymbols::getQuarters(int32_t &count, DtContextType context, DtWidthTyp returnValue = fStandaloneShortQuarters; break; case NARROW : - count = 0; - returnValue = NULL; + count = fStandaloneNarrowQuartersCount; + returnValue = fStandaloneNarrowQuarters; break; case DT_WIDTH_COUNT : break; @@ -1177,13 +1186,11 @@ DateFormatSymbols::setQuarters(const UnicodeString* quartersArray, int32_t count fShortQuartersCount = count; break; case NARROW : - /* if (fNarrowQuarters) delete[] fNarrowQuarters; fNarrowQuarters = newUnicodeStringArray(count); uprv_arrayCopy( quartersArray,fNarrowQuarters,count); fNarrowQuartersCount = count; - */ break; default : break; @@ -1206,13 +1213,11 @@ DateFormatSymbols::setQuarters(const UnicodeString* quartersArray, int32_t count fStandaloneShortQuartersCount = count; break; case NARROW : - /* if (fStandaloneNarrowQuarters) delete[] fStandaloneNarrowQuarters; fStandaloneNarrowQuarters = 
newUnicodeStringArray(count); uprv_arrayCopy( quartersArray,fStandaloneNarrowQuarters,count); fStandaloneNarrowQuartersCount = count; - */ break; default : break; @@ -1540,7 +1545,7 @@ struct CalendarDataSink : public ResourceSink { aliasPathPairs.removeAllElements(); } - virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) { + virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) override { if (U_FAILURE(errorCode)) { return; } U_ASSERT(!currentCalendarType.isEmpty()); @@ -1570,7 +1575,7 @@ struct CalendarDataSink : public ResourceSink { if (U_FAILURE(errorCode)) { return; } } LocalPointer<UnicodeString> aliasRelativePathCopy(new UnicodeString(aliasRelativePath), errorCode); - resourcesToVisitNext->addElement(aliasRelativePathCopy.getAlias(), errorCode); + resourcesToVisitNext->addElementX(aliasRelativePathCopy.getAlias(), errorCode); if (U_FAILURE(errorCode)) { return; } // Only release ownership after resourcesToVisitNext takes it (no error happened): aliasRelativePathCopy.orphan(); @@ -1580,12 +1585,12 @@ struct CalendarDataSink : public ResourceSink { // Register same-calendar alias if (arrays.get(aliasRelativePath) == NULL && maps.get(aliasRelativePath) == NULL) { LocalPointer<UnicodeString> aliasRelativePathCopy(new UnicodeString(aliasRelativePath), errorCode); - aliasPathPairs.addElement(aliasRelativePathCopy.getAlias(), errorCode); + aliasPathPairs.addElementX(aliasRelativePathCopy.getAlias(), errorCode); if (U_FAILURE(errorCode)) { return; } // Only release ownership after aliasPathPairs takes it (no error happened): aliasRelativePathCopy.orphan(); LocalPointer<UnicodeString> keyUStringCopy(new UnicodeString(keyUString), errorCode); - aliasPathPairs.addElement(keyUStringCopy.getAlias(), errorCode); + aliasPathPairs.addElementX(keyUStringCopy.getAlias(), errorCode); if (U_FAILURE(errorCode)) { return; } // Only release ownership after aliasPathPairs takes it (no error happened): 
keyUStringCopy.orphan(); @@ -1756,12 +1761,12 @@ struct CalendarDataSink : public ResourceSink { if (aliasType == SAME_CALENDAR) { // Store the alias path and the current path on aliasPathPairs LocalPointer<UnicodeString> aliasRelativePathCopy(new UnicodeString(aliasRelativePath), errorCode); - aliasPathPairs.addElement(aliasRelativePathCopy.getAlias(), errorCode); + aliasPathPairs.addElementX(aliasRelativePathCopy.getAlias(), errorCode); if (U_FAILURE(errorCode)) { return; } // Only release ownership after aliasPathPairs takes it (no error happened): aliasRelativePathCopy.orphan(); LocalPointer<UnicodeString> pathCopy(new UnicodeString(path), errorCode); - aliasPathPairs.addElement(pathCopy.getAlias(), errorCode); + aliasPathPairs.addElementX(pathCopy.getAlias(), errorCode); if (U_FAILURE(errorCode)) { return; } // Only release ownership after aliasPathPairs takes it (no error happened): pathCopy.orphan(); @@ -2066,10 +2071,14 @@ DateFormatSymbols::initializeData(const Locale& locale, const char *type, UError fQuartersCount = 0; fShortQuarters = NULL; fShortQuartersCount = 0; + fNarrowQuarters = NULL; + fNarrowQuartersCount = 0; fStandaloneQuarters = NULL; fStandaloneQuartersCount = 0; fStandaloneShortQuarters = NULL; fStandaloneShortQuartersCount = 0; + fStandaloneNarrowQuarters = NULL; + fStandaloneNarrowQuartersCount = 0; fLeapMonthPatterns = NULL; fLeapMonthPatternsCount = 0; fShortYearNames = NULL; @@ -2102,7 +2111,7 @@ DateFormatSymbols::initializeData(const Locale& locale, const char *type, UError if (U_FAILURE(status)) return; - // Create a CalendarDataSink to process this data and the resouce bundles + // Create a CalendarDataSink to process this data and the resource bundles CalendarDataSink calendarSink(status); UResourceBundle *rb = ures_open(NULL, locale.getBaseName(), &status); UResourceBundle *cb = ures_getByKey(rb, gCalendarTag, NULL, &status); @@ -2330,7 +2339,7 @@ DateFormatSymbols::initializeData(const Locale& locale, const char *type, UError // 
If format/narrow not available, use standalone/narrow assignArray(fNarrowMonths, fNarrowMonthsCount, fStandaloneNarrowMonths, fStandaloneNarrowMonthsCount); } else if (narrowMonthsEC != U_MISSING_RESOURCE_ERROR && standaloneNarrowMonthsEC == U_MISSING_RESOURCE_ERROR) { - // If standalone/narrow not availabe, use format/narrow + // If standalone/narrow not available, use format/narrow assignArray(fStandaloneNarrowMonths, fStandaloneNarrowMonthsCount, fNarrowMonths, fNarrowMonthsCount); } else if (narrowMonthsEC == U_MISSING_RESOURCE_ERROR && standaloneNarrowMonthsEC == U_MISSING_RESOURCE_ERROR) { // If neither is available, use format/abbreviated @@ -2373,6 +2382,16 @@ DateFormatSymbols::initializeData(const Locale& locale, const char *type, UError assignArray(fStandaloneShortQuarters, fStandaloneShortQuartersCount, fShortQuarters, fShortQuartersCount); } + // unlike the fields above, narrow format quarters fall back on narrow standalone quarters + initField(&fStandaloneNarrowQuarters, fStandaloneNarrowQuartersCount, calendarSink, + buildResourcePath(path, gQuartersTag, gNamesStandaloneTag, gNamesNarrowTag, status), status); + initField(&fNarrowQuarters, fNarrowQuartersCount, calendarSink, + buildResourcePath(path, gQuartersTag, gNamesFormatTag, gNamesNarrowTag, status), status); + if(status == U_MISSING_RESOURCE_ERROR) { + status = U_ZERO_ERROR; + assignArray(fNarrowQuarters, fNarrowQuartersCount, fStandaloneNarrowQuarters, fStandaloneNarrowQuartersCount); + } + // ICU 3.8 or later version no longer uses localized date-time pattern characters by default (ticket#5597) /* // fastCopyFrom()/setTo() - see assignArray comments @@ -2481,8 +2500,10 @@ DateFormatSymbols::initializeData(const Locale& locale, const char *type, UError initField(&fNarrowAmPms, fNarrowAmPmsCount, (const UChar *)gLastResortAmPmMarkers, kAmPmNum, kAmPmLen, status); initField(&fQuarters, fQuartersCount, (const UChar *)gLastResortQuarters, kQuarterNum, kQuarterLen, status); 
initField(&fShortQuarters, fShortQuartersCount, (const UChar *)gLastResortQuarters, kQuarterNum, kQuarterLen, status); + initField(&fNarrowQuarters, fNarrowQuartersCount, (const UChar *)gLastResortQuarters, kQuarterNum, kQuarterLen, status); initField(&fStandaloneQuarters, fStandaloneQuartersCount, (const UChar *)gLastResortQuarters, kQuarterNum, kQuarterLen, status); initField(&fStandaloneShortQuarters, fStandaloneShortQuartersCount, (const UChar *)gLastResortQuarters, kQuarterNum, kQuarterLen, status); + initField(&fStandaloneNarrowQuarters, fStandaloneNarrowQuartersCount, (const UChar *)gLastResortQuarters, kQuarterNum, kQuarterLen, status); fLocalPatternChars.setTo(TRUE, gPatternChars, PATTERN_CHARS_LEN); } } diff --git a/contrib/libs/icu/i18n/dtitv_impl.h b/contrib/libs/icu/i18n/dtitv_impl.h index c7addf37fb..6fc16bb3e0 100644 --- a/contrib/libs/icu/i18n/dtitv_impl.h +++ b/contrib/libs/icu/i18n/dtitv_impl.h @@ -64,11 +64,13 @@ #define LOW_Z ((UChar)0x007A) #define CAP_A ((UChar)0x0041) +#define CAP_B ((UChar)0x0042) #define CAP_C ((UChar)0x0043) #define CAP_D ((UChar)0x0044) #define CAP_E ((UChar)0x0045) #define CAP_F ((UChar)0x0046) #define CAP_G ((UChar)0x0047) +#define CAP_J ((UChar)0x004A) #define CAP_H ((UChar)0x0048) #define CAP_K ((UChar)0x004B) #define CAP_L ((UChar)0x004C) diff --git a/contrib/libs/icu/i18n/dtitvfmt.cpp b/contrib/libs/icu/i18n/dtitvfmt.cpp index b71a571d1c..298fb62be0 100644 --- a/contrib/libs/icu/i18n/dtitvfmt.cpp +++ b/contrib/libs/icu/i18n/dtitvfmt.cpp @@ -23,6 +23,7 @@ #include "unicode/dtptngen.h" #include "unicode/dtitvinf.h" #include "unicode/simpleformatter.h" +#include "unicode/udisplaycontext.h" #include "cmemory.h" #include "cstring.h" #include "dtitv_impl.h" @@ -143,7 +144,8 @@ DateIntervalFormat::DateIntervalFormat() fLocale(Locale::getRoot()), fDatePattern(nullptr), fTimePattern(nullptr), - fDateTimeFormat(nullptr) + fDateTimeFormat(nullptr), + fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE) {} @@ -156,7 +158,8 @@ 
DateIntervalFormat::DateIntervalFormat(const DateIntervalFormat& itvfmt) fLocale(itvfmt.fLocale), fDatePattern(nullptr), fTimePattern(nullptr), - fDateTimeFormat(nullptr) { + fDateTimeFormat(nullptr), + fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE) { *this = itvfmt; } @@ -203,6 +206,7 @@ DateIntervalFormat::operator=(const DateIntervalFormat& itvfmt) { fDatePattern = (itvfmt.fDatePattern)? itvfmt.fDatePattern->clone(): nullptr; fTimePattern = (itvfmt.fTimePattern)? itvfmt.fTimePattern->clone(): nullptr; fDateTimeFormat = (itvfmt.fDateTimeFormat)? itvfmt.fDateTimeFormat->clone(): nullptr; + fCapitalizationContext = itvfmt.fCapitalizationContext; } return *this; } @@ -225,36 +229,37 @@ DateIntervalFormat::clone() const { } -UBool +bool DateIntervalFormat::operator==(const Format& other) const { - if (typeid(*this) != typeid(other)) {return FALSE;} + if (typeid(*this) != typeid(other)) {return false;} const DateIntervalFormat* fmt = (DateIntervalFormat*)&other; - if (this == fmt) {return TRUE;} - if (!Format::operator==(other)) {return FALSE;} - if ((fInfo != fmt->fInfo) && (fInfo == nullptr || fmt->fInfo == nullptr)) {return FALSE;} - if (fInfo && fmt->fInfo && (*fInfo != *fmt->fInfo )) {return FALSE;} + if (this == fmt) {return true;} + if (!Format::operator==(other)) {return false;} + if ((fInfo != fmt->fInfo) && (fInfo == nullptr || fmt->fInfo == nullptr)) {return false;} + if (fInfo && fmt->fInfo && (*fInfo != *fmt->fInfo )) {return false;} { Mutex lock(&gFormatterMutex); - if (fDateFormat != fmt->fDateFormat && (fDateFormat == nullptr || fmt->fDateFormat == nullptr)) {return FALSE;} - if (fDateFormat && fmt->fDateFormat && (*fDateFormat != *fmt->fDateFormat)) {return FALSE;} + if (fDateFormat != fmt->fDateFormat && (fDateFormat == nullptr || fmt->fDateFormat == nullptr)) {return false;} + if (fDateFormat && fmt->fDateFormat && (*fDateFormat != *fmt->fDateFormat)) {return false;} } // note: fFromCalendar and fToCalendar hold no persistent state, and 
therefore do not participate in operator ==. - // fDateFormat has the master calendar for the DateIntervalFormat. - if (fSkeleton != fmt->fSkeleton) {return FALSE;} - if (fDatePattern != fmt->fDatePattern && (fDatePattern == nullptr || fmt->fDatePattern == nullptr)) {return FALSE;} - if (fDatePattern && fmt->fDatePattern && (*fDatePattern != *fmt->fDatePattern)) {return FALSE;} - if (fTimePattern != fmt->fTimePattern && (fTimePattern == nullptr || fmt->fTimePattern == nullptr)) {return FALSE;} - if (fTimePattern && fmt->fTimePattern && (*fTimePattern != *fmt->fTimePattern)) {return FALSE;} - if (fDateTimeFormat != fmt->fDateTimeFormat && (fDateTimeFormat == nullptr || fmt->fDateTimeFormat == nullptr)) {return FALSE;} - if (fDateTimeFormat && fmt->fDateTimeFormat && (*fDateTimeFormat != *fmt->fDateTimeFormat)) {return FALSE;} - if (fLocale != fmt->fLocale) {return FALSE;} + // fDateFormat has the primary calendar for the DateIntervalFormat. + if (fSkeleton != fmt->fSkeleton) {return false;} + if (fDatePattern != fmt->fDatePattern && (fDatePattern == nullptr || fmt->fDatePattern == nullptr)) {return false;} + if (fDatePattern && fmt->fDatePattern && (*fDatePattern != *fmt->fDatePattern)) {return false;} + if (fTimePattern != fmt->fTimePattern && (fTimePattern == nullptr || fmt->fTimePattern == nullptr)) {return false;} + if (fTimePattern && fmt->fTimePattern && (*fTimePattern != *fmt->fTimePattern)) {return false;} + if (fDateTimeFormat != fmt->fDateTimeFormat && (fDateTimeFormat == nullptr || fmt->fDateTimeFormat == nullptr)) {return false;} + if (fDateTimeFormat && fmt->fDateTimeFormat && (*fDateTimeFormat != *fmt->fDateTimeFormat)) {return false;} + if (fLocale != fmt->fLocale) {return false;} for (int32_t i = 0; i< DateIntervalInfo::kIPI_MAX_INDEX; ++i ) { - if (fIntervalPatterns[i].firstPart != fmt->fIntervalPatterns[i].firstPart) {return FALSE;} - if (fIntervalPatterns[i].secondPart != fmt->fIntervalPatterns[i].secondPart ) {return FALSE;} - if 
(fIntervalPatterns[i].laterDateFirst != fmt->fIntervalPatterns[i].laterDateFirst) {return FALSE;} + if (fIntervalPatterns[i].firstPart != fmt->fIntervalPatterns[i].firstPart) {return false;} + if (fIntervalPatterns[i].secondPart != fmt->fIntervalPatterns[i].secondPart ) {return false;} + if (fIntervalPatterns[i].laterDateFirst != fmt->fIntervalPatterns[i].laterDateFirst) {return false;} } - return TRUE; + if (fCapitalizationContext != fmt->fCapitalizationContext) {return false;} + return true; } @@ -409,6 +414,7 @@ UnicodeString& DateIntervalFormat::formatIntervalImpl( } +// The following is only called from within the gFormatterMutex lock UnicodeString& DateIntervalFormat::formatImpl(Calendar& fromCalendar, Calendar& toCalendar, @@ -464,6 +470,11 @@ DateIntervalFormat::formatImpl(Calendar& fromCalendar, if ( U_FAILURE(status) ) { return appendTo; } + UErrorCode tempStatus = U_ZERO_ERROR; // for setContext, ignored + // Set up fDateFormat to handle the first or only part of the interval + // (override later for any second part). Inside lock, OK to modify fDateFormat. + fDateFormat->setContext(fCapitalizationContext, tempStatus); + if ( field == UCAL_FIELD_COUNT ) { /* ignore the millisecond etc. small fields' difference. * use single date when all the above are the same. 
@@ -521,6 +532,9 @@ DateIntervalFormat::formatImpl(Calendar& fromCalendar, if ( !intervalPattern.secondPart.isEmpty() ) { fDateFormat->applyPattern(intervalPattern.secondPart); + // No capitalization for second part of interval + tempStatus = U_ZERO_ERROR; + fDateFormat->setContext(UDISPCTX_CAPITALIZATION_NONE, tempStatus); fDateFormat->_format(*secondCal, appendTo, fphandler, status); } fDateFormat->applyPattern(originalPattern); @@ -583,7 +597,7 @@ DateIntervalFormat::adoptTimeZone(TimeZone* zone) if (fDateFormat != nullptr) { fDateFormat->adoptTimeZone(zone); } - // The fDateFormat has the master calendar for the DateIntervalFormat and has + // The fDateFormat has the primary calendar for the DateIntervalFormat and has // ownership of any adopted TimeZone; fFromCalendar and fToCalendar are internal // work clones of that calendar (and should not also be given ownership of the // adopted TimeZone). @@ -601,7 +615,7 @@ DateIntervalFormat::setTimeZone(const TimeZone& zone) if (fDateFormat != nullptr) { fDateFormat->setTimeZone(zone); } - // The fDateFormat has the master calendar for the DateIntervalFormat; + // The fDateFormat has the primary calendar for the DateIntervalFormat; // fFromCalendar and fToCalendar are internal work clones of that calendar. 
if (fFromCalendar) { fFromCalendar->setTimeZone(zone); @@ -622,6 +636,30 @@ DateIntervalFormat::getTimeZone() const return *(TimeZone::createDefault()); } +void +DateIntervalFormat::setContext(UDisplayContext value, UErrorCode& status) +{ + if (U_FAILURE(status)) + return; + if ( (UDisplayContextType)((uint32_t)value >> 8) == UDISPCTX_TYPE_CAPITALIZATION ) { + fCapitalizationContext = value; + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +UDisplayContext +DateIntervalFormat::getContext(UDisplayContextType type, UErrorCode& status) const +{ + if (U_FAILURE(status)) + return (UDisplayContext)0; + if (type != UDISPCTX_TYPE_CAPITALIZATION) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return (UDisplayContext)0; + } + return fCapitalizationContext; +} + DateIntervalFormat::DateIntervalFormat(const Locale& locale, DateIntervalInfo* dtItvInfo, const UnicodeString* skeleton, @@ -633,7 +671,8 @@ DateIntervalFormat::DateIntervalFormat(const Locale& locale, fLocale(locale), fDatePattern(nullptr), fTimePattern(nullptr), - fDateTimeFormat(nullptr) + fDateTimeFormat(nullptr), + fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE) { LocalPointer<DateIntervalInfo> info(dtItvInfo, status); LocalPointer<SimpleDateFormat> dtfmt(static_cast<SimpleDateFormat *>( @@ -665,7 +704,7 @@ DateIntervalFormat::create(const Locale& locale, status = U_MEMORY_ALLOCATION_ERROR; delete dtitvinf; } else if ( U_FAILURE(status) ) { - // safe to delete f, although nothing acutally is saved + // safe to delete f, although nothing actually is saved delete f; f = 0; } @@ -751,7 +790,7 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { /* the difference between time skeleton and normalizedTimeSkeleton are: * 1. (Formerly, normalized time skeleton folded 'H' to 'h'; no longer true) - * 2. 'a' is omitted in normalized time skeleton. + * 2. (Formerly, 'a' was omitted in normalized time skeleton; this is now handled elsewhere) * 3. 
there is only one appearance for 'h' or 'H', 'm','v', 'z' in normalized * time skeleton * @@ -760,7 +799,8 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { * 2. 'E' and 'EE' are normalized into 'EEE' * 3. 'MM' is normalized into 'M' */ - getDateTimeSkeleton(fSkeleton, dateSkeleton, normalizedDateSkeleton, + UnicodeString convertedSkeleton = normalizeHourMetacharacters(fSkeleton); + getDateTimeSkeleton(convertedSkeleton, dateSkeleton, normalizedDateSkeleton, timeSkeleton, normalizedTimeSkeleton); #ifdef DTITVFMT_DEBUG @@ -823,6 +863,14 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { setPatternInfo(UCAL_DATE, nullptr, &pattern, fInfo->getDefaultOrder()); setPatternInfo(UCAL_MONTH, nullptr, &pattern, fInfo->getDefaultOrder()); setPatternInfo(UCAL_YEAR, nullptr, &pattern, fInfo->getDefaultOrder()); + + timeSkeleton.insert(0, CAP_G); + pattern = DateFormat::getBestPattern( + locale, timeSkeleton, status); + if ( U_FAILURE(status) ) { + return; + } + setPatternInfo(UCAL_ERA, nullptr, &pattern, fInfo->getDefaultOrder()); } else { // TODO: fall back } @@ -849,15 +897,23 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { setPatternInfo(UCAL_DATE, nullptr, &pattern, fInfo->getDefaultOrder()); setPatternInfo(UCAL_MONTH, nullptr, &pattern, fInfo->getDefaultOrder()); setPatternInfo(UCAL_YEAR, nullptr, &pattern, fInfo->getDefaultOrder()); + + timeSkeleton.insert(0, CAP_G); + pattern = DateFormat::getBestPattern( + locale, timeSkeleton, status); + if ( U_FAILURE(status) ) { + return; + } + setPatternInfo(UCAL_ERA, nullptr, &pattern, fInfo->getDefaultOrder()); } else { /* if both present, - * 1) when the year, month, or day differs, + * 1) when the era, year, month, or day differs, * concatenate the two original expressions with a separator between, * 2) otherwise, present the date followed by the * range expression for the time. 
*/ /* - * 1) when the year, month, or day differs, + * 1) when the era, year, month, or day differs, * concatenate the two original expressions with a separator between, */ // if field exists, use fall back @@ -877,6 +933,11 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { skeleton.insert(0, LOW_Y); setFallbackPattern(UCAL_YEAR, skeleton, status); } + if ( !fieldExistsInSkeleton(UCAL_ERA, dateSkeleton) ) { + // then prefix skeleton with 'G' + skeleton.insert(0, CAP_G); + setFallbackPattern(UCAL_ERA, skeleton, status); + } /* * 2) otherwise, present the date followed by the @@ -899,6 +960,91 @@ DateIntervalFormat::initializePattern(UErrorCode& status) { +UnicodeString +DateIntervalFormat::normalizeHourMetacharacters(const UnicodeString& skeleton) const { + UnicodeString result = skeleton; + + UChar hourMetachar = u'\0'; + int32_t metacharStart = 0; + int32_t metacharCount = 0; + for (int32_t i = 0; i < result.length(); i++) { + UChar c = result[i]; + if (c == LOW_J || c == CAP_J || c == CAP_C) { + if (hourMetachar == u'\0') { + hourMetachar = c; + metacharStart = i; + } + ++metacharCount; + } else { + if (hourMetachar != u'\0') { + break; + } + } + } + + if (hourMetachar != u'\0') { + UErrorCode err = U_ZERO_ERROR; + UChar hourChar = CAP_H; + UChar dayPeriodChar = LOW_A; + UnicodeString convertedPattern = DateFormat::getBestPattern(fLocale, UnicodeString(hourMetachar), err); + + if (U_SUCCESS(err)) { + // strip literal text from the pattern (so literal characters don't get mistaken for pattern + // characters-- such as the 'h' in 'Uhr' in Germam) + int32_t firstQuotePos; + while ((firstQuotePos = convertedPattern.indexOf(u'\'')) != -1) { + int32_t secondQuotePos = convertedPattern.indexOf(u'\'', firstQuotePos + 1); + if (secondQuotePos == -1) { + secondQuotePos = firstQuotePos; + } + convertedPattern.replace(firstQuotePos, (secondQuotePos - firstQuotePos) + 1, UnicodeString()); + } + + if (convertedPattern.indexOf(LOW_H) != -1) { + hourChar = LOW_H; + 
} else if (convertedPattern.indexOf(CAP_K) != -1) { + hourChar = CAP_K; + } else if (convertedPattern.indexOf(LOW_K) != -1) { + hourChar = LOW_K; + } + + if (convertedPattern.indexOf(LOW_B) != -1) { + dayPeriodChar = LOW_B; + } else if (convertedPattern.indexOf(CAP_B) != -1) { + dayPeriodChar = CAP_B; + } + } + + if (hourChar == CAP_H || hourChar == LOW_K) { + result.replace(metacharStart, metacharCount, hourChar); + } else { + UnicodeString hourAndDayPeriod(hourChar); + switch (metacharCount) { + case 1: + case 2: + default: + hourAndDayPeriod.append(UnicodeString(dayPeriodChar)); + break; + case 3: + case 4: + for (int32_t i = 0; i < 4; i++) { + hourAndDayPeriod.append(dayPeriodChar); + } + break; + case 5: + case 6: + for (int32_t i = 0; i < 5; i++) { + hourAndDayPeriod.append(dayPeriodChar); + } + break; + } + result.replace(metacharStart, metacharCount, hourAndDayPeriod); + } + } + return result; +} + + void U_EXPORT2 DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, UnicodeString& dateSkeleton, @@ -911,11 +1057,10 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, int32_t dCount = 0; int32_t MCount = 0; int32_t yCount = 0; - int32_t hCount = 0; - int32_t HCount = 0; int32_t mCount = 0; int32_t vCount = 0; int32_t zCount = 0; + UChar hourChar = u'\0'; int32_t i; for (i = 0; i < skeleton.length(); ++i) { @@ -956,17 +1101,14 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, normalizedDateSkeleton.append(ch); dateSkeleton.append(ch); break; - case LOW_A: - // 'a' is implicitly handled - timeSkeleton.append(ch); - break; case LOW_H: - timeSkeleton.append(ch); - ++hCount; - break; case CAP_H: + case LOW_K: + case CAP_K: timeSkeleton.append(ch); - ++HCount; + if (hourChar == u'\0') { + hourChar = ch; + } break; case LOW_M: timeSkeleton.append(ch); @@ -980,14 +1122,15 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, ++vCount; timeSkeleton.append(ch); break; + case LOW_A: case 
CAP_V: case CAP_Z: - case LOW_K: - case CAP_K: case LOW_J: case LOW_S: case CAP_S: case CAP_A: + case LOW_B: + case CAP_B: timeSkeleton.append(ch); normalizedTimeSkeleton.append(ch); break; @@ -1023,11 +1166,8 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, } /* generate normalized form for time */ - if ( HCount != 0 ) { - normalizedTimeSkeleton.append(CAP_H); - } - else if ( hCount != 0 ) { - normalizedTimeSkeleton.append(LOW_H); + if ( hourChar != u'\0' ) { + normalizedTimeSkeleton.append(hourChar); } if ( mCount != 0 ) { normalizedTimeSkeleton.append(LOW_M); @@ -1303,7 +1443,11 @@ DateIntervalFormat::setIntervalPattern(UCalendarDateFields field, if ( field == UCAL_AM_PM ) { fInfo->getIntervalPattern(*bestSkeleton, UCAL_HOUR, pattern,status); if ( !pattern.isEmpty() ) { - setIntervalPattern(field, pattern); + UBool suppressDayPeriodField = fSkeleton.indexOf(CAP_J) != -1; + UnicodeString adjustIntervalPattern; + adjustFieldWidth(*skeleton, *bestSkeleton, pattern, differenceInfo, + suppressDayPeriodField, adjustIntervalPattern); + setIntervalPattern(field, adjustIntervalPattern); } return false; } @@ -1335,10 +1479,11 @@ DateIntervalFormat::setIntervalPattern(UCalendarDateFields field, } } if ( !pattern.isEmpty() ) { - if ( differenceInfo != 0 ) { + UBool suppressDayPeriodField = fSkeleton.indexOf(CAP_J) != -1; + if ( differenceInfo != 0 || suppressDayPeriodField) { UnicodeString adjustIntervalPattern; adjustFieldWidth(*skeleton, *bestSkeleton, pattern, differenceInfo, - adjustIntervalPattern); + suppressDayPeriodField, adjustIntervalPattern); setIntervalPattern(field, adjustIntervalPattern); } else { setIntervalPattern(field, pattern); @@ -1425,6 +1570,7 @@ DateIntervalFormat::splitPatternInto2Part(const UnicodeString& intervalPattern) return (i - count); } +// The following is only called from fallbackFormat, i.e. 
within the gFormatterMutex lock void DateIntervalFormat::fallbackFormatRange( Calendar& fromCalendar, Calendar& toCalendar, @@ -1441,12 +1587,15 @@ void DateIntervalFormat::fallbackFormatRange( int32_t offsets[2]; UnicodeString patternBody = sf.getTextWithNoArguments(offsets, 2); + UErrorCode tempStatus = U_ZERO_ERROR; // for setContext, ignored // TODO(ICU-20406): Use SimpleFormatter Iterator interface when available. if (offsets[0] < offsets[1]) { firstIndex = 0; appendTo.append(patternBody.tempSubStringBetween(0, offsets[0])); fDateFormat->_format(fromCalendar, appendTo, fphandler, status); appendTo.append(patternBody.tempSubStringBetween(offsets[0], offsets[1])); + // No capitalization for second part of interval + fDateFormat->setContext(UDISPCTX_CAPITALIZATION_NONE, tempStatus); fDateFormat->_format(toCalendar, appendTo, fphandler, status); appendTo.append(patternBody.tempSubStringBetween(offsets[1])); } else { @@ -1454,11 +1603,14 @@ void DateIntervalFormat::fallbackFormatRange( appendTo.append(patternBody.tempSubStringBetween(0, offsets[1])); fDateFormat->_format(toCalendar, appendTo, fphandler, status); appendTo.append(patternBody.tempSubStringBetween(offsets[1], offsets[0])); + // No capitalization for second part of interval + fDateFormat->setContext(UDISPCTX_CAPITALIZATION_NONE, tempStatus); fDateFormat->_format(fromCalendar, appendTo, fphandler, status); appendTo.append(patternBody.tempSubStringBetween(offsets[0])); } } +// The following is only called from formatImpl, i.e. 
within the gFormatterMutex lock UnicodeString& DateIntervalFormat::fallbackFormat(Calendar& fromCalendar, Calendar& toCalendar, @@ -1483,6 +1635,7 @@ DateIntervalFormat::fallbackFormat(Calendar& fromCalendar, UnicodeString fullPattern; // for saving the pattern in fDateFormat fDateFormat->toPattern(fullPattern); // save current pattern, restore later + UErrorCode tempStatus = U_ZERO_ERROR; // for setContext, ignored // {0} is time range // {1} is single date portion // TODO(ICU-20406): Use SimpleFormatter Iterator interface when available. @@ -1492,6 +1645,8 @@ DateIntervalFormat::fallbackFormat(Calendar& fromCalendar, fallbackFormatRange(fromCalendar, toCalendar, appendTo, firstIndex, fphandler, status); appendTo.append(patternBody.tempSubStringBetween(offsets[0], offsets[1])); fDateFormat->applyPattern(*fDatePattern); + // No capitalization for second portion + fDateFormat->setContext(UDISPCTX_CAPITALIZATION_NONE, tempStatus); fDateFormat->_format(fromCalendar, appendTo, fphandler, status); appendTo.append(patternBody.tempSubStringBetween(offsets[1])); } else { @@ -1500,6 +1655,8 @@ DateIntervalFormat::fallbackFormat(Calendar& fromCalendar, fDateFormat->_format(fromCalendar, appendTo, fphandler, status); appendTo.append(patternBody.tempSubStringBetween(offsets[1], offsets[0])); fDateFormat->applyPattern(*fTimePattern); + // No capitalization for second portion + fDateFormat->setContext(UDISPCTX_CAPITALIZATION_NONE, tempStatus); fallbackFormatRange(fromCalendar, toCalendar, appendTo, firstIndex, fphandler, status); appendTo.append(patternBody.tempSubStringBetween(offsets[0])); } @@ -1530,6 +1687,7 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, const UnicodeString& bestMatchSkeleton, const UnicodeString& bestIntervalPattern, int8_t differenceInfo, + UBool suppressDayPeriodField, UnicodeString& adjustedPtn) { adjustedPtn = bestIntervalPattern; int32_t inputSkeletonFieldWidth[] = @@ -1556,19 +1714,40 @@ 
DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + const int8_t PATTERN_CHAR_BASE = 0x41; + DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth); DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth); - if ( differenceInfo == 2 ) { - adjustedPtn.findAndReplace(UnicodeString((UChar)0x76 /* v */), - UnicodeString((UChar)0x7a /* z */)); + if (suppressDayPeriodField) { + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_A), UnicodeString()); + findReplaceInPattern(adjustedPtn, UnicodeString(" "), UnicodeString(" ")); + adjustedPtn.trim(); } + if ( differenceInfo == 2 ) { + if (inputSkeleton.indexOf(LOW_Z) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_V), UnicodeString(LOW_Z)); + } + if (inputSkeleton.indexOf(CAP_K) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_H), UnicodeString(CAP_K)); + } + if (inputSkeleton.indexOf(LOW_K) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(CAP_H), UnicodeString(LOW_K)); + } + if (inputSkeleton.indexOf(LOW_B) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_A), UnicodeString(LOW_B)); + } + } + if (adjustedPtn.indexOf(LOW_A) != -1 && bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] == 0) { + bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] = 1; + } + if (adjustedPtn.indexOf(LOW_B) != -1 && bestMatchSkeletonFieldWidth[LOW_B - PATTERN_CHAR_BASE] == 0) { + bestMatchSkeletonFieldWidth[LOW_B - PATTERN_CHAR_BASE] = 1; + } UBool inQuote = false; UChar prevCh = 0; int32_t count = 0; - const int8_t PATTERN_CHAR_BASE = 0x41; - // loop through the pattern string character by character int32_t adjustedPtnLength = adjustedPtn.length(); int32_t i; @@ -1634,6 +1813,39 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, } } +void +DateIntervalFormat::findReplaceInPattern(UnicodeString& targetString, + const UnicodeString& strToReplace, + const 
UnicodeString& strToReplaceWith) { + int32_t firstQuoteIndex = targetString.indexOf(u'\''); + if (firstQuoteIndex == -1) { + targetString.findAndReplace(strToReplace, strToReplaceWith); + } else { + UnicodeString result; + UnicodeString source = targetString; + + while (firstQuoteIndex >= 0) { + int32_t secondQuoteIndex = source.indexOf(u'\'', firstQuoteIndex + 1); + if (secondQuoteIndex == -1) { + secondQuoteIndex = source.length() - 1; + } + + UnicodeString unquotedText(source, 0, firstQuoteIndex); + UnicodeString quotedText(source, firstQuoteIndex, secondQuoteIndex - firstQuoteIndex + 1); + + unquotedText.findAndReplace(strToReplace, strToReplaceWith); + result += unquotedText; + result += quotedText; + + source.remove(0, secondQuoteIndex + 1); + firstQuoteIndex = source.indexOf(u'\''); + } + source.findAndReplace(strToReplace, strToReplaceWith); + result += source; + targetString = result; + } +} + void diff --git a/contrib/libs/icu/i18n/dtitvinf.cpp b/contrib/libs/icu/i18n/dtitvinf.cpp index 25536346ec..6052894b58 100644 --- a/contrib/libs/icu/i18n/dtitvinf.cpp +++ b/contrib/libs/icu/i18n/dtitvinf.cpp @@ -50,7 +50,6 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateIntervalInfo) static const char gCalendarTag[]="calendar"; -static const char gGenericTag[]="generic"; static const char gGregorianTag[]="gregorian"; static const char gIntervalDateTimePatternTag[]="intervalFormats"; static const char gFallbackPatternTag[]="fallback"; @@ -165,13 +164,13 @@ DateIntervalInfo::~DateIntervalInfo() { } -UBool +bool DateIntervalInfo::operator==(const DateIntervalInfo& other) const { - UBool equal = ( + bool equal = ( fFallbackIntervalPattern == other.fFallbackIntervalPattern && fFirstDateInPtnIsLaterDate == other.fFirstDateInPtnIsLaterDate ); - if ( equal == TRUE ) { + if ( equal ) { equal = fIntervalPatterns->equals(*(other.fIntervalPatterns)); } @@ -240,7 +239,7 @@ struct DateIntervalInfo::DateIntervalSink : public ResourceSink { : dateIntervalInfo(diInfo), 
nextCalendarType(currentCalendarType, -1, US_INV) { } virtual ~DateIntervalSink(); - virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &errorCode) { + virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &errorCode) override { if (U_FAILURE(errorCode)) { return; } // Iterate over all the calendar entries and only pick the 'intervalFormats' table. @@ -339,6 +338,9 @@ struct DateIntervalInfo::DateIntervalSink : public ResourceSink { return UCAL_DATE; } else if (c0 == 'a') { return UCAL_AM_PM; + } else if (c0 == 'B') { + // TODO: Using AM/PM as a proxy for flexible day period isn't really correct, but it's close + return UCAL_AM_PM; } else if (c0 == 'h' || c0 == 'H') { return UCAL_HOUR; } else if (c0 == 'm') { @@ -432,23 +434,6 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& status) if ( U_SUCCESS(status) ) { resStr = ures_getStringByKeyWithFallback(itvDtPtnResource, gFallbackPatternTag, &resStrLen, &status); - if ( U_FAILURE(status) ) { - // Try to find "fallback" from "generic" to work around the bug in - // ures_getByKeyWithFallback - UErrorCode localStatus = U_ZERO_ERROR; - UResourceBundle *genericCalBundle = - ures_getByKeyWithFallback(calBundle, gGenericTag, nullptr, &localStatus); - UResourceBundle *genericItvDtPtnResource = - ures_getByKeyWithFallback( - genericCalBundle, gIntervalDateTimePatternTag, nullptr, &localStatus); - resStr = ures_getStringByKeyWithFallback( - genericItvDtPtnResource, gFallbackPatternTag, &resStrLen, &localStatus); - ures_close(genericItvDtPtnResource); - ures_close(genericCalBundle); - if ( U_SUCCESS(localStatus) ) { - status = U_USING_FALLBACK_WARNING;; - } - } } if ( U_SUCCESS(status) && (resStr != nullptr)) { @@ -594,20 +579,23 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton, const int32_t DIFFERENT_FIELD = 0x1000; const int32_t STRING_NUMERIC_DIFFERENCE = 0x100; const int32_t BASE = 0x41; - const UChar CHAR_V = 
0x0076; - const UChar CHAR_Z = 0x007A; - // hack for 'v' and 'z'. - // resource bundle only have time skeletons ending with 'v', - // but not for time skeletons ending with 'z'. - UBool replaceZWithV = false; + // hack for certain alternate characters + // resource bundles only have time skeletons containing 'v', 'h', and 'H' + // but not time skeletons containing 'z', 'K', or 'k' + // the skeleton may also include 'a' or 'b', which never occur in the resource bundles, so strip them out too + UBool replacedAlternateChars = false; const UnicodeString* inputSkeleton = &skeleton; UnicodeString copySkeleton; - if ( skeleton.indexOf(CHAR_Z) != -1 ) { + if ( skeleton.indexOf(LOW_Z) != -1 || skeleton.indexOf(LOW_K) != -1 || skeleton.indexOf(CAP_K) != -1 || skeleton.indexOf(LOW_A) != -1 || skeleton.indexOf(LOW_B) != -1 ) { copySkeleton = skeleton; - copySkeleton.findAndReplace(UnicodeString(CHAR_Z), UnicodeString(CHAR_V)); + copySkeleton.findAndReplace(UnicodeString(LOW_Z), UnicodeString(LOW_V)); + copySkeleton.findAndReplace(UnicodeString(LOW_K), UnicodeString(CAP_H)); + copySkeleton.findAndReplace(UnicodeString(CAP_K), UnicodeString(LOW_H)); + copySkeleton.findAndReplace(UnicodeString(LOW_A), UnicodeString()); + copySkeleton.findAndReplace(UnicodeString(LOW_B), UnicodeString()); inputSkeleton = ©Skeleton; - replaceZWithV = true; + replacedAlternateChars = true; } parseSkeleton(*inputSkeleton, inputSkeletonFieldWidth); @@ -616,7 +604,7 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton, // 0 means exact the same skeletons; // 1 means having the same field, but with different length, - // 2 means only z/v differs + // 2 means only z/v, h/K, or H/k differs // -1 means having different field. 
bestMatchDistanceInfo = 0; int8_t fieldLength = UPRV_LENGTHOF(skeletonFieldWidth); @@ -672,7 +660,7 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton, break; } } - if ( replaceZWithV && bestMatchDistanceInfo != -1 ) { + if ( replacedAlternateChars && bestMatchDistanceInfo != -1 ) { bestMatchDistanceInfo = 2; } return bestSkeleton; diff --git a/contrib/libs/icu/i18n/dtptngen.cpp b/contrib/libs/icu/i18n/dtptngen.cpp index 02be4f054b..6aee1750f9 100644 --- a/contrib/libs/icu/i18n/dtptngen.cpp +++ b/contrib/libs/icu/i18n/dtptngen.cpp @@ -311,6 +311,16 @@ DateTimePatternGenerator::createInstance(const Locale& locale, UErrorCode& statu return U_SUCCESS(status) ? result.orphan() : nullptr; } +DateTimePatternGenerator* U_EXPORT2 +DateTimePatternGenerator::createInstanceNoStdPat(const Locale& locale, UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + LocalPointer<DateTimePatternGenerator> result( + new DateTimePatternGenerator(locale, status, true), status); + return U_SUCCESS(status) ? 
result.orphan() : nullptr; +} + DateTimePatternGenerator* U_EXPORT2 DateTimePatternGenerator::createEmptyInstance(UErrorCode& status) { if (U_FAILURE(status)) { @@ -336,7 +346,7 @@ DateTimePatternGenerator::DateTimePatternGenerator(UErrorCode &status) : } } -DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorCode &status) : +DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorCode &status, UBool skipStdPatterns) : skipMatcher(nullptr), fAvailableFormatKeyHash(nullptr), fDefaultHourFormatChar(0), @@ -350,7 +360,7 @@ DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorC internalErrorCode = status = U_MEMORY_ALLOCATION_ERROR; } else { - initData(locale, status); + initData(locale, status, skipStdPatterns); } } @@ -414,31 +424,31 @@ DateTimePatternGenerator::operator=(const DateTimePatternGenerator& other) { } -UBool +bool DateTimePatternGenerator::operator==(const DateTimePatternGenerator& other) const { if (this == &other) { - return TRUE; + return true; } if ((pLocale==other.pLocale) && (patternMap->equals(*other.patternMap)) && (dateTimeFormat==other.dateTimeFormat) && (decimal==other.decimal)) { for ( int32_t i=0 ; i<UDATPG_FIELD_COUNT; ++i ) { if (appendItemFormats[i] != other.appendItemFormats[i]) { - return FALSE; + return false; } for (int32_t j=0; j< UDATPG_WIDTH_COUNT; ++j ) { if (fieldDisplayNames[i][j] != other.fieldDisplayNames[i][j]) { - return FALSE; + return false; } } } - return TRUE; + return true; } else { - return FALSE; + return false; } } -UBool +bool DateTimePatternGenerator::operator!=(const DateTimePatternGenerator& other) const { return !operator==(other); } @@ -489,13 +499,15 @@ enum AllowedHourFormat{ } // namespace void -DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status) { +DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status, UBool skipStdPatterns) { //const char *baseLangName = locale.getBaseName(); // unused 
skipMatcher = nullptr; fAvailableFormatKeyHash=nullptr; addCanonicalItems(status); - addICUPatterns(locale, status); + if (!skipStdPatterns) { // skip to prevent circular dependency when called from SimpleDateFormat::construct + addICUPatterns(locale, status); + } addCLDRData(locale, status); setDateTimeFromCalendar(locale, status); setDecimalSymbols(locale, status); @@ -513,7 +525,7 @@ struct AllowedHourFormatsSink : public ResourceSink { virtual ~AllowedHourFormatsSink(); virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { + UErrorCode &errorCode) override { ResourceTable timeData = value.getTable(errorCode); if (U_FAILURE(errorCode)) { return; } for (int32_t i = 0; timeData.getKeyAndValue(i, key, value); ++i) { @@ -733,7 +745,7 @@ DateTimePatternGenerator::getDefaultHourCycle(UErrorCode& status) const { case LOW_K: return UDAT_HOUR_CYCLE_24; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -893,7 +905,7 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& err = localStatus; return; } - if (calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { + if (calendarTypeLen > 0 && calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { destination.clear().append(calendarType, -1, err); if (U_FAILURE(err)) { return; } } @@ -921,16 +933,12 @@ struct DateTimePatternGenerator::AppendItemFormatsSink : public ResourceSink { virtual ~AppendItemFormatsSink(); virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { - ResourceTable itemsTable = value.getTable(errorCode); - if (U_FAILURE(errorCode)) { return; } - for (int32_t i = 0; itemsTable.getKeyAndValue(i, key, value); ++i) { - UDateTimePatternField field = dtpg.getAppendFormatNumber(key); - if (field == UDATPG_FIELD_COUNT) { continue; } - const UnicodeString& valueStr = value.getUnicodeString(errorCode); - if (dtpg.getAppendItemFormat(field).isEmpty() && !valueStr.isEmpty()) { - 
dtpg.setAppendItemFormat(field, valueStr); - } + UErrorCode &errorCode) override { + UDateTimePatternField field = dtpg.getAppendFormatNumber(key); + if (field == UDATPG_FIELD_COUNT) { return; } + const UnicodeString& valueStr = value.getUnicodeString(errorCode); + if (dtpg.getAppendItemFormat(field).isEmpty() && !valueStr.isEmpty()) { + dtpg.setAppendItemFormat(field, valueStr); } } @@ -954,23 +962,16 @@ struct DateTimePatternGenerator::AppendItemNamesSink : public ResourceSink { virtual ~AppendItemNamesSink(); virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { - ResourceTable itemsTable = value.getTable(errorCode); + UErrorCode &errorCode) override { + UDateTimePGDisplayWidth width; + UDateTimePatternField field = dtpg.getFieldAndWidthIndices(key, &width); + if (field == UDATPG_FIELD_COUNT) { return; } + ResourceTable detailsTable = value.getTable(errorCode); if (U_FAILURE(errorCode)) { return; } - for (int32_t i = 0; itemsTable.getKeyAndValue(i, key, value); ++i) { - UDateTimePGDisplayWidth width; - UDateTimePatternField field = dtpg.getFieldAndWidthIndices(key, &width); - if (field == UDATPG_FIELD_COUNT) { continue; } - ResourceTable detailsTable = value.getTable(errorCode); - if (U_FAILURE(errorCode)) { return; } - for (int32_t j = 0; detailsTable.getKeyAndValue(j, key, value); ++j) { - if (uprv_strcmp(key, "dn") != 0) { continue; } - const UnicodeString& valueStr = value.getUnicodeString(errorCode); - if (dtpg.getFieldDisplayName(field,width).isEmpty() && !valueStr.isEmpty()) { - dtpg.setFieldDisplayName(field,width,valueStr); - } - break; - } + if (!detailsTable.findValue("dn", value)) { return; } + const UnicodeString& valueStr = value.getUnicodeString(errorCode); + if (U_SUCCESS(errorCode) && dtpg.getFieldDisplayName(field,width).isEmpty() && !valueStr.isEmpty()) { + dtpg.setFieldDisplayName(field,width,valueStr); } } @@ -1013,19 +1014,15 @@ struct DateTimePatternGenerator::AvailableFormatsSink : public 
ResourceSink { virtual ~AvailableFormatsSink(); virtual void put(const char *key, ResourceValue &value, UBool isRoot, - UErrorCode &errorCode) { - ResourceTable itemsTable = value.getTable(errorCode); - if (U_FAILURE(errorCode)) { return; } - for (int32_t i = 0; itemsTable.getKeyAndValue(i, key, value); ++i) { - const UnicodeString formatKey(key, -1, US_INV); - if (!dtpg.isAvailableFormatSet(formatKey) ) { - dtpg.setAvailableFormat(formatKey, errorCode); - // Add pattern with its associated skeleton. Override any duplicate - // derived from std patterns, but not a previous availableFormats entry: - const UnicodeString& formatValue = value.getUnicodeString(errorCode); - conflictingPattern.remove(); - dtpg.addPatternWithSkeleton(formatValue, &formatKey, !isRoot, conflictingPattern, errorCode); - } + UErrorCode &errorCode) override { + const UnicodeString formatKey(key, -1, US_INV); + if (!dtpg.isAvailableFormatSet(formatKey) ) { + dtpg.setAvailableFormat(formatKey, errorCode); + // Add pattern with its associated skeleton. Override any duplicate + // derived from std patterns, but not a previous availableFormats entry: + const UnicodeString& formatValue = value.getUnicodeString(errorCode); + conflictingPattern.remove(); + dtpg.addPatternWithSkeleton(formatValue, &formatKey, !isRoot, conflictingPattern, errorCode); } } }; @@ -1060,13 +1057,13 @@ DateTimePatternGenerator::addCLDRData(const Locale& locale, UErrorCode& errorCod .append('/', errorCode) .append(DT_DateTimeAppendItemsTag, errorCode); // i.e., calendar/xxx/appendItems if (U_FAILURE(errorCode)) { return; } - ures_getAllItemsWithFallback(rb.getAlias(), path.data(), appendItemFormatsSink, err); + ures_getAllChildrenWithFallback(rb.getAlias(), path.data(), appendItemFormatsSink, err); appendItemFormatsSink.fillInMissing(); // Load CLDR item names. 
err = U_ZERO_ERROR; AppendItemNamesSink appendItemNamesSink(*this); - ures_getAllItemsWithFallback(rb.getAlias(), DT_DateTimeFieldsTag, appendItemNamesSink, err); + ures_getAllChildrenWithFallback(rb.getAlias(), DT_DateTimeFieldsTag, appendItemNamesSink, err); appendItemNamesSink.fillInMissing(); // Load the available formats from CLDR. @@ -1081,7 +1078,7 @@ DateTimePatternGenerator::addCLDRData(const Locale& locale, UErrorCode& errorCod .append('/', errorCode) .append(DT_DateTimeAvailableFormatsTag, errorCode); // i.e., calendar/xxx/availableFormats if (U_FAILURE(errorCode)) { return; } - ures_getAllItemsWithFallback(rb.getAlias(), path.data(), availableFormatsSink, err); + ures_getAllChildrenWithFallback(rb.getAlias(), path.data(), availableFormatsSink, err); } void @@ -1636,7 +1633,11 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern, (typeValue==UDATPG_MINUTE_FIELD && (options & UDATPG_MATCH_MINUTE_FIELD_LENGTH)==0) || (typeValue==UDATPG_SECOND_FIELD && (options & UDATPG_MATCH_SECOND_FIELD_LENGTH)==0) ) { adjFieldLen = field.length(); - } else if (specifiedSkeleton) { + } else if (specifiedSkeleton && reqFieldChar != LOW_C && reqFieldChar != LOW_E) { + // (we skip this section for 'c' and 'e' because unlike the other characters considered in this function, + // they have no minimum field length-- 'E' and 'EE' are equivalent to 'EEE', but 'e' and 'ee' are not + // equivalent to 'eee' -- see the entries for "week day" in + // https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table for more info) int32_t skelFieldLen = specifiedSkeleton->original.getFieldLength(typeValue); UBool patFieldIsNumeric = (row->type > 0); UBool skelFieldIsNumeric = (specifiedSkeleton->type[typeValue] > 0); @@ -1651,6 +1652,9 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern, && (typeValue!= UDATPG_YEAR_FIELD || reqFieldChar==CAP_Y)) ? 
reqFieldChar : field.charAt(0); + if (c == CAP_E && adjFieldLen < 3) { + c = LOW_E; + } if (typeValue == UDATPG_HOUR_FIELD && fDefaultHourFormatChar != 0) { // The adjustment here is required to match spec (https://www.unicode.org/reports/tr35/tr35-dates.html#dfst-hour). // It is necessary to match the hour-cycle preferred by the Locale. @@ -1941,7 +1945,7 @@ PatternMap::copyFrom(const PatternMap& other, UErrorCode& status) { if (prevElem != nullptr) { prevElem->next.adoptInstead(curElem); } else { - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } prevElem = curElem; @@ -2788,7 +2792,7 @@ DTSkeletonEnumeration::DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum t if (U_FAILURE(status)) { return; } - fSkeletons->addElement(newElem.getAlias(), status); + fSkeletons->addElementX(newElem.getAlias(), status); if (U_FAILURE(status)) { fSkeletons.adoptInstead(nullptr); return; @@ -2861,7 +2865,7 @@ DTRedundantEnumeration::add(const UnicodeString& pattern, UErrorCode& status) { if (U_FAILURE(status)) { return; } - fPatterns->addElement(newElem.getAlias(), status); + fPatterns->addElementX(newElem.getAlias(), status); if (U_FAILURE(status)) { fPatterns.adoptInstead(nullptr); return; diff --git a/contrib/libs/icu/i18n/dtptngen_impl.h b/contrib/libs/icu/i18n/dtptngen_impl.h index ade9f57331..5caae11654 100644 --- a/contrib/libs/icu/i18n/dtptngen_impl.h +++ b/contrib/libs/icu/i18n/dtptngen_impl.h @@ -134,20 +134,20 @@ public: UnicodeString& appendTo(UnicodeString& string) const; UnicodeString& appendFieldTo(int32_t field, UnicodeString& string) const; UChar getFirstChar() const; - inline UBool operator==(const SkeletonFields& other) const; - inline UBool operator!=(const SkeletonFields& other) const; + inline bool operator==(const SkeletonFields& other) const; + inline bool operator!=(const SkeletonFields& other) const; private: int8_t chars[UDATPG_FIELD_COUNT]; int8_t lengths[UDATPG_FIELD_COUNT]; }; -inline UBool SkeletonFields::operator==(const SkeletonFields& other) 
const { +inline bool SkeletonFields::operator==(const SkeletonFields& other) const { return (uprv_memcmp(chars, other.chars, sizeof(chars)) == 0 && uprv_memcmp(lengths, other.lengths, sizeof(lengths)) == 0); } -inline UBool SkeletonFields::operator!=(const SkeletonFields& other) const { +inline bool SkeletonFields::operator!=(const SkeletonFields& other) const { return (! operator==(other)); } @@ -195,7 +195,7 @@ public: void getQuoteLiteral(UnicodeString& quote, int32_t *itemIndex); UBool isPatternSeparator(const UnicodeString& field) const; static UBool isQuoteLiteral(const UnicodeString& s); - static int32_t getCanonicalIndex(const UnicodeString& s) { return getCanonicalIndex(s, TRUE); } + static int32_t getCanonicalIndex(const UnicodeString& s) { return getCanonicalIndex(s, true); } static int32_t getCanonicalIndex(const UnicodeString& s, UBool strict); private: @@ -279,10 +279,10 @@ public: DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum type, UErrorCode& status); virtual ~DTSkeletonEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; - virtual const UnicodeString* snext(UErrorCode& status); - virtual void reset(UErrorCode& status); - virtual int32_t count(UErrorCode& status) const; + virtual UClassID getDynamicClassID(void) const override; + virtual const UnicodeString* snext(UErrorCode& status) override; + virtual void reset(UErrorCode& status) override; + virtual int32_t count(UErrorCode& status) const override; private: int32_t pos; UBool isCanonicalItem(const UnicodeString& item); @@ -294,10 +294,10 @@ public: DTRedundantEnumeration(); virtual ~DTRedundantEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; - virtual const UnicodeString* snext(UErrorCode& status); - virtual void reset(UErrorCode& status); - virtual int32_t count(UErrorCode& status) const; + virtual UClassID getDynamicClassID(void) const override; + virtual 
const UnicodeString* snext(UErrorCode& status) override; + virtual void reset(UErrorCode& status) override; + virtual int32_t count(UErrorCode& status) const override; void add(const UnicodeString &pattern, UErrorCode& status); private: int32_t pos; diff --git a/contrib/libs/icu/i18n/dtrule.cpp b/contrib/libs/icu/i18n/dtrule.cpp index 6847f1d16e..63949b63aa 100644 --- a/contrib/libs/icu/i18n/dtrule.cpp +++ b/contrib/libs/icu/i18n/dtrule.cpp @@ -81,7 +81,7 @@ DateTimeRule::operator=(const DateTimeRule& right) { return *this; } -UBool +bool DateTimeRule::operator==(const DateTimeRule& that) const { return ((this == &that) || (typeid(*this) == typeid(that) && @@ -94,7 +94,7 @@ DateTimeRule::operator==(const DateTimeRule& that) const { fTimeRuleType == that.fTimeRuleType)); } -UBool +bool DateTimeRule::operator!=(const DateTimeRule& that) const { return !operator==(that); } diff --git a/contrib/libs/icu/i18n/esctrn.h b/contrib/libs/icu/i18n/esctrn.h index 2a2c6dcfe9..a4282ea86a 100644 --- a/contrib/libs/icu/i18n/esctrn.h +++ b/contrib/libs/icu/i18n/esctrn.h @@ -115,12 +115,12 @@ class EscapeTransliterator : public Transliterator { /** * Transliterator API. */ - virtual EscapeTransliterator* clone() const; + virtual EscapeTransliterator* clone() const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -133,7 +133,7 @@ class EscapeTransliterator : public Transliterator { * Implements {@link Transliterator#handleTransliterate}. 
*/ virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, - UBool isIncremental) const; + UBool isIncremental) const override; }; diff --git a/contrib/libs/icu/i18n/ethpccal.h b/contrib/libs/icu/i18n/ethpccal.h index d1e8f424b5..0cc5b6c535 100644 --- a/contrib/libs/icu/i18n/ethpccal.h +++ b/contrib/libs/icu/i18n/ethpccal.h @@ -141,14 +141,14 @@ public: * @return return a polymorphic copy of this calendar. * @internal */ - virtual EthiopicCalendar* clone() const; + virtual EthiopicCalendar* clone() const override; /** * return the calendar type, "ethiopic" * @return calendar type * @internal */ - virtual const char * getType() const; + virtual const char * getType() const override; /** * Set Alem or Mihret era. @@ -173,38 +173,38 @@ protected: * Return the extended year defined by the current fields. * @internal */ - virtual int32_t handleGetExtendedYear(); + virtual int32_t handleGetExtendedYear() override; /** * Compute fields from the JD * @internal */ - virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status) override; /** * Calculate the limit for a specified type of limit and field * @internal */ - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; /** * Returns the date of the start of the default century * @return start of century - in milliseconds since epoch, 1970 * @internal */ - virtual UDate defaultCenturyStart() const; + virtual UDate defaultCenturyStart() const override; /** * Returns the year in which the default century begins * @internal */ - virtual int32_t defaultCenturyStartYear() const; + virtual int32_t defaultCenturyStartYear() const override; /** * Return the date offset from Julian * @internal */ - virtual int32_t getJDEpochOffset() const; + virtual int32_t getJDEpochOffset() const override; private: 
/** @@ -229,7 +229,7 @@ public: * same class ID. Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. This is useful only for comparing to a return diff --git a/contrib/libs/icu/i18n/fmtable.cpp b/contrib/libs/icu/i18n/fmtable.cpp index dbfd3c26ba..7a9a81ded5 100644 --- a/contrib/libs/icu/i18n/fmtable.cpp +++ b/contrib/libs/icu/i18n/fmtable.cpp @@ -177,7 +177,7 @@ Formattable::Formattable(const UnicodeString& stringToCopy) // ------------------------------------- // Creates a formattable object with a UnicodeString* value. -// (adopting symantics) +// (adopting semantics) Formattable::Formattable(UnicodeString* stringToAdopt) { @@ -275,18 +275,18 @@ Formattable::operator=(const Formattable& source) // ------------------------------------- -UBool +bool Formattable::operator==(const Formattable& that) const { int32_t i; - if (this == &that) return TRUE; + if (this == &that) return true; - // Returns FALSE if the data types are different. - if (fType != that.fType) return FALSE; + // Returns false if the data types are different. + if (fType != that.fType) return false; // Compares the actual data values. - UBool equal = TRUE; + bool equal = true; switch (fType) { case kDate: equal = (fValue.fDate == that.fValue.fDate); @@ -303,20 +303,20 @@ Formattable::operator==(const Formattable& that) const break; case kArray: if (fValue.fArrayAndCount.fCount != that.fValue.fArrayAndCount.fCount) { - equal = FALSE; + equal = false; break; } // Checks each element for equality. 
for (i=0; i<fValue.fArrayAndCount.fCount; ++i) { if (fValue.fArrayAndCount.fArray[i] != that.fValue.fArrayAndCount.fArray[i]) { - equal = FALSE; + equal = false; break; } } break; case kObject: if (fValue.fObject == NULL || that.fValue.fObject == NULL) { - equal = FALSE; + equal = false; } else { equal = objectEquals(fValue.fObject, that.fValue.fObject); } @@ -895,7 +895,7 @@ U_NAMESPACE_END U_NAMESPACE_USE -U_DRAFT UFormattable* U_EXPORT2 +U_CAPI UFormattable* U_EXPORT2 ufmt_open(UErrorCode *status) { if( U_FAILURE(*status) ) { return NULL; @@ -908,14 +908,14 @@ ufmt_open(UErrorCode *status) { return fmt; } -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 ufmt_close(UFormattable *fmt) { Formattable *obj = Formattable::fromUFormattable(fmt); delete obj; } -U_INTERNAL UFormattableType U_EXPORT2 +U_CAPI UFormattableType U_EXPORT2 ufmt_getType(const UFormattable *fmt, UErrorCode *status) { if(U_FAILURE(*status)) { return (UFormattableType)UFMT_COUNT; @@ -925,27 +925,27 @@ ufmt_getType(const UFormattable *fmt, UErrorCode *status) { } -U_INTERNAL UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 ufmt_isNumeric(const UFormattable *fmt) { const Formattable *obj = Formattable::fromUFormattable(fmt); return obj->isNumeric(); } -U_DRAFT UDate U_EXPORT2 +U_CAPI UDate U_EXPORT2 ufmt_getDate(const UFormattable *fmt, UErrorCode *status) { const Formattable *obj = Formattable::fromUFormattable(fmt); return obj->getDate(*status); } -U_DRAFT double U_EXPORT2 +U_CAPI double U_EXPORT2 ufmt_getDouble(UFormattable *fmt, UErrorCode *status) { Formattable *obj = Formattable::fromUFormattable(fmt); return obj->getDouble(*status); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 ufmt_getLong(UFormattable *fmt, UErrorCode *status) { Formattable *obj = Formattable::fromUFormattable(fmt); @@ -953,7 +953,7 @@ ufmt_getLong(UFormattable *fmt, UErrorCode *status) { } -U_DRAFT const void *U_EXPORT2 +U_CAPI const void *U_EXPORT2 ufmt_getObject(const UFormattable *fmt, UErrorCode *status) { const 
Formattable *obj = Formattable::fromUFormattable(fmt); @@ -966,7 +966,7 @@ ufmt_getObject(const UFormattable *fmt, UErrorCode *status) { return ret; } -U_DRAFT const UChar* U_EXPORT2 +U_CAPI const UChar* U_EXPORT2 ufmt_getUChars(UFormattable *fmt, int32_t *len, UErrorCode *status) { Formattable *obj = Formattable::fromUFormattable(fmt); @@ -986,7 +986,7 @@ ufmt_getUChars(UFormattable *fmt, int32_t *len, UErrorCode *status) { return str.getTerminatedBuffer(); } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 ufmt_getArrayLength(const UFormattable* fmt, UErrorCode *status) { const Formattable *obj = Formattable::fromUFormattable(fmt); @@ -995,7 +995,7 @@ ufmt_getArrayLength(const UFormattable* fmt, UErrorCode *status) { return count; } -U_DRAFT UFormattable * U_EXPORT2 +U_CAPI UFormattable * U_EXPORT2 ufmt_getArrayItemByIndex(UFormattable* fmt, int32_t n, UErrorCode *status) { Formattable *obj = Formattable::fromUFormattable(fmt); int32_t count; @@ -1010,7 +1010,7 @@ ufmt_getArrayItemByIndex(UFormattable* fmt, int32_t n, UErrorCode *status) { } } -U_DRAFT const char * U_EXPORT2 +U_CAPI const char * U_EXPORT2 ufmt_getDecNumChars(UFormattable *fmt, int32_t *len, UErrorCode *status) { if(U_FAILURE(*status)) { return ""; @@ -1031,7 +1031,7 @@ ufmt_getDecNumChars(UFormattable *fmt, int32_t *len, UErrorCode *status) { } } -U_DRAFT int64_t U_EXPORT2 +U_CAPI int64_t U_EXPORT2 ufmt_getInt64(UFormattable *fmt, UErrorCode *status) { Formattable *obj = Formattable::fromUFormattable(fmt); return obj->getInt64(*status); diff --git a/contrib/libs/icu/i18n/fmtable_cnv.cpp b/contrib/libs/icu/i18n/fmtable_cnv.cpp index 9a64792779..bc3847b696 100644 --- a/contrib/libs/icu/i18n/fmtable_cnv.cpp +++ b/contrib/libs/icu/i18n/fmtable_cnv.cpp @@ -30,8 +30,6 @@ U_NAMESPACE_BEGIN // ------------------------------------- // Creates a formattable object with a char* string. // This API is useless. The API that takes a UnicodeString is actually just as good. 
-// This is just a grandfathered API. - Formattable::Formattable(const char* stringToCopy) { init(); diff --git a/contrib/libs/icu/i18n/format.cpp b/contrib/libs/icu/i18n/format.cpp index e5abbe9eb0..10856a4acb 100644 --- a/contrib/libs/icu/i18n/format.cpp +++ b/contrib/libs/icu/i18n/format.cpp @@ -26,7 +26,7 @@ #include "unicode/utypes.h" #ifndef U_I18N_IMPLEMENTATION -#error U_I18N_IMPLEMENTATION not set - must be set for all ICU source files in i18n/ - see http://userguide.icu-project.org/howtouseicu +#error U_I18N_IMPLEMENTATION not set - must be set for all ICU source files in i18n/ - see https://unicode-org.github.io/icu/userguide/howtouseicu #endif /* @@ -155,7 +155,7 @@ Format::parseObject(const UnicodeString& source, // ------------------------------------- -UBool +bool Format::operator==(const Format& that) const { // Subclasses: Call this method and then add more specific checks. @@ -167,7 +167,7 @@ Format::operator==(const Format& that) const * Simple function for initializing a UParseError from a UnicodeString. * * @param pattern The pattern to copy into the parseError - * @param pos The position in pattern where the error occured + * @param pos The position in pattern where the error occurred * @param parseError The UParseError object to fill in * @draft ICU 2.4 */ diff --git a/contrib/libs/icu/i18n/formatted_string_builder.cpp b/contrib/libs/icu/i18n/formatted_string_builder.cpp index 5aabc31cc4..734078644b 100644 --- a/contrib/libs/icu/i18n/formatted_string_builder.cpp +++ b/contrib/libs/icu/i18n/formatted_string_builder.cpp @@ -276,6 +276,11 @@ int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t co char16_t *oldChars = getCharPtr(); Field *oldFields = getFieldPtr(); if (fLength + count > oldCapacity) { + if ((fLength + count) > INT32_MAX / 2) { + // If we continue, then newCapacity will overflow int32_t in the next line. 
+ status = U_INPUT_TOO_LONG_ERROR; + return -1; + } int32_t newCapacity = (fLength + count) * 2; int32_t newZero = newCapacity / 2 - (fLength + count) / 2; @@ -330,12 +335,14 @@ int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t co fZero = newZero; fLength += count; } + U_ASSERT((fZero + index) >= 0); return fZero + index; } int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) { // TODO: Reset the heap here? (If the string after removal can fit on stack?) int32_t position = index + fZero; + U_ASSERT(position >= 0); uprv_memmove2(getCharPtr() + position, getCharPtr() + position + count, sizeof(char16_t) * (fLength - index - count)); diff --git a/contrib/libs/icu/i18n/formatted_string_builder.h b/contrib/libs/icu/i18n/formatted_string_builder.h index 4567dc1d66..92bcf07d78 100644 --- a/contrib/libs/icu/i18n/formatted_string_builder.h +++ b/contrib/libs/icu/i18n/formatted_string_builder.h @@ -25,7 +25,7 @@ class FormattedValueStringBuilderImpl; * * <ol> * <li>Efficient prepend as well as append. - * <li>Keeps tracks of Fields in an efficient manner. + * <li>Keeps track of Fields in an efficient manner. * </ol> * * See also FormattedValueStringBuilderImpl. @@ -55,7 +55,6 @@ class U_I18N_API FormattedStringBuilder : public UMemory { // Convention: bottom 4 bits for field, top 4 bits for field category. // Field category 0 implies the number category so that the number field // literals can be directly passed as a Field type. - // See the helper functions in "StringBuilderFieldUtils" below. // Exported as U_I18N_API so it can be used by other exports on Windows. 
struct U_I18N_API Field { uint8_t bits; diff --git a/contrib/libs/icu/i18n/formattedval_impl.h b/contrib/libs/icu/i18n/formattedval_impl.h index 7bee374286..2b9a3970d2 100644 --- a/contrib/libs/icu/i18n/formattedval_impl.h +++ b/contrib/libs/icu/i18n/formattedval_impl.h @@ -69,6 +69,9 @@ U_NAMESPACE_BEGIN /** * Implementation of FormattedValue using FieldPositionHandler to accept fields. + * + * TODO(ICU-20897): This class is unused. If it is not needed when fixing ICU-20897, + * it should be deleted. */ class FormattedValueFieldPositionIteratorImpl : public UMemory, public FormattedValue { public: @@ -114,6 +117,26 @@ private: }; +// Internal struct that must be exported for MSVC +struct U_I18N_API SpanInfo { + UFieldCategory category; + int32_t spanValue; + int32_t start; + int32_t length; +}; + +// Export an explicit template instantiation of the MaybeStackArray that +// is used as a data member of CEBuffer. +// +// When building DLLs for Windows this is required even though +// no direct access to the MaybeStackArray leaks out of the i18n library. +// +// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples. +// +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +template class U_I18N_API MaybeStackArray<SpanInfo, 8>; +#endif + /** * Implementation of FormattedValue based on FormattedStringBuilder. * @@ -146,13 +169,28 @@ public: inline const FormattedStringBuilder& getStringRef() const { return fString; } + void resetString(); + + /** + * Adds additional metadata used for span fields. + * + * category: the category to use for the span field. + * spanValue: the value of the span field: index of the list item, for example. + * start: the start position within the string of the span. -1 if unknown. + * length: the length of the span, used to split adjacent fields. 
+ */ + void appendSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status); + void prependSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status); private: FormattedStringBuilder fString; FormattedStringBuilder::Field fNumericField; + MaybeStackArray<SpanInfo, 8> spanIndices; + int32_t spanIndicesCount = 0; bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const; static bool isIntOrGroup(FormattedStringBuilder::Field field); + static bool isTrimmable(FormattedStringBuilder::Field field); int32_t trimBack(int32_t limit) const; int32_t trimFront(int32_t start) const; }; @@ -211,7 +249,7 @@ struct UFormattedValueImpl : public UMemory, public UFormattedValueApiHelper { return fData->appendTo(appendable, status); \ } \ UBool Name::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const { \ - UPRV_FORMATTED_VALUE_METHOD_GUARD(FALSE) \ + UPRV_FORMATTED_VALUE_METHOD_GUARD(false) \ return fData->nextPosition(cfpos, status); \ } @@ -230,7 +268,7 @@ struct UFormattedValueImpl : public UMemory, public UFormattedValueApiHelper { } \ return static_cast<HelperType*>(impl)->exportForC(); \ } \ - U_DRAFT const UFormattedValue* U_EXPORT2 \ + U_CAPI const UFormattedValue* U_EXPORT2 \ Prefix ## _resultAsValue (const CType* uresult, UErrorCode* ec) { \ const ImplType* result = HelperType::validate(uresult, *ec); \ if (U_FAILURE(*ec)) { return nullptr; } \ diff --git a/contrib/libs/icu/i18n/formattedval_sbimpl.cpp b/contrib/libs/icu/i18n/formattedval_sbimpl.cpp index dfe3af6686..70ffacac4b 100644 --- a/contrib/libs/icu/i18n/formattedval_sbimpl.cpp +++ b/contrib/libs/icu/i18n/formattedval_sbimpl.cpp @@ -15,6 +15,7 @@ #include "formatted_string_builder.h" #include "number_utils.h" #include "static_unicode_sets.h" +#include "unicode/listformatter.h" U_NAMESPACE_BEGIN @@ -45,19 +46,19 @@ Appendable& 
FormattedValueStringBuilderImpl::appendTo(Appendable& appendable, UE UBool FormattedValueStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const { // NOTE: MSVC sometimes complains when implicitly converting between bool and UBool - return nextPositionImpl(cfpos, fNumericField, status) ? TRUE : FALSE; + return nextPositionImpl(cfpos, fNumericField, status) ? true : false; } UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const { int32_t rawField = fp.getField(); if (rawField == FieldPosition::DONT_CARE) { - return FALSE; + return false; } if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) { status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; + return false; } ConstrainedFieldPosition cfpos; @@ -66,7 +67,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr if (nextPositionImpl(cfpos, kUndefinedField, status)) { fp.setBeginIndex(cfpos.getStart()); fp.setEndIndex(cfpos.getLimit()); - return TRUE; + return true; } // Special case: fraction should start after integer if fraction is not present @@ -84,7 +85,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr fp.setEndIndex(i - fString.fZero); } - return FALSE; + return false; } void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih, @@ -95,6 +96,11 @@ void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIterator } } +void FormattedValueStringBuilderImpl::resetString() { + fString.clear(); + spanIndicesCount = 0; +} + // Signal the end of the string using a field that doesn't exist and that is // different from kUndefinedField, which is used for "null field". 
static constexpr Field kEndField = Field(0xf, 0xf); @@ -102,6 +108,27 @@ static constexpr Field kEndField = Field(0xf, 0xf); bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const { int32_t fieldStart = -1; Field currField = kUndefinedField; + bool prevIsSpan = false; + int32_t nextSpanStart = -1; + if (spanIndicesCount > 0) { + int64_t si = cfpos.getInt64IterationContext(); + U_ASSERT(si <= spanIndicesCount); + if (si < spanIndicesCount) { + nextSpanStart = spanIndices[si].start; + } + if (si > 0) { + prevIsSpan = cfpos.getCategory() == spanIndices[si-1].category + && cfpos.getField() == spanIndices[si-1].spanValue; + } + } + bool prevIsNumeric = false; + if (numericField != kUndefinedField) { + prevIsNumeric = cfpos.getCategory() == numericField.getCategory() + && cfpos.getField() == numericField.getField(); + } + bool prevIsInteger = cfpos.getCategory() == UFIELD_CATEGORY_NUMBER + && cfpos.getField() == UNUM_INTEGER_FIELD; + for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) { Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField; // Case 1: currently scanning a field. @@ -109,7 +136,7 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& if (currField != _field) { int32_t end = i - fString.fZero; // Grouping separators can be whitespace; don't throw them out! 
- if (currField != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD)) { + if (isTrimmable(currField)) { end = trimBack(i - fString.fZero); } if (end <= fieldStart) { @@ -120,7 +147,7 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& continue; } int32_t start = fieldStart; - if (currField != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD)) { + if (isTrimmable(currField)) { start = trimFront(start); } cfpos.setState(currField.getCategory(), currField.getField(), start, end); @@ -128,11 +155,38 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& } continue; } + // Special case: emit normalField if we are pointing at the end of spanField. + if (i > fString.fZero && prevIsSpan) { + int64_t si = cfpos.getInt64IterationContext() - 1; + U_ASSERT(si >= 0); + int32_t previ = i - spanIndices[si].length; + U_ASSERT(previ >= fString.fZero); + Field prevField = fString.getFieldPtr()[previ]; + if (prevField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + // Special handling for ULISTFMT_ELEMENT_FIELD + if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + fieldStart = i - fString.fZero - spanIndices[si].length; + int32_t end = fieldStart + spanIndices[si].length; + cfpos.setState( + UFIELD_CATEGORY_LIST, + ULISTFMT_ELEMENT_FIELD, + fieldStart, + end); + return true; + } else { + prevIsSpan = false; + } + } else { + // Re-wind, since there may be multiple fields in the span. + i = previ; + _field = prevField; + } + } // Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER. 
if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD) && i > fString.fZero - // don't return the same field twice in a row: - && i - fString.fZero > cfpos.getLimit() + && !prevIsInteger + && !prevIsNumeric && isIntOrGroup(fString.getFieldPtr()[i - 1]) && !isIntOrGroup(_field)) { int j = i - 1; @@ -148,13 +202,11 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& if (numericField != kUndefinedField && cfpos.matchesField(numericField.getCategory(), numericField.getField()) && i > fString.fZero - // don't return the same field twice in a row: - && (i - fString.fZero > cfpos.getLimit() - || cfpos.getCategory() != numericField.getCategory() - || cfpos.getField() != numericField.getField()) + && !prevIsNumeric && fString.getFieldPtr()[i - 1].isNumeric() && !_field.isNumeric()) { - int j = i - 1; + // Re-wind to the beginning of the field and then emit it + int32_t j = i - 1; for (; j >= fString.fZero && fString.getFieldPtr()[j].isNumeric(); j--) {} cfpos.setState( numericField.getCategory(), @@ -163,30 +215,116 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& i - fString.fZero); return true; } + // Check for span field + if (!prevIsSpan && ( + _field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD) || + i - fString.fZero == nextSpanStart)) { + int64_t si = cfpos.getInt64IterationContext(); + if (si >= spanIndicesCount) { + break; + } + UFieldCategory spanCategory = spanIndices[si].category; + int32_t spanValue = spanIndices[si].spanValue; + int32_t length = spanIndices[si].length; + cfpos.setInt64IterationContext(si + 1); + if (si + 1 < spanIndicesCount) { + nextSpanStart = spanIndices[si + 1].start; + } + if (cfpos.matchesField(spanCategory, spanValue)) { + fieldStart = i - fString.fZero; + int32_t end = fieldStart + length; + cfpos.setState( + spanCategory, + spanValue, + fieldStart, + end); + return true; + } else if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { 
+ // Special handling for ULISTFMT_ELEMENT_FIELD + if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + fieldStart = i - fString.fZero; + int32_t end = fieldStart + length; + cfpos.setState( + UFIELD_CATEGORY_LIST, + ULISTFMT_ELEMENT_FIELD, + fieldStart, + end); + return true; + } else { + // Failed to match; jump ahead + i += length - 1; + // goto loopend + } + } + } // Special case: skip over INTEGER; will be coalesced later. - if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) { + else if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) { _field = kUndefinedField; } - // Case 2: no field starting at this position. - if (_field.isUndefined() || _field == kEndField) { - continue; + // No field starting at this position. + else if (_field.isUndefined() || _field == kEndField) { + // goto loopend } - // Case 3: check for field starting at this position - if (cfpos.matchesField(_field.getCategory(), _field.getField())) { + // No SpanField + else if (cfpos.matchesField(_field.getCategory(), _field.getField())) { fieldStart = i - fString.fZero; currField = _field; } + // loopend: + prevIsSpan = false; + prevIsNumeric = false; + prevIsInteger = false; } U_ASSERT(currField == kUndefinedField); + // Always set the position to the end so that we don't revisit previous sections + cfpos.setState( + cfpos.getCategory(), + cfpos.getField(), + fString.fLength, + fString.fLength); return false; } +void FormattedValueStringBuilderImpl::appendSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + U_ASSERT(spanIndices.getCapacity() >= spanIndicesCount); + if (spanIndices.getCapacity() == spanIndicesCount) { + if (!spanIndices.resize(spanIndicesCount * 2, spanIndicesCount)) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } + spanIndices[spanIndicesCount] = {category, spanValue, start, length}; + spanIndicesCount++; +} + +void 
FormattedValueStringBuilderImpl::prependSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + U_ASSERT(spanIndices.getCapacity() >= spanIndicesCount); + if (spanIndices.getCapacity() == spanIndicesCount) { + if (!spanIndices.resize(spanIndicesCount * 2, spanIndicesCount)) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } + for (int32_t i = spanIndicesCount - 1; i >= 0; i--) { + spanIndices[i+1] = spanIndices[i]; + } + spanIndices[0] = {category, spanValue, start, length}; + spanIndicesCount++; +} + bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) { return field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD) || field == Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD); } +bool FormattedValueStringBuilderImpl::isTrimmable(Field field) { + return field != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD) + && field.getCategory() != UFIELD_CATEGORY_LIST; +} + int32_t FormattedValueStringBuilderImpl::trimBack(int32_t limit) const { return unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack( fString.getCharPtr() + fString.fZero, diff --git a/contrib/libs/icu/i18n/formattedvalue.cpp b/contrib/libs/icu/i18n/formattedvalue.cpp index e2c9c42fc8..1030661f22 100644 --- a/contrib/libs/icu/i18n/formattedvalue.cpp +++ b/contrib/libs/icu/i18n/formattedvalue.cpp @@ -49,7 +49,7 @@ UBool ConstrainedFieldPosition::matchesField(int32_t category, int32_t field) co case UCFPOS_CONSTRAINT_FIELD: return fCategory == category && fField == field; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -193,7 +193,7 @@ ucfpos_close(UConstrainedFieldPosition* ptr) { } -U_DRAFT const UChar* U_EXPORT2 +U_CAPI const UChar* U_EXPORT2 ufmtval_getString( const UFormattedValue* ufmtval, int32_t* pLength, @@ -209,11 +209,13 @@ ufmtval_getString( if (pLength != nullptr) { *pLength = readOnlyAlias.length(); } + // Note: this line triggers 
-Wreturn-local-addr, but it is safe because toTempString is + // defined to return memory owned by the ufmtval argument. return readOnlyAlias.getBuffer(); } -U_DRAFT UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 ufmtval_nextPosition( const UFormattedValue* ufmtval, UConstrainedFieldPosition* ucfpos, diff --git a/contrib/libs/icu/i18n/fphdlimp.h b/contrib/libs/icu/i18n/fphdlimp.h index b9fa9b2181..4fb0c7b6fe 100644 --- a/contrib/libs/icu/i18n/fphdlimp.h +++ b/contrib/libs/icu/i18n/fphdlimp.h @@ -41,8 +41,8 @@ class U_I18N_API FieldPositionHandler: public UMemory { class FieldPositionOnlyHandler : public FieldPositionHandler { FieldPosition& pos; - UBool acceptFirstOnly = FALSE; - UBool seenFirst = FALSE; + UBool acceptFirstOnly = false; + UBool seenFirst = false; public: FieldPositionOnlyHandler(FieldPosition& pos); diff --git a/contrib/libs/icu/i18n/fpositer.cpp b/contrib/libs/icu/i18n/fpositer.cpp index 75d529eb8c..096896d7b3 100644 --- a/contrib/libs/icu/i18n/fpositer.cpp +++ b/contrib/libs/icu/i18n/fpositer.cpp @@ -45,17 +45,17 @@ FieldPositionIterator::FieldPositionIterator(const FieldPositionIterator &rhs) } } -UBool FieldPositionIterator::operator==(const FieldPositionIterator &rhs) const { +bool FieldPositionIterator::operator==(const FieldPositionIterator &rhs) const { if (&rhs == this) { - return TRUE; + return true; } if (pos != rhs.pos) { - return FALSE; + return false; } if (!data) { return rhs.data == NULL; } - return rhs.data ? data->operator==(*rhs.data) : FALSE; + return rhs.data ? 
data->operator==(*rhs.data) : false; } void FieldPositionIterator::setData(UVector32 *adopt, UErrorCode& status) { diff --git a/contrib/libs/icu/i18n/funcrepl.h b/contrib/libs/icu/i18n/funcrepl.h index fe41f6caaa..529a10ebbf 100644 --- a/contrib/libs/icu/i18n/funcrepl.h +++ b/contrib/libs/icu/i18n/funcrepl.h @@ -70,13 +70,13 @@ class FunctionReplacer : public UnicodeFunctor, public UnicodeReplacer { /** * Implement UnicodeFunctor */ - virtual FunctionReplacer* clone() const; + virtual FunctionReplacer* clone() const override; /** * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer * and return the pointer. */ - virtual UnicodeReplacer* toReplacer() const; + virtual UnicodeReplacer* toReplacer() const override; /** * UnicodeReplacer API @@ -84,28 +84,28 @@ class FunctionReplacer : public UnicodeFunctor, public UnicodeReplacer { virtual int32_t replace(Replaceable& text, int32_t start, int32_t limit, - int32_t& cursor); + int32_t& cursor) override; /** * UnicodeReplacer API */ virtual UnicodeString& toReplacerPattern(UnicodeString& rule, - UBool escapeUnprintable) const; + UBool escapeUnprintable) const override; /** * Implement UnicodeReplacer */ - virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const; + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const override; /** * UnicodeFunctor API */ - virtual void setData(const TransliterationRuleData*); + virtual void setData(const TransliterationRuleData*) override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. 
diff --git a/contrib/libs/icu/i18n/gregocal.cpp b/contrib/libs/icu/i18n/gregocal.cpp index 6b15171c12..31d36300ae 100644 --- a/contrib/libs/icu/i18n/gregocal.cpp +++ b/contrib/libs/icu/i18n/gregocal.cpp @@ -185,7 +185,7 @@ fIsGregorian(TRUE), fInvertGregorian(FALSE) // ------------------------------------- GregorianCalendar::GregorianCalendar(const Locale& aLocale, UErrorCode& status) -: Calendar(TimeZone::createDefault(), aLocale, status), +: Calendar(TimeZone::forLocaleOrDefault(aLocale), aLocale, status), fGregorianCutover(kPapalCutover), fCutoverJulianDay(kCutoverJulianDay), fNormalizedGregorianCutover(fGregorianCutover), fGregorianCutoverYear(1582), fIsGregorian(TRUE), fInvertGregorian(FALSE) @@ -398,7 +398,7 @@ void GregorianCalendar::handleComputeFields(int32_t julianDay, UErrorCode& statu // with 8 AD. Before 8 AD the spacing is irregular; every 3 years // from 45 BC to 9 BC, and then none until 8 AD. However, we don't // implement this historical detail; instead, we implement the - // computatinally cleaner proleptic calendar, which assumes + // computationally cleaner proleptic calendar, which assumes // consistent 4-year cycles throughout time. UBool isLeap = ((eyear&0x3) == 0); // equiv. to (eyear%4 == 0) diff --git a/contrib/libs/icu/i18n/gregoimp.h b/contrib/libs/icu/i18n/gregoimp.h index 06eb323845..b1a5bc22c2 100644 --- a/contrib/libs/icu/i18n/gregoimp.h +++ b/contrib/libs/icu/i18n/gregoimp.h @@ -86,7 +86,7 @@ class ClockMath { * 0 <= remainder < divisor. * * Works around edge-case bugs. Handles pathological input - * (divident >> divisor) reasonably. + * (dividend >> divisor) reasonably. * * Calling with a divisor <= 0 is disallowed. */ @@ -148,9 +148,9 @@ class ClockMath { class Grego { public: /** - * Return TRUE if the given year is a leap year. + * Return true if the given year is a leap year. * @param year Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc. 
- * @return TRUE if the year is a leap year + * @return true if the year is a leap year */ static inline UBool isLeapYear(int32_t year); @@ -214,7 +214,7 @@ class Grego { * @param dom output parameter to receive day-of-month (1-based) * @param dow output parameter to receive day-of-week (1-based, 1==Sun) * @param doy output parameter to receive day-of-year (1-based) - * @param mid output parameter to recieve millis-in-day + * @param mid output parameter to receive millis-in-day */ static void timeToFields(UDate time, int32_t& year, int32_t& month, int32_t& dom, int32_t& dow, int32_t& doy, int32_t& mid); diff --git a/contrib/libs/icu/i18n/hebrwcal.cpp b/contrib/libs/icu/i18n/hebrwcal.cpp index c8fb8a1679..4d8e59cef9 100644 --- a/contrib/libs/icu/i18n/hebrwcal.cpp +++ b/contrib/libs/icu/i18n/hebrwcal.cpp @@ -155,7 +155,7 @@ U_NAMESPACE_BEGIN * @internal */ HebrewCalendar::HebrewCalendar(const Locale& aLocale, UErrorCode& success) -: Calendar(TimeZone::createDefault(), aLocale, success) +: Calendar(TimeZone::forLocaleOrDefault(aLocale), aLocale, success) { setTimeInMillis(getNow(), success); // Call this again now that the vtable is set up properly. 
@@ -393,7 +393,8 @@ int32_t HebrewCalendar::startOfYear(int32_t year, UErrorCode &status) int32_t day = CalendarCache::get(&gCache, year, status); if (day == 0) { - int32_t months = (235 * year - 234) / 19; // # of months before year + // # of months before year + int32_t months = (int32_t)ClockMath::floorDivide((235 * (int64_t)year - 234), (int64_t)19); int64_t frac = (int64_t)months * MONTH_FRACT + BAHARAD; // Fractional part of day # day = months * 29 + (int32_t)(frac / DAY_PARTS); // Whole # part of calculation @@ -566,8 +567,8 @@ void HebrewCalendar::validateField(UCalendarDateFields field, UErrorCode &status */ void HebrewCalendar::handleComputeFields(int32_t julianDay, UErrorCode &status) { int32_t d = julianDay - 347997; - double m = ((d * (double)DAY_PARTS)/ (double) MONTH_PARTS); // Months (approx) - int32_t year = (int32_t)( ((19. * m + 234.) / 235.) + 1.); // Years (approx) + double m = ClockMath::floorDivide((d * (double)DAY_PARTS), (double) MONTH_PARTS); // Months (approx) + int32_t year = (int32_t)(ClockMath::floorDivide((19. * m + 234.), 235.) + 1.); // Years (approx) int32_t ys = startOfYear(year, status); // 1st day of year int32_t dayOfYear = (d - ys); diff --git a/contrib/libs/icu/i18n/hebrwcal.h b/contrib/libs/icu/i18n/hebrwcal.h index 08136de32a..ae4401832d 100644 --- a/contrib/libs/icu/i18n/hebrwcal.h +++ b/contrib/libs/icu/i18n/hebrwcal.h @@ -192,7 +192,7 @@ public: * @return return a polymorphic copy of this calendar. * @internal */ - virtual HebrewCalendar* clone() const; + virtual HebrewCalendar* clone() const override; public: /** @@ -205,7 +205,7 @@ public: * same class ID. Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. 
This is useful only for comparing to a return @@ -226,7 +226,7 @@ public: * @return calendar type * @internal */ - virtual const char * getType() const; + virtual const char * getType() const override; // Calendar API @@ -242,11 +242,11 @@ public: * previously set in the time field is invalid, this will be set to * an error status. */ - virtual void add(UCalendarDateFields field, int32_t amount, UErrorCode& status); + virtual void add(UCalendarDateFields field, int32_t amount, UErrorCode& status) override; /** * @deprecated ICU 2.6 use UCalendarDateFields instead of EDateFields */ - virtual void add(EDateFields field, int32_t amount, UErrorCode& status); + virtual void add(EDateFields field, int32_t amount, UErrorCode& status) override; /** @@ -260,7 +260,7 @@ public: * an error status. * @internal */ - virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode& status); + virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode& status) override; /** * (Overrides Calendar) Rolls up or down by the given amount in the specified field. @@ -273,7 +273,7 @@ public: * an error status. * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead. ` */ - virtual void roll(EDateFields field, int32_t amount, UErrorCode& status); + virtual void roll(EDateFields field, int32_t amount, UErrorCode& status) override; /** * @internal @@ -303,7 +303,7 @@ public: * <code>LEAST_MAXIMUM</code>, or <code>MAXIMUM</code> * @internal */ - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; /** * Return the number of days in the given month of the given extended @@ -312,7 +312,7 @@ public: * implementation than the default implementation in Calendar. 
* @internal */ - virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const; + virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const override; /** * Return the number of days in the given extended year of this @@ -321,7 +321,7 @@ public: * default implementation in Calendar. * @stable ICU 2.0 */ - virtual int32_t handleGetYearLength(int32_t eyear) const; + virtual int32_t handleGetYearLength(int32_t eyear) const override; /** * Subclasses may override this method to compute several fields * specific to each calendar system. These are: @@ -337,7 +337,7 @@ public: * a calendar with the specified Julian/Gregorian cutover date. * @internal */ - virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status) override; /** * Return the extended year defined by the current fields. This will * use the UCAL_EXTENDED_YEAR field or the UCAL_YEAR and supra-year fields (such @@ -346,7 +346,7 @@ public: * @return the extended year * @internal */ - virtual int32_t handleGetExtendedYear(); + virtual int32_t handleGetExtendedYear() override; /** * Return the Julian day number of day before the first day of the * given month in the given extended year. Subclasses should override @@ -361,7 +361,7 @@ public: * @internal */ virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, - UBool useMonth) const; + UBool useMonth) const override; /** @@ -370,7 +370,7 @@ public: * special handling for month validation for Hebrew calendar. * @internal */ - virtual void validateField(UCalendarDateFields field, UErrorCode &status); + virtual void validateField(UCalendarDateFields field, UErrorCode &status) override; protected: @@ -383,26 +383,26 @@ public: * false, otherwise. 
* @internal */ - virtual UBool inDaylightTime(UErrorCode& status) const; + virtual UBool inDaylightTime(UErrorCode& status) const override; - /** - * Returns TRUE because the Hebrew Calendar does have a default century - * @internal - */ - virtual UBool haveDefaultCentury() const; + /** + * Returns true because the Hebrew Calendar does have a default century + * @internal + */ + virtual UBool haveDefaultCentury() const override; - /** - * Returns the date of the start of the default century - * @return start of century - in milliseconds since epoch, 1970 - * @internal - */ - virtual UDate defaultCenturyStart() const; + /** + * Returns the date of the start of the default century + * @return start of century - in milliseconds since epoch, 1970 + * @internal + */ + virtual UDate defaultCenturyStart() const override; - /** - * Returns the year in which the default century begins - * @internal - */ - virtual int32_t defaultCenturyStartYear() const; + /** + * Returns the year in which the default century begins + * @internal + */ + virtual int32_t defaultCenturyStartYear() const override; private: // Calendar-specific implementation /** diff --git a/contrib/libs/icu/i18n/indiancal.cpp b/contrib/libs/icu/i18n/indiancal.cpp index f1ab853b94..7bba7f5092 100644 --- a/contrib/libs/icu/i18n/indiancal.cpp +++ b/contrib/libs/icu/i18n/indiancal.cpp @@ -40,7 +40,7 @@ IndianCalendar* IndianCalendar::clone() const { } IndianCalendar::IndianCalendar(const Locale& aLocale, UErrorCode& success) - : Calendar(TimeZone::createDefault(), aLocale, success) + : Calendar(TimeZone::forLocaleOrDefault(aLocale), aLocale, success) { setTimeInMillis(getNow(), success); // Call this again now that the vtable is set up properly. 
} @@ -83,7 +83,6 @@ static const int32_t LIMITS[UCAL_FIELD_COUNT][4] = { {/*N/A*/-1,/*N/A*/-1,/*N/A*/-1,/*N/A*/-1}, // IS_LEAP_MONTH }; -static const double JULIAN_EPOCH = 1721425.5; static const int32_t INDIAN_ERA_START = 78; static const int32_t INDIAN_YEAR_START = 80; @@ -96,7 +95,7 @@ int32_t IndianCalendar::handleGetLimit(UCalendarDateFields field, ELimitType lim */ static UBool isGregorianLeap(int32_t year) { - return ((year % 4) == 0) && (!(((year % 100) == 0) && ((year % 400) != 0))); + return Grego::isLeapYear(year); } //---------------------------------------------------------------------- @@ -137,56 +136,22 @@ int32_t IndianCalendar::handleGetYearLength(int32_t eyear) const { * Returns the Julian Day corresponding to gregorian date * * @param year The Gregorian year - * @param month The month in Gregorian Year + * @param month The month in Gregorian Year, 0 based. * @param date The date in Gregorian day in month */ static double gregorianToJD(int32_t year, int32_t month, int32_t date) { - double julianDay = (JULIAN_EPOCH - 1) + - (365 * (year - 1)) + - uprv_floor((year - 1) / 4) + - (-uprv_floor((year - 1) / 100)) + - uprv_floor((year - 1) / 400) + - uprv_floor((((367 * month) - 362) / 12) + - ((month <= 2) ? 0 : - (isGregorianLeap(year) ? -1 : -2) - ) + - date); - - return julianDay; + return Grego::fieldsToDay(year, month, date) + kEpochStartAsJulianDay - 0.5; } /* * Returns the Gregorian Date corresponding to a given Julian Day + * Month is 0 based. 
* @param jd The Julian Day */ static int32_t* jdToGregorian(double jd, int32_t gregorianDate[3]) { - double wjd, depoch, quadricent, dqc, cent, dcent, quad, dquad, yindex, yearday, leapadj; - int32_t year, month, day; - wjd = uprv_floor(jd - 0.5) + 0.5; - depoch = wjd - JULIAN_EPOCH; - quadricent = uprv_floor(depoch / 146097); - dqc = (int32_t)uprv_floor(depoch) % 146097; - cent = uprv_floor(dqc / 36524); - dcent = (int32_t)uprv_floor(dqc) % 36524; - quad = uprv_floor(dcent / 1461); - dquad = (int32_t)uprv_floor(dcent) % 1461; - yindex = uprv_floor(dquad / 365); - year = (int32_t)((quadricent * 400) + (cent * 100) + (quad * 4) + yindex); - if (!((cent == 4) || (yindex == 4))) { - year++; - } - yearday = wjd - gregorianToJD(year, 1, 1); - leapadj = ((wjd < gregorianToJD(year, 3, 1)) ? 0 - : - (isGregorianLeap(year) ? 1 : 2) - ); - month = (int32_t)uprv_floor((((yearday + leapadj) * 12) + 373) / 367); - day = (int32_t)(wjd - gregorianToJD(year, month, 1)) + 1; - - gregorianDate[0] = year; - gregorianDate[1] = month; - gregorianDate[2] = day; - + int32_t gdow; + Grego::dayToFields(jd - kEpochStartAsJulianDay, + gregorianDate[0], gregorianDate[1], gregorianDate[2], gdow); return gregorianDate; } @@ -203,11 +168,11 @@ static double IndianToJD(int32_t year, int32_t month, int32_t date) { if(isGregorianLeap(gyear)) { leapMonth = 31; - start = gregorianToJD(gyear, 3, 21); + start = gregorianToJD(gyear, 2 /* The third month in 0 based month */, 21); } else { leapMonth = 30; - start = gregorianToJD(gyear, 3, 22); + start = gregorianToJD(gyear, 2 /* The third month in 0 based month */, 22); } if (month == 1) { @@ -243,7 +208,7 @@ int32_t IndianCalendar::handleComputeMonthStart(int32_t eyear, int32_t month, UB //month is 0 based; converting it to 1-based int32_t imonth; - // If the month is out of range, adjust it into range, and adjust the extended eyar accordingly + // If the month is out of range, adjust it into range, and adjust the extended year accordingly if (month < 0 
|| month > 11) { eyear += (int32_t)ClockMath::floorDivide(month, 12, month); } @@ -297,7 +262,7 @@ void IndianCalendar::handleComputeFields(int32_t julianDay, UErrorCode& /* stat gregorianYear = jdToGregorian(julianDay, gd)[0]; // Gregorian date for Julian day IndianYear = gregorianYear - INDIAN_ERA_START; // Year in Saka era - jdAtStartOfGregYear = gregorianToJD(gregorianYear, 1, 1); // JD at start of Gregorian year + jdAtStartOfGregYear = gregorianToJD(gregorianYear, 0, 1); // JD at start of Gregorian year yday = (int32_t)(julianDay - jdAtStartOfGregYear); // Day number in Gregorian year (starting from 0) if (yday < INDIAN_YEAR_START) { diff --git a/contrib/libs/icu/i18n/indiancal.h b/contrib/libs/icu/i18n/indiancal.h index e259d9bc17..029515cf54 100644 --- a/contrib/libs/icu/i18n/indiancal.h +++ b/contrib/libs/icu/i18n/indiancal.h @@ -25,7 +25,7 @@ U_NAMESPACE_BEGIN * Concrete class which provides the Indian calendar. * <P> * <code>IndianCalendar</code> is a subclass of <code>Calendar</code> - * that numbers years since the begining of SAKA ERA. This is the civil calendar + * that numbers years since the beginning of SAKA ERA. This is the civil calendar * which is accepted by government of India as Indian National Calendar. * The two calendars most widely used in India today are the Vikrama calendar * followed in North India and the Shalivahana or Saka calendar which is followed @@ -147,7 +147,7 @@ public: * @param aLocale The given locale. * @param success Indicates the status of IndianCalendar object construction. * Returns U_ZERO_ERROR if constructed successfully. 
- * @param beCivil Whether the calendar should be civil (default-TRUE) or religious (FALSE) + * @param beCivil Whether the calendar should be civil (default-true) or religious (false) * @internal */ IndianCalendar(const Locale& aLocale, UErrorCode &success); @@ -186,7 +186,7 @@ public: // TODO: copy c'tor, etc // clone - virtual IndianCalendar* clone() const; + virtual IndianCalendar* clone() const override; private: /** @@ -200,7 +200,7 @@ public: /** * @internal */ - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; /** * Return the length (in days) of the given month. @@ -209,13 +209,13 @@ public: * @param year The month(0-based) in Indian year * @internal */ - virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const; + virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const override; /** * Return the number of days in the given Indian year * @internal */ - virtual int32_t handleGetYearLength(int32_t extendedYear) const; + virtual int32_t handleGetYearLength(int32_t extendedYear) const override; //------------------------------------------------------------------------- // Functions for converting from field values to milliseconds.... 
@@ -225,7 +225,7 @@ public: /** * @internal */ - virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, UBool useMonth) const; + virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, UBool useMonth) const override; //------------------------------------------------------------------------- // Functions for converting from milliseconds to field values @@ -234,7 +234,7 @@ public: /** * @internal */ - virtual int32_t handleGetExtendedYear(); + virtual int32_t handleGetExtendedYear() override; /** * Override Calendar to compute several fields specific to the Indian @@ -252,7 +252,7 @@ public: * calendar equivalents for the given Julian day. * @internal */ - virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status) override; // UObject stuff public: @@ -261,7 +261,7 @@ public: * same class ID. Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. This is useful only for comparing to a return @@ -282,7 +282,7 @@ public: * @return calendar type * @internal */ - virtual const char * getType() const; + virtual const char * getType() const override; private: IndianCalendar(); // default constructor not implemented @@ -299,27 +299,27 @@ protected: * false, otherwise. 
* @internal */ - virtual UBool inDaylightTime(UErrorCode& status) const; + virtual UBool inDaylightTime(UErrorCode& status) const override; /** - * Returns TRUE because the Indian Calendar does have a default century + * Returns true because the Indian Calendar does have a default century * @internal */ - virtual UBool haveDefaultCentury() const; + virtual UBool haveDefaultCentury() const override; /** * Returns the date of the start of the default century * @return start of century - in milliseconds since epoch, 1970 * @internal */ - virtual UDate defaultCenturyStart() const; + virtual UDate defaultCenturyStart() const override; /** * Returns the year in which the default century begins * @internal */ - virtual int32_t defaultCenturyStartYear() const; + virtual int32_t defaultCenturyStartYear() const override; }; U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/inputext.cpp b/contrib/libs/icu/i18n/inputext.cpp index 2d4f8a388a..fa4939e8f4 100644 --- a/contrib/libs/icu/i18n/inputext.cpp +++ b/contrib/libs/icu/i18n/inputext.cpp @@ -141,7 +141,7 @@ void InputText::MungeInput(UBool fStripTags) { } // - // Tally up the byte occurence statistics. + // Tally up the byte occurrence statistics. // These are available for use by the various detectors. // diff --git a/contrib/libs/icu/i18n/inputext.h b/contrib/libs/icu/i18n/inputext.h index 8edc561fc6..fb92dc0634 100644 --- a/contrib/libs/icu/i18n/inputext.h +++ b/contrib/libs/icu/i18n/inputext.h @@ -44,7 +44,7 @@ public: int32_t fInputLen; // Length of the byte data in fInputBytes. // byte frequency statistics for the input text. // Value is percent, not absolute. - // Value is rounded up, so zero really means zero occurences. + // Value is rounded up, so zero really means zero occurrences. 
int16_t *fByteStats; UBool fC1Bytes; // True if any bytes in the range 0x80 - 0x9F are in the input;false by default char *fDeclaredEncoding; diff --git a/contrib/libs/icu/i18n/islamcal.cpp b/contrib/libs/icu/i18n/islamcal.cpp index 582b3365a6..de37e6b939 100644 --- a/contrib/libs/icu/i18n/islamcal.cpp +++ b/contrib/libs/icu/i18n/islamcal.cpp @@ -222,7 +222,7 @@ const char *IslamicCalendar::getType() const { sType = "islamic-umalqura"; break; default: - UPRV_UNREACHABLE; // out of range + UPRV_UNREACHABLE_EXIT; // out of range } return sType; } @@ -232,7 +232,7 @@ IslamicCalendar* IslamicCalendar::clone() const { } IslamicCalendar::IslamicCalendar(const Locale& aLocale, UErrorCode& success, ECalculationType type) -: Calendar(TimeZone::createDefault(), aLocale, success), +: Calendar(TimeZone::forLocaleOrDefault(aLocale), aLocale, success), cType(type) { setTimeInMillis(getNow(), success); // Call this again now that the vtable is set up properly. @@ -368,7 +368,7 @@ int32_t IslamicCalendar::yearStart(int32_t year) const{ if (cType == CIVIL || cType == TBLA || (cType == UMALQURA && (year < UMALQURA_YEAR_START || year > UMALQURA_YEAR_END))) { - return (year-1)*354 + ClockMath::floorDivide((3+11*year),30); + return (year-1)*354 + ClockMath::floorDivide((3+11*(int64_t)year),(int64_t)30); } else if(cType==ASTRONOMICAL){ return trueMonthStart(12*(year-1)); } else { @@ -391,7 +391,7 @@ int32_t IslamicCalendar::monthStart(int32_t year, int32_t month) const { if (cType == CIVIL || cType == TBLA) { // This does not handle months out of the range 0..11 return (int32_t)uprv_ceil(29.5*month) - + (year-1)*354 + (int32_t)ClockMath::floorDivide((3+11*year),30); + + (year-1)*354 + (int32_t)ClockMath::floorDivide((3+11*(int64_t)year),(int64_t)30); } else if(cType==ASTRONOMICAL){ return trueMonthStart(12*(year-1) + month); } else { @@ -447,7 +447,8 @@ int32_t IslamicCalendar::trueMonthStart(int32_t month) const } } while (age < 0); } - start = (int32_t)ClockMath::floorDivide((origin - 
HIJRA_MILLIS), (double)kOneDay) + 1; + start = (int32_t)(ClockMath::floorDivide( + (int64_t)((int64_t)origin - HIJRA_MILLIS), (int64_t)kOneDay) + 1); CalendarCache::put(&gMonthCache, month, start, status); } trueMonthStartEnd : @@ -639,13 +640,14 @@ void IslamicCalendar::handleComputeFields(int32_t julianDay, UErrorCode &status) months--; } - year = months / 12 + 1; - month = months % 12; + year = months >= 0 ? ((months / 12) + 1) : ((months + 1 ) / 12); + month = ((months % 12) + 12 ) % 12; } else if(cType == UMALQURA) { int32_t umalquraStartdays = yearStart(UMALQURA_YEAR_START) ; if( days < umalquraStartdays){ //Use Civil calculation - year = (int)ClockMath::floorDivide( (double)(30 * days + 10646) , 10631.0 ); + year = (int32_t)ClockMath::floorDivide( + (30 * (int64_t)days + 10646) , (int64_t)10631.0 ); month = (int32_t)uprv_ceil((days - 29 - yearStart(year)) / 29.5 ); month = month<11?month:11; startDate = monthStart(year, month); @@ -673,7 +675,7 @@ void IslamicCalendar::handleComputeFields(int32_t julianDay, UErrorCode &status) month = m; } } else { // invalid 'civil' - UPRV_UNREACHABLE; // should not get here, out of range + UPRV_UNREACHABLE_EXIT; // should not get here, out of range } dayOfMonth = (days - monthStart(year, month)) + 1; diff --git a/contrib/libs/icu/i18n/islamcal.h b/contrib/libs/icu/i18n/islamcal.h index fde58478c0..a29b7daa08 100644 --- a/contrib/libs/icu/i18n/islamcal.h +++ b/contrib/libs/icu/i18n/islamcal.h @@ -67,7 +67,7 @@ U_NAMESPACE_BEGIN * moon's illumination, and other factors, it is possible to determine the start * of a lunar month with a fairly high degree of certainty. However, these * calculations are extremely complicated and thus slow, so most algorithms, - * including the one used here, are only approximations of the true astronical + * including the one used here, are only approximations of the true astronomical * calculations. 
At present, the approximations used in this class are fairly * simplistic; they will be improved in later versions of the code. * <p> @@ -230,7 +230,7 @@ class U_I18N_API IslamicCalendar : public Calendar { // TODO: copy c'tor, etc // clone - virtual IslamicCalendar* clone() const; + virtual IslamicCalendar* clone() const override; private: /** @@ -292,7 +292,7 @@ class U_I18N_API IslamicCalendar : public Calendar { /** * @internal */ - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; /** * Return the length (in days) of the given month. @@ -301,13 +301,13 @@ class U_I18N_API IslamicCalendar : public Calendar { * @param year The hijri month, 0-based * @internal */ - virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const; + virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const override; /** * Return the number of days in the given Islamic year * @internal */ - virtual int32_t handleGetYearLength(int32_t extendedYear) const; + virtual int32_t handleGetYearLength(int32_t extendedYear) const override; //------------------------------------------------------------------------- // Functions for converting from field values to milliseconds.... 
@@ -317,7 +317,7 @@ class U_I18N_API IslamicCalendar : public Calendar { /** * @internal */ - virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, UBool useMonth) const; + virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, UBool useMonth) const override; //------------------------------------------------------------------------- // Functions for converting from milliseconds to field values @@ -326,7 +326,7 @@ class U_I18N_API IslamicCalendar : public Calendar { /** * @internal */ - virtual int32_t handleGetExtendedYear(); + virtual int32_t handleGetExtendedYear() override; /** * Override Calendar to compute several fields specific to the Islamic @@ -344,7 +344,7 @@ class U_I18N_API IslamicCalendar : public Calendar { * calendar equivalents for the given Julian day. * @internal */ - virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status) override; // UObject stuff public: @@ -353,7 +353,7 @@ class U_I18N_API IslamicCalendar : public Calendar { * same class ID. Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. This is useful only for comparing to a return @@ -374,7 +374,7 @@ class U_I18N_API IslamicCalendar : public Calendar { * @return calendar type * @internal */ - virtual const char * getType() const; + virtual const char * getType() const override; private: IslamicCalendar(); // default constructor not implemented @@ -391,27 +391,27 @@ class U_I18N_API IslamicCalendar : public Calendar { * false, otherwise. 
* @internal */ - virtual UBool inDaylightTime(UErrorCode& status) const; + virtual UBool inDaylightTime(UErrorCode& status) const override; /** - * Returns TRUE because the Islamic Calendar does have a default century + * Returns true because the Islamic Calendar does have a default century * @internal */ - virtual UBool haveDefaultCentury() const; + virtual UBool haveDefaultCentury() const override; /** * Returns the date of the start of the default century * @return start of century - in milliseconds since epoch, 1970 * @internal */ - virtual UDate defaultCenturyStart() const; + virtual UDate defaultCenturyStart() const override; /** * Returns the year in which the default century begins * @internal */ - virtual int32_t defaultCenturyStartYear() const; + virtual int32_t defaultCenturyStartYear() const override; private: /** diff --git a/contrib/libs/icu/i18n/japancal.h b/contrib/libs/icu/i18n/japancal.h index 03e6361c9f..8851344052 100644 --- a/contrib/libs/icu/i18n/japancal.h +++ b/contrib/libs/icu/i18n/japancal.h @@ -116,20 +116,20 @@ public: * @return return a polymorphic copy of this calendar. * @internal */ - virtual JapaneseCalendar* clone() const; + virtual JapaneseCalendar* clone() const override; /** * Return the extended year defined by the current fields. In the * Japanese calendar case, this is equal to the equivalent extended Gregorian year. * @internal */ - virtual int32_t handleGetExtendedYear(); + virtual int32_t handleGetExtendedYear() override; /** * Return the maximum value that this field could have, given the current date. * @internal */ - virtual int32_t getActualMaximum(UCalendarDateFields field, UErrorCode& status) const; + virtual int32_t getActualMaximum(UCalendarDateFields field, UErrorCode& status) const override; public: @@ -143,7 +143,7 @@ public: * same class ID. Objects of other classes have different class IDs. 
* @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. This is useful only for comparing to a return @@ -164,24 +164,24 @@ public: * @return calendar type * @internal */ - virtual const char * getType() const; + virtual const char * getType() const override; /** - * @return FALSE - no default century in Japanese + * @return false - no default century in Japanese * @internal */ - virtual UBool haveDefaultCentury() const; + virtual UBool haveDefaultCentury() const override; /** * Not used - no default century. * @internal */ - virtual UDate defaultCenturyStart() const; + virtual UDate defaultCenturyStart() const override; /** * Not used - no default century. * @internal */ - virtual int32_t defaultCenturyStartYear() const; + virtual int32_t defaultCenturyStartYear() const override; private: JapaneseCalendar(); // default constructor not implemented @@ -191,19 +191,19 @@ protected: * Calculate the era for internal computation * @internal */ - virtual int32_t internalGetEra() const; + virtual int32_t internalGetEra() const override; /** * Compute fields from the JD * @internal */ - virtual void handleComputeFields(int32_t julianDay, UErrorCode& status); + virtual void handleComputeFields(int32_t julianDay, UErrorCode& status) override; /** * Calculate the limit for a specified type of limit and field * @internal */ - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; /*** * Called by computeJulianDay. Returns the default month (0-based) for the year, @@ -212,7 +212,7 @@ protected: * @param eyear the extended year * @internal */ - virtual int32_t getDefaultMonthInYear(int32_t eyear); + virtual int32_t getDefaultMonthInYear(int32_t eyear) override; /*** * Called by computeJulianDay. 
Returns the default day (1-based) for the month, @@ -222,7 +222,7 @@ protected: * @param mon the month in the year * @internal */ - virtual int32_t getDefaultDayInMonth(int32_t eyear, int32_t month); + virtual int32_t getDefaultDayInMonth(int32_t eyear, int32_t month) override; }; U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/listformatter.cpp b/contrib/libs/icu/i18n/listformatter.cpp index da99c9291d..4142fa461d 100644 --- a/contrib/libs/icu/i18n/listformatter.cpp +++ b/contrib/libs/icu/i18n/listformatter.cpp @@ -16,6 +16,10 @@ * created by: Umesh P. Nair */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + #include "cmemory.h" #include "unicode/fpositer.h" // FieldPositionIterator #include "unicode/listformatter.h" @@ -52,10 +56,12 @@ public: virtual PatternHandler* clone() const { return new PatternHandler(twoPattern, endPattern); } + /** Argument: final string in the list. */ virtual const SimpleFormatter& getTwoPattern(const UnicodeString&) const { return twoPattern; } + /** Argument: final string in the list. */ virtual const SimpleFormatter& getEndPattern(const UnicodeString&) const { return endPattern; } @@ -169,21 +175,21 @@ PatternHandler* createPatternHandler( UErrorCode& status) { if (uprv_strcmp(lang, "es") == 0) { // Spanish - UnicodeString spanishYStr(TRUE, spanishY, -1); + UnicodeString spanishYStr(true, spanishY, -1); bool twoIsY = two == spanishYStr; bool endIsY = end == spanishYStr; if (twoIsY || endIsY) { - UnicodeString replacement(TRUE, spanishE, -1); + UnicodeString replacement(true, spanishE, -1); return new ContextualHandler( shouldChangeToE, twoIsY ? replacement : two, two, endIsY ? 
replacement : end, end, status); } - UnicodeString spanishOStr(TRUE, spanishO, -1); + UnicodeString spanishOStr(true, spanishO, -1); bool twoIsO = two == spanishOStr; bool endIsO = end == spanishOStr; if (twoIsO || endIsO) { - UnicodeString replacement(TRUE, spanishU, -1); + UnicodeString replacement(true, spanishU, -1); return new ContextualHandler( shouldChangeToU, twoIsO ? replacement : two, two, @@ -191,11 +197,11 @@ PatternHandler* createPatternHandler( } } else if (uprv_strcmp(lang, "he") == 0 || uprv_strcmp(lang, "iw") == 0) { // Hebrew - UnicodeString hebrewVavStr(TRUE, hebrewVav, -1); + UnicodeString hebrewVavStr(true, hebrewVav, -1); bool twoIsVav = two == hebrewVavStr; bool endIsVav = end == hebrewVavStr; if (twoIsVav || endIsVav) { - UnicodeString replacement(TRUE, hebrewVavDash, -1); + UnicodeString replacement(true, hebrewVavDash, -1); return new ContextualHandler( shouldChangeToVavDash, twoIsVav ? replacement : two, two, @@ -236,17 +242,15 @@ ListFormatInternal(const ListFormatInternal &other) : }; -#if !UCONFIG_NO_FORMATTING -class FormattedListData : public FormattedValueFieldPositionIteratorImpl { +class FormattedListData : public FormattedValueStringBuilderImpl { public: - FormattedListData(UErrorCode& status) : FormattedValueFieldPositionIteratorImpl(5, status) {} + FormattedListData(UErrorCode&) : FormattedValueStringBuilderImpl(kUndefinedField) {} virtual ~FormattedListData(); }; FormattedListData::~FormattedListData() = default; UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedList) -#endif static Hashtable* listPatternHash = nullptr; @@ -255,7 +259,7 @@ U_CDECL_BEGIN static UBool U_CALLCONV uprv_listformatter_cleanup() { delete listPatternHash; listPatternHash = nullptr; - return TRUE; + return true; } static void U_CALLCONV @@ -348,7 +352,6 @@ const ListFormatInternal* ListFormatter::getListFormatInternal( return result; } -#if !UCONFIG_NO_FORMATTING static const char* typeWidthToStyleString(UListFormatterType type, UListFormatterWidth 
width) { switch (type) { case ULISTFMT_TYPE_AND: @@ -392,7 +395,6 @@ static const char* typeWidthToStyleString(UListFormatterType type, UListFormatte return nullptr; } -#endif static const UChar solidus = 0x2F; static const UChar aliasPrefix[] = { 0x6C,0x69,0x73,0x74,0x50,0x61,0x74,0x74,0x65,0x72,0x6E,0x2F }; // "listPattern/" @@ -442,7 +444,7 @@ struct ListFormatter::ListPatternsSink : public ResourceSink { } virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { + UErrorCode &errorCode) override { aliasedStyle[0] = 0; if (value.getType() == URES_ALIAS) { setAliasedStyle(value.getAliasUnicodeString(errorCode)); @@ -513,14 +515,9 @@ ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) { } ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) { -#if !UCONFIG_NO_FORMATTING return createInstance(locale, ULISTFMT_TYPE_AND, ULISTFMT_WIDTH_WIDE, errorCode); -#else - return createInstance(locale, "standard", errorCode); -#endif } -#if !UCONFIG_NO_FORMATTING ListFormatter* ListFormatter::createInstance( const Locale& locale, UListFormatterType type, UListFormatterWidth width, UErrorCode& errorCode) { const char* style = typeWidthToStyleString(type, width); @@ -530,7 +527,6 @@ ListFormatter* ListFormatter::createInstance( } return createInstance(locale, style, errorCode); } -#endif ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *style, UErrorCode& errorCode) { const ListFormatInternal* listFormatInternal = getListFormatInternal(locale, style, errorCode); @@ -557,50 +553,89 @@ ListFormatter::~ListFormatter() { delete owned; } -/** - * Joins first and second using the pattern pat. - * On entry offset is an offset into first or -1 if offset unspecified. - * On exit offset is offset of second in result if recordOffset was set - * Otherwise if it was >=0 it is set to point into result where it used - * to point into first. 
On exit, result is the join of first and second - * according to pat. Any previous value of result gets replaced. - */ -static void joinStringsAndReplace( - const SimpleFormatter& pat, - const UnicodeString& first, - const UnicodeString& second, - UnicodeString &result, - UBool recordOffset, - int32_t &offset, - int32_t *offsetFirst, - int32_t *offsetSecond, - UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { - return; - } - const UnicodeString *params[2] = {&first, &second}; - int32_t offsets[2]; - pat.formatAndReplace( - params, - UPRV_LENGTHOF(params), - result, - offsets, - UPRV_LENGTHOF(offsets), - errorCode); - if (U_FAILURE(errorCode)) { - return; +namespace { + +class FormattedListBuilder { +public: + LocalPointer<FormattedListData> data; + + /** For lists of length 1+ */ + FormattedListBuilder(const UnicodeString& start, UErrorCode& status) + : data(new FormattedListData(status), status) { + if (U_SUCCESS(status)) { + data->getStringRef().append( + start, + {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, + status); + data->appendSpanInfo(UFIELD_CATEGORY_LIST_SPAN, 0, -1, start.length(), status); + } } - if (offsets[0] == -1 || offsets[1] == -1) { - errorCode = U_INVALID_FORMAT_ERROR; - return; + + /** For lists of length 0 */ + FormattedListBuilder(UErrorCode& status) + : data(new FormattedListData(status), status) { } - if (recordOffset) { - offset = offsets[1]; - } else if (offset >= 0) { - offset += offsets[0]; + + void append(const SimpleFormatter& pattern, const UnicodeString& next, int32_t position, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + if (pattern.getArgumentLimit() != 2) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } + // In the pattern, {0} are the pre-existing elements and {1} is the new element. 
+ int32_t offsets[] = {0, 0}; + UnicodeString temp = pattern.getTextWithNoArguments(offsets, 2); + if (offsets[0] <= offsets[1]) { + // prefix{0}infix{1}suffix + // Prepend prefix, then append infix, element, and suffix + data->getStringRef().insert( + 0, + temp.tempSubStringBetween(0, offsets[0]), + {UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD}, + status); + data->getStringRef().append( + temp.tempSubStringBetween(offsets[0], offsets[1]), + {UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD}, + status); + data->getStringRef().append( + next, + {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, + status); + data->appendSpanInfo(UFIELD_CATEGORY_LIST_SPAN, position, -1, next.length(), status); + data->getStringRef().append( + temp.tempSubString(offsets[1]), + {UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD}, + status); + } else { + // prefix{1}infix{0}suffix + // Prepend infix, element, and prefix, then append suffix. + // (We prepend in reverse order because prepending at index 0 is fast.) + data->getStringRef().insert( + 0, + temp.tempSubStringBetween(offsets[1], offsets[0]), + {UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD}, + status); + data->getStringRef().insert( + 0, + next, + {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, + status); + data->prependSpanInfo(UFIELD_CATEGORY_LIST_SPAN, position, -1, next.length(), status); + data->getStringRef().insert( + 0, + temp.tempSubStringBetween(0, offsets[1]), + {UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD}, + status); + data->getStringRef().append( + temp.tempSubString(offsets[0]), + {UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD}, + status); + } } - if (offsetFirst != nullptr) *offsetFirst = offsets[0]; - if (offsetSecond != nullptr) *offsetSecond = offsets[1]; +}; + } UnicodeString& ListFormatter::format( @@ -619,190 +654,86 @@ UnicodeString& ListFormatter::format( int32_t index, int32_t &offset, UErrorCode& errorCode) const { - return format_(items, nItems, appendTo, index, offset, nullptr, errorCode); + int32_t initialOffset 
= appendTo.length(); + auto result = formatStringsToValue(items, nItems, errorCode); + UnicodeStringAppendable appendable(appendTo); + result.appendTo(appendable, errorCode); + if (index >= 0) { + ConstrainedFieldPosition cfpos; + cfpos.constrainField(UFIELD_CATEGORY_LIST_SPAN, index); + result.nextPosition(cfpos, errorCode); + offset = initialOffset + cfpos.getStart(); + } + return appendTo; } -#if !UCONFIG_NO_FORMATTING FormattedList ListFormatter::formatStringsToValue( const UnicodeString items[], int32_t nItems, UErrorCode& errorCode) const { - LocalPointer<FormattedListData> result(new FormattedListData(errorCode), errorCode); - if (U_FAILURE(errorCode)) { - return FormattedList(errorCode); + if (nItems == 0) { + FormattedListBuilder result(errorCode); + if (U_FAILURE(errorCode)) { + return FormattedList(errorCode); + } else { + return FormattedList(result.data.orphan()); + } + } else if (nItems == 1) { + FormattedListBuilder result(items[0], errorCode); + result.data->getStringRef().writeTerminator(errorCode); + if (U_FAILURE(errorCode)) { + return FormattedList(errorCode); + } else { + return FormattedList(result.data.orphan()); + } + } else if (nItems == 2) { + FormattedListBuilder result(items[0], errorCode); + if (U_FAILURE(errorCode)) { + return FormattedList(errorCode); + } + result.append( + data->patternHandler->getTwoPattern(items[1]), + items[1], + 1, + errorCode); + result.data->getStringRef().writeTerminator(errorCode); + if (U_FAILURE(errorCode)) { + return FormattedList(errorCode); + } else { + return FormattedList(result.data.orphan()); + } } - UnicodeString string; - int32_t offset; - auto handler = result->getHandler(errorCode); - handler.setCategory(UFIELD_CATEGORY_LIST); - format_(items, nItems, string, -1, offset, &handler, errorCode); - handler.getError(errorCode); - result->appendString(string, errorCode); + + FormattedListBuilder result(items[0], errorCode); if (U_FAILURE(errorCode)) { return FormattedList(errorCode); } - - // Add span 
fields and sort - ConstrainedFieldPosition cfpos; - cfpos.constrainField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD); - int32_t i = 0; - handler.setCategory(UFIELD_CATEGORY_LIST_SPAN); - while (result->nextPosition(cfpos, errorCode)) { - handler.addAttribute(i++, cfpos.getStart(), cfpos.getLimit()); + result.append( + data->startPattern, + items[1], + 1, + errorCode); + for (int32_t i = 2; i < nItems - 1; i++) { + result.append( + data->middlePattern, + items[i], + i, + errorCode); } - handler.getError(errorCode); + result.append( + data->patternHandler->getEndPattern(items[nItems-1]), + items[nItems-1], + nItems-1, + errorCode); + result.data->getStringRef().writeTerminator(errorCode); if (U_FAILURE(errorCode)) { return FormattedList(errorCode); + } else { + return FormattedList(result.data.orphan()); } - result->sort(); - - return FormattedList(result.orphan()); } -#endif -UnicodeString& ListFormatter::format_( - const UnicodeString items[], - int32_t nItems, - UnicodeString& appendTo, - int32_t index, - int32_t &offset, - FieldPositionHandler* handler, - UErrorCode& errorCode) const { -#if !UCONFIG_NO_FORMATTING - offset = -1; - if (U_FAILURE(errorCode)) { - return appendTo; - } - if (data == nullptr) { - errorCode = U_INVALID_STATE_ERROR; - return appendTo; - } - if (nItems <= 0) { - return appendTo; - } - if (nItems == 1) { - if (index == 0) { - offset = appendTo.length(); - } - if (handler != nullptr) { - handler->addAttribute(ULISTFMT_ELEMENT_FIELD, - appendTo.length(), - appendTo.length() + items[0].length()); - } - appendTo.append(items[0]); - return appendTo; - } - UnicodeString result(items[0]); - if (index == 0) { - offset = 0; - } - int32_t offsetFirst = 0; - int32_t offsetSecond = 0; - int32_t prefixLength = 0; - // for n items, there are 2 * (n + 1) boundary including 0 and the upper - // edge. - MaybeStackArray<int32_t, 10> offsets((handler != nullptr) ? 
2 * (nItems + 1): 0); - if (nItems == 2) { - joinStringsAndReplace( - data->patternHandler->getTwoPattern(items[1]), - result, - items[1], - result, - index == 1, - offset, - &offsetFirst, - &offsetSecond, - errorCode); - } else { - joinStringsAndReplace( - data->startPattern, - result, - items[1], - result, - index == 1, - offset, - &offsetFirst, - &offsetSecond, - errorCode); - } - if (handler != nullptr) { - offsets[0] = 0; - prefixLength += offsetFirst; - offsets[1] = offsetSecond - prefixLength; - } - if (nItems > 2) { - for (int32_t i = 2; i < nItems - 1; ++i) { - joinStringsAndReplace( - data->middlePattern, - result, - items[i], - result, - index == i, - offset, - &offsetFirst, - &offsetSecond, - errorCode); - if (handler != nullptr) { - prefixLength += offsetFirst; - offsets[i] = offsetSecond - prefixLength; - } - } - joinStringsAndReplace( - data->patternHandler->getEndPattern(items[nItems - 1]), - result, - items[nItems - 1], - result, - index == nItems - 1, - offset, - &offsetFirst, - &offsetSecond, - errorCode); - if (handler != nullptr) { - prefixLength += offsetFirst; - offsets[nItems - 1] = offsetSecond - prefixLength; - } - } - if (handler != nullptr) { - // If there are already some data in appendTo, we need to adjust the index - // by shifting that lenght while insert into handler. - int32_t shift = appendTo.length() + prefixLength; - // Output the ULISTFMT_ELEMENT_FIELD in the order of the input elements - for (int32_t i = 0; i < nItems; ++i) { - offsets[i + nItems] = offsets[i] + items[i].length() + shift; - offsets[i] += shift; - handler->addAttribute( - ULISTFMT_ELEMENT_FIELD, // id - offsets[i], // index - offsets[i + nItems]); // limit - } - // The locale pattern may reorder the items (such as in ur-IN locale), - // so we cannot assume the array is in accendning order. - // To handle the edging case, just insert the two ends into the array - // and sort. 
Then we output ULISTFMT_LITERAL_FIELD if the indecies - // between the even and odd position are not the same in the sorted array. - offsets[2 * nItems] = shift - prefixLength; - offsets[2 * nItems + 1] = result.length() + shift - prefixLength; - uprv_sortArray(offsets.getAlias(), 2 * (nItems + 1), sizeof(int32_t), - uprv_int32Comparator, nullptr, - false, &errorCode); - for (int32_t i = 0; i <= nItems; ++i) { - if (offsets[i * 2] != offsets[i * 2 + 1]) { - handler->addAttribute( - ULISTFMT_LITERAL_FIELD, // id - offsets[i * 2], // index - offsets[i * 2 + 1]); // limit - } - } - } - if (U_SUCCESS(errorCode)) { - if (offset >= 0) { - offset += appendTo.length(); - } - appendTo += result; - } -#endif - return appendTo; -} - U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/measfmt.cpp b/contrib/libs/icu/i18n/measfmt.cpp index e05d66d413..a9a56a3b58 100644 --- a/contrib/libs/icu/i18n/measfmt.cpp +++ b/contrib/libs/icu/i18n/measfmt.cpp @@ -427,12 +427,12 @@ MeasureFormat::~MeasureFormat() { delete listFormatter; } -UBool MeasureFormat::operator==(const Format &other) const { +bool MeasureFormat::operator==(const Format &other) const { if (this == &other) { // Same object, equal - return TRUE; + return true; } if (!Format::operator==(other)) { - return FALSE; + return false; } const MeasureFormat &rhs = static_cast<const MeasureFormat &>(other); @@ -441,7 +441,7 @@ UBool MeasureFormat::operator==(const Format &other) const { // differing widths aren't equivalent if (fWidth != rhs.fWidth) { - return FALSE; + return false; } // Width the same check locales. // We don't need to check locales if both objects have same cache. 
@@ -451,10 +451,10 @@ UBool MeasureFormat::operator==(const Format &other) const { const char *rhsLocaleId = rhs.getLocaleID(status); if (U_FAILURE(status)) { // On failure, assume not equal - return FALSE; + return false; } if (uprv_strcmp(localeId, rhsLocaleId) != 0) { - return FALSE; + return false; } } // Locales same, check NumberFormat if shared data differs. @@ -581,7 +581,10 @@ void MeasureFormat::initMeasureFormat( UMeasureFormatWidth w, NumberFormat *nfToAdopt, UErrorCode &status) { - static const char *listStyles[] = {"unit", "unit-short", "unit-narrow"}; + static const UListFormatterWidth listWidths[] = { + ULISTFMT_WIDTH_WIDE, + ULISTFMT_WIDTH_SHORT, + ULISTFMT_WIDTH_NARROW}; LocalPointer<NumberFormat> nf(nfToAdopt); if (U_FAILURE(status)) { return; @@ -620,7 +623,8 @@ void MeasureFormat::initMeasureFormat( delete listFormatter; listFormatter = ListFormatter::createInstance( locale, - listStyles[getRegularWidth(fWidth)], + ULISTFMT_TYPE_UNITS, + listWidths[getRegularWidth(fWidth)], status); } @@ -861,7 +865,7 @@ UnicodeString &MeasureFormat::formatMeasuresSlowTrack( return appendTo; } // Fix up FieldPosition indexes if our field is found. - if (offset != -1) { + if (fieldPositionFoundIndex != -1 && offset != -1) { pos.setBeginIndex(fpos.getBeginIndex() + offset); pos.setEndIndex(fpos.getEndIndex() + offset); } diff --git a/contrib/libs/icu/i18n/measunit.cpp b/contrib/libs/icu/i18n/measunit.cpp index 4edf130b7e..5ad3991974 100644 --- a/contrib/libs/icu/i18n/measunit.cpp +++ b/contrib/libs/icu/i18n/measunit.cpp @@ -31,64 +31,39 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MeasureUnit) // the "End generated code" comment is auto generated code // and must not be edited manually. 
For instructions on how to correctly // update this code, refer to: -// http://site.icu-project.org/design/formatting/measureformat/updating-measure-unit +// https://icu.unicode.org/design/formatting/measureformat/updating-measure-unit // -// Start generated code - +// Start generated code for measunit.cpp +// Maps from Type ID to offset in gSubTypes. static const int32_t gOffsets[] = { 0, 2, 7, 17, - 25, - 29, - 328, - 339, - 355, - 359, - 368, + 27, + 31, + 330, + 341, + 357, + 361, 370, - 374, - 381, - 402, - 404, - 418, - 421, + 373, + 377, + 385, + 407, + 411, + 426, 427, - 437, - 441, - 445, + 433, + 443, 447, - 474 + 451, + 453, + 487 }; -static const int32_t gIndexes[] = { - 0, - 2, - 7, - 17, - 25, - 29, - 29, - 40, - 56, - 60, - 69, - 71, - 75, - 82, - 103, - 105, - 119, - 122, - 128, - 138, - 142, - 146, - 148, - 175 -}; +static const int32_t kCurrencyOffset = 5; // Must be sorted alphabetically. static const char * const gTypes[] = { @@ -136,7 +111,9 @@ static const char * const gSubTypes[] = { "square-meter", "square-mile", "square-yard", + "item", "karat", + "milligram-ofglucose-per-deciliter", "milligram-per-deciliter", "millimole-per-liter", "mole", @@ -487,12 +464,14 @@ static const char * const gSubTypes[] = { "kilojoule", "kilowatt-hour", "therm-us", + "kilowatt-hour-per-100-kilometer", "newton", "pound-force", "gigahertz", "hertz", "kilohertz", "megahertz", + "dot", "dot-per-centimeter", "dot-per-inch", "em", @@ -503,6 +482,7 @@ static const char * const gSubTypes[] = { "astronomical-unit", "centimeter", "decimeter", + "earth-radius", "fathom", "foot", "furlong", @@ -521,11 +501,14 @@ static const char * const gSubTypes[] = { "point", "solar-radius", "yard", + "candela", + "lumen", "lux", "solar-luminosity", "carat", "dalton", "earth-mass", + "grain", "gram", "kilogram", "metric-ton", @@ -537,9 +520,7 @@ static const char * const gSubTypes[] = { "solar-mass", "stone", "ton", - "", // TODO(ICU-21076): manual edit of what should have been 
generated by Java. - "percent", // TODO(ICU-21076): regenerate, deal with duplication. - "permille", // TODO(ICU-21076): regenerate, deal with duplication. + "", "gigawatt", "horsepower", "kilowatt", @@ -580,35 +561,28 @@ static const char * const gSubTypes[] = { "cup", "cup-metric", "deciliter", + "dessert-spoon", + "dessert-spoon-imperial", + "dram", + "drop", "fluid-ounce", "fluid-ounce-imperial", "gallon", "gallon-imperial", "hectoliter", + "jigger", "liter", "megaliter", "milliliter", + "pinch", "pint", "pint-metric", "quart", + "quart-imperial", "tablespoon", "teaspoon" }; -// Must be sorted by first value and then second value. -static int32_t unitPerUnitToSingleUnit[][4] = { - {378, 382, 12, 5}, - {378, 387, 12, 6}, - {388, 343, 19, 0}, - {390, 350, 19, 2}, - {392, 343, 19, 3}, - {392, 463, 4, 2}, - {392, 464, 4, 3}, - {411, 460, 3, 1}, - {414, 12, 18, 9}, - {466, 388, 4, 1} -}; - // Shortcuts to the base unit in order to make the default constructor fast static const int32_t kBaseTypeIdx = 16; static const int32_t kBaseSubTypeIdx = 0; @@ -749,68 +723,84 @@ MeasureUnit MeasureUnit::getSquareYard() { return MeasureUnit(2, 9); } -MeasureUnit *MeasureUnit::createKarat(UErrorCode &status) { +MeasureUnit *MeasureUnit::createItem(UErrorCode &status) { return MeasureUnit::create(3, 0, status); } -MeasureUnit MeasureUnit::getKarat() { +MeasureUnit MeasureUnit::getItem() { return MeasureUnit(3, 0); } -MeasureUnit *MeasureUnit::createMilligramPerDeciliter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createKarat(UErrorCode &status) { return MeasureUnit::create(3, 1, status); } -MeasureUnit MeasureUnit::getMilligramPerDeciliter() { +MeasureUnit MeasureUnit::getKarat() { return MeasureUnit(3, 1); } -MeasureUnit *MeasureUnit::createMillimolePerLiter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMilligramOfglucosePerDeciliter(UErrorCode &status) { return MeasureUnit::create(3, 2, status); } -MeasureUnit MeasureUnit::getMillimolePerLiter() { +MeasureUnit 
MeasureUnit::getMilligramOfglucosePerDeciliter() { return MeasureUnit(3, 2); } -MeasureUnit *MeasureUnit::createMole(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMilligramPerDeciliter(UErrorCode &status) { return MeasureUnit::create(3, 3, status); } -MeasureUnit MeasureUnit::getMole() { +MeasureUnit MeasureUnit::getMilligramPerDeciliter() { return MeasureUnit(3, 3); } -MeasureUnit *MeasureUnit::createPartPerMillion(UErrorCode &status) { - return MeasureUnit::create(3, 6, status); +MeasureUnit *MeasureUnit::createMillimolePerLiter(UErrorCode &status) { + return MeasureUnit::create(3, 4, status); } -MeasureUnit MeasureUnit::getPartPerMillion() { - return MeasureUnit(3, 6); +MeasureUnit MeasureUnit::getMillimolePerLiter() { + return MeasureUnit(3, 4); +} + +MeasureUnit *MeasureUnit::createMole(UErrorCode &status) { + return MeasureUnit::create(3, 5, status); +} + +MeasureUnit MeasureUnit::getMole() { + return MeasureUnit(3, 5); } MeasureUnit *MeasureUnit::createPercent(UErrorCode &status) { - return MeasureUnit::create(3, 4, status); + return MeasureUnit::create(3, 6, status); } MeasureUnit MeasureUnit::getPercent() { - return MeasureUnit(3, 4); + return MeasureUnit(3, 6); } MeasureUnit *MeasureUnit::createPermille(UErrorCode &status) { - return MeasureUnit::create(3, 5, status); + return MeasureUnit::create(3, 7, status); } MeasureUnit MeasureUnit::getPermille() { - return MeasureUnit(3, 5); + return MeasureUnit(3, 7); +} + +MeasureUnit *MeasureUnit::createPartPerMillion(UErrorCode &status) { + return MeasureUnit::create(3, 8, status); +} + +MeasureUnit MeasureUnit::getPartPerMillion() { + return MeasureUnit(3, 8); } MeasureUnit *MeasureUnit::createPermyriad(UErrorCode &status) { - return MeasureUnit::create(3, 7, status); + return MeasureUnit::create(3, 9, status); } MeasureUnit MeasureUnit::getPermyriad() { - return MeasureUnit(3, 7); + return MeasureUnit(3, 9); } MeasureUnit *MeasureUnit::createLiterPer100Kilometers(UErrorCode &status) { @@ -1165,22 
+1155,30 @@ MeasureUnit MeasureUnit::getThermUs() { return MeasureUnit(9, 8); } -MeasureUnit *MeasureUnit::createNewton(UErrorCode &status) { +MeasureUnit *MeasureUnit::createKilowattHourPer100Kilometer(UErrorCode &status) { return MeasureUnit::create(10, 0, status); } -MeasureUnit MeasureUnit::getNewton() { +MeasureUnit MeasureUnit::getKilowattHourPer100Kilometer() { return MeasureUnit(10, 0); } -MeasureUnit *MeasureUnit::createPoundForce(UErrorCode &status) { +MeasureUnit *MeasureUnit::createNewton(UErrorCode &status) { return MeasureUnit::create(10, 1, status); } -MeasureUnit MeasureUnit::getPoundForce() { +MeasureUnit MeasureUnit::getNewton() { return MeasureUnit(10, 1); } +MeasureUnit *MeasureUnit::createPoundForce(UErrorCode &status) { + return MeasureUnit::create(10, 2, status); +} + +MeasureUnit MeasureUnit::getPoundForce() { + return MeasureUnit(10, 2); +} + MeasureUnit *MeasureUnit::createGigahertz(UErrorCode &status) { return MeasureUnit::create(11, 0, status); } @@ -1213,62 +1211,70 @@ MeasureUnit MeasureUnit::getMegahertz() { return MeasureUnit(11, 3); } -MeasureUnit *MeasureUnit::createDotPerCentimeter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createDot(UErrorCode &status) { return MeasureUnit::create(12, 0, status); } -MeasureUnit MeasureUnit::getDotPerCentimeter() { +MeasureUnit MeasureUnit::getDot() { return MeasureUnit(12, 0); } -MeasureUnit *MeasureUnit::createDotPerInch(UErrorCode &status) { +MeasureUnit *MeasureUnit::createDotPerCentimeter(UErrorCode &status) { return MeasureUnit::create(12, 1, status); } -MeasureUnit MeasureUnit::getDotPerInch() { +MeasureUnit MeasureUnit::getDotPerCentimeter() { return MeasureUnit(12, 1); } -MeasureUnit *MeasureUnit::createEm(UErrorCode &status) { +MeasureUnit *MeasureUnit::createDotPerInch(UErrorCode &status) { return MeasureUnit::create(12, 2, status); } -MeasureUnit MeasureUnit::getEm() { +MeasureUnit MeasureUnit::getDotPerInch() { return MeasureUnit(12, 2); } -MeasureUnit 
*MeasureUnit::createMegapixel(UErrorCode &status) { +MeasureUnit *MeasureUnit::createEm(UErrorCode &status) { return MeasureUnit::create(12, 3, status); } -MeasureUnit MeasureUnit::getMegapixel() { +MeasureUnit MeasureUnit::getEm() { return MeasureUnit(12, 3); } -MeasureUnit *MeasureUnit::createPixel(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMegapixel(UErrorCode &status) { return MeasureUnit::create(12, 4, status); } -MeasureUnit MeasureUnit::getPixel() { +MeasureUnit MeasureUnit::getMegapixel() { return MeasureUnit(12, 4); } -MeasureUnit *MeasureUnit::createPixelPerCentimeter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createPixel(UErrorCode &status) { return MeasureUnit::create(12, 5, status); } -MeasureUnit MeasureUnit::getPixelPerCentimeter() { +MeasureUnit MeasureUnit::getPixel() { return MeasureUnit(12, 5); } -MeasureUnit *MeasureUnit::createPixelPerInch(UErrorCode &status) { +MeasureUnit *MeasureUnit::createPixelPerCentimeter(UErrorCode &status) { return MeasureUnit::create(12, 6, status); } -MeasureUnit MeasureUnit::getPixelPerInch() { +MeasureUnit MeasureUnit::getPixelPerCentimeter() { return MeasureUnit(12, 6); } +MeasureUnit *MeasureUnit::createPixelPerInch(UErrorCode &status) { + return MeasureUnit::create(12, 7, status); +} + +MeasureUnit MeasureUnit::getPixelPerInch() { + return MeasureUnit(12, 7); +} + MeasureUnit *MeasureUnit::createAstronomicalUnit(UErrorCode &status) { return MeasureUnit::create(13, 0, status); } @@ -1293,166 +1299,190 @@ MeasureUnit MeasureUnit::getDecimeter() { return MeasureUnit(13, 2); } -MeasureUnit *MeasureUnit::createFathom(UErrorCode &status) { +MeasureUnit *MeasureUnit::createEarthRadius(UErrorCode &status) { return MeasureUnit::create(13, 3, status); } -MeasureUnit MeasureUnit::getFathom() { +MeasureUnit MeasureUnit::getEarthRadius() { return MeasureUnit(13, 3); } -MeasureUnit *MeasureUnit::createFoot(UErrorCode &status) { +MeasureUnit *MeasureUnit::createFathom(UErrorCode &status) { return 
MeasureUnit::create(13, 4, status); } -MeasureUnit MeasureUnit::getFoot() { +MeasureUnit MeasureUnit::getFathom() { return MeasureUnit(13, 4); } -MeasureUnit *MeasureUnit::createFurlong(UErrorCode &status) { +MeasureUnit *MeasureUnit::createFoot(UErrorCode &status) { return MeasureUnit::create(13, 5, status); } -MeasureUnit MeasureUnit::getFurlong() { +MeasureUnit MeasureUnit::getFoot() { return MeasureUnit(13, 5); } -MeasureUnit *MeasureUnit::createInch(UErrorCode &status) { +MeasureUnit *MeasureUnit::createFurlong(UErrorCode &status) { return MeasureUnit::create(13, 6, status); } -MeasureUnit MeasureUnit::getInch() { +MeasureUnit MeasureUnit::getFurlong() { return MeasureUnit(13, 6); } -MeasureUnit *MeasureUnit::createKilometer(UErrorCode &status) { +MeasureUnit *MeasureUnit::createInch(UErrorCode &status) { return MeasureUnit::create(13, 7, status); } -MeasureUnit MeasureUnit::getKilometer() { +MeasureUnit MeasureUnit::getInch() { return MeasureUnit(13, 7); } -MeasureUnit *MeasureUnit::createLightYear(UErrorCode &status) { +MeasureUnit *MeasureUnit::createKilometer(UErrorCode &status) { return MeasureUnit::create(13, 8, status); } -MeasureUnit MeasureUnit::getLightYear() { +MeasureUnit MeasureUnit::getKilometer() { return MeasureUnit(13, 8); } -MeasureUnit *MeasureUnit::createMeter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createLightYear(UErrorCode &status) { return MeasureUnit::create(13, 9, status); } -MeasureUnit MeasureUnit::getMeter() { +MeasureUnit MeasureUnit::getLightYear() { return MeasureUnit(13, 9); } -MeasureUnit *MeasureUnit::createMicrometer(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMeter(UErrorCode &status) { return MeasureUnit::create(13, 10, status); } -MeasureUnit MeasureUnit::getMicrometer() { +MeasureUnit MeasureUnit::getMeter() { return MeasureUnit(13, 10); } -MeasureUnit *MeasureUnit::createMile(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMicrometer(UErrorCode &status) { return MeasureUnit::create(13, 11, 
status); } -MeasureUnit MeasureUnit::getMile() { +MeasureUnit MeasureUnit::getMicrometer() { return MeasureUnit(13, 11); } -MeasureUnit *MeasureUnit::createMileScandinavian(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMile(UErrorCode &status) { return MeasureUnit::create(13, 12, status); } -MeasureUnit MeasureUnit::getMileScandinavian() { +MeasureUnit MeasureUnit::getMile() { return MeasureUnit(13, 12); } -MeasureUnit *MeasureUnit::createMillimeter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMileScandinavian(UErrorCode &status) { return MeasureUnit::create(13, 13, status); } -MeasureUnit MeasureUnit::getMillimeter() { +MeasureUnit MeasureUnit::getMileScandinavian() { return MeasureUnit(13, 13); } -MeasureUnit *MeasureUnit::createNanometer(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMillimeter(UErrorCode &status) { return MeasureUnit::create(13, 14, status); } -MeasureUnit MeasureUnit::getNanometer() { +MeasureUnit MeasureUnit::getMillimeter() { return MeasureUnit(13, 14); } -MeasureUnit *MeasureUnit::createNauticalMile(UErrorCode &status) { +MeasureUnit *MeasureUnit::createNanometer(UErrorCode &status) { return MeasureUnit::create(13, 15, status); } -MeasureUnit MeasureUnit::getNauticalMile() { +MeasureUnit MeasureUnit::getNanometer() { return MeasureUnit(13, 15); } -MeasureUnit *MeasureUnit::createParsec(UErrorCode &status) { +MeasureUnit *MeasureUnit::createNauticalMile(UErrorCode &status) { return MeasureUnit::create(13, 16, status); } -MeasureUnit MeasureUnit::getParsec() { +MeasureUnit MeasureUnit::getNauticalMile() { return MeasureUnit(13, 16); } -MeasureUnit *MeasureUnit::createPicometer(UErrorCode &status) { +MeasureUnit *MeasureUnit::createParsec(UErrorCode &status) { return MeasureUnit::create(13, 17, status); } -MeasureUnit MeasureUnit::getPicometer() { +MeasureUnit MeasureUnit::getParsec() { return MeasureUnit(13, 17); } -MeasureUnit *MeasureUnit::createPoint(UErrorCode &status) { +MeasureUnit 
*MeasureUnit::createPicometer(UErrorCode &status) { return MeasureUnit::create(13, 18, status); } -MeasureUnit MeasureUnit::getPoint() { +MeasureUnit MeasureUnit::getPicometer() { return MeasureUnit(13, 18); } -MeasureUnit *MeasureUnit::createSolarRadius(UErrorCode &status) { +MeasureUnit *MeasureUnit::createPoint(UErrorCode &status) { return MeasureUnit::create(13, 19, status); } -MeasureUnit MeasureUnit::getSolarRadius() { +MeasureUnit MeasureUnit::getPoint() { return MeasureUnit(13, 19); } -MeasureUnit *MeasureUnit::createYard(UErrorCode &status) { +MeasureUnit *MeasureUnit::createSolarRadius(UErrorCode &status) { return MeasureUnit::create(13, 20, status); } -MeasureUnit MeasureUnit::getYard() { +MeasureUnit MeasureUnit::getSolarRadius() { return MeasureUnit(13, 20); } -MeasureUnit *MeasureUnit::createLux(UErrorCode &status) { +MeasureUnit *MeasureUnit::createYard(UErrorCode &status) { + return MeasureUnit::create(13, 21, status); +} + +MeasureUnit MeasureUnit::getYard() { + return MeasureUnit(13, 21); +} + +MeasureUnit *MeasureUnit::createCandela(UErrorCode &status) { return MeasureUnit::create(14, 0, status); } -MeasureUnit MeasureUnit::getLux() { +MeasureUnit MeasureUnit::getCandela() { return MeasureUnit(14, 0); } -MeasureUnit *MeasureUnit::createSolarLuminosity(UErrorCode &status) { +MeasureUnit *MeasureUnit::createLumen(UErrorCode &status) { return MeasureUnit::create(14, 1, status); } -MeasureUnit MeasureUnit::getSolarLuminosity() { +MeasureUnit MeasureUnit::getLumen() { return MeasureUnit(14, 1); } +MeasureUnit *MeasureUnit::createLux(UErrorCode &status) { + return MeasureUnit::create(14, 2, status); +} + +MeasureUnit MeasureUnit::getLux() { + return MeasureUnit(14, 2); +} + +MeasureUnit *MeasureUnit::createSolarLuminosity(UErrorCode &status) { + return MeasureUnit::create(14, 3, status); +} + +MeasureUnit MeasureUnit::getSolarLuminosity() { + return MeasureUnit(14, 3); +} + MeasureUnit *MeasureUnit::createCarat(UErrorCode &status) { return 
MeasureUnit::create(15, 0, status); } @@ -1477,94 +1507,102 @@ MeasureUnit MeasureUnit::getEarthMass() { return MeasureUnit(15, 2); } -MeasureUnit *MeasureUnit::createGram(UErrorCode &status) { +MeasureUnit *MeasureUnit::createGrain(UErrorCode &status) { return MeasureUnit::create(15, 3, status); } -MeasureUnit MeasureUnit::getGram() { +MeasureUnit MeasureUnit::getGrain() { return MeasureUnit(15, 3); } -MeasureUnit *MeasureUnit::createKilogram(UErrorCode &status) { +MeasureUnit *MeasureUnit::createGram(UErrorCode &status) { return MeasureUnit::create(15, 4, status); } -MeasureUnit MeasureUnit::getKilogram() { +MeasureUnit MeasureUnit::getGram() { return MeasureUnit(15, 4); } -MeasureUnit *MeasureUnit::createMetricTon(UErrorCode &status) { +MeasureUnit *MeasureUnit::createKilogram(UErrorCode &status) { return MeasureUnit::create(15, 5, status); } -MeasureUnit MeasureUnit::getMetricTon() { +MeasureUnit MeasureUnit::getKilogram() { return MeasureUnit(15, 5); } -MeasureUnit *MeasureUnit::createMicrogram(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMetricTon(UErrorCode &status) { return MeasureUnit::create(15, 6, status); } -MeasureUnit MeasureUnit::getMicrogram() { +MeasureUnit MeasureUnit::getMetricTon() { return MeasureUnit(15, 6); } -MeasureUnit *MeasureUnit::createMilligram(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMicrogram(UErrorCode &status) { return MeasureUnit::create(15, 7, status); } -MeasureUnit MeasureUnit::getMilligram() { +MeasureUnit MeasureUnit::getMicrogram() { return MeasureUnit(15, 7); } -MeasureUnit *MeasureUnit::createOunce(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMilligram(UErrorCode &status) { return MeasureUnit::create(15, 8, status); } -MeasureUnit MeasureUnit::getOunce() { +MeasureUnit MeasureUnit::getMilligram() { return MeasureUnit(15, 8); } -MeasureUnit *MeasureUnit::createOunceTroy(UErrorCode &status) { +MeasureUnit *MeasureUnit::createOunce(UErrorCode &status) { return MeasureUnit::create(15, 9, 
status); } -MeasureUnit MeasureUnit::getOunceTroy() { +MeasureUnit MeasureUnit::getOunce() { return MeasureUnit(15, 9); } -MeasureUnit *MeasureUnit::createPound(UErrorCode &status) { +MeasureUnit *MeasureUnit::createOunceTroy(UErrorCode &status) { return MeasureUnit::create(15, 10, status); } -MeasureUnit MeasureUnit::getPound() { +MeasureUnit MeasureUnit::getOunceTroy() { return MeasureUnit(15, 10); } -MeasureUnit *MeasureUnit::createSolarMass(UErrorCode &status) { +MeasureUnit *MeasureUnit::createPound(UErrorCode &status) { return MeasureUnit::create(15, 11, status); } -MeasureUnit MeasureUnit::getSolarMass() { +MeasureUnit MeasureUnit::getPound() { return MeasureUnit(15, 11); } -MeasureUnit *MeasureUnit::createStone(UErrorCode &status) { +MeasureUnit *MeasureUnit::createSolarMass(UErrorCode &status) { return MeasureUnit::create(15, 12, status); } -MeasureUnit MeasureUnit::getStone() { +MeasureUnit MeasureUnit::getSolarMass() { return MeasureUnit(15, 12); } -MeasureUnit *MeasureUnit::createTon(UErrorCode &status) { +MeasureUnit *MeasureUnit::createStone(UErrorCode &status) { return MeasureUnit::create(15, 13, status); } -MeasureUnit MeasureUnit::getTon() { +MeasureUnit MeasureUnit::getStone() { return MeasureUnit(15, 13); } +MeasureUnit *MeasureUnit::createTon(UErrorCode &status) { + return MeasureUnit::create(15, 14, status); +} + +MeasureUnit MeasureUnit::getTon() { + return MeasureUnit(15, 14); +} + MeasureUnit *MeasureUnit::createGigawatt(UErrorCode &status) { return MeasureUnit::create(17, 0, status); } @@ -1885,111 +1923,167 @@ MeasureUnit MeasureUnit::getDeciliter() { return MeasureUnit(22, 13); } -MeasureUnit *MeasureUnit::createFluidOunce(UErrorCode &status) { +MeasureUnit *MeasureUnit::createDessertSpoon(UErrorCode &status) { return MeasureUnit::create(22, 14, status); } -MeasureUnit MeasureUnit::getFluidOunce() { +MeasureUnit MeasureUnit::getDessertSpoon() { return MeasureUnit(22, 14); } -MeasureUnit *MeasureUnit::createFluidOunceImperial(UErrorCode 
&status) { +MeasureUnit *MeasureUnit::createDessertSpoonImperial(UErrorCode &status) { return MeasureUnit::create(22, 15, status); } -MeasureUnit MeasureUnit::getFluidOunceImperial() { +MeasureUnit MeasureUnit::getDessertSpoonImperial() { return MeasureUnit(22, 15); } -MeasureUnit *MeasureUnit::createGallon(UErrorCode &status) { +MeasureUnit *MeasureUnit::createDram(UErrorCode &status) { return MeasureUnit::create(22, 16, status); } -MeasureUnit MeasureUnit::getGallon() { +MeasureUnit MeasureUnit::getDram() { return MeasureUnit(22, 16); } -MeasureUnit *MeasureUnit::createGallonImperial(UErrorCode &status) { +MeasureUnit *MeasureUnit::createDrop(UErrorCode &status) { return MeasureUnit::create(22, 17, status); } -MeasureUnit MeasureUnit::getGallonImperial() { +MeasureUnit MeasureUnit::getDrop() { return MeasureUnit(22, 17); } -MeasureUnit *MeasureUnit::createHectoliter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createFluidOunce(UErrorCode &status) { return MeasureUnit::create(22, 18, status); } -MeasureUnit MeasureUnit::getHectoliter() { +MeasureUnit MeasureUnit::getFluidOunce() { return MeasureUnit(22, 18); } -MeasureUnit *MeasureUnit::createLiter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createFluidOunceImperial(UErrorCode &status) { return MeasureUnit::create(22, 19, status); } -MeasureUnit MeasureUnit::getLiter() { +MeasureUnit MeasureUnit::getFluidOunceImperial() { return MeasureUnit(22, 19); } -MeasureUnit *MeasureUnit::createMegaliter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createGallon(UErrorCode &status) { return MeasureUnit::create(22, 20, status); } -MeasureUnit MeasureUnit::getMegaliter() { +MeasureUnit MeasureUnit::getGallon() { return MeasureUnit(22, 20); } -MeasureUnit *MeasureUnit::createMilliliter(UErrorCode &status) { +MeasureUnit *MeasureUnit::createGallonImperial(UErrorCode &status) { return MeasureUnit::create(22, 21, status); } -MeasureUnit MeasureUnit::getMilliliter() { +MeasureUnit MeasureUnit::getGallonImperial() { 
return MeasureUnit(22, 21); } -MeasureUnit *MeasureUnit::createPint(UErrorCode &status) { +MeasureUnit *MeasureUnit::createHectoliter(UErrorCode &status) { return MeasureUnit::create(22, 22, status); } -MeasureUnit MeasureUnit::getPint() { +MeasureUnit MeasureUnit::getHectoliter() { return MeasureUnit(22, 22); } -MeasureUnit *MeasureUnit::createPintMetric(UErrorCode &status) { +MeasureUnit *MeasureUnit::createJigger(UErrorCode &status) { return MeasureUnit::create(22, 23, status); } -MeasureUnit MeasureUnit::getPintMetric() { +MeasureUnit MeasureUnit::getJigger() { return MeasureUnit(22, 23); } -MeasureUnit *MeasureUnit::createQuart(UErrorCode &status) { +MeasureUnit *MeasureUnit::createLiter(UErrorCode &status) { return MeasureUnit::create(22, 24, status); } -MeasureUnit MeasureUnit::getQuart() { +MeasureUnit MeasureUnit::getLiter() { return MeasureUnit(22, 24); } -MeasureUnit *MeasureUnit::createTablespoon(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMegaliter(UErrorCode &status) { return MeasureUnit::create(22, 25, status); } -MeasureUnit MeasureUnit::getTablespoon() { +MeasureUnit MeasureUnit::getMegaliter() { return MeasureUnit(22, 25); } -MeasureUnit *MeasureUnit::createTeaspoon(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMilliliter(UErrorCode &status) { return MeasureUnit::create(22, 26, status); } -MeasureUnit MeasureUnit::getTeaspoon() { +MeasureUnit MeasureUnit::getMilliliter() { return MeasureUnit(22, 26); } -// End generated code +MeasureUnit *MeasureUnit::createPinch(UErrorCode &status) { + return MeasureUnit::create(22, 27, status); +} + +MeasureUnit MeasureUnit::getPinch() { + return MeasureUnit(22, 27); +} + +MeasureUnit *MeasureUnit::createPint(UErrorCode &status) { + return MeasureUnit::create(22, 28, status); +} + +MeasureUnit MeasureUnit::getPint() { + return MeasureUnit(22, 28); +} + +MeasureUnit *MeasureUnit::createPintMetric(UErrorCode &status) { + return MeasureUnit::create(22, 29, status); +} + +MeasureUnit 
MeasureUnit::getPintMetric() { + return MeasureUnit(22, 29); +} + +MeasureUnit *MeasureUnit::createQuart(UErrorCode &status) { + return MeasureUnit::create(22, 30, status); +} + +MeasureUnit MeasureUnit::getQuart() { + return MeasureUnit(22, 30); +} + +MeasureUnit *MeasureUnit::createQuartImperial(UErrorCode &status) { + return MeasureUnit::create(22, 31, status); +} + +MeasureUnit MeasureUnit::getQuartImperial() { + return MeasureUnit(22, 31); +} + +MeasureUnit *MeasureUnit::createTablespoon(UErrorCode &status) { + return MeasureUnit::create(22, 32, status); +} + +MeasureUnit MeasureUnit::getTablespoon() { + return MeasureUnit(22, 32); +} + +MeasureUnit *MeasureUnit::createTeaspoon(UErrorCode &status) { + return MeasureUnit::create(22, 33, status); +} + +MeasureUnit MeasureUnit::getTeaspoon() { + return MeasureUnit(22, 33); +} + +// End generated code for measunit.cpp static int32_t binarySearch( const char * const * array, int32_t start, int32_t end, StringPiece key) { @@ -2038,7 +2132,9 @@ MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; } - delete fImpl; + if (fImpl != nullptr) { + delete fImpl; + } if (other.fImpl) { ErrorCode localStatus; fImpl = new MeasureUnitImpl(other.fImpl->copy(localStatus)); @@ -2059,7 +2155,9 @@ MeasureUnit &MeasureUnit::operator=(MeasureUnit &&other) noexcept { if (this == &other) { return *this; } - delete fImpl; + if (fImpl != nullptr) { + delete fImpl; + } fImpl = other.fImpl; other.fImpl = nullptr; fTypeId = other.fTypeId; @@ -2072,8 +2170,10 @@ MeasureUnit *MeasureUnit::clone() const { } MeasureUnit::~MeasureUnit() { - delete fImpl; - fImpl = nullptr; + if (fImpl != nullptr) { + delete fImpl; + fImpl = nullptr; + } } const char *MeasureUnit::getType() const { @@ -2096,21 +2196,17 @@ const char *MeasureUnit::getIdentifier() const { return fImpl ? 
fImpl->identifier.data() : gSubTypes[getOffset()]; } -UBool MeasureUnit::operator==(const UObject& other) const { +bool MeasureUnit::operator==(const UObject& other) const { if (this == &other) { // Same object, equal - return TRUE; + return true; } if (typeid(*this) != typeid(other)) { // Different types, not equal - return FALSE; + return false; } const MeasureUnit &rhs = static_cast<const MeasureUnit&>(other); return uprv_strcmp(getIdentifier(), rhs.getIdentifier()) == 0; } -int32_t MeasureUnit::getIndex() const { - return gIndexes[fTypeId] + fSubTypeId; -} - int32_t MeasureUnit::getAvailable( MeasureUnit *dest, int32_t destCapacity, @@ -2173,26 +2269,14 @@ StringEnumeration* MeasureUnit::getAvailableTypes(UErrorCode &errorCode) { return result; } -int32_t MeasureUnit::getIndexCount() { - return gIndexes[UPRV_LENGTHOF(gIndexes) - 1]; -} - -int32_t MeasureUnit::internalGetIndexForTypeAndSubtype(const char *type, const char *subtype) { - int32_t t = binarySearch(gTypes, 0, UPRV_LENGTHOF(gTypes), type); - if (t < 0) { - return t; - } - int32_t st = binarySearch(gSubTypes, gOffsets[t], gOffsets[t + 1], subtype); - if (st < 0) { - return st; - } - return gIndexes[t] + st - gOffsets[t]; -} - bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) { + // Sanity checking kCurrencyOffset and final entry in gOffsets + U_ASSERT(uprv_strcmp(gTypes[kCurrencyOffset], "currency") == 0); + U_ASSERT(gOffsets[UPRV_LENGTHOF(gOffsets) - 1] == UPRV_LENGTHOF(gSubTypes)); + for (int32_t t = 0; t < UPRV_LENGTHOF(gOffsets) - 1; t++) { // Skip currency units - if (gIndexes[t] == gIndexes[t + 1]) { + if (t == kCurrencyOffset) { continue; } int32_t st = binarySearch(gSubTypes, gOffsets[t], gOffsets[t + 1], subType); @@ -2204,41 +2288,6 @@ bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) { return false; } -MeasureUnit MeasureUnit::resolveUnitPerUnit( - const MeasureUnit &unit, const MeasureUnit &perUnit, bool* isResolved) { - int32_t unitOffset = 
unit.getOffset(); - int32_t perUnitOffset = perUnit.getOffset(); - if (unitOffset == -1 || perUnitOffset == -1) { - *isResolved = false; - return MeasureUnit(); - } - - // binary search for (unitOffset, perUnitOffset) - int32_t start = 0; - int32_t end = UPRV_LENGTHOF(unitPerUnitToSingleUnit); - while (start < end) { - int32_t mid = (start + end) / 2; - int32_t *midRow = unitPerUnitToSingleUnit[mid]; - if (unitOffset < midRow[0]) { - end = mid; - } else if (unitOffset > midRow[0]) { - start = mid + 1; - } else if (perUnitOffset < midRow[1]) { - end = mid; - } else if (perUnitOffset > midRow[1]) { - start = mid + 1; - } else { - // We found a resolution for our unit / per-unit combo - // return it. - *isResolved = true; - return MeasureUnit(midRow[2], midRow[3]); - } - } - - *isResolved = false; - return MeasureUnit(); -} - MeasureUnit *MeasureUnit::create(int typeId, int subTypeId, UErrorCode &status) { if (U_FAILURE(status)) { return NULL; @@ -2256,7 +2305,7 @@ void MeasureUnit::initTime(const char *timeId) { fTypeId = result; result = binarySearch(gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], timeId); U_ASSERT(result != -1); - fSubTypeId = result - gOffsets[fTypeId]; + fSubTypeId = result - gOffsets[fTypeId]; } void MeasureUnit::initCurrency(StringPiece isoCurrency) { @@ -2279,20 +2328,13 @@ void MeasureUnit::initCurrency(StringPiece isoCurrency) { fSubTypeId = result - gOffsets[fTypeId]; } -void MeasureUnit::initNoUnit(const char *subtype) { - int32_t result = binarySearch(gTypes, 0, UPRV_LENGTHOF(gTypes), "none"); - U_ASSERT(result != -1); - fTypeId = result; - result = binarySearch(gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], subtype); - U_ASSERT(result != -1); - fSubTypeId = result - gOffsets[fTypeId]; -} - void MeasureUnit::setTo(int32_t typeId, int32_t subTypeId) { fTypeId = typeId; fSubTypeId = subTypeId; - delete fImpl; - fImpl = nullptr; + if (fImpl != nullptr) { + delete fImpl; + fImpl = nullptr; + } } int32_t MeasureUnit::getOffset() 
const { @@ -2302,6 +2344,20 @@ int32_t MeasureUnit::getOffset() const { return gOffsets[fTypeId] + fSubTypeId; } +MeasureUnitImpl MeasureUnitImpl::copy(UErrorCode &status) const { + MeasureUnitImpl result; + result.complexity = complexity; + result.identifier.append(identifier, status); + for (int32_t i = 0; i < singleUnits.length(); i++) { + SingleUnitImpl *item = result.singleUnits.emplaceBack(*singleUnits[i]); + if (!item) { + status = U_MEMORY_ALLOCATION_ERROR; + return result; + } + } + return result; +} + U_NAMESPACE_END #endif /* !UNCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/measunit_extra.cpp b/contrib/libs/icu/i18n/measunit_extra.cpp index aeb60017a1..8281119007 100644 --- a/contrib/libs/icu/i18n/measunit_extra.cpp +++ b/contrib/libs/icu/i18n/measunit_extra.cpp @@ -12,20 +12,25 @@ // Helpful in toString methods and elsewhere. #define UNISTR_FROM_STRING_EXPLICIT -#include <cstdlib> +#include "charstr.h" +#include "cmemory.h" #include "cstring.h" #include "measunit_impl.h" +#include "resource.h" #include "uarrsort.h" #include "uassert.h" #include "ucln_in.h" #include "umutex.h" -#include "unicode/errorcode.h" +#include "unicode/bytestrie.h" +#include "unicode/bytestriebuilder.h" #include "unicode/localpointer.h" -#include "unicode/measunit.h" -#include "unicode/ucharstrie.h" -#include "unicode/ucharstriebuilder.h" - -#include "cstr.h" +#include "unicode/stringpiece.h" +#include "unicode/stringtriebuilder.h" +#include "unicode/ures.h" +#include "unicode/ustringtrie.h" +#include "uresimp.h" +#include "util.h" +#include <cstdlib> U_NAMESPACE_BEGIN @@ -35,12 +40,20 @@ namespace { // TODO: Propose a new error code for this? constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR; -// Trie value offset for SI Prefixes. This is big enough to ensure we only +// Trie value offset for SI or binary prefixes. This is big enough to ensure we only // insert positive integers into the trie. 
-constexpr int32_t kSIPrefixOffset = 64; +constexpr int32_t kPrefixOffset = 64; +static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0, + "kPrefixOffset is too small for minimum UMeasurePrefix value"); +static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0, + "kPrefixOffset is too small for minimum UMeasurePrefix value"); // Trie value offset for compound parts, e.g. "-per-", "-", "-and-". constexpr int32_t kCompoundPartOffset = 128; +static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN, + "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens"); +static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI, + "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens"); enum CompoundPart { // Represents "-per-" @@ -60,7 +73,7 @@ enum InitialCompoundPart { INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset, }; -// Trie value offset for powers like "square-", "cubic-", "p2-" etc. +// Trie value offset for powers like "square-", "cubic-", "pow2-" etc. constexpr int32_t kPowerPartOffset = 256; enum PowerPart { @@ -84,201 +97,368 @@ enum PowerPart { // "fluid-ounce-imperial". 
constexpr int32_t kSimpleUnitOffset = 512; -const struct SIPrefixStrings { +const struct UnitPrefixStrings { const char* const string; - UMeasureSIPrefix value; -} gSIPrefixStrings[] = { - { "yotta", UMEASURE_SI_PREFIX_YOTTA }, - { "zetta", UMEASURE_SI_PREFIX_ZETTA }, - { "exa", UMEASURE_SI_PREFIX_EXA }, - { "peta", UMEASURE_SI_PREFIX_PETA }, - { "tera", UMEASURE_SI_PREFIX_TERA }, - { "giga", UMEASURE_SI_PREFIX_GIGA }, - { "mega", UMEASURE_SI_PREFIX_MEGA }, - { "kilo", UMEASURE_SI_PREFIX_KILO }, - { "hecto", UMEASURE_SI_PREFIX_HECTO }, - { "deka", UMEASURE_SI_PREFIX_DEKA }, - { "deci", UMEASURE_SI_PREFIX_DECI }, - { "centi", UMEASURE_SI_PREFIX_CENTI }, - { "milli", UMEASURE_SI_PREFIX_MILLI }, - { "micro", UMEASURE_SI_PREFIX_MICRO }, - { "nano", UMEASURE_SI_PREFIX_NANO }, - { "pico", UMEASURE_SI_PREFIX_PICO }, - { "femto", UMEASURE_SI_PREFIX_FEMTO }, - { "atto", UMEASURE_SI_PREFIX_ATTO }, - { "zepto", UMEASURE_SI_PREFIX_ZEPTO }, - { "yocto", UMEASURE_SI_PREFIX_YOCTO }, + UMeasurePrefix value; +} gUnitPrefixStrings[] = { + // SI prefixes + { "yotta", UMEASURE_PREFIX_YOTTA }, + { "zetta", UMEASURE_PREFIX_ZETTA }, + { "exa", UMEASURE_PREFIX_EXA }, + { "peta", UMEASURE_PREFIX_PETA }, + { "tera", UMEASURE_PREFIX_TERA }, + { "giga", UMEASURE_PREFIX_GIGA }, + { "mega", UMEASURE_PREFIX_MEGA }, + { "kilo", UMEASURE_PREFIX_KILO }, + { "hecto", UMEASURE_PREFIX_HECTO }, + { "deka", UMEASURE_PREFIX_DEKA }, + { "deci", UMEASURE_PREFIX_DECI }, + { "centi", UMEASURE_PREFIX_CENTI }, + { "milli", UMEASURE_PREFIX_MILLI }, + { "micro", UMEASURE_PREFIX_MICRO }, + { "nano", UMEASURE_PREFIX_NANO }, + { "pico", UMEASURE_PREFIX_PICO }, + { "femto", UMEASURE_PREFIX_FEMTO }, + { "atto", UMEASURE_PREFIX_ATTO }, + { "zepto", UMEASURE_PREFIX_ZEPTO }, + { "yocto", UMEASURE_PREFIX_YOCTO }, + // Binary prefixes + { "yobi", UMEASURE_PREFIX_YOBI }, + { "zebi", UMEASURE_PREFIX_ZEBI }, + { "exbi", UMEASURE_PREFIX_EXBI }, + { "pebi", UMEASURE_PREFIX_PEBI }, + { "tebi", UMEASURE_PREFIX_TEBI }, + { 
"gibi", UMEASURE_PREFIX_GIBI }, + { "mebi", UMEASURE_PREFIX_MEBI }, + { "kibi", UMEASURE_PREFIX_KIBI }, }; -// TODO(ICU-21059): Get this list from data -const char16_t* const gSimpleUnits[] = { - u"candela", - u"carat", - u"gram", - u"ounce", - u"ounce-troy", - u"pound", - u"kilogram", - u"stone", - u"ton", - u"metric-ton", - u"earth-mass", - u"solar-mass", - u"point", - u"inch", - u"foot", - u"yard", - u"meter", - u"fathom", - u"furlong", - u"mile", - u"nautical-mile", - u"mile-scandinavian", - u"100-kilometer", - u"earth-radius", - u"solar-radius", - u"astronomical-unit", - u"light-year", - u"parsec", - u"second", - u"minute", - u"hour", - u"day", - u"day-person", - u"week", - u"week-person", - u"month", - u"month-person", - u"year", - u"year-person", - u"decade", - u"century", - u"ampere", - u"fahrenheit", - u"kelvin", - u"celsius", - u"arc-second", - u"arc-minute", - u"degree", - u"radian", - u"revolution", - u"item", - u"mole", - u"permillion", - u"permyriad", - u"permille", - u"percent", - u"karat", - u"portion", - u"bit", - u"byte", - u"dot", - u"pixel", - u"em", - u"hertz", - u"newton", - u"pound-force", - u"pascal", - u"bar", - u"atmosphere", - u"ofhg", - u"electronvolt", - u"dalton", - u"joule", - u"calorie", - u"british-thermal-unit", - u"foodcalorie", - u"therm-us", - u"watt", - u"horsepower", - u"solar-luminosity", - u"volt", - u"ohm", - u"dunam", - u"acre", - u"hectare", - u"teaspoon", - u"tablespoon", - u"fluid-ounce-imperial", - u"fluid-ounce", - u"cup", - u"cup-metric", - u"pint", - u"pint-metric", - u"quart", - u"liter", - u"gallon", - u"gallon-imperial", - u"bushel", - u"barrel", - u"knot", - u"g-force", - u"lux", +/** + * A ResourceSink that collects simple unit identifiers from the keys of the + * convertUnits table into an array, and adds these values to a TrieBuilder, + * with associated values being their index into this array plus a specified + * offset. 
+ * + * Example code: + * + * UErrorCode status = U_ZERO_ERROR; + * BytesTrieBuilder b(status); + * int32_t ARR_SIZE = 200; + * const char *unitIdentifiers[ARR_SIZE]; + * int32_t *unitCategories[ARR_SIZE]; + * SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers, + * unitCategories, ARR_SIZE, b, kTrieValueOffset); + * LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); + * ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status); + */ +class SimpleUnitIdentifiersSink : public icu::ResourceSink { + public: + /** + * Constructor. + * @param quantitiesTrieData The data for constructing a quantitiesTrie, + * which maps from a simple unit identifier to an index into the + * gCategories array. + * @param out Array of char* to which pointers to the simple unit + * identifiers will be saved. (Does not take ownership.) + * @param outCategories Array of int32_t to which category indexes will be + * saved: this corresponds to simple unit IDs saved to `out`, mapping + * from the ID to the value produced by the quantitiesTrie (which is an + * index into the gCategories array). + * @param outSize The size of `out` and `outCategories`. + * @param trieBuilder The trie builder to which the simple unit identifier + * should be added. The trie builder must outlive this resource sink. + * @param trieValueOffset This is added to the index of the identifier in + * the `out` array, before adding to `trieBuilder` as the value + * associated with the identifier. 
+ */ + explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out, + int32_t *outCategories, int32_t outSize, + BytesTrieBuilder &trieBuilder, int32_t trieValueOffset) + : outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder), + trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {} + + /** + * Adds the table keys found in value to the output vector. + * @param key The key of the resource passed to `value`: the second + * parameter of the ures_getAllItemsWithFallback() call. + * @param value Should be a ResourceTable value, if + * ures_getAllItemsWithFallback() was called correctly for this sink. + * @param noFallback Ignored. + * @param status The standard ICU error code output parameter. + */ + void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { + ResourceTable table = value.getTable(status); + if (U_FAILURE(status)) return; + + if (outIndex + table.getSize() > outSize) { + status = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + BytesTrie quantitiesTrie(quantitiesTrieData.data()); + + // Collect keys from the table resource. + const char *simpleUnitID; + for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) { + U_ASSERT(i < table.getSize()); + U_ASSERT(outIndex < outSize); + if (uprv_strcmp(simpleUnitID, "kilogram") == 0) { + // For parsing, we use "gram", the prefixless metric mass unit. We + // thus ignore the SI Base Unit of Mass: it exists due to being the + // mass conversion target unit, but not needed for MeasureUnit + // parsing. 
+ continue; + } + outArray[outIndex] = simpleUnitID; + trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status); + + // Find the base target unit for this simple unit + ResourceTable table = value.getTable(status); + if (U_FAILURE(status)) { return; } + if (!table.findValue("target", value)) { + status = U_INVALID_FORMAT_ERROR; + break; + } + int32_t len; + const UChar* uTarget = value.getString(len, status); + CharString target; + target.appendInvariantChars(uTarget, len, status); + if (U_FAILURE(status)) { return; } + quantitiesTrie.reset(); + UStringTrieResult result = quantitiesTrie.next(target.data(), target.length()); + if (!USTRINGTRIE_HAS_VALUE(result)) { + status = U_INVALID_FORMAT_ERROR; + break; + } + outCategories[outIndex] = quantitiesTrie.getValue(); + + outIndex++; + } + } + + private: + const char **outArray; + int32_t *outCategories; + int32_t outSize; + BytesTrieBuilder &trieBuilder; + int32_t trieValueOffset; + + StringPiece quantitiesTrieData; + + int32_t outIndex; +}; + +/** + * A ResourceSink that collects information from `unitQuantities` in the `units` + * resource to provide key->value lookups from base unit to category, as well as + * preserving ordering information for these categories. See `units.txt`. + * + * For example: "kilogram" -> "mass", "meter-per-second" -> "speed". + * + * In C++ unitQuantity values are collected in order into a UChar* array, while + * unitQuantity keys are added to a TrieBuilder, with associated values + * being the index into the aforementioned UChar* array. + */ +class CategoriesSink : public icu::ResourceSink { + public: + /** + * Constructor. + * @param out Array of UChar* to which unitQuantity values will be saved. + * The pointers returned are not owned: they point directly at the resource + * strings in static memory. + * @param outSize The size of the `out` array. 
+ * @param trieBuilder The trie builder to which the keys (base units) of + * each unitQuantity will be added, each with value being the offset + * into `out`. + */ + explicit CategoriesSink(const UChar **out, int32_t &outSize, BytesTrieBuilder &trieBuilder) + : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {} + + void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { + ResourceArray array = value.getArray(status); + if (U_FAILURE(status)) { + return; + } + + if (outIndex + array.getSize() > outSize) { + status = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + for (int32_t i = 0; array.getValue(i, value); ++i) { + U_ASSERT(outIndex < outSize); + ResourceTable table = value.getTable(status); + if (U_FAILURE(status)) { + return; + } + if (table.getSize() != 1) { + status = U_INVALID_FORMAT_ERROR; + return; + } + const char *key; + table.getKeyAndValue(0, key, value); + int32_t uTmpLen; + outQuantitiesArray[outIndex] = value.getString(uTmpLen, status); + trieBuilder.add(key, outIndex, status); + outIndex++; + } + } + + private: + const UChar **outQuantitiesArray; + int32_t &outSize; + BytesTrieBuilder &trieBuilder; + + int32_t outIndex; }; icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER; -char16_t* kSerializedUnitExtrasStemTrie = nullptr; +// Array of simple unit IDs. +// +// The array memory itself is owned by this pointer, but the individual char* in +// that array point at static memory. (Note that these char* are also returned +// by SingleUnitImpl::getSimpleUnitID().) +const char **gSimpleUnits = nullptr; + +// Maps from the value associated with each simple unit ID to an index into the +// gCategories array. +int32_t *gSimpleUnitCategories = nullptr; + +char *gSerializedUnitExtrasStemTrie = nullptr; + +// Array of UChar* pointing at the unit categories (aka "quantities", aka +// "types"), as found in the `unitQuantities` resource. 
The array memory itself +// is owned by this pointer, but the individual UChar* in that array point at +// static memory. +const UChar **gCategories = nullptr; +// Number of items in `gCategories`. +int32_t gCategoriesCount = 0; +// Serialized BytesTrie for mapping from base units to indices into gCategories. +char *gSerializedUnitCategoriesTrie = nullptr; UBool U_CALLCONV cleanupUnitExtras() { - uprv_free(kSerializedUnitExtrasStemTrie); - kSerializedUnitExtrasStemTrie = nullptr; + uprv_free(gSerializedUnitCategoriesTrie); + gSerializedUnitCategoriesTrie = nullptr; + uprv_free(gCategories); + gCategories = nullptr; + uprv_free(gSerializedUnitExtrasStemTrie); + gSerializedUnitExtrasStemTrie = nullptr; + uprv_free(gSimpleUnitCategories); + gSimpleUnitCategories = nullptr; + uprv_free(gSimpleUnits); + gSimpleUnits = nullptr; gUnitExtrasInitOnce.reset(); return TRUE; } void U_CALLCONV initUnitExtras(UErrorCode& status) { ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras); + LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status)); - UCharsTrieBuilder b(status); + // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories. 
+ const char *CATEGORY_TABLE_NAME = "unitQuantities"; + LocalUResourceBundlePointer unitQuantities( + ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status)); + if (U_FAILURE(status)) { return; } + gCategoriesCount = unitQuantities.getAlias()->fSize; + size_t quantitiesMallocSize = sizeof(UChar *) * gCategoriesCount; + gCategories = static_cast<const UChar **>(uprv_malloc(quantitiesMallocSize)); + if (gCategories == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(gCategories, 0, quantitiesMallocSize); + BytesTrieBuilder quantitiesBuilder(status); + CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status); + StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status); + if (U_FAILURE(status)) { return; } + // Copy the result into the global constant pointer + size_t numBytesQuantities = resultQuantities.length(); + gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(numBytesQuantities)); + if (gSerializedUnitCategoriesTrie == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities); + + // Build the BytesTrie that Parser needs for parsing unit identifiers. 
+ + BytesTrieBuilder b(status); if (U_FAILURE(status)) { return; } - // Add SI prefixes - for (const auto& siPrefixInfo : gSIPrefixStrings) { - UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV); - b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status); + // Add SI and binary prefixes + for (const auto& unitPrefixInfo : gUnitPrefixStrings) { + b.add(unitPrefixInfo.string, unitPrefixInfo.value + kPrefixOffset, status); } if (U_FAILURE(status)) { return; } // Add syntax parts (compound, power prefixes) - b.add(u"-per-", COMPOUND_PART_PER, status); - b.add(u"-", COMPOUND_PART_TIMES, status); - b.add(u"-and-", COMPOUND_PART_AND, status); - b.add(u"per-", INITIAL_COMPOUND_PART_PER, status); - b.add(u"square-", POWER_PART_P2, status); - b.add(u"cubic-", POWER_PART_P3, status); - b.add(u"p2-", POWER_PART_P2, status); - b.add(u"p3-", POWER_PART_P3, status); - b.add(u"p4-", POWER_PART_P4, status); - b.add(u"p5-", POWER_PART_P5, status); - b.add(u"p6-", POWER_PART_P6, status); - b.add(u"p7-", POWER_PART_P7, status); - b.add(u"p8-", POWER_PART_P8, status); - b.add(u"p9-", POWER_PART_P9, status); - b.add(u"p10-", POWER_PART_P10, status); - b.add(u"p11-", POWER_PART_P11, status); - b.add(u"p12-", POWER_PART_P12, status); - b.add(u"p13-", POWER_PART_P13, status); - b.add(u"p14-", POWER_PART_P14, status); - b.add(u"p15-", POWER_PART_P15, status); + b.add("-per-", COMPOUND_PART_PER, status); + b.add("-", COMPOUND_PART_TIMES, status); + b.add("-and-", COMPOUND_PART_AND, status); + b.add("per-", INITIAL_COMPOUND_PART_PER, status); + b.add("square-", POWER_PART_P2, status); + b.add("cubic-", POWER_PART_P3, status); + b.add("pow2-", POWER_PART_P2, status); + b.add("pow3-", POWER_PART_P3, status); + b.add("pow4-", POWER_PART_P4, status); + b.add("pow5-", POWER_PART_P5, status); + b.add("pow6-", POWER_PART_P6, status); + b.add("pow7-", POWER_PART_P7, status); + b.add("pow8-", POWER_PART_P8, status); + b.add("pow9-", POWER_PART_P9, status); + b.add("pow10-", POWER_PART_P10, 
status); + b.add("pow11-", POWER_PART_P11, status); + b.add("pow12-", POWER_PART_P12, status); + b.add("pow13-", POWER_PART_P13, status); + b.add("pow14-", POWER_PART_P14, status); + b.add("pow15-", POWER_PART_P15, status); if (U_FAILURE(status)) { return; } - // Add sanctioned simple units by offset - int32_t simpleUnitOffset = kSimpleUnitOffset; - for (auto simpleUnit : gSimpleUnits) { - b.add(simpleUnit, simpleUnitOffset++, status); + // Add sanctioned simple units by offset: simple units all have entries in + // units/convertUnits resources. + LocalUResourceBundlePointer convertUnits( + ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status)); + if (U_FAILURE(status)) { return; } + + // Allocate enough space: with identifierSink below skipping kilogram, we're + // probably allocating one more than needed. + int32_t simpleUnitsCount = convertUnits.getAlias()->fSize; + int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount; + gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize)); + if (gSimpleUnits == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(gSimpleUnits, 0, arrayMallocSize); + arrayMallocSize = sizeof(int32_t) * simpleUnitsCount; + gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(arrayMallocSize)); + if (gSimpleUnitCategories == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; } + uprv_memset(gSimpleUnitCategories, 0, arrayMallocSize); + + // Populate gSimpleUnits and build the associated trie. + SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories, + simpleUnitsCount, b, kSimpleUnitOffset); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status); // Build the CharsTrie // TODO: Use SLOW or FAST here? 
- UnicodeString result; - b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status); + StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status); if (U_FAILURE(status)) { return; } // Copy the result into the global constant pointer - size_t numBytes = result.length() * sizeof(char16_t); - kSerializedUnitExtrasStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes)); - uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes); + size_t numBytes = result.length(); + gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes)); + if (gSerializedUnitExtrasStemTrie == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes); } class Token { @@ -287,7 +467,7 @@ public: enum Type { TYPE_UNDEFINED, - TYPE_SI_PREFIX, + TYPE_PREFIX, // Token type for "-per-", "-", and "-and-". TYPE_COMPOUND_PART, // Token type for "per-". @@ -301,7 +481,7 @@ public: Type getType() const { U_ASSERT(fMatch > 0); if (fMatch < kCompoundPartOffset) { - return TYPE_SI_PREFIX; + return TYPE_PREFIX; } if (fMatch < kInitialCompoundPartOffset) { return TYPE_COMPOUND_PART; @@ -315,9 +495,9 @@ public: return TYPE_SIMPLE_UNIT; } - UMeasureSIPrefix getSIPrefix() const { - U_ASSERT(getType() == TYPE_SI_PREFIX); - return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset); + UMeasurePrefix getUnitPrefix() const { + U_ASSERT(getType() == TYPE_PREFIX); + return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset); } // Valid only for tokens with type TYPE_COMPOUND_PART. @@ -373,7 +553,53 @@ public: MeasureUnitImpl parse(UErrorCode& status) { MeasureUnitImpl result; - parseImpl(result, status); + + if (U_FAILURE(status)) { + return result; + } + if (fSource.empty()) { + // The dimensionless unit: nothing to parse. Leave result as is. 
+ return result; + } + + while (hasNext()) { + bool sawAnd = false; + + SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status); + if (U_FAILURE(status)) { + return result; + } + + bool added = result.appendSingleUnit(singleUnit, status); + if (U_FAILURE(status)) { + return result; + } + + if (sawAnd && !added) { + // Two similar units are not allowed in a mixed unit. + status = kUnitIdentifierSyntaxError; + return result; + } + + if (result.singleUnits.length() >= 2) { + // nextSingleUnit fails appropriately for "per" and "and" in the + // same identifier. It doesn't fail for other compound units + // (COMPOUND_PART_TIMES). Consequently we take care of that + // here. + UMeasureUnitComplexity complexity = + sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND; + if (result.singleUnits.length() == 2) { + // After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND` + U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND); + result.complexity = complexity; + } else if (result.complexity != complexity) { + // Can't have mixed compound units + status = kUnitIdentifierSyntaxError; + return result; + } + } + } + return result; } @@ -385,7 +611,7 @@ private: // should live longer than this Parser - and the parser shouldn't return any // references to that string. StringPiece fSource; - UCharsTrie fTrie; + BytesTrie fTrie; // Set to true when we've seen a "-per-" or a "per-", after which all units // are in the denominator. Until we find an "-and-", at which point the @@ -395,7 +621,7 @@ private: Parser() : fSource(""), fTrie(u"") {} Parser(StringPiece source) - : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} + : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {} inline bool hasNext() const { return fIndex < fSource.length(); @@ -450,20 +676,23 @@ private: * unit", sawAnd is set to true. If not, it is left as is. * @param status ICU error code. 
*/ - void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) { + SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) { + SingleUnitImpl result; if (U_FAILURE(status)) { - return; + return result; } // state: - // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) + // 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit) // 1 = power token seen (will not accept another power token) - // 2 = SI prefix token seen (will not accept a power or SI prefix token) + // 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token) int32_t state = 0; bool atStart = fIndex == 0; Token token = nextToken(status); - if (U_FAILURE(status)) { return; } + if (U_FAILURE(status)) { + return result; + } if (atStart) { // Identifiers optionally start with "per-". @@ -473,14 +702,16 @@ private: result.dimensionality = -1; token = nextToken(status); - if (U_FAILURE(status)) { return; } + if (U_FAILURE(status)) { + return result; + } } } else { // All other SingleUnit's are separated from previous SingleUnit's // via a compound part: if (token.getType() != Token::TYPE_COMPOUND_PART) { status = kUnitIdentifierSyntaxError; - return; + return result; } switch (token.getMatch()) { @@ -489,7 +720,7 @@ private: // Mixed compound units not yet supported, // TODO(CLDR-13700). status = kUnitIdentifierSyntaxError; - return; + return result; } fAfterPer = true; result.dimensionality = -1; @@ -506,14 +737,16 @@ private: // Can't start with "-and-", and mixed compound units // not yet supported, TODO(CLDR-13700). status = kUnitIdentifierSyntaxError; - return; + return result; } sawAnd = true; break; } token = nextToken(status); - if (U_FAILURE(status)) { return; } + if (U_FAILURE(status)) { + return result; + } } // Read tokens until we have a complete SingleUnit or we reach the end. 
@@ -522,87 +755,46 @@ private: case Token::TYPE_POWER_PART: if (state > 0) { status = kUnitIdentifierSyntaxError; - return; + return result; } result.dimensionality *= token.getPower(); state = 1; break; - case Token::TYPE_SI_PREFIX: + case Token::TYPE_PREFIX: if (state > 1) { status = kUnitIdentifierSyntaxError; - return; + return result; } - result.siPrefix = token.getSIPrefix(); + result.unitPrefix = token.getUnitPrefix(); state = 2; break; case Token::TYPE_SIMPLE_UNIT: result.index = token.getSimpleUnitIndex(); - return; + return result; default: status = kUnitIdentifierSyntaxError; - return; + return result; } if (!hasNext()) { // We ran out of tokens before finding a complete single unit. status = kUnitIdentifierSyntaxError; - return; + return result; } token = nextToken(status); if (U_FAILURE(status)) { - return; + return result; } } - } - /// @param result is modified, not overridden. Caller must pass in a - /// default-constructed (empty) MeasureUnitImpl instance. - void parseImpl(MeasureUnitImpl& result, UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - if (fSource.empty()) { - // The dimenionless unit: nothing to parse. leave result as is. - return; - } - int32_t unitNum = 0; - while (hasNext()) { - bool sawAnd = false; - SingleUnitImpl singleUnit; - nextSingleUnit(singleUnit, sawAnd, status); - if (U_FAILURE(status)) { - return; - } - U_ASSERT(!singleUnit.isDimensionless()); - bool added = result.append(singleUnit, status); - if (sawAnd && !added) { - // Two similar units are not allowed in a mixed unit - status = kUnitIdentifierSyntaxError; - return; - } - if ((++unitNum) >= 2) { - // nextSingleUnit fails appropriately for "per" and "and" in the - // same identifier. It doesn't fail for other compound units - // (COMPOUND_PART_TIMES). Consequently we take care of that - // here. - UMeasureUnitComplexity complexity = - sawAnd ? 
UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND; - if (unitNum == 2) { - U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE); - result.complexity = complexity; - } else if (result.complexity != complexity) { - // Can't have mixed compound units - status = kUnitIdentifierSyntaxError; - return; - } - } - } + return result; } }; +// Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray. int32_t U_CALLCONV compareSingleUnits(const void* /*context*/, const void* left, const void* right) { auto realLeft = static_cast<const SingleUnitImpl* const*>(left); @@ -610,162 +802,129 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right) return (*realLeft)->compareTo(**realRight); } -/** - * Generate the identifier string for a single unit in place. - * - * Does not support the dimensionless SingleUnitImpl: calling serializeSingle - * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR. - * - * @param first If singleUnit is part of a compound unit, and not its first - * single unit, set this to false. Otherwise: set to true. - */ -void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) { - if (first && singleUnit.dimensionality < 0) { - // Essentially the "unary per". For compound units with a numerator, the - // caller takes care of the "binary per". - output.append("per-", status); +// Returns an index into the gCategories array, for the "unitQuantity" (aka +// "type" or "category") associated with the given base unit identifier. Returns +// -1 on failure, together with U_UNSUPPORTED_ERROR. 
+int32_t getUnitCategoryIndex(BytesTrie &trie, StringPiece baseUnitIdentifier, UErrorCode &status) { + UStringTrieResult result = trie.reset().next(baseUnitIdentifier.data(), baseUnitIdentifier.length()); + if (!USTRINGTRIE_HAS_VALUE(result)) { + status = U_UNSUPPORTED_ERROR; + return -1; } - if (singleUnit.isDimensionless()) { - status = U_INTERNAL_PROGRAM_ERROR; - return; + return trie.getValue(); +} + +} // namespace + +U_CAPI int32_t U_EXPORT2 +umeas_getPrefixPower(UMeasurePrefix unitPrefix) { + if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN && + unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { + return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN; } - int8_t posPower = std::abs(singleUnit.dimensionality); - if (posPower == 0) { - status = U_INTERNAL_PROGRAM_ERROR; - } else if (posPower == 1) { - // no-op - } else if (posPower == 2) { - output.append("square-", status); - } else if (posPower == 3) { - output.append("cubic-", status); - } else if (posPower < 10) { - output.append('p', status); - output.append(posPower + '0', status); - output.append('-', status); - } else if (posPower <= 15) { - output.append("p1", status); - output.append('0' + (posPower % 10), status); - output.append('-', status); - } else { - status = kUnitIdentifierSyntaxError; + U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI && + unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI); + return unitPrefix - UMEASURE_PREFIX_ONE; +} + +U_CAPI int32_t U_EXPORT2 +umeas_getPrefixBase(UMeasurePrefix unitPrefix) { + if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN && + unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { + return 1024; } + U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI && + unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI); + return 10; +} + +CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status) { + CharString result; + MeasureUnitImpl baseUnitImpl = baseMeasureUnitImpl.copy(status); + UErrorCode localStatus = U_ZERO_ERROR; 
+ umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); if (U_FAILURE(status)) { - return; + return result; } + BytesTrie trie(gSerializedUnitCategoriesTrie); - if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) { - for (const auto& siPrefixInfo : gSIPrefixStrings) { - if (siPrefixInfo.value == singleUnit.siPrefix) { - output.append(siPrefixInfo.string, status); - break; - } - } - } + baseUnitImpl.serialize(status); + StringPiece identifier = baseUnitImpl.identifier.data(); + int32_t idx = getUnitCategoryIndex(trie, identifier, localStatus); if (U_FAILURE(status)) { - return; + return result; } - output.appendInvariantChars(gSimpleUnits[singleUnit.index], status); -} + // In case the base unit identifier did not match any entry. + if (U_FAILURE(localStatus)) { + localStatus = U_ZERO_ERROR; + baseUnitImpl.takeReciprocal(status); + baseUnitImpl.serialize(status); + identifier.set(baseUnitImpl.identifier.data()); + idx = getUnitCategoryIndex(trie, identifier, localStatus); -/** - * Normalize a MeasureUnitImpl and generate the identifier string in place. - */ -void serialize(MeasureUnitImpl& impl, UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - U_ASSERT(impl.identifier.isEmpty()); - if (impl.units.length() == 0) { - // Dimensionless, constructed by the default constructor: no appending - // to impl.identifier, we wish it to contain the zero-length string. - return; - } - if (impl.complexity == UMEASURE_UNIT_COMPOUND) { - // Note: don't sort a MIXED unit - uprv_sortArray( - impl.units.getAlias(), - impl.units.length(), - sizeof(impl.units[0]), - compareSingleUnits, - nullptr, - false, - &status); if (U_FAILURE(status)) { - return; + return result; } } - serializeSingle(*impl.units[0], true, impl.identifier, status); - if (impl.units.length() == 1) { - return; + + // In case the reciprocal of the base unit identifier did not match any entry. 
+ MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(status); + if (U_FAILURE(status)) { + return result; } - for (int32_t i = 1; i < impl.units.length(); i++) { - const SingleUnitImpl& prev = *impl.units[i-1]; - const SingleUnitImpl& curr = *impl.units[i]; - if (impl.complexity == UMEASURE_UNIT_MIXED) { - impl.identifier.append("-and-", status); - serializeSingle(curr, true, impl.identifier, status); - } else { - if (prev.dimensionality > 0 && curr.dimensionality < 0) { - impl.identifier.append("-per-", status); - } else { - impl.identifier.append('-', status); - } - serializeSingle(curr, false, impl.identifier, status); + if (U_FAILURE(localStatus)) { + localStatus = U_ZERO_ERROR; + simplifiedUnit.serialize(status); + identifier.set(simplifiedUnit.identifier.data()); + idx = getUnitCategoryIndex(trie, identifier, localStatus); + + if (U_FAILURE(status)) { + return result; } } -} + // In case the simplified base unit identifier did not match any entry. + if (U_FAILURE(localStatus)) { + localStatus = U_ZERO_ERROR; + simplifiedUnit.takeReciprocal(status); + simplifiedUnit.serialize(status); + identifier.set(simplifiedUnit.identifier.data()); + idx = getUnitCategoryIndex(trie, identifier, localStatus); -/** - * Appends a SingleUnitImpl to a MeasureUnitImpl. - * - * @return true if a new item was added. If unit is the dimensionless unit, it - * is never added: the return value will always be false. - */ -bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) { - if (unit.isDimensionless()) { - // We don't append dimensionless units. 
- return false; - } - // Find a similar unit that already exists, to attempt to coalesce - SingleUnitImpl* oldUnit = nullptr; - for (int32_t i = 0; i < impl.units.length(); i++) { - auto* candidate = impl.units[i]; - if (candidate->isCompatibleWith(unit)) { - oldUnit = candidate; + if (U_FAILURE(status)) { + return result; } } - if (oldUnit) { - // Both dimensionalities will be positive, or both will be negative, by - // virtue of isCompatibleWith(). - oldUnit->dimensionality += unit.dimensionality; - } else { - SingleUnitImpl* destination = impl.units.emplaceBack(); - if (!destination) { - status = U_MEMORY_ALLOCATION_ERROR; - return false; - } - *destination = unit; + + // If there is no match at all, throw an exception. + if (U_FAILURE(localStatus)) { + status = U_INVALID_FORMAT_ERROR; + return result; } - return (oldUnit == nullptr); -} -} // namespace + if (idx < 0 || idx >= gCategoriesCount) { + status = U_INVALID_FORMAT_ERROR; + return result; + } + result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status); + return result; +} +// In ICU4J, this is MeasureUnit.getSingleUnitImpl(). 
SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) { MeasureUnitImpl temp; const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status); if (U_FAILURE(status)) { return {}; } - if (impl.units.length() == 0) { + if (impl.singleUnits.length() == 0) { return {}; } - if (impl.units.length() == 1) { - return *impl.units[0]; + if (impl.singleUnits.length() == 1) { + return *impl.singleUnits[0]; } status = U_ILLEGAL_ARGUMENT_ERROR; return {}; @@ -773,10 +932,71 @@ SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UE MeasureUnit SingleUnitImpl::build(UErrorCode& status) const { MeasureUnitImpl temp; - temp.append(*this, status); + temp.appendSingleUnit(*this, status); + // TODO(icu-units#28): the MeasureUnitImpl::build() method uses + // findBySubtype, which is relatively slow. + // - At the time of loading the simple unit IDs, we could also save a + // mapping to the builtin MeasureUnit type and subtype they correspond to. + // - This method could then check dimensionality and index, and if both are + // 1, directly return MeasureUnit instances very quickly. 
return std::move(temp).build(status); } +const char *SingleUnitImpl::getSimpleUnitID() const { + return gSimpleUnits[index]; +} + +void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const { + int32_t absPower = std::abs(this->dimensionality); + + U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units"; + + if (absPower == 1) { + // no-op + } else if (absPower == 2) { + result.append(StringPiece("square-"), status); + } else if (absPower == 3) { + result.append(StringPiece("cubic-"), status); + } else if (absPower <= 15) { + result.append(StringPiece("pow"), status); + result.appendNumber(absPower, status); + result.append(StringPiece("-"), status); + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error + return; + } + + if (U_FAILURE(status)) { + return; + } + + if (this->unitPrefix != UMEASURE_PREFIX_ONE) { + bool found = false; + for (const auto &unitPrefixInfo : gUnitPrefixStrings) { + // TODO: consider using binary search? If we do this, add a unit + // test to ensure gUnitPrefixStrings is sorted? 
+ if (unitPrefixInfo.value == this->unitPrefix) { + result.append(unitPrefixInfo.string, status); + found = true; + break; + } + } + if (!found) { + status = U_UNSUPPORTED_ERROR; + return; + } + } + + result.append(StringPiece(this->getSimpleUnitID()), status); +} + +int32_t SingleUnitImpl::getUnitCategoryIndex() const { + return gSimpleUnitCategories[index]; +} + +MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) { + this->appendSingleUnit(singleUnit, status); +} MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) { return Parser::from(identifier, status).parse(status); @@ -803,22 +1023,165 @@ MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy( void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) { identifier.clear(); - for (int32_t i = 0; i < units.length(); i++) { - units[i]->dimensionality *= -1; + for (int32_t i = 0; i < singleUnits.length(); i++) { + singleUnits[i]->dimensionality *= -1; } } -bool MeasureUnitImpl::append(const SingleUnitImpl& singleUnit, UErrorCode& status) { +MeasureUnitImpl MeasureUnitImpl::copyAndSimplify(UErrorCode &status) const { + MeasureUnitImpl result; + for (int32_t i = 0; i < singleUnits.length(); i++) { + const SingleUnitImpl &singleUnit = *this->singleUnits[i]; + + // The following `for` loop will cause time complexity to be O(n^2). 
+ // However, n is very small (number of units, generally, at maximum equal to 10) + bool unitExist = false; + for (int32_t j = 0; j < result.singleUnits.length(); j++) { + if (uprv_strcmp(result.singleUnits[j]->getSimpleUnitID(), singleUnit.getSimpleUnitID()) == + 0 && + result.singleUnits[j]->unitPrefix == singleUnit.unitPrefix) { + unitExist = true; + result.singleUnits[j]->dimensionality = + result.singleUnits[j]->dimensionality + singleUnit.dimensionality; + break; + } + } + + if (!unitExist) { + result.appendSingleUnit(singleUnit, status); + } + } + + return result; +} + +bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) { identifier.clear(); - return appendImpl(*this, singleUnit, status); + + if (singleUnit.isDimensionless()) { + // Do not append dimensionless units. + return false; + } + + // Find a similar unit that already exists, to attempt to coalesce + SingleUnitImpl *oldUnit = nullptr; + for (int32_t i = 0; i < this->singleUnits.length(); i++) { + auto *candidate = this->singleUnits[i]; + if (candidate->isCompatibleWith(singleUnit)) { + oldUnit = candidate; + } + } + + if (oldUnit) { + // Both dimensionalities will be positive, or both will be negative, by + // virtue of isCompatibleWith(). + oldUnit->dimensionality += singleUnit.dimensionality; + + return false; + } + + // Add a copy of singleUnit + // NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of singleUnit. + this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit); + if (U_FAILURE(status)) { + return false; + } + + // If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits` + // contains more than one. 
thus means the complexity should be `UMEASURE_UNIT_COMPOUND` + if (this->singleUnits.length() > 1 && + this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) { + this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND; + } + + return true; +} + +MaybeStackVector<MeasureUnitImplWithIndex> +MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const { + MaybeStackVector<MeasureUnitImplWithIndex> result; + + if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { + result.emplaceBackAndCheckErrorCode(status, 0, *this, status); + return result; + } + + for (int32_t i = 0; i < singleUnits.length(); ++i) { + result.emplaceBackAndCheckErrorCode(status, i, *singleUnits[i], status); + if (U_FAILURE(status)) { + return result; + } + } + + return result; +} + +/** + * Normalize a MeasureUnitImpl and generate the identifier string in place. + */ +void MeasureUnitImpl::serialize(UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + + if (this->singleUnits.length() == 0) { + // Dimensionless, constructed by the default constructor. 
+ return; + } + + if (this->complexity == UMEASURE_UNIT_COMPOUND) { + // Note: don't sort a MIXED unit + uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(), + sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status); + if (U_FAILURE(status)) { + return; + } + } + + CharString result; + bool beforePer = true; + bool firstTimeNegativeDimension = false; + for (int32_t i = 0; i < this->singleUnits.length(); i++) { + if (beforePer && (*this->singleUnits[i]).dimensionality < 0) { + beforePer = false; + firstTimeNegativeDimension = true; + } else if ((*this->singleUnits[i]).dimensionality < 0) { + firstTimeNegativeDimension = false; + } + + if (U_FAILURE(status)) { + return; + } + + if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { + if (result.length() != 0) { + result.append(StringPiece("-and-"), status); + } + } else { + if (firstTimeNegativeDimension) { + if (result.length() == 0) { + result.append(StringPiece("per-"), status); + } else { + result.append(StringPiece("-per-"), status); + } + } else { + if (result.length() != 0) { + result.append(StringPiece("-"), status); + } + } + } + + this->singleUnits[i]->appendNeutralIdentifier(result, status); + } + + this->identifier = CharString(result, status); } MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { - serialize(*this, status); + this->serialize(status); return MeasureUnit(std::move(*this)); } - MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { return Parser::from(identifier, status).parse(status).build(status); } @@ -828,13 +1191,13 @@ UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity; } -UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { - return SingleUnitImpl::forMeasureUnit(*this, status).siPrefix; +UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const { + return 
SingleUnitImpl::forMeasureUnit(*this, status).unitPrefix; } -MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { +MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const { SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); - singleUnit.siPrefix = prefix; + singleUnit.unitPrefix = prefix; return singleUnit.build(status); } @@ -867,22 +1230,26 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c status = U_ILLEGAL_ARGUMENT_ERROR; return {}; } - for (int32_t i = 0; i < otherImpl.units.length(); i++) { - impl.append(*otherImpl.units[i], status); + for (int32_t i = 0; i < otherImpl.singleUnits.length(); i++) { + impl.appendSingleUnit(*otherImpl.singleUnits[i], status); } - if (impl.units.length() > 1) { + if (impl.singleUnits.length() > 1) { impl.complexity = UMEASURE_UNIT_COMPOUND; } return std::move(impl).build(status); } -LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnits(int32_t& outCount, UErrorCode& status) const { +LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const { MeasureUnitImpl temp; const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status); - outCount = impl.units.length(); + outCount = impl.singleUnits.length(); MeasureUnit* arr = new MeasureUnit[outCount]; + if (arr == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return LocalArray<MeasureUnit>(); + } for (int32_t i = 0; i < outCount; i++) { - arr[i] = impl.units[i]->build(status); + arr[i] = impl.singleUnits[i]->build(status); } return LocalArray<MeasureUnit>(arr, status); } diff --git a/contrib/libs/icu/i18n/measunit_impl.h b/contrib/libs/icu/i18n/measunit_impl.h index c69d243b3b..c60ff2fc33 100644 --- a/contrib/libs/icu/i18n/measunit_impl.h +++ b/contrib/libs/icu/i18n/measunit_impl.h @@ -14,15 +14,34 @@ U_NAMESPACE_BEGIN +namespace number { +namespace impl { +class LongNameHandler; +} +} // 
namespace number static const char16_t kDefaultCurrency[] = u"XXX"; static const char kDefaultCurrency8[] = "XXX"; +/** + * Looks up the "unitQuantity" (aka "type" or "category") of a base unit + * identifier. The category is returned via `result`, which must initially be + * empty. + * + * This only supports base units: other units must be resolved to base units + * before passing to this function, otherwise U_UNSUPPORTED_ERROR status may be + * returned. + * + * Categories are found in `unitQuantities` in the `units` resource (see + * `units.txt`). + */ +// TODO: make this function accepts any `MeasureUnit` as Java and move it to the `UnitsData` class. +CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status); /** - * A struct representing a single unit (optional SI prefix and dimensionality). + * A struct representing a single unit (optional SI or binary prefix, and dimensionality). */ -struct SingleUnitImpl : public UMemory { +struct U_I18N_API SingleUnitImpl : public UMemory { /** * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error * code and returns the base dimensionless unit. Parses if necessary. @@ -33,9 +52,35 @@ struct SingleUnitImpl : public UMemory { MeasureUnit build(UErrorCode& status) const; /** + * Returns the "simple unit ID", without SI or dimensionality prefix: this + * instance may represent a square-kilometer, but only "meter" will be + * returned. + * + * The returned pointer points at memory that exists for the duration of the + * program's running. + */ + const char *getSimpleUnitID() const; + + /** + * Generates and append a neutral identifier string for a single unit which means we do not include + * the dimension signal. + */ + void appendNeutralIdentifier(CharString &result, UErrorCode &status) const; + + /** + * Returns the index of this unit's "quantity" in unitQuantities (in + * measunit_extra.cpp). 
The value of this index determines sort order for + * normalization of unit identifiers. + */ + int32_t getUnitCategoryIndex() const; + + /** * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of * sorting and coalescing. * + * Sort order of units is specified by UTS #35 + * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization). + * * Takes the sign of dimensionality into account, but not the absolute * value: per-meter is not considered the same as meter, but meter is * considered the same as square-meter. @@ -52,26 +97,65 @@ struct SingleUnitImpl : public UMemory { if (dimensionality > 0 && other.dimensionality < 0) { return -1; } + + // Sort by official quantity order + int32_t thisQuantity = this->getUnitCategoryIndex(); + int32_t otherQuantity = other.getUnitCategoryIndex(); + if (thisQuantity < otherQuantity) { + return -1; + } + if (thisQuantity > otherQuantity) { + return 1; + } + + // If quantity order didn't help, then we go by index. if (index < other.index) { return -1; } if (index > other.index) { return 1; } - if (siPrefix < other.siPrefix) { + + // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can + // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can + // compare the bases. + // NOTE: this methodology will fail if the binary prefix more than or equal 98. + int32_t unitBase = umeas_getPrefixBase(unitPrefix); + int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix); + + // Values for comparison purposes only. + int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3 + : umeas_getPrefixPower(unitPrefix); + int32_t otherUnitPower = + otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3 + : umeas_getPrefixPower(other.unitPrefix); + + // NOTE: if the unitPower is less than the other, + // we return 1 not -1. 
Thus because we want th sorting order + // for the bigger prefix to be before the smaller. + // Example: megabyte should come before kilobyte. + if (unitPower < otherUnitPower) { + return 1; + } + if (unitPower > otherUnitPower) { return -1; } - if (siPrefix > other.siPrefix) { + + if (unitBase < otherUnitBase) { return 1; } + if (unitBase > otherUnitBase) { + return -1; + } + return 0; } /** * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing. * - * Units with the same base unit and SI prefix should match, except that they must also have - * the same dimensionality sign, such that we don't merge numerator and denominator. + * Units with the same base unit and SI or binary prefix should match, except that they must also + * have the same dimensionality sign, such that we don't merge numerator and denominator. */ bool isCompatibleWith(const SingleUnitImpl& other) const { return (compareTo(other) == 0); @@ -88,7 +172,8 @@ struct SingleUnitImpl : public UMemory { /** * Simple unit index, unique for every simple unit, -1 for the dimensionless - * unit. This is an index into a string list in measunit_extra.cpp. + * unit. This is an index into a string list in measunit_extra.cpp, as + * loaded by SimpleUnitIdentifiersSink. * * The default value is -1, meaning the dimensionless unit: * isDimensionless() will return true, until index is changed. @@ -96,11 +181,11 @@ struct SingleUnitImpl : public UMemory { int32_t index = -1; /** - * SI prefix. + * SI or binary prefix. * * This is ignored for the dimensionless unit. */ - UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; + UMeasurePrefix unitPrefix = UMEASURE_PREFIX_ONE; /** * Dimensionality. @@ -110,14 +195,34 @@ struct SingleUnitImpl : public UMemory { int32_t dimensionality = 1; }; +// Forward declaration +struct MeasureUnitImplWithIndex; + +// Export explicit template instantiations of MaybeStackArray, MemoryPool and +// MaybeStackVector. 
This is required when building DLLs for Windows. (See +// datefmt.h, collationiterator.h, erarules.h and others for similar examples.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +template class U_I18N_API MaybeStackArray<SingleUnitImpl *, 8>; +template class U_I18N_API MemoryPool<SingleUnitImpl, 8>; +template class U_I18N_API MaybeStackVector<SingleUnitImpl, 8>; +#endif /** * Internal representation of measurement units. Capable of representing all complexities of units, * including mixed and compound units. */ -struct MeasureUnitImpl : public UMemory { +class U_I18N_API MeasureUnitImpl : public UMemory { + public: + MeasureUnitImpl() = default; + MeasureUnitImpl(MeasureUnitImpl &&other) = default; + // No copy constructor, use MeasureUnitImpl::copy() to make it explicit. + MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) = delete; + MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status); + + MeasureUnitImpl &operator=(MeasureUnitImpl &&other) noexcept = default; + /** Extract the MeasureUnitImpl from a MeasureUnit. */ - static inline const MeasureUnitImpl* get(const MeasureUnit& measureUnit) { + static inline const MeasureUnitImpl *get(const MeasureUnit &measureUnit) { return measureUnit.fImpl; } @@ -169,43 +274,101 @@ struct MeasureUnitImpl : public UMemory { /** * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit. */ - inline MeasureUnitImpl copy(UErrorCode& status) const { - MeasureUnitImpl result; - result.complexity = complexity; - result.units.appendAll(units, status); - result.identifier.append(identifier, status); - return result; - } + MeasureUnitImpl copy(UErrorCode& status) const; + + /** + * Extracts the list of all the individual units inside the `MeasureUnitImpl` with their indices. 
+ * For example: + * - if the `MeasureUnitImpl` is `foot-per-hour` + * it will return a list of 1 {(0, `foot-per-hour`)} + * - if the `MeasureUnitImpl` is `foot-and-inch` + * it will return a list of 2 {(0, `foot`), (1, `inch`)} + */ + MaybeStackVector<MeasureUnitImplWithIndex> + extractIndividualUnitsWithIndices(UErrorCode &status) const; /** Mutates this MeasureUnitImpl to take the reciprocal. */ void takeReciprocal(UErrorCode& status); /** + * Returns a simplified version of the unit. + * NOTE: the simplification happen when there are two units equals in their base unit and their + * prefixes. + * + * Example 1: "square-meter-per-meter" --> "meter" + * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter" + */ + MeasureUnitImpl copyAndSimplify(UErrorCode &status) const; + + /** * Mutates this MeasureUnitImpl to append a single unit. * * @return true if a new item was added. If unit is the dimensionless unit, * it is never added: the return value will always be false. */ - bool append(const SingleUnitImpl& singleUnit, UErrorCode& status); + bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status); + + /** + * Normalizes a MeasureUnitImpl and generate the identifier string in place. + */ + void serialize(UErrorCode &status); /** The complexity, either SINGLE, COMPOUND, or MIXED. */ UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; /** - * The list of simple units. These may be summed or multiplied, based on the + * The list of single units. These may be summed or multiplied, based on the * value of the complexity field. * * The "dimensionless" unit (SingleUnitImpl default constructor) must not be * added to this list. */ - MaybeStackVector<SingleUnitImpl> units; + MaybeStackVector<SingleUnitImpl> singleUnits; /** * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed. 
*/ CharString identifier; + + // For calling serialize + // TODO(icu-units#147): revisit serialization + friend class number::impl::LongNameHandler; +}; + +struct U_I18N_API MeasureUnitImplWithIndex : public UMemory { + const int32_t index; + MeasureUnitImpl unitImpl; + // Makes a copy of unitImpl. + MeasureUnitImplWithIndex(int32_t index, const MeasureUnitImpl &unitImpl, UErrorCode &status) + : index(index), unitImpl(unitImpl.copy(status)) { + } + MeasureUnitImplWithIndex(int32_t index, const SingleUnitImpl &singleUnitImpl, UErrorCode &status) + : index(index), unitImpl(MeasureUnitImpl(singleUnitImpl, status)) { + } }; +// Export explicit template instantiations of MaybeStackArray, MemoryPool and +// MaybeStackVector. This is required when building DLLs for Windows. (See +// datefmt.h, collationiterator.h, erarules.h and others for similar examples.) +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +template class U_I18N_API MaybeStackArray<MeasureUnitImplWithIndex *, 8>; +template class U_I18N_API MemoryPool<MeasureUnitImplWithIndex, 8>; +template class U_I18N_API MaybeStackVector<MeasureUnitImplWithIndex, 8>; + +// Export an explicit template instantiation of the LocalPointer that is used as a +// data member of MeasureUnitImpl. +// (When building DLLs for Windows this is required.) 
+#if defined(_MSC_VER) +// Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= +#pragma warning(push) +#pragma warning(disable : 4661) +#endif +template class U_I18N_API LocalPointerBase<MeasureUnitImpl>; +template class U_I18N_API LocalPointer<MeasureUnitImpl>; +#if defined(_MSC_VER) +#pragma warning(pop) +#endif +#endif U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/measure.cpp b/contrib/libs/icu/i18n/measure.cpp index bffa44215e..b9c47fd401 100644 --- a/contrib/libs/icu/i18n/measure.cpp +++ b/contrib/libs/icu/i18n/measure.cpp @@ -23,7 +23,7 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Measure) -Measure::Measure() {} +Measure::Measure() : unit(nullptr) {} Measure::Measure(const Formattable& _number, MeasureUnit* adoptedUnit, UErrorCode& ec) : @@ -35,7 +35,7 @@ Measure::Measure(const Formattable& _number, MeasureUnit* adoptedUnit, } Measure::Measure(const Measure& other) : - UObject(other), unit(0) { + UObject(other), unit(nullptr) { *this = other; } @@ -43,7 +43,11 @@ Measure& Measure::operator=(const Measure& other) { if (this != &other) { delete unit; number = other.number; - unit = other.unit->clone(); + if (other.unit != nullptr) { + unit = other.unit->clone(); + } else { + unit = nullptr; + } } return *this; } @@ -56,12 +60,12 @@ Measure::~Measure() { delete unit; } -UBool Measure::operator==(const UObject& other) const { +bool Measure::operator==(const UObject& other) const { if (this == &other) { // Same object, equal - return TRUE; + return true; } if (typeid(*this) != typeid(other)) { // Different types, not equal - return FALSE; + return false; } const Measure &m = static_cast<const Measure&>(other); return number == m.number && diff --git a/contrib/libs/icu/i18n/msgfmt.cpp b/contrib/libs/icu/i18n/msgfmt.cpp index 3ca368ef95..b8cb2e2ca5 100644 --- a/contrib/libs/icu/i18n/msgfmt.cpp +++ b/contrib/libs/icu/i18n/msgfmt.cpp @@ -389,10 +389,10 @@ MessageFormat::operator=(const MessageFormat& that) return *this; } 
-UBool +bool MessageFormat::operator==(const Format& rhs) const { - if (this == &rhs) return TRUE; + if (this == &rhs) return true; MessageFormat& that = (MessageFormat&)rhs; @@ -400,37 +400,37 @@ MessageFormat::operator==(const Format& rhs) const if (!Format::operator==(rhs) || msgPattern != that.msgPattern || fLocale != that.fLocale) { - return FALSE; + return false; } // Compare hashtables. if ((customFormatArgStarts == NULL) != (that.customFormatArgStarts == NULL)) { - return FALSE; + return false; } if (customFormatArgStarts == NULL) { - return TRUE; + return true; } UErrorCode ec = U_ZERO_ERROR; const int32_t count = uhash_count(customFormatArgStarts); const int32_t rhs_count = uhash_count(that.customFormatArgStarts); if (count != rhs_count) { - return FALSE; + return false; } int32_t idx = 0, rhs_idx = 0, pos = UHASH_FIRST, rhs_pos = UHASH_FIRST; for (; idx < count && rhs_idx < rhs_count && U_SUCCESS(ec); ++idx, ++rhs_idx) { const UHashElement* cur = uhash_nextElement(customFormatArgStarts, &pos); const UHashElement* rhs_cur = uhash_nextElement(that.customFormatArgStarts, &rhs_pos); if (cur->key.integer != rhs_cur->key.integer) { - return FALSE; + return false; } const Format* format = (const Format*)uhash_iget(cachedFormatters, cur->key.integer); const Format* rhs_format = (const Format*)uhash_iget(that.cachedFormatters, rhs_cur->key.integer); if (*format != *rhs_format) { - return FALSE; + return false; } } - return TRUE; + return true; } // ------------------------------------- @@ -637,7 +637,7 @@ MessageFormat::adoptFormats(Format** newFormats, // ------------------------------------- // Sets the new formats array and updates the array count. -// This MessageFormat instance maks a copy of the new formats. +// This MessageFormat instance makes a copy of the new formats. 
void MessageFormat::setFormats(const Format** newFormats, @@ -862,7 +862,7 @@ MessageFormat::getFormatNames(UErrorCode& status) { fFormatNames->setDeleter(uprv_deleteUObject); for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { - fFormatNames->addElement(new UnicodeString(getArgName(partIndex + 1)), status); + fFormatNames->addElementX(new UnicodeString(getArgName(partIndex + 1)), status); } StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); @@ -1107,7 +1107,7 @@ void MessageFormat::format(int32_t msgStart, const void *plNumber, } else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) { // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check - // for the hash table containind DummyFormat. + // for the hash table containing DummyFormat. if (arg->isNumeric()) { const NumberFormat* nf = getDefaultNumberFormat(success); appendTo.formatAndAppend(nf, *arg, success); @@ -1440,7 +1440,7 @@ MessageFormat::parse(int32_t msgStart, argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) { // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check - // for the hash table containind DummyFormat. + // for the hash table containing DummyFormat. // Match as a string. 
// if at end, use longest possible match @@ -1869,8 +1869,8 @@ UBool MessageFormat::equalFormats(const void* left, const void* right) { } -UBool MessageFormat::DummyFormat::operator==(const Format&) const { - return TRUE; +bool MessageFormat::DummyFormat::operator==(const Format&) const { + return true; } MessageFormat::DummyFormat* MessageFormat::DummyFormat::clone() const { diff --git a/contrib/libs/icu/i18n/msgfmt_impl.h b/contrib/libs/icu/i18n/msgfmt_impl.h index 1cece1a094..5798838913 100644 --- a/contrib/libs/icu/i18n/msgfmt_impl.h +++ b/contrib/libs/icu/i18n/msgfmt_impl.h @@ -29,10 +29,10 @@ public: FormatNameEnumeration(UVector *fFormatNames, UErrorCode& status); virtual ~FormatNameEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; - virtual const UnicodeString* snext(UErrorCode& status); - virtual void reset(UErrorCode& status); - virtual int32_t count(UErrorCode& status) const; + virtual UClassID getDynamicClassID(void) const override; + virtual const UnicodeString* snext(UErrorCode& status) override; + virtual void reset(UErrorCode& status) override; + virtual int32_t count(UErrorCode& status) const override; private: int32_t pos; UVector *fFormatNames; diff --git a/contrib/libs/icu/i18n/name2uni.cpp b/contrib/libs/icu/i18n/name2uni.cpp index dcf8d852e2..ffbbf152d3 100644 --- a/contrib/libs/icu/i18n/name2uni.cpp +++ b/contrib/libs/icu/i18n/name2uni.cpp @@ -119,7 +119,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos return; } - // Accomodate the longest possible name + // Accommodate the longest possible name ++maxLen; // allow for temporary trailing space char* cbuf = (char*) uprv_malloc(maxLen); if (cbuf == NULL) { diff --git a/contrib/libs/icu/i18n/name2uni.h b/contrib/libs/icu/i18n/name2uni.h index 44ad85fb82..6881c6bc85 100644 --- a/contrib/libs/icu/i18n/name2uni.h +++ b/contrib/libs/icu/i18n/name2uni.h @@ -49,12 +49,12 @@ public: * Transliterator 
API. * @return A copy of the object. */ - virtual NameUnicodeTransliterator* clone() const; + virtual NameUnicodeTransliterator* clone() const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -73,7 +73,7 @@ public: * pos.contextLimit. Otherwise, assume the text is complete. */ virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, - UBool isIncremental) const; + UBool isIncremental) const override; /** * Set of characters which occur in Unicode character names. diff --git a/contrib/libs/icu/i18n/nfrs.cpp b/contrib/libs/icu/i18n/nfrs.cpp index e7b17b46c3..df04e33e04 100644 --- a/contrib/libs/icu/i18n/nfrs.cpp +++ b/contrib/libs/icu/i18n/nfrs.cpp @@ -37,7 +37,7 @@ enum { /** 0.x */ PROPER_FRACTION_RULE_INDEX = 2, /** x.0 */ - MASTER_RULE_INDEX = 3, + DEFAULT_RULE_INDEX = 3, /** Inf */ INFINITY_RULE_INDEX = 4, /** NaN */ @@ -231,7 +231,7 @@ NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status) // (this isn't a for loop because we might be deleting items from // the vector-- we want to make sure we only increment i when - // we _didn't_ delete aything from the vector) + // we _didn't_ delete anything from the vector) int32_t rulesSize = rules.size(); for (int32_t i = 0; i < rulesSize; i++) { NFRule* rule = rules[i]; @@ -278,8 +278,8 @@ void NFRuleSet::setNonNumericalRule(NFRule *rule) { else if (baseValue == NFRule::kProperFractionRule) { setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE); } - else if (baseValue == NFRule::kMasterRule) { - setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE); + else if (baseValue == NFRule::kDefaultRule) { + setBestFractionRule(DEFAULT_RULE_INDEX, rule, TRUE); } else if (baseValue == NFRule::kInfinityRule) { delete nonNumericalRules[INFINITY_RULE_INDEX]; @@ -323,7 +323,7 @@ 
NFRuleSet::~NFRuleSet() for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { if (i != IMPROPER_FRACTION_RULE_INDEX && i != PROPER_FRACTION_RULE_INDEX - && i != MASTER_RULE_INDEX) + && i != DEFAULT_RULE_INDEX) { delete nonNumericalRules[i]; } @@ -344,7 +344,7 @@ util_equalRules(const NFRule* rule1, const NFRule* rule2) return FALSE; } -UBool +bool NFRuleSet::operator==(const NFRuleSet& rhs) const { if (rules.size() == rhs.rules.size() && @@ -354,19 +354,19 @@ NFRuleSet::operator==(const NFRuleSet& rhs) const // ...then compare the non-numerical rule lists... for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) { - return FALSE; + return false; } } // ...then compare the rule lists... for (uint32_t i = 0; i < rules.size(); ++i) { if (*rules[i] != *rhs.rules[i]) { - return FALSE; + return false; } } - return TRUE; + return true; } - return FALSE; + return false; } void @@ -375,7 +375,7 @@ NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErro rules[i]->setDecimalFormatSymbols(newSymbols, status); } // Switch the fraction rules to mirror the DecimalFormatSymbols. 
- for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) { + for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) { if (nonNumericalRules[nonNumericalIdx]) { for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { NFRule *fractionRule = fractionRules[fIdx]; @@ -472,9 +472,9 @@ NFRuleSet::findDoubleRule(double number) const } } - // if there's a master rule, use it to format the number - if (nonNumericalRules[MASTER_RULE_INDEX]) { - return nonNumericalRules[MASTER_RULE_INDEX]; + // if there's a default rule, use it to format the number + if (nonNumericalRules[DEFAULT_RULE_INDEX]) { + return nonNumericalRules[DEFAULT_RULE_INDEX]; } // and if we haven't yet returned a rule, use findNormalRule() @@ -507,13 +507,13 @@ NFRuleSet::findNormalRule(int64_t number) const // do them in findRule(), because the version of format() that // takes a long bypasses findRule() and goes straight to this // function. This function does skip the fraction rules since - // we know the value is an integer (it also skips the master + // we know the value is an integer (it also skips the default // rule, since it's considered a fraction rule. Skipping the - // master rule in this function is also how we avoid infinite + // default rule in this function is also how we avoid infinite // recursion) // {dlf} unfortunately this fails if there are no rules except - // special rules. If there are no rules, use the master rule. + // special rules. If there are no rules, use the default rule. 
// binary-search the rule list for the applicable rule // (a rule is used for all values from its base value to @@ -553,8 +553,8 @@ NFRuleSet::findNormalRule(int64_t number) const } return result; } - // else use the master rule - return nonNumericalRules[MASTER_RULE_INDEX]; + // else use the default rule + return nonNumericalRules[DEFAULT_RULE_INDEX]; } /** @@ -630,7 +630,7 @@ NFRuleSet::findFractionRuleSetRule(double number) const // value, then the first one (the one we found above) is used if // the numerator of the fraction is 1 and the second one is used if // the numerator of the fraction is anything else (this lets us - // do things like "one third"/"two thirds" without haveing to define + // do things like "one third"/"two thirds" without having to define // a whole bunch of extra rule sets) if ((unsigned)(winner + 1) < rules.size() && rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { @@ -647,7 +647,7 @@ NFRuleSet::findFractionRuleSetRule(double number) const /** * Parses a string. Matches the string to be parsed against each * of its rules (with a base value less than upperBound) and returns - * the value produced by the rule that matched the most charcters + * the value produced by the rule that matched the most characters * in the source string. 
* @param text The string to parse * @param parsePosition The initial position is ignored and assumed @@ -762,7 +762,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun #ifdef RBNF_DEBUG fprintf(stderr, "<nfrs> exit\n"); #endif - // finally, update the parse postion we were passed to point to the + // finally, update the parse position we were passed to point to the // first character we didn't use, and return the result that // corresponds to that string of characters pos = highWaterMark; @@ -792,7 +792,7 @@ NFRuleSet::appendRules(UnicodeString& result) const if (nonNumericalRules[i]) { if (rule->getBaseValue() == NFRule::kImproperFractionRule || rule->getBaseValue() == NFRule::kProperFractionRule - || rule->getBaseValue() == NFRule::kMasterRule) + || rule->getBaseValue() == NFRule::kDefaultRule) { for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { NFRule *fractionRule = fractionRules[fIdx]; diff --git a/contrib/libs/icu/i18n/nfrs.h b/contrib/libs/icu/i18n/nfrs.h index db03c9039d..a6ad3a3bb7 100644 --- a/contrib/libs/icu/i18n/nfrs.h +++ b/contrib/libs/icu/i18n/nfrs.h @@ -36,12 +36,12 @@ public: void parseRules(UnicodeString& rules, UErrorCode& status); void setNonNumericalRule(NFRule *rule); void setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule); - void makeIntoFractionRuleSet() { fIsFractionRuleSet = TRUE; } + void makeIntoFractionRuleSet() { fIsFractionRuleSet = true; } ~NFRuleSet(); - UBool operator==(const NFRuleSet& rhs) const; - UBool operator!=(const NFRuleSet& rhs) const { return !operator==(rhs); } + bool operator==(const NFRuleSet& rhs) const; + bool operator!=(const NFRuleSet& rhs) const { return !operator==(rhs); } UBool isPublic() const { return fIsPublic; } @@ -93,11 +93,11 @@ int64_t util64_fromDouble(double d); uint64_t util64_pow(uint32_t radix, uint16_t exponent); // convert n to digit string in buffer, return length of string -uint32_t util64_tou(int64_t n, UChar* buffer, 
uint32_t buflen, uint32_t radix = 10, UBool raw = FALSE); +uint32_t util64_tou(int64_t n, UChar* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = false); #ifdef RBNF_DEBUG int64_t util64_utoi(const UChar* str, uint32_t radix = 10); -uint32_t util64_toa(int64_t n, char* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = FALSE); +uint32_t util64_toa(int64_t n, char* buffer, uint32_t buflen, uint32_t radix = 10, UBool raw = false); int64_t util64_atoi(const char* str, uint32_t radix); #endif diff --git a/contrib/libs/icu/i18n/nfrule.cpp b/contrib/libs/icu/i18n/nfrule.cpp index 3ad0291649..4bb0785127 100644 --- a/contrib/libs/icu/i18n/nfrule.cpp +++ b/contrib/libs/icu/i18n/nfrule.cpp @@ -115,7 +115,7 @@ NFRule::makeRules(UnicodeString& description, // we know we're making at least one rule, so go ahead and // new it up and initialize its basevalue and divisor // (this also strips the rule descriptor, if any, off the - // descripton string) + // description string) NFRule* rule1 = new NFRule(rbnf, description, status); /* test for NULL */ if (rule1 == 0) { @@ -153,7 +153,7 @@ NFRule::makeRules(UnicodeString& description, if ((rule1->baseValue > 0 && (rule1->baseValue % util64_pow(rule1->radix, rule1->exponent)) == 0) || rule1->getType() == kImproperFractionRule - || rule1->getType() == kMasterRule) { + || rule1->getType() == kDefaultRule) { // if it passes that test, new up the second rule. 
If the // rule set both rules will belong to is a fraction rule @@ -181,9 +181,9 @@ NFRule::makeRules(UnicodeString& description, } // if the description began with "x.0" and contains bracketed - // text, it describes both the master rule and the + // text, it describes both the default rule and the // improper fraction rule - else if (rule1->getType() == kMasterRule) { + else if (rule1->getType() == kDefaultRule) { rule2->baseValue = rule1->baseValue; rule1->setType(kImproperFractionRule); } @@ -193,7 +193,7 @@ NFRule::makeRules(UnicodeString& description, rule2->radix = rule1->radix; rule2->exponent = rule1->exponent; - // rule2's rule text omits the stuff in brackets: initalize + // rule2's rule text omits the stuff in brackets: initialize // its rule text and substitutions accordingly sbuf.append(description, 0, brack1); if (brack2 + 1 < description.length()) { @@ -376,7 +376,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) decimalPoint = descriptor.charAt(1); } else if (firstChar == gX && lastChar == gZero) { - setBaseValue(kMasterRule, status); + setBaseValue(kDefaultRule, status); decimalPoint = descriptor.charAt(1); } else if (descriptor.compare(gNaN, 3) == 0) { @@ -631,7 +631,7 @@ util_equalSubstitutions(const NFSubstitution* sub1, const NFSubstitution* sub2) * @param that The rule to compare this one against * @return True is the two rules are functionally equivalent */ -UBool +bool NFRule::operator==(const NFRule& rhs) const { return baseValue == rhs.baseValue @@ -663,7 +663,7 @@ NFRule::_appendRuleText(UnicodeString& result) const case kNegativeNumberRule: result.append(gMinusX, 2); break; case kImproperFractionRule: result.append(gX).append(decimalPoint == 0 ? gDot : decimalPoint).append(gX); break; case kProperFractionRule: result.append(gZero).append(decimalPoint == 0 ? gDot : decimalPoint).append(gX); break; - case kMasterRule: result.append(gX).append(decimalPoint == 0 ? 
gDot : decimalPoint).append(gZero); break; + case kDefaultRule: result.append(gX).append(decimalPoint == 0 ? gDot : decimalPoint).append(gZero); break; case kInfinityRule: result.append(gInf, 3); break; case kNaNRule: result.append(gNaN, 3); break; default: @@ -1297,6 +1297,10 @@ NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErr #if !UCONFIG_NO_COLLATION // go through all this grief if we're in lenient-parse mode if (formatter->isLenient()) { + // Check if non-lenient rule finds the text before call lenient parsing + if (str.startsWith(prefix)) { + return prefix.length(); + } // get the formatter's collator and use it to create two // collation element iterators, one over the target string // and another over the prefix (right now, we'll throw an @@ -1505,9 +1509,15 @@ NFRule::findText(const UnicodeString& str, return str.indexOf(key, startingAt); } else { - // but if lenient parsing is turned ON, we've got some work - // ahead of us - return findTextLenient(str, key, startingAt, length); + // Check if non-lenient rule finds the text before call lenient parsing + *length = key.length(); + int32_t pos = str.indexOf(key, startingAt); + if(pos >= 0) { + return pos; + } else { + // but if lenient parsing is turned ON, we've got some work ahead of us + return findTextLenient(str, key, startingAt, length); + } } } @@ -1523,7 +1533,7 @@ NFRule::findTextLenient(const UnicodeString& str, // in JDK 1.2, CollationElementIterator provides us with an // API to map between character offsets and collation elements // and we can do this by marching through the string comparing - // collation elements. We can't do that in JDK 1.1. Insted, + // collation elements. We can't do that in JDK 1.1. 
Instead, // we have to go through this horrible slow mess: int32_t p = startingAt; int32_t keyLen = 0; diff --git a/contrib/libs/icu/i18n/nfrule.h b/contrib/libs/icu/i18n/nfrule.h index ed33eaa5af..5e615e485c 100644 --- a/contrib/libs/icu/i18n/nfrule.h +++ b/contrib/libs/icu/i18n/nfrule.h @@ -38,7 +38,7 @@ public: kNegativeNumberRule = -1, kImproperFractionRule = -2, kProperFractionRule = -3, - kMasterRule = -4, + kDefaultRule = -4, kInfinityRule = -5, kNaNRule = -6, kOtherRule = -7 @@ -54,8 +54,8 @@ public: NFRule(const RuleBasedNumberFormat* rbnf, const UnicodeString &ruleText, UErrorCode &status); ~NFRule(); - UBool operator==(const NFRule& rhs) const; - UBool operator!=(const NFRule& rhs) const { return !operator==(rhs); } + bool operator==(const NFRule& rhs) const; + bool operator!=(const NFRule& rhs) const { return !operator==(rhs); } ERuleType getType() const { return (ERuleType)(baseValue <= kNoBase ? (ERuleType)baseValue : kOtherRule); } void setType(ERuleType ruleType) { baseValue = (int32_t)ruleType; } diff --git a/contrib/libs/icu/i18n/nfsubs.cpp b/contrib/libs/icu/i18n/nfsubs.cpp index 208543d1ac..70ac18be46 100644 --- a/contrib/libs/icu/i18n/nfsubs.cpp +++ b/contrib/libs/icu/i18n/nfsubs.cpp @@ -58,15 +58,15 @@ public: UErrorCode& status); virtual ~SameValueSubstitution(); - virtual int64_t transformNumber(int64_t number) const { return number; } - virtual double transformNumber(double number) const { return number; } - virtual double composeRuleValue(double newRuleValue, double /*oldRuleValue*/) const { return newRuleValue; } - virtual double calcUpperBound(double oldUpperBound) const { return oldUpperBound; } - virtual UChar tokenChar() const { return (UChar)0x003d; } // '=' + virtual int64_t transformNumber(int64_t number) const override { return number; } + virtual double transformNumber(double number) const override { return number; } + virtual double composeRuleValue(double newRuleValue, double /*oldRuleValue*/) const override { return 
newRuleValue; } + virtual double calcUpperBound(double oldUpperBound) const override { return oldUpperBound; } + virtual UChar tokenChar() const override { return (UChar)0x003d; } // '=' public: static UClassID getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; }; SameValueSubstitution::~SameValueSubstitution() {} @@ -88,7 +88,7 @@ public: } virtual ~MultiplierSubstitution(); - virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) { + virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) override { divisor = util64_pow(radix, exponent); if(divisor == 0) { @@ -96,13 +96,13 @@ public: } } - virtual UBool operator==(const NFSubstitution& rhs) const; + virtual bool operator==(const NFSubstitution& rhs) const override; - virtual int64_t transformNumber(int64_t number) const { + virtual int64_t transformNumber(int64_t number) const override { return number / divisor; } - virtual double transformNumber(double number) const { + virtual double transformNumber(double number) const override { if (getRuleSet()) { return uprv_floor(number / divisor); } else { @@ -110,17 +110,17 @@ public: } } - virtual double composeRuleValue(double newRuleValue, double /*oldRuleValue*/) const { + virtual double composeRuleValue(double newRuleValue, double /*oldRuleValue*/) const override { return newRuleValue * divisor; } - virtual double calcUpperBound(double /*oldUpperBound*/) const { return static_cast<double>(divisor); } + virtual double calcUpperBound(double /*oldUpperBound*/) const override { return static_cast<double>(divisor); } - virtual UChar tokenChar() const { return (UChar)0x003c; } // '<' + virtual UChar tokenChar() const override { return (UChar)0x003c; } // '<' public: static UClassID getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; }; 
MultiplierSubstitution::~MultiplierSubstitution() {} @@ -137,7 +137,7 @@ public: UErrorCode& status); virtual ~ModulusSubstitution(); - virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) { + virtual void setDivisor(int32_t radix, int16_t exponent, UErrorCode& status) override { divisor = util64_pow(radix, exponent); if (divisor == 0) { @@ -145,13 +145,13 @@ public: } } - virtual UBool operator==(const NFSubstitution& rhs) const; + virtual bool operator==(const NFSubstitution& rhs) const override; - virtual void doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const; - virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const; + virtual void doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const override; + virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const override; - virtual int64_t transformNumber(int64_t number) const { return number % divisor; } - virtual double transformNumber(double number) const { return uprv_fmod(number, static_cast<double>(divisor)); } + virtual int64_t transformNumber(int64_t number) const override { return number % divisor; } + virtual double transformNumber(double number) const override { return uprv_fmod(number, static_cast<double>(divisor)); } virtual UBool doParse(const UnicodeString& text, ParsePosition& parsePosition, @@ -159,23 +159,23 @@ public: double upperBound, UBool lenientParse, uint32_t nonNumericalExecutedRuleMask, - Formattable& result) const; + Formattable& result) const override; - virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { + virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override { return oldRuleValue - uprv_fmod(oldRuleValue, 
static_cast<double>(divisor)) + newRuleValue; } - virtual double calcUpperBound(double /*oldUpperBound*/) const { return static_cast<double>(divisor); } + virtual double calcUpperBound(double /*oldUpperBound*/) const override { return static_cast<double>(divisor); } - virtual UBool isModulusSubstitution() const { return TRUE; } + virtual UBool isModulusSubstitution() const override { return TRUE; } - virtual UChar tokenChar() const { return (UChar)0x003e; } // '>' + virtual UChar tokenChar() const override { return (UChar)0x003e; } // '>' - virtual void toString(UnicodeString& result) const; + virtual void toString(UnicodeString& result) const override; public: static UClassID getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; }; ModulusSubstitution::~ModulusSubstitution() {} @@ -189,15 +189,15 @@ public: : NFSubstitution(_pos, _ruleSet, description, status) {} virtual ~IntegralPartSubstitution(); - virtual int64_t transformNumber(int64_t number) const { return number; } - virtual double transformNumber(double number) const { return uprv_floor(number); } - virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; } - virtual double calcUpperBound(double /*oldUpperBound*/) const { return DBL_MAX; } - virtual UChar tokenChar() const { return (UChar)0x003c; } // '<' + virtual int64_t transformNumber(int64_t number) const override { return number; } + virtual double transformNumber(double number) const override { return uprv_floor(number); } + virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override { return newRuleValue + oldRuleValue; } + virtual double calcUpperBound(double /*oldUpperBound*/) const override { return DBL_MAX; } + virtual UChar tokenChar() const override { return (UChar)0x003c; } // '<' public: static UClassID getStaticClassID(void); - virtual UClassID getDynamicClassID(void) 
const; + virtual UClassID getDynamicClassID(void) const override; }; IntegralPartSubstitution::~IntegralPartSubstitution() {} @@ -213,12 +213,12 @@ public: UErrorCode& status); virtual ~FractionalPartSubstitution(); - virtual UBool operator==(const NFSubstitution& rhs) const; + virtual bool operator==(const NFSubstitution& rhs) const override; - virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const; - virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/, int32_t /*recursionCount*/, UErrorCode& /*status*/) const {} - virtual int64_t transformNumber(int64_t /*number*/) const { return 0; } - virtual double transformNumber(double number) const { return number - uprv_floor(number); } + virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const override; + virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/, int32_t /*recursionCount*/, UErrorCode& /*status*/) const override {} + virtual int64_t transformNumber(int64_t /*number*/) const override { return 0; } + virtual double transformNumber(double number) const override { return number - uprv_floor(number); } virtual UBool doParse(const UnicodeString& text, ParsePosition& parsePosition, @@ -226,15 +226,15 @@ public: double upperBound, UBool lenientParse, uint32_t nonNumericalExecutedRuleMask, - Formattable& result) const; + Formattable& result) const override; - virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; } - virtual double calcUpperBound(double /*oldUpperBound*/) const { return 0.0; } - virtual UChar tokenChar() const { return (UChar)0x003e; } // '>' + virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override { return newRuleValue + oldRuleValue; } + virtual double 
calcUpperBound(double /*oldUpperBound*/) const override { return 0.0; } + virtual UChar tokenChar() const override { return (UChar)0x003e; } // '>' public: static UClassID getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; }; FractionalPartSubstitution::~FractionalPartSubstitution() {} @@ -248,15 +248,15 @@ public: : NFSubstitution(_pos, _ruleSet, description, status) {} virtual ~AbsoluteValueSubstitution(); - virtual int64_t transformNumber(int64_t number) const { return number >= 0 ? number : -number; } - virtual double transformNumber(double number) const { return uprv_fabs(number); } - virtual double composeRuleValue(double newRuleValue, double /*oldRuleValue*/) const { return -newRuleValue; } - virtual double calcUpperBound(double /*oldUpperBound*/) const { return DBL_MAX; } - virtual UChar tokenChar() const { return (UChar)0x003e; } // '>' + virtual int64_t transformNumber(int64_t number) const override { return number >= 0 ? 
number : -number; } + virtual double transformNumber(double number) const override { return uprv_fabs(number); } + virtual double composeRuleValue(double newRuleValue, double /*oldRuleValue*/) const override { return -newRuleValue; } + virtual double calcUpperBound(double /*oldUpperBound*/) const override { return DBL_MAX; } + virtual UChar tokenChar() const override { return (UChar)0x003e; } // '>' public: static UClassID getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; }; AbsoluteValueSubstitution::~AbsoluteValueSubstitution() {} @@ -285,30 +285,30 @@ public: } virtual ~NumeratorSubstitution(); - virtual UBool operator==(const NFSubstitution& rhs) const; + virtual bool operator==(const NFSubstitution& rhs) const override; - virtual int64_t transformNumber(int64_t number) const { return number * ldenominator; } - virtual double transformNumber(double number) const { return uprv_round(number * denominator); } + virtual int64_t transformNumber(int64_t number) const override { return number * ldenominator; } + virtual double transformNumber(double number) const override { return uprv_round(number * denominator); } - virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/, int32_t /*recursionCount*/, UErrorCode& /*status*/) const {} - virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const; + virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/, int32_t /*recursionCount*/, UErrorCode& /*status*/) const override {} + virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const override; virtual UBool doParse(const UnicodeString& text, ParsePosition& parsePosition, double baseValue, double upperBound, UBool /*lenientParse*/, uint32_t 
nonNumericalExecutedRuleMask, - Formattable& result) const; + Formattable& result) const override; - virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; } - virtual double calcUpperBound(double /*oldUpperBound*/) const { return denominator; } - virtual UChar tokenChar() const { return (UChar)0x003c; } // '<' + virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const override { return newRuleValue / oldRuleValue; } + virtual double calcUpperBound(double /*oldUpperBound*/) const override { return denominator; } + virtual UChar tokenChar() const override { return (UChar)0x003c; } // '<' private: static const UChar LTLT[2]; public: static UClassID getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; }; NumeratorSubstitution::~NumeratorSubstitution() {} @@ -342,7 +342,7 @@ NFSubstitution::makeSubstitution(int32_t pos, // IntegralPartSubstitution else if (rule->getBaseValue() == NFRule::kImproperFractionRule || rule->getBaseValue() == NFRule::kProperFractionRule - || rule->getBaseValue() == NFRule::kMasterRule) { + || rule->getBaseValue() == NFRule::kDefaultRule) { return new IntegralPartSubstitution(pos, ruleSet, description, status); } @@ -371,7 +371,7 @@ NFSubstitution::makeSubstitution(int32_t pos, // FractionalPartSubstitution else if (rule->getBaseValue() == NFRule::kImproperFractionRule || rule->getBaseValue() == NFRule::kProperFractionRule - || rule->getBaseValue() == NFRule::kMasterRule) { + || rule->getBaseValue() == NFRule::kDefaultRule) { return new FractionalPartSubstitution(pos, ruleSet, description, status); } @@ -515,7 +515,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NFSubstitution) * @param The substitution to compare this one to * @return true if the two substitutions are functionally equivalent */ -UBool +bool NFSubstitution::operator==(const NFSubstitution& rhs) const { // compare class 
and all of the fields all substitutions have @@ -810,7 +810,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SameValueSubstitution) UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MultiplierSubstitution) -UBool MultiplierSubstitution::operator==(const NFSubstitution& rhs) const +bool MultiplierSubstitution::operator==(const NFSubstitution& rhs) const { return NFSubstitution::operator==(rhs) && divisor == ((const MultiplierSubstitution*)&rhs)->divisor; @@ -845,7 +845,7 @@ ModulusSubstitution::ModulusSubstitution(int32_t _pos, } if (0 == description.compare(gGreaterGreaterGreaterThan, 3)) { - // the >>> token doesn't alter how this substituion calculates the + // the >>> token doesn't alter how this substitution calculates the // values it uses for formatting and parsing, but it changes // what's done with that value after it's obtained: >>> short- // circuits the rule-search process and goes straight to the @@ -856,7 +856,7 @@ ModulusSubstitution::ModulusSubstitution(int32_t _pos, UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ModulusSubstitution) -UBool ModulusSubstitution::operator==(const NFSubstitution& rhs) const +bool ModulusSubstitution::operator==(const NFSubstitution& rhs) const { return NFSubstitution::operator==(rhs) && divisor == ((const ModulusSubstitution*)&rhs)->divisor && @@ -1195,7 +1195,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text, } } -UBool +bool FractionalPartSubstitution::operator==(const NFSubstitution& rhs) const { return NFSubstitution::operator==(rhs) && @@ -1327,7 +1327,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, return TRUE; } -UBool +bool NumeratorSubstitution::operator==(const NFSubstitution& rhs) const { return NFSubstitution::operator==(rhs) && diff --git a/contrib/libs/icu/i18n/nfsubs.h b/contrib/libs/icu/i18n/nfsubs.h index 948627c0cc..0abd98be09 100644 --- a/contrib/libs/icu/i18n/nfsubs.h +++ b/contrib/libs/icu/i18n/nfsubs.h @@ -74,7 +74,7 @@ public: * @param rhs the object to be compared with. 
* @return true if the given Format objects are semantically equal. */ - virtual UBool operator==(const NFSubstitution& rhs) const; + virtual bool operator==(const NFSubstitution& rhs) const; /** * Return true if the given Format objects are semantically unequal. @@ -82,7 +82,7 @@ public: * @param rhs the object to be compared with. * @return true if the given Format objects are semantically unequal. */ - UBool operator!=(const NFSubstitution& rhs) const { return !operator==(rhs); } + bool operator!=(const NFSubstitution& rhs) const { return !operator==(rhs); } /** * Sets the substitution's divisor. Used by NFRule.setBaseValue(). @@ -250,7 +250,7 @@ private: public: static UClassID getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; }; U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/nortrans.h b/contrib/libs/icu/i18n/nortrans.h index 198ed29c95..01cb97ab38 100644 --- a/contrib/libs/icu/i18n/nortrans.h +++ b/contrib/libs/icu/i18n/nortrans.h @@ -44,12 +44,12 @@ class NormalizationTransliterator : public Transliterator { * Transliterator API. * @return A copy of the object. */ - virtual NormalizationTransliterator* clone() const; + virtual NormalizationTransliterator* clone() const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -68,7 +68,7 @@ class NormalizationTransliterator : public Transliterator { * pos.contextLimit. Otherwise, assume the text is complete. 
*/ virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, - UBool isIncremental) const; + UBool isIncremental) const override; public: /** diff --git a/contrib/libs/icu/i18n/nounit.cpp b/contrib/libs/icu/i18n/nounit.cpp deleted file mode 100644 index 1d4aa05506..0000000000 --- a/contrib/libs/icu/i18n/nounit.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -#include "unicode/nounit.h" -#include "uassert.h" - -#if !UCONFIG_NO_FORMATTING - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoUnit) - -NoUnit U_EXPORT2 NoUnit::base() { - return NoUnit(""); -} - -NoUnit U_EXPORT2 NoUnit::percent() { - return NoUnit("percent"); -} - -NoUnit U_EXPORT2 NoUnit::permille() { - return NoUnit("permille"); -} - -NoUnit::NoUnit(const char* subtype) { - initNoUnit(subtype); -} - -NoUnit::NoUnit(const NoUnit& other) : MeasureUnit(other) { -} - -NoUnit* NoUnit::clone() const { - return new NoUnit(*this); -} - -NoUnit::~NoUnit() { -} - - -U_NAMESPACE_END - -#endif diff --git a/contrib/libs/icu/i18n/nultrans.h b/contrib/libs/icu/i18n/nultrans.h index 36c92fa7b1..f5f2fbc911 100644 --- a/contrib/libs/icu/i18n/nultrans.h +++ b/contrib/libs/icu/i18n/nultrans.h @@ -45,19 +45,19 @@ public: * Transliterator API. * @internal Use transliterator factory methods instead since this class will be removed in that release. */ - virtual NullTransliterator* clone() const; + virtual NullTransliterator* clone() const override; /** * Implements {@link Transliterator#handleTransliterate}. * @internal Use transliterator factory methods instead since this class will be removed in that release. */ virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, - UBool isIncremental) const; + UBool isIncremental) const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. 
*/ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. diff --git a/contrib/libs/icu/i18n/number_affixutils.cpp b/contrib/libs/icu/i18n/number_affixutils.cpp index a74ec2d634..f9c154c885 100644 --- a/contrib/libs/icu/i18n/number_affixutils.cpp +++ b/contrib/libs/icu/i18n/number_affixutils.cpp @@ -64,7 +64,7 @@ int32_t AffixUtils::estimateLength(const UnicodeString &patternString, UErrorCod } break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } offset += U16_LENGTH(cp); @@ -134,6 +134,9 @@ Field AffixUtils::getFieldForType(AffixPatternType type) { return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD}; case TYPE_PLUS_SIGN: return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD}; + case TYPE_APPROXIMATELY_SIGN: + // TODO: Introduce a new field for the approximately sign? + return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD}; case TYPE_PERCENT: return {UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD}; case TYPE_PERMILLE: @@ -151,7 +154,7 @@ Field AffixUtils::getFieldForType(AffixPatternType type) { case TYPE_CURRENCY_OVERFLOW: return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -295,6 +298,8 @@ AffixTag AffixUtils::nextToken(AffixTag tag, const UnicodeString &patternString, return makeTag(offset + count, TYPE_MINUS_SIGN, STATE_BASE, 0); case u'+': return makeTag(offset + count, TYPE_PLUS_SIGN, STATE_BASE, 0); + case u'~': + return makeTag(offset + count, TYPE_APPROXIMATELY_SIGN, STATE_BASE, 0); case u'%': return makeTag(offset + count, TYPE_PERCENT, STATE_BASE, 0); case u'‰': @@ -385,7 +390,7 @@ AffixTag AffixUtils::nextToken(AffixTag tag, const UnicodeString &patternString, return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0); } default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } // End of string @@ -414,7 +419,7 @@ AffixTag AffixUtils::nextToken(AffixTag tag, const UnicodeString 
&patternString, case STATE_OVERFLOW_CURR: return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0); default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } diff --git a/contrib/libs/icu/i18n/number_asformat.cpp b/contrib/libs/icu/i18n/number_asformat.cpp index 9d10d1f558..8f2314d689 100644 --- a/contrib/libs/icu/i18n/number_asformat.cpp +++ b/contrib/libs/icu/i18n/number_asformat.cpp @@ -32,7 +32,7 @@ LocalizedNumberFormatterAsFormat::LocalizedNumberFormatterAsFormat( LocalizedNumberFormatterAsFormat::~LocalizedNumberFormatterAsFormat() = default; -UBool LocalizedNumberFormatterAsFormat::operator==(const Format& other) const { +bool LocalizedNumberFormatterAsFormat::operator==(const Format& other) const { auto* _other = dynamic_cast<const LocalizedNumberFormatterAsFormat*>(&other); if (_other == nullptr) { return false; diff --git a/contrib/libs/icu/i18n/number_asformat.h b/contrib/libs/icu/i18n/number_asformat.h index 7b0a1dee6f..394b9a811f 100644 --- a/contrib/libs/icu/i18n/number_asformat.h +++ b/contrib/libs/icu/i18n/number_asformat.h @@ -25,7 +25,6 @@ namespace impl { * A wrapper around LocalizedNumberFormatter implementing the Format interface, enabling improved * compatibility with other APIs. * - * @draft ICU 62 * @see NumberFormatter */ class U_I18N_API LocalizedNumberFormatterAsFormat : public Format { @@ -40,7 +39,7 @@ class U_I18N_API LocalizedNumberFormatterAsFormat : public Format { /** * Equals operator. */ - UBool operator==(const Format& other) const U_OVERRIDE; + bool operator==(const Format& other) const U_OVERRIDE; /** * Creates a copy of this object. 
diff --git a/contrib/libs/icu/i18n/number_capi.cpp b/contrib/libs/icu/i18n/number_capi.cpp index 21a600d6ab..b87dbd93e5 100644 --- a/contrib/libs/icu/i18n/number_capi.cpp +++ b/contrib/libs/icu/i18n/number_capi.cpp @@ -13,6 +13,7 @@ #include "number_utypes.h" #include "numparse_types.h" #include "formattedval_impl.h" +#include "number_decnum.h" #include "unicode/numberformatter.h" #include "unicode/unumberformatter.h" @@ -115,7 +116,8 @@ unumf_formatInt(const UNumberFormatter* uformatter, int64_t value, UFormattedNum auto* result = UFormattedNumberApiHelper::validate(uresult, *ec); if (U_FAILURE(*ec)) { return; } - result->fData.getStringRef().clear(); + result->fData.resetString(); + result->fData.quantity.clear(); result->fData.quantity.setToLong(value); formatter->fFormatter.formatImpl(&result->fData, *ec); } @@ -127,7 +129,8 @@ unumf_formatDouble(const UNumberFormatter* uformatter, double value, UFormattedN auto* result = UFormattedNumberApiHelper::validate(uresult, *ec); if (U_FAILURE(*ec)) { return; } - result->fData.getStringRef().clear(); + result->fData.resetString(); + result->fData.quantity.clear(); result->fData.quantity.setToDouble(value); formatter->fFormatter.formatImpl(&result->fData, *ec); } @@ -139,7 +142,8 @@ unumf_formatDecimal(const UNumberFormatter* uformatter, const char* value, int32 auto* result = UFormattedNumberApiHelper::validate(uresult, *ec); if (U_FAILURE(*ec)) { return; } - result->fData.getStringRef().clear(); + result->fData.resetString(); + result->fData.quantity.clear(); result->fData.quantity.setToDecNumber({value, valueLen}, *ec); if (U_FAILURE(*ec)) { return; } formatter->fFormatter.formatImpl(&result->fData, *ec); @@ -196,6 +200,23 @@ unumf_resultGetAllFieldPositions(const UFormattedNumber* uresult, UFieldPosition result->fData.getAllFieldPositions(fpih, *ec); } +U_CAPI int32_t U_EXPORT2 +unumf_resultToDecimalNumber( + const UFormattedNumber* uresult, + char* dest, + int32_t destCapacity, + UErrorCode* ec) { + const auto* 
result = UFormattedNumberApiHelper::validate(uresult, *ec); + if (U_FAILURE(*ec)) { + return 0; + } + DecNum decnum; + return result->fData.quantity + .toDecNum(decnum, *ec) + .toCharString(*ec) + .extract(dest, destCapacity, *ec); +} + U_CAPI void U_EXPORT2 unumf_close(UNumberFormatter* f) { UErrorCode localStatus = U_ZERO_ERROR; diff --git a/contrib/libs/icu/i18n/number_compact.cpp b/contrib/libs/icu/i18n/number_compact.cpp index e1fef8feb5..62692f444d 100644 --- a/contrib/libs/icu/i18n/number_compact.cpp +++ b/contrib/libs/icu/i18n/number_compact.cpp @@ -55,7 +55,7 @@ int32_t countZeros(const UChar *patternString, int32_t patternLength) { } // namespace // NOTE: patterns and multipliers both get zero-initialized. -CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(TRUE) { +CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(true) { } void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle, @@ -104,14 +104,30 @@ int32_t CompactData::getMultiplier(int32_t magnitude) const { return multipliers[magnitude]; } -const UChar *CompactData::getPattern(int32_t magnitude, StandardPlural::Form plural) const { +const UChar *CompactData::getPattern( + int32_t magnitude, + const PluralRules *rules, + const DecimalQuantity &dq) const { if (magnitude < 0) { return nullptr; } if (magnitude > largestMagnitude) { magnitude = largestMagnitude; } - const UChar *patternString = patterns[getIndex(magnitude, plural)]; + const UChar *patternString = nullptr; + if (dq.hasIntegerValue()) { + int64_t i = dq.toLong(true); + if (i == 0) { + patternString = patterns[getIndex(magnitude, StandardPlural::Form::EQ_0)]; + } else if (i == 1) { + patternString = patterns[getIndex(magnitude, StandardPlural::Form::EQ_1)]; + } + if (patternString != nullptr) { + return patternString; + } + } + StandardPlural::Form plural = utils::getStandardPlural(rules, dq); + patternString = 
patterns[getIndex(magnitude, plural)]; if (patternString == nullptr && plural != StandardPlural::OTHER) { // Fall back to "other" plural variant patternString = patterns[getIndex(magnitude, StandardPlural::OTHER)]; @@ -142,7 +158,7 @@ void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const { // The string was not found; add it to the UVector. // ANDY: This requires a const_cast. Why? - output.addElement(const_cast<UChar *>(pattern), status); + output.addElementX(const_cast<UChar *>(pattern), status); continue_outer: continue; @@ -166,7 +182,6 @@ void CompactData::CompactDataSink::put(const char *key, ResourceValue &value, UB ResourceTable pluralVariantsTable = value.getTable(status); if (U_FAILURE(status)) { return; } for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) { - // Skip this magnitude/plural if we already have it from a child locale. // Note: This also skips USE_FALLBACK entries. StandardPlural::Form plural = StandardPlural::fromString(key, status); @@ -291,8 +306,7 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr magnitude -= multiplier; } - StandardPlural::Form plural = utils::getStandardPlural(rules, quantity); - const UChar *patternString = data.getPattern(magnitude, plural); + const UChar *patternString = data.getPattern(magnitude, rules, quantity); if (patternString == nullptr) { // Use the default (non-compact) modifier. // No need to take any action. 
diff --git a/contrib/libs/icu/i18n/number_compact.h b/contrib/libs/icu/i18n/number_compact.h index 199d39f659..9802b9fb10 100644 --- a/contrib/libs/icu/i18n/number_compact.h +++ b/contrib/libs/icu/i18n/number_compact.h @@ -28,7 +28,10 @@ class CompactData : public MultiplierProducer { int32_t getMultiplier(int32_t magnitude) const U_OVERRIDE; - const UChar *getPattern(int32_t magnitude, StandardPlural::Form plural) const; + const UChar *getPattern( + int32_t magnitude, + const PluralRules *rules, + const DecimalQuantity &dq) const; void getUniquePatterns(UVector &output, UErrorCode &status) const; diff --git a/contrib/libs/icu/i18n/number_currencysymbols.cpp b/contrib/libs/icu/i18n/number_currencysymbols.cpp index 4d6fb2cb1d..da1812f49f 100644 --- a/contrib/libs/icu/i18n/number_currencysymbols.cpp +++ b/contrib/libs/icu/i18n/number_currencysymbols.cpp @@ -44,6 +44,16 @@ UnicodeString CurrencySymbols::getNarrowCurrencySymbol(UErrorCode& status) const return loadSymbol(UCURR_NARROW_SYMBOL_NAME, status); } +UnicodeString CurrencySymbols::getFormalCurrencySymbol(UErrorCode& status) const { + // Note: currently no override is available for formal currency symbol + return loadSymbol(UCURR_FORMAL_SYMBOL_NAME, status); +} + +UnicodeString CurrencySymbols::getVariantCurrencySymbol(UErrorCode& status) const { + // Note: currently no override is available for variant currency symbol + return loadSymbol(UCURR_VARIANT_SYMBOL_NAME, status); +} + UnicodeString CurrencySymbols::getCurrencySymbol(UErrorCode& status) const { if (!fCurrencySymbol.isBogus()) { return fCurrencySymbol; @@ -66,7 +76,7 @@ UnicodeString CurrencySymbols::loadSymbol(UCurrNameStyle selector, UErrorCode& s if (symbol == isoCode) { return UnicodeString(isoCode, 3); } else { - return UnicodeString(TRUE, symbol, symbolLen); + return UnicodeString(true, symbol, symbolLen); } } @@ -94,7 +104,7 @@ UnicodeString CurrencySymbols::getPluralName(StandardPlural::Form plural, UError if (symbol == isoCode) { return 
UnicodeString(isoCode, 3); } else { - return UnicodeString(TRUE, symbol, symbolLen); + return UnicodeString(true, symbol, symbolLen); } } diff --git a/contrib/libs/icu/i18n/number_currencysymbols.h b/contrib/libs/icu/i18n/number_currencysymbols.h index 9996bf96ae..7e38fdf828 100644 --- a/contrib/libs/icu/i18n/number_currencysymbols.h +++ b/contrib/libs/icu/i18n/number_currencysymbols.h @@ -31,6 +31,10 @@ class U_I18N_API CurrencySymbols : public UMemory { UnicodeString getNarrowCurrencySymbol(UErrorCode& status) const; + UnicodeString getFormalCurrencySymbol(UErrorCode& status) const; + + UnicodeString getVariantCurrencySymbol(UErrorCode& status) const; + UnicodeString getCurrencySymbol(UErrorCode& status) const; UnicodeString getIntlCurrencySymbol(UErrorCode& status) const; diff --git a/contrib/libs/icu/i18n/number_decimalquantity.cpp b/contrib/libs/icu/i18n/number_decimalquantity.cpp index 482e93dc7a..6a2847b1c1 100644 --- a/contrib/libs/icu/i18n/number_decimalquantity.cpp +++ b/contrib/libs/icu/i18n/number_decimalquantity.cpp @@ -20,6 +20,7 @@ #include "charstr.h" #include "number_utils.h" #include "uassert.h" +#include "util.h" using namespace icu; using namespace icu::number; @@ -272,6 +273,9 @@ double DecimalQuantity::getPluralOperand(PluralOperand operand) const { return fractionCountWithoutTrailingZeros(); case PLURAL_OPERAND_E: return static_cast<double>(getExponent()); + case PLURAL_OPERAND_C: + // Plural operand `c` is currently an alias for `e`. 
+ return static_cast<double>(getExponent()); default: return std::abs(toDouble()); } @@ -285,6 +289,11 @@ void DecimalQuantity::adjustExponent(int delta) { exponent = exponent + delta; } +void DecimalQuantity::resetExponent() { + adjustMagnitude(exponent); + exponent = 0; +} + bool DecimalQuantity::hasIntegerValue() const { return scale >= 0; } @@ -530,7 +539,11 @@ void DecimalQuantity::_setToDecNum(const DecNum& decnum, UErrorCode& status) { if (decnum.isNegative()) { flags |= NEGATIVE_FLAG; } - if (!decnum.isZero()) { + if (decnum.isNaN()) { + flags |= NAN_FLAG; + } else if (decnum.isInfinity()) { + flags |= INFINITY_FLAG; + } else if (!decnum.isZero()) { readDecNumberToBcd(decnum); compact(); } @@ -626,19 +639,24 @@ double DecimalQuantity::toDouble() const { &count); } -void DecimalQuantity::toDecNum(DecNum& output, UErrorCode& status) const { +DecNum& DecimalQuantity::toDecNum(DecNum& output, UErrorCode& status) const { // Special handling for zero if (precision == 0) { output.setTo("0", status); + return output; } // Use the BCD constructor. We need to do a little bit of work to convert, though. // The decNumber constructor expects most-significant first, but we store least-significant first. 
- MaybeStackArray<uint8_t, 20> ubcd(precision); + MaybeStackArray<uint8_t, 20> ubcd(precision, status); + if (U_FAILURE(status)) { + return output; + } for (int32_t m = 0; m < precision; m++) { ubcd[precision - m - 1] = static_cast<uint8_t>(getDigitPos(m)); } output.setTo(ubcd.getAlias(), precision, scale, isNegative(), status); + return output; } void DecimalQuantity::truncate() { @@ -814,6 +832,7 @@ void DecimalQuantity::roundToMagnitude(int32_t magnitude, RoundingMode roundingM // Perform truncation if (position >= precision) { + U_ASSERT(trailingDigit == 0); setBcdToZero(); scale = magnitude; } else { @@ -831,6 +850,10 @@ void DecimalQuantity::roundToMagnitude(int32_t magnitude, RoundingMode roundingM // do not return: use the bubbling logic below } else { setDigitPos(0, 5); + // If the quantity was set to 0, we may need to restore a digit. + if (precision == 0) { + precision = 1; + } // compact not necessary: digit at position 0 is nonzero return; } @@ -1004,13 +1027,8 @@ void DecimalQuantity::shiftLeft(int32_t numDigits) { } if (usingBytes) { ensureCapacity(precision + numDigits); - int i = precision + numDigits - 1; - for (; i >= numDigits; i--) { - fBCD.bcdBytes.ptr[i] = fBCD.bcdBytes.ptr[i - numDigits]; - } - for (; i >= 0; i--) { - fBCD.bcdBytes.ptr[i] = 0; - } + uprv_memmove(fBCD.bcdBytes.ptr + numDigits, fBCD.bcdBytes.ptr, precision); + uprv_memset(fBCD.bcdBytes.ptr, 0, numDigits); } else { fBCD.bcdLong <<= (numDigits * 4); } @@ -1324,7 +1342,11 @@ bool DecimalQuantity::operator==(const DecimalQuantity& other) const { } UnicodeString DecimalQuantity::toString() const { - MaybeStackArray<char, 30> digits(precision + 1); + UErrorCode localStatus = U_ZERO_ERROR; + MaybeStackArray<char, 30> digits(precision + 1, localStatus); + if (U_FAILURE(localStatus)) { + return ICU_Utility::makeBogusString(); + } for (int32_t i = 0; i < precision; i++) { digits[i] = getDigitPos(precision - i - 1) + '0'; } diff --git a/contrib/libs/icu/i18n/number_decimalquantity.h 
b/contrib/libs/icu/i18n/number_decimalquantity.h index d9b35c0336..107c09a96a 100644 --- a/contrib/libs/icu/i18n/number_decimalquantity.h +++ b/contrib/libs/icu/i18n/number_decimalquantity.h @@ -20,7 +20,7 @@ namespace impl { class DecNum; /** - * An class for representing a number to be processed by the decimal formatting pipeline. Includes + * A class for representing a number to be processed by the decimal formatting pipeline. Includes * methods for rounding, plural rules, and decimal digit extraction. * * <p>By design, this is NOT IMMUTABLE and NOT THREAD SAFE. It is intended to be an intermediate @@ -136,7 +136,7 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { * this method with delta=-3 will change the value to "1.23456". * * @param delta The number of magnitudes of ten to change by. - * @return true if integer overflow occured; false otherwise. + * @return true if integer overflow occurred; false otherwise. */ bool adjustMagnitude(int32_t delta); @@ -167,6 +167,11 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { void adjustExponent(int32_t delta); /** + * Resets the DecimalQuantity to the value before adjustMagnitude and adjustExponent. + */ + void resetExponent(); + + /** * @return Whether the value represented by this {@link DecimalQuantity} is * zero, infinity, or NaN. */ @@ -209,7 +214,7 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { double toDouble() const; /** Computes a DecNum representation of this DecimalQuantity, saving it to the output parameter. */ - void toDecNum(DecNum& output, UErrorCode& status) const; + DecNum& toDecNum(DecNum& output, UErrorCode& status) const; DecimalQuantity &setToInt(int32_t n); @@ -217,7 +222,13 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { DecimalQuantity &setToDouble(double n); - /** decNumber is similar to BigDecimal in Java. 
*/ + /** + * Produces a DecimalQuantity that was parsed from a string by the decNumber + * C Library. + * + * decNumber is similar to BigDecimal in Java, and supports parsing strings + * such as "123.456621E+40". + */ DecimalQuantity &setToDecNumber(StringPiece n, UErrorCode& status); /** Internal method if the caller already has a DecNum. */ @@ -422,7 +433,9 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { /** * Sets the digit in the BCD list. This method only sets the digit; it is the caller's - * responsibility to call {@link #compact} after setting the digit. + * responsibility to call {@link #compact} after setting the digit, and to ensure + * that the precision field is updated to reflect the correct number of digits if a + * nonzero digit is added to the decimal. * * @param position The position of the digit to pop, counted in BCD units from the least * significant digit. If outside the range supported by the implementation, an AssertionError diff --git a/contrib/libs/icu/i18n/number_decimfmtprops.cpp b/contrib/libs/icu/i18n/number_decimfmtprops.cpp index 30481ce5bf..7fa58bbc7a 100644 --- a/contrib/libs/icu/i18n/number_decimfmtprops.cpp +++ b/contrib/libs/icu/i18n/number_decimfmtprops.cpp @@ -21,7 +21,7 @@ char kRawDefaultProperties[sizeof(DecimalFormatProperties)]; icu::UInitOnce gDefaultPropertiesInitOnce = U_INITONCE_INITIALIZER; void U_CALLCONV initDefaultProperties(UErrorCode&) { - // can't fail, uses placement new into staticly allocated space. + // can't fail, uses placement new into statically allocated space. 
new(kRawDefaultProperties) DecimalFormatProperties(); // set to the default instance } @@ -40,6 +40,7 @@ void DecimalFormatProperties::clear() { decimalPatternMatchRequired = false; decimalSeparatorAlwaysShown = false; exponentSignAlwaysShown = false; + currencyAsDecimal = false; formatFailIfMoreThanMaxDigits = false; formatWidth = -1; groupingSize = -1; @@ -88,6 +89,7 @@ DecimalFormatProperties::_equals(const DecimalFormatProperties& other, bool igno eq = eq && currencyUsage == other.currencyUsage; eq = eq && decimalSeparatorAlwaysShown == other.decimalSeparatorAlwaysShown; eq = eq && exponentSignAlwaysShown == other.exponentSignAlwaysShown; + eq = eq && currencyAsDecimal == other.currencyAsDecimal; eq = eq && formatFailIfMoreThanMaxDigits == other.formatFailIfMoreThanMaxDigits; eq = eq && formatWidth == other.formatWidth; eq = eq && magnitudeMultiplier == other.magnitudeMultiplier; diff --git a/contrib/libs/icu/i18n/number_decimfmtprops.h b/contrib/libs/icu/i18n/number_decimfmtprops.h index 1ce84d9dc3..5f72f64984 100644 --- a/contrib/libs/icu/i18n/number_decimfmtprops.h +++ b/contrib/libs/icu/i18n/number_decimfmtprops.h @@ -38,7 +38,7 @@ namespace impl { // Exported as U_I18N_API because it is a public member field of exported DecimalFormatProperties // Using this wrapper is rather unfortunate, but is needed on Windows platforms in order to allow -// for DLL-exporting an fully specified template instantiation. +// for DLL-exporting a fully specified template instantiation. 
class U_I18N_API CurrencyPluralInfoWrapper { public: LocalPointer<CurrencyPluralInfo> fPtr; @@ -52,7 +52,8 @@ public: } CurrencyPluralInfoWrapper& operator=(const CurrencyPluralInfoWrapper& other) { - if (!other.fPtr.isNull()) { + if (this != &other && // self-assignment: no-op + !other.fPtr.isNull()) { fPtr.adoptInstead(new CurrencyPluralInfo(*other.fPtr)); } return *this; @@ -104,6 +105,7 @@ struct U_I18N_API DecimalFormatProperties : public UMemory { bool decimalPatternMatchRequired; bool decimalSeparatorAlwaysShown; bool exponentSignAlwaysShown; + bool currencyAsDecimal; bool formatFailIfMoreThanMaxDigits; // ICU4C-only int32_t formatWidth; int32_t groupingSize; diff --git a/contrib/libs/icu/i18n/number_decnum.h b/contrib/libs/icu/i18n/number_decnum.h index 0c7399dbdd..94a0b31bcb 100644 --- a/contrib/libs/icu/i18n/number_decnum.h +++ b/contrib/libs/icu/i18n/number_decnum.h @@ -9,6 +9,7 @@ #include "decNumber.h" #include "charstr.h" +#include "bytesinkutil.h" U_NAMESPACE_BEGIN @@ -55,8 +56,22 @@ class U_I18N_API DecNum : public UMemory { bool isZero() const; + /** Is infinity or NaN */ + bool isSpecial() const; + + bool isInfinity() const; + + bool isNaN() const; + void toString(ByteSink& output, UErrorCode& status) const; + inline CharString toCharString(UErrorCode& status) const { + CharString cstr; + CharStringByteSink sink(&cstr); + toString(sink, status); + return cstr; + } + inline const decNumber* getRawDecNumber() const { return fData.getAlias(); } diff --git a/contrib/libs/icu/i18n/number_fluent.cpp b/contrib/libs/icu/i18n/number_fluent.cpp index 9cdb8b7156..fd486afb51 100644 --- a/contrib/libs/icu/i18n/number_fluent.cpp +++ b/contrib/libs/icu/i18n/number_fluent.cpp @@ -13,6 +13,7 @@ #include "number_asformat.h" #include "number_utils.h" #include "number_utypes.h" +#include "number_mapper.h" #include "util.h" #include "fphdlimp.h" @@ -274,6 +275,34 @@ Derived NumberFormatterSettings<Derived>::scale(const Scale& scale)&& { } template<typename Derived> 
+Derived NumberFormatterSettings<Derived>::usage(const StringPiece usage) const& { + Derived copy(*this); + copy.fMacros.usage.set(usage); + return copy; +} + +template<typename Derived> +Derived NumberFormatterSettings<Derived>::usage(const StringPiece usage)&& { + Derived move(std::move(*this)); + move.fMacros.usage.set(usage); + return move; +} + +template<typename Derived> +Derived NumberFormatterSettings<Derived>::unitDisplayCase(const StringPiece unitDisplayCase) const& { + Derived copy(*this); + copy.fMacros.unitDisplayCase.set(unitDisplayCase); + return copy; +} + +template<typename Derived> +Derived NumberFormatterSettings<Derived>::unitDisplayCase(const StringPiece unitDisplayCase)&& { + Derived move(std::move(*this)); + move.fMacros.unitDisplayCase.set(unitDisplayCase); + return move; +} + +template<typename Derived> Derived NumberFormatterSettings<Derived>::padding(const Padder& padder) const& { Derived copy(*this); copy.fMacros.padder = padder; @@ -400,7 +429,8 @@ LocalizedNumberFormatter::LocalizedNumberFormatter(const LNF& other) LocalizedNumberFormatter::LocalizedNumberFormatter(const NFS<LNF>& other) : NFS<LNF>(other) { - // No additional fields to assign (let call count and compiled formatter reset to defaults) + UErrorCode localStatus = U_ZERO_ERROR; // Can't bubble up the error + lnfCopyHelper(static_cast<const LNF&>(other), localStatus); } LocalizedNumberFormatter::LocalizedNumberFormatter(LocalizedNumberFormatter&& src) U_NOEXCEPT @@ -408,38 +438,26 @@ LocalizedNumberFormatter::LocalizedNumberFormatter(LocalizedNumberFormatter&& sr LocalizedNumberFormatter::LocalizedNumberFormatter(NFS<LNF>&& src) U_NOEXCEPT : NFS<LNF>(std::move(src)) { - // For the move operators, copy over the compiled formatter. - // Note: if the formatter is not compiled, call count information is lost. 
- if (static_cast<LNF&&>(src).fCompiled != nullptr) { - lnfMoveHelper(static_cast<LNF&&>(src)); - } + lnfMoveHelper(std::move(static_cast<LNF&&>(src))); } LocalizedNumberFormatter& LocalizedNumberFormatter::operator=(const LNF& other) { + if (this == &other) { return *this; } // self-assignment: no-op NFS<LNF>::operator=(static_cast<const NFS<LNF>&>(other)); - // Reset to default values. - clear(); + UErrorCode localStatus = U_ZERO_ERROR; // Can't bubble up the error + lnfCopyHelper(other, localStatus); return *this; } LocalizedNumberFormatter& LocalizedNumberFormatter::operator=(LNF&& src) U_NOEXCEPT { NFS<LNF>::operator=(static_cast<NFS<LNF>&&>(src)); - // For the move operators, copy over the compiled formatter. - // Note: if the formatter is not compiled, call count information is lost. - if (static_cast<LNF&&>(src).fCompiled != nullptr) { - // Formatter is compiled - lnfMoveHelper(static_cast<LNF&&>(src)); - } else { - clear(); - } + lnfMoveHelper(std::move(src)); return *this; } -void LocalizedNumberFormatter::clear() { - // Reset to default values. +void LocalizedNumberFormatter::resetCompiled() { auto* callCount = reinterpret_cast<u_atomic_int32_t*>(fUnsafeCallCount); umtx_storeRelease(*callCount, 0); - delete fCompiled; fCompiled = nullptr; } @@ -447,19 +465,56 @@ void LocalizedNumberFormatter::lnfMoveHelper(LNF&& src) { // Copy over the compiled formatter and set call count to INT32_MIN as in computeCompiled(). // Don't copy the call count directly because doing so requires a loadAcquire/storeRelease. // The bits themselves appear to be platform-dependent, so copying them might not be safe. - auto* callCount = reinterpret_cast<u_atomic_int32_t*>(fUnsafeCallCount); - umtx_storeRelease(*callCount, INT32_MIN); delete fCompiled; - fCompiled = src.fCompiled; - // Reset the source object to leave it in a safe state. 
- auto* srcCallCount = reinterpret_cast<u_atomic_int32_t*>(src.fUnsafeCallCount); - umtx_storeRelease(*srcCallCount, 0); - src.fCompiled = nullptr; + if (src.fCompiled != nullptr) { + auto* callCount = reinterpret_cast<u_atomic_int32_t*>(fUnsafeCallCount); + umtx_storeRelease(*callCount, INT32_MIN); + fCompiled = src.fCompiled; + // Reset the source object to leave it in a safe state. + src.resetCompiled(); + } else { + resetCompiled(); + } + + // Unconditionally move the warehouse + delete fWarehouse; + fWarehouse = src.fWarehouse; + src.fWarehouse = nullptr; +} + +void LocalizedNumberFormatter::lnfCopyHelper(const LNF&, UErrorCode& status) { + // When copying, always reset the compiled formatter. + delete fCompiled; + resetCompiled(); + + // If MacroProps has a reference to AffixPatternProvider, we need to copy it. + // If MacroProps has a reference to PluralRules, copy that one, too. + delete fWarehouse; + if (fMacros.affixProvider || fMacros.rules) { + LocalPointer<DecimalFormatWarehouse> warehouse(new DecimalFormatWarehouse(), status); + if (U_FAILURE(status)) { + fWarehouse = nullptr; + return; + } + if (fMacros.affixProvider) { + warehouse->affixProvider.setTo(fMacros.affixProvider, status); + fMacros.affixProvider = &warehouse->affixProvider.get(); + } + if (fMacros.rules) { + warehouse->rules.adoptInsteadAndCheckErrorCode( + new PluralRules(*fMacros.rules), status); + fMacros.rules = warehouse->rules.getAlias(); + } + fWarehouse = warehouse.orphan(); + } else { + fWarehouse = nullptr; + } } LocalizedNumberFormatter::~LocalizedNumberFormatter() { delete fCompiled; + delete fWarehouse; } LocalizedNumberFormatter::LocalizedNumberFormatter(const MacroProps& macros, const Locale& locale) { @@ -480,123 +535,6 @@ LocalizedNumberFormatter UnlocalizedNumberFormatter::locale(const Locale& locale return LocalizedNumberFormatter(std::move(fMacros), locale); } -SymbolsWrapper::SymbolsWrapper(const SymbolsWrapper& other) { - doCopyFrom(other); -} - 
-SymbolsWrapper::SymbolsWrapper(SymbolsWrapper&& src) U_NOEXCEPT { - doMoveFrom(std::move(src)); -} - -SymbolsWrapper& SymbolsWrapper::operator=(const SymbolsWrapper& other) { - if (this == &other) { - return *this; - } - doCleanup(); - doCopyFrom(other); - return *this; -} - -SymbolsWrapper& SymbolsWrapper::operator=(SymbolsWrapper&& src) U_NOEXCEPT { - if (this == &src) { - return *this; - } - doCleanup(); - doMoveFrom(std::move(src)); - return *this; -} - -SymbolsWrapper::~SymbolsWrapper() { - doCleanup(); -} - -void SymbolsWrapper::setTo(const DecimalFormatSymbols& dfs) { - doCleanup(); - fType = SYMPTR_DFS; - fPtr.dfs = new DecimalFormatSymbols(dfs); -} - -void SymbolsWrapper::setTo(const NumberingSystem* ns) { - doCleanup(); - fType = SYMPTR_NS; - fPtr.ns = ns; -} - -void SymbolsWrapper::doCopyFrom(const SymbolsWrapper& other) { - fType = other.fType; - switch (fType) { - case SYMPTR_NONE: - // No action necessary - break; - case SYMPTR_DFS: - // Memory allocation failures are exposed in copyErrorTo() - if (other.fPtr.dfs != nullptr) { - fPtr.dfs = new DecimalFormatSymbols(*other.fPtr.dfs); - } else { - fPtr.dfs = nullptr; - } - break; - case SYMPTR_NS: - // Memory allocation failures are exposed in copyErrorTo() - if (other.fPtr.ns != nullptr) { - fPtr.ns = new NumberingSystem(*other.fPtr.ns); - } else { - fPtr.ns = nullptr; - } - break; - } -} - -void SymbolsWrapper::doMoveFrom(SymbolsWrapper&& src) { - fType = src.fType; - switch (fType) { - case SYMPTR_NONE: - // No action necessary - break; - case SYMPTR_DFS: - fPtr.dfs = src.fPtr.dfs; - src.fPtr.dfs = nullptr; - break; - case SYMPTR_NS: - fPtr.ns = src.fPtr.ns; - src.fPtr.ns = nullptr; - break; - } -} - -void SymbolsWrapper::doCleanup() { - switch (fType) { - case SYMPTR_NONE: - // No action necessary - break; - case SYMPTR_DFS: - delete fPtr.dfs; - break; - case SYMPTR_NS: - delete fPtr.ns; - break; - } -} - -bool SymbolsWrapper::isDecimalFormatSymbols() const { - return fType == SYMPTR_DFS; -} - -bool 
SymbolsWrapper::isNumberingSystem() const { - return fType == SYMPTR_NS; -} - -const DecimalFormatSymbols* SymbolsWrapper::getDecimalFormatSymbols() const { - U_ASSERT(fType == SYMPTR_DFS); - return fPtr.dfs; -} - -const NumberingSystem* SymbolsWrapper::getNumberingSystem() const { - U_ASSERT(fType == SYMPTR_NS); - return fPtr.ns; -} - - FormattedNumber LocalizedNumberFormatter::formatInt(int64_t value, UErrorCode& status) const { if (U_FAILURE(status)) { return FormattedNumber(U_ILLEGAL_ARGUMENT_ERROR); } auto results = new UFormattedNumberData(); @@ -676,9 +614,9 @@ LocalizedNumberFormatter::formatDecimalQuantity(const DecimalQuantity& dq, UErro void LocalizedNumberFormatter::formatImpl(impl::UFormattedNumberData* results, UErrorCode& status) const { if (computeCompiled(status)) { - fCompiled->format(results->quantity, results->getStringRef(), status); + fCompiled->format(results, status); } else { - NumberFormatterImpl::formatStatic(fMacros, results->quantity, results->getStringRef(), status); + NumberFormatterImpl::formatStatic(fMacros, results, status); } if (U_FAILURE(status)) { return; @@ -760,6 +698,10 @@ int32_t LocalizedNumberFormatter::getCallCount() const { // Note: toFormat defined in number_asformat.cpp +const DecimalFormatSymbols* LocalizedNumberFormatter::getDecimalFormatSymbols() const { + return fMacros.symbols.getDecimalFormatSymbols(); +} + #if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) // Warning 4661. 
#pragma warning(pop) diff --git a/contrib/libs/icu/i18n/number_formatimpl.cpp b/contrib/libs/icu/i18n/number_formatimpl.cpp index 5bba09cfb5..96e3e9e7c6 100644 --- a/contrib/libs/icu/i18n/number_formatimpl.cpp +++ b/contrib/libs/icu/i18n/number_formatimpl.cpp @@ -25,20 +25,21 @@ using namespace icu::number; using namespace icu::number::impl; -MicroPropsGenerator::~MicroPropsGenerator() = default; - - NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, UErrorCode& status) : NumberFormatterImpl(macros, true, status) { } -int32_t NumberFormatterImpl::formatStatic(const MacroProps& macros, DecimalQuantity& inValue, - FormattedStringBuilder& outString, UErrorCode& status) { +int32_t NumberFormatterImpl::formatStatic(const MacroProps &macros, UFormattedNumberData *results, + UErrorCode &status) { + DecimalQuantity &inValue = results->quantity; + FormattedStringBuilder &outString = results->getStringRef(); NumberFormatterImpl impl(macros, false, status); MicroProps& micros = impl.preProcessUnsafe(inValue, status); if (U_FAILURE(status)) { return 0; } int32_t length = writeNumber(micros, inValue, outString, 0, status); length += writeAffixes(micros, outString, 0, length, status); + results->outputUnit = std::move(micros.outputUnit); + results->gender = micros.gender; return length; } @@ -54,13 +55,16 @@ int32_t NumberFormatterImpl::getPrefixSuffixStatic(const MacroProps& macros, Sig // The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation. // See MicroProps::processQuantity() for details. 
-int32_t NumberFormatterImpl::format(DecimalQuantity& inValue, FormattedStringBuilder& outString, - UErrorCode& status) const { +int32_t NumberFormatterImpl::format(UFormattedNumberData *results, UErrorCode &status) const { + DecimalQuantity &inValue = results->quantity; + FormattedStringBuilder &outString = results->getStringRef(); MicroProps micros; preProcess(inValue, micros, status); if (U_FAILURE(status)) { return 0; } int32_t length = writeNumber(micros, inValue, outString, 0, status); length += writeAffixes(micros, outString, 0, length, status); + results->outputUnit = std::move(micros.outputUnit); + results->gender = micros.gender; return length; } @@ -130,12 +134,15 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, // Pre-compute a few values for efficiency. bool isCurrency = utils::unitIsCurrency(macros.unit); - bool isNoUnit = utils::unitIsNoUnit(macros.unit); + bool isBaseUnit = utils::unitIsBaseUnit(macros.unit); bool isPercent = utils::unitIsPercent(macros.unit); bool isPermille = utils::unitIsPermille(macros.unit); + bool isCompactNotation = macros.notation.fType == Notation::NTN_COMPACT; bool isAccounting = - macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS || - macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; + macros.sign == UNUM_SIGN_ACCOUNTING || + macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS || + macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO || + macros.sign == UNUM_SIGN_ACCOUNTING_NEGATIVE; CurrencyUnit currency(u"", status); if (isCurrency) { currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit @@ -144,8 +151,20 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) { unitWidth = macros.unitWidth; } - bool isCldrUnit = !isCurrency && !isNoUnit && - (unitWidth == UNUM_UNIT_WIDTH_FULL_NAME || !(isPercent || isPermille)); + // Use CLDR unit data for all MeasureUnits (not 
currency and not + // no-unit), except use the dedicated percent pattern for percent and + // permille. However, use the CLDR unit data for percent/permille if a + // long name was requested OR if compact notation is being used, since + // compact notation overrides the middle modifier (micros.modMiddle) + // normally used for the percent pattern. + bool isCldrUnit = !isCurrency + && !isBaseUnit + && (unitWidth == UNUM_UNIT_WIDTH_FULL_NAME + || !(isPercent || isPermille) + || isCompactNotation + ); + bool isMixedUnit = isCldrUnit && (uprv_strcmp(macros.unit.getType(), "") == 0) && + macros.unit.getComplexity(status) == UMEASURE_UNIT_MIXED; // Select the numbering system. LocalPointer<const NumberingSystem> nsLocal; @@ -162,6 +181,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, uprv_strncpy(fMicros.nsName, nsName, 8); fMicros.nsName[8] = 0; // guarantee NUL-terminated + // Default gender: none. + fMicros.gender = ""; + // Resolve the symbols. Do this here because currency may need to customize them. if (macros.symbols.isDecimalFormatSymbols()) { fMicros.symbols = macros.symbols.getDecimalFormatSymbols(); @@ -222,6 +244,24 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR /// ///////////////////////////////////////////////////////////////////////////////////// + // Unit Preferences and Conversions as our first step + if (macros.usage.isSet()) { + if (!isCldrUnit) { + // We only support "usage" when the input unit is specified, and is + // a CLDR Unit. 
+ status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + auto usagePrefsHandler = + new UsagePrefsHandler(macros.locale, macros.unit, macros.usage.fValue, chain, status); + fUsagePrefsHandler.adoptInsteadAndCheckErrorCode(usagePrefsHandler, status); + chain = fUsagePrefsHandler.getAlias(); + } else if (isMixedUnit) { + auto unitConversionHandler = new UnitConversionHandler(macros.unit, chain, status); + fUnitConversionHandler.adoptInsteadAndCheckErrorCode(unitConversionHandler, status); + chain = fUnitConversionHandler.getAlias(); + } + // Multiplier if (macros.scale.isValid()) { fMicros.helpers.multiplier.setAndChain(macros.scale, chain); @@ -232,20 +272,18 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, Precision precision; if (!macros.precision.isBogus()) { precision = macros.precision; - } else if (macros.notation.fType == Notation::NTN_COMPACT) { + } else if (isCompactNotation) { precision = Precision::integer().withMinDigits(2); } else if (isCurrency) { precision = Precision::currency(UCURR_USAGE_STANDARD); + } else if (macros.usage.isSet()) { + // Bogus Precision - it will get set in the UsagePrefsHandler instead + precision = Precision(); } else { precision = Precision::maxFraction(6); } UNumberFormatRoundingMode roundingMode; - if (macros.roundingMode != kDefaultMode) { - roundingMode = macros.roundingMode; - } else { - // Temporary until ICU 64 - roundingMode = precision.fRoundingMode; - } + roundingMode = macros.roundingMode; fMicros.rounder = {precision, roundingMode, currency, status}; if (U_FAILURE(status)) { return nullptr; @@ -254,7 +292,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, // Grouping strategy if (!macros.grouper.isBogus()) { fMicros.grouping = macros.grouper; - } else if (macros.notation.fType == Notation::NTN_COMPACT) { + } else if (isCompactNotation) { // Compact notation uses minGrouping by default since ICU 59 fMicros.grouping = 
Grouper::forStrategy(UNUM_GROUPING_MIN2); } else { @@ -314,11 +352,12 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, return nullptr; } fPatternModifier.adoptInstead(patternModifier); - patternModifier->setPatternInfo( - macros.affixProvider != nullptr ? macros.affixProvider - : static_cast<const AffixPatternProvider*>(fPatternInfo.getAlias()), - kUndefinedField); - patternModifier->setPatternAttributes(fMicros.sign, isPermille); + const AffixPatternProvider* affixProvider = + macros.affixProvider != nullptr + ? macros.affixProvider + : static_cast<const AffixPatternProvider*>(fPatternInfo.getAlias()); + patternModifier->setPatternInfo(affixProvider, kUndefinedField); + patternModifier->setPatternAttributes(fMicros.sign, isPermille, macros.approximately); if (patternModifier->needsPlurals()) { patternModifier->setSymbols( fMicros.symbols, @@ -330,32 +369,66 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, patternModifier->setSymbols(fMicros.symbols, currency, unitWidth, nullptr, status); } if (safe) { - fImmutablePatternModifier.adoptInstead(patternModifier->createImmutable(status)); + fImmutablePatternModifier.adoptInsteadAndCheckErrorCode(patternModifier->createImmutable(status), + status); } if (U_FAILURE(status)) { return nullptr; } + // currencyAsDecimal + if (affixProvider->currencyAsDecimal()) { + fMicros.currencyAsDecimal = patternModifier->getCurrencySymbolForUnitWidth(status); + } + // Outer modifier (CLDR units and currency long names) if (isCldrUnit) { - fLongNameHandler.adoptInstead( - LongNameHandler::forMeasureUnit( - macros.locale, - macros.unit, - macros.perUnit, - unitWidth, - resolvePluralRules(macros.rules, macros.locale, status), - chain, - status)); - chain = fLongNameHandler.getAlias(); + const char *unitDisplayCase = ""; + if (macros.unitDisplayCase.isSet()) { + unitDisplayCase = macros.unitDisplayCase.fValue; + } + if (macros.usage.isSet()) { + 
fLongNameMultiplexer.adoptInsteadAndCheckErrorCode( + LongNameMultiplexer::forMeasureUnits( + macros.locale, *fUsagePrefsHandler->getOutputUnits(), unitWidth, unitDisplayCase, + resolvePluralRules(macros.rules, macros.locale, status), chain, status), + status); + chain = fLongNameMultiplexer.getAlias(); + } else if (isMixedUnit) { + fMixedUnitLongNameHandler.adoptInsteadAndCheckErrorCode(new MixedUnitLongNameHandler(), + status); + MixedUnitLongNameHandler::forMeasureUnit( + macros.locale, macros.unit, unitWidth, unitDisplayCase, + resolvePluralRules(macros.rules, macros.locale, status), chain, + fMixedUnitLongNameHandler.getAlias(), status); + chain = fMixedUnitLongNameHandler.getAlias(); + } else { + MeasureUnit unit = macros.unit; + if (!utils::unitIsBaseUnit(macros.perUnit)) { + unit = unit.product(macros.perUnit.reciprocal(status), status); + // This isn't strictly necessary, but was what we specced out + // when perUnit became a backward-compatibility thing: + // unit/perUnit use case is only valid if both units are + // built-ins, or the product is a built-in. 
+ if (uprv_strcmp(unit.getType(), "") == 0 && + (uprv_strcmp(macros.unit.getType(), "") == 0 || + uprv_strcmp(macros.perUnit.getType(), "") == 0)) { + status = U_UNSUPPORTED_ERROR; + return nullptr; + } + } + fLongNameHandler.adoptInsteadAndCheckErrorCode(new LongNameHandler(), status); + LongNameHandler::forMeasureUnit(macros.locale, unit, unitWidth, unitDisplayCase, + resolvePluralRules(macros.rules, macros.locale, status), + chain, fLongNameHandler.getAlias(), status); + chain = fLongNameHandler.getAlias(); + } } else if (isCurrency && unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { - fLongNameHandler.adoptInstead( - LongNameHandler::forCurrencyLongNames( - macros.locale, - currency, - resolvePluralRules(macros.rules, macros.locale, status), - chain, - status)); + fLongNameHandler.adoptInsteadAndCheckErrorCode( + LongNameHandler::forCurrencyLongNames( + macros.locale, currency, resolvePluralRules(macros.rules, macros.locale, status), chain, + status), + status); chain = fLongNameHandler.getAlias(); } else { // No outer modifier required @@ -366,7 +439,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, } // Compact notation - if (macros.notation.fType == Notation::NTN_COMPACT) { + if (isCompactNotation) { CompactType compactType = (isCurrency && unitWidth != UNUM_UNIT_WIDTH_FULL_NAME) ? 
CompactType::TYPE_CURRENCY : CompactType::TYPE_DECIMAL; auto newCompactHandler = new CompactHandler( @@ -379,6 +452,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, safe, chain, status); + if (U_FAILURE(status)) { + return nullptr; + } if (newCompactHandler == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return nullptr; @@ -417,6 +493,7 @@ NumberFormatterImpl::resolvePluralRules(const PluralRules* rulesPtr, const Local int32_t NumberFormatterImpl::writeAffixes(const MicroProps& micros, FormattedStringBuilder& string, int32_t start, int32_t end, UErrorCode& status) { + U_ASSERT(micros.modOuter != nullptr); // Always apply the inner modifier (which is "strong"). int32_t length = micros.modInner->apply(string, start, end, status); if (micros.padding.isValid()) { @@ -453,15 +530,27 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuanti // Add the decimal point if (quantity.getLowerDisplayMagnitude() < 0 || micros.decimal == UNUM_DECIMAL_SEPARATOR_ALWAYS) { - length += string.insert( + if (!micros.currencyAsDecimal.isBogus()) { + length += string.insert( length + index, - micros.useCurrency ? 
micros.symbols->getSymbol( - DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol) : micros - .symbols - ->getSymbol( - DecimalFormatSymbols::ENumberFormatSymbol::kDecimalSeparatorSymbol), + micros.currencyAsDecimal, + {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, + status); + } else if (micros.useCurrency) { + length += string.insert( + length + index, + micros.symbols->getSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol), {UFIELD_CATEGORY_NUMBER, UNUM_DECIMAL_SEPARATOR_FIELD}, status); + } else { + length += string.insert( + length + index, + micros.symbols->getSymbol( + DecimalFormatSymbols::ENumberFormatSymbol::kDecimalSeparatorSymbol), + {UFIELD_CATEGORY_NUMBER, UNUM_DECIMAL_SEPARATOR_FIELD}, + status); + } } // Add the fraction digits diff --git a/contrib/libs/icu/i18n/number_formatimpl.h b/contrib/libs/icu/i18n/number_formatimpl.h index 084bc4a9d0..d7be1468b6 100644 --- a/contrib/libs/icu/i18n/number_formatimpl.h +++ b/contrib/libs/icu/i18n/number_formatimpl.h @@ -10,11 +10,13 @@ #include "number_types.h" #include "formatted_string_builder.h" #include "number_patternstring.h" +#include "number_usageprefs.h" #include "number_utils.h" #include "number_patternmodifier.h" #include "number_longnames.h" #include "number_compact.h" #include "number_microprops.h" +#include "number_utypes.h" U_NAMESPACE_BEGIN namespace number { namespace impl { @@ -32,11 +34,16 @@ class NumberFormatterImpl : public UMemory { NumberFormatterImpl(const MacroProps &macros, UErrorCode &status); /** + * Default constructor; leaves the NumberFormatterImpl in an undefined state. + * Takes an error code to prevent the method from being called accidentally. + */ + NumberFormatterImpl(UErrorCode &) {} + + /** * Builds and evaluates an "unsafe" MicroPropsGenerator, which is cheaper but can be used only once. 
*/ - static int32_t - formatStatic(const MacroProps &macros, DecimalQuantity &inValue, FormattedStringBuilder &outString, - UErrorCode &status); + static int32_t formatStatic(const MacroProps &macros, UFormattedNumberData *results, + UErrorCode &status); /** * Prints only the prefix and suffix; used for DecimalFormat getters. @@ -51,7 +58,7 @@ class NumberFormatterImpl : public UMemory { /** * Evaluates the "safe" MicroPropsGenerator created by "fromMacros". */ - int32_t format(DecimalQuantity& inValue, FormattedStringBuilder& outString, UErrorCode& status) const; + int32_t format(UFormattedNumberData *results, UErrorCode &status) const; /** * Like format(), but saves the result into an output MicroProps without additional processing. @@ -82,7 +89,9 @@ class NumberFormatterImpl : public UMemory { int32_t end, UErrorCode& status); private: - // Head of the MicroPropsGenerator linked list: + // Head of the MicroPropsGenerator linked list. Subclasses' processQuantity + // methods process this list in a parent-first order, such that the last + // item added, which this points to, typically has its logic executed last. const MicroPropsGenerator *fMicroPropsGenerator = nullptr; // Tail of the list: @@ -90,21 +99,22 @@ class NumberFormatterImpl : public UMemory { // Other fields possibly used by the number formatting pipeline: // TODO: Convert more of these LocalPointers to value objects to reduce the number of news? 
+ LocalPointer<const UsagePrefsHandler> fUsagePrefsHandler; + LocalPointer<const UnitConversionHandler> fUnitConversionHandler; LocalPointer<const DecimalFormatSymbols> fSymbols; LocalPointer<const PluralRules> fRules; LocalPointer<const ParsedPatternInfo> fPatternInfo; LocalPointer<const ScientificHandler> fScientificHandler; LocalPointer<MutablePatternModifier> fPatternModifier; LocalPointer<ImmutablePatternModifier> fImmutablePatternModifier; - LocalPointer<const LongNameHandler> fLongNameHandler; + LocalPointer<LongNameHandler> fLongNameHandler; + // TODO: use a common base class that enables fLongNameHandler, + // fLongNameMultiplexer, and fMixedUnitLongNameHandler to be merged into one + // member? + LocalPointer<MixedUnitLongNameHandler> fMixedUnitLongNameHandler; + LocalPointer<const LongNameMultiplexer> fLongNameMultiplexer; LocalPointer<const CompactHandler> fCompactHandler; - // Value objects possibly used by the number formatting pipeline: - struct Warehouse { - CurrencySymbols fCurrencySymbols; - } fWarehouse; - - NumberFormatterImpl(const MacroProps &macros, bool safe, UErrorCode &status); MicroProps& preProcessUnsafe(DecimalQuantity &inValue, UErrorCode &status); diff --git a/contrib/libs/icu/i18n/number_grouping.cpp b/contrib/libs/icu/i18n/number_grouping.cpp index 41f727a458..9ba639e67e 100644 --- a/contrib/libs/icu/i18n/number_grouping.cpp +++ b/contrib/libs/icu/i18n/number_grouping.cpp @@ -47,7 +47,7 @@ Grouper Grouper::forStrategy(UNumberGroupingStrategy grouping) { case UNUM_GROUPING_THOUSANDS: return {3, 3, 1, grouping}; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -64,6 +64,13 @@ Grouper Grouper::forProperties(const DecimalFormatProperties& properties) { } void Grouper::setLocaleData(const impl::ParsedPatternInfo &patternInfo, const Locale& locale) { + if (fMinGrouping == -2) { + fMinGrouping = getMinGroupingForLocale(locale); + } else if (fMinGrouping == -3) { + fMinGrouping = static_cast<int16_t>(uprv_max(2, 
getMinGroupingForLocale(locale))); + } else { + // leave fMinGrouping alone + } if (fGrouping1 != -2 && fGrouping2 != -4) { return; } @@ -76,13 +83,6 @@ void Grouper::setLocaleData(const impl::ParsedPatternInfo &patternInfo, const Lo if (grouping3 == -1) { grouping2 = grouping1; } - if (fMinGrouping == -2) { - fMinGrouping = getMinGroupingForLocale(locale); - } else if (fMinGrouping == -3) { - fMinGrouping = static_cast<int16_t>(uprv_max(2, getMinGroupingForLocale(locale))); - } else { - // leave fMinGrouping alone - } fGrouping1 = grouping1; fGrouping2 = grouping2; } diff --git a/contrib/libs/icu/i18n/number_integerwidth.cpp b/contrib/libs/icu/i18n/number_integerwidth.cpp index d62aef444d..10b853423c 100644 --- a/contrib/libs/icu/i18n/number_integerwidth.cpp +++ b/contrib/libs/icu/i18n/number_integerwidth.cpp @@ -40,6 +40,9 @@ IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) { } void IntegerWidth::apply(impl::DecimalQuantity& quantity, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } if (fHasError) { status = U_ILLEGAL_ARGUMENT_ERROR; } else if (fUnion.minMaxInt.fMaxInt == -1) { diff --git a/contrib/libs/icu/i18n/number_longnames.cpp b/contrib/libs/icu/i18n/number_longnames.cpp index bb32d0381a..5a4cf6321c 100644 --- a/contrib/libs/icu/i18n/number_longnames.cpp +++ b/contrib/libs/icu/i18n/number_longnames.cpp @@ -5,11 +5,14 @@ #if !UCONFIG_NO_FORMATTING +#include <cstdlib> + #include "unicode/simpleformatter.h" #include "unicode/ures.h" #include "ureslocs.h" #include "charstr.h" #include "uresimp.h" +#include "measunit_impl.h" #include "number_longnames.h" #include "number_microprops.h" #include <algorithm> @@ -22,22 +25,100 @@ using namespace icu::number::impl; namespace { +/** + * Display Name (this format has no placeholder). + * + * Used as an index into the LongNameHandler::simpleFormats array. Units + * resources cover the normal set of PluralRules keys, as well as `dnam` and + * `per` forms. 
+ */ constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT; +/** + * "per" form (e.g. "{0} per day" is day's "per" form). + * + * Used as an index into the LongNameHandler::simpleFormats array. Units + * resources cover the normal set of PluralRules keys, as well as `dnam` and + * `per` forms. + */ constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1; -constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 2; +/** + * Gender of the word, in languages with grammatical gender. + */ +constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2; +// Number of keys in the array populated by PluralTableSink. +constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3; + +// TODO(icu-units#28): load this list from resources, after creating a "&set" +// function for use in ldml2icu rules. +const int32_t GENDER_COUNT = 7; +const char *gGenders[GENDER_COUNT] = {"animate", "common", "feminine", "inanimate", + "masculine", "neuter", "personal"}; + +// Converts a UnicodeString to a const char*, either pointing to a string in +// gGenders, or pointing to an empty string if an appropriate string was not +// found. +const char *getGenderString(UnicodeString uGender, UErrorCode status) { + if (uGender.length() == 0) { + return ""; + } + CharString gender; + gender.appendInvariantChars(uGender, status); + if (U_FAILURE(status)) { + return ""; + } + int32_t first = 0; + int32_t last = GENDER_COUNT; + while (first < last) { + int32_t mid = (first + last) / 2; + int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]); + if (cmp == 0) { + return gGenders[mid]; + } else if (cmp > 0) { + first = mid + 1; + } else if (cmp < 0) { + last = mid; + } + } + // We don't return an error in case our gGenders list is incomplete in + // production. + // + // TODO(icu-units#28): a unit test checking all locales' genders are covered + // by gGenders? Else load a complete list of genders found in + // grammaticalFeatures in an initOnce. 
+ return ""; +} +// Returns the array index that corresponds to the given pluralKeyword. static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { - // pluralKeyword can also be "dnam" or "per" - if (uprv_strcmp(pluralKeyword, "dnam") == 0) { - return DNAM_INDEX; - } else if (uprv_strcmp(pluralKeyword, "per") == 0) { - return PER_INDEX; - } else { - StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); - return plural; + // pluralKeyword can also be "dnam", "per", or "gender" + switch (*pluralKeyword) { + case 'd': + if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) { + return DNAM_INDEX; + } + break; + case 'g': + if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) { + return GENDER_INDEX; + } + break; + case 'p': + if (uprv_strcmp(pluralKeyword + 1, "er") == 0) { + return PER_INDEX; + } + break; + default: + break; } + StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status); + return plural; } +// Selects a string out of the `strings` array which corresponds to the +// specified plural form, with fallback to the OTHER form. +// +// The `strings` array must have ARRAY_LENGTH items: one corresponding to each +// of the plural forms, plus a display name ("dnam") and a "per" form. static UnicodeString getWithPlural( const UnicodeString* strings, StandardPlural::Form plural, @@ -53,13 +134,249 @@ static UnicodeString getWithPlural( return result; } +enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END }; + +/** + * Returns three outputs extracted from pattern. + * + * @param coreUnit is extracted as per Extract(...) in the spec: + * https://unicode.org/reports/tr35/tr35-general.html#compound-units + * @param PlaceholderPosition indicates where in the string the placeholder was + * found. + * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar + * contains the space character (if any) that separated the placeholder from + * the rest of the pattern. 
Otherwise, joinerChar is set to NUL. Only one + * space character is considered. + */ +void extractCorePattern(const UnicodeString &pattern, + UnicodeString &coreUnit, + PlaceholderPosition &placeholderPosition, + UChar &joinerChar) { + joinerChar = 0; + int32_t len = pattern.length(); + if (pattern.startsWith(u"{0}", 3)) { + placeholderPosition = PH_BEGINNING; + if (u_isJavaSpaceChar(pattern[3])) { + joinerChar = pattern[3]; + coreUnit.setTo(pattern, 4, len - 4); + } else { + coreUnit.setTo(pattern, 3, len - 3); + } + } else if (pattern.endsWith(u"{0}", 3)) { + placeholderPosition = PH_END; + if (u_isJavaSpaceChar(pattern[len - 4])) { + coreUnit.setTo(pattern, 0, len - 4); + joinerChar = pattern[len - 4]; + } else { + coreUnit.setTo(pattern, 0, len - 3); + } + } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) { + placeholderPosition = PH_NONE; + coreUnit = pattern; + } else { + placeholderPosition = PH_MIDDLE; + coreUnit = pattern; + } +} ////////////////////////// /// BEGIN DATA LOADING /// ////////////////////////// +// Gets the gender of a built-in unit: unit must be a built-in. Returns an empty +// string both in case of unknown gender and in case of unknown unit. +UnicodeString +getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) { + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); + if (U_FAILURE(status)) { return {}; } + + // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... + // TODO(ICU-20400): Get duration-*-person data properly with aliases. 
+ StringPiece subtypeForResource; + int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype())); + if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) { + subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7}; + } else { + subtypeForResource = builtinUnit.getSubtype(); + } + + CharString key; + key.append("units/", status); + key.append(builtinUnit.getType(), status); + key.append("/", status); + key.append(subtypeForResource, status); + key.append("/gender", status); + + UErrorCode localStatus = status; + int32_t resultLen = 0; + const UChar *result = + ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus); + if (U_SUCCESS(localStatus)) { + status = localStatus; + return UnicodeString(true, result, resultLen); + } else { + // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to + // check whether the parent "$unitRes" exists? Then we could return + // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not + // being a builtin). + return {}; + } +} + +// Loads data from a resource tree with paths matching +// $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases +// and genders. +// +// An InflectedPluralSink is configured to load data for a specific gender and +// case. It loads all plural forms, because selection between plural forms is +// dependent upon the value being formatted. +// +// See data/unit/de.txt and data/unit/fr.txt for examples - take a look at +// units/compound/power2: German has case, French has differences for gender, +// but no case. +// +// TODO(icu-units#138): Conceptually similar to PluralTableSink, however the +// tree structures are different. After homogenizing the structures, we may be +// able to unify the two classes. +// +// TODO: Spec violation: expects presence of "count" - does not fallback to an +// absent "count"! 
If this fallback were added, getCompoundValue could be +// superseded? +class InflectedPluralSink : public ResourceSink { + public: + // Accepts `char*` rather than StringPiece because + // ResourceTable::findValue(...) requires a null-terminated `char*`. + // + // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds + // checking is performed. + explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray) + : gender(gender), caseVariant(caseVariant), outArray(outArray) { + // Initialize the array to bogus strings. + for (int32_t i = 0; i < ARRAY_LENGTH; i++) { + outArray[i].setToBogus(); + } + } + + // See ResourceSink::put(). + void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { + int32_t pluralIndex = getIndex(key, status); + if (U_FAILURE(status)) { return; } + if (!outArray[pluralIndex].isBogus()) { + // We already have a pattern + return; + } + ResourceTable genderTable = value.getTable(status); + ResourceTable caseTable; // This instance has to outlive `value` + if (loadForPluralForm(genderTable, caseTable, value, status)) { + outArray[pluralIndex] = value.getUnicodeString(status); + } + } + + private: + // Tries to load data for the configured gender from `genderTable`. Returns + // true if found, returning the data in `value`. The returned data will be + // for the configured gender if found, falling back to "neuter" and + // no-gender if not. The caseTable parameter holds the intermediate + // ResourceTable for the sake of lifetime management. 
+ bool loadForPluralForm(const ResourceTable &genderTable, + ResourceTable &caseTable, + ResourceValue &value, + UErrorCode &status) { + if (uprv_strcmp(gender, "") != 0) { + if (loadForGender(genderTable, gender, caseTable, value, status)) { + return true; + } + if (uprv_strcmp(gender, "neuter") != 0 && + loadForGender(genderTable, "neuter", caseTable, value, status)) { + return true; + } + } + if (loadForGender(genderTable, "_", caseTable, value, status)) { + return true; + } + return false; + } + + // Tries to load data for the given gender from `genderTable`. Returns true + // if found, returning the data in `value`. The returned data will be for + // the configured case if found, falling back to "nominative" and no-case if + // not. + bool loadForGender(const ResourceTable &genderTable, + const char *genderVal, + ResourceTable &caseTable, + ResourceValue &value, + UErrorCode &status) { + if (!genderTable.findValue(genderVal, value)) { + return false; + } + caseTable = value.getTable(status); + if (uprv_strcmp(caseVariant, "") != 0) { + if (loadForCase(caseTable, caseVariant, value)) { + return true; + } + if (uprv_strcmp(caseVariant, "nominative") != 0 && + loadForCase(caseTable, "nominative", value)) { + return true; + } + } + if (loadForCase(caseTable, "_", value)) { + return true; + } + return false; + } + + // Tries to load data for the given case from `caseTable`. Returns true if + // found, returning the data in `value`. + bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) { + if (!caseTable.findValue(caseValue, value)) { + return false; + } + return true; + } + + const char *gender; + const char *caseVariant; + UnicodeString *outArray; +}; + +// Fetches localised formatting patterns for the given subKey. See documentation +// for InflectedPluralSink for details. +// +// Data is loaded for the appropriate unit width, with missing data filled in +// from unitsShort. 
+void getInflectedMeasureData(StringPiece subKey, + const Locale &locale, + const UNumberUnitWidth &width, + const char *gender, + const char *caseVariant, + UnicodeString *outArray, + UErrorCode &status) { + InflectedPluralSink sink(gender, caseVariant, outArray); + LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); + if (U_FAILURE(status)) { return; } + + CharString key; + key.append("units", status); + if (width == UNUM_UNIT_WIDTH_NARROW) { + key.append("Narrow", status); + } else if (width == UNUM_UNIT_WIDTH_SHORT) { + key.append("Short", status); + } + key.append("/", status); + key.append(subKey, status); + + UErrorCode localStatus = status; + ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); + if (width == UNUM_UNIT_WIDTH_SHORT) { + status = localStatus; + return; + } +} + class PluralTableSink : public ResourceSink { public: + // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds + // checking is performed. explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) { // Initialize the array to bogus strings. 
for (int32_t i = 0; i < ARRAY_LENGTH; i++) { @@ -68,39 +385,71 @@ class PluralTableSink : public ResourceSink { } void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { - ResourceTable pluralsTable = value.getTable(status); + if (uprv_strcmp(key, "case") == 0) { + return; + } + int32_t index = getIndex(key, status); if (U_FAILURE(status)) { return; } - for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) { - int32_t index = getIndex(key, status); - if (U_FAILURE(status)) { return; } - if (!outArray[index].isBogus()) { - continue; - } - outArray[index] = value.getUnicodeString(status); - if (U_FAILURE(status)) { return; } + if (!outArray[index].isBogus()) { + return; } + outArray[index] = value.getUnicodeString(status); + if (U_FAILURE(status)) { return; } } private: UnicodeString *outArray; }; -// NOTE: outArray MUST have room for all StandardPlural values. No bounds checking is performed. - -void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width, - UnicodeString *outArray, UErrorCode &status) { +/** + * Populates outArray with `locale`-specific values for `unit` through use of + * PluralTableSink. Only the set of basic units are supported! + * + * Reading from resources *unitsNarrow* and *unitsShort* (for width + * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width + * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units". + * + * @param unit must be a built-in unit, i.e. must have a type and subtype, + * listed in gTypes and gSubTypes in measunit.cpp. + * @param unitDisplayCase the empty string and "nominative" are treated the + * same. For other cases, strings for the requested case are used if found. + * (For any missing case-specific data, we fall back to nominative.) + * @param outArray must be of fixed length ARRAY_LENGTH. 
+ */ +void getMeasureData(const Locale &locale, + const MeasureUnit &unit, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + UnicodeString *outArray, + UErrorCode &status) { PluralTableSink sink(outArray); LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); if (U_FAILURE(status)) { return; } + CharString subKey; + subKey.append("/", status); + subKey.append(unit.getType(), status); + subKey.append("/", status); + // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... // TODO(ICU-20400): Get duration-*-person data properly with aliases. - StringPiece subtypeForResource; int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unit.getSubtype())); if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person") == 0) { - subtypeForResource = {unit.getSubtype(), subtypeLen - 7}; + subKey.append({unit.getSubtype(), subtypeLen - 7}, status); } else { - subtypeForResource = unit.getSubtype(); + subKey.append({unit.getSubtype(), subtypeLen}, status); + } + + if (width != UNUM_UNIT_WIDTH_FULL_NAME) { + UErrorCode localStatus = status; + CharString genderKey; + genderKey.append("units", localStatus); + genderKey.append(subKey, localStatus); + genderKey.append("/gender", localStatus); + StackUResourceBundle fillIn; + ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(), + &localStatus); + outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus); } CharString key; @@ -110,30 +459,41 @@ void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumber } else if (width == UNUM_UNIT_WIDTH_SHORT) { key.append("Short", status); } - key.append("/", status); - key.append(unit.getType(), status); - key.append("/", status); - key.append(subtypeForResource, status); + key.append(subKey, status); + + // Grab desired case first, if available. Then grab no-case data to fill in + // the gaps. 
+ if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) { + CharString caseKey; + caseKey.append(key, status); + caseKey.append("/case/", status); + caseKey.append(unitDisplayCase, status); + + UErrorCode localStatus = U_ZERO_ERROR; + // TODO(icu-units#138): our fallback logic is not spec-compliant: + // lateral fallback should happen before locale fallback. Switch to + // getInflectedMeasureData after homogenizing data format? Find a unit + // test case that demonstrates the incorrect fallback logic (via + // regional variant of an inflected language?) + ures_getAllChildrenWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus); + } + // TODO(icu-units#138): our fallback logic is not spec-compliant: we + // check the given case, then go straight to the no-case data. The spec + // states we should first look for case="nominative". As part of #138, + // either get the spec changed, or add unit tests that warn us if + // case="nominative" data differs from no-case data? UErrorCode localStatus = U_ZERO_ERROR; - ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); + ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus); if (width == UNUM_UNIT_WIDTH_SHORT) { if (U_FAILURE(localStatus)) { status = localStatus; } return; } - - // TODO(ICU-13353): The fallback to short does not work in ICU4C. - // Manually fall back to short (this is done automatically in Java). - key.clear(); - key.append("unitsShort/", status); - key.append(unit.getType(), status); - key.append("/", status); - key.append(subtypeForResource, status); - ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status); } +// NOTE: outArray MUST have a length of at least ARRAY_LENGTH. void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, UnicodeString *outArray, UErrorCode &status) { // In ICU4J, this method gets a CurrencyData from CurrencyData.provider. 
@@ -141,7 +501,7 @@ void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, PluralTableSink sink(outArray); LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status)); if (U_FAILURE(status)) { return; } - ures_getAllItemsWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status); + ures_getAllChildrenWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { UnicodeString &pattern = outArray[i]; @@ -162,7 +522,10 @@ void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, } } -UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &width, UErrorCode& status) { +UnicodeString getCompoundValue(StringPiece compoundKey, + const Locale &locale, + const UNumberUnitWidth &width, + UErrorCode &status) { LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status)); if (U_FAILURE(status)) { return {}; } CharString key; @@ -172,90 +535,893 @@ UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &wid } else if (width == UNUM_UNIT_WIDTH_SHORT) { key.append("Short", status); } - key.append("/compound/per", status); + key.append("/compound/", status); + key.append(compoundKey, status); + + UErrorCode localStatus = status; int32_t len = 0; - const UChar* ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); + const UChar *ptr = + ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus); + if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) { + // Fall back to short, which contains more compound data + key.clear(); + key.append("unitsShort/compound/", status); + key.append(compoundKey, status); + ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status); + } else { + status = localStatus; + } + if 
(U_FAILURE(status)) { + return {}; + } return UnicodeString(ptr, len); } +/** + * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml. + * + * Consider a deriveComponent rule that looks like this: + * + * <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/> + * + * Instantiating an instance as follows: + * + * DerivedComponents d(loc, "case", "per"); + * + * Applying the rule in the XML element above, `d.value0("foo")` will be "foo", + * and `d.value1("foo")` will be "nominative". + * + * The values returned by value0(...) and value1(...) are valid only while the + * instance exists. In case of any kind of failure, value0(...) and value1(...) + * will return "". + */ +class DerivedComponents { + public: + /** + * Constructor. + * + * The feature and structure parameters must be null-terminated. The string + * referenced by compoundValue must exist for longer than the + * DerivedComponents instance. + */ + DerivedComponents(const Locale &locale, const char *feature, const char *structure) { + StackUResourceBundle derivationsBundle, stackBundle; + ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status); + ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), + &status); + ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), + &status); + if (U_FAILURE(status)) { + return; + } + UErrorCode localStatus = U_ZERO_ERROR; + // TODO(icu-units#28): use standard normal locale resolution algorithms + // rather than just grabbing language: + ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), + &localStatus); + // TODO(icu-units#28): + // - code currently assumes if the locale exists, the rules are there - + // instead of falling back to root when the requested rule is missing. 
+ // - investigate ures.h functions, see if one that uses res_findResource() + // might be better (or use res_findResource directly), or maybe help + // improve ures documentation to guide function selection? + if (localStatus == U_MISSING_RESOURCE_ERROR) { + ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); + } else { + status = localStatus; + } + ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status); + ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); + ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status); + UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status); + UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status); + if (U_SUCCESS(status)) { + if (val0.compare(UnicodeString(u"compound")) == 0) { + compound0_ = true; + } else { + compound0_ = false; + value0_.appendInvariantChars(val0, status); + } + if (val1.compare(UnicodeString(u"compound")) == 0) { + compound1_ = true; + } else { + compound1_ = false; + value1_.appendInvariantChars(val1, status); + } + } + } + + // Returns a StringPiece that is only valid as long as the instance exists. + StringPiece value0(const StringPiece compoundValue) const { + return compound0_ ? compoundValue : value0_.toStringPiece(); + } + + // Returns a StringPiece that is only valid as long as the instance exists. + StringPiece value1(const StringPiece compoundValue) const { + return compound1_ ? compoundValue : value1_.toStringPiece(); + } + + // Returns a char* that is only valid as long as the instance exists. + const char *value0(const char *compoundValue) const { + return compound0_ ? compoundValue : value0_.data(); + } + + // Returns a char* that is only valid as long as the instance exists. + const char *value1(const char *compoundValue) const { + return compound1_ ? 
compoundValue : value1_.data(); + } + + private: + UErrorCode status = U_ZERO_ERROR; + + // Holds strings referred to by value0 and value1; + bool compound0_ = false, compound1_ = false; + CharString value0_, value1_; +}; + +// TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding +// testsuite support for testing with synthetic data? +/** + * Loads and returns the value in rules that look like these: + * + * <deriveCompound feature="gender" structure="per" value="0"/> + * <deriveCompound feature="gender" structure="times" value="1"/> + * + * Currently a fake example, but spec compliant: + * <deriveCompound feature="gender" structure="power" value="feminine"/> + * + * NOTE: If U_FAILURE(status), returns an empty string. + */ +UnicodeString +getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) { + StackUResourceBundle derivationsBundle, stackBundle; + ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status); + ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(), + &status); + ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status); + // TODO: use standard normal locale resolution algorithms rather than just grabbing language: + ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status); + // TODO: + // - code currently assumes if the locale exists, the rules are there - + // instead of falling back to root when the requested rule is missing. + // - investigate ures.h functions, see if one that uses res_findResource() + // might be better (or use res_findResource directly), or maybe help + // improve ures documentation to guide function selection? 
+ if (status == U_MISSING_RESOURCE_ERROR) { + status = U_ZERO_ERROR; + ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status); + } + ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status); + ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status); + UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status); + if (U_FAILURE(status)) { + return {}; + } + U_ASSERT(!uVal.isBogus()); + return uVal; +} + +// Returns the gender string for structures following these rules: +// +// <deriveCompound feature="gender" structure="per" value="0"/> +// <deriveCompound feature="gender" structure="times" value="1"/> +// +// Fake example: +// <deriveCompound feature="gender" structure="power" value="feminine"/> +// +// data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that +// correspond to value="0" and value="1". +// +// Pass a nullptr to data1 if the structure has no concept of value="1" (e.g. +// "prefix" doesn't). +UnicodeString getDerivedGender(Locale locale, + const char *structure, + UnicodeString *data0, + UnicodeString *data1, + UErrorCode &status) { + UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status); + if (val.length() == 1) { + switch (val[0]) { + case u'0': + return data0[GENDER_INDEX]; + case u'1': + if (data1 == nullptr) { + return {}; + } + return data1[GENDER_INDEX]; + } + } + return val; +} + //////////////////////// /// END DATA LOADING /// //////////////////////// -} // namespace +// TODO: promote this somewhere? 
It's based on patternprops.cpp' trimWhitespace +const UChar *trimSpaceChars(const UChar *s, int32_t &length) { + if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) { + return s; + } + int32_t start = 0; + int32_t limit = length; + while (start < limit && u_isJavaSpaceChar(s[start])) { + ++start; + } + if (start < limit) { + // There is non-white space at start; we will not move limit below that, + // so we need not test start<limit in the loop. + while (u_isJavaSpaceChar(s[limit - 1])) { + --limit; + } + } + length = limit - start; + return s + start; +} -LongNameHandler* -LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const MeasureUnit &perUnit, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, UErrorCode &status) { - if (uprv_strlen(unitRef.getType()) == 0 || uprv_strlen(perUnit.getType()) == 0) { - // TODO(ICU-20941): Unsanctioned unit. Not yet fully supported. Set an error code. - status = U_UNSUPPORTED_ERROR; - return nullptr; +/** + * Calculates the gender of an arbitrary unit: this is the *second* + * implementation of an algorithm to do this: + * + * Gender is also calculated in "processPatternTimes": that code path is "bottom + * up", loading the gender for every component of a compound unit (at the same + * time as loading the Long Names formatting patterns), even if the gender is + * unneeded, then combining the single units' genders into the compound unit's + * gender, according to the rules. This algorithm does a lazier "top-down" + * evaluation, starting with the compound unit, calculating which single unit's + * gender is needed by breaking it down according to the rules, and then loading + * only the gender of the one single unit who's gender is needed. + * + * For future refactorings: + * 1. 
we could drop processPatternTimes' gender calculation and just call this + * function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very + * same table as the formatting patterns, so loading it then may be + * efficient. For other unit widths however, it needs to be explicitly looked + * up anyway. + * 2. alternatively, if CLDR is providing all the genders we need such that we + * don't need to calculate them in ICU anymore, we could drop this function + * and keep only processPatternTimes' calculation. (And optimise it a bit?) + * + * @param locale The desired locale. + * @param unit The measure unit to calculate the gender for. + * @return The gender string for the unit, or an empty string if unknown or + * ungendered. + */ +UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) { + MeasureUnitImpl impl; + const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status); + int32_t singleUnitIndex = 0; + if (mui.complexity == UMEASURE_UNIT_COMPOUND) { + int32_t startSlice = 0; + // inclusive + int32_t endSlice = mui.singleUnits.length()-1; + U_ASSERT(endSlice > 0); // Else it would not be COMPOUND + if (mui.singleUnits[endSlice]->dimensionality < 0) { + // We have a -per- construct + UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status); + if (perRule.length() != 1) { + // Fixed gender for -per- units + return perRule; + } + if (perRule[0] == u'1') { + // Find the start of the denominator. We already know there is one. + while (mui.singleUnits[startSlice]->dimensionality >= 0) { + startSlice++; + } + } else { + // Find the end of the numerator + while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) { + endSlice--; + } + if (endSlice < 0) { + // We have only a denominator, e.g. "per-second". + // TODO(icu-units#28): find out what gender to use in the + // absence of a first value - mentioned in CLDR-14253. 
+ return {}; + } + } + } + if (endSlice > startSlice) { + // We have a -times- construct + UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status); + if (timesRule.length() != 1) { + // Fixed gender for -times- units + return timesRule; + } + if (timesRule[0] == u'0') { + endSlice = startSlice; + } else { + // We assume timesRule[0] == u'1' + startSlice = endSlice; + } + } + U_ASSERT(startSlice == endSlice); + singleUnitIndex = startSlice; + } else if (mui.complexity == UMEASURE_UNIT_MIXED) { + status = U_INTERNAL_PROGRAM_ERROR; + return {}; + } else { + U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE); + U_ASSERT(mui.singleUnits.length() == 1); } - MeasureUnit unit = unitRef; - if (uprv_strcmp(perUnit.getType(), "none") != 0) { - // Compound unit: first try to simplify (e.g., meters per second is its own unit). - bool isResolved = false; - MeasureUnit resolved = MeasureUnit::resolveUnitPerUnit(unit, perUnit, &isResolved); - if (isResolved) { - unit = resolved; - } else { - // No simplified form is available. - return forCompoundUnit(loc, unit, perUnit, width, rules, parent, status); + // Now we know which singleUnit's gender we want + const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex]; + // Check for any power-prefix gender override: + if (std::abs(singleUnit->dimensionality) != 1) { + UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status); + if (powerRule.length() != 1) { + // Fixed gender for -powN- units + return powerRule; } + // powerRule[0] == u'0'; u'1' not currently in spec. } + // Check for any SI and binary prefix gender override: + if (std::abs(singleUnit->dimensionality) != 1) { + UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status); + if (prefixRule.length() != 1) { + // Fixed gender for -powN- units + return prefixRule; + } + // prefixRule[0] == u'0'; u'1' not currently in spec. 
+ } + // Now we've boiled it down to the gender of one simple unit identifier: + return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status), + status); +} - auto* result = new LongNameHandler(rules, parent); - if (result == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; +void maybeCalculateGender(const Locale &locale, + const MeasureUnit &unitRef, + UnicodeString *outArray, + UErrorCode &status) { + if (outArray[GENDER_INDEX].isBogus()) { + UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status); + if (meterGender.isEmpty()) { + // No gender for meter: assume ungendered language + return; + } + // We have a gendered language, but are lacking gender for unitRef. + outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status); } - UnicodeString simpleFormats[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, simpleFormats, status); - if (U_FAILURE(status)) { return result; } - result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); - return result; } -LongNameHandler* -LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, UErrorCode &status) { - auto* result = new LongNameHandler(rules, parent); - if (result == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; +} // namespace + +void LongNameHandler::forMeasureUnit(const Locale &loc, + const MeasureUnit &unitRef, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + LongNameHandler *fillIn, + UErrorCode &status) { + // From https://unicode.org/reports/tr35/tr35-general.html#compound-units - + // Points 1 and 2 are mostly handled by MeasureUnit: + // + // 1. If the unitId is empty or invalid, fail + // 2. 
Put the unitId into normalized order + U_ASSERT(fillIn != nullptr); + + if (uprv_strcmp(unitRef.getType(), "") != 0) { + // Handling built-in units: + // + // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant) + // - If result is not empty, return it + UnicodeString simpleFormats[ARRAY_LENGTH]; + getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status); + maybeCalculateGender(loc, unitRef, simpleFormats, status); + if (U_FAILURE(status)) { + return; + } + fillIn->rules = rules; + fillIn->parent = parent; + fillIn->simpleFormatsToModifiers(simpleFormats, + {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + if (!simpleFormats[GENDER_INDEX].isBogus()) { + fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status); + } + return; + + // TODO(icu-units#145): figure out why this causes a failure in + // format/MeasureFormatTest/TestIndividualPluralFallback and other + // tests, when it should have been an alternative for the lines above: + + // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); + // fillIn->rules = rules; + // fillIn->parent = parent; + // return; + } else { + // Check if it is a MeasureUnit this constructor handles: this + // constructor does not handle mixed units + U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED); + forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status); + fillIn->rules = rules; + fillIn->parent = parent; + return; } - UnicodeString primaryData[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, primaryData, status); - if (U_FAILURE(status)) { return result; } - UnicodeString secondaryData[ARRAY_LENGTH]; - getMeasureData(loc, perUnit, width, secondaryData, status); - if (U_FAILURE(status)) { return result; } +} + +void LongNameHandler::forArbitraryUnit(const Locale &loc, + const MeasureUnit &unitRef, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + LongNameHandler *fillIn, + UErrorCode &status) { + if 
(U_FAILURE(status)) { + return; + } + if (fillIn == nullptr) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } + + // Numbered list items are from the algorithms at + // https://unicode.org/reports/tr35/tr35-general.html#compound-units: + // + // 4. Divide the unitId into numerator (the part before the "-per-") and + // denominator (the part after the "-per-). If both are empty, fail + MeasureUnitImpl unit; + MeasureUnitImpl perUnit; + { + MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status); + if (U_FAILURE(status)) { + return; + } + for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) { + SingleUnitImpl *subUnit = fullUnit.singleUnits[i]; + if (subUnit->dimensionality > 0) { + unit.appendSingleUnit(*subUnit, status); + } else { + subUnit->dimensionality *= -1; + perUnit.appendSingleUnit(*subUnit, status); + } + } + } + + // TODO(icu-units#28): check placeholder logic, see if it needs to be + // present here instead of only in processPatternTimes: + // + // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty + + DerivedComponents derivedPerCases(loc, "case", "per"); - UnicodeString perUnitFormat; - if (!secondaryData[PER_INDEX].isBogus()) { - perUnitFormat = secondaryData[PER_INDEX]; + // 6. numeratorUnitString + UnicodeString numeratorUnitData[ARRAY_LENGTH]; + processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase), + numeratorUnitData, status); + + // 7. denominatorUnitString + UnicodeString denominatorUnitData[ARRAY_LENGTH]; + processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase), + denominatorUnitData, status); + + // TODO(icu-units#139): + // - implement DerivedComponents for "plural/times" and "plural/power": + // French has different rules, we'll be producing the wrong results + // currently. (Prove via tests!) 
+ // - implement DerivedComponents for "plural/per", "plural/prefix", + // "case/times", "case/power", and "case/prefix" - although they're + // currently hardcoded. Languages with different rules are surely on the + // way. + // + // Currently we only use "case/per", "plural/times", "case/times", and + // "case/power". + // + // This may have impact on multiSimpleFormatsToModifiers(...) below too? + // These rules are currently (ICU 69) all the same and hard-coded below. + UnicodeString perUnitPattern; + if (!denominatorUnitData[PER_INDEX].isBogus()) { + // If we have no denominator, we obtain the empty string: + perUnitPattern = denominatorUnitData[PER_INDEX]; } else { - UnicodeString rawPerUnitFormat = getPerUnitFormat(loc, width, status); - if (U_FAILURE(status)) { return result; } - // rawPerUnitFormat is something like "{0}/{1}"; we need to substitute in the secondary unit. - SimpleFormatter compiled(rawPerUnitFormat, 2, 2, status); - if (U_FAILURE(status)) { return result; } - UnicodeString secondaryFormat = getWithPlural(secondaryData, StandardPlural::Form::ONE, status); - if (U_FAILURE(status)) { return result; } + // 8. Set perPattern to be getValue([per], locale, length) + UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status); + // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit. + SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status); + if (U_FAILURE(status)) { + return; + } + // Plural and placeholder handling for 7. denominatorUnitString: + // TODO(icu-units#139): hardcoded: + // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/> + UnicodeString denominatorFormat = + getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status); // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale. 
- SimpleFormatter secondaryCompiled(secondaryFormat, 0, 1, status); - if (U_FAILURE(status)) { return result; } - UnicodeString secondaryString = secondaryCompiled.getTextWithNoArguments().trim(); - // TODO: Why does UnicodeString need to be explicit in the following line? - compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status); - if (U_FAILURE(status)) { return result; } - } - result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); - return result; + SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status); + if (U_FAILURE(status)) { + return; + } + UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments(); + int32_t trimmedLen = denominatorPattern.length(); + const UChar *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen); + UnicodeString denominatorString(false, trimmed, trimmedLen); + // 9. If the denominatorString is empty, set result to + // [numeratorString], otherwise set result to format(perPattern, + // numeratorString, denominatorString) + // + // TODO(icu-units#28): Why does UnicodeString need to be explicit in the + // following line? + perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status); + if (U_FAILURE(status)) { + return; + } + } + if (perUnitPattern.length() == 0) { + fillIn->simpleFormatsToModifiers(numeratorUnitData, + {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + } else { + fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern, + {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); + } + + // Gender + // + // TODO(icu-units#28): find out what gender to use in the absence of a first + // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253. 
+ // + // gender/per deriveCompound rules don't say: + // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ← gender(gram) --> + fillIn->gender = getGenderString( + getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status); +} + +void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit, + Locale loc, + const UNumberUnitWidth &width, + const char *caseVariant, + UnicodeString *outArray, + UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (productUnit.complexity == UMEASURE_UNIT_MIXED) { + // These are handled by MixedUnitLongNameHandler + status = U_UNSUPPORTED_ERROR; + return; + } + +#if U_DEBUG + for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { + U_ASSERT(outArray[pluralIndex].length() == 0); + U_ASSERT(!outArray[pluralIndex].isBogus()); + } +#endif + + if (productUnit.identifier.isEmpty()) { + // TODO(icu-units#28): consider when serialize should be called. + // identifier might also be empty for MeasureUnit(). + productUnit.serialize(status); + } + if (U_FAILURE(status)) { + return; + } + if (productUnit.identifier.length() == 0) { + // MeasureUnit(): no units: return empty strings. + return; + } + + MeasureUnit builtinUnit; + if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) { + // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it + // breaks them all down. Do we want to drop this? + // - findBySubType isn't super efficient, if we skip it and go to basic + // singles, we don't have to construct MeasureUnit's anymore. + // - Check all the existing unit tests that fail without this: is it due + // to incorrect fallback via getMeasureData? + // - Do those unit tests cover this code path representatively? 
+ if (builtinUnit != MeasureUnit()) { + getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status); + maybeCalculateGender(loc, builtinUnit, outArray, status); + } + return; + } + + // 2. Set timesPattern to be getValue(times, locale, length) + UnicodeString timesPattern = getCompoundValue("times", loc, width, status); + SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status); + if (U_FAILURE(status)) { + return; + } + + PlaceholderPosition globalPlaceholder[ARRAY_LENGTH]; + UChar globalJoinerChar = 0; + // Numbered list items are from the algorithms at + // https://unicode.org/reports/tr35/tr35-general.html#compound-units: + // + // pattern(...) point 5: + // - Set both globalPlaceholder and globalPlaceholderPosition to be empty + // + // 3. Set result to be empty + for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) { + // Initial state: empty string pattern, via all falling back to OTHER: + if (pluralIndex == StandardPlural::Form::OTHER) { + outArray[pluralIndex].remove(); + } else { + outArray[pluralIndex].setToBogus(); + } + globalPlaceholder[pluralIndex] = PH_EMPTY; + } + + // Empty string represents "compound" (propagate the plural form). + const char *pluralCategory = ""; + DerivedComponents derivedTimesPlurals(loc, "plural", "times"); + DerivedComponents derivedTimesCases(loc, "case", "times"); + DerivedComponents derivedPowerCases(loc, "case", "power"); + + // 4. For each single_unit in product_unit + for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length(); + singleUnitIndex++) { + SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex]; + const char *singlePluralCategory; + const char *singleCaseVariant; + // TODO(icu-units#28): ensure we have unit tests that change/fail if we + // assign incorrect case variants here: + if (singleUnitIndex < productUnit.singleUnits.length() - 1) { + // 4.1. 
If hasMultiple + singlePluralCategory = derivedTimesPlurals.value0(pluralCategory); + singleCaseVariant = derivedTimesCases.value0(caseVariant); + pluralCategory = derivedTimesPlurals.value1(pluralCategory); + caseVariant = derivedTimesCases.value1(caseVariant); + } else { + singlePluralCategory = derivedTimesPlurals.value1(pluralCategory); + singleCaseVariant = derivedTimesCases.value1(caseVariant); + } + + // 4.2. Get the gender of that single_unit + MeasureUnit simpleUnit; + if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) { + // Ideally all simple units should be known, but they're not: + // 100-kilometer is internally treated as a simple unit, but it is + // not a built-in unit and does not have formatting data in CLDR 39. + // + // TODO(icu-units#28): test (desirable) invariants in unit tests. + status = U_UNSUPPORTED_ERROR; + return; + } + const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status); + + // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-' + U_ASSERT(singleUnit->dimensionality > 0); + int32_t dimensionality = singleUnit->dimensionality; + UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH]; + if (dimensionality != 1) { + // 4.3.1. set dimensionalityPrefixPattern to be + // getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender), + // such as "{0} kwadratowym" + CharString dimensionalityKey("compound/power", status); + dimensionalityKey.appendNumber(dimensionality, status); + getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender, + singleCaseVariant, dimensionalityPrefixPatterns, status); + if (U_FAILURE(status)) { + // At the time of writing, only pow2 and pow3 are supported. + // Attempting to format other powers results in a + // U_RESOURCE_TYPE_MISMATCH. 
We convert the error if we + // understand it: + if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) { + status = U_UNSUPPORTED_ERROR; + } + return; + } + + // TODO(icu-units#139): + // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory) + + // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant) + singleCaseVariant = derivedPowerCases.value0(singleCaseVariant); + // 4.3.4. remove the dimensionality_prefix from singleUnit + singleUnit->dimensionality = 1; + } + + // 4.4. if singleUnit starts with an si_prefix, such as 'centi' + UMeasurePrefix prefix = singleUnit->unitPrefix; + UnicodeString prefixPattern; + if (prefix != UMEASURE_PREFIX_ONE) { + // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale, + // length), such as "centy{0}" + CharString prefixKey; + // prefixKey looks like "1024p3" or "10p-2": + prefixKey.appendNumber(umeas_getPrefixBase(prefix), status); + prefixKey.append('p', status); + prefixKey.appendNumber(umeas_getPrefixPower(prefix), status); + // Contains a pattern like "centy{0}". + prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status); + + // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory) + // + // TODO(icu-units#139): that refers to these rules: + // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/> + // though I'm not sure what other value they might end up having. + // + // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant) + // + // TODO(icu-units#139): that refers to: + // <deriveComponent feature="case" structure="prefix" value0="nominative" + // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply + // propagates. + + // 4.4.4. remove the si_prefix from singleUnit + singleUnit->unitPrefix = UMEASURE_PREFIX_ONE; + } + + // 4.5. 
Set corePattern to be the getValue(singleUnit, locale, length, + // singlePluralCategory, singleCaseVariant), such as "{0} metrem" + UnicodeString singleUnitArray[ARRAY_LENGTH]; + // At this point we are left with a Simple Unit: + U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) == + 0); + getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray, + status); + if (U_FAILURE(status)) { + // Shouldn't happen if we have data for all single units + return; + } + + // Calculate output gender + if (!singleUnitArray[GENDER_INDEX].isBogus()) { + U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty()); + UnicodeString uVal; + + if (prefix != UMEASURE_PREFIX_ONE) { + singleUnitArray[GENDER_INDEX] = + getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status); + } + + if (dimensionality != 1) { + singleUnitArray[GENDER_INDEX] = + getDerivedGender(loc, "power", singleUnitArray, nullptr, status); + } + + UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status); + if (timesGenderRule.length() == 1) { + switch (timesGenderRule[0]) { + case u'0': + if (singleUnitIndex == 0) { + U_ASSERT(outArray[GENDER_INDEX].isBogus()); + outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; + } + break; + case u'1': + if (singleUnitIndex == productUnit.singleUnits.length() - 1) { + U_ASSERT(outArray[GENDER_INDEX].isBogus()); + outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX]; + } + } + } else { + if (outArray[GENDER_INDEX].isBogus()) { + outArray[GENDER_INDEX] = timesGenderRule; + } + } + } + + // Calculate resulting patterns for each plural form + for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { + StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex); + + // singleUnitArray[pluralIndex] looks something like "{0} Meter" + if (outArray[pluralIndex].isBogus()) { + if (singleUnitArray[pluralIndex].isBogus()) { + // Let 
the usual plural fallback mechanism take care of this + // plural form + continue; + } else { + // Since our singleUnit can have a plural form that outArray + // doesn't yet have (relying on fallback to OTHER), we start + // by grabbing it with the normal plural fallback mechanism + outArray[pluralIndex] = getWithPlural(outArray, plural, status); + if (U_FAILURE(status)) { + return; + } + } + } + + if (uprv_strcmp(singlePluralCategory, "") != 0) { + plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status)); + } + + // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern. + UnicodeString coreUnit; + PlaceholderPosition placeholderPosition; + UChar joinerChar; + extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit, + placeholderPosition, joinerChar); + + // 4.7 If the position is middle, then fail + if (placeholderPosition == PH_MIDDLE) { + status = U_UNSUPPORTED_ERROR; + return; + } + + // 4.8. If globalPlaceholder is empty + if (globalPlaceholder[pluralIndex] == PH_EMPTY) { + globalPlaceholder[pluralIndex] = placeholderPosition; + globalJoinerChar = joinerChar; + } else { + // Expect all units involved to have the same placeholder position + U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition); + // TODO(icu-units#28): Do we want to add a unit test that checks + // for consistent joiner chars? Probably not, given how + // inconsistent they are. File a CLDR ticket with examples? + } + // Now coreUnit would be just "Meter" + + // 4.9. If siPrefixPattern is not empty + if (prefix != UMEASURE_PREFIX_ONE) { + SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status); + if (U_FAILURE(status)) { + return; + } + + // 4.9.1. 
Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern, + // coreUnit) + UnicodeString tmp; + // combineLowercasing(locale, length, prefixPattern, coreUnit) + // + // TODO(icu-units#28): run this only if prefixPattern does not + // contain space characters - do languages "as", "bn", "hi", + // "kk", etc have concepts of upper and lower case?: + if (width == UNUM_UNIT_WIDTH_FULL_NAME) { + coreUnit.toLower(loc); + } + prefixCompiled.format(coreUnit, tmp, status); + if (U_FAILURE(status)) { + return; + } + coreUnit = tmp; + } + + // 4.10. If dimensionalityPrefixPattern is not empty + if (dimensionality != 1) { + SimpleFormatter dimensionalityCompiled( + getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status); + if (U_FAILURE(status)) { + return; + } + + // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length, + // dimensionalityPrefixPattern, coreUnit) + UnicodeString tmp; + // combineLowercasing(locale, length, prefixPattern, coreUnit) + // + // TODO(icu-units#28): run this only if prefixPattern does not + // contain space characters - do languages "as", "bn", "hi", + // "kk", etc have concepts of upper and lower case?: + if (width == UNUM_UNIT_WIDTH_FULL_NAME) { + coreUnit.toLower(loc); + } + dimensionalityCompiled.format(coreUnit, tmp, status); + if (U_FAILURE(status)) { + return; + } + coreUnit = tmp; + } + + if (outArray[pluralIndex].length() == 0) { + // 4.11. If the result is empty, set result to be coreUnit + outArray[pluralIndex] = coreUnit; + } else { + // 4.12. 
Otherwise set result to be format(timesPattern, result, coreUnit) + UnicodeString tmp; + timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status); + outArray[pluralIndex] = tmp; + } + } + } + for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) { + if (globalPlaceholder[pluralIndex] == PH_BEGINNING) { + UnicodeString tmp; + tmp.append(u"{0}", 3); + if (globalJoinerChar != 0) { + tmp.append(globalJoinerChar); + } + tmp.append(outArray[pluralIndex]); + outArray[pluralIndex] = tmp; + } else if (globalPlaceholder[pluralIndex] == PH_END) { + if (globalJoinerChar != 0) { + outArray[pluralIndex].append(globalJoinerChar); + } + outArray[pluralIndex].append(u"{0}", 3); + } + } } UnicodeString LongNameHandler::getUnitDisplayName( @@ -267,7 +1433,7 @@ UnicodeString LongNameHandler::getUnitDisplayName( return ICU_Utility::makeBogusString(); } UnicodeString simpleFormats[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, simpleFormats, status); + getMeasureData(loc, unit, width, "", simpleFormats, status); return simpleFormats[DNAM_INDEX]; } @@ -281,7 +1447,7 @@ UnicodeString LongNameHandler::getUnitPattern( return ICU_Utility::makeBogusString(); } UnicodeString simpleFormats[ARRAY_LENGTH]; - getMeasureData(loc, unit, width, simpleFormats, status); + getMeasureData(loc, unit, width, "", simpleFormats, status); // The above already handles fallback from other widths to short if (U_FAILURE(status)) { return ICU_Utility::makeBogusString(); @@ -304,6 +1470,7 @@ LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const getCurrencyLongNameData(loc, currency, simpleFormats, status); if (U_FAILURE(status)) { return nullptr; } result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); + // TODO(icu-units#28): currency gender? 
return result; } @@ -328,8 +1495,12 @@ void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFor UnicodeString leadFormat = getWithPlural(leadFormats, plural, status); if (U_FAILURE(status)) { return; } UnicodeString compoundFormat; - trailCompiled.format(leadFormat, compoundFormat, status); - if (U_FAILURE(status)) { return; } + if (leadFormat.length() == 0) { + compoundFormat = trailFormat; + } else { + trailCompiled.format(leadFormat, compoundFormat, status); + if (U_FAILURE(status)) { return; } + } SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); if (U_FAILURE(status)) { return; } fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural}); @@ -338,13 +1509,238 @@ void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFor void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const { - parent->processQuantity(quantity, micros, status); + if (parent != NULL) { + parent->processQuantity(quantity, micros, status); + } StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status); micros.modOuter = &fModifiers[pluralForm]; + micros.gender = gender; } const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const { return &fModifiers[plural]; } +void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, + const MeasureUnit &mixedUnit, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + MixedUnitLongNameHandler *fillIn, + UErrorCode &status) { + U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED); + U_ASSERT(fillIn != nullptr); + if (U_FAILURE(status)) { + return; + } + + MeasureUnitImpl temp; + const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status); + // Defensive, for production code: + if (impl.complexity != UMEASURE_UNIT_MIXED) 
{ + // Should be using the normal LongNameHandler + status = U_UNSUPPORTED_ERROR; + return; + } + + fillIn->fMixedUnitCount = impl.singleUnits.length(); + fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]); + for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) { + // Grab data for each of the components. + UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH]; + // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this + // propagation of unitDisplayCase is correct: + getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData, + status); + // TODO(ICU-21494): if we add support for gender for mixed units, we may + // need maybeCalculateGender() here. + } + + // TODO(icu-units#120): Make sure ICU doesn't output zero-valued + // high-magnitude fields + // * for mixed units count N, produce N listFormatters, one for each subset + // that might be formatted. + UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT; + if (width == UNUM_UNIT_WIDTH_NARROW) { + listWidth = ULISTFMT_WIDTH_NARROW; + } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) { + // This might be the same as SHORT in most languages: + listWidth = ULISTFMT_WIDTH_WIDE; + } + fillIn->fListFormatter.adoptInsteadAndCheckErrorCode( + ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status); + // TODO(ICU-21494): grab gender of each unit, calculate the gender + // associated with this list formatter, save it for later. + fillIn->rules = rules; + fillIn->parent = parent; + + // We need a localised NumberFormatter for the numbers of the bigger units + // (providing Arabic numerals, for example). 
+ fillIn->fNumberFormatter = NumberFormatter::withLocale(loc); +} + +void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, + UErrorCode &status) const { + U_ASSERT(fMixedUnitCount > 1); + if (parent != nullptr) { + parent->processQuantity(quantity, micros, status); + } + micros.modOuter = getMixedUnitModifier(quantity, micros, status); +} + +const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity, + MicroProps µs, + UErrorCode &status) const { + if (micros.mixedMeasuresCount == 0) { + U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value + status = U_UNSUPPORTED_ERROR; + return µs.helpers.emptyWeakModifier; + } + + // Algorithm: + // + // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should + // find "3 yard" and "1 foot" in micros.mixedMeasures. + // + // Obtain long-names with plural forms corresponding to measure values: + // * {0} yards, {0} foot, {0} inches + // + // Format the integer values appropriately and modify with the format + // strings: + // - 3 yards, 1 foot + // + // Use ListFormatter to combine, with one placeholder: + // - 3 yards, 1 foot and {0} inches + // + // Return a SimpleModifier for this pattern, letting the rest of the + // pipeline take care of the remaining inches. + + LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status); + if (U_FAILURE(status)) { + return µs.helpers.emptyWeakModifier; + } + + StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER; + for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) { + DecimalQuantity fdec; + + // If numbers are negative, only the first number needs to have its + // negative sign formatted. + int64_t number = i > 0 ? 
std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i]; + + if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity` + // If quantity is not the first value and quantity is negative + if (micros.indexOfQuantity > 0 && quantity.isNegative()) { + quantity.negate(); + } + + StandardPlural::Form quantityPlural = + utils::getPluralSafe(micros.rounder, rules, quantity, status); + UnicodeString quantityFormatWithPlural = + getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status); + SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status); + quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status); + } else { + fdec.setToLong(number); + StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec); + UnicodeString simpleFormat = + getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status); + SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); + UnicodeString num; + auto appendable = UnicodeStringAppendable(num); + + fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status); + compiledFormatter.format(num, outputMeasuresList[i], status); + } + } + + // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we + // can set micros.gender to the gender associated with the list formatter in + // use below (once we have correct support for that). And then document this + // appropriately? "getMixedUnitModifier" doesn't sound like it would do + // something like this. 
+ + // Combine list into a "premixed" pattern + UnicodeString premixedFormatPattern; + fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern, + status); + SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status); + if (U_FAILURE(status)) { + return µs.helpers.emptyWeakModifier; + } + + micros.helpers.mixedUnitModifier = + SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural}); + return µs.helpers.mixedUnitModifier; +} + +const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/, + StandardPlural::Form /*plural*/) const { + // TODO(icu-units#28): investigate this method when investigating where + // ModifierStore::getModifier() gets used. To be sure it remains + // unreachable: + UPRV_UNREACHABLE_EXIT; + return nullptr; +} + +LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc, + const MaybeStackVector<MeasureUnit> &units, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + UErrorCode &status) { + LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status); + if (U_FAILURE(status)) { + return nullptr; + } + U_ASSERT(units.length() > 0); + if (result->fHandlers.resize(units.length()) == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]); + for (int32_t i = 0, length = units.length(); i < length; i++) { + const MeasureUnit &unit = *units[i]; + result->fMeasureUnits[i] = unit; + if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) { + MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status); + MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, + mlnh, status); + result->fHandlers[i] = mlnh; + } else { + LongNameHandler *lnh = 
result->fLongNameHandlers.createAndCheckErrorCode(status); + LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, lnh, status); + result->fHandlers[i] = lnh; + } + if (U_FAILURE(status)) { + return nullptr; + } + } + return result.orphan(); +} + +void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps µs, + UErrorCode &status) const { + // We call parent->processQuantity() from the Multiplexer, instead of + // letting LongNameHandler handle it: we don't know which LongNameHandler to + // call until we've called the parent! + fParent->processQuantity(quantity, micros, status); + + // Call the correct LongNameHandler based on outputUnit + for (int i = 0; i < fHandlers.getCapacity(); i++) { + if (fMeasureUnits[i] == micros.outputUnit) { + fHandlers[i]->processQuantity(quantity, micros, status); + return; + } + } + if (U_FAILURE(status)) { + return; + } + // We shouldn't receive any outputUnit for which we haven't already got a + // LongNameHandler: + status = U_INTERNAL_PROGRAM_ERROR; +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/number_longnames.h b/contrib/libs/icu/i18n/number_longnames.h index a19425aa26..06949989d7 100644 --- a/contrib/libs/icu/i18n/number_longnames.h +++ b/contrib/libs/icu/i18n/number_longnames.h @@ -7,6 +7,8 @@ #ifndef __NUMBER_LONGNAMES_H__ #define __NUMBER_LONGNAMES_H__ +#include "cmemory.h" +#include "unicode/listformatter.h" #include "unicode/uversion.h" #include "number_utils.h" #include "number_modifiers.h" @@ -14,6 +16,8 @@ U_NAMESPACE_BEGIN namespace number { namespace impl { +// LongNameHandler takes care of formatting currency and measurement unit names, +// as well as populating the gender of measure units. 
class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public UMemory { public: static UnicodeString getUnitDisplayName( @@ -22,6 +26,8 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public UNumberUnitWidth width, UErrorCode& status); + // This function does not support inflections or other newer NumberFormatter + // features: it exists to support the older not-recommended MeasureFormat. static UnicodeString getUnitPattern( const Locale& loc, const MeasureUnit& unit, @@ -33,34 +39,230 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status); - static LongNameHandler* - forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, UErrorCode &status); + /** + * Construct a localized LongNameHandler for the specified MeasureUnit. + * + * Mixed units are not supported, use MixedUnitLongNameHandler::forMeasureUnit. + * + * This function uses a fillIn instead of returning a pointer, because we + * want to fill in instances in a MemoryPool (which cannot adopt pointers it + * didn't create itself). + * + * @param loc The desired locale. + * @param unitRef The measure unit to construct a LongNameHandler for. + * @param width Specifies the desired unit rendering. + * @param unitDisplayCase Specifies the desired grammatical case. If the + * specified case is not found, we fall back to nominative or no-case. + * @param rules Does not take ownership. + * @param parent Does not take ownership. + * @param fillIn Required. 
+ */ + static void forMeasureUnit(const Locale &loc, + const MeasureUnit &unitRef, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + LongNameHandler *fillIn, + UErrorCode &status); + /** + * Selects the plural-appropriate Modifier from the set of fModifiers based + * on the plural form. + */ void processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const U_OVERRIDE; const Modifier* getModifier(Signum signum, StandardPlural::Form plural) const U_OVERRIDE; private: + // A set of pre-computed modifiers, one for each plural form. SimpleModifier fModifiers[StandardPlural::Form::COUNT]; + // Not owned const PluralRules *rules; + // Not owned const MicroPropsGenerator *parent; + // Grammatical gender of the formatted result. Not owned: must point at + // static or global strings. + const char *gender = ""; LongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent) - : rules(rules), parent(parent) {} + : rules(rules), parent(parent) { + } - static LongNameHandler* - forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, - const UNumberUnitWidth &width, const PluralRules *rules, - const MicroPropsGenerator *parent, UErrorCode &status); + LongNameHandler() : rules(nullptr), parent(nullptr) { + } + + // Enables MemoryPool<LongNameHandler>::emplaceBack(): requires access to + // the private constructors. + friend class MemoryPool<LongNameHandler>; + + // Allow macrosToMicroGenerator to call the private default constructor. + friend class NumberFormatterImpl; + + // Fills in LongNameHandler fields for formatting units identified by `unit`. + static void forArbitraryUnit(const Locale &loc, + const MeasureUnit &unit, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + LongNameHandler *fillIn, + UErrorCode &status); + // Roughly corresponds to patternTimes(...) 
in the spec: + // https://unicode.org/reports/tr35/tr35-general.html#compound-units + // + // productUnit is an rvalue reference to indicate this function consumes it, + // leaving it in a not-useful / undefined state. + static void processPatternTimes(MeasureUnitImpl &&productUnit, + Locale loc, + const UNumberUnitWidth &width, + const char *caseVariant, + UnicodeString *outArray, + UErrorCode &status); + + // Sets fModifiers to use the patterns from `simpleFormats`. void simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, UErrorCode &status); + + // Sets fModifiers to a combination of `leadFormats` (one per plural form) + // and `trailFormat` appended to each. + // + // With a leadFormat of "{0}m" and a trailFormat of "{0}/s", it produces a + // pattern of "{0}m/s" by inserting each leadFormat pattern into trailFormat. void multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, Field field, UErrorCode &status); }; +// Similar to LongNameHandler, but only for MIXED units. +class MixedUnitLongNameHandler : public MicroPropsGenerator, public ModifierStore, public UMemory { + public: + /** + * Construct a localized MixedUnitLongNameHandler for the specified + * MeasureUnit. It must be a MIXED unit. + * + * This function uses a fillIn instead of returning a pointer, because we + * want to fill in instances in a MemoryPool (which cannot adopt pointers it + * didn't create itself). + * + * @param loc The desired locale. + * @param mixedUnit The mixed measure unit to construct a + * MixedUnitLongNameHandler for. + * @param width Specifies the desired unit rendering. + * @param unitDisplayCase Specifies the desired grammatical case. If the + * specified case is not found, we fall back to nominative or no-case. + * @param rules Does not take ownership. + * @param parent Does not take ownership. + * @param fillIn Required. 
+ */ + static void forMeasureUnit(const Locale &loc, + const MeasureUnit &mixedUnit, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + MixedUnitLongNameHandler *fillIn, + UErrorCode &status); + + /** + * Produces a plural-appropriate Modifier for a mixed unit: `quantity` is + * taken as the final smallest unit, while the larger unit values must be + * provided via `micros.mixedMeasures`. + */ + void processQuantity(DecimalQuantity &quantity, MicroProps &micros, + UErrorCode &status) const U_OVERRIDE; + + // Required for ModifierStore. And ModifierStore is required by + // SimpleModifier constructor's last parameter. We assert this will never get + // called though. + const Modifier *getModifier(Signum signum, StandardPlural::Form plural) const U_OVERRIDE; + + private: + // Not owned + const PluralRules *rules; + + // Not owned + const MicroPropsGenerator *parent; + + // Total number of units in the MeasureUnit this handler was configured for: + // for "foot-and-inch", this will be 2. + int32_t fMixedUnitCount = 1; + + // Stores unit data for each of the individual units. For each unit, it + // stores ARRAY_LENGTH strings, as returned by getMeasureData. (Each unit + // with index `i` has ARRAY_LENGTH strings starting at index + // `i*ARRAY_LENGTH` in this array.) + LocalArray<UnicodeString> fMixedUnitData; + + // Formats the larger units of Mixed Unit measurements. + LocalizedNumberFormatter fNumberFormatter; + + // Joins mixed units together. + LocalPointer<ListFormatter> fListFormatter; + + MixedUnitLongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent) + : rules(rules), parent(parent) { + } + + MixedUnitLongNameHandler() : rules(nullptr), parent(nullptr) { + } + + // Allow macrosToMicroGenerator to call the private default constructor. 
+ friend class NumberFormatterImpl; + + // Enables MemoryPool<MixedUnitLongNameHandler>::emplaceBack(): requires access to + // the private constructors. + friend class MemoryPool<MixedUnitLongNameHandler>; + + // For a mixed unit, returns a Modifier that takes only one parameter: the + // smallest and final unit of the set. The bigger units' values and labels + // get baked into this Modifier, together with the unit label of the final + // unit. + const Modifier *getMixedUnitModifier(DecimalQuantity &quantity, MicroProps &micros, + UErrorCode &status) const; +}; + +/** + * A MicroPropsGenerator that multiplexes between different LongNameHandlers, + * depending on the outputUnit. + * + * See processQuantity() for the input requirements. + */ +class LongNameMultiplexer : public MicroPropsGenerator, public UMemory { + public: + // Produces a multiplexer for LongNameHandlers, one for each unit in + // `units`. An individual unit might be a mixed unit. + static LongNameMultiplexer *forMeasureUnits(const Locale &loc, + const MaybeStackVector<MeasureUnit> &units, + const UNumberUnitWidth &width, + const char *unitDisplayCase, + const PluralRules *rules, + const MicroPropsGenerator *parent, + UErrorCode &status); + + // The output unit must be provided via `micros.outputUnit`, it must match + // one of the units provided to the factory function. + void processQuantity(DecimalQuantity &quantity, MicroProps &micros, + UErrorCode &status) const U_OVERRIDE; + + private: + /** + * Because we only know which LongNameHandler we wish to call after calling + * earlier MicroPropsGenerators in the chain, LongNameMultiplexer keeps the + * parent link, while the LongNameHandlers are given no parents. + */ + MemoryPool<LongNameHandler> fLongNameHandlers; + MemoryPool<MixedUnitLongNameHandler> fMixedUnitHandlers; + // Unowned pointers to instances owned by the MemoryPools above. 
+ MaybeStackArray<MicroPropsGenerator *, 8> fHandlers; + // Each MeasureUnit corresponds to the same-index MicroPropsGenerator + // pointed to in fHandlers. + LocalArray<MeasureUnit> fMeasureUnits; + + const MicroPropsGenerator *fParent; + + LongNameMultiplexer(const MicroPropsGenerator *parent) : fParent(parent) { + } +}; + } // namespace impl } // namespace number U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/number_mapper.cpp b/contrib/libs/icu/i18n/number_mapper.cpp index ec617438c9..2d4d47a094 100644 --- a/contrib/libs/icu/i18n/number_mapper.cpp +++ b/contrib/libs/icu/i18n/number_mapper.cpp @@ -92,6 +92,8 @@ MacroProps NumberPropertyMapper::oldToNew(const DecimalFormatProperties& propert int32_t minSig = properties.minimumSignificantDigits; int32_t maxSig = properties.maximumSignificantDigits; double roundingIncrement = properties.roundingIncrement; + // Not assigning directly to macros.roundingMode here: we change + // roundingMode if and when we also change macros.precision. RoundingMode roundingMode = properties.roundingMode.getOrDefault(UNUM_ROUND_HALFEVEN); bool explicitMinMaxFrac = minFrac != -1 || maxFrac != -1; bool explicitMinMaxSig = minSig != -1 || maxSig != -1; @@ -145,7 +147,7 @@ MacroProps NumberPropertyMapper::oldToNew(const DecimalFormatProperties& propert precision = Precision::constructCurrency(currencyUsage); } if (!precision.isBogus()) { - precision.fRoundingMode = roundingMode; + macros.roundingMode = roundingMode; macros.precision = precision; } @@ -239,7 +241,7 @@ MacroProps NumberPropertyMapper::oldToNew(const DecimalFormatProperties& propert // TODO: Reset maxSig_ = 1 + minFrac_ to follow the spec. 
macros.precision = Precision::constructSignificant(minSig_, maxSig_); } - macros.precision.fRoundingMode = roundingMode; + macros.roundingMode = roundingMode; } } @@ -379,7 +381,10 @@ void PropertiesAffixPatternProvider::setTo(const DecimalFormatProperties& proper AffixUtils::hasCurrencySymbols(ppp, status) || AffixUtils::hasCurrencySymbols(psp, status) || AffixUtils::hasCurrencySymbols(npp, status) || - AffixUtils::hasCurrencySymbols(nsp, status)); + AffixUtils::hasCurrencySymbols(nsp, status) || + properties.currencyAsDecimal); + + fCurrencyAsDecimal = properties.currencyAsDecimal; } char16_t PropertiesAffixPatternProvider::charAt(int flags, int i) const { @@ -444,6 +449,10 @@ bool PropertiesAffixPatternProvider::hasBody() const { return true; } +bool PropertiesAffixPatternProvider::currencyAsDecimal() const { + return fCurrencyAsDecimal; +} + void CurrencyPluralInfoAffixProvider::setTo(const CurrencyPluralInfo& cpi, const DecimalFormatProperties& properties, @@ -504,5 +513,9 @@ bool CurrencyPluralInfoAffixProvider::hasBody() const { return affixesByPlural[StandardPlural::OTHER].hasBody(); } +bool CurrencyPluralInfoAffixProvider::currencyAsDecimal() const { + return affixesByPlural[StandardPlural::OTHER].currencyAsDecimal(); +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/number_mapper.h b/contrib/libs/icu/i18n/number_mapper.h index d18b8b3c43..8879b7a94e 100644 --- a/contrib/libs/icu/i18n/number_mapper.h +++ b/contrib/libs/icu/i18n/number_mapper.h @@ -56,12 +56,15 @@ class PropertiesAffixPatternProvider : public AffixPatternProvider, public UMemo bool hasBody() const U_OVERRIDE; + bool currencyAsDecimal() const U_OVERRIDE; + private: UnicodeString posPrefix; UnicodeString posSuffix; UnicodeString negPrefix; UnicodeString negSuffix; bool isCurrencyPattern; + bool fCurrencyAsDecimal; PropertiesAffixPatternProvider() = default; // puts instance in valid but undefined state @@ -107,6 +110,8 @@ class CurrencyPluralInfoAffixProvider : 
public AffixPatternProvider, public UMem bool hasBody() const U_OVERRIDE; + bool currencyAsDecimal() const U_OVERRIDE; + private: PropertiesAffixPatternProvider affixesByPlural[StandardPlural::COUNT]; @@ -136,6 +141,16 @@ class AutoAffixPatternProvider { } } + inline void setTo(const AffixPatternProvider* provider, UErrorCode& status) { + if (auto ptr = dynamic_cast<const PropertiesAffixPatternProvider*>(provider)) { + propertiesAPP = *ptr; + } else if (auto ptr = dynamic_cast<const CurrencyPluralInfoAffixProvider*>(provider)) { + currencyPluralInfoAPP = *ptr; + } else { + status = U_INTERNAL_PROGRAM_ERROR; + } + } + inline const AffixPatternProvider& get() const { if (!currencyPluralInfoAPP.isBogus()) { return currencyPluralInfoAPP; @@ -153,9 +168,9 @@ class AutoAffixPatternProvider { /** * A struct for ownership of a few objects needed for formatting. */ -struct DecimalFormatWarehouse { +struct DecimalFormatWarehouse : public UMemory { AutoAffixPatternProvider affixProvider; - + LocalPointer<PluralRules> rules; }; diff --git a/contrib/libs/icu/i18n/number_microprops.h b/contrib/libs/icu/i18n/number_microprops.h index 56512f5e6f..c34e7c17e9 100644 --- a/contrib/libs/icu/i18n/number_microprops.h +++ b/contrib/libs/icu/i18n/number_microprops.h @@ -18,10 +18,60 @@ #include "number_roundingutils.h" #include "decNumber.h" #include "charstr.h" +#include "util.h" U_NAMESPACE_BEGIN namespace number { namespace impl { +/** + * A copyable container for the integer values of mixed unit measurements. + * + * If memory allocation fails during copying, no values are copied and status is + * set to U_MEMORY_ALLOCATION_ERROR. + */ +class IntMeasures : public MaybeStackArray<int64_t, 2> { + public: + /** + * Default constructor initializes with internal T[stackCapacity] buffer. + * + * Stack Capacity: most mixed units are expected to consist of two or three + * subunits, so one or two integer measures should be enough. 
+ */ + IntMeasures() : MaybeStackArray<int64_t, 2>() {} + + /** + * Copy constructor. + * + * If memory allocation fails during copying, no values are copied and + * status is set to U_MEMORY_ALLOCATION_ERROR. + */ + IntMeasures(const IntMeasures &other) : MaybeStackArray<int64_t, 2>() { + this->operator=(other); + } + + // Assignment operator + IntMeasures &operator=(const IntMeasures &rhs) { + if (this == &rhs) { + return *this; + } + copyFrom(rhs, status); + return *this; + } + + /** Move constructor */ + IntMeasures(IntMeasures &&src) = default; + + /** Move assignment */ + IntMeasures &operator=(IntMeasures &&src) = default; + + UErrorCode status = U_ZERO_ERROR; +}; + +/** + * MicroProps is the first MicroPropsGenerator that should be called, + * producing an initialized MicroProps instance that will be passed on and + * modified throughout the rest of the chain of MicroPropsGenerator instances. + */ struct MicroProps : public MicroPropsGenerator { // NOTE: All of these fields are properly initialized in NumberFormatterImpl. @@ -34,21 +84,62 @@ struct MicroProps : public MicroPropsGenerator { bool useCurrency; char nsName[9]; + // Currency symbol to be used as the decimal separator + UnicodeString currencyAsDecimal = ICU_Utility::makeBogusString(); + + // No ownership: must point at a string which will outlive MicroProps + // instances, e.g. a string with static storage duration, or just a string + // that will never be deallocated or modified. + const char *gender; + // Note: This struct has no direct ownership of the following pointers. const DecimalFormatSymbols* symbols; + + // Pointers to Modifiers provided by the number formatting pipeline (when + // the value is known): + + // A Modifier provided by LongNameHandler, used for currency long names and + // units. If there is no LongNameHandler needed, this should be an + // EmptyModifier. (This is typically the third modifier applied.) 
const Modifier* modOuter; + // A Modifier for short currencies and compact notation. (This is typically + // the second modifier applied.) const Modifier* modMiddle = nullptr; + // A Modifier provided by ScientificHandler, used for scientific notation. + // This is typically the first modifier applied. const Modifier* modInner; // The following "helper" fields may optionally be used during the MicroPropsGenerator. // They live here to retain memory. struct { + // The ScientificModifier for which ScientificHandler is responsible. + // ScientificHandler::processQuantity() modifies this Modifier. ScientificModifier scientificModifier; + // EmptyModifier used for modOuter EmptyModifier emptyWeakModifier{false}; + // EmptyModifier used for modInner EmptyModifier emptyStrongModifier{true}; MultiplierFormatHandler multiplier; + // A Modifier used for Mixed Units. When formatting mixed units, + // LongNameHandler assigns this Modifier. + SimpleModifier mixedUnitModifier; } helpers; + // The MeasureUnit with which the output is represented. May also have + // UMEASURE_UNIT_MIXED complexity, in which case mixedMeasures comes into + // play. + MeasureUnit outputUnit; + + // Contains all the values of each unit in mixed units. For quantity (which is the floating value of + // the smallest unit in the mixed unit), the value stores in `quantity`. + // NOTE: the value of quantity in `mixedMeasures` will be left unset. + IntMeasures mixedMeasures; + + // Points to quantity position, -1 if the position is not set yet. 
+ int32_t indexOfQuantity = -1; + + // Number of mixedMeasures that have been populated + int32_t mixedMeasuresCount = 0; MicroProps() = default; @@ -56,7 +147,23 @@ struct MicroProps : public MicroPropsGenerator { MicroProps& operator=(const MicroProps& other) = default; - void processQuantity(DecimalQuantity&, MicroProps& micros, UErrorCode& status) const U_OVERRIDE { + /** + * As MicroProps is the "base instance", this implementation of + * MicroPropsGenerator::processQuantity() just ensures that the output + * `micros` is correctly initialized. + * + * For the "safe" invocation of this function, micros must not be *this, + * such that a copy of the base instance is made. For the "unsafe" path, + * this function can be used only once, because the base MicroProps instance + * will be modified and thus not be available for re-use. + * + * @param quantity The quantity for consideration and optional mutation. + * @param micros The MicroProps instance to populate. If this parameter is + * not already `*this`, it will be overwritten with a copy of `*this`. + */ + void processQuantity(DecimalQuantity &quantity, MicroProps &micros, + UErrorCode &status) const U_OVERRIDE { + (void) quantity; (void) status; if (this == &micros) { // Unsafe path: no need to perform a copy. @@ -65,6 +172,7 @@ struct MicroProps : public MicroPropsGenerator { U_ASSERT(exhausted); } else { // Safe path: copy self into the output micros. 
+ U_ASSERT(!exhausted); micros = *this; } } diff --git a/contrib/libs/icu/i18n/number_modifiers.cpp b/contrib/libs/icu/i18n/number_modifiers.cpp index 3becb7ba85..b74bfd06fc 100644 --- a/contrib/libs/icu/i18n/number_modifiers.cpp +++ b/contrib/libs/icu/i18n/number_modifiers.cpp @@ -25,13 +25,13 @@ const int32_t ARG_NUM_LIMIT = 0x100; icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER; UnicodeSet *UNISET_DIGIT = nullptr; -UnicodeSet *UNISET_NOTS = nullptr; +UnicodeSet *UNISET_NOTSZ = nullptr; UBool U_CALLCONV cleanupDefaultCurrencySpacing() { delete UNISET_DIGIT; UNISET_DIGIT = nullptr; - delete UNISET_NOTS; - UNISET_NOTS = nullptr; + delete UNISET_NOTSZ; + UNISET_NOTSZ = nullptr; gDefaultCurrencySpacingInitOnce.reset(); return TRUE; } @@ -39,13 +39,13 @@ UBool U_CALLCONV cleanupDefaultCurrencySpacing() { void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) { ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing); UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status); - UNISET_NOTS = new UnicodeSet(UnicodeString(u"[:^S:]"), status); - if (UNISET_DIGIT == nullptr || UNISET_NOTS == nullptr) { + UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status); + if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } UNISET_DIGIT->freeze(); - UNISET_NOTS->freeze(); + UNISET_NOTSZ->freeze(); } } // namespace @@ -92,13 +92,13 @@ bool ConstantAffixModifier::isStrong() const { bool ConstantAffixModifier::containsField(Field field) const { (void)field; // This method is not currently used. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } void ConstantAffixModifier::getParameters(Parameters& output) const { (void)output; // This method is not currently used. 
- UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const { @@ -181,7 +181,7 @@ bool SimpleModifier::isStrong() const { bool SimpleModifier::containsField(Field field) const { (void)field; // This method is not currently used. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } void SimpleModifier::getParameters(Parameters& output) const { @@ -469,8 +469,8 @@ CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbol status); if (pattern.compare(u"[:digit:]", -1) == 0) { return *UNISET_DIGIT; - } else if (pattern.compare(u"[:^S:]", -1) == 0) { - return *UNISET_NOTS; + } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) { + return *UNISET_NOTSZ; } else { return UnicodeSet(pattern, status); } diff --git a/contrib/libs/icu/i18n/number_modifiers.h b/contrib/libs/icu/i18n/number_modifiers.h index 375254310c..09af3f4881 100644 --- a/contrib/libs/icu/i18n/number_modifiers.h +++ b/contrib/libs/icu/i18n/number_modifiers.h @@ -273,7 +273,7 @@ class U_I18N_API EmptyModifier : public Modifier, public UMemory { }; /** - * This implementation of ModifierStore adopts Modifer pointers. + * This implementation of ModifierStore adopts Modifier pointers. 
*/ class U_I18N_API AdoptingModifierStore : public ModifierStore, public UMemory { public: diff --git a/contrib/libs/icu/i18n/number_multiplier.cpp b/contrib/libs/icu/i18n/number_multiplier.cpp index 8f07e548de..58e1e441bd 100644 --- a/contrib/libs/icu/i18n/number_multiplier.cpp +++ b/contrib/libs/icu/i18n/number_multiplier.cpp @@ -46,6 +46,7 @@ Scale::Scale(const Scale& other) } Scale& Scale::operator=(const Scale& other) { + if (this == &other) { return *this; } // self-assignment: no-op fMagnitude = other.fMagnitude; if (other.fArbitrary != nullptr) { UErrorCode localStatus = U_ZERO_ERROR; diff --git a/contrib/libs/icu/i18n/number_output.cpp b/contrib/libs/icu/i18n/number_output.cpp index 40192a9225..2c2c25eaed 100644 --- a/contrib/libs/icu/i18n/number_output.cpp +++ b/contrib/libs/icu/i18n/number_output.cpp @@ -5,11 +5,13 @@ #if !UCONFIG_NO_FORMATTING +#include "unicode/measunit.h" #include "unicode/numberformatter.h" #include "number_utypes.h" #include "util.h" #include "number_decimalquantity.h" #include "number_decnum.h" +#include "numrange_impl.h" U_NAMESPACE_BEGIN namespace number { @@ -32,6 +34,16 @@ void FormattedNumber::getAllFieldPositionsImpl(FieldPositionIteratorHandler& fpi fData->getAllFieldPositions(fpih, status); } +MeasureUnit FormattedNumber::getOutputUnit(UErrorCode& status) const { + UPRV_FORMATTED_VALUE_METHOD_GUARD(MeasureUnit()) + return fData->outputUnit; +} + +const char *FormattedNumber::getGender(UErrorCode &status) const { + UPRV_FORMATTED_VALUE_METHOD_GUARD("") + return fData->gender; +} + void FormattedNumber::getDecimalQuantity(impl::DecimalQuantity& output, UErrorCode& status) const { UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG) output = fData->quantity; @@ -41,6 +53,32 @@ void FormattedNumber::getDecimalQuantity(impl::DecimalQuantity& output, UErrorCo impl::UFormattedNumberData::~UFormattedNumberData() = default; +UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedNumberRange) + +#define UPRV_NOARG + +void 
FormattedNumberRange::getDecimalNumbers(ByteSink& sink1, ByteSink& sink2, UErrorCode& status) const { + UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG) + impl::DecNum decnum1; + impl::DecNum decnum2; + fData->quantity1.toDecNum(decnum1, status).toString(sink1, status); + fData->quantity2.toDecNum(decnum2, status).toString(sink2, status); +} + +UNumberRangeIdentityResult FormattedNumberRange::getIdentityResult(UErrorCode& status) const { + UPRV_FORMATTED_VALUE_METHOD_GUARD(UNUM_IDENTITY_RESULT_NOT_EQUAL) + return fData->identityResult; +} + +const impl::UFormattedNumberRangeData* FormattedNumberRange::getData(UErrorCode& status) const { + UPRV_FORMATTED_VALUE_METHOD_GUARD(nullptr) + return fData; +} + + +impl::UFormattedNumberRangeData::~UFormattedNumberRangeData() = default; + + } // namespace number U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/number_patternmodifier.cpp b/contrib/libs/icu/i18n/number_patternmodifier.cpp index 45602942ae..b6543b262b 100644 --- a/contrib/libs/icu/i18n/number_patternmodifier.cpp +++ b/contrib/libs/icu/i18n/number_patternmodifier.cpp @@ -28,9 +28,13 @@ void MutablePatternModifier::setPatternInfo(const AffixPatternProvider* patternI fField = field; } -void MutablePatternModifier::setPatternAttributes(UNumberSignDisplay signDisplay, bool perMille) { +void MutablePatternModifier::setPatternAttributes( + UNumberSignDisplay signDisplay, + bool perMille, + bool approximately) { fSignDisplay = signDisplay; fPerMilleReplacesPercent = perMille; + fApproximately = approximately; } void MutablePatternModifier::setSymbols(const DecimalFormatSymbols* symbols, @@ -244,19 +248,19 @@ bool MutablePatternModifier::isStrong() const { bool MutablePatternModifier::containsField(Field field) const { (void)field; // This method is not currently used. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } void MutablePatternModifier::getParameters(Parameters& output) const { (void)output; // This method is not currently used. 
- UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } bool MutablePatternModifier::semanticallyEquivalent(const Modifier& other) const { (void)other; // This method is not currently used. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } int32_t MutablePatternModifier::insertPrefix(FormattedStringBuilder& sb, int position, UErrorCode& status) { @@ -277,6 +281,7 @@ void MutablePatternModifier::prepareAffix(bool isPrefix) { *fPatternInfo, isPrefix, PatternStringUtils::resolveSignDisplay(fSignDisplay, fSignum), + fApproximately, fPlural, fPerMilleReplacesPercent, currentAffix); @@ -289,22 +294,14 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { return fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kMinusSignSymbol); case AffixPatternType::TYPE_PLUS_SIGN: return fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPlusSignSymbol); + case AffixPatternType::TYPE_APPROXIMATELY_SIGN: + return fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kApproximatelySignSymbol); case AffixPatternType::TYPE_PERCENT: return fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPercentSymbol); case AffixPatternType::TYPE_PERMILLE: return fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPerMillSymbol); - case AffixPatternType::TYPE_CURRENCY_SINGLE: { - // UnitWidth ISO and HIDDEN overrides the singular currency symbol. 
- if (fUnitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE) { - return fCurrencySymbols.getIntlCurrencySymbol(localStatus); - } else if (fUnitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_HIDDEN) { - return UnicodeString(); - } else if (fUnitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW) { - return fCurrencySymbols.getNarrowCurrencySymbol(localStatus); - } else { - return fCurrencySymbols.getCurrencySymbol(localStatus); - } - } + case AffixPatternType::TYPE_CURRENCY_SINGLE: + return getCurrencySymbolForUnitWidth(localStatus); case AffixPatternType::TYPE_CURRENCY_DOUBLE: return fCurrencySymbols.getIntlCurrencySymbol(localStatus); case AffixPatternType::TYPE_CURRENCY_TRIPLE: @@ -318,13 +315,32 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { case AffixPatternType::TYPE_CURRENCY_QUINT: return UnicodeString(u"\uFFFD"); default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; + } +} + +UnicodeString MutablePatternModifier::getCurrencySymbolForUnitWidth(UErrorCode& status) const { + switch (fUnitWidth) { + case UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW: + return fCurrencySymbols.getNarrowCurrencySymbol(status); + case UNumberUnitWidth::UNUM_UNIT_WIDTH_SHORT: + return fCurrencySymbols.getCurrencySymbol(status); + case UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE: + return fCurrencySymbols.getIntlCurrencySymbol(status); + case UNumberUnitWidth::UNUM_UNIT_WIDTH_FORMAL: + return fCurrencySymbols.getFormalCurrencySymbol(status); + case UNumberUnitWidth::UNUM_UNIT_WIDTH_VARIANT: + return fCurrencySymbols.getVariantCurrencySymbol(status); + case UNumberUnitWidth::UNUM_UNIT_WIDTH_HIDDEN: + return UnicodeString(); + default: + return fCurrencySymbols.getCurrencySymbol(status); } } UnicodeString MutablePatternModifier::toUnicodeString() const { // Never called by AffixUtils - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/number_patternmodifier.h 
b/contrib/libs/icu/i18n/number_patternmodifier.h index 5ba842d569..4f825e1ed2 100644 --- a/contrib/libs/icu/i18n/number_patternmodifier.h +++ b/contrib/libs/icu/i18n/number_patternmodifier.h @@ -116,8 +116,10 @@ class U_I18N_API MutablePatternModifier * Whether to force a plus sign on positive numbers. * @param perMille * Whether to substitute the percent sign in the pattern with a permille sign. + * @param approximately + * Whether to prepend approximately to the sign */ - void setPatternAttributes(UNumberSignDisplay signDisplay, bool perMille); + void setPatternAttributes(UNumberSignDisplay signDisplay, bool perMille, bool approximately); /** * Sets locale-specific details that affect the symbols substituted into the pattern string affixes. @@ -193,6 +195,11 @@ class U_I18N_API MutablePatternModifier */ UnicodeString getSymbol(AffixPatternType type) const U_OVERRIDE; + /** + * Returns the currency symbol for the unit width specified in setSymbols() + */ + UnicodeString getCurrencySymbolForUnitWidth(UErrorCode& status) const; + UnicodeString toUnicodeString() const; private: @@ -204,6 +211,7 @@ class U_I18N_API MutablePatternModifier Field fField; UNumberSignDisplay fSignDisplay; bool fPerMilleReplacesPercent; + bool fApproximately; // Symbol details (initialized in setSymbols) const DecimalFormatSymbols *fSymbols; diff --git a/contrib/libs/icu/i18n/number_patternstring.cpp b/contrib/libs/icu/i18n/number_patternstring.cpp index 9d84505606..e819d39e96 100644 --- a/contrib/libs/icu/i18n/number_patternstring.cpp +++ b/contrib/libs/icu/i18n/number_patternstring.cpp @@ -50,7 +50,7 @@ PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFo char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { const Endpoints& endpoints = getEndpoints(flags); if (index < 0 || index >= endpoints.end - endpoints.start) { - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } return pattern.charAt(endpoints.start + index); } @@ -115,6 +115,10 @@ bool 
ParsedPatternInfo::hasBody() const { return positive.integerTotal > 0; } +bool ParsedPatternInfo::currencyAsDecimal() const { + return positive.hasCurrencyDecimal; +} + ///////////////////////////////////////////////////// /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION /// ///////////////////////////////////////////////////// @@ -127,8 +131,20 @@ UChar32 ParsedPatternInfo::ParserState::peek() { } } +UChar32 ParsedPatternInfo::ParserState::peek2() { + if (offset == pattern.length()) { + return -1; + } + int32_t cp1 = pattern.char32At(offset); + int32_t offset2 = offset + U16_LENGTH(cp1); + if (offset2 == pattern.length()) { + return -1; + } + return pattern.char32At(offset2); +} + UChar32 ParsedPatternInfo::ParserState::next() { - int codePoint = peek(); + int32_t codePoint = peek(); offset += U16_LENGTH(codePoint); return codePoint; } @@ -286,6 +302,35 @@ void ParsedPatternInfo::consumeFormat(UErrorCode& status) { currentSubpattern->widthExceptAffixes += 1; consumeFractionFormat(status); if (U_FAILURE(status)) { return; } + } else if (state.peek() == u'¤') { + // Check if currency is a decimal separator + switch (state.peek2()) { + case u'#': + case u'0': + case u'1': + case u'2': + case u'3': + case u'4': + case u'5': + case u'6': + case u'7': + case u'8': + case u'9': + break; + default: + // Currency symbol followed by a non-numeric character; + // treat as a normal affix. + return; + } + // Currency symbol is followed by a numeric character; + // treat as a decimal separator. 
+ currentSubpattern->hasCurrencySign = true; + currentSubpattern->hasCurrencyDecimal = true; + currentSubpattern->hasDecimal = true; + currentSubpattern->widthExceptAffixes += 1; + state.next(); // consume the symbol + consumeFractionFormat(status); + if (U_FAILURE(status)) { return; } } } @@ -565,6 +610,9 @@ PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, Pars properties.decimalSeparatorAlwaysShown = false; } + // Persist the currency as decimal separator + properties.currencyAsDecimal = positive.hasCurrencyDecimal; + // Scientific notation settings if (positive.exponentZeros > 0) { properties.exponentSignAlwaysShown = positive.exponentHasPlusSign; @@ -750,7 +798,11 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP } // Decimal separator if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) { - sb.append(u'.'); + if (properties.currencyAsDecimal) { + sb.append(u'¤'); + } else { + sb.append(u'.'); + } } if (!useGrouping) { continue; @@ -820,7 +872,7 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP // Copy the positive digit format into the negative. // This is optional; the pattern is the same as if '#' were appended here instead. // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy. - // See http://bugs.icu-project.org/trac/ticket/13707 + // See https://unicode-org.atlassian.net/browse/ICU-13707 UnicodeString copy(sb); sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos); sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_SUFFIX)); @@ -869,6 +921,7 @@ PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFo UnicodeString table[LEN][2]; int standIdx = toLocalized ? 0 : 1; int localIdx = toLocalized ? 1 : 0; + // TODO: Add approximately sign here? 
table[0][standIdx] = u"%"; table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); table[1][standIdx] = u"‰"; @@ -1001,6 +1054,7 @@ PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFo void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, PatternSignType patternSignType, + bool approximately, StandardPlural::Form plural, bool perMilleReplacesPercent, UnicodeString& output) { @@ -1012,7 +1066,7 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& // (If not, we will use the positive subpattern.) bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && (patternSignType == PATTERN_SIGN_TYPE_NEG - || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign)); + || (patternInfo.negativeHasMinusSign() && (plusReplacesMinusSign || approximately))); // Resolve the flags for the affix pattern. int flags = 0; @@ -1034,10 +1088,24 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { prependSign = true; } else { - prependSign = plusReplacesMinusSign; + prependSign = plusReplacesMinusSign || approximately; } - // Compute the length of the affix pattern. + // What symbols should take the place of the sign placeholder? + const char16_t* signSymbols = u"-"; + if (approximately) { + if (plusReplacesMinusSign) { + signSymbols = u"~+"; + } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { + signSymbols = u"~-"; + } else { + signSymbols = u"~"; + } + } else if (plusReplacesMinusSign) { + signSymbols = u"+"; + } + + // Compute the number of tokens in the affix pattern (signSymbols is considered one token). int length = patternInfo.length(flags) + (prependSign ? 1 : 0); // Finally, set the result into the StringBuilder. 
@@ -1051,8 +1119,13 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& } else { candidate = patternInfo.charAt(flags, index); } - if (plusReplacesMinusSign && candidate == u'-') { - candidate = u'+'; + if (candidate == u'-') { + if (u_strlen(signSymbols) == 1) { + candidate = signSymbols[0]; + } else { + output.append(signSymbols[0]); + candidate = signSymbols[1]; + } } if (perMilleReplacesPercent && candidate == u'%') { candidate = u'‰'; @@ -1106,6 +1179,20 @@ PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDi } break; + case UNUM_SIGN_NEGATIVE: + case UNUM_SIGN_ACCOUNTING_NEGATIVE: + switch (signum) { + case SIGNUM_NEG: + return PATTERN_SIGN_TYPE_NEG; + case SIGNUM_NEG_ZERO: + case SIGNUM_POS_ZERO: + case SIGNUM_POS: + return PATTERN_SIGN_TYPE_POS; + default: + break; + } + break; + case UNUM_SIGN_NEVER: return PATTERN_SIGN_TYPE_POS; @@ -1113,7 +1200,7 @@ PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDi break; } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; return PATTERN_SIGN_TYPE_POS; } diff --git a/contrib/libs/icu/i18n/number_patternstring.h b/contrib/libs/icu/i18n/number_patternstring.h index 54f37fd7e2..94afda3722 100644 --- a/contrib/libs/icu/i18n/number_patternstring.h +++ b/contrib/libs/icu/i18n/number_patternstring.h @@ -62,6 +62,7 @@ struct U_I18N_API ParsedSubpatternInfo { bool hasPercentSign = false; bool hasPerMilleSign = false; bool hasCurrencySign = false; + bool hasCurrencyDecimal = false; bool hasMinusSign = false; bool hasPlusSign = false; @@ -104,6 +105,8 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor bool hasBody() const U_OVERRIDE; + bool currencyAsDecimal() const U_OVERRIDE; + private: struct U_I18N_API ParserState { const UnicodeString& pattern; // reference to the parent @@ -119,8 +122,13 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor return *this; } + /** Returns the next 
code point, or -1 if string is too short. */ UChar32 peek(); + /** Returns the code point after the next code point, or -1 if string is too short. */ + UChar32 peek2(); + + /** Returns the next code point and then steps forward. */ UChar32 next(); // TODO: We don't currently do anything with the message string. @@ -245,7 +253,7 @@ class U_I18N_API PatternStringUtils { * * This test is needed for both NumberPropertyMapper::oldToNew and * PatternStringUtils::propertiesToPatternString. In Java it cannot be - * exported by NumberPropertyMapper (package provate) so it is in + * exported by NumberPropertyMapper (package private) so it is in * PatternStringUtils, do the same in C. * * @param roundIncr @@ -308,6 +316,7 @@ class U_I18N_API PatternStringUtils { */ static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, PatternSignType patternSignType, + bool approximately, StandardPlural::Form plural, bool perMilleReplacesPercent, UnicodeString& output); diff --git a/contrib/libs/icu/i18n/number_rounding.cpp b/contrib/libs/icu/i18n/number_rounding.cpp index 3ffce673ad..877df63c8f 100644 --- a/contrib/libs/icu/i18n/number_rounding.cpp +++ b/contrib/libs/icu/i18n/number_rounding.cpp @@ -5,13 +5,17 @@ #if !UCONFIG_NO_FORMATTING +#include "charstr.h" #include "uassert.h" #include "unicode/numberformatter.h" #include "number_types.h" #include "number_decimalquantity.h" #include "double-conversion.h" #include "number_roundingutils.h" +#include "number_skeletons.h" +#include "number_decnum.h" #include "putilimp.h" +#include "string_segment.h" using namespace icu; using namespace icu::number; @@ -19,6 +23,41 @@ using namespace icu::number::impl; using double_conversion::DoubleToStringConverter; +using icu::StringSegment; + +void number::impl::parseIncrementOption(const StringSegment &segment, + Precision &outPrecision, + UErrorCode &status) { + // Need to do char <-> UChar conversion... 
+ U_ASSERT(U_SUCCESS(status)); + CharString buffer; + SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); + + // Utilize DecimalQuantity/decNumber to parse this for us. + DecimalQuantity dq; + UErrorCode localStatus = U_ZERO_ERROR; + DecNum decnum; + decnum.setTo({buffer.data(), buffer.length()}, localStatus); + dq.setToDecNum(decnum, localStatus); + if (U_FAILURE(localStatus) || decnum.isSpecial()) { + // throw new SkeletonSyntaxException("Invalid rounding increment", segment, e); + status = U_NUMBER_SKELETON_SYNTAX_ERROR; + return; + } + double increment = dq.toDouble(); + + // We also need to figure out how many digits. Do a brute force string operation. + int decimalOffset = 0; + while (decimalOffset < segment.length() && segment.charAt(decimalOffset) != '.') { + decimalOffset++; + } + if (decimalOffset == segment.length()) { + outPrecision = Precision::increment(increment); + } else { + int32_t fractionLength = segment.length() - decimalOffset - 1; + outPrecision = Precision::increment(increment).withMinFraction(fractionLength); + } +} namespace { @@ -84,7 +123,7 @@ digits_t roundingutils::doubleFractionLength(double input, int8_t* singleDigit) Precision Precision::unlimited() { - return Precision(RND_NONE, {}, kDefaultMode); + return Precision(RND_NONE, {}); } FractionPrecision Precision::integer() { @@ -157,6 +196,12 @@ Precision Precision::minMaxSignificantDigits(int32_t minSignificantDigits, int32 } } +Precision Precision::trailingZeroDisplay(UNumberTrailingZeroDisplay trailingZeroDisplay) const { + Precision result(*this); // copy constructor + result.fTrailingZeroDisplay = trailingZeroDisplay; + return result; +} + IncrementPrecision Precision::increment(double roundingIncrement) { if (roundingIncrement > 0.0) { return constructIncrement(roundingIncrement, 0); @@ -169,10 +214,32 @@ CurrencyPrecision Precision::currency(UCurrencyUsage currencyUsage) { return constructCurrency(currencyUsage); } +Precision 
FractionPrecision::withSignificantDigits( + int32_t minSignificantDigits, + int32_t maxSignificantDigits, + UNumberRoundingPriority priority) const { + if (fType == RND_ERROR) { return *this; } // no-op in error state + if (minSignificantDigits >= 1 && + maxSignificantDigits >= minSignificantDigits && + maxSignificantDigits <= kMaxIntFracSig) { + return constructFractionSignificant( + *this, + minSignificantDigits, + maxSignificantDigits, + priority); + } else { + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; + } +} + Precision FractionPrecision::withMinDigits(int32_t minSignificantDigits) const { if (fType == RND_ERROR) { return *this; } // no-op in error state if (minSignificantDigits >= 1 && minSignificantDigits <= kMaxIntFracSig) { - return constructFractionSignificant(*this, minSignificantDigits, -1); + return constructFractionSignificant( + *this, + 1, + minSignificantDigits, + UNUM_ROUNDING_PRIORITY_RELAXED); } else { return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } @@ -181,7 +248,10 @@ Precision FractionPrecision::withMinDigits(int32_t minSignificantDigits) const { Precision FractionPrecision::withMaxDigits(int32_t maxSignificantDigits) const { if (fType == RND_ERROR) { return *this; } // no-op in error state if (maxSignificantDigits >= 1 && maxSignificantDigits <= kMaxIntFracSig) { - return constructFractionSignificant(*this, -1, maxSignificantDigits); + return constructFractionSignificant(*this, + 1, + maxSignificantDigits, + UNUM_ROUNDING_PRIORITY_STRICT); } else { return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } @@ -195,11 +265,11 @@ Precision Precision::withCurrency(const CurrencyUnit &currency, UErrorCode &stat double increment = ucurr_getRoundingIncrementForUsage(isoCode, fUnion.currencyUsage, &status); int32_t minMaxFrac = ucurr_getDefaultFractionDigitsForUsage( isoCode, fUnion.currencyUsage, &status); - if (increment != 0.0) { - return constructIncrement(increment, minMaxFrac); - } else { - return constructFraction(minMaxFrac, minMaxFrac); - } + Precision retval = 
(increment != 0.0) + ? static_cast<Precision>(constructIncrement(increment, minMaxFrac)) + : static_cast<Precision>(constructFraction(minMaxFrac, minMaxFrac)); + retval.fTrailingZeroDisplay = fTrailingZeroDisplay; + return retval; } // Public method on CurrencyPrecision subclass @@ -229,7 +299,7 @@ FractionPrecision Precision::constructFraction(int32_t minFrac, int32_t maxFrac) settings.fMaxSig = -1; PrecisionUnion union_; union_.fracSig = settings; - return {RND_FRACTION, union_, kDefaultMode}; + return {RND_FRACTION, union_}; } Precision Precision::constructSignificant(int32_t minSig, int32_t maxSig) { @@ -240,17 +310,22 @@ Precision Precision::constructSignificant(int32_t minSig, int32_t maxSig) { settings.fMaxSig = static_cast<digits_t>(maxSig); PrecisionUnion union_; union_.fracSig = settings; - return {RND_SIGNIFICANT, union_, kDefaultMode}; + return {RND_SIGNIFICANT, union_}; } Precision -Precision::constructFractionSignificant(const FractionPrecision &base, int32_t minSig, int32_t maxSig) { +Precision::constructFractionSignificant( + const FractionPrecision &base, + int32_t minSig, + int32_t maxSig, + UNumberRoundingPriority priority) { FractionSignificantSettings settings = base.fUnion.fracSig; settings.fMinSig = static_cast<digits_t>(minSig); settings.fMaxSig = static_cast<digits_t>(maxSig); + settings.fPriority = priority; PrecisionUnion union_; union_.fracSig = settings; - return {RND_FRACTION_SIGNIFICANT, union_, kDefaultMode}; + return {RND_FRACTION_SIGNIFICANT, union_}; } IncrementPrecision Precision::constructIncrement(double increment, int32_t minFrac) { @@ -270,18 +345,18 @@ IncrementPrecision Precision::constructIncrement(double increment, int32_t minFr // NOTE: In C++, we must return the correct value type with the correct union. // It would be invalid to return a RND_FRACTION here because the methods on the // IncrementPrecision type assume that the union is backed by increment data. 
- return {RND_INCREMENT_ONE, union_, kDefaultMode}; + return {RND_INCREMENT_ONE, union_}; } else if (singleDigit == 5) { - return {RND_INCREMENT_FIVE, union_, kDefaultMode}; + return {RND_INCREMENT_FIVE, union_}; } else { - return {RND_INCREMENT, union_, kDefaultMode}; + return {RND_INCREMENT, union_}; } } CurrencyPrecision Precision::constructCurrency(UCurrencyUsage usage) { PrecisionUnion union_; union_.currencyUsage = usage; - return {RND_CURRENCY, union_, kDefaultMode}; + return {RND_CURRENCY, union_}; } @@ -341,9 +416,13 @@ RoundingImpl::chooseMultiplierAndApply(impl::DecimalQuantity &input, const impl: /** This is the method that contains the actual rounding logic. */ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } if (fPassThrough) { return; } + int32_t resolvedMinFraction = 0; switch (fPrecision.fType) { case Precision::RND_BOGUS: case Precision::RND_ERROR: @@ -360,8 +439,8 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const getRoundingMagnitudeFraction(fPrecision.fUnion.fracSig.fMaxFrac), fRoundingMode, status); - value.setMinFraction( - uprv_max(0, -getDisplayMagnitudeFraction(fPrecision.fUnion.fracSig.fMinFrac))); + resolvedMinFraction = + uprv_max(0, -getDisplayMagnitudeFraction(fPrecision.fUnion.fracSig.fMinFrac)); break; case Precision::RND_SIGNIFICANT: @@ -369,8 +448,8 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const getRoundingMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMaxSig), fRoundingMode, status); - value.setMinFraction( - uprv_max(0, -getDisplayMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMinSig))); + resolvedMinFraction = + uprv_max(0, -getDisplayMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMinSig)); // Make sure that digits are displayed on zero. 
if (value.isZeroish() && fPrecision.fUnion.fracSig.fMinSig > 0) { value.setMinInteger(1); @@ -378,23 +457,21 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const break; case Precision::RND_FRACTION_SIGNIFICANT: { - int32_t displayMag = getDisplayMagnitudeFraction(fPrecision.fUnion.fracSig.fMinFrac); - int32_t roundingMag = getRoundingMagnitudeFraction(fPrecision.fUnion.fracSig.fMaxFrac); - if (fPrecision.fUnion.fracSig.fMinSig == -1) { - // Max Sig override - int32_t candidate = getRoundingMagnitudeSignificant( - value, - fPrecision.fUnion.fracSig.fMaxSig); - roundingMag = uprv_max(roundingMag, candidate); + int32_t roundingMag1 = getRoundingMagnitudeFraction(fPrecision.fUnion.fracSig.fMaxFrac); + int32_t roundingMag2 = getRoundingMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMaxSig); + int32_t roundingMag; + if (fPrecision.fUnion.fracSig.fPriority == UNUM_ROUNDING_PRIORITY_RELAXED) { + roundingMag = uprv_min(roundingMag1, roundingMag2); } else { - // Min Sig override - int32_t candidate = getDisplayMagnitudeSignificant( - value, - fPrecision.fUnion.fracSig.fMinSig); - roundingMag = uprv_min(roundingMag, candidate); + roundingMag = uprv_max(roundingMag1, roundingMag2); } value.roundToMagnitude(roundingMag, fRoundingMode, status); - value.setMinFraction(uprv_max(0, -displayMag)); + + int32_t displayMag1 = getDisplayMagnitudeFraction(fPrecision.fUnion.fracSig.fMinFrac); + int32_t displayMag2 = getDisplayMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMinSig); + int32_t displayMag = uprv_min(displayMag1, displayMag2); + resolvedMinFraction = uprv_max(0, -displayMag); + break; } @@ -403,7 +480,7 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const fPrecision.fUnion.increment.fIncrement, fRoundingMode, status); - value.setMinFraction(fPrecision.fUnion.increment.fMinFrac); + resolvedMinFraction = fPrecision.fUnion.increment.fMinFrac; break; case Precision::RND_INCREMENT_ONE: @@ -411,7 +488,7 @@ 
void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const -fPrecision.fUnion.increment.fMaxFrac, fRoundingMode, status); - value.setMinFraction(fPrecision.fUnion.increment.fMinFrac); + resolvedMinFraction = fPrecision.fUnion.increment.fMinFrac; break; case Precision::RND_INCREMENT_FIVE: @@ -419,20 +496,27 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const -fPrecision.fUnion.increment.fMaxFrac, fRoundingMode, status); - value.setMinFraction(fPrecision.fUnion.increment.fMinFrac); + resolvedMinFraction = fPrecision.fUnion.increment.fMinFrac; break; case Precision::RND_CURRENCY: // Call .withCurrency() before .apply()! - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; + } + + if (fPrecision.fTrailingZeroDisplay == UNUM_TRAILING_ZERO_AUTO || + // PLURAL_OPERAND_T returns fraction digits as an integer + value.getPluralOperand(PLURAL_OPERAND_T) != 0) { + value.setMinFraction(resolvedMinFraction); } } void RoundingImpl::apply(impl::DecimalQuantity &value, int32_t minInt, UErrorCode /*status*/) { // This method is intended for the one specific purpose of helping print "00.000E0". + // Question: Is it useful to look at trailingZeroDisplay here? 
U_ASSERT(isSignificantDigits()); U_ASSERT(value.isZeroish()); value.setMinFraction(fPrecision.fUnion.fracSig.fMinSig - minInt); diff --git a/contrib/libs/icu/i18n/number_roundingutils.h b/contrib/libs/icu/i18n/number_roundingutils.h index 3e37f31954..06fadd29fd 100644 --- a/contrib/libs/icu/i18n/number_roundingutils.h +++ b/contrib/libs/icu/i18n/number_roundingutils.h @@ -8,6 +8,7 @@ #define __NUMBER_ROUNDINGUTILS_H__ #include "number_types.h" +#include "string_segment.h" U_NAMESPACE_BEGIN namespace number { @@ -44,6 +45,9 @@ enum Section { inline bool getRoundingDirection(bool isEven, bool isNegative, Section section, RoundingMode roundingMode, UErrorCode &status) { + if (U_FAILURE(status)) { + return false; + } switch (roundingMode) { case RoundingMode::UNUM_ROUND_UP: // round away from zero @@ -100,6 +104,45 @@ getRoundingDirection(bool isEven, bool isNegative, Section section, RoundingMode } break; + case RoundingMode::UNUM_ROUND_HALF_ODD: + switch (section) { + case SECTION_MIDPOINT: + return !isEven; + case SECTION_LOWER: + return true; + case SECTION_UPPER: + return false; + default: + break; + } + break; + + case RoundingMode::UNUM_ROUND_HALF_CEILING: + switch (section) { + case SECTION_MIDPOINT: + return isNegative; + case SECTION_LOWER: + return true; + case SECTION_UPPER: + return false; + default: + break; + } + break; + + case RoundingMode::UNUM_ROUND_HALF_FLOOR: + switch (section) { + case SECTION_MIDPOINT: + return !isNegative; + case SECTION_LOWER: + return true; + case SECTION_UPPER: + return false; + default: + break; + } + break; + default: break; } @@ -187,8 +230,22 @@ class RoundingImpl { Precision fPrecision; UNumberFormatRoundingMode fRoundingMode; bool fPassThrough = true; // default value + + // Permits access to fPrecision. + friend class units::UnitsRouter; + + // Permits access to fPrecision. 
+ friend class UnitConversionHandler; }; +/** + * Parses Precision-related skeleton strings without knowledge of MacroProps + * - see blueprint_helpers::parseIncrementOption(). + * + * Referencing MacroProps means needing to pull in the .o files that have the + * destructors for the SymbolsWrapper, StringProp, and Scale classes. + */ +void parseIncrementOption(const StringSegment &segment, Precision &outPrecision, UErrorCode &status); } // namespace impl } // namespace number diff --git a/contrib/libs/icu/i18n/number_scientific.cpp b/contrib/libs/icu/i18n/number_scientific.cpp index 527ffbf78d..d365d982d4 100644 --- a/contrib/libs/icu/i18n/number_scientific.cpp +++ b/contrib/libs/icu/i18n/number_scientific.cpp @@ -96,7 +96,7 @@ bool ScientificModifier::isStrong() const { bool ScientificModifier::containsField(Field field) const { (void)field; // This method is not used for inner modifiers. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } void ScientificModifier::getParameters(Parameters& output) const { diff --git a/contrib/libs/icu/i18n/number_scientific.h b/contrib/libs/icu/i18n/number_scientific.h index a55d5ed1d4..a40a6e416d 100644 --- a/contrib/libs/icu/i18n/number_scientific.h +++ b/contrib/libs/icu/i18n/number_scientific.h @@ -52,7 +52,7 @@ class ScientificHandler : public UMemory, public MicroPropsGenerator, public Mul int32_t getMultiplier(int32_t magnitude) const U_OVERRIDE; private: - const Notation::ScientificSettings& fSettings; + const Notation::ScientificSettings fSettings; const DecimalFormatSymbols *fSymbols; const MicroPropsGenerator *fParent; diff --git a/contrib/libs/icu/i18n/number_skeletons.cpp b/contrib/libs/icu/i18n/number_skeletons.cpp index 4ba2647986..de70c5cedf 100644 --- a/contrib/libs/icu/i18n/number_skeletons.cpp +++ b/contrib/libs/icu/i18n/number_skeletons.cpp @@ -10,6 +10,7 @@ #define UNISTR_FROM_STRING_EXPLICIT #include "number_decnum.h" +#include "number_roundingutils.h" #include "number_skeletons.h" #include "umutex.h" #include 
"ucln_in.h" @@ -67,9 +68,13 @@ void U_CALLCONV initNumberSkeletons(UErrorCode& status) { b.add(u"rounding-mode-down", STEM_ROUNDING_MODE_DOWN, status); b.add(u"rounding-mode-up", STEM_ROUNDING_MODE_UP, status); b.add(u"rounding-mode-half-even", STEM_ROUNDING_MODE_HALF_EVEN, status); + b.add(u"rounding-mode-half-odd", STEM_ROUNDING_MODE_HALF_ODD, status); + b.add(u"rounding-mode-half-ceiling", STEM_ROUNDING_MODE_HALF_CEILING, status); + b.add(u"rounding-mode-half-floor", STEM_ROUNDING_MODE_HALF_FLOOR, status); b.add(u"rounding-mode-half-down", STEM_ROUNDING_MODE_HALF_DOWN, status); b.add(u"rounding-mode-half-up", STEM_ROUNDING_MODE_HALF_UP, status); b.add(u"rounding-mode-unnecessary", STEM_ROUNDING_MODE_UNNECESSARY, status); + b.add(u"integer-width-trunc", STEM_INTEGER_WIDTH_TRUNC, status); b.add(u"group-off", STEM_GROUP_OFF, status); b.add(u"group-min2", STEM_GROUP_MIN2, status); b.add(u"group-auto", STEM_GROUP_AUTO, status); @@ -80,6 +85,8 @@ void U_CALLCONV initNumberSkeletons(UErrorCode& status) { b.add(u"unit-width-short", STEM_UNIT_WIDTH_SHORT, status); b.add(u"unit-width-full-name", STEM_UNIT_WIDTH_FULL_NAME, status); b.add(u"unit-width-iso-code", STEM_UNIT_WIDTH_ISO_CODE, status); + b.add(u"unit-width-formal", STEM_UNIT_WIDTH_FORMAL, status); + b.add(u"unit-width-variant", STEM_UNIT_WIDTH_VARIANT, status); b.add(u"unit-width-hidden", STEM_UNIT_WIDTH_HIDDEN, status); b.add(u"sign-auto", STEM_SIGN_AUTO, status); b.add(u"sign-always", STEM_SIGN_ALWAYS, status); @@ -88,6 +95,8 @@ void U_CALLCONV initNumberSkeletons(UErrorCode& status) { b.add(u"sign-accounting-always", STEM_SIGN_ACCOUNTING_ALWAYS, status); b.add(u"sign-except-zero", STEM_SIGN_EXCEPT_ZERO, status); b.add(u"sign-accounting-except-zero", STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, status); + b.add(u"sign-negative", STEM_SIGN_NEGATIVE, status); + b.add(u"sign-accounting-negative", STEM_SIGN_ACCOUNTING_NEGATIVE, status); b.add(u"decimal-auto", STEM_DECIMAL_AUTO, status); b.add(u"decimal-always", 
STEM_DECIMAL_ALWAYS, status); if (U_FAILURE(status)) { return; } @@ -97,6 +106,7 @@ void U_CALLCONV initNumberSkeletons(UErrorCode& status) { b.add(u"measure-unit", STEM_MEASURE_UNIT, status); b.add(u"per-measure-unit", STEM_PER_MEASURE_UNIT, status); b.add(u"unit", STEM_UNIT, status); + b.add(u"usage", STEM_UNIT_USAGE, status); b.add(u"currency", STEM_CURRENCY, status); b.add(u"integer-width", STEM_INTEGER_WIDTH, status); b.add(u"numbering-system", STEM_NUMBERING_SYSTEM, status); @@ -117,6 +127,8 @@ void U_CALLCONV initNumberSkeletons(UErrorCode& status) { b.add(u"()!", STEM_SIGN_ACCOUNTING_ALWAYS, status); b.add(u"+?", STEM_SIGN_EXCEPT_ZERO, status); b.add(u"()?", STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, status); + b.add(u"+-", STEM_SIGN_NEGATIVE, status); + b.add(u"()-", STEM_SIGN_ACCOUNTING_NEGATIVE, status); if (U_FAILURE(status)) { return; } // Build the CharsTrie @@ -149,21 +161,6 @@ UPRV_BLOCK_MACRO_BEGIN { \ } UPRV_BLOCK_MACRO_END -#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \ -UPRV_BLOCK_MACRO_BEGIN { \ - UErrorCode conversionStatus = U_ZERO_ERROR; \ - (dest).appendInvariantChars({FALSE, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \ - if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \ - /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \ - (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \ - return; \ - } else if (U_FAILURE(conversionStatus)) { \ - (status) = conversionStatus; \ - return; \ - } \ -} UPRV_BLOCK_MACRO_END - - } // anonymous namespace @@ -180,23 +177,20 @@ Notation stem_to_object::notation(skeleton::StemEnum stem) { case STEM_NOTATION_SIMPLE: return Notation::simple(); default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } MeasureUnit stem_to_object::unit(skeleton::StemEnum stem) { switch (stem) { case STEM_BASE_UNIT: - // Slicing is okay - return NoUnit::base(); // NOLINT + return MeasureUnit(); case STEM_PERCENT: - // Slicing is okay - return 
NoUnit::percent(); // NOLINT + return MeasureUnit::getPercent(); case STEM_PERMILLE: - // Slicing is okay - return NoUnit::permille(); // NOLINT + return MeasureUnit::getPermille(); default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -211,7 +205,7 @@ Precision stem_to_object::precision(skeleton::StemEnum stem) { case STEM_PRECISION_CURRENCY_CASH: return Precision::currency(UCURR_USAGE_CASH); default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -227,6 +221,12 @@ UNumberFormatRoundingMode stem_to_object::roundingMode(skeleton::StemEnum stem) return UNUM_ROUND_UP; case STEM_ROUNDING_MODE_HALF_EVEN: return UNUM_ROUND_HALFEVEN; + case STEM_ROUNDING_MODE_HALF_ODD: + return UNUM_ROUND_HALF_ODD; + case STEM_ROUNDING_MODE_HALF_CEILING: + return UNUM_ROUND_HALF_CEILING; + case STEM_ROUNDING_MODE_HALF_FLOOR: + return UNUM_ROUND_HALF_FLOOR; case STEM_ROUNDING_MODE_HALF_DOWN: return UNUM_ROUND_HALFDOWN; case STEM_ROUNDING_MODE_HALF_UP: @@ -234,7 +234,7 @@ UNumberFormatRoundingMode stem_to_object::roundingMode(skeleton::StemEnum stem) case STEM_ROUNDING_MODE_UNNECESSARY: return UNUM_ROUND_UNNECESSARY; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -265,6 +265,10 @@ UNumberUnitWidth stem_to_object::unitWidth(skeleton::StemEnum stem) { return UNUM_UNIT_WIDTH_FULL_NAME; case STEM_UNIT_WIDTH_ISO_CODE: return UNUM_UNIT_WIDTH_ISO_CODE; + case STEM_UNIT_WIDTH_FORMAL: + return UNUM_UNIT_WIDTH_FORMAL; + case STEM_UNIT_WIDTH_VARIANT: + return UNUM_UNIT_WIDTH_VARIANT; case STEM_UNIT_WIDTH_HIDDEN: return UNUM_UNIT_WIDTH_HIDDEN; default: @@ -288,6 +292,10 @@ UNumberSignDisplay stem_to_object::signDisplay(skeleton::StemEnum stem) { return UNUM_SIGN_EXCEPT_ZERO; case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO: return UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; + case STEM_SIGN_NEGATIVE: + return UNUM_SIGN_NEGATIVE; + case STEM_SIGN_ACCOUNTING_NEGATIVE: + return UNUM_SIGN_ACCOUNTING_NEGATIVE; default: return UNUM_SIGN_COUNT; // for objects, throw; for enums, return COUNT } @@ -322,6 
+330,15 @@ void enum_to_stem_string::roundingMode(UNumberFormatRoundingMode value, UnicodeS case UNUM_ROUND_HALFEVEN: sb.append(u"rounding-mode-half-even", -1); break; + case UNUM_ROUND_HALF_ODD: + sb.append(u"rounding-mode-half-odd", -1); + break; + case UNUM_ROUND_HALF_CEILING: + sb.append(u"rounding-mode-half-ceiling", -1); + break; + case UNUM_ROUND_HALF_FLOOR: + sb.append(u"rounding-mode-half-floor", -1); + break; case UNUM_ROUND_HALFDOWN: sb.append(u"rounding-mode-half-down", -1); break; @@ -332,7 +349,7 @@ void enum_to_stem_string::roundingMode(UNumberFormatRoundingMode value, UnicodeS sb.append(u"rounding-mode-unnecessary", -1); break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -354,7 +371,7 @@ void enum_to_stem_string::groupingStrategy(UNumberGroupingStrategy value, Unicod sb.append(u"group-thousands", -1); break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -372,11 +389,17 @@ void enum_to_stem_string::unitWidth(UNumberUnitWidth value, UnicodeString& sb) { case UNUM_UNIT_WIDTH_ISO_CODE: sb.append(u"unit-width-iso-code", -1); break; + case UNUM_UNIT_WIDTH_FORMAL: + sb.append(u"unit-width-formal", -1); + break; + case UNUM_UNIT_WIDTH_VARIANT: + sb.append(u"unit-width-variant", -1); + break; case UNUM_UNIT_WIDTH_HIDDEN: sb.append(u"unit-width-hidden", -1); break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -403,8 +426,14 @@ void enum_to_stem_string::signDisplay(UNumberSignDisplay value, UnicodeString& s case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO: sb.append(u"sign-accounting-except-zero", -1); break; + case UNUM_SIGN_NEGATIVE: + sb.append(u"sign-negative", -1); + break; + case UNUM_SIGN_ACCOUNTING_NEGATIVE: + sb.append(u"sign-accounting-negative", -1); + break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -418,7 +447,7 @@ enum_to_stem_string::decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay valu sb.append(u"decimal-always", -1); break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } 
@@ -470,6 +499,7 @@ UnicodeString skeleton::generate(const MacroProps& macros, UErrorCode& status) { MacroProps skeleton::parseSkeleton( const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status) { U_ASSERT(U_SUCCESS(status)); + U_ASSERT(kSerializedStemTrie != nullptr); // Add a trailing whitespace to the end of the skeleton string to make code cleaner. UnicodeString tempSkeletonString(skeletonString); @@ -550,6 +580,7 @@ MacroProps skeleton::parseSkeleton( case STATE_MEASURE_UNIT: case STATE_PER_MEASURE_UNIT: case STATE_IDENTIFIER_UNIT: + case STATE_UNIT_USAGE: case STATE_CURRENCY_UNIT: case STATE_INTEGER_WIDTH: case STATE_NUMBERING_SYSTEM: @@ -575,6 +606,8 @@ MacroProps skeleton::parseSkeleton( ParseState skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen, MacroProps& macros, UErrorCode& status) { + U_ASSERT(U_SUCCESS(status)); + // First check for "blueprint" stems, which start with a "signal char" switch (segment.charAt(0)) { case u'.': @@ -584,7 +617,7 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case u'@': CHECK_NULL(seen, precision, status); blueprint_helpers::parseDigitsStem(segment, macros, status); - return STATE_NULL; + return STATE_PRECISION; case u'E': CHECK_NULL(seen, notation, status); blueprint_helpers::parseScientificStem(segment, macros, status); @@ -650,7 +683,7 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case STEM_PRECISION_INTEGER: return STATE_FRACTION_PRECISION; // allows for "precision-integer/@##" default: - return STATE_NULL; + return STATE_PRECISION; } case STEM_ROUNDING_MODE_CEILING: @@ -658,6 +691,9 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case STEM_ROUNDING_MODE_DOWN: case STEM_ROUNDING_MODE_UP: case STEM_ROUNDING_MODE_HALF_EVEN: + case STEM_ROUNDING_MODE_HALF_ODD: + case STEM_ROUNDING_MODE_HALF_CEILING: + case STEM_ROUNDING_MODE_HALF_FLOOR: case 
STEM_ROUNDING_MODE_HALF_DOWN: case STEM_ROUNDING_MODE_HALF_UP: case STEM_ROUNDING_MODE_UNNECESSARY: @@ -665,6 +701,11 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se macros.roundingMode = stem_to_object::roundingMode(stem); return STATE_NULL; + case STEM_INTEGER_WIDTH_TRUNC: + CHECK_NULL(seen, integerWidth, status); + macros.integerWidth = IntegerWidth::zeroFillTo(0).truncateAt(0); + return STATE_NULL; + case STEM_GROUP_OFF: case STEM_GROUP_MIN2: case STEM_GROUP_AUTO: @@ -683,6 +724,8 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case STEM_UNIT_WIDTH_SHORT: case STEM_UNIT_WIDTH_FULL_NAME: case STEM_UNIT_WIDTH_ISO_CODE: + case STEM_UNIT_WIDTH_FORMAL: + case STEM_UNIT_WIDTH_VARIANT: case STEM_UNIT_WIDTH_HIDDEN: CHECK_NULL(seen, unitWidth, status); macros.unitWidth = stem_to_object::unitWidth(stem); @@ -695,6 +738,8 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case STEM_SIGN_ACCOUNTING_ALWAYS: case STEM_SIGN_EXCEPT_ZERO: case STEM_SIGN_ACCOUNTING_EXCEPT_ZERO: + case STEM_SIGN_NEGATIVE: + case STEM_SIGN_ACCOUNTING_NEGATIVE: CHECK_NULL(seen, sign, status); macros.sign = stem_to_object::signDisplay(stem); return STATE_NULL; @@ -705,7 +750,7 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se macros.decimal = stem_to_object::decimalSeparatorDisplay(stem); return STATE_NULL; - // Stems requiring an option: + // Stems requiring an option: case STEM_PRECISION_INCREMENT: CHECK_NULL(seen, precision, status); @@ -724,8 +769,13 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se CHECK_NULL(seen, perUnit, status); return STATE_IDENTIFIER_UNIT; + case STEM_UNIT_USAGE: + CHECK_NULL(seen, usage, status); + return STATE_UNIT_USAGE; + case STEM_CURRENCY: CHECK_NULL(seen, unit, status); + CHECK_NULL(seen, perUnit, status); return STATE_CURRENCY_UNIT; case STEM_INTEGER_WIDTH: @@ -741,12 +791,13 @@ 
skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se return STATE_SCALE; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } ParseState skeleton::parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status) { + U_ASSERT(U_SUCCESS(status)); ///// Required options: ///// @@ -763,9 +814,12 @@ ParseState skeleton::parseOption(ParseState stem, const StringSegment& segment, case STATE_IDENTIFIER_UNIT: blueprint_helpers::parseIdentifierUnitOption(segment, macros, status); return STATE_NULL; + case STATE_UNIT_USAGE: + blueprint_helpers::parseUnitUsageOption(segment, macros, status); + return STATE_NULL; case STATE_INCREMENT_PRECISION: blueprint_helpers::parseIncrementOption(segment, macros, status); - return STATE_NULL; + return STATE_PRECISION; case STATE_INTEGER_WIDTH: blueprint_helpers::parseIntegerWidthOption(segment, macros, status); return STATE_NULL; @@ -805,6 +859,22 @@ ParseState skeleton::parseOption(ParseState stem, const StringSegment& segment, switch (stem) { case STATE_FRACTION_PRECISION: if (blueprint_helpers::parseFracSigOption(segment, macros, status)) { + return STATE_PRECISION; + } + if (U_FAILURE(status)) { + return {}; + } + // If the fracSig option was not found, try normal precision options. 
+ stem = STATE_PRECISION; + break; + default: + break; + } + + // Trailing zeros option + switch (stem) { + case STATE_PRECISION: + if (blueprint_helpers::parseTrailingZeroOption(segment, macros, status)) { return STATE_NULL; } if (U_FAILURE(status)) { @@ -833,7 +903,7 @@ void GeneratorHelpers::generateSkeleton(const MacroProps& macros, UnicodeString& sb.append(u' '); } if (U_FAILURE(status)) { return; } - if (GeneratorHelpers::perUnit(macros, sb, status)) { + if (GeneratorHelpers::usage(macros, sb, status)) { sb.append(u' '); } if (U_FAILURE(status)) { return; } @@ -879,6 +949,10 @@ void GeneratorHelpers::generateSkeleton(const MacroProps& macros, UnicodeString& status = U_UNSUPPORTED_ERROR; return; } + if (macros.unitDisplayCase.isSet()) { + status = U_UNSUPPORTED_ERROR; + return; + } if (macros.affixProvider != nullptr) { status = U_UNSUPPORTED_ERROR; return; @@ -968,6 +1042,7 @@ blueprint_helpers::generateCurrencyOption(const CurrencyUnit& currency, UnicodeS void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { + U_ASSERT(U_SUCCESS(status)); const UnicodeString stemString = segment.toTempUnicodeString(); // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric) @@ -983,14 +1058,13 @@ void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, Mac } // Need to do char <-> UChar conversion... - U_ASSERT(U_SUCCESS(status)); CharString type; SKELETON_UCHAR_TO_CHAR(type, stemString, 0, firstHyphen, status); CharString subType; SKELETON_UCHAR_TO_CHAR(subType, stemString, firstHyphen + 1, stemString.length(), status); - // Note: the largest type as of this writing (March 2018) is "volume", which has 24 units. - static constexpr int32_t CAPACITY = 30; + // Note: the largest type as of this writing (Aug 2020) is "volume", which has 33 units. 
+ static constexpr int32_t CAPACITY = 40; MeasureUnit units[CAPACITY]; UErrorCode localStatus = U_ZERO_ERROR; int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus); @@ -1011,14 +1085,6 @@ void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, Mac status = U_NUMBER_SKELETON_SYNTAX_ERROR; } -void blueprint_helpers::generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, - UErrorCode&) { - // Need to do char <-> UChar conversion... - sb.append(UnicodeString(measureUnit.getType(), -1, US_INV)); - sb.append(u'-'); - sb.append(UnicodeString(measureUnit.getSubtype(), -1, US_INV)); -} - void blueprint_helpers::parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { // A little bit of a hack: save the current unit (numerator), call the main measure unit @@ -1038,23 +1104,23 @@ void blueprint_helpers::parseIdentifierUnitOption(const StringSegment& segment, SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); ErrorCode internalStatus; - auto fullUnit = MeasureUnitImpl::forIdentifier(buffer.toStringPiece(), internalStatus); + macros.unit = MeasureUnit::forIdentifier(buffer.toStringPiece(), internalStatus); if (internalStatus.isFailure()) { // throw new SkeletonSyntaxException("Invalid core unit identifier", segment, e); status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; } +} - // TODO(ICU-20941): Clean this up. - for (int32_t i = 0; i < fullUnit.units.length(); i++) { - SingleUnitImpl* subUnit = fullUnit.units[i]; - if (subUnit->dimensionality > 0) { - macros.unit = macros.unit.product(subUnit->build(status), status); - } else { - subUnit->dimensionality *= -1; - macros.perUnit = macros.perUnit.product(subUnit->build(status), status); - } - } +void blueprint_helpers::parseUnitUsageOption(const StringSegment &segment, MacroProps &macros, + UErrorCode &status) { + // Need to do char <-> UChar conversion... 
+ U_ASSERT(U_SUCCESS(status)); + CharString buffer; + SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); + macros.usage.set(buffer.toStringPiece()); + // We do not do any validation of the usage string: it depends on the + // unitPreferenceData in the units resources. } void blueprint_helpers::parseFractionStem(const StringSegment& segment, MacroProps& macros, @@ -1198,6 +1264,7 @@ void blueprint_helpers::parseScientificStem(const StringSegment& segment, MacroP } else if (segment.charAt(offset) == u'?') { signDisplay = UNUM_SIGN_EXCEPT_ZERO; } else { + // NOTE: Other sign displays are not included because they aren't useful in this context goto fail; } offset++; @@ -1256,21 +1323,14 @@ bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroPr break; } } - // For the frac-sig option, there must be minSig or maxSig but not both. - // Valid: @+, @@+, @@@+ - // Valid: @#, @##, @### - // Invalid: @, @@, @@@ - // Invalid: @@#, @@##, @@@# if (offset < segment.length()) { if (isWildcardChar(segment.charAt(offset))) { + // @+, @@+, @@@+ maxSig = -1; offset++; - } else if (minSig > 1) { - // @@#, @@##, @@@# - // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment); - status = U_NUMBER_SKELETON_SYNTAX_ERROR; - return false; } else { + // @#, @##, @### + // @@#, @@##, @@@# maxSig = minSig; for (; offset < segment.length(); offset++) { if (segment.charAt(offset) == u'#') { @@ -1282,68 +1342,71 @@ bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroPr } } else { // @, @@, @@@ - // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment); - status = U_NUMBER_SKELETON_SYNTAX_ERROR; - return false; + maxSig = minSig; } + UNumberRoundingPriority priority; if (offset < segment.length()) { - // throw new SkeletonSyntaxException("Invalid digits option for fraction rounder", segment); + if (maxSig == -1) { + // The wildcard 
character is not allowed with the priority annotation + status = U_NUMBER_SKELETON_SYNTAX_ERROR; + return false; + } + if (segment.codePointAt(offset) == u'r') { + priority = UNUM_ROUNDING_PRIORITY_RELAXED; + offset++; + } else if (segment.codePointAt(offset) == u's') { + priority = UNUM_ROUNDING_PRIORITY_STRICT; + offset++; + } else { + // Invalid digits option for fraction rounder + status = U_NUMBER_SKELETON_SYNTAX_ERROR; + return false; + } + if (offset < segment.length()) { + // Invalid digits option for fraction rounder + status = U_NUMBER_SKELETON_SYNTAX_ERROR; + return false; + } + } else if (maxSig == -1) { + // withMinDigits + maxSig = minSig; + minSig = 1; + priority = UNUM_ROUNDING_PRIORITY_RELAXED; + } else if (minSig == 1) { + // withMaxDigits + priority = UNUM_ROUNDING_PRIORITY_STRICT; + } else { + // Digits options with both min and max sig require the priority option status = U_NUMBER_SKELETON_SYNTAX_ERROR; return false; } auto& oldPrecision = static_cast<const FractionPrecision&>(macros.precision); - if (maxSig == -1) { - macros.precision = oldPrecision.withMinDigits(minSig); - } else { - macros.precision = oldPrecision.withMaxDigits(maxSig); - } + macros.precision = oldPrecision.withSignificantDigits(minSig, maxSig, priority); return true; } -void blueprint_helpers::parseIncrementOption(const StringSegment& segment, MacroProps& macros, - UErrorCode& status) { - // Need to do char <-> UChar conversion... - U_ASSERT(U_SUCCESS(status)); - CharString buffer; - SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); - - // Utilize DecimalQuantity/decNumber to parse this for us. 
- DecimalQuantity dq; - UErrorCode localStatus = U_ZERO_ERROR; - dq.setToDecNumber({buffer.data(), buffer.length()}, localStatus); - if (U_FAILURE(localStatus)) { - // throw new SkeletonSyntaxException("Invalid rounding increment", segment, e); - status = U_NUMBER_SKELETON_SYNTAX_ERROR; - return; +bool blueprint_helpers::parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode&) { + if (segment == u"w") { + macros.precision = macros.precision.trailingZeroDisplay(UNUM_TRAILING_ZERO_HIDE_IF_WHOLE); + return true; } - double increment = dq.toDouble(); + return false; +} - // We also need to figure out how many digits. Do a brute force string operation. - int decimalOffset = 0; - while (decimalOffset < segment.length() && segment.charAt(decimalOffset) != '.') { - decimalOffset++; - } - if (decimalOffset == segment.length()) { - macros.precision = Precision::increment(increment); - } else { - int32_t fractionLength = segment.length() - decimalOffset - 1; - macros.precision = Precision::increment(increment).withMinFraction(fractionLength); - } +void blueprint_helpers::parseIncrementOption(const StringSegment &segment, MacroProps &macros, + UErrorCode &status) { + number::impl::parseIncrementOption(segment, macros.precision, status); } -void blueprint_helpers::generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, +void blueprint_helpers::generateIncrementOption(double increment, int32_t minFrac, UnicodeString& sb, UErrorCode&) { // Utilize DecimalQuantity/double_conversion to format this for us. DecimalQuantity dq; dq.setToDouble(increment); dq.roundToInfinity(); + dq.setMinFraction(minFrac); sb.append(dq.toPlainString()); - - // We might need to append extra trailing zeros for min fraction... 
- if (trailingZeros > 0) { - appendMultiple(sb, u'0', trailingZeros); - } } void blueprint_helpers::parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, @@ -1432,7 +1495,7 @@ void blueprint_helpers::parseScaleOption(const StringSegment& segment, MacroProp LocalPointer<DecNum> decnum(new DecNum(), status); if (U_FAILURE(status)) { return; } decnum->setTo({buffer.data(), buffer.length()}, status); - if (U_FAILURE(status)) { + if (U_FAILURE(status) || decnum->isSpecial()) { // This is a skeleton syntax error; don't let the low-level decnum error bubble up status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; @@ -1499,50 +1562,46 @@ bool GeneratorHelpers::notation(const MacroProps& macros, UnicodeString& sb, UEr } bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { - if (utils::unitIsCurrency(macros.unit)) { + MeasureUnit unit = macros.unit; + if (!utils::unitIsBaseUnit(macros.perUnit)) { + if (utils::unitIsCurrency(macros.unit) || utils::unitIsCurrency(macros.perUnit)) { + status = U_UNSUPPORTED_ERROR; + return false; + } + unit = unit.product(macros.perUnit.reciprocal(status), status); + } + + if (utils::unitIsCurrency(unit)) { sb.append(u"currency/", -1); - CurrencyUnit currency(macros.unit, status); + CurrencyUnit currency(unit, status); if (U_FAILURE(status)) { return false; } blueprint_helpers::generateCurrencyOption(currency, sb, status); return true; - } else if (utils::unitIsNoUnit(macros.unit)) { - if (utils::unitIsPercent(macros.unit)) { - sb.append(u"percent", -1); - return true; - } else if (utils::unitIsPermille(macros.unit)) { - sb.append(u"permille", -1); - return true; - } else { - // Default value is not shown in normalized form - return false; - } + } else if (utils::unitIsBaseUnit(unit)) { + // Default value is not shown in normalized form + return false; + } else if (utils::unitIsPercent(unit)) { + sb.append(u"percent", -1); + return true; + } else if (utils::unitIsPermille(unit)) { + 
sb.append(u"permille", -1); + return true; } else { - sb.append(u"measure-unit/", -1); - blueprint_helpers::generateMeasureUnitOption(macros.unit, sb, status); + sb.append(u"unit/", -1); + sb.append(unit.getIdentifier()); return true; } } -bool GeneratorHelpers::perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { - // Per-units are currently expected to be only MeasureUnits. - if (utils::unitIsNoUnit(macros.perUnit)) { - if (utils::unitIsPercent(macros.perUnit) || utils::unitIsPermille(macros.perUnit)) { - status = U_UNSUPPORTED_ERROR; - return false; - } else { - // Default value: ok to ignore - return false; - } - } else if (utils::unitIsCurrency(macros.perUnit)) { - status = U_UNSUPPORTED_ERROR; - return false; - } else { - sb.append(u"per-measure-unit/", -1); - blueprint_helpers::generateMeasureUnitOption(macros.perUnit, sb, status); +bool GeneratorHelpers::usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& /* status */) { + if (macros.usage.isSet()) { + sb.append(u"usage/", -1); + sb.append(UnicodeString(macros.usage.fValue, -1, US_INV)); return true; } + return false; } bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { @@ -1558,10 +1617,11 @@ bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UE const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig; blueprint_helpers::generateFractionStem(impl.fMinFrac, impl.fMaxFrac, sb, status); sb.append(u'/'); - if (impl.fMinSig == -1) { - blueprint_helpers::generateDigitsStem(1, impl.fMaxSig, sb, status); + blueprint_helpers::generateDigitsStem(impl.fMinSig, impl.fMaxSig, sb, status); + if (impl.fPriority == UNUM_ROUNDING_PRIORITY_RELAXED) { + sb.append(u'r'); } else { - blueprint_helpers::generateDigitsStem(impl.fMinSig, -1, sb, status); + sb.append(u's'); } } else if (macros.precision.fType == Precision::RND_INCREMENT || macros.precision.fType == Precision::RND_INCREMENT_ONE @@ 
-1570,7 +1630,7 @@ bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UE sb.append(u"precision-increment/", -1); blueprint_helpers::generateIncrementOption( impl.fIncrement, - impl.fMinFrac - impl.fMaxFrac, + impl.fMinFrac, sb, status); } else if (macros.precision.fType == Precision::RND_CURRENCY) { @@ -1585,6 +1645,10 @@ bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UE return false; } + if (macros.precision.fTrailingZeroDisplay == UNUM_TRAILING_ZERO_HIDE_IF_WHOLE) { + sb.append(u"/w", -1); + } + // NOTE: Always return true for rounding because the default value depends on other options. return true; } @@ -1617,10 +1681,15 @@ bool GeneratorHelpers::integerWidth(const MacroProps& macros, UnicodeString& sb, // Error or Default return false; } + const auto& minMaxInt = macros.integerWidth.fUnion.minMaxInt; + if (minMaxInt.fMinInt == 0 && minMaxInt.fMaxInt == 0) { + sb.append(u"integer-width-trunc", -1); + return true; + } sb.append(u"integer-width/", -1); blueprint_helpers::generateIntegerWidthOption( - macros.integerWidth.fUnion.minMaxInt.fMinInt, - macros.integerWidth.fUnion.minMaxInt.fMaxInt, + minMaxInt.fMinInt, + minMaxInt.fMaxInt, sb, status); return true; diff --git a/contrib/libs/icu/i18n/number_skeletons.h b/contrib/libs/icu/i18n/number_skeletons.h index d9b2c0ee0b..be41f1b323 100644 --- a/contrib/libs/icu/i18n/number_skeletons.h +++ b/contrib/libs/icu/i18n/number_skeletons.h @@ -22,10 +22,12 @@ struct SeenMacroProps; // namespace for enums and entrypoint functions namespace skeleton { -/////////////////////////////////////////////////////////////////////////////////////// -// NOTE: For an example of how to add a new stem to the number skeleton parser, see: // -// http://bugs.icu-project.org/trac/changeset/41193 // -/////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// +// NOTE: 
For examples of how to add a new stem to the number skeleton parser, see: // +// https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 // +// and // +// https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 // +//////////////////////////////////////////////////////////////////////////////////////// /** * While parsing a skeleton, this enum records what type of option we expect to find next. @@ -40,6 +42,7 @@ enum ParseState { STATE_SCIENTIFIC, STATE_FRACTION_PRECISION, + STATE_PRECISION, // Section 2: An option is required: @@ -47,6 +50,7 @@ enum ParseState { STATE_MEASURE_UNIT, STATE_PER_MEASURE_UNIT, STATE_IDENTIFIER_UNIT, + STATE_UNIT_USAGE, STATE_CURRENCY_UNIT, STATE_INTEGER_WIDTH, STATE_NUMBERING_SYSTEM, @@ -82,9 +86,13 @@ enum StemEnum { STEM_ROUNDING_MODE_DOWN, STEM_ROUNDING_MODE_UP, STEM_ROUNDING_MODE_HALF_EVEN, + STEM_ROUNDING_MODE_HALF_ODD, + STEM_ROUNDING_MODE_HALF_CEILING, + STEM_ROUNDING_MODE_HALF_FLOOR, STEM_ROUNDING_MODE_HALF_DOWN, STEM_ROUNDING_MODE_HALF_UP, STEM_ROUNDING_MODE_UNNECESSARY, + STEM_INTEGER_WIDTH_TRUNC, STEM_GROUP_OFF, STEM_GROUP_MIN2, STEM_GROUP_AUTO, @@ -95,6 +103,8 @@ enum StemEnum { STEM_UNIT_WIDTH_SHORT, STEM_UNIT_WIDTH_FULL_NAME, STEM_UNIT_WIDTH_ISO_CODE, + STEM_UNIT_WIDTH_FORMAL, + STEM_UNIT_WIDTH_VARIANT, STEM_UNIT_WIDTH_HIDDEN, STEM_SIGN_AUTO, STEM_SIGN_ALWAYS, @@ -103,6 +113,8 @@ enum StemEnum { STEM_SIGN_ACCOUNTING_ALWAYS, STEM_SIGN_EXCEPT_ZERO, STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, + STEM_SIGN_NEGATIVE, + STEM_SIGN_ACCOUNTING_NEGATIVE, STEM_DECIMAL_AUTO, STEM_DECIMAL_ALWAYS, @@ -112,6 +124,7 @@ enum StemEnum { STEM_MEASURE_UNIT, STEM_PER_MEASURE_UNIT, STEM_UNIT, + STEM_UNIT_USAGE, STEM_CURRENCY, STEM_INTEGER_WIDTH, STEM_NUMBERING_SYSTEM, @@ -234,14 +247,20 @@ void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErro void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status); +// "measure-unit/" is deprecated in 
favour of "unit/". void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); -void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status); - +// "per-measure-unit/" is deprecated in favour of "unit/". void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); +/** + * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as + * specified via a "unit/" concise skeleton. + */ void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); +void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); + void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status); @@ -261,10 +280,13 @@ void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCo /** @return Whether we successfully found and parsed a frac-sig option. */ bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); +/** @return Whether we successfully found and parsed a trailing zero option. 
*/ +bool parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); + void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void -generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status); +generateIncrementOption(double increment, int32_t minFrac, UnicodeString& sb, UErrorCode& status); void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); @@ -302,7 +324,7 @@ class GeneratorHelpers { static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); - static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); + static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); @@ -332,6 +354,7 @@ struct SeenMacroProps { bool notation = false; bool unit = false; bool perUnit = false; + bool usage = false; bool precision = false; bool roundingMode = false; bool grouper = false; @@ -344,6 +367,24 @@ struct SeenMacroProps { bool scale = false; }; +namespace { + +#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \ +UPRV_BLOCK_MACRO_BEGIN { \ + UErrorCode conversionStatus = U_ZERO_ERROR; \ + (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \ + if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \ + /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \ + (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \ + return; \ + } else if (U_FAILURE(conversionStatus)) { \ + (status) = conversionStatus; \ + return; \ + } \ +} UPRV_BLOCK_MACRO_END + +} // namespace + } // namespace impl } // namespace number U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/number_symbolswrapper.cpp b/contrib/libs/icu/i18n/number_symbolswrapper.cpp new file mode 100644 index 
0000000000..ac3043d1ca --- /dev/null +++ b/contrib/libs/icu/i18n/number_symbolswrapper.cpp @@ -0,0 +1,131 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "number_microprops.h" +#include "unicode/numberformatter.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; + +SymbolsWrapper::SymbolsWrapper(const SymbolsWrapper &other) { + doCopyFrom(other); +} + +SymbolsWrapper::SymbolsWrapper(SymbolsWrapper &&src) U_NOEXCEPT { + doMoveFrom(std::move(src)); +} + +SymbolsWrapper &SymbolsWrapper::operator=(const SymbolsWrapper &other) { + if (this == &other) { + return *this; + } + doCleanup(); + doCopyFrom(other); + return *this; +} + +SymbolsWrapper &SymbolsWrapper::operator=(SymbolsWrapper &&src) U_NOEXCEPT { + if (this == &src) { + return *this; + } + doCleanup(); + doMoveFrom(std::move(src)); + return *this; +} + +SymbolsWrapper::~SymbolsWrapper() { + doCleanup(); +} + +void SymbolsWrapper::setTo(const DecimalFormatSymbols &dfs) { + doCleanup(); + fType = SYMPTR_DFS; + fPtr.dfs = new DecimalFormatSymbols(dfs); +} + +void SymbolsWrapper::setTo(const NumberingSystem *ns) { + doCleanup(); + fType = SYMPTR_NS; + fPtr.ns = ns; +} + +void SymbolsWrapper::doCopyFrom(const SymbolsWrapper &other) { + fType = other.fType; + switch (fType) { + case SYMPTR_NONE: + // No action necessary + break; + case SYMPTR_DFS: + // Memory allocation failures are exposed in copyErrorTo() + if (other.fPtr.dfs != nullptr) { + fPtr.dfs = new DecimalFormatSymbols(*other.fPtr.dfs); + } else { + fPtr.dfs = nullptr; + } + break; + case SYMPTR_NS: + // Memory allocation failures are exposed in copyErrorTo() + if (other.fPtr.ns != nullptr) { + fPtr.ns = new NumberingSystem(*other.fPtr.ns); + } else { + fPtr.ns = nullptr; + } + break; + } +} + +void SymbolsWrapper::doMoveFrom(SymbolsWrapper &&src) { + fType = src.fType; + switch 
(fType) { + case SYMPTR_NONE: + // No action necessary + break; + case SYMPTR_DFS: + fPtr.dfs = src.fPtr.dfs; + src.fPtr.dfs = nullptr; + break; + case SYMPTR_NS: + fPtr.ns = src.fPtr.ns; + src.fPtr.ns = nullptr; + break; + } +} + +void SymbolsWrapper::doCleanup() { + switch (fType) { + case SYMPTR_NONE: + // No action necessary + break; + case SYMPTR_DFS: + delete fPtr.dfs; + break; + case SYMPTR_NS: + delete fPtr.ns; + break; + } +} + +bool SymbolsWrapper::isDecimalFormatSymbols() const { + return fType == SYMPTR_DFS; +} + +bool SymbolsWrapper::isNumberingSystem() const { + return fType == SYMPTR_NS; +} + +const DecimalFormatSymbols *SymbolsWrapper::getDecimalFormatSymbols() const { + U_ASSERT(fType == SYMPTR_DFS); + return fPtr.dfs; +} + +const NumberingSystem *SymbolsWrapper::getNumberingSystem() const { + U_ASSERT(fType == SYMPTR_NS); + return fPtr.ns; +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/number_types.h b/contrib/libs/icu/i18n/number_types.h index 5c2b8cf8b5..84846efb92 100644 --- a/contrib/libs/icu/i18n/number_types.h +++ b/contrib/libs/icu/i18n/number_types.h @@ -62,26 +62,29 @@ enum AffixPatternType { // Represents a plus sign symbol '+'. TYPE_PLUS_SIGN = -2, + // Represents an approximately sign symbol '~'. + TYPE_APPROXIMATELY_SIGN = -3, + // Represents a percent sign symbol '%'. - TYPE_PERCENT = -3, + TYPE_PERCENT = -4, // Represents a permille sign symbol '‰'. - TYPE_PERMILLE = -4, + TYPE_PERMILLE = -5, // Represents a single currency symbol '¤'. - TYPE_CURRENCY_SINGLE = -5, + TYPE_CURRENCY_SINGLE = -6, // Represents a double currency symbol '¤¤'. - TYPE_CURRENCY_DOUBLE = -6, + TYPE_CURRENCY_DOUBLE = -7, // Represents a triple currency symbol '¤¤¤'. - TYPE_CURRENCY_TRIPLE = -7, + TYPE_CURRENCY_TRIPLE = -8, // Represents a quadruple currency symbol '¤¤¤¤'. - TYPE_CURRENCY_QUAD = -8, + TYPE_CURRENCY_QUAD = -9, // Represents a quintuple currency symbol '¤¤¤¤¤'. 
- TYPE_CURRENCY_QUINT = -9, + TYPE_CURRENCY_QUINT = -10, // Represents a sequence of six or more currency symbols. TYPE_CURRENCY_OVERFLOW = -15 @@ -137,6 +140,11 @@ class U_I18N_API AffixPatternProvider { * number instead of rendering the number. */ virtual bool hasBody() const = 0; + + /** + * True if the currency symbol should replace the decimal separator. + */ + virtual bool currencyAsDecimal() const = 0; }; @@ -246,31 +254,31 @@ class U_I18N_API ModifierStore { * itself. The {@link #processQuantity} method performs the final step in the number processing pipeline: it uses the * quantity to generate a finalized {@link MicroProps}, which can be used to render the number to output. * - * <p> * In other words, this interface is used for the parts of number processing that are <em>quantity-dependent</em>. * - * <p> * In order to allow for multiple different objects to all mutate the same MicroProps, a "chain" of MicroPropsGenerators * are linked together, and each one is responsible for manipulating a certain quantity-dependent part of the * MicroProps. At the tail of the linked list is a base instance of {@link MicroProps} with properties that are not * quantity-dependent. Each element in the linked list calls {@link #processQuantity} on its "parent", then does its * work, and then returns the result. * + * This chain of MicroPropsGenerators is typically constructed by NumberFormatterImpl::macrosToMicroGenerator() when + * constructing a NumberFormatter. + * * Exported as U_I18N_API because it is a base class for other exported types * */ class U_I18N_API MicroPropsGenerator { public: - virtual ~MicroPropsGenerator(); + virtual ~MicroPropsGenerator() = default; /** - * Considers the given {@link DecimalQuantity}, optionally mutates it, and returns a {@link MicroProps}. + * Considers the given {@link DecimalQuantity}, optionally mutates it, and + * populates a {@link MicroProps} instance. 
* - * @param quantity - * The quantity for consideration and optional mutation. - * @param micros - * The MicroProps instance to populate. - * @return A MicroProps instance resolved for the quantity. + * @param quantity The quantity for consideration and optional mutation. + * @param micros The MicroProps instance to populate. It will be modified as + * needed for the given quantity. */ virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const = 0; diff --git a/contrib/libs/icu/i18n/number_usageprefs.cpp b/contrib/libs/icu/i18n/number_usageprefs.cpp new file mode 100644 index 0000000000..5d93d12cce --- /dev/null +++ b/contrib/libs/icu/i18n/number_usageprefs.cpp @@ -0,0 +1,214 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "number_usageprefs.h" +#include "cstring.h" +#include "number_decimalquantity.h" +#include "number_microprops.h" +#include "number_roundingutils.h" +#include "number_skeletons.h" +#include "unicode/char16ptr.h" +#include "unicode/currunit.h" +#include "unicode/fmtable.h" +#include "unicode/measure.h" +#include "unicode/numberformatter.h" +#include "unicode/platform.h" +#include "unicode/unum.h" +#include "unicode/urename.h" +#include "units_data.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; +using icu::StringSegment; +using icu::units::ConversionRates; + +// Copy constructor +StringProp::StringProp(const StringProp &other) : StringProp() { + this->operator=(other); +} + +// Copy assignment operator +StringProp &StringProp::operator=(const StringProp &other) { + if (this == &other) { return *this; } // self-assignment: no-op + fLength = 0; + fError = other.fError; + if (fValue != nullptr) { + uprv_free(fValue); + fValue = nullptr; + } + if (other.fValue == nullptr) { + return *this; + } + if (U_FAILURE(other.fError)) { 
+ // We don't bother trying to allocating memory if we're in any case busy + // copying an errored StringProp. + return *this; + } + fValue = (char *)uprv_malloc(other.fLength + 1); + if (fValue == nullptr) { + fError = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + fLength = other.fLength; + uprv_strncpy(fValue, other.fValue, fLength + 1); + return *this; +} + +// Move constructor +StringProp::StringProp(StringProp &&src) U_NOEXCEPT : fValue(src.fValue), + fLength(src.fLength), + fError(src.fError) { + // Take ownership away from src if necessary + src.fValue = nullptr; +} + +// Move assignment operator +StringProp &StringProp::operator=(StringProp &&src) U_NOEXCEPT { + if (this == &src) { + return *this; + } + if (fValue != nullptr) { + uprv_free(fValue); + } + fValue = src.fValue; + fLength = src.fLength; + fError = src.fError; + // Take ownership away from src if necessary + src.fValue = nullptr; + return *this; +} + +StringProp::~StringProp() { + if (fValue != nullptr) { + uprv_free(fValue); + fValue = nullptr; + } +} + +void StringProp::set(StringPiece value) { + if (fValue != nullptr) { + uprv_free(fValue); + fValue = nullptr; + } + fLength = value.length(); + fValue = (char *)uprv_malloc(fLength + 1); + if (fValue == nullptr) { + fLength = 0; + fError = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_strncpy(fValue, value.data(), fLength); + fValue[fLength] = 0; +} + +// Populates micros.mixedMeasures and modifies quantity, based on the values in +// measures. 
+void mixedMeasuresToMicros(const MaybeStackVector<Measure> &measures, DecimalQuantity *quantity, + MicroProps *micros, UErrorCode status) { + micros->mixedMeasuresCount = measures.length(); + + if (micros->mixedMeasures.getCapacity() < micros->mixedMeasuresCount) { + if (micros->mixedMeasures.resize(micros->mixedMeasuresCount) == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } + + for (int32_t i = 0; i < micros->mixedMeasuresCount; i++) { + switch (measures[i]->getNumber().getType()) { + case Formattable::kInt64: + micros->mixedMeasures[i] = measures[i]->getNumber().getInt64(); + break; + + case Formattable::kDouble: + U_ASSERT(micros->indexOfQuantity < 0); + quantity->setToDouble(measures[i]->getNumber().getDouble()); + micros->indexOfQuantity = i; + break; + + default: + U_ASSERT(0 == "Found a Measure Number which is neither a double nor an int"); + UPRV_UNREACHABLE_EXIT; + break; + } + + if (U_FAILURE(status)) { + return; + } + } + + if (micros->indexOfQuantity < 0) { + // There is no quantity. 
+ status = U_INTERNAL_PROGRAM_ERROR; + } +} + +UsagePrefsHandler::UsagePrefsHandler(const Locale &locale, + const MeasureUnit &inputUnit, + const StringPiece usage, + const MicroPropsGenerator *parent, + UErrorCode &status) + : fUnitsRouter(inputUnit, StringPiece(locale.getCountry()), usage, status), + fParent(parent) { +} + +void UsagePrefsHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros, + UErrorCode &status) const { + fParent->processQuantity(quantity, micros, status); + if (U_FAILURE(status)) { + return; + } + + quantity.roundToInfinity(); // Enables toDouble + const units::RouteResult routed = fUnitsRouter.route(quantity.toDouble(), &micros.rounder, status); + if (U_FAILURE(status)) { + return; + } + const MaybeStackVector<Measure>& routedMeasures = routed.measures; + micros.outputUnit = routed.outputUnit.copy(status).build(status); + if (U_FAILURE(status)) { + return; + } + + mixedMeasuresToMicros(routedMeasures, &quantity, &micros, status); +} + +UnitConversionHandler::UnitConversionHandler(const MeasureUnit &targetUnit, + const MicroPropsGenerator *parent, UErrorCode &status) + : fOutputUnit(targetUnit), fParent(parent) { + MeasureUnitImpl tempInput, tempOutput; + + ConversionRates conversionRates(status); + if (U_FAILURE(status)) { + return; + } + + const MeasureUnitImpl &targetUnitImpl = + MeasureUnitImpl::forMeasureUnit(targetUnit, tempOutput, status); + fUnitConverter.adoptInsteadAndCheckErrorCode( + new ComplexUnitsConverter(targetUnitImpl, conversionRates, status), status); +} + +void UnitConversionHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros, + UErrorCode &status) const { + fParent->processQuantity(quantity, micros, status); + if (U_FAILURE(status)) { + return; + } + quantity.roundToInfinity(); // Enables toDouble + MaybeStackVector<Measure> measures = + fUnitConverter->convert(quantity.toDouble(), &micros.rounder, status); + micros.outputUnit = fOutputUnit; + if (U_FAILURE(status)) { + return; + } + + 
mixedMeasuresToMicros(measures, &quantity, &micros, status); +} + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/number_usageprefs.h b/contrib/libs/icu/i18n/number_usageprefs.h new file mode 100644 index 0000000000..70547225a0 --- /dev/null +++ b/contrib/libs/icu/i18n/number_usageprefs.h @@ -0,0 +1,126 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING +#ifndef __NUMBER_USAGEPREFS_H__ +#define __NUMBER_USAGEPREFS_H__ + +#include "cmemory.h" +#include "number_types.h" +#include "unicode/listformatter.h" +#include "unicode/localpointer.h" +#include "unicode/locid.h" +#include "unicode/measunit.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" +#include "units_converter.h" +#include "units_router.h" + +U_NAMESPACE_BEGIN + +using ::icu::units::ComplexUnitsConverter; +using ::icu::units::UnitsRouter; + +namespace number { +namespace impl { + +/** + * A MicroPropsGenerator which uses UnitsRouter to produce output converted to a + * MeasureUnit appropriate for a particular localized usage: see + * NumberFormatterSettings::usage(). + */ +class U_I18N_API UsagePrefsHandler : public MicroPropsGenerator, public UMemory { + public: + UsagePrefsHandler(const Locale &locale, const MeasureUnit &inputUnit, const StringPiece usage, + const MicroPropsGenerator *parent, UErrorCode &status); + + /** + * Obtains the appropriate output value, MeasureUnit and + * rounding/precision behaviour from the UnitsRouter. + * + * The output unit is passed on to the LongNameHandler via + * micros.outputUnit. + */ + void processQuantity(DecimalQuantity &quantity, MicroProps &micros, + UErrorCode &status) const U_OVERRIDE; + + /** + * Returns the list of possible output units, i.e. the full set of + * preferences, for the localized, usage-specific unit preferences. 
+ * + * The returned pointer should be valid for the lifetime of the + * UsagePrefsHandler instance. + */ + const MaybeStackVector<MeasureUnit> *getOutputUnits() const { + return fUnitsRouter.getOutputUnits(); + } + + private: + UnitsRouter fUnitsRouter; + const MicroPropsGenerator *fParent; +}; + +} // namespace impl +} // namespace number + +// Export explicit template instantiations of LocalPointerBase and LocalPointer. +// This is required when building DLLs for Windows. (See datefmt.h, +// collationiterator.h, erarules.h and others for similar examples.) +// +// Note: These need to be outside of the number::impl namespace, or Clang will +// generate a compile error. +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +#if defined(_MSC_VER) +// Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= +#pragma warning(push) +#pragma warning(disable: 4661) +#endif +template class U_I18N_API LocalPointerBase<ComplexUnitsConverter>; +template class U_I18N_API LocalPointer<ComplexUnitsConverter>; +#if defined(_MSC_VER) +#pragma warning(pop) +#endif +#endif + +namespace number { +namespace impl { + +/** + * A MicroPropsGenerator which converts a measurement from one MeasureUnit to + * another. In particular, the output MeasureUnit may be a mixed unit. (The + * input unit may not be a mixed unit.) + */ +class U_I18N_API UnitConversionHandler : public MicroPropsGenerator, public UMemory { + public: + /** + * Constructor. + * + * @param targetUnit Specifies the output MeasureUnit. The input MeasureUnit + * is derived from it: in case of a mixed unit, the biggest unit is + * taken as the input unit. If not a mixed unit, the input unit will be + * the same as the output unit and no unit conversion takes place. + * @param parent The parent MicroPropsGenerator. + * @param status Receives status. 
+ */ + UnitConversionHandler(const MeasureUnit &targetUnit, const MicroPropsGenerator *parent, + UErrorCode &status); + + /** + * Obtains the appropriate output values from the Unit Converter. + */ + void processQuantity(DecimalQuantity &quantity, MicroProps &micros, + UErrorCode &status) const U_OVERRIDE; + private: + MeasureUnit fOutputUnit; + LocalPointer<ComplexUnitsConverter> fUnitConverter; + const MicroPropsGenerator *fParent; +}; + +} // namespace impl +} // namespace number +U_NAMESPACE_END + +#endif // __NUMBER_USAGEPREFS_H__ +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/number_utils.cpp b/contrib/libs/icu/i18n/number_utils.cpp index 91d7f335cd..ad70532140 100644 --- a/contrib/libs/icu/i18n/number_utils.cpp +++ b/contrib/libs/icu/i18n/number_utils.cpp @@ -70,7 +70,7 @@ const char16_t* utils::getPatternForStyle(const Locale& locale, const char* nsNa break; default: patternKey = "decimalFormat"; // silence compiler error - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } LocalUResourceBundlePointer res(ures_open(nullptr, locale.getName(), &status)); if (U_FAILURE(status)) { return u""; } @@ -180,12 +180,6 @@ void DecNum::_setTo(const char* str, int32_t maxDigits, UErrorCode& status) { status = U_UNSUPPORTED_ERROR; return; } - - // For consistency with Java BigDecimal, no support for DecNum that is NaN or Infinity! 
- if (decNumberIsSpecial(fData.getAlias())) { - status = U_UNSUPPORTED_ERROR; - return; - } } void @@ -252,13 +246,28 @@ bool DecNum::isZero() const { return decNumberIsZero(fData.getAlias()); } +bool DecNum::isSpecial() const { + return decNumberIsSpecial(fData.getAlias()); +} + +bool DecNum::isInfinity() const { + return decNumberIsInfinite(fData.getAlias()); +} + +bool DecNum::isNaN() const { + return decNumberIsNaN(fData.getAlias()); +} + void DecNum::toString(ByteSink& output, UErrorCode& status) const { if (U_FAILURE(status)) { return; } // "string must be at least dn->digits+14 characters long" int32_t minCapacity = fData.getAlias()->digits + 14; - MaybeStackArray<char, 30> buffer(minCapacity); + MaybeStackArray<char, 30> buffer(minCapacity, status); + if (U_FAILURE(status)) { + return; + } uprv_decNumberToString(fData, buffer.getAlias()); output.Append(buffer.getAlias(), static_cast<int32_t>(uprv_strlen(buffer.getAlias()))); } diff --git a/contrib/libs/icu/i18n/number_utils.h b/contrib/libs/icu/i18n/number_utils.h index 93195f080b..bc369c940f 100644 --- a/contrib/libs/icu/i18n/number_utils.h +++ b/contrib/libs/icu/i18n/number_utils.h @@ -49,8 +49,8 @@ inline bool unitIsCurrency(const MeasureUnit& unit) { return uprv_strcmp("currency", unit.getType()) == 0; } -inline bool unitIsNoUnit(const MeasureUnit& unit) { - return uprv_strcmp("none", unit.getType()) == 0; +inline bool unitIsBaseUnit(const MeasureUnit& unit) { + return unit == MeasureUnit(); } inline bool unitIsPercent(const MeasureUnit& unit) { diff --git a/contrib/libs/icu/i18n/number_utypes.h b/contrib/libs/icu/i18n/number_utypes.h index 7a1b7a4e80..50c861787f 100644 --- a/contrib/libs/icu/i18n/number_utypes.h +++ b/contrib/libs/icu/i18n/number_utypes.h @@ -28,9 +28,6 @@ const DecimalQuantity* validateUFormattedNumberToDecimalQuantity( * This struct is held internally by the C++ version FormattedNumber since the member types are not * declared in the public header file. 
* - * The DecimalQuantity is not currently being used by FormattedNumber, but at some point it could be used - * to add a toDecNumber() or similar method. - * * Exported as U_I18N_API for tests */ class U_I18N_API UFormattedNumberData : public FormattedValueStringBuilderImpl { @@ -38,7 +35,16 @@ public: UFormattedNumberData() : FormattedValueStringBuilderImpl(kUndefinedField) {} virtual ~UFormattedNumberData(); + // The formatted quantity. DecimalQuantity quantity; + + // The output unit for the formatted quantity. + // TODO(units,hugovdm): populate this correctly for the general case - it's + // currently only implemented for the .usage() use case. + MeasureUnit outputUnit; + + // The gender of the formatted output. + const char *gender = ""; }; diff --git a/contrib/libs/icu/i18n/numfmt.cpp b/contrib/libs/icu/i18n/numfmt.cpp index 7c3a0551c3..b8ce1c930f 100644 --- a/contrib/libs/icu/i18n/numfmt.cpp +++ b/contrib/libs/icu/i18n/numfmt.cpp @@ -13,7 +13,7 @@ * Date Name Description * 02/19/97 aliu Converted from java. * 03/18/97 clhuang Implemented with C++ APIs. -* 04/17/97 aliu Enlarged MAX_INTEGER_DIGITS to fully accomodate the +* 04/17/97 aliu Enlarged MAX_INTEGER_DIGITS to fully accommodate the * largest double, by default. * Changed DigitCount to int per code review. 
* 07/20/98 stephen Changed operator== to check for grouping @@ -285,7 +285,7 @@ NumberFormat::operator=(const NumberFormat& rhs) // ------------------------------------- -UBool +bool NumberFormat::operator==(const Format& that) const { // Format::operator== guarantees this cast is safe @@ -860,7 +860,7 @@ class ICUNumberFormatFactory : public ICUResourceBundleFactory { public: virtual ~ICUNumberFormatFactory(); protected: - virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /* service */, UErrorCode& status) const { + virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /* service */, UErrorCode& status) const override { return NumberFormat::makeInstance(loc, (UNumberFormatStyle)kind, status); } }; @@ -884,7 +884,7 @@ public: virtual ~NFFactory(); - virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const + virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const override { if (handlesKey(key, status)) { const LocaleKey& lkey = (const LocaleKey&)key; @@ -907,7 +907,7 @@ protected: * otherwise). This can be called often and might need to be * cached if it is expensive to create. 
*/ - virtual const Hashtable* getSupportedIDs(UErrorCode& status) const + virtual const Hashtable* getSupportedIDs(UErrorCode& status) const override { if (U_SUCCESS(status)) { if (!_ids) { @@ -943,11 +943,11 @@ public: virtual ~ICUNumberFormatService(); - virtual UObject* cloneInstance(UObject* instance) const { + virtual UObject* cloneInstance(UObject* instance) const override { return ((NumberFormat*)instance)->clone(); } - virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /* actualID */, UErrorCode& status) const { + virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /* actualID */, UErrorCode& status) const override { LocaleKey& lkey = (LocaleKey&)key; int32_t kind = lkey.kind(); Locale loc; @@ -955,7 +955,7 @@ public: return NumberFormat::makeInstance(loc, (UNumberFormatStyle)kind, status); } - virtual UBool isDefault() const { + virtual UBool isDefault() const override { return countFactories() == 1; } }; diff --git a/contrib/libs/icu/i18n/numparse_affixes.cpp b/contrib/libs/icu/i18n/numparse_affixes.cpp index ca293e741d..1414006598 100644 --- a/contrib/libs/icu/i18n/numparse_affixes.cpp +++ b/contrib/libs/icu/i18n/numparse_affixes.cpp @@ -101,7 +101,7 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, addMatcher(fWarehouse.currency(status)); break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } else if (fIgnorables != nullptr && fIgnorables->getSet()->contains(cp)) { @@ -127,8 +127,8 @@ void AffixPatternMatcherBuilder::addMatcher(NumberParseMatcher& matcher) { fMatchers[fMatchersLen++] = &matcher; } -AffixPatternMatcher AffixPatternMatcherBuilder::build() { - return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern); +AffixPatternMatcher AffixPatternMatcherBuilder::build(UErrorCode& status) { + return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern, status); } AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData) @@ 
-209,12 +209,13 @@ AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& a AffixPatternMatcherBuilder builder(affixPattern, tokenWarehouse, ignorables); AffixUtils::iterateWithConsumer(affixPattern, builder, status); - return builder.build(); + return builder.build(status); } AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, - const UnicodeString& pattern) - : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {} + const UnicodeString& pattern, UErrorCode& status) + : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern, status) { +} UnicodeString AffixPatternMatcher::getPattern() const { return fPattern.toAliasedUnicodeString(); @@ -293,18 +294,20 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt } // Generate Prefix + // TODO: Handle approximately sign? bool hasPrefix = false; PatternStringUtils::patternInfoToStringBuilder( - patternInfo, true, type, StandardPlural::OTHER, false, sb); + patternInfo, true, type, false, StandardPlural::OTHER, false, sb); fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern( sb, *fTokenWarehouse, parseFlags, &hasPrefix, status); AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++] : nullptr; // Generate Suffix + // TODO: Handle approximately sign? bool hasSuffix = false; PatternStringUtils::patternInfoToStringBuilder( - patternInfo, false, type, StandardPlural::OTHER, false, sb); + patternInfo, false, type, false, StandardPlural::OTHER, false, sb); fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern( sb, *fTokenWarehouse, parseFlags, &hasSuffix, status); AffixPatternMatcher* suffix = hasSuffix ? 
&fAffixPatternMatchers[numAffixPatternMatchers++] @@ -446,28 +449,3 @@ UnicodeString AffixMatcher::toString() const { #endif /* #if !UCONFIG_NO_FORMATTING */ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/libs/icu/i18n/numparse_affixes.h b/contrib/libs/icu/i18n/numparse_affixes.h index 97a17f4d78..a82b731ab5 100644 --- a/contrib/libs/icu/i18n/numparse_affixes.h +++ b/contrib/libs/icu/i18n/numparse_affixes.h @@ -128,7 +128,7 @@ class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCo void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override; /** NOTE: You can build only once! */ - AffixPatternMatcher build(); + AffixPatternMatcher build(UErrorCode& status); private: ArraySeriesMatcher::MatcherArray fMatchers; @@ -160,7 +160,8 @@ class U_I18N_API AffixPatternMatcher : public ArraySeriesMatcher { private: CompactUnicodeString<4> fPattern; - AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern); + AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern, + UErrorCode& status); friend class AffixPatternMatcherBuilder; }; diff --git a/contrib/libs/icu/i18n/numparse_currency.cpp b/contrib/libs/icu/i18n/numparse_currency.cpp index 6b53a73edf..7bbb060f3d 100644 --- a/contrib/libs/icu/i18n/numparse_currency.cpp +++ b/contrib/libs/icu/i18n/numparse_currency.cpp @@ -42,7 +42,7 @@ CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currency // TODO: Figure out how to make this faster and re-enable. // Computing the "lead code points" set for fastpathing is too slow to use in production. - // See http://bugs.icu-project.org/trac/ticket/13584 + // See https://unicode-org.atlassian.net/browse/ICU-13584 // // Compute the full set of characters that could be the first in a currency to allow for // // efficient smoke test. 
// fLeadCodePoints.add(fCurrency1.char32At(0)); diff --git a/contrib/libs/icu/i18n/numparse_decimal.cpp b/contrib/libs/icu/i18n/numparse_decimal.cpp index cf1e815672..8b99fd7ad4 100644 --- a/contrib/libs/icu/i18n/numparse_decimal.cpp +++ b/contrib/libs/icu/i18n/numparse_decimal.cpp @@ -88,7 +88,7 @@ DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Groupe grouping2 = grouper.getSecondary(); // Fraction grouping parsing is disabled for now but could be enabled later. - // See http://bugs.icu-project.org/trac/ticket/10794 + // See https://unicode-org.atlassian.net/browse/ICU-10794 // fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED); } diff --git a/contrib/libs/icu/i18n/numparse_decimal.h b/contrib/libs/icu/i18n/numparse_decimal.h index ec6c76487e..07c9afeccc 100644 --- a/contrib/libs/icu/i18n/numparse_decimal.h +++ b/contrib/libs/icu/i18n/numparse_decimal.h @@ -39,7 +39,7 @@ class DecimalMatcher : public NumberParseMatcher, public UMemory { bool groupingDisabled; // Fraction grouping parsing is disabled for now but could be enabled later. - // See http://bugs.icu-project.org/trac/ticket/10794 + // See https://unicode-org.atlassian.net/browse/ICU-10794 // bool fractionGrouping; /** If true, do not accept numbers in the fraction */ diff --git a/contrib/libs/icu/i18n/numparse_impl.cpp b/contrib/libs/icu/i18n/numparse_impl.cpp index 4b76da1c14..91c60747f2 100644 --- a/contrib/libs/icu/i18n/numparse_impl.cpp +++ b/contrib/libs/icu/i18n/numparse_impl.cpp @@ -285,7 +285,7 @@ void NumberParserImpl::parseGreedy(StringSegment& segment, ParsedNumber& result, i++; continue; } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } // NOTE: If we get here, the greedy parse completed without consuming the entire string. 
diff --git a/contrib/libs/icu/i18n/numparse_types.h b/contrib/libs/icu/i18n/numparse_types.h index b4007c2ff5..623f0e80f1 100644 --- a/contrib/libs/icu/i18n/numparse_types.h +++ b/contrib/libs/icu/i18n/numparse_types.h @@ -64,14 +64,15 @@ class CompactUnicodeString { fBuffer[0] = 0; } - CompactUnicodeString(const UnicodeString& text) - : fBuffer(text.length() + 1) { + CompactUnicodeString(const UnicodeString& text, UErrorCode& status) + : fBuffer(text.length() + 1, status) { + if (U_FAILURE(status)) { return; } uprv_memcpy(fBuffer.getAlias(), text.getBuffer(), sizeof(UChar) * text.length()); fBuffer[text.length()] = 0; } inline UnicodeString toAliasedUnicodeString() const { - return UnicodeString(TRUE, fBuffer.getAlias(), -1); + return UnicodeString(true, fBuffer.getAlias(), -1); } bool operator==(const CompactUnicodeString& other) const { diff --git a/contrib/libs/icu/i18n/numrange_capi.cpp b/contrib/libs/icu/i18n/numrange_capi.cpp new file mode 100644 index 0000000000..bd3a9ef5e8 --- /dev/null +++ b/contrib/libs/icu/i18n/numrange_capi.cpp @@ -0,0 +1,197 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "fphdlimp.h" +#include "number_utypes.h" +#include "numparse_types.h" +#include "formattedval_impl.h" +#include "numrange_impl.h" +#include "number_decnum.h" +#include "unicode/numberrangeformatter.h" +#include "unicode/unumberrangeformatter.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; + + +U_NAMESPACE_BEGIN +namespace number { +namespace impl { + +/** + * Implementation class for UNumberRangeFormatter. Wraps a LocalizedRangeNumberFormatter. 
+ */ +struct UNumberRangeFormatterData : public UMemory, + // Magic number as ASCII == "NRF" (NumberRangeFormatter) + public IcuCApiHelper<UNumberRangeFormatter, UNumberRangeFormatterData, 0x4E524600> { + LocalizedNumberRangeFormatter fFormatter; +}; + +struct UFormattedNumberRangeImpl; + +// Magic number as ASCII == "FDN" (FormatteDNumber) +typedef IcuCApiHelper<UFormattedNumberRange, UFormattedNumberRangeImpl, 0x46444E00> UFormattedNumberRangeApiHelper; + +struct UFormattedNumberRangeImpl : public UFormattedValueImpl, public UFormattedNumberRangeApiHelper { + UFormattedNumberRangeImpl(); + ~UFormattedNumberRangeImpl(); + + FormattedNumberRange fImpl; + UFormattedNumberRangeData fData; +}; + +UFormattedNumberRangeImpl::UFormattedNumberRangeImpl() + : fImpl(&fData) { + fFormattedValue = &fImpl; +} + +UFormattedNumberRangeImpl::~UFormattedNumberRangeImpl() { + // Disown the data from fImpl so it doesn't get deleted twice + fImpl.fData = nullptr; +} + +} // namespace impl +} // namespace number +U_NAMESPACE_END + + +UPRV_FORMATTED_VALUE_CAPI_NO_IMPLTYPE_AUTO_IMPL( + UFormattedNumberRange, + UFormattedNumberRangeImpl, + UFormattedNumberRangeApiHelper, + unumrf) + + +const UFormattedNumberRangeData* number::impl::validateUFormattedNumberRange( + const UFormattedNumberRange* uresult, UErrorCode& status) { + auto* result = UFormattedNumberRangeApiHelper::validate(uresult, status); + if (U_FAILURE(status)) { + return nullptr; + } + return &result->fData; +} + + +U_CAPI UNumberRangeFormatter* U_EXPORT2 +unumrf_openForSkeletonWithCollapseAndIdentityFallback( + const UChar* skeleton, + int32_t skeletonLen, + UNumberRangeCollapse collapse, + UNumberRangeIdentityFallback identityFallback, + const char* locale, + UParseError* perror, + UErrorCode* ec) { + auto* impl = new UNumberRangeFormatterData(); + if (impl == nullptr) { + *ec = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + // Readonly-alias constructor (first argument is whether we are NUL-terminated) + UnicodeString 
skeletonString(skeletonLen == -1, skeleton, skeletonLen); + impl->fFormatter = NumberRangeFormatter::withLocale(locale) + .numberFormatterBoth(NumberFormatter::forSkeleton(skeletonString, *perror, *ec)) + .collapse(collapse) + .identityFallback(identityFallback); + return impl->exportForC(); +} + +U_CAPI void U_EXPORT2 +unumrf_formatDoubleRange( + const UNumberRangeFormatter* uformatter, + double first, + double second, + UFormattedNumberRange* uresult, + UErrorCode* ec) { + const UNumberRangeFormatterData* formatter = UNumberRangeFormatterData::validate(uformatter, *ec); + auto* result = UFormattedNumberRangeApiHelper::validate(uresult, *ec); + if (U_FAILURE(*ec)) { return; } + + result->fData.resetString(); + result->fData.quantity1.clear(); + result->fData.quantity2.clear(); + result->fData.quantity1.setToDouble(first); + result->fData.quantity2.setToDouble(second); + formatter->fFormatter.formatImpl(result->fData, first == second, *ec); +} + +U_CAPI void U_EXPORT2 +unumrf_formatDecimalRange( + const UNumberRangeFormatter* uformatter, + const char* first, int32_t firstLen, + const char* second, int32_t secondLen, + UFormattedNumberRange* uresult, + UErrorCode* ec) { + const UNumberRangeFormatterData* formatter = UNumberRangeFormatterData::validate(uformatter, *ec); + auto* result = UFormattedNumberRangeApiHelper::validate(uresult, *ec); + if (U_FAILURE(*ec)) { return; } + + result->fData.resetString(); + result->fData.quantity1.clear(); + result->fData.quantity2.clear(); + result->fData.quantity1.setToDecNumber({first, firstLen}, *ec); + result->fData.quantity2.setToDecNumber({second, secondLen}, *ec); + formatter->fFormatter.formatImpl(result->fData, first == second, *ec); +} + +U_CAPI UNumberRangeIdentityResult U_EXPORT2 +unumrf_resultGetIdentityResult( + const UFormattedNumberRange* uresult, + UErrorCode* ec) { + auto* result = UFormattedNumberRangeApiHelper::validate(uresult, *ec); + if (U_FAILURE(*ec)) { + return UNUM_IDENTITY_RESULT_COUNT; + } + return 
result->fData.identityResult; +} + +U_CAPI int32_t U_EXPORT2 +unumrf_resultGetFirstDecimalNumber( + const UFormattedNumberRange* uresult, + char* dest, + int32_t destCapacity, + UErrorCode* ec) { + const auto* result = UFormattedNumberRangeApiHelper::validate(uresult, *ec); + if (U_FAILURE(*ec)) { + return 0; + } + DecNum decnum; + return result->fData.quantity1.toDecNum(decnum, *ec) + .toCharString(*ec) + .extract(dest, destCapacity, *ec); +} + +U_CAPI int32_t U_EXPORT2 +unumrf_resultGetSecondDecimalNumber( + const UFormattedNumberRange* uresult, + char* dest, + int32_t destCapacity, + UErrorCode* ec) { + const auto* result = UFormattedNumberRangeApiHelper::validate(uresult, *ec); + if (U_FAILURE(*ec)) { + return 0; + } + DecNum decnum; + return result->fData.quantity2 + .toDecNum(decnum, *ec) + .toCharString(*ec) + .extract(dest, destCapacity, *ec); +} + +U_CAPI void U_EXPORT2 +unumrf_close(UNumberRangeFormatter* f) { + UErrorCode localStatus = U_ZERO_ERROR; + const UNumberRangeFormatterData* impl = UNumberRangeFormatterData::validate(f, localStatus); + delete impl; +} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/numrange_fluent.cpp b/contrib/libs/icu/i18n/numrange_fluent.cpp index 33179026f8..f1060b3c21 100644 --- a/contrib/libs/icu/i18n/numrange_fluent.cpp +++ b/contrib/libs/icu/i18n/numrange_fluent.cpp @@ -12,6 +12,7 @@ #include "numrange_impl.h" #include "util.h" #include "number_utypes.h" +#include "number_decnum.h" using namespace icu; using namespace icu::number; @@ -244,6 +245,7 @@ LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(NFS<LNF>&& src) U_N } LocalizedNumberRangeFormatter& LocalizedNumberRangeFormatter::operator=(const LNF& other) { + if (this == &other) { return *this; } // self-assignment: no-op NFS<LNF>::operator=(static_cast<const NFS<LNF>&>(other)); // Do not steal; just clear delete fAtomicFormatter.exchange(nullptr); @@ -375,28 +377,4 @@ LocalizedNumberRangeFormatter::getFormatter(UErrorCode& 
status) const { } -UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedNumberRange) - -#define UPRV_NOARG - -UnicodeString FormattedNumberRange::getFirstDecimal(UErrorCode& status) const { - UPRV_FORMATTED_VALUE_METHOD_GUARD(ICU_Utility::makeBogusString()) - return fData->quantity1.toScientificString(); -} - -UnicodeString FormattedNumberRange::getSecondDecimal(UErrorCode& status) const { - UPRV_FORMATTED_VALUE_METHOD_GUARD(ICU_Utility::makeBogusString()) - return fData->quantity2.toScientificString(); -} - -UNumberRangeIdentityResult FormattedNumberRange::getIdentityResult(UErrorCode& status) const { - UPRV_FORMATTED_VALUE_METHOD_GUARD(UNUM_IDENTITY_RESULT_NOT_EQUAL) - return fData->identityResult; -} - - -UFormattedNumberRangeData::~UFormattedNumberRangeData() = default; - - - #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/numrange_impl.cpp b/contrib/libs/icu/i18n/numrange_impl.cpp index 9fb3dee861..3c440c193c 100644 --- a/contrib/libs/icu/i18n/numrange_impl.cpp +++ b/contrib/libs/icu/i18n/numrange_impl.cpp @@ -12,6 +12,7 @@ #include "unicode/numberrangeformatter.h" #include "numrange_impl.h" #include "patternprops.h" +#include "pluralranges.h" #include "uresimp.h" #include "util.h" @@ -29,7 +30,8 @@ constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentity struct NumberRangeData { SimpleFormatter rangePattern; - SimpleFormatter approximatelyPattern; + // Note: approximatelyPattern is unused since ICU 69. + // SimpleFormatter approximatelyPattern; }; class NumberRangeDataSink : public ResourceSink { @@ -45,12 +47,16 @@ class NumberRangeDataSink : public ResourceSink { continue; // have already seen this pattern } fData.rangePattern = {value.getUnicodeString(status), status}; - } else if (uprv_strcmp(key, "approximately") == 0) { + } + /* + // Note: approximatelyPattern is unused since ICU 69. 
+ else if (uprv_strcmp(key, "approximately") == 0) { if (hasApproxData()) { continue; // have already seen this pattern } fData.approximatelyPattern = {value.getUnicodeString(status), status}; } + */ } } @@ -58,21 +64,26 @@ class NumberRangeDataSink : public ResourceSink { return fData.rangePattern.getArgumentLimit() != 0; } + /* + // Note: approximatelyPattern is unused since ICU 69. bool hasApproxData() { return fData.approximatelyPattern.getArgumentLimit() != 0; } + */ bool isComplete() { - return hasRangeData() && hasApproxData(); + return hasRangeData() /* && hasApproxData() */; } void fillInDefaults(UErrorCode& status) { if (!hasRangeData()) { fData.rangePattern = {u"{0}–{1}", status}; } + /* if (!hasApproxData()) { fData.approximatelyPattern = {u"~{0}", status}; } + */ } private: @@ -106,99 +117,17 @@ void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeD sink.fillInDefaults(status); } -class PluralRangesDataSink : public ResourceSink { - public: - PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {} - - void put(const char* /*key*/, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE { - ResourceArray entriesArray = value.getArray(status); - if (U_FAILURE(status)) { return; } - fOutput.setCapacity(entriesArray.getSize()); - for (int i = 0; entriesArray.getValue(i, value); i++) { - ResourceArray pluralFormsArray = value.getArray(status); - if (U_FAILURE(status)) { return; } - pluralFormsArray.getValue(0, value); - StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status); - if (U_FAILURE(status)) { return; } - pluralFormsArray.getValue(1, value); - StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status); - if (U_FAILURE(status)) { return; } - pluralFormsArray.getValue(2, value); - StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status); - if (U_FAILURE(status)) { return; } 
- fOutput.addPluralRange(first, second, result); - } - } - - private: - StandardPluralRanges& fOutput; -}; - -void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) { - if (U_FAILURE(status)) { return; } - LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status)); - if (U_FAILURE(status)) { return; } - - CharString dataPath; - dataPath.append("locales/", -1, status); - dataPath.append(locale.getLanguage(), -1, status); - if (U_FAILURE(status)) { return; } - int32_t setLen; - // Not all languages are covered: fail gracefully - UErrorCode internalStatus = U_ZERO_ERROR; - const UChar* set = ures_getStringByKeyWithFallback(rb.getAlias(), dataPath.data(), &setLen, &internalStatus); - if (U_FAILURE(internalStatus)) { return; } - - dataPath.clear(); - dataPath.append("rules/", -1, status); - dataPath.appendInvariantChars(set, setLen, status); - if (U_FAILURE(status)) { return; } - PluralRangesDataSink sink(output); - ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status); - if (U_FAILURE(status)) { return; } -} - } // namespace -void StandardPluralRanges::initialize(const Locale& locale, UErrorCode& status) { - getPluralRangesData(locale, *this, status); -} - -void StandardPluralRanges::addPluralRange( - StandardPlural::Form first, - StandardPlural::Form second, - StandardPlural::Form result) { - U_ASSERT(fTriplesLen < fTriples.getCapacity()); - fTriples[fTriplesLen] = {first, second, result}; - fTriplesLen++; -} - -void StandardPluralRanges::setCapacity(int32_t length) { - if (length > fTriples.getCapacity()) { - fTriples.resize(length, 0); - } -} - -StandardPlural::Form -StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const { - for (int32_t i=0; i<fTriplesLen; i++) { - const auto& triple = fTriples[i]; - if (triple.first == first && triple.second == second) { - return triple.result; - } - } - // Default fallback - return StandardPlural::OTHER; -} 
- NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status) : formatterImpl1(macros.formatter1.fMacros, status), formatterImpl2(macros.formatter2.fMacros, status), fSameFormatters(macros.singleFormatter), fCollapse(macros.collapse), - fIdentityFallback(macros.identityFallback) { + fIdentityFallback(macros.identityFallback), + fApproximatelyFormatter(status) { const char* nsName = formatterImpl1.getRawMicroProps().nsName; if (uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) { @@ -210,10 +139,19 @@ NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros getNumberRangeData(macros.locale.getName(), nsName, data, status); if (U_FAILURE(status)) { return; } fRangeFormatter = data.rangePattern; - fApproximatelyModifier = {data.approximatelyPattern, kUndefinedField, false}; + + if (fSameFormatters && ( + fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY || + fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE)) { + MacroProps approximatelyMacros(macros.formatter1.fMacros); + approximatelyMacros.approximately = true; + // Use in-place construction because NumberFormatterImpl has internal self-pointers + fApproximatelyFormatter.~NumberFormatterImpl(); + new (&fApproximatelyFormatter) NumberFormatterImpl(approximatelyMacros, status); + } // TODO: Get locale from PluralRules instead? 
- fPluralRanges.initialize(macros.locale, status); + fPluralRanges = StandardPluralRanges::forLocale(macros.locale, status); if (U_FAILURE(status)) { return; } } @@ -291,7 +229,7 @@ void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equa break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -314,12 +252,14 @@ void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& d UErrorCode& status) const { if (U_FAILURE(status)) { return; } if (fSameFormatters) { - int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.getStringRef(), 0, status); - // HEURISTIC: Desired modifier order: inner, middle, approximately, outer. - length += micros1.modInner->apply(data.getStringRef(), 0, length, status); - length += micros1.modMiddle->apply(data.getStringRef(), 0, length, status); - length += fApproximatelyModifier.apply(data.getStringRef(), 0, length, status); - micros1.modOuter->apply(data.getStringRef(), 0, length, status); + // Re-format using the approximately formatter: + MicroProps microsAppx; + data.quantity1.resetExponent(); + fApproximatelyFormatter.preProcess(data.quantity1, microsAppx, status); + int32_t length = NumberFormatterImpl::writeNumber(microsAppx, data.quantity1, data.getStringRef(), 0, status); + length += microsAppx.modInner->apply(data.getStringRef(), 0, length, status); + length += microsAppx.modMiddle->apply(data.getStringRef(), 0, length, status); + microsAppx.modOuter->apply(data.getStringRef(), 0, length, status); } else { formatRange(data, micros1, micros2, status); } @@ -387,7 +327,7 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, // INNER MODIFIER collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner); - // All done checking for collapsability. + // All done checking for collapsibility. 
break; } @@ -410,6 +350,7 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, #define UPRV_INDEX_1 (lengthPrefix + length1) #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix) #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2) + #define UPRV_INDEX_4 (lengthPrefix + length1 + lengthInfix + length2 + lengthSuffix) int32_t lengthRange = SimpleModifier::formatTwoArgPattern( fRangeFormatter, @@ -444,36 +385,46 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, } length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status); - length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status); + // ICU-21684: Write the second number to a temp string to avoid repeated insert operations + FormattedStringBuilder tempString; + NumberFormatterImpl::writeNumber(micros2, data.quantity2, tempString, 0, status); + length2 += string.insert(UPRV_INDEX_2, tempString, status); // TODO: Support padding? 
if (collapseInner) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner); - lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); + lengthPrefix += mod.getPrefixLength(); + lengthSuffix -= mod.getPrefixLength(); } else { length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status); } if (collapseMiddle) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle); - lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); + lengthPrefix += mod.getPrefixLength(); + lengthSuffix -= mod.getPrefixLength(); } else { length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status); } if (collapseOuter) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter); - lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); + lengthPrefix += mod.getPrefixLength(); + lengthSuffix -= mod.getPrefixLength(); } else { length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modOuter->apply(string, 
UPRV_INDEX_2, UPRV_INDEX_4, status); } + + // Now that all pieces are added, save the span info. + data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, UPRV_INDEX_0, length1, status); + data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, UPRV_INDEX_2, length2, status); } diff --git a/contrib/libs/icu/i18n/numrange_impl.h b/contrib/libs/icu/i18n/numrange_impl.h index 8f4c8a40ba..ac1d8a5897 100644 --- a/contrib/libs/icu/i18n/numrange_impl.h +++ b/contrib/libs/icu/i18n/numrange_impl.h @@ -15,6 +15,7 @@ #include "number_formatimpl.h" #include "formatted_string_builder.h" #include "formattedval_impl.h" +#include "pluralranges.h" U_NAMESPACE_BEGIN namespace number { namespace impl { @@ -40,36 +41,6 @@ public: }; -class StandardPluralRanges : public UMemory { - public: - void initialize(const Locale& locale, UErrorCode& status); - StandardPlural::Form resolve(StandardPlural::Form first, StandardPlural::Form second) const; - - /** Used for data loading. */ - void addPluralRange( - StandardPlural::Form first, - StandardPlural::Form second, - StandardPlural::Form result); - - /** Used for data loading. */ - void setCapacity(int32_t length); - - private: - struct StandardPluralRangeTriple { - StandardPlural::Form first; - StandardPlural::Form second; - StandardPlural::Form result; - }; - - // TODO: An array is simple here, but it results in linear lookup time. - // Certain locales have 20-30 entries in this list. - // Consider changing to a smarter data structure. 
- typedef MaybeStackArray<StandardPluralRangeTriple, 3> PluralRangeTriples; - PluralRangeTriples fTriples; - int32_t fTriplesLen = 0; -}; - - class NumberRangeFormatterImpl : public UMemory { public: NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status); @@ -85,7 +56,7 @@ class NumberRangeFormatterImpl : public UMemory { UNumberRangeIdentityFallback fIdentityFallback; SimpleFormatter fRangeFormatter; - SimpleModifier fApproximatelyModifier; + NumberFormatterImpl fApproximatelyFormatter; StandardPluralRanges fPluralRanges; @@ -105,6 +76,11 @@ class NumberRangeFormatterImpl : public UMemory { }; +/** Helper function used in upluralrules.cpp */ +const UFormattedNumberRangeData* validateUFormattedNumberRange( + const UFormattedNumberRange* uresult, UErrorCode& status); + + } // namespace impl } // namespace number U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/numsys.cpp b/contrib/libs/icu/i18n/numsys.cpp index 62d555aad2..44aaf8e2a5 100644 --- a/contrib/libs/icu/i18n/numsys.cpp +++ b/contrib/libs/icu/i18n/numsys.cpp @@ -314,7 +314,7 @@ U_CFUNC void initNumsysNames(UErrorCode &status) { const char *nsName = ures_getKey(nsCurrent.getAlias()); LocalPointer<UnicodeString> newElem(new UnicodeString(nsName, -1, US_INV), status); if (U_SUCCESS(status)) { - numsysNames->addElement(newElem.getAlias(), status); + numsysNames->addElementX(newElem.getAlias(), status); if (U_SUCCESS(status)) { newElem.orphan(); // on success, the numsysNames vector owns newElem. 
} diff --git a/contrib/libs/icu/i18n/olsontz.cpp b/contrib/libs/icu/i18n/olsontz.cpp index d21e6e9929..cae471a5a2 100644 --- a/contrib/libs/icu/i18n/olsontz.cpp +++ b/contrib/libs/icu/i18n/olsontz.cpp @@ -197,58 +197,60 @@ OlsonTimeZone::OlsonTimeZone(const UResourceBundle* top, } // Process final rule and data, if any - const UChar *ruleIdUStr = ures_getStringByKey(res, kFINALRULE, &len, &ec); - ures_getByKey(res, kFINALRAW, r.getAlias(), &ec); - int32_t ruleRaw = ures_getInt(r.getAlias(), &ec); - ures_getByKey(res, kFINALYEAR, r.getAlias(), &ec); - int32_t ruleYear = ures_getInt(r.getAlias(), &ec); if (U_SUCCESS(ec)) { - UnicodeString ruleID(TRUE, ruleIdUStr, len); - UResourceBundle *rule = TimeZone::loadRule(top, ruleID, NULL, ec); - const int32_t *ruleData = ures_getIntVector(rule, &len, &ec); - if (U_SUCCESS(ec) && len == 11) { - UnicodeString emptyStr; - finalZone = new SimpleTimeZone( - ruleRaw * U_MILLIS_PER_SECOND, - emptyStr, - (int8_t)ruleData[0], (int8_t)ruleData[1], (int8_t)ruleData[2], - ruleData[3] * U_MILLIS_PER_SECOND, - (SimpleTimeZone::TimeMode) ruleData[4], - (int8_t)ruleData[5], (int8_t)ruleData[6], (int8_t)ruleData[7], - ruleData[8] * U_MILLIS_PER_SECOND, - (SimpleTimeZone::TimeMode) ruleData[9], - ruleData[10] * U_MILLIS_PER_SECOND, ec); - if (finalZone == NULL) { - ec = U_MEMORY_ALLOCATION_ERROR; - } else { - finalStartYear = ruleYear; + const UChar *ruleIdUStr = ures_getStringByKey(res, kFINALRULE, &len, &ec); + ures_getByKey(res, kFINALRAW, r.getAlias(), &ec); + int32_t ruleRaw = ures_getInt(r.getAlias(), &ec); + ures_getByKey(res, kFINALYEAR, r.getAlias(), &ec); + int32_t ruleYear = ures_getInt(r.getAlias(), &ec); + if (U_SUCCESS(ec)) { + UnicodeString ruleID(TRUE, ruleIdUStr, len); + UResourceBundle *rule = TimeZone::loadRule(top, ruleID, NULL, ec); + const int32_t *ruleData = ures_getIntVector(rule, &len, &ec); + if (U_SUCCESS(ec) && len == 11) { + UnicodeString emptyStr; + finalZone = new SimpleTimeZone( + ruleRaw * 
U_MILLIS_PER_SECOND, + emptyStr, + (int8_t)ruleData[0], (int8_t)ruleData[1], (int8_t)ruleData[2], + ruleData[3] * U_MILLIS_PER_SECOND, + (SimpleTimeZone::TimeMode) ruleData[4], + (int8_t)ruleData[5], (int8_t)ruleData[6], (int8_t)ruleData[7], + ruleData[8] * U_MILLIS_PER_SECOND, + (SimpleTimeZone::TimeMode) ruleData[9], + ruleData[10] * U_MILLIS_PER_SECOND, ec); + if (finalZone == NULL) { + ec = U_MEMORY_ALLOCATION_ERROR; + } else { + finalStartYear = ruleYear; - // Note: Setting finalStartYear to the finalZone is problematic. When a date is around - // year boundary, SimpleTimeZone may return false result when DST is observed at the - // beginning of year. We could apply safe margin (day or two), but when one of recurrent - // rules falls around year boundary, it could return false result. Without setting the - // start year, finalZone works fine around the year boundary of the start year. + // Note: Setting finalStartYear to the finalZone is problematic. When a date is around + // year boundary, SimpleTimeZone may return false result when DST is observed at the + // beginning of year. We could apply safe margin (day or two), but when one of recurrent + // rules falls around year boundary, it could return false result. Without setting the + // start year, finalZone works fine around the year boundary of the start year. - // finalZone->setStartYear(finalStartYear); + // finalZone->setStartYear(finalStartYear); - // Compute the millis for Jan 1, 0:00 GMT of the finalYear + // Compute the millis for Jan 1, 0:00 GMT of the finalYear - // Note: finalStartMillis is used for detecting either if - // historic transition data or finalZone to be used. In an - // extreme edge case - for example, two transitions fall into - // small windows of time around the year boundary, this may - // result incorrect offset computation. But I think it will - // never happen practically. 
Yoshito - Feb 20, 2010 - finalStartMillis = Grego::fieldsToDay(finalStartYear, 0, 1) * U_MILLIS_PER_DAY; + // Note: finalStartMillis is used for detecting either if + // historic transition data or finalZone to be used. In an + // extreme edge case - for example, two transitions fall into + // small windows of time around the year boundary, this may + // result incorrect offset computation. But I think it will + // never happen practically. Yoshito - Feb 20, 2010 + finalStartMillis = Grego::fieldsToDay(finalStartYear, 0, 1) * U_MILLIS_PER_DAY; + } + } else { + ec = U_INVALID_FORMAT_ERROR; } - } else { - ec = U_INVALID_FORMAT_ERROR; + ures_close(rule); + } else if (ec == U_MISSING_RESOURCE_ERROR) { + // No final zone + ec = U_ZERO_ERROR; } - ures_close(rule); - } else if (ec == U_MISSING_RESOURCE_ERROR) { - // No final zone - ec = U_ZERO_ERROR; } // initialize canonical ID @@ -272,6 +274,7 @@ OlsonTimeZone::OlsonTimeZone(const OlsonTimeZone& other) : * Assignment operator */ OlsonTimeZone& OlsonTimeZone::operator=(const OlsonTimeZone& other) { + if (this == &other) { return *this; } // self-assignment: no-op canonicalID = other.canonicalID; transitionTimesPre32 = other.transitionTimesPre32; @@ -308,7 +311,7 @@ OlsonTimeZone::~OlsonTimeZone() { /** * Returns true if the two TimeZone objects are equal. 
*/ -UBool OlsonTimeZone::operator==(const TimeZone& other) const { +bool OlsonTimeZone::operator==(const TimeZone& other) const { return ((this == &other) || (typeid(*this) == typeid(other) && TimeZone::operator==(other) && @@ -397,9 +400,9 @@ void OlsonTimeZone::getOffset(UDate date, UBool local, int32_t& rawoff, } } -void -OlsonTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, - int32_t& rawoff, int32_t& dstoff, UErrorCode& ec) const { +void OlsonTimeZone::getOffsetFromLocal(UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t& rawoff, int32_t& dstoff, UErrorCode& ec) const { if (U_FAILURE(ec)) { return; } @@ -803,14 +806,14 @@ OlsonTimeZone::initTransitionRules(UErrorCode& status) { } } if (finalZone != NULL) { - // Get the first occurence of final rule starts + // Get the first occurrence of final rule starts UDate startTime = (UDate)finalStartMillis; TimeZoneRule *firstFinalRule = NULL; if (finalZone->useDaylightTime()) { /* * Note: When an OlsonTimeZone is constructed, we should set the final year - * as the start year of finalZone. However, the bounday condition used for + * as the start year of finalZone. However, the boundary condition used for * getting offset from finalZone has some problems. * For now, we do not set the valid start year when the construction time * and create a clone and set the start year when extracting rules. diff --git a/contrib/libs/icu/i18n/olsontz.h b/contrib/libs/icu/i18n/olsontz.h index a3b7dcc8f3..75d86781ed 100644 --- a/contrib/libs/icu/i18n/olsontz.h +++ b/contrib/libs/icu/i18n/olsontz.h @@ -146,12 +146,12 @@ class U_I18N_API OlsonTimeZone: public BasicTimeZone { /** * Returns true if the two TimeZone objects are equal. */ - virtual UBool operator==(const TimeZone& other) const; + virtual bool operator==(const TimeZone& other) const override; /** * TimeZone API. 
*/ - virtual OlsonTimeZone* clone() const; + virtual OlsonTimeZone* clone() const override; /** * TimeZone API. @@ -161,14 +161,14 @@ class U_I18N_API OlsonTimeZone: public BasicTimeZone { /** * TimeZone API. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * TimeZone API. Do not call this; prefer getOffset(UDate,...). */ virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, uint8_t dayOfWeek, - int32_t millis, UErrorCode& ec) const; + int32_t millis, UErrorCode& ec) const override; /** * TimeZone API. Do not call this; prefer getOffset(UDate,...). @@ -176,26 +176,28 @@ class U_I18N_API OlsonTimeZone: public BasicTimeZone { virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, uint8_t dayOfWeek, int32_t millis, int32_t monthLength, - UErrorCode& ec) const; + UErrorCode& ec) const override; /** * TimeZone API. */ virtual void getOffset(UDate date, UBool local, int32_t& rawOffset, - int32_t& dstOffset, UErrorCode& ec) const; + int32_t& dstOffset, UErrorCode& ec) const override; /** * BasicTimeZone API. */ - virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, - int32_t& rawoff, int32_t& dstoff, UErrorCode& ec) const; + virtual void getOffsetFromLocal( + UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const override; /** * TimeZone API. This method has no effect since objects of this * class are quasi-immutable (the base class allows the ID to be * changed). */ - virtual void setRawOffset(int32_t offsetMillis); + virtual void setRawOffset(int32_t offsetMillis) override; /** * TimeZone API. For a historical zone, the raw offset can change @@ -203,30 +205,30 @@ class U_I18N_API OlsonTimeZone: public BasicTimeZone { * expected behavior, this method returns the raw offset for the * current moment in time. 
*/ - virtual int32_t getRawOffset() const; + virtual int32_t getRawOffset() const override; /** * TimeZone API. For a historical zone, whether DST is used or * not varies over time. In order to approximate expected - * behavior, this method returns TRUE if DST is observed at any + * behavior, this method returns true if DST is observed at any * point in the current year. */ - virtual UBool useDaylightTime() const; + virtual UBool useDaylightTime() const override; /** * TimeZone API. */ - virtual UBool inDaylightTime(UDate date, UErrorCode& ec) const; + virtual UBool inDaylightTime(UDate date, UErrorCode& ec) const override; /** * TimeZone API. */ - virtual int32_t getDSTSavings() const; + virtual int32_t getDSTSavings() const override; /** * TimeZone API. Also comare historic transitions. */ - virtual UBool hasSameRules(const TimeZone& other) const; + virtual UBool hasSameRules(const TimeZone& other) const override; /** * BasicTimeZone API. @@ -234,9 +236,9 @@ class U_I18N_API OlsonTimeZone: public BasicTimeZone { * @param base The base time. * @param inclusive Whether the base time is inclusive or not. * @param result Receives the first transition after the base time. - * @return TRUE if the transition is found. + * @return true if the transition is found. */ - virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const; + virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const override; /** * BasicTimeZone API. @@ -244,9 +246,9 @@ class U_I18N_API OlsonTimeZone: public BasicTimeZone { * @param base The base time. * @param inclusive Whether the base time is inclusive or not. * @param result Receives the most recent transition before the base time. - * @return TRUE if the transition is found. + * @return true if the transition is found. 
*/ - virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const; + virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const override; /** * BasicTimeZone API. @@ -256,7 +258,7 @@ class U_I18N_API OlsonTimeZone: public BasicTimeZone { * @param status Receives error status code. * @return The number of <code>TimeZoneRule</code>s representing time transitions. */ - virtual int32_t countTransitionRules(UErrorCode& status) const; + virtual int32_t countTransitionRules(UErrorCode& status) const override; /** * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code> @@ -274,7 +276,7 @@ class U_I18N_API OlsonTimeZone: public BasicTimeZone { * @param status Receives error status code. */ virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial, - const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) const; + const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) const override; /** * Internal API returning the canonical ID of this zone. diff --git a/contrib/libs/icu/i18n/persncal.cpp b/contrib/libs/icu/i18n/persncal.cpp index 26fd294cee..d30577f337 100644 --- a/contrib/libs/icu/i18n/persncal.cpp +++ b/contrib/libs/icu/i18n/persncal.cpp @@ -79,7 +79,7 @@ PersianCalendar* PersianCalendar::clone() const { } PersianCalendar::PersianCalendar(const Locale& aLocale, UErrorCode& success) - : Calendar(TimeZone::createDefault(), aLocale, success) + : Calendar(TimeZone::forLocaleOrDefault(aLocale), aLocale, success) { setTimeInMillis(getNow(), success); // Call this again now that the vtable is set up properly. 
} diff --git a/contrib/libs/icu/i18n/persncal.h b/contrib/libs/icu/i18n/persncal.h index ce6d7397bf..d0f2ee5ec2 100644 --- a/contrib/libs/icu/i18n/persncal.h +++ b/contrib/libs/icu/i18n/persncal.h @@ -164,7 +164,7 @@ class PersianCalendar : public Calendar { // TODO: copy c'tor, etc // clone - virtual PersianCalendar* clone() const; + virtual PersianCalendar* clone() const override; private: /** @@ -194,7 +194,7 @@ class PersianCalendar : public Calendar { /** * @internal */ - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; /** * Return the length (in days) of the given month. @@ -203,13 +203,13 @@ class PersianCalendar : public Calendar { * @param year The hijri shamsi month, 0-based * @internal */ - virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const; + virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const override; /** * Return the number of days in the given Persian year * @internal */ - virtual int32_t handleGetYearLength(int32_t extendedYear) const; + virtual int32_t handleGetYearLength(int32_t extendedYear) const override; //------------------------------------------------------------------------- // Functions for converting from field values to milliseconds.... 
@@ -219,7 +219,7 @@ class PersianCalendar : public Calendar { /** * @internal */ - virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, UBool useMonth) const; + virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, UBool useMonth) const override; //------------------------------------------------------------------------- // Functions for converting from milliseconds to field values @@ -228,7 +228,7 @@ class PersianCalendar : public Calendar { /** * @internal */ - virtual int32_t handleGetExtendedYear(); + virtual int32_t handleGetExtendedYear() override; /** * Override Calendar to compute several fields specific to the Persian @@ -246,7 +246,7 @@ class PersianCalendar : public Calendar { * calendar equivalents for the given Julian day. * @internal */ - virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status) override; // UObject stuff public: @@ -255,7 +255,7 @@ class PersianCalendar : public Calendar { * same class ID. Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. This is useful only for comparing to a return @@ -276,7 +276,7 @@ class PersianCalendar : public Calendar { * @return calendar type * @internal */ - virtual const char * getType() const; + virtual const char * getType() const override; private: PersianCalendar(); // default constructor not implemented @@ -292,26 +292,26 @@ class PersianCalendar : public Calendar { * false, otherwise. 
* @internal */ - virtual UBool inDaylightTime(UErrorCode& status) const; + virtual UBool inDaylightTime(UErrorCode& status) const override; /** - * Returns TRUE because the Persian Calendar does have a default century + * Returns true because the Persian Calendar does have a default century * @internal */ - virtual UBool haveDefaultCentury() const; + virtual UBool haveDefaultCentury() const override; /** * Returns the date of the start of the default century * @return start of century - in milliseconds since epoch, 1970 * @internal */ - virtual UDate defaultCenturyStart() const; + virtual UDate defaultCenturyStart() const override; /** * Returns the year in which the default century begins * @internal */ - virtual int32_t defaultCenturyStartYear() const; + virtual int32_t defaultCenturyStartYear() const override; }; U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/pluralranges.cpp b/contrib/libs/icu/i18n/pluralranges.cpp new file mode 100644 index 0000000000..da10e2117d --- /dev/null +++ b/contrib/libs/icu/i18n/pluralranges.cpp @@ -0,0 +1,144 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. 
+#define UNISTR_FROM_STRING_EXPLICIT + +#include "unicode/numberrangeformatter.h" +#include "pluralranges.h" +#include "uresimp.h" +#include "charstr.h" +#include "uassert.h" +#include "util.h" +#include "numrange_impl.h" + +U_NAMESPACE_BEGIN + + +namespace { + +class PluralRangesDataSink : public ResourceSink { + public: + PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {} + + void put(const char* /*key*/, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE { + ResourceArray entriesArray = value.getArray(status); + if (U_FAILURE(status)) { return; } + fOutput.setCapacity(entriesArray.getSize(), status); + if (U_FAILURE(status)) { return; } + for (int i = 0; entriesArray.getValue(i, value); i++) { + ResourceArray pluralFormsArray = value.getArray(status); + if (U_FAILURE(status)) { return; } + if (pluralFormsArray.getSize() != 3) { + status = U_RESOURCE_TYPE_MISMATCH; + return; + } + pluralFormsArray.getValue(0, value); + StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status); + if (U_FAILURE(status)) { return; } + pluralFormsArray.getValue(1, value); + StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status); + if (U_FAILURE(status)) { return; } + pluralFormsArray.getValue(2, value); + StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status); + if (U_FAILURE(status)) { return; } + fOutput.addPluralRange(first, second, result); + } + } + + private: + StandardPluralRanges& fOutput; +}; + +void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) { + LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status)); + if (U_FAILURE(status)) { return; } + + CharString dataPath; + dataPath.append("locales/", -1, status); + dataPath.append(locale.getLanguage(), -1, status); + if (U_FAILURE(status)) { return; } + int32_t setLen; + // Not all languages are 
covered: fail gracefully + UErrorCode internalStatus = U_ZERO_ERROR; + const UChar* set = ures_getStringByKeyWithFallback(rb.getAlias(), dataPath.data(), &setLen, &internalStatus); + if (U_FAILURE(internalStatus)) { return; } + + dataPath.clear(); + dataPath.append("rules/", -1, status); + dataPath.appendInvariantChars(set, setLen, status); + if (U_FAILURE(status)) { return; } + PluralRangesDataSink sink(output); + ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status); +} + +} // namespace + + +StandardPluralRanges +StandardPluralRanges::forLocale(const Locale& locale, UErrorCode& status) { + StandardPluralRanges result; + getPluralRangesData(locale, result, status); + return result; +} + +StandardPluralRanges +StandardPluralRanges::copy(UErrorCode& status) const { + StandardPluralRanges result; + if (fTriplesLen > result.fTriples.getCapacity()) { + if (result.fTriples.resize(fTriplesLen) == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return result; + } + } + uprv_memcpy(result.fTriples.getAlias(), + fTriples.getAlias(), + fTriplesLen * sizeof(fTriples[0])); + result.fTriplesLen = fTriplesLen; + return result; +} + +LocalPointer<StandardPluralRanges> +StandardPluralRanges::toPointer(UErrorCode& status) && noexcept { + return LocalPointer<StandardPluralRanges>(new StandardPluralRanges(std::move(*this)), status); +} + +void StandardPluralRanges::addPluralRange( + StandardPlural::Form first, + StandardPlural::Form second, + StandardPlural::Form result) { + U_ASSERT(fTriplesLen < fTriples.getCapacity()); + fTriples[fTriplesLen] = {first, second, result}; + fTriplesLen++; +} + +void StandardPluralRanges::setCapacity(int32_t length, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + if (length > fTriples.getCapacity()) { + if (fTriples.resize(length, 0) == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + } +} + +StandardPlural::Form +StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const { 
+ for (int32_t i=0; i<fTriplesLen; i++) { + const auto& triple = fTriples[i]; + if (triple.first == first && triple.second == second) { + return triple.result; + } + } + // Default fallback + return StandardPlural::OTHER; +} + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/pluralranges.h b/contrib/libs/icu/i18n/pluralranges.h new file mode 100644 index 0000000000..eba59c70ed --- /dev/null +++ b/contrib/libs/icu/i18n/pluralranges.h @@ -0,0 +1,67 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __PLURALRANGES_H__ +#define __PLURALRANGES_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" +#include "unicode/locid.h" +#include "unicode/plurrule.h" +#include "standardplural.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +// Forward declarations +namespace number { +namespace impl { +class UFormattedNumberRangeData; +} +} + +class StandardPluralRanges : public UMemory { + public: + /** Create a new StandardPluralRanges for the given locale */ + static StandardPluralRanges forLocale(const Locale& locale, UErrorCode& status); + + /** Explicit copy constructor */ + StandardPluralRanges copy(UErrorCode& status) const; + + /** Create an object (called on an rvalue) */ + LocalPointer<StandardPluralRanges> toPointer(UErrorCode& status) && noexcept; + + /** Select rule based on the first and second forms */ + StandardPlural::Form resolve(StandardPlural::Form first, StandardPlural::Form second) const; + + /** Used for data loading. */ + void addPluralRange( + StandardPlural::Form first, + StandardPlural::Form second, + StandardPlural::Form result); + + /** Used for data loading. 
*/ + void setCapacity(int32_t length, UErrorCode& status); + + private: + struct StandardPluralRangeTriple { + StandardPlural::Form first; + StandardPlural::Form second; + StandardPlural::Form result; + }; + + // TODO: An array is simple here, but it results in linear lookup time. + // Certain locales have 20-30 entries in this list. + // Consider changing to a smarter data structure. + typedef MaybeStackArray<StandardPluralRangeTriple, 3> PluralRangeTriples; + PluralRangeTriples fTriples; + int32_t fTriplesLen = 0; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ +#endif //__PLURALRANGES_H__ diff --git a/contrib/libs/icu/i18n/plurfmt.cpp b/contrib/libs/icu/i18n/plurfmt.cpp index b99437630e..65e275eeeb 100644 --- a/contrib/libs/icu/i18n/plurfmt.cpp +++ b/contrib/libs/icu/i18n/plurfmt.cpp @@ -381,13 +381,13 @@ PluralFormat::operator=(const PluralFormat& other) { return *this; } -UBool +bool PluralFormat::operator==(const Format& other) const { if (this == &other) { - return TRUE; + return true; } if (!Format::operator==(other)) { - return FALSE; + return false; } const PluralFormat& o = (const PluralFormat&)other; return @@ -400,7 +400,7 @@ PluralFormat::operator==(const Format& other) const { *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules); } -UBool +bool PluralFormat::operator!=(const Format& other) const { return !operator==(other); } @@ -549,9 +549,15 @@ void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLeni UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); if (rbnfLenientScanner != NULL) { - // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. 
- int32_t length = -1; - currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); + // Check if non-lenient rule finds the text before call lenient parsing + int32_t tempIndex = source.indexOf(currArg, startingAt); + if (tempIndex >= 0) { + currMatchIndex = tempIndex; + } else { + // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. + int32_t length = -1; + currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); + } } else { currMatchIndex = source.indexOf(currArg, startingAt); diff --git a/contrib/libs/icu/i18n/plurrule.cpp b/contrib/libs/icu/i18n/plurrule.cpp index 794a3d8c55..d1918c4698 100644 --- a/contrib/libs/icu/i18n/plurrule.cpp +++ b/contrib/libs/icu/i18n/plurrule.cpp @@ -19,6 +19,7 @@ #include "unicode/ures.h" #include "unicode/numfmt.h" #include "unicode/decimfmt.h" +#include "unicode/numberrangeformatter.h" #include "charstr.h" #include "cmemory.h" #include "cstring.h" @@ -36,6 +37,8 @@ #include "unifiedcache.h" #include "number_decimalquantity.h" #include "util.h" +#include "pluralranges.h" +#include "numrange_impl.h" #if !UCONFIG_NO_FORMATTING @@ -56,6 +59,8 @@ static const UChar PK_VAR_N[]={LOW_N,0}; static const UChar PK_VAR_I[]={LOW_I,0}; static const UChar PK_VAR_F[]={LOW_F,0}; static const UChar PK_VAR_T[]={LOW_T,0}; +static const UChar PK_VAR_E[]={LOW_E,0}; +static const UChar PK_VAR_C[]={LOW_C,0}; static const UChar PK_VAR_V[]={LOW_V,0}; static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0}; static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0}; @@ -67,6 +72,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) PluralRules::PluralRules(UErrorCode& /*status*/) : UObject(), mRules(nullptr), + mStandardPluralRanges(nullptr), mInternalStatus(U_ZERO_ERROR) { } @@ -74,6 +80,7 @@ PluralRules::PluralRules(UErrorCode& /*status*/) PluralRules::PluralRules(const PluralRules& other) : UObject(other), 
mRules(nullptr), + mStandardPluralRanges(nullptr), mInternalStatus(U_ZERO_ERROR) { *this=other; @@ -81,6 +88,7 @@ PluralRules::PluralRules(const PluralRules& other) PluralRules::~PluralRules() { delete mRules; + delete mStandardPluralRanges; } SharedPluralRules::~SharedPluralRules() { @@ -89,14 +97,20 @@ SharedPluralRules::~SharedPluralRules() { PluralRules* PluralRules::clone() const { - PluralRules* newObj = new PluralRules(*this); // Since clone doesn't have a 'status' parameter, the best we can do is return nullptr if // the newly created object was not fully constructed properly (an error occurred). - if (newObj != nullptr && U_FAILURE(newObj->mInternalStatus)) { - delete newObj; - newObj = nullptr; + UErrorCode localStatus = U_ZERO_ERROR; + return clone(localStatus); +} + +PluralRules* +PluralRules::clone(UErrorCode& status) const { + LocalPointer<PluralRules> newObj(new PluralRules(*this), status); + if (U_SUCCESS(status) && U_FAILURE(newObj->mInternalStatus)) { + status = newObj->mInternalStatus; + newObj.adoptInstead(nullptr); } - return newObj; + return newObj.orphan(); } PluralRules& @@ -104,6 +118,8 @@ PluralRules::operator=(const PluralRules& other) { if (this != &other) { delete mRules; mRules = nullptr; + delete mStandardPluralRanges; + mStandardPluralRanges = nullptr; mInternalStatus = other.mInternalStatus; if (U_FAILURE(mInternalStatus)) { // bail out early if the object we were copying from was already 'invalid'. 
@@ -119,6 +135,11 @@ PluralRules::operator=(const PluralRules& other) { mInternalStatus = mRules->fInternalStatus; } } + if (other.mStandardPluralRanges != nullptr) { + mStandardPluralRanges = other.mStandardPluralRanges->copy(mInternalStatus) + .toPointer(mInternalStatus) + .orphan(); + } } return *this; } @@ -211,11 +232,8 @@ PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& statu if (U_FAILURE(status)) { return nullptr; } - PluralRules *result = (*shared)->clone(); + PluralRules *result = (*shared)->clone(status); shared->removeRef(); - if (result == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - } return result; } @@ -252,6 +270,10 @@ PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCod // Original impl used default rules. // Ask the question to ICU Core. + newObj->mStandardPluralRanges = StandardPluralRanges::forLocale(locale, status) + .toPointer(status) + .orphan(); + return newObj.orphan(); } @@ -272,6 +294,10 @@ PluralRules::select(const number::FormattedNumber& number, UErrorCode& status) c if (U_FAILURE(status)) { return ICU_Utility::makeBogusString(); } + if (U_FAILURE(mInternalStatus)) { + status = mInternalStatus; + return ICU_Utility::makeBogusString(); + } return select(dq); } @@ -285,6 +311,33 @@ PluralRules::select(const IFixedDecimal &number) const { } } +UnicodeString +PluralRules::select(const number::FormattedNumberRange& range, UErrorCode& status) const { + return select(range.getData(status), status); +} + +UnicodeString +PluralRules::select(const number::impl::UFormattedNumberRangeData* impl, UErrorCode& status) const { + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + if (U_FAILURE(mInternalStatus)) { + status = mInternalStatus; + return ICU_Utility::makeBogusString(); + } + if (mStandardPluralRanges == nullptr) { + // Happens if PluralRules was constructed via createRules() + status = U_UNSUPPORTED_ERROR; + return ICU_Utility::makeBogusString(); + } + auto 
form1 = StandardPlural::fromString(select(impl->quantity1), status); + auto form2 = StandardPlural::fromString(select(impl->quantity2), status); + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + auto result = mStandardPluralRanges->resolve(form1, form2); + return UnicodeString(StandardPlural::getKeyword(result), -1, US_INV); +} StringEnumeration* @@ -326,9 +379,31 @@ static double scaleForInt(double d) { return scale; } +static const double powers10[7] = {1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0}; // powers of 10 for 0..6 +static double applyExponent(double source, int32_t exponent) { + if (exponent >= 0 && exponent <= 6) { + return source * powers10[exponent]; + } + return source * pow(10.0, exponent); +} + +/** + * Helper method for the overrides of getSamples() for double and FixedDecimal + * return value types. Provide only one of an allocated array of doubles or + * FixedDecimals, and a nullptr for the other. + */ static int32_t -getSamplesFromString(const UnicodeString &samples, double *dest, - int32_t destCapacity, UErrorCode& status) { +getSamplesFromString(const UnicodeString &samples, double *destDbl, + FixedDecimal* destFd, int32_t destCapacity, + UErrorCode& status) { + + if ((destDbl == nullptr && destFd == nullptr) + || (destDbl != nullptr && destFd != nullptr)) { + status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } + + bool isDouble = destDbl != nullptr; int32_t sampleCount = 0; int32_t sampleStartIdx = 0; int32_t sampleEndIdx = 0; @@ -346,12 +421,15 @@ getSamplesFromString(const UnicodeString &samples, double *dest, int32_t tildeIndex = sampleRange.indexOf(TILDE); if (tildeIndex < 0) { FixedDecimal fixed(sampleRange, status); - double sampleValue = fixed.source; - if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) { - dest[sampleCount++] = sampleValue; + if (isDouble) { + double sampleValue = fixed.source; + if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) 
{ + destDbl[sampleCount++] = applyExponent(sampleValue, fixed.exponent); + } + } else { + destFd[sampleCount++] = fixed; } } else { - FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status); FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status); double rangeLo = fixedLo.source; @@ -375,14 +453,21 @@ getSamplesFromString(const UnicodeString &samples, double *dest, rangeLo *= scale; rangeHi *= scale; for (double n=rangeLo; n<=rangeHi; n+=1) { - // Hack Alert: don't return any decimal samples with integer values that - // originated from a format with trailing decimals. - // This API is returning doubles, which can't distinguish having displayed - // zeros to the right of the decimal. - // This results in test failures with values mapping back to a different keyword. double sampleValue = n/scale; - if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) { - dest[sampleCount++] = sampleValue; + if (isDouble) { + // Hack Alert: don't return any decimal samples with integer values that + // originated from a format with trailing decimals. + // This API is returning doubles, which can't distinguish having displayed + // zeros to the right of the decimal. + // This results in test failures with values mapping back to a different keyword. 
+ if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) { + destDbl[sampleCount++] = sampleValue; + } + } else { + int32_t v = (int32_t) fixedLo.getPluralOperand(PluralOperand::PLURAL_OPERAND_V); + int32_t e = (int32_t) fixedLo.getPluralOperand(PluralOperand::PLURAL_OPERAND_E); + FixedDecimal newSample = FixedDecimal::createWithExponent(sampleValue, v, e); + destFd[sampleCount++] = newSample; } if (sampleCount >= destCapacity) { break; @@ -394,24 +479,53 @@ getSamplesFromString(const UnicodeString &samples, double *dest, return sampleCount; } - int32_t PluralRules::getSamples(const UnicodeString &keyword, double *dest, int32_t destCapacity, UErrorCode& status) { - if (destCapacity == 0 || U_FAILURE(status)) { + if (U_FAILURE(status)) { + return 0; + } + if (U_FAILURE(mInternalStatus)) { + status = mInternalStatus; + return 0; + } + if (dest != nullptr ? destCapacity < 0 : destCapacity != 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + RuleChain *rc = rulesForKeyword(keyword); + if (rc == nullptr) { + return 0; + } + int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, nullptr, destCapacity, status); + if (numSamples == 0) { + numSamples = getSamplesFromString(rc->fDecimalSamples, dest, nullptr, destCapacity, status); + } + return numSamples; +} + +int32_t +PluralRules::getSamples(const UnicodeString &keyword, FixedDecimal *dest, + int32_t destCapacity, UErrorCode& status) { + if (U_FAILURE(status)) { return 0; } if (U_FAILURE(mInternalStatus)) { status = mInternalStatus; return 0; } + if (dest != nullptr ? 
destCapacity < 0 : destCapacity != 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } RuleChain *rc = rulesForKeyword(keyword); if (rc == nullptr) { return 0; } - int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status); + + int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, nullptr, dest, destCapacity, status); if (numSamples == 0) { - numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status); + numSamples = getSamplesFromString(rc->fDecimalSamples, nullptr, dest, destCapacity, status); } return numSamples; } @@ -441,40 +555,40 @@ PluralRules::getKeywordOther() const { return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5); } -UBool +bool PluralRules::operator==(const PluralRules& other) const { const UnicodeString *ptrKeyword; UErrorCode status= U_ZERO_ERROR; if ( this == &other ) { - return TRUE; + return true; } LocalPointer<StringEnumeration> myKeywordList(getKeywords(status)); LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status)); if (U_FAILURE(status)) { - return FALSE; + return false; } if (myKeywordList->count(status)!=otherKeywordList->count(status)) { - return FALSE; + return false; } myKeywordList->reset(status); while ((ptrKeyword=myKeywordList->snext(status))!=nullptr) { if (!other.isKeyword(*ptrKeyword)) { - return FALSE; + return false; } } otherKeywordList->reset(status); while ((ptrKeyword=otherKeywordList->snext(status))!=nullptr) { if (!this->isKeyword(*ptrKeyword)) { - return FALSE; + return false; } } if (U_FAILURE(status)) { - return FALSE; + return false; } - return TRUE; + return true; } @@ -600,6 +714,8 @@ PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErr case tVariableI: case tVariableF: case tVariableT: + case tVariableE: + case tVariableC: case tVariableV: U_ASSERT(curAndConstraint != nullptr); curAndConstraint->digitsType = type; @@ -984,6 +1100,10 @@ static UnicodeString tokenString(tokenType tok) { 
s.append(LOW_V); break; case tVariableT: s.append(LOW_T); break; + case tVariableE: + s.append(LOW_E); break; + case tVariableC: + s.append(LOW_C); break; default: s.append(TILDE); } @@ -1160,6 +1280,8 @@ PluralRuleParser::checkSyntax(UErrorCode &status) case tVariableI: case tVariableF: case tVariableT: + case tVariableE: + case tVariableC: case tVariableV: if (type != tIs && type != tMod && type != tIn && type != tNot && type != tWithin && type != tEqual && type != tNotEqual) { @@ -1176,6 +1298,8 @@ PluralRuleParser::checkSyntax(UErrorCode &status) type == tVariableI || type == tVariableF || type == tVariableT || + type == tVariableE || + type == tVariableC || type == tVariableV || type == tAt)) { status = U_UNEXPECTED_TOKEN; @@ -1207,6 +1331,8 @@ PluralRuleParser::checkSyntax(UErrorCode &status) type != tVariableI && type != tVariableF && type != tVariableT && + type != tVariableE && + type != tVariableC && type != tVariableV) { status = U_UNEXPECTED_TOKEN; } @@ -1384,6 +1510,10 @@ PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType) keyType = tVariableF; } else if (0 == token.compare(PK_VAR_T, 1)) { keyType = tVariableT; + } else if (0 == token.compare(PK_VAR_E, 1)) { + keyType = tVariableE; + } else if (0 == token.compare(PK_VAR_C, 1)) { + keyType = tVariableC; } else if (0 == token.compare(PK_VAR_V, 1)) { keyType = tVariableV; } else if (0 == token.compare(PK_IS, 2)) { @@ -1423,7 +1553,7 @@ PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode status = U_MEMORY_ALLOCATION_ERROR; return; } - fKeywordNames.addElement(newElem, status); + fKeywordNames.addElementX(newElem, status); if (U_FAILURE(status)) { delete newElem; return; @@ -1440,7 +1570,7 @@ PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode status = U_MEMORY_ALLOCATION_ERROR; return; } - fKeywordNames.addElement(newElem, status); + fKeywordNames.addElementX(newElem, status); if (U_FAILURE(status)) { delete newElem; return; 
@@ -1481,13 +1611,21 @@ PluralOperand tokenTypeToPluralOperand(tokenType tt) { return PLURAL_OPERAND_V; case tVariableT: return PLURAL_OPERAND_T; + case tVariableE: + return PLURAL_OPERAND_E; + case tVariableC: + return PLURAL_OPERAND_E; default: - UPRV_UNREACHABLE; // unexpected. + UPRV_UNREACHABLE_EXIT; // unexpected. } } -FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) { - init(n, v, f); +FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f, int32_t e, int32_t c) { + init(n, v, f, e, c); +} + +FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f, int32_t e) { + init(n, v, f, e); // check values. TODO make into unit test. // // long visiblePower = (int) Math.pow(10, v); @@ -1503,6 +1641,10 @@ FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) { // } } +FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) { + init(n, v, f); +} + FixedDecimal::FixedDecimal(double n, int32_t v) { // Ugly, but for samples we don't care. init(n, v, getFractionalDigits(n, v)); @@ -1522,20 +1664,50 @@ FixedDecimal::FixedDecimal() { FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) { CharString cs; - cs.appendInvariantChars(num, status); + int32_t parsedExponent = 0; + int32_t parsedCompactExponent = 0; + + int32_t exponentIdx = num.indexOf(u'e'); + if (exponentIdx < 0) { + exponentIdx = num.indexOf(u'E'); + } + int32_t compactExponentIdx = num.indexOf(u'c'); + if (compactExponentIdx < 0) { + compactExponentIdx = num.indexOf(u'C'); + } + + if (exponentIdx >= 0) { + cs.appendInvariantChars(num.tempSubString(0, exponentIdx), status); + int32_t expSubstrStart = exponentIdx + 1; + parsedExponent = ICU_Utility::parseAsciiInteger(num, expSubstrStart); + } + else if (compactExponentIdx >= 0) { + cs.appendInvariantChars(num.tempSubString(0, compactExponentIdx), status); + int32_t expSubstrStart = compactExponentIdx + 1; + parsedCompactExponent = ICU_Utility::parseAsciiInteger(num, expSubstrStart); + + parsedExponent = 
parsedCompactExponent; + exponentIdx = compactExponentIdx; + } + else { + cs.appendInvariantChars(num, status); + } + DecimalQuantity dl; dl.setToDecNumber(cs.toStringPiece(), status); if (U_FAILURE(status)) { init(0, 0, 0); return; } + int32_t decimalPoint = num.indexOf(DOT); double n = dl.toDouble(); if (decimalPoint == -1) { - init(n, 0, 0); + init(n, 0, 0, parsedExponent); } else { - int32_t v = num.length() - decimalPoint - 1; - init(n, v, getFractionalDigits(n, v)); + int32_t fractionNumLength = exponentIdx < 0 ? num.length() : cs.length(); + int32_t v = fractionNumLength - decimalPoint - 1; + init(n, v, getFractionalDigits(n, v), parsedExponent); } } @@ -1546,6 +1718,7 @@ FixedDecimal::FixedDecimal(const FixedDecimal &other) { decimalDigits = other.decimalDigits; decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros; intValue = other.intValue; + exponent = other.exponent; _hasIntegerValue = other._hasIntegerValue; isNegative = other.isNegative; _isNaN = other._isNaN; @@ -1554,6 +1727,10 @@ FixedDecimal::FixedDecimal(const FixedDecimal &other) { FixedDecimal::~FixedDecimal() = default; +FixedDecimal FixedDecimal::createWithExponent(double n, int32_t v, int32_t e) { + return FixedDecimal(n, v, getFractionalDigits(n, v), e); +} + void FixedDecimal::init(double n) { int32_t numFractionDigits = decimals(n); @@ -1562,10 +1739,24 @@ void FixedDecimal::init(double n) { void FixedDecimal::init(double n, int32_t v, int64_t f) { + int32_t exponent = 0; + init(n, v, f, exponent); +} + +void FixedDecimal::init(double n, int32_t v, int64_t f, int32_t e) { + // Currently, `c` is an alias for `e` + init(n, v, f, e, e); +} + +void FixedDecimal::init(double n, int32_t v, int64_t f, int32_t e, int32_t c) { isNegative = n < 0.0; source = fabs(n); _isNaN = uprv_isNaN(source); _isInfinite = uprv_isInfinite(source); + exponent = e; + if (exponent == 0) { + exponent = c; + } if (_isNaN || _isInfinite) { v = 0; f = 0; @@ -1661,7 +1852,9 @@ int64_t 
FixedDecimal::getFractionalDigits(double n, int32_t v) { case 3: return (int64_t)(fract*1000.0 + 0.5); default: double scaled = floor(fract * pow(10.0, (double)v) + 0.5); - if (scaled > U_INT64_MAX) { + if (scaled >= static_cast<double>(U_INT64_MAX)) { + // Note: a double cannot accurately represent U_INT64_MAX. Casting it to double + // will round up to the next representable value, which is U_INT64_MAX + 1. return U_INT64_MAX; } else { return (int64_t)scaled; @@ -1688,14 +1881,15 @@ void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) { double FixedDecimal::getPluralOperand(PluralOperand operand) const { switch(operand) { - case PLURAL_OPERAND_N: return source; - case PLURAL_OPERAND_I: return static_cast<double>(intValue); + case PLURAL_OPERAND_N: return (exponent == 0 ? source : source * pow(10, exponent)); + case PLURAL_OPERAND_I: return (double) longValue(); case PLURAL_OPERAND_F: return static_cast<double>(decimalDigits); case PLURAL_OPERAND_T: return static_cast<double>(decimalDigitsWithoutTrailingZeros); case PLURAL_OPERAND_V: return visibleDecimalDigitCount; - case PLURAL_OPERAND_E: return 0; + case PLURAL_OPERAND_E: return exponent; + case PLURAL_OPERAND_C: return exponent; default: - UPRV_UNREACHABLE; // unexpected. + UPRV_UNREACHABLE_EXIT; // unexpected. 
} } @@ -1719,6 +1913,35 @@ int32_t FixedDecimal::getVisibleFractionDigitCount() const { return visibleDecimalDigitCount; } +bool FixedDecimal::operator==(const FixedDecimal &other) const { + return source == other.source && visibleDecimalDigitCount == other.visibleDecimalDigitCount + && decimalDigits == other.decimalDigits && exponent == other.exponent; +} + +UnicodeString FixedDecimal::toString() const { + char pattern[15]; + char buffer[20]; + if (exponent != 0) { + snprintf(pattern, sizeof(pattern), "%%.%dfe%%d", visibleDecimalDigitCount); + snprintf(buffer, sizeof(buffer), pattern, source, exponent); + } else { + snprintf(pattern, sizeof(pattern), "%%.%df", visibleDecimalDigitCount); + snprintf(buffer, sizeof(buffer), pattern, source); + } + return UnicodeString(buffer, -1, US_INV); +} + +double FixedDecimal::doubleValue() const { + return (isNegative ? -source : source) * pow(10, exponent); +} + +int64_t FixedDecimal::longValue() const { + if (exponent == 0) { + return intValue; + } else { + return (long) (pow(10, exponent) * intValue); + } +} PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) { diff --git a/contrib/libs/icu/i18n/plurrule_impl.h b/contrib/libs/icu/i18n/plurrule_impl.h index 0dc44fb62e..7274da58f0 100644 --- a/contrib/libs/icu/i18n/plurrule_impl.h +++ b/contrib/libs/icu/i18n/plurrule_impl.h @@ -30,6 +30,12 @@ #include "hash.h" #include "uassert.h" +/** + * A FixedDecimal version of UPLRULES_NO_UNIQUE_VALUE used in PluralRulesTest + * for parsing of samples. + */ +#define UPLRULES_NO_UNIQUE_VALUE_DECIMAL (FixedDecimal((double)-0.00123456777)) + class PluralRulesTest; U_NAMESPACE_BEGIN @@ -138,6 +144,8 @@ enum tokenType { tVariableF, tVariableV, tVariableT, + tVariableE, + tVariableC, tDecimal, tInteger, tEOF @@ -215,12 +223,21 @@ enum PluralOperand { PLURAL_OPERAND_W, /** - * Suppressed exponent for compact notation (exponent needed in - * scientific notation with compact notation to approximate i). 
+ * Suppressed exponent for scientific notation (exponent needed in + * scientific notation to approximate i). */ PLURAL_OPERAND_E, /** + * This operand is currently treated as an alias for `PLURAL_OPERAND_E`. + * In the future, it will represent: + * + * Suppressed exponent for compact notation (exponent needed in + * compact notation to approximate i). + */ + PLURAL_OPERAND_C, + + /** * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC. * * <p>Returns the integer value, but will fail if the number has fraction digits. @@ -273,7 +290,11 @@ class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { * @param n the number, e.g. 12.345 * @param v The number of visible fraction digits, e.g. 3 * @param f The fraction digits, e.g. 345 + * @param e The exponent, e.g. 7 in 1.2e7, for scientific notation + * @param c Currently: an alias for param `e`. */ + FixedDecimal(double n, int32_t v, int64_t f, int32_t e, int32_t c); + FixedDecimal(double n, int32_t v, int64_t f, int32_t e); FixedDecimal(double n, int32_t v, int64_t f); FixedDecimal(double n, int32_t); explicit FixedDecimal(double n); @@ -282,6 +303,8 @@ class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { FixedDecimal(const UnicodeString &s, UErrorCode &ec); FixedDecimal(const FixedDecimal &other); + static FixedDecimal createWithExponent(double n, int32_t v, int32_t e); + double getPluralOperand(PluralOperand operand) const U_OVERRIDE; bool isNaN() const U_OVERRIDE; bool isInfinite() const U_OVERRIDE; @@ -291,19 +314,30 @@ class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { int32_t getVisibleFractionDigitCount() const; + void init(double n, int32_t v, int64_t f, int32_t e, int32_t c); + void init(double n, int32_t v, int64_t f, int32_t e); void init(double n, int32_t v, int64_t f); void init(double n); UBool quickInit(double n); // Try a fast-path only initialization, - // return TRUE if successful. + // return true if successful. 
void adjustForMinFractionDigits(int32_t min); static int64_t getFractionalDigits(double n, int32_t v); static int32_t decimals(double n); + FixedDecimal& operator=(const FixedDecimal& other) = default; + bool operator==(const FixedDecimal &other) const; + + UnicodeString toString() const; + + double doubleValue() const; + int64_t longValue() const; + double source; int32_t visibleDecimalDigitCount; int64_t decimalDigits; int64_t decimalDigitsWithoutTrailingZeros; int64_t intValue; + int32_t exponent; UBool _hasIntegerValue; UBool isNegative; UBool _isNaN; @@ -320,8 +354,8 @@ public: int32_t opNum = -1; // for mod expressions, the right operand of the mod. int32_t value = -1; // valid for 'is' rules only. UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. - UBool negated = FALSE; // TRUE for negated rules. - UBool integerOnly = FALSE; // TRUE for 'within' rules. + UBool negated = false; // true for negated rules. + UBool integerOnly = false; // true for 'within' rules. tokenType digitsType = none; // n | i | v | f constraint. AndConstraint *next = nullptr; // Internal error status, used for errors that occur during the copy constructor. @@ -357,8 +391,8 @@ public: OrConstraint *ruleHeader = nullptr; UnicodeString fDecimalSamples; // Samples strings from rule source UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. - UBool fDecimalSamplesUnbounded = FALSE; - UBool fIntegerSamplesUnbounded = FALSE; + UBool fDecimalSamplesUnbounded = false; + UBool fIntegerSamplesUnbounded = false; // Internal error status, used for errors that occur during the copy constructor. 
UErrorCode fInternalStatus = U_ZERO_ERROR; @@ -377,10 +411,10 @@ public: PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); virtual ~PluralKeywordEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; - virtual const UnicodeString* snext(UErrorCode& status); - virtual void reset(UErrorCode& status); - virtual int32_t count(UErrorCode& status) const; + virtual UClassID getDynamicClassID(void) const override; + virtual const UnicodeString* snext(UErrorCode& status) override; + virtual void reset(UErrorCode& status) override; + virtual int32_t count(UErrorCode& status) const override; private: int32_t pos; UVector fKeywordNames; @@ -391,9 +425,9 @@ class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { public: PluralAvailableLocalesEnumeration(UErrorCode &status); virtual ~PluralAvailableLocalesEnumeration(); - virtual const char* next(int32_t *resultLength, UErrorCode& status); - virtual void reset(UErrorCode& status); - virtual int32_t count(UErrorCode& status) const; + virtual const char* next(int32_t *resultLength, UErrorCode& status) override; + virtual void reset(UErrorCode& status) override; + virtual int32_t count(UErrorCode& status) const override; private: UErrorCode fOpenStatus; UResourceBundle *fLocales = nullptr; diff --git a/contrib/libs/icu/i18n/quant.h b/contrib/libs/icu/i18n/quant.h index d5aa8e5eee..427a6b0480 100644 --- a/contrib/libs/icu/i18n/quant.h +++ b/contrib/libs/icu/i18n/quant.h @@ -39,13 +39,13 @@ class Quantifier : public UnicodeFunctor, public UnicodeMatcher { * and return the pointer. * @return the UnicodeMatcher pointer. */ - virtual UnicodeMatcher* toMatcher() const; + virtual UnicodeMatcher* toMatcher() const override; /** * Implement UnicodeFunctor * @return a copy of the object. 
*/ - virtual Quantifier* clone() const; + virtual Quantifier* clone() const override; /** * Implement UnicodeMatcher @@ -62,17 +62,17 @@ class Quantifier : public UnicodeFunctor, public UnicodeMatcher { * considered for matching will be text.charAt(limit-1) in the * forward direction or text.charAt(limit+1) in the backward * direction. - * @param incremental if TRUE, then assume further characters may + * @param incremental if true, then assume further characters may * be inserted at limit and check for partial matching. Otherwise * assume the text as given is complete. * @return a match degree value indicating a full match, a partial - * match, or a mismatch. If incremental is FALSE then + * match, or a mismatch. If incremental is false then * U_PARTIAL_MATCH should never be returned. */ virtual UMatchDegree matches(const Replaceable& text, int32_t& offset, int32_t limit, - UBool incremental); + UBool incremental) override; /** * Implement UnicodeMatcher @@ -81,29 +81,29 @@ class Quantifier : public UnicodeFunctor, public UnicodeMatcher { * @return A reference to 'result'. */ virtual UnicodeString& toPattern(UnicodeString& result, - UBool escapeUnprintable = FALSE) const; + UBool escapeUnprintable = false) const override; /** * Implement UnicodeMatcher * @param v the given index value. * @return true if this rule matches the given index value. */ - virtual UBool matchesIndexValue(uint8_t v) const; + virtual UBool matchesIndexValue(uint8_t v) const override; /** * Implement UnicodeMatcher */ - virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const override; /** * UnicodeFunctor API */ - virtual void setData(const TransliterationRuleData*); + virtual void setData(const TransliterationRuleData*) override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. 
*/ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. diff --git a/contrib/libs/icu/i18n/quantityformatter.h b/contrib/libs/icu/i18n/quantityformatter.h index daaef4f060..841798cf88 100644 --- a/contrib/libs/icu/i18n/quantityformatter.h +++ b/contrib/libs/icu/i18n/quantityformatter.h @@ -74,18 +74,18 @@ public: * @param variant "zero", "one", "two", "few", "many", "other" * @param rawPattern the pattern for the variant e.g "{0} meters" * @param status any error returned here. - * @return TRUE on success; FALSE if status was set to a non zero error. + * @return true on success; false if status was set to a non zero error. */ UBool addIfAbsent(const char *variant, const UnicodeString &rawPattern, UErrorCode &status); /** - * returns TRUE if this object has at least the "other" variant. + * returns true if this object has at least the "other" variant. */ UBool isValid() const; /** * Gets the pattern formatter that would be used for a particular variant. - * If isValid() returns TRUE, this method is guaranteed to return a + * If isValid() returns true, this method is guaranteed to return a * non-NULL value. */ const SimpleFormatter *getByVariant(const char *variant) const; @@ -112,7 +112,7 @@ public: /** * Selects the standard plural form for the number/formatter/rules. - * TODO(13591): Remove this method. + * Used in MeasureFormat for backwards compatibility with NumberFormat. 
*/ static StandardPlural::Form selectPlural( const Formattable &number, diff --git a/contrib/libs/icu/i18n/rbnf.cpp b/contrib/libs/icu/i18n/rbnf.cpp index 17319fb6d5..7f54fd7a33 100644 --- a/contrib/libs/icu/i18n/rbnf.cpp +++ b/contrib/libs/icu/i18n/rbnf.cpp @@ -99,8 +99,8 @@ public: return NULL; } - virtual UBool operator==(const LocalizationInfo* rhs) const; - inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } + virtual bool operator==(const LocalizationInfo* rhs) const; + inline bool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } virtual int32_t getNumberOfRuleSets(void) const = 0; virtual const UChar* getRuleSetName(int32_t index) const = 0; @@ -131,18 +131,18 @@ streq(const UChar* lhs, const UChar* rhs) { return FALSE; } -UBool +bool LocalizationInfo::operator==(const LocalizationInfo* rhs) const { if (rhs) { if (this == rhs) { - return TRUE; + return true; } int32_t rsc = getNumberOfRuleSets(); if (rsc == rhs->getNumberOfRuleSets()) { for (int i = 0; i < rsc; ++i) { if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { - return FALSE; + return false; } } int32_t dlc = getNumberOfDisplayLocales(); @@ -152,19 +152,19 @@ LocalizationInfo::operator==(const LocalizationInfo* rhs) const { int32_t ix = rhs->indexForLocale(locale); // if no locale, ix is -1, getLocaleName returns null, so streq returns false if (!streq(locale, rhs->getLocaleName(ix))) { - return FALSE; + return false; } for (int j = 0; j < rsc; ++j) { if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { - return FALSE; + return false; } } } - return TRUE; + return true; } } } - return FALSE; + return false; } int32_t @@ -271,11 +271,11 @@ public: static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); virtual ~StringLocalizationInfo(); - virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } - virtual const UChar* getRuleSetName(int32_t index) const; - 
virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } - virtual const UChar* getLocaleName(int32_t index) const; - virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; + virtual int32_t getNumberOfRuleSets(void) const override { return numRuleSets; } + virtual const UChar* getRuleSetName(int32_t index) const override; + virtual int32_t getNumberOfDisplayLocales(void) const override { return numLocales; } + virtual const UChar* getLocaleName(int32_t index) const override; + virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const override; // virtual UClassID getDynamicClassID() const; // static UClassID getStaticClassID(void); @@ -936,11 +936,11 @@ RuleBasedNumberFormat::clone() const return new RuleBasedNumberFormat(*this); } -UBool +bool RuleBasedNumberFormat::operator==(const Format& other) const { if (this == &other) { - return TRUE; + return true; } if (typeid(*this) == typeid(other)) { @@ -953,7 +953,7 @@ RuleBasedNumberFormat::operator==(const Format& other) const (localizations == NULL ? rhs.localizations == NULL : (rhs.localizations == NULL - ? FALSE + ? 
false : *localizations == rhs.localizations))) { NFRuleSet** p = fRuleSets; @@ -961,7 +961,7 @@ RuleBasedNumberFormat::operator==(const Format& other) const if (p == NULL) { return q == NULL; } else if (q == NULL) { - return FALSE; + return false; } while (*p && *q && (**p == **q)) { ++p; @@ -971,7 +971,7 @@ RuleBasedNumberFormat::operator==(const Format& other) const } } - return FALSE; + return false; } UnicodeString @@ -1077,7 +1077,7 @@ RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& locale return name; } - // trim trailing portion, skipping over ommitted sections + // trim trailing portion, skipping over omitted sections do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore while (len > 0 && localeStr[len-1] == 0x005F) --len; } @@ -1501,7 +1501,7 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali } // start by stripping the trailing whitespace from all the rules - // (this is all the whitespace follwing each semicolon in the + // (this is all the whitespace following each semicolon in the // description). This allows us to look for rule-set boundaries // by searching for ";%" without having to worry about whitespace // between the ; and the % diff --git a/contrib/libs/icu/i18n/rbt.cpp b/contrib/libs/icu/i18n/rbt.cpp index 4cef242e58..1de53e6071 100644 --- a/contrib/libs/icu/i18n/rbt.cpp +++ b/contrib/libs/icu/i18n/rbt.cpp @@ -62,7 +62,7 @@ void RuleBasedTransliterator::_construct(const UnicodeString& rules, * @param rules rules, separated by ';' * @param direction either FORWARD or REVERSE. * @param adoptedFilter the filter for this transliterator. - * @param parseError Struct to recieve information on position + * @param parseError Struct to receive information on position * of error if an error is encountered * @param status Output param set to success/failure code. 
* @exception IllegalArgumentException if rules are malformed @@ -101,7 +101,7 @@ RuleBasedTransliterator::RuleBasedTransliterator( }*/ /** - * Covenience constructor with no filter. + * Convenience constructor with no filter. */ /*RuleBasedTransliterator::RuleBasedTransliterator( const UnicodeString& id, @@ -114,7 +114,7 @@ RuleBasedTransliterator::RuleBasedTransliterator( }*/ /** - * Covenience constructor with no filter and FORWARD direction. + * Convenience constructor with no filter and FORWARD direction. */ /*RuleBasedTransliterator::RuleBasedTransliterator( const UnicodeString& id, @@ -126,7 +126,7 @@ RuleBasedTransliterator::RuleBasedTransliterator( }*/ /** - * Covenience constructor with FORWARD direction. + * Convenience constructor with FORWARD direction. */ /*RuleBasedTransliterator::RuleBasedTransliterator( const UnicodeString& id, diff --git a/contrib/libs/icu/i18n/rbt.h b/contrib/libs/icu/i18n/rbt.h index 97ef01e140..8a43c90d46 100644 --- a/contrib/libs/icu/i18n/rbt.h +++ b/contrib/libs/icu/i18n/rbt.h @@ -80,7 +80,7 @@ public: UErrorCode& status);*/ /** - * Covenience constructor with no filter. + * Convenience constructor with no filter. * @internal Use transliterator factory methods instead since this class will be removed in that release. */ /*RuleBasedTransliterator(const UnicodeString& id, @@ -89,7 +89,7 @@ public: UErrorCode& status);*/ /** - * Covenience constructor with no filter and FORWARD direction. + * Convenience constructor with no filter and FORWARD direction. * @internal Use transliterator factory methods instead since this class will be removed in that release. */ /*RuleBasedTransliterator(const UnicodeString& id, @@ -97,7 +97,7 @@ public: UErrorCode& status);*/ /** - * Covenience constructor with FORWARD direction. + * Convenience constructor with FORWARD direction. * @internal Use transliterator factory methods instead since this class will be removed in that release. 
*/ /*RuleBasedTransliterator(const UnicodeString& id, @@ -108,7 +108,7 @@ private: friend class TransliteratorRegistry; // to access TransliterationRuleData convenience ctor /** - * Covenience constructor. + * Convenience constructor. * @param id the id for the transliterator. * @param theData the rule data for the transliterator. * @param adoptedFilter the filter for the transliterator @@ -144,7 +144,7 @@ public: * Implement Transliterator API. * @internal Use transliterator factory methods instead since this class will be removed in that release. */ - virtual RuleBasedTransliterator* clone() const; + virtual RuleBasedTransliterator* clone() const override; protected: /** @@ -152,7 +152,7 @@ protected: * @internal Use transliterator factory methods instead since this class will be removed in that release. */ virtual void handleTransliterate(Replaceable& text, UTransPosition& offsets, - UBool isIncremental) const; + UBool isIncremental) const override; public: /** @@ -161,26 +161,26 @@ public: * to construct a new transliterator. * @param result the string to receive the rules. Previous * contents will be deleted. - * @param escapeUnprintable if TRUE then convert unprintable + * @param escapeUnprintable if true then convert unprintable * character to their hex escape representations, \uxxxx or * \Uxxxxxxxx. Unprintable characters are those other than * U+000A, U+0020..U+007E. * @internal Use transliterator factory methods instead since this class will be removed in that release. 
*/ virtual UnicodeString& toRules(UnicodeString& result, - UBool escapeUnprintable) const; + UBool escapeUnprintable) const override; protected: /** * Implement Transliterator framework */ - virtual void handleGetSourceSet(UnicodeSet& result) const; + virtual void handleGetSourceSet(UnicodeSet& result) const override; public: /** * Override Transliterator framework */ - virtual UnicodeSet& getTargetSet(UnicodeSet& result) const; + virtual UnicodeSet& getTargetSet(UnicodeSet& result) const override; /** * Return the class ID for this class. This is useful only for @@ -205,7 +205,7 @@ public: * class have the same class ID. Objects of other classes have * different class IDs. */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; private: diff --git a/contrib/libs/icu/i18n/rbt_data.cpp b/contrib/libs/icu/i18n/rbt_data.cpp index 4b596ac6c4..f3985fc768 100644 --- a/contrib/libs/icu/i18n/rbt_data.cpp +++ b/contrib/libs/icu/i18n/rbt_data.cpp @@ -72,7 +72,7 @@ TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData& } } } - // Remove the array and exit if memory allocation error occured. + // Remove the array and exit if memory allocation error occurred. 
if (U_FAILURE(status)) { for (int32_t n = i-1; n >= 0; n--) { delete variables[n]; diff --git a/contrib/libs/icu/i18n/rbt_pars.cpp b/contrib/libs/icu/i18n/rbt_pars.cpp index 1ae5b81f03..2f207a8deb 100644 --- a/contrib/libs/icu/i18n/rbt_pars.cpp +++ b/contrib/libs/icu/i18n/rbt_pars.cpp @@ -148,12 +148,12 @@ public: virtual ~ParseData(); - virtual const UnicodeString* lookup(const UnicodeString& s) const; + virtual const UnicodeString* lookup(const UnicodeString& s) const override; - virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const; + virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const override; virtual UnicodeString parseReference(const UnicodeString& text, - ParsePosition& pos, int32_t limit) const; + ParsePosition& pos, int32_t limit) const override; /** * Return true if the given character is a matcher standin or a plain * character (non standin). @@ -945,7 +945,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, if (c == RULE_COMMENT_CHAR) { pos = rule.indexOf((UChar)0x000A /*\n*/, pos) + 1; if (pos == 0) { - break; // No "\n" found; rest of rule is a commnet + break; // No "\n" found; rest of rule is a comment } continue; // Either fall out or restart with next line } @@ -975,10 +975,14 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, if (!parsingIDs) { if (curData != NULL) { + U_ASSERT(!dataVector.hasDeleter()); if (direction == UTRANS_FORWARD) dataVector.addElement(curData, status); else dataVector.insertElementAt(curData, 0, status); + if (U_FAILURE(status)) { + delete curData; + } curData = NULL; } parsingIDs = TRUE; @@ -1031,10 +1035,14 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, status = U_MEMORY_ALLOCATION_ERROR; return; } + U_ASSERT(idBlockVector.hasDeleter()); if (direction == UTRANS_FORWARD) - idBlockVector.addElement(tempstr, status); + idBlockVector.adoptElement(tempstr, status); else idBlockVector.insertElementAt(tempstr, 0, status); + if (U_FAILURE(status)) { + 
return; + } idBlockResult.remove(); parsingIDs = FALSE; curData = new TransliterationRuleData(status); @@ -1069,19 +1077,29 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, tempstr = new UnicodeString(idBlockResult); // NULL pointer check if (tempstr == NULL) { + // TODO: Testing, forcing this path, shows many memory leaks. ICU-21701 + // intltest translit/TransliteratorTest/TestInstantiation status = U_MEMORY_ALLOCATION_ERROR; return; } if (direction == UTRANS_FORWARD) - idBlockVector.addElement(tempstr, status); + idBlockVector.adoptElement(tempstr, status); else idBlockVector.insertElementAt(tempstr, 0, status); + if (U_FAILURE(status)) { + return; + } } else if (!parsingIDs && curData != NULL) { - if (direction == UTRANS_FORWARD) + if (direction == UTRANS_FORWARD) { dataVector.addElement(curData, status); - else + } else { dataVector.insertElementAt(curData, 0, status); + } + if (U_FAILURE(status)) { + delete curData; + curData = nullptr; + } } if (U_SUCCESS(status)) { @@ -1159,7 +1177,7 @@ void TransliteratorParser::setVariableRange(int32_t start, int32_t end, UErrorCo /** * Assert that the given character is NOT within the variable range. - * If it is, return FALSE. This is neccesary to ensure that the + * If it is, return FALSE. This is necessary to ensure that the * variable range does not overlap characters used in a rule. 
*/ UBool TransliteratorParser::checkVariableRange(UChar32 ch) const { @@ -1538,6 +1556,10 @@ UChar TransliteratorParser::generateStandInFor(UnicodeFunctor* adopted, UErrorCo return 0; } variablesVector.addElement(adopted, status); + if (U_FAILURE(status)) { + delete adopted; + return 0; + } return variableNext++; } @@ -1557,7 +1579,7 @@ UChar TransliteratorParser::getSegmentStandin(int32_t seg, UErrorCode& status) { return 0; } c = variableNext++; - // Set a placeholder in the master variables vector that will be + // Set a placeholder in the primary variables vector that will be // filled in later by setSegmentObject(). We know that we will get // called first because setSegmentObject() will call us. variablesVector.addElement((void*) NULL, status); @@ -1577,13 +1599,17 @@ void TransliteratorParser::setSegmentObject(int32_t seg, StringMatcher* adopted, if (segmentObjects.size() < seg) { segmentObjects.setSize(seg, status); } + if (U_FAILURE(status)) { + return; + } int32_t index = getSegmentStandin(seg, status) - curData->variablesBase; if (segmentObjects.elementAt(seg-1) != NULL || variablesVector.elementAt(index) != NULL) { // should never happen - status = U_INTERNAL_TRANSLITERATOR_ERROR; + if (U_SUCCESS(status)) {status = U_INTERNAL_TRANSLITERATOR_ERROR;} return; } + // Note: neither segmentObjects or variablesVector has an object deleter function. segmentObjects.setElementAt(adopted, seg-1); variablesVector.setElementAt(adopted, index); } diff --git a/contrib/libs/icu/i18n/rbt_pars.h b/contrib/libs/icu/i18n/rbt_pars.h index 61ce9727e0..d1a4cd6997 100644 --- a/contrib/libs/icu/i18n/rbt_pars.h +++ b/contrib/libs/icu/i18n/rbt_pars.h @@ -156,7 +156,7 @@ public: * call returns. * @param rules rules, separated by ';' * @param direction either FORWARD or REVERSE. - * @param pe Struct to recieve information on position + * @param pe Struct to receive information on position * of error if an error is encountered * @param ec Output param set to success/failure code. 
*/ @@ -210,7 +210,7 @@ private: /** * Assert that the given character is NOT within the variable range. - * If it is, return FALSE. This is neccesary to ensure that the + * If it is, return false. This is necessary to ensure that the * variable range does not overlap characters used in a rule. * @param ch the given character. * @return True, if the given character is NOT within the variable range. diff --git a/contrib/libs/icu/i18n/rbt_rule.h b/contrib/libs/icu/i18n/rbt_rule.h index 5501981266..b927f5d6c0 100644 --- a/contrib/libs/icu/i18n/rbt_rule.h +++ b/contrib/libs/icu/i18n/rbt_rule.h @@ -172,9 +172,9 @@ public: * segments, or null if there are none. The array itself is adopted, * but the pointers within it are not. * @param segsCount number of elements in segs[]. - * @param anchorStart TRUE if the the rule is anchored on the left to + * @param anchorStart true if the the rule is anchored on the left to * the context start. - * @param anchorEnd TRUE if the rule is anchored on the right to the + * @param anchorEnd true if the rule is anchored on the right to the * context limit. * @param data the rule data. * @param status Output parameter filled in with success or failure status. @@ -267,11 +267,11 @@ public: * * @param text the text * @param pos the position indices - * @param incremental if TRUE, test for partial matches that may + * @param incremental if true, test for partial matches that may * be completed by additional text inserted at pos.limit. * @return one of <code>U_MISMATCH</code>, * <code>U_PARTIAL_MATCH</code>, or <code>U_MATCH</code>. If - * incremental is FALSE then U_PARTIAL_MATCH will not be returned. + * incremental is false then U_PARTIAL_MATCH will not be returned. 
*/ UMatchDegree matchAndReplace(Replaceable& text, UTransPosition& pos, diff --git a/contrib/libs/icu/i18n/rbt_set.cpp b/contrib/libs/icu/i18n/rbt_set.cpp index d8d0384dda..abc4413c2c 100644 --- a/contrib/libs/icu/i18n/rbt_set.cpp +++ b/contrib/libs/icu/i18n/rbt_set.cpp @@ -197,7 +197,7 @@ TransliterationRuleSet::TransliterationRuleSet(const TransliterationRuleSet& oth status = U_MEMORY_ALLOCATION_ERROR; break; } - ruleVector->addElement(tempTranslitRule, status); + ruleVector->addElementX(tempTranslitRule, status); if (U_FAILURE(status)) { break; } @@ -251,7 +251,7 @@ void TransliterationRuleSet::addRule(TransliterationRule* adoptedRule, delete adoptedRule; return; } - ruleVector->addElement(adoptedRule, status); + ruleVector->addElementX(adoptedRule, status); int32_t len; if ((len = adoptedRule->getContextLength()) > maxContextLength) { @@ -316,16 +316,16 @@ void TransliterationRuleSet::freeze(UParseError& parseError,UErrorCode& status) for (j=0; j<n; ++j) { if (indexValue[j] >= 0) { if (indexValue[j] == x) { - v.addElement(ruleVector->elementAt(j), status); + v.addElementX(ruleVector->elementAt(j), status); } } else { // If the indexValue is < 0, then the first key character is // a set, and we must use the more time-consuming // matchesIndexValue check. In practice this happens - // rarely, so we seldom tread this code path. + // rarely, so we seldom treat this code path. TransliterationRule* r = (TransliterationRule*) ruleVector->elementAt(j); if (r->matchesIndexValue((uint8_t)x)) { - v.addElement(r, status); + v.addElementX(r, status); } } } @@ -396,7 +396,7 @@ void TransliterationRuleSet::freeze(UParseError& parseError,UErrorCode& status) * @param text the text to be transliterated * @param pos the position indices, which will be updated * @param incremental if TRUE, assume new text may be inserted - * at index.limit, and return FALSE if thre is a partial match. + * at index.limit, and return FALSE if there is a partial match. 
* @return TRUE unless a U_PARTIAL_MATCH has been obtained, * indicating that transliteration should stop until more text * arrives. diff --git a/contrib/libs/icu/i18n/rbt_set.h b/contrib/libs/icu/i18n/rbt_set.h index b4b46786bf..3a2890e8ec 100644 --- a/contrib/libs/icu/i18n/rbt_set.h +++ b/contrib/libs/icu/i18n/rbt_set.h @@ -123,14 +123,14 @@ public: /** * Transliterate the given text with the given UTransPosition - * indices. Return TRUE if the transliteration should continue - * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). - * Note that FALSE is only ever returned if isIncremental is TRUE. + * indices. Return true if the transliteration should continue + * or false if it should halt (because of a U_PARTIAL_MATCH match). + * Note that false is only ever returned if isIncremental is true. * @param text the text to be transliterated * @param index the position indices, which will be updated - * @param isIncremental if TRUE, assume new text may be inserted - * at index.limit, and return FALSE if thre is a partial match. - * @return TRUE unless a U_PARTIAL_MATCH has been obtained, + * @param isIncremental if true, assume new text may be inserted + * at index.limit, and return false if thrre is a partial match. + * @return true unless a U_PARTIAL_MATCH has been obtained, * indicating that transliteration should stop until more text * arrives. 
*/ diff --git a/contrib/libs/icu/i18n/rbtz.cpp b/contrib/libs/icu/i18n/rbtz.cpp index 3249a32a1c..495d8310d0 100644 --- a/contrib/libs/icu/i18n/rbtz.cpp +++ b/contrib/libs/icu/i18n/rbtz.cpp @@ -25,12 +25,19 @@ U_NAMESPACE_BEGIN /** * A struct representing a time zone transition */ -struct Transition { +struct Transition : public UMemory { UDate time; TimeZoneRule* from; TimeZoneRule* to; }; +U_CDECL_BEGIN +static void U_CALLCONV +deleteTransition(void* obj) { + delete static_cast<Transition *>(obj); +} +U_CDECL_END + static UBool compareRules(UVector* rules1, UVector* rules2) { if (rules1 == NULL && rules2 == NULL) { return TRUE; @@ -88,59 +95,61 @@ RuleBasedTimeZone::operator=(const RuleBasedTimeZone& right) { return *this; } -UBool +bool RuleBasedTimeZone::operator==(const TimeZone& that) const { if (this == &that) { - return TRUE; + return true; } - if (typeid(*this) != typeid(that) - || BasicTimeZone::operator==(that) == FALSE) { - return FALSE; + if (typeid(*this) != typeid(that) || !BasicTimeZone::operator==(that)) { + return false; } RuleBasedTimeZone *rbtz = (RuleBasedTimeZone*)&that; if (*fInitialRule != *(rbtz->fInitialRule)) { - return FALSE; + return false; } if (compareRules(fHistoricRules, rbtz->fHistoricRules) && compareRules(fFinalRules, rbtz->fFinalRules)) { - return TRUE; + return true; } - return FALSE; + return false; } -UBool +bool RuleBasedTimeZone::operator!=(const TimeZone& that) const { return !operator==(that); } void RuleBasedTimeZone::addTransitionRule(TimeZoneRule* rule, UErrorCode& status) { + LocalPointer<TimeZoneRule>lpRule(rule); if (U_FAILURE(status)) { return; } AnnualTimeZoneRule* atzrule = dynamic_cast<AnnualTimeZoneRule*>(rule); - if (atzrule != NULL && atzrule->getEndYear() == AnnualTimeZoneRule::MAX_YEAR) { + if (atzrule != nullptr && atzrule->getEndYear() == AnnualTimeZoneRule::MAX_YEAR) { // A final rule - if (fFinalRules == NULL) { - fFinalRules = new UVector(status); + if (fFinalRules == nullptr) { + LocalPointer<UVector> 
lpFinalRules(new UVector(uprv_deleteUObject, nullptr, status), status); if (U_FAILURE(status)) { return; } + fFinalRules = lpFinalRules.orphan(); } else if (fFinalRules->size() >= 2) { // Cannot handle more than two final rules status = U_INVALID_STATE_ERROR; return; } - fFinalRules->addElement((void*)rule, status); + fFinalRules->adoptElement(lpRule.orphan(), status); } else { // Non-final rule - if (fHistoricRules == NULL) { - fHistoricRules = new UVector(status); + if (fHistoricRules == nullptr) { + LocalPointer<UVector> lpHistoricRules(new UVector(uprv_deleteUObject, nullptr, status), status); if (U_FAILURE(status)) { return; } + fHistoricRules = lpHistoricRules.orphan(); } - fHistoricRules->addElement((void*)rule, status); + fHistoricRules->adoptElement(lpRule.orphan(), status); } // Mark dirty, so transitions are recalculated at next complete() call fUpToDate = FALSE; @@ -176,7 +185,6 @@ RuleBasedTimeZone::complete(UErrorCode& status) { return; } - UBool *done = NULL; // Create a TimezoneTransition and add to the list if (fHistoricRules != NULL || fFinalRules != NULL) { TimeZoneRule *curRule = fInitialRule; @@ -187,13 +195,13 @@ RuleBasedTimeZone::complete(UErrorCode& status) { if (fHistoricRules != NULL && fHistoricRules->size() > 0) { int32_t i; int32_t historicCount = fHistoricRules->size(); - done = (UBool*)uprv_malloc(sizeof(UBool) * historicCount); + LocalMemory<bool> done((bool *)uprv_malloc(sizeof(bool) * historicCount)); if (done == NULL) { status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; } for (i = 0; i < historicCount; i++) { - done[i] = FALSE; + done[i] = false; } while (TRUE) { int32_t curStdOffset = curRule->getRawOffset(); @@ -214,7 +222,7 @@ RuleBasedTimeZone::complete(UErrorCode& status) { avail = r->getNextStart(lastTransitionTime, curStdOffset, curDstSavings, false, tt); if (!avail) { // No more transitions from this rule - skip this rule next time - done[i] = TRUE; + done[i] = true; } else { r->getName(name); if (*r == *curRule || @@ 
-267,20 +275,21 @@ RuleBasedTimeZone::complete(UErrorCode& status) { } if (fHistoricTransitions == NULL) { - fHistoricTransitions = new UVector(status); + LocalPointer<UVector> lpHistoricTransitions( + new UVector(deleteTransition, nullptr, status), status); if (U_FAILURE(status)) { goto cleanup; } + fHistoricTransitions = lpHistoricTransitions.orphan(); } - Transition *trst = (Transition*)uprv_malloc(sizeof(Transition)); - if (trst == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; + LocalPointer<Transition> trst(new Transition, status); + if (U_FAILURE(status)) { goto cleanup; } trst->time = nextTransitionTime; trst->from = curRule; trst->to = nextRule; - fHistoricTransitions->addElement(trst, status); + fHistoricTransitions->adoptElement(trst.orphan(), status); if (U_FAILURE(status)) { goto cleanup; } @@ -290,10 +299,12 @@ RuleBasedTimeZone::complete(UErrorCode& status) { } if (fFinalRules != NULL) { if (fHistoricTransitions == NULL) { - fHistoricTransitions = new UVector(status); + LocalPointer<UVector> lpHistoricTransitions( + new UVector(deleteTransition, nullptr, status), status); if (U_FAILURE(status)) { goto cleanup; } + fHistoricTransitions = lpHistoricTransitions.orphan(); } // Append the first transition for each TimeZoneRule *rule0 = (TimeZoneRule*)fFinalRules->elementAt(0); @@ -306,16 +317,10 @@ RuleBasedTimeZone::complete(UErrorCode& status) { status = U_INVALID_STATE_ERROR; goto cleanup; } - Transition *final0 = (Transition*)uprv_malloc(sizeof(Transition)); - if (final0 == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - Transition *final1 = (Transition*)uprv_malloc(sizeof(Transition)); - if (final1 == NULL) { - uprv_free(final0); - status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; + LocalPointer<Transition> final0(new Transition, status); + LocalPointer<Transition> final1(new Transition, status); + if (U_FAILURE(status)) { + goto cleanup; } if (tt0 < tt1) { final0->time = tt0; @@ -332,27 +337,18 @@ 
RuleBasedTimeZone::complete(UErrorCode& status) { final1->from = rule1; final1->to = rule0; } - fHistoricTransitions->addElement(final0, status); - if (U_FAILURE(status)) { - goto cleanup; - } - fHistoricTransitions->addElement(final1, status); + fHistoricTransitions->adoptElement(final0.orphan(), status); + fHistoricTransitions->adoptElement(final1.orphan(), status); if (U_FAILURE(status)) { goto cleanup; } } } fUpToDate = TRUE; - if (done != NULL) { - uprv_free(done); - } return; cleanup: deleteTransitions(); - if (done != NULL) { - uprv_free(done); - } fUpToDate = FALSE; } @@ -403,9 +399,9 @@ RuleBasedTimeZone::getOffset(UDate date, UBool local, int32_t& rawOffset, getOffsetInternal(date, local, kFormer, kLatter, rawOffset, dstOffset, status); } -void -RuleBasedTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, - int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const { +void RuleBasedTimeZone::getOffsetFromLocal(UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const { getOffsetInternal(date, TRUE, nonExistingTimeOpt, duplicatedTimeOpt, rawOffset, dstOffset, status); } @@ -629,16 +625,10 @@ RuleBasedTimeZone::deleteRules(void) { delete fInitialRule; fInitialRule = NULL; if (fHistoricRules != NULL) { - while (!fHistoricRules->isEmpty()) { - delete (TimeZoneRule*)(fHistoricRules->orphanElementAt(0)); - } delete fHistoricRules; fHistoricRules = NULL; } if (fFinalRules != NULL) { - while (!fFinalRules->isEmpty()) { - delete (AnnualTimeZoneRule*)(fFinalRules->orphanElementAt(0)); - } delete fFinalRules; fFinalRules = NULL; } @@ -647,10 +637,6 @@ RuleBasedTimeZone::deleteRules(void) { void RuleBasedTimeZone::deleteTransitions(void) { if (fHistoricTransitions != NULL) { - while (!fHistoricTransitions->isEmpty()) { - Transition *trs = (Transition*)fHistoricTransitions->orphanElementAt(0); - uprv_free(trs); - } 
delete fHistoricTransitions; } fHistoricTransitions = NULL; @@ -658,32 +644,24 @@ RuleBasedTimeZone::deleteTransitions(void) { UVector* RuleBasedTimeZone::copyRules(UVector* source) { - if (source == NULL) { - return NULL; + if (source == nullptr) { + return nullptr; } UErrorCode ec = U_ZERO_ERROR; int32_t size = source->size(); - UVector *rules = new UVector(size, ec); + LocalPointer<UVector> rules(new UVector(uprv_deleteUObject, nullptr, size, ec), ec); if (U_FAILURE(ec)) { - return NULL; + return nullptr; } int32_t i; for (i = 0; i < size; i++) { - rules->addElement(((TimeZoneRule*)source->elementAt(i))->clone(), ec); + LocalPointer<TimeZoneRule> rule(((TimeZoneRule*)source->elementAt(i))->clone(), ec); + rules->adoptElement(rule.orphan(), ec); if (U_FAILURE(ec)) { - break; + return nullptr; } } - if (U_FAILURE(ec)) { - // In case of error, clean up - for (i = 0; i < rules->size(); i++) { - TimeZoneRule *rule = (TimeZoneRule*)rules->orphanElementAt(i); - delete rule; - } - delete rules; - return NULL; - } - return rules; + return rules.orphan(); } TimeZoneRule* diff --git a/contrib/libs/icu/i18n/regexcmp.cpp b/contrib/libs/icu/i18n/regexcmp.cpp index dd777b7538..89cb658425 100644 --- a/contrib/libs/icu/i18n/regexcmp.cpp +++ b/contrib/libs/icu/i18n/regexcmp.cpp @@ -53,7 +53,7 @@ U_NAMESPACE_BEGIN // //------------------------------------------------------------------------------ RegexCompile::RegexCompile(RegexPattern *rxp, UErrorCode &status) : - fParenStack(status), fSetStack(status), fSetOpStack(status) + fParenStack(status), fSetStack(uprv_deleteUObject, nullptr, status), fSetOpStack(status) { // Lazy init of all shared global sets (needed for init()'s empty text) RegexStaticSets::initGlobals(&status); @@ -278,11 +278,6 @@ void RegexCompile::compile( if (U_FAILURE(*fStatus)) { // Bail out if the pattern had errors. - // Set stack cleanup: a successful compile would have left it empty, - // but errors can leave temporary sets hanging around. 
- while (!fSetStack.empty()) { - delete (UnicodeSet *)fSetStack.pop(); - } return; } @@ -473,7 +468,7 @@ UBool RegexCompile::doParseActions(int32_t action) appendOp(URX_START_CAPTURE, varsLoc); appendOp(URX_NOP, 0); - // On the Parentheses stack, start a new frame and add the postions + // On the Parentheses stack, start a new frame and add the positions // of the two NOPs. Depending on what follows in the pattern, the // NOPs may be changed to SAVE_STATE or JMP ops, with a target // address of the end of the parenthesized group. @@ -515,7 +510,7 @@ UBool RegexCompile::doParseActions(int32_t action) appendOp(URX_NOP, 0); appendOp(URX_NOP, 0); - // On the Parentheses stack, start a new frame and add the postions + // On the Parentheses stack, start a new frame and add the positions // of the two NOPs. fParenStack.push(fModeFlags, *fStatus); // Match mode state fParenStack.push(plain, *fStatus); // Begin a new frame. @@ -540,7 +535,7 @@ UBool RegexCompile::doParseActions(int32_t action) appendOp(URX_STO_SP, varLoc); appendOp(URX_NOP, 0); - // On the Parentheses stack, start a new frame and add the postions + // On the Parentheses stack, start a new frame and add the positions // of the two NOPs. Depending on what follows in the pattern, the // NOPs may be changed to SAVE_STATE or JMP ops, with a target // address of the end of the parenthesized group. @@ -557,7 +552,7 @@ UBool RegexCompile::doParseActions(int32_t action) // // Note: Addition of transparent input regions, with the need to // restore the original regions when failing out of a lookahead - // block, complicated this sequence. Some conbined opcodes + // block, complicated this sequence. Some combined opcodes // might make sense - or might not, lookahead aren't that common. 
// // Caution: min match length optimization knows about this @@ -594,7 +589,7 @@ UBool RegexCompile::doParseActions(int32_t action) appendOp(URX_NOP, 0); appendOp(URX_NOP, 0); - // On the Parentheses stack, start a new frame and add the postions + // On the Parentheses stack, start a new frame and add the positions // of the NOPs. fParenStack.push(fModeFlags, *fStatus); // Match mode state fParenStack.push(lookAhead, *fStatus); // Frame type. @@ -627,7 +622,7 @@ UBool RegexCompile::doParseActions(int32_t action) appendOp(URX_STATE_SAVE, 0); // dest address will be patched later. appendOp(URX_NOP, 0); - // On the Parentheses stack, start a new frame and add the postions + // On the Parentheses stack, start a new frame and add the positions // of the StateSave and NOP. fParenStack.push(fModeFlags, *fStatus); // Match mode state fParenStack.push(negLookAhead, *fStatus); // Frame type @@ -679,7 +674,7 @@ UBool RegexCompile::doParseActions(int32_t action) appendOp(URX_NOP, 0); appendOp(URX_NOP, 0); - // On the Parentheses stack, start a new frame and add the postions + // On the Parentheses stack, start a new frame and add the positions // of the URX_LB_CONT and the NOP. fParenStack.push(fModeFlags, *fStatus); // Match mode state fParenStack.push(lookBehind, *fStatus); // Frame type @@ -734,7 +729,7 @@ UBool RegexCompile::doParseActions(int32_t action) appendOp(URX_NOP, 0); appendOp(URX_NOP, 0); - // On the Parentheses stack, start a new frame and add the postions + // On the Parentheses stack, start a new frame and add the positions // of the URX_LB_CONT and the NOP. fParenStack.push(fModeFlags, *fStatus); // Match mode state fParenStack.push(lookBehindN, *fStatus); // Frame type @@ -748,7 +743,7 @@ UBool RegexCompile::doParseActions(int32_t action) case doConditionalExpr: // Conditionals such as (?(1)a:b) case doPerlInline: - // Perl inline-condtionals. (?{perl code}a|b) We're not perl, no way to do them. + // Perl inline-conditionals. 
(?{perl code}a|b) We're not perl, no way to do them. error(U_REGEX_UNIMPLEMENTED); break; @@ -1009,7 +1004,7 @@ UBool RegexCompile::doParseActions(int32_t action) case doIntervalInit: // The '{' opening an interval quantifier was just scanned. - // Init the counter varaiables that will accumulate the values as the digits + // Init the counter variables that will accumulate the values as the digits // are scanned. fIntervalLow = 0; fIntervalUpper = -1; @@ -1485,8 +1480,8 @@ UBool RegexCompile::doParseActions(int32_t action) case 0x78: /* 'x' */ bit = UREGEX_COMMENTS; break; case 0x2d: /* '-' */ fSetModeFlag = FALSE; break; default: - UPRV_UNREACHABLE; // Should never happen. Other chars are filtered out - // by the scanner. + UPRV_UNREACHABLE_EXIT; // Should never happen. Other chars are filtered out + // by the scanner. } if (fSetModeFlag) { fNewModeFlags |= bit; @@ -1522,9 +1517,9 @@ UBool RegexCompile::doParseActions(int32_t action) appendOp(URX_NOP, 0); appendOp(URX_NOP, 0); - // On the Parentheses stack, start a new frame and add the postions + // On the Parentheses stack, start a new frame and add the positions // of the two NOPs (a normal non-capturing () frame, except for the - // saving of the orignal mode flags.) + // saving of the original mode flags.) 
fParenStack.push(fModeFlags, *fStatus); fParenStack.push(flags, *fStatus); // Frame Marker fParenStack.push(fRXPat->fCompiledPat->size()-2, *fStatus); // The first NOP @@ -1656,13 +1651,16 @@ UBool RegexCompile::doParseActions(int32_t action) } case doSetBegin: - fixLiterals(FALSE); - fSetStack.push(new UnicodeSet(), *fStatus); - fSetOpStack.push(setStart, *fStatus); - if ((fModeFlags & UREGEX_CASE_INSENSITIVE) != 0) { - fSetOpStack.push(setCaseClose, *fStatus); + { + fixLiterals(FALSE); + LocalPointer<UnicodeSet> lpSet(new UnicodeSet(), *fStatus); + fSetStack.push(lpSet.orphan(), *fStatus); + fSetOpStack.push(setStart, *fStatus); + if ((fModeFlags & UREGEX_CASE_INSENSITIVE) != 0) { + fSetOpStack.push(setCaseClose, *fStatus); + } + break; } - break; case doSetBeginDifference1: // We have scanned something like [[abc]-[ @@ -1860,7 +1858,7 @@ UBool RegexCompile::doParseActions(int32_t action) } default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } if (U_FAILURE(*fStatus)) { @@ -1967,17 +1965,17 @@ int32_t RegexCompile::buildOp(int32_t type, int32_t val) { return 0; } if (type < 0 || type > 255) { - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } if (val > 0x00ffffff) { - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } if (val < 0) { if (!(type == URX_RESERVED_OP_N || type == URX_RESERVED_OP)) { - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } if (URX_TYPE(val) != 0xff) { - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } type = URX_RESERVED_OP_N; } @@ -2373,7 +2371,7 @@ void RegexCompile::handleCloseParen() { default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } // remember the next location in the compiled pattern. @@ -2396,8 +2394,8 @@ void RegexCompile::compileSet(UnicodeSet *theSet) return; } // Remove any strings from the set. - // There shoudn't be any, but just in case. - // (Case Closure can add them; if we had a simple case closure avaialble that + // There shouldn't be any, but just in case. 
+ // (Case Closure can add them; if we had a simple case closure available that // ignored strings, that would be better.) theSet->removeAllStrings(); int32_t setSize = theSet->size(); @@ -2428,7 +2426,11 @@ void RegexCompile::compileSet(UnicodeSet *theSet) theSet->freeze(); int32_t setNumber = fRXPat->fSets->size(); fRXPat->fSets->addElement(theSet, *fStatus); - appendOp(URX_SETREF, setNumber); + if (U_SUCCESS(*fStatus)) { + appendOp(URX_SETREF, setNumber); + } else { + delete theSet; + } } } } @@ -2485,7 +2487,7 @@ void RegexCompile::compileInterval(int32_t InitOp, int32_t LoopOp) fRXPat->fCompiledPat->setElementAt(fIntervalLow, topOfBlock+2); fRXPat->fCompiledPat->setElementAt(fIntervalUpper, topOfBlock+3); - // Apend the CTR_LOOP op. The operand is the location of the CTR_INIT op. + // Append the CTR_LOOP op. The operand is the location of the CTR_INIT op. // Goes at end of the block being looped over, so just append to the code so far. appendOp(LoopOp, topOfBlock); @@ -2579,7 +2581,7 @@ UBool RegexCompile::compileInlineInterval() { // The pattern could match a string beginning with a German sharp-s // // To the ordinary case closure for a character c, we add all other -// characters cx where the case closure of cx incudes a string form that begins +// characters cx where the case closure of cx includes a string form that begins // with the original character c. // // This function could be made smarter. The full pattern string is available @@ -2593,7 +2595,8 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh // Machine Generated below. // It may need updating with new versions of Unicode. // Intltest test RegexTest::TestCaseInsensitiveStarters will fail if an update is needed. -// The update tool is here: svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genregexcasing +// The update tool is here: +// https://github.com/unicode-org/icu/tree/main/tools/unicode/c/genregexcasing // Machine Generated Data. 
Do not hand edit. static const UChar32 RECaseFixCodePoints[] = { @@ -2634,7 +2637,7 @@ void RegexCompile::findCaseInsensitiveStarters(UChar32 c, UnicodeSet *starterCh if (c < UCHAR_MIN_VALUE || c > UCHAR_MAX_VALUE) { // This function should never be called with an invalid input character. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } else if (u_hasBinaryProperty(c, UCHAR_CASE_SENSITIVE)) { UChar32 caseFoldedC = u_foldCase(c, U_FOLD_CASE_DEFAULT); starterChars->set(caseFoldedC, caseFoldedC); @@ -2919,7 +2922,7 @@ void RegexCompile::matchStartType() { break; - case URX_BACKSLASH_X: // Grahpeme Cluster. Minimum is 1, max unbounded. + case URX_BACKSLASH_X: // Grapheme Cluster. Minimum is 1, max unbounded. case URX_DOTANY_ALL: // . matches one or two. case URX_DOTANY: case URX_DOTANY_UNIX: @@ -3127,10 +3130,10 @@ void RegexCompile::matchStartType() { case URX_LB_END: case URX_LBN_CONT: case URX_LBN_END: - UPRV_UNREACHABLE; // Shouldn't get here. These ops should be - // consumed by the scan in URX_LA_START and LB_START + UPRV_UNREACHABLE_EXIT; // Shouldn't get here. These ops should be + // consumed by the scan in URX_LA_START and LB_START default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -3286,7 +3289,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { case URX_BACKSLASH_R: case URX_BACKSLASH_V: case URX_ONECHAR_I: - case URX_BACKSLASH_X: // Grahpeme Cluster. Minimum is 1, max unbounded. + case URX_BACKSLASH_X: // Grapheme Cluster. Minimum is 1, max unbounded. case URX_DOTANY_ALL: // . matches one or two. case URX_DOTANY: case URX_DOTANY_UNIX: @@ -3406,7 +3409,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { loc++; op = (int32_t)fRXPat->fCompiledPat->elementAti(loc); if (URX_TYPE(op) == URX_LA_START) { - // The boilerplate for look-ahead includes two LA_END insturctions, + // The boilerplate for look-ahead includes two LA_END instructions, // Depth will be decremented by each one when it is seen. 
depth += 2; } @@ -3450,7 +3453,7 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -3475,6 +3478,9 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) { // value may be longer than the actual maximum; it must // never be shorter. // +// start, end: the range of the pattern to check. +// end is inclusive. +// //------------------------------------------------------------------------------ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { if (U_FAILURE(*fStatus)) { @@ -3543,7 +3549,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { // Call the max length unbounded, and stop further checking. case URX_BACKREF: // BackRef. Must assume that it might be a zero length match case URX_BACKREF_I: - case URX_BACKSLASH_X: // Grahpeme Cluster. Minimum is 1, max unbounded. + case URX_BACKSLASH_X: // Grapheme Cluster. Minimum is 1, max unbounded. currentLen = INT32_MAX; break; @@ -3693,7 +3699,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { case URX_CTR_LOOP_NG: // These opcodes will be skipped over by code for URX_CTR_INIT. // We shouldn't encounter them here. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; case URX_LOOP_SR_I: case URX_LOOP_DOT_I: @@ -3713,26 +3719,26 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { // End of look-ahead ops should always be consumed by the processing at // the URX_LA_START op. - // UPRV_UNREACHABLE; + // UPRV_UNREACHABLE_EXIT; case URX_LB_START: { // Look-behind. Scan forward until the matching look-around end, // without processing the look-behind block. 
int32_t dataLoc = URX_VAL(op); - for (loc = loc + 1; loc < end; ++loc) { + for (loc = loc + 1; loc <= end; ++loc) { op = (int32_t)fRXPat->fCompiledPat->elementAti(loc); int32_t opType = URX_TYPE(op); if ((opType == URX_LA_END || opType == URX_LBN_END) && (URX_VAL(op) == dataLoc)) { break; } } - U_ASSERT(loc < end); + U_ASSERT(loc <= end); } break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } @@ -3887,7 +3893,7 @@ void RegexCompile::stripNOPs() { default: // Some op is unaccounted for. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } @@ -3924,7 +3930,7 @@ void RegexCompile::error(UErrorCode e) { UErrorCode status = U_ZERO_ERROR; // throwaway status for extracting context // Fill in the context. - // Note: extractBetween() pins supplied indicies to the string bounds. + // Note: extractBetween() pins supplied indices to the string bounds. uprv_memset(fParseErr->preContext, 0, sizeof(fParseErr->preContext)); uprv_memset(fParseErr->postContext, 0, sizeof(fParseErr->postContext)); utext_extract(fRXPat->fPattern, fScanIndex-U_PARSE_CONTEXT_LEN+1, fScanIndex, fParseErr->preContext, U_PARSE_CONTEXT_LEN, &status); @@ -4054,7 +4060,7 @@ void RegexCompile::nextChar(RegexPatternChar &c) { // // We are in free-spacing and comments mode. // Scan through any white space and comments, until we - // reach a significant character or the end of inut. + // reach a significant character or the end of input. for (;;) { if (c.fChar == (UChar32)-1) { break; // End of Input @@ -4382,7 +4388,7 @@ static inline void addIdentifierIgnorable(UnicodeSet *set, UErrorCode& ec) { // // Create a Unicode Set from a Unicode Property expression. -// This is common code underlying both \p{...} ane [:...:] expressions. +// This is common code underlying both \p{...} and [:...:] expressions. 
// Includes trying the Java "properties" that aren't supported as // normal ICU UnicodeSet properties // @@ -4575,6 +4581,13 @@ UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UB } while (false); // End of do loop block. Code above breaks out of the block on success or hard failure. if (U_SUCCESS(status)) { + // ICU 70 adds emoji properties of strings, but as long as Java does not say how to + // deal with properties of strings and character classes with strings, we ignore them. + // Just in case something downstream might stumble over the strings, + // we remove them from the set. + // Note that when we support strings, the complement of a property (as with \P) + // should be implemented as .complement().removeAllStrings() (code point complement). + set->removeAllStrings(); U_ASSERT(set.isValid()); if (negated) { set->complement(); @@ -4608,6 +4621,13 @@ void RegexCompile::setEval(int32_t nextOp) { fSetOpStack.popi(); U_ASSERT(fSetStack.empty() == FALSE); rightOperand = (UnicodeSet *)fSetStack.peek(); + // ICU 70 adds emoji properties of strings, but createSetForProperty() removes all strings + // (see comments there). + // We also do not yet support string literals in character classes, + // so there should not be any strings. + // Note that when we support strings, the complement of a set (as with ^ or \P) + // should be implemented as .complement().removeAllStrings() (code point complement). 
+ U_ASSERT(!rightOperand->hasStrings()); switch (pendingSetOperation) { case setNegation: rightOperand->complement(); @@ -4638,7 +4658,7 @@ void RegexCompile::setEval(int32_t nextOp) { delete rightOperand; break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } } @@ -4646,7 +4666,8 @@ void RegexCompile::setEval(int32_t nextOp) { void RegexCompile::setPushOp(int32_t op) { setEval(op); fSetOpStack.push(op, *fStatus); - fSetStack.push(new UnicodeSet(), *fStatus); + LocalPointer<UnicodeSet> lpSet(new UnicodeSet(), *fStatus); + fSetStack.push(lpSet.orphan(), *fStatus); } U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/regexcmp.h b/contrib/libs/icu/i18n/regexcmp.h index f2aeea909e..9e1aa170f0 100644 --- a/contrib/libs/icu/i18n/regexcmp.h +++ b/contrib/libs/icu/i18n/regexcmp.h @@ -104,7 +104,7 @@ private: int32_t LoopOp); UBool compileInlineInterval(); // Generate inline code for a {min,max} quantifier void literalChar(UChar32 c); // Compile a literal char - void fixLiterals(UBool split=FALSE); // Generate code for pending literal characters. + void fixLiterals(UBool split=false); // Generate code for pending literal characters. void insertOp(int32_t where); // Open up a slot for a new op in the // generated code at the specified location. void appendOp(int32_t op); // Append a new op to the compiled pattern. @@ -209,7 +209,7 @@ private: // initially scanned. Each new interval // encountered overwrites these values. // -1 for the upper interval value means none - // was specified (unlimited occurences.) + // was specified (unlimited occurrences.) int64_t fNameStartPos; // Starting position of a \N{NAME} name in a // pattern, valid while remainder of name is @@ -228,7 +228,7 @@ private: // in this string while being scanned. 
}; -// Constant values to be pushed onto fSetOpStack while scanning & evalueating [set expressions] +// Constant values to be pushed onto fSetOpStack while scanning & evaluating [set expressions] // The high 16 bits are the operator precedence, and the low 16 are a code for the operation itself. enum SetOperations { diff --git a/contrib/libs/icu/i18n/regeximp.h b/contrib/libs/icu/i18n/regeximp.h index 590d216895..bb0e1e838d 100644 --- a/contrib/libs/icu/i18n/regeximp.h +++ b/contrib/libs/icu/i18n/regeximp.h @@ -37,7 +37,7 @@ U_NAMESPACE_BEGIN #define REGEX_DUMP_DEBUG #define REGEX_RUN_DEBUG -// End of #defines inteded to be directly set. +// End of #defines intended to be directly set. #include <stdio.h> #endif @@ -140,7 +140,7 @@ enum { URX_DOLLAR_M = 42, // $ in multi-line mode. URX_CARET_M = 43, // ^ in multi-line mode. URX_LB_START = 44, // LookBehind Start. - // Paramater is data location + // Parameter is data location URX_LB_CONT = 45, // LookBehind Continue. // Param 0: the data location // Param 1: The minimum length of the look-behind match @@ -371,7 +371,7 @@ class CaseFoldingUTextIterator: public UMemory { UBool inExpansion(); // True if last char returned from next() and the // next to be returned both originated from a string - // folding of the same code point from the orignal UText. + // folding of the same code point from the original UText. private: UText &fUText; const UChar *fFoldChars; @@ -395,7 +395,7 @@ class CaseFoldingUCharIterator: public UMemory { UBool inExpansion(); // True if last char returned from next() and the // next to be returned both originated from a string - // folding of the same code point from the orignal UText. + // folding of the same code point from the original UText. int64_t getIndex(); // Return the current input buffer index. 
diff --git a/contrib/libs/icu/i18n/regextxt.h b/contrib/libs/icu/i18n/regextxt.h index 9cfabbe415..0f64b8437e 100644 --- a/contrib/libs/icu/i18n/regextxt.h +++ b/contrib/libs/icu/i18n/regextxt.h @@ -29,7 +29,7 @@ U_NAMESPACE_BEGIN #endif #ifdef REGEX_DISABLE_CHUNK_MODE -# define UTEXT_FULL_TEXT_IN_CHUNK(ut,len) (FALSE) +# define UTEXT_FULL_TEXT_IN_CHUNK(ut,len) (false) #else # define UTEXT_FULL_TEXT_IN_CHUNK(ut,len) ((0==((ut)->chunkNativeStart))&&((len)==((ut)->chunkNativeLimit))&&((len)==((ut)->nativeIndexingLimit))) #endif diff --git a/contrib/libs/icu/i18n/region.cpp b/contrib/libs/icu/i18n/region.cpp index 76445aef32..2e013708bb 100644 --- a/contrib/libs/icu/i18n/region.cpp +++ b/contrib/libs/icu/i18n/region.cpp @@ -128,12 +128,12 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { buf[rangeMarkerLocation] = 0; while ( buf[rangeMarkerLocation-1] <= endRange ) { LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status); - allRegions->addElement(newRegion.orphan(),status); + allRegions->addElementX(newRegion.orphan(),status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status); - allRegions->addElement(newRegion.orphan(),status); + allRegions->addElementX(newRegion.orphan(),status); } } @@ -147,38 +147,25 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { buf[rangeMarkerLocation] = 0; while ( buf[rangeMarkerLocation-1] <= endRange ) { LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status); - allRegions->addElement(newRegion.orphan(),status); + allRegions->addElementX(newRegion.orphan(),status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status); - allRegions->addElement(newRegion.orphan(),status); + allRegions->addElementX(newRegion.orphan(),status); } } while ( ures_hasNext(regionUnknown.getAlias()) ) { LocalPointer<UnicodeString> regionName (new 
UnicodeString(ures_getNextUnicodeString(regionUnknown.getAlias(),NULL,&status),status)); - allRegions->addElement(regionName.orphan(),status); + allRegions->addElementX(regionName.orphan(),status); } while ( ures_hasNext(worldContainment.getAlias()) ) { UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment.getAlias(),NULL,&status)); - continents->addElement(continentName,status); + continents->addElementX(continentName,status); } - UResourceBundle *groupingBundle = nullptr; - while ( ures_hasNext(groupingContainment.getAlias()) ) { - groupingBundle = ures_getNextResource(groupingContainment.getAlias(), groupingBundle, &status); - if (U_FAILURE(status)) { - break; - } - UnicodeString *groupingName = new UnicodeString(ures_getKey(groupingBundle), -1, US_INV); - if (groupingName) { - groupings->addElement(groupingName,status); - } - } - ures_close(groupingBundle); - for ( int32_t i = 0 ; i < allRegions->size() ; i++ ) { LocalPointer<Region> r(new Region(), status); if ( U_FAILURE(status) ) { @@ -203,6 +190,29 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { uhash_put(newRegionIDMap.getAlias(),idStrAlias,(void *)(r.orphan()),&status); // regionIDMap takes ownership } + UResourceBundle *groupingBundle = nullptr; + while ( ures_hasNext(groupingContainment.getAlias()) ) { + groupingBundle = ures_getNextResource(groupingContainment.getAlias(), groupingBundle, &status); + if (U_FAILURE(status)) { + break; + } + UnicodeString *groupingName = new UnicodeString(ures_getKey(groupingBundle), -1, US_INV); + groupings->addElementX(groupingName,status); + Region *grouping = (Region *) uhash_get(newRegionIDMap.getAlias(),groupingName); + if (grouping != NULL) { + for (int32_t i = 0; i < ures_getSize(groupingBundle); i++) { + UnicodeString child = ures_getUnicodeStringByIndex(groupingBundle, i, &status); + if (U_SUCCESS(status)) { + if (grouping->containedRegions == NULL) { + grouping->containedRegions = new 
UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); + } + grouping->containedRegions->addElementX(new UnicodeString(child), status); + } + } + } + } + ures_close(groupingBundle); + // Process the territory aliases while ( ures_hasNext(territoryAlias.getAlias()) ) { LocalUResourceBundlePointer res(ures_getNextResource(territoryAlias.getAlias(),NULL,&status)); @@ -217,7 +227,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { if ( aliasToRegion != NULL && aliasFromRegion == NULL ) { // This is just an alias from some string to a region uhash_put(newRegionAliases.getAlias(),(void *)aliasFromStr.orphan(), (void *)aliasToRegion,&status); } else { - if ( aliasFromRegion == NULL ) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it. + if ( aliasFromRegion == NULL ) { // Deprecated region code not in the primary codes list - so need to create a deprecated region for it. LocalPointer<Region> newRgn(new Region, status); if ( U_SUCCESS(status) ) { aliasFromRegion = newRgn.orphan(); @@ -257,7 +267,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { Region *target = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)¤tRegion); if (target) { LocalPointer<UnicodeString> preferredValue(new UnicodeString(target->idStr), status); - aliasFromRegion->preferredValues->addElement((void *)preferredValue.orphan(),status); // may add null if err + aliasFromRegion->preferredValues->addElementX((void *)preferredValue.orphan(),status); // may add null if err } currentRegion.remove(); } @@ -354,7 +364,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { return; // error out } childStr->fastCopyFrom(childRegion->idStr); - parentRegion->containedRegions->addElement((void *)childStr.orphan(),status); + parentRegion->containedRegions->addElementX((void *)childStr.orphan(),status); // Set the parent region to be the containing region of the child. 
// Regions of type GROUPING can't be set as the parent, since another region @@ -378,7 +388,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { if( U_FAILURE(status) ) { return; // error out } - availableRegions[ar->fType]->addElement((void *)arString.orphan(),status); + availableRegions[ar->fType]->addElementX((void *)arString.orphan(),status); } ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); @@ -439,7 +449,7 @@ Region::~Region () { * Returns true if the two regions are equal. * Per PMC, just use pointer compare, since we have at most one instance of each Region. */ -UBool +bool Region::operator==(const Region &that) const { return (idStr == that.idStr); } @@ -448,7 +458,7 @@ Region::operator==(const Region &that) const { * Returns true if the two regions are NOT equal; that is, if operator ==() returns false. * Per PMC, just use pointer compare, since we have at most one instance of each Region. */ -UBool +bool Region::operator!=(const Region &that) const { return (idStr != that.idStr); } @@ -617,13 +627,13 @@ Region::getContainedRegions( URegionType type, UErrorCode &status ) const { const char *regionId = cr->next(NULL,status); const Region *r = Region::getInstance(regionId,status); if ( r->getType() == type) { - result->addElement((void *)&r->idStr,status); + result->addElementX((void *)&r->idStr,status); } else { StringEnumeration *children = r->getContainedRegions(type, status); for ( int32_t j = 0 ; j < children->count(status) ; j++ ) { const char *id2 = children->next(NULL,status); const Region *r2 = Region::getInstance(id2,status); - result->addElement((void *)&r2->idStr,status); + result->addElementX((void *)&r2->idStr,status); } delete children; } @@ -703,7 +713,7 @@ RegionNameEnumeration::RegionNameEnumeration(UVector *fNameList, UErrorCode& sta for ( int32_t i = 0 ; i < fNameList->size() ; i++ ) { UnicodeString* this_region_name = (UnicodeString *)fNameList->elementAt(i); UnicodeString* new_region_name = new 
UnicodeString(*this_region_name); - fRegionNames->addElement((void *)new_region_name,status); + fRegionNames->addElementX((void *)new_region_name,status); } } else { diff --git a/contrib/libs/icu/i18n/region_impl.h b/contrib/libs/icu/i18n/region_impl.h index 5e5a64529a..62acaa4511 100644 --- a/contrib/libs/icu/i18n/region_impl.h +++ b/contrib/libs/icu/i18n/region_impl.h @@ -29,10 +29,10 @@ public: RegionNameEnumeration(UVector *fNameList, UErrorCode& status); virtual ~RegionNameEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; - virtual const UnicodeString* snext(UErrorCode& status); - virtual void reset(UErrorCode& status); - virtual int32_t count(UErrorCode& status) const; + virtual UClassID getDynamicClassID(void) const override; + virtual const UnicodeString* snext(UErrorCode& status) override; + virtual void reset(UErrorCode& status) override; + virtual int32_t count(UErrorCode& status) const override; private: int32_t pos; UVector *fRegionNames; diff --git a/contrib/libs/icu/i18n/reldatefmt.cpp b/contrib/libs/icu/i18n/reldatefmt.cpp index 8c6688c5b9..6ef5160798 100644 --- a/contrib/libs/icu/i18n/reldatefmt.cpp +++ b/contrib/libs/icu/i18n/reldatefmt.cpp @@ -95,7 +95,7 @@ public: const UnicodeString emptyString; - // Mappping from source to target styles for alias fallback. + // Mapping from source to target styles for alias fallback. 
int32_t fallBackCache[UDAT_STYLE_COUNT]; void adoptCombinedDateAndTime(SimpleFormatter *fmtToAdopt) { @@ -588,7 +588,7 @@ struct RelDateTimeFmtDataSink : public ResourceSink { } virtual void put(const char *key, ResourceValue &value, - UBool /*noFallback*/, UErrorCode &errorCode) { + UBool /*noFallback*/, UErrorCode &errorCode) override { // Main entry point to sink ResourceTable table = value.getTable(errorCode); if (U_FAILURE(errorCode)) { return; } @@ -1126,7 +1126,7 @@ void RelativeDateTimeFormatter::formatRelativeImpl( // leaves some holes (even for data that is currently available, such as quarter). // When the new CLDR data is available, update the data storage accordingly, // rewrite this to use it directly, and rewrite the old format method to call this - // new one; that is covered by http://bugs.icu-project.org/trac/ticket/12171. + // new one; that is covered by https://unicode-org.atlassian.net/browse/ICU-12171. UDateDirection direction = UDAT_DIRECTION_COUNT; if (offset > -2.1 && offset < 2.1) { // Allow a 1% epsilon, so offsets in -1.01..-0.99 map to LAST @@ -1326,7 +1326,7 @@ ureldatefmt_formatNumeric( const URelativeDateTimeFormatter* reldatefmt, return res.extract(result, resultCapacity, *status); } -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 ureldatefmt_formatNumericToResult( const URelativeDateTimeFormatter* reldatefmt, double offset, @@ -1369,7 +1369,7 @@ ureldatefmt_format( const URelativeDateTimeFormatter* reldatefmt, return res.extract(result, resultCapacity, *status); } -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 ureldatefmt_formatToResult( const URelativeDateTimeFormatter* reldatefmt, double offset, diff --git a/contrib/libs/icu/i18n/reldtfmt.cpp b/contrib/libs/icu/i18n/reldtfmt.cpp index c8ffd04646..5fdef1c0d6 100644 --- a/contrib/libs/icu/i18n/reldtfmt.cpp +++ b/contrib/libs/icu/i18n/reldtfmt.cpp @@ -135,7 +135,7 @@ RelativeDateFormat* RelativeDateFormat::clone() const { return new RelativeDateFormat(*this); } -UBool 
RelativeDateFormat::operator==(const Format& other) const { +bool RelativeDateFormat::operator==(const Format& other) const { if(DateFormat::operator==(other)) { // The DateFormat::operator== check for fCapitalizationContext equality above // is sufficient to check equality of all derived context-related data. @@ -146,7 +146,7 @@ UBool RelativeDateFormat::operator==(const Format& other) const { fTimePattern==that->fTimePattern && fLocale==that->fLocale ); } - return FALSE; + return false; } static const UChar APOSTROPHE = (UChar)0x0027; @@ -334,7 +334,7 @@ UDate RelativeDateFormat::parse( const UnicodeString& text, ParsePosition& pos) const { // redefined here because the other parse() function hides this function's - // cunterpart on DateFormat + // counterpart on DateFormat return DateFormat::parse(text, pos); } @@ -485,7 +485,7 @@ struct RelDateFmtDataSink : public ResourceSink { virtual ~RelDateFmtDataSink(); virtual void put(const char *key, ResourceValue &value, - UBool /*noFallback*/, UErrorCode &errorCode) { + UBool /*noFallback*/, UErrorCode &errorCode) override { ResourceTable relDayTable = value.getTable(errorCode); int32_t n = 0; int32_t len = 0; diff --git a/contrib/libs/icu/i18n/reldtfmt.h b/contrib/libs/icu/i18n/reldtfmt.h index ff48d3b5c4..98b333a02b 100644 --- a/contrib/libs/icu/i18n/reldtfmt.h +++ b/contrib/libs/icu/i18n/reldtfmt.h @@ -71,7 +71,7 @@ public: * @return A copy of the object. * @internal ICU 3.8 */ - virtual RelativeDateFormat* clone() const; + virtual RelativeDateFormat* clone() const override; /** * Return true if the given Format objects are semantically equal. Objects @@ -80,7 +80,7 @@ public: * @return true if the given Format objects are semantically equal. 
* @internal ICU 3.8 */ - virtual UBool operator==(const Format& other) const; + virtual bool operator==(const Format& other) const override; using DateFormat::format; @@ -103,7 +103,7 @@ public: */ virtual UnicodeString& format( Calendar& cal, UnicodeString& appendTo, - FieldPosition& pos) const; + FieldPosition& pos) const override; /** * Format an object to produce a string. This method handles Formattable @@ -122,7 +122,7 @@ public: virtual UnicodeString& format(const Formattable& obj, UnicodeString& appendTo, FieldPosition& pos, - UErrorCode& status) const; + UErrorCode& status) const override; /** @@ -146,7 +146,7 @@ public: */ virtual void parse( const UnicodeString& text, Calendar& cal, - ParsePosition& pos) const; + ParsePosition& pos) const override; /** * Parse a date/time string starting at the given parse position. For @@ -187,7 +187,7 @@ public: * @internal ICU 3.8 */ virtual UDate parse( const UnicodeString& text, - UErrorCode& status) const; + UErrorCode& status) const override; /** * Return a single pattern string generated by combining the patterns for the @@ -245,7 +245,7 @@ public: * updated with any new status from the function. * @internal ICU 53 */ - virtual void setContext(UDisplayContext value, UErrorCode& status); + virtual void setContext(UDisplayContext value, UErrorCode& status) override; private: SimpleDateFormat *fDateTimeFormatter; @@ -327,7 +327,7 @@ public: * other classes have different class IDs. 
* @internal ICU 3.8 */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; }; diff --git a/contrib/libs/icu/i18n/rematch.cpp b/contrib/libs/icu/i18n/rematch.cpp index 69909faab9..7d6eaeed8b 100644 --- a/contrib/libs/icu/i18n/rematch.cpp +++ b/contrib/libs/icu/i18n/rematch.cpp @@ -719,7 +719,7 @@ UBool RegexMatcher::find(UErrorCode &status) { if (findProgressInterrupt(startPos, status)) return FALSE; } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; case START_START: // Matches are only possible at the start of the input string @@ -767,7 +767,7 @@ UBool RegexMatcher::find(UErrorCode &status) { return FALSE; } } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; case START_STRING: case START_CHAR: @@ -799,7 +799,7 @@ UBool RegexMatcher::find(UErrorCode &status) { return FALSE; } } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; case START_LINE: { @@ -879,10 +879,15 @@ UBool RegexMatcher::find(UErrorCode &status) { } default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_ASSERT; + // Unknown value in fPattern->fStartType, should be from StartOfMatch enum. But + // we have reports of this in production code, don't use UPRV_UNREACHABLE_EXIT. + // See ICU-21669. 
+ status = U_INTERNAL_PROGRAM_ERROR; + return FALSE; } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } @@ -993,7 +998,7 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { if (findProgressInterrupt(startPos, status)) return FALSE; } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; case START_START: // Matches are only possible at the start of the input string @@ -1035,7 +1040,7 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { return FALSE; } } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; case START_STRING: case START_CHAR: @@ -1064,7 +1069,7 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { return FALSE; } } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; case START_LINE: { @@ -1135,10 +1140,15 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { } default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_ASSERT; + // Unknown value in fPattern->fStartType, should be from StartOfMatch enum. But + // we have reports of this in production code, don't use UPRV_UNREACHABLE_EXIT. + // See ICU-21669. + status = U_INTERNAL_PROGRAM_ERROR; + return FALSE; } - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } @@ -2005,7 +2015,7 @@ static UText *utext_extract_replace(UText *src, UText *dest, int64_t start, int6 return dest; } - // Caller did not provide a prexisting UText. + // Caller did not provide a preexisting UText. // Open a new one, and have it adopt the text buffer storage. if (U_FAILURE(*status)) { return NULL; @@ -2072,7 +2082,7 @@ int32_t RegexMatcher::split(UText *input, UErrorCode &status) { // - // Check arguements for validity + // Check arguments for validity // if (U_FAILURE(status)) { return 0; @@ -3717,7 +3727,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { } if (success && inputItr.inExpansion()) { - // We otained a match by consuming part of a string obtained from + // We obtained a match by consuming part of a string obtained from // case-folding a single code point of the input text. 
// This does not count as an overall match. success = FALSE; @@ -3913,7 +3923,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; if (lbStartIdx > 0) { - // move index to a code point boudary, if it's not on one already. + // move index to a code point boundary, if it's not on one already. UTEXT_SETNATIVEINDEX(fInputText, lbStartIdx); lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); } @@ -3962,7 +3972,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { break; } - // Look-behind match is good. Restore the orignal input string region, + // Look-behind match is good. Restore the original input string region, // which had been truncated to pin the end of the lookbehind match to the // position being looked-behind. fActiveStart = fData[opValue+2]; @@ -3999,7 +4009,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { // First time through loop. lbStartIdx = fp->fInputIdx - minML; if (lbStartIdx > 0) { - // move index to a code point boudary, if it's not on one already. + // move index to a code point boundary, if it's not on one already. UTEXT_SETNATIVEINDEX(fInputText, lbStartIdx); lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); } @@ -4051,7 +4061,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { // Look-behind expression matched, which means look-behind test as // a whole Fails - // Restore the orignal input string length, which had been truncated + // Restore the original input string length, which had been truncated // inorder to pin the end of the lookbehind match // to the position being looked-behind. fActiveStart = fData[opValue+2]; @@ -4234,7 +4244,11 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { default: // Trouble. The compiled pattern contains an entry with an // unrecognized type tag. 
- UPRV_UNREACHABLE; + UPRV_UNREACHABLE_ASSERT; + // Unknown opcode type in opType = URX_TYPE(pat[fp->fPatIdx]). But we have + // reports of this in production code, don't use UPRV_UNREACHABLE_EXIT. + // See ICU-21669. + status = U_INTERNAL_PROGRAM_ERROR; } if (U_FAILURE(status)) { @@ -5187,7 +5201,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu } if (success && inputItr.inExpansion()) { - // We otained a match by consuming part of a string obtained from + // We obtained a match by consuming part of a string obtained from // case-folding a single code point of the input text. // This does not count as an overall match. success = FALSE; @@ -5409,7 +5423,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu break; } - // Look-behind match is good. Restore the orignal input string region, + // Look-behind match is good. Restore the original input string region, // which had been truncated to pin the end of the lookbehind match to the // position being looked-behind. fActiveStart = fData[opValue+2]; @@ -5489,7 +5503,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu // Look-behind expression matched, which means look-behind test as // a whole Fails - // Restore the orignal input string length, which had been truncated + // Restore the original input string length, which had been truncated // inorder to pin the end of the lookbehind match // to the position being looked-behind. fActiveStart = fData[opValue+2]; @@ -5672,7 +5686,11 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu default: // Trouble. The compiled pattern contains an entry with an // unrecognized type tag. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_ASSERT; + // Unknown opcode type in opType = URX_TYPE(pat[fp->fPatIdx]). But we have + // reports of this in production code, don't use UPRV_UNREACHABLE_EXIT. + // See ICU-21669. 
+ status = U_INTERNAL_PROGRAM_ERROR; } if (U_FAILURE(status)) { diff --git a/contrib/libs/icu/i18n/remtrans.h b/contrib/libs/icu/i18n/remtrans.h index 13de01594a..398cc5177c 100644 --- a/contrib/libs/icu/i18n/remtrans.h +++ b/contrib/libs/icu/i18n/remtrans.h @@ -47,7 +47,7 @@ public: * Transliterator API. * @return A copy of the object. */ - virtual RemoveTransliterator* clone() const; + virtual RemoveTransliterator* clone() const override; /** * Implements {@link Transliterator#handleTransliterate}. @@ -59,12 +59,12 @@ public: * pos.contextLimit. Otherwise, assume the text is complete. */ virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, - UBool isIncremental) const; + UBool isIncremental) const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. diff --git a/contrib/libs/icu/i18n/repattrn.cpp b/contrib/libs/icu/i18n/repattrn.cpp index b3028e04f7..8c94948d29 100644 --- a/contrib/libs/icu/i18n/repattrn.cpp +++ b/contrib/libs/icu/i18n/repattrn.cpp @@ -291,13 +291,13 @@ RegexPattern *RegexPattern::clone() const { // characters can still be considered different. 
// //-------------------------------------------------------------------------- -UBool RegexPattern::operator ==(const RegexPattern &other) const { +bool RegexPattern::operator ==(const RegexPattern &other) const { if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { if (this->fPatternString != NULL && other.fPatternString != NULL) { return *(this->fPatternString) == *(other.fPatternString); } else if (this->fPattern == NULL) { if (other.fPattern == NULL) { - return TRUE; + return true; } } else if (other.fPattern != NULL) { UTEXT_SETNATIVEINDEX(this->fPattern, 0); @@ -305,7 +305,7 @@ UBool RegexPattern::operator ==(const RegexPattern &other) const { return utext_equals(this->fPattern, other.fPattern); } } - return FALSE; + return false; } //--------------------------------------------------------------------- diff --git a/contrib/libs/icu/i18n/rulebasedcollator.cpp b/contrib/libs/icu/i18n/rulebasedcollator.cpp index 60acf17815..5e5cc3db62 100644 --- a/contrib/libs/icu/i18n/rulebasedcollator.cpp +++ b/contrib/libs/icu/i18n/rulebasedcollator.cpp @@ -66,8 +66,8 @@ public: virtual ~FixedSortKeyByteSink(); private: - virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length); - virtual UBool Resize(int32_t appendCapacity, int32_t length); + virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override; + virtual UBool Resize(int32_t appendCapacity, int32_t length) override; }; FixedSortKeyByteSink::~FixedSortKeyByteSink() {} @@ -98,8 +98,8 @@ public: virtual ~CollationKeyByteSink(); private: - virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length); - virtual UBool Resize(int32_t appendCapacity, int32_t length); + virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override; + virtual UBool Resize(int32_t appendCapacity, int32_t length) override; CollationKey &key_; }; @@ -239,21 +239,21 @@ RuleBasedCollator 
&RuleBasedCollator::operator=(const RuleBasedCollator &other) UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) -UBool +bool RuleBasedCollator::operator==(const Collator& other) const { - if(this == &other) { return TRUE; } - if(!Collator::operator==(other)) { return FALSE; } + if(this == &other) { return true; } + if(!Collator::operator==(other)) { return false; } const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other); - if(*settings != *o.settings) { return FALSE; } - if(data == o.data) { return TRUE; } + if(*settings != *o.settings) { return false; } + if(data == o.data) { return true; } UBool thisIsRoot = data->base == NULL; UBool otherIsRoot = o.data->base == NULL; U_ASSERT(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be == - if(thisIsRoot != otherIsRoot) { return FALSE; } + if(thisIsRoot != otherIsRoot) { return false; } if((thisIsRoot || !tailoring->rules.isEmpty()) && (otherIsRoot || !o.tailoring->rules.isEmpty())) { // Shortcut: If both collators have valid rule strings, then compare those. - if(tailoring->rules == o.tailoring->rules) { return TRUE; } + if(tailoring->rules == o.tailoring->rules) { return true; } } // Different rule strings can result in the same or equivalent tailoring. // The rule strings are optional in ICU resource bundles, although included by default. 
@@ -261,14 +261,14 @@ RuleBasedCollator::operator==(const Collator& other) const { UErrorCode errorCode = U_ZERO_ERROR; LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode)); LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode)); - if(U_FAILURE(errorCode)) { return FALSE; } - if(*thisTailored != *otherTailored) { return FALSE; } + if(U_FAILURE(errorCode)) { return false; } + if(*thisTailored != *otherTailored) { return false; } // For completeness, we should compare all of the mappings; // or we should create a list of strings, sort it with one collator, // and check if both collators compare adjacent strings the same // (order & strength, down to quaternary); or similar. // Testing equality of collators seems unusual. - return TRUE; + return true; } int32_t @@ -830,7 +830,7 @@ class UTF16NFDIterator : public NFDIterator { public: UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {} protected: - virtual UChar32 nextRawCodePoint() { + virtual UChar32 nextRawCodePoint() override { if(s == limit) { return U_SENTINEL; } UChar32 c = *s++; if(limit == NULL && c == 0) { @@ -882,7 +882,7 @@ public: UTF8NFDIterator(const uint8_t *text, int32_t textLength) : s(text), pos(0), length(textLength) {} protected: - virtual UChar32 nextRawCodePoint() { + virtual UChar32 nextRawCodePoint() override { if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; } UChar32 c; U8_NEXT_OR_FFFD(s, pos, length, c); @@ -899,7 +899,7 @@ public: FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength) : u8ci(data, FALSE, text, 0, textLength) {} protected: - virtual UChar32 nextRawCodePoint() { + virtual UChar32 nextRawCodePoint() override { UErrorCode errorCode = U_ZERO_ERROR; return u8ci.nextCodePoint(errorCode); } @@ -911,7 +911,7 @@ class UIterNFDIterator : public NFDIterator { public: UIterNFDIterator(UCharIterator &it) : iter(it) {} protected: - virtual UChar32 nextRawCodePoint() { + 
virtual UChar32 nextRawCodePoint() override { return uiter_next32(&iter); } private: @@ -923,7 +923,7 @@ public: FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex) : uici(data, FALSE, it, startIndex) {} protected: - virtual UChar32 nextRawCodePoint() { + virtual UChar32 nextRawCodePoint() override { UErrorCode errorCode = U_ZERO_ERROR; return uici.nextCodePoint(errorCode); } @@ -1398,7 +1398,7 @@ public: levelCapacity = sink.GetRemainingCapacity(); } virtual ~PartLevelCallback() {} - virtual UBool needToWrite(Collation::Level l) { + virtual UBool needToWrite(Collation::Level l) override { if(!sink.Overflowed()) { // Remember a level that will be at least partially written. level = l; @@ -1600,10 +1600,7 @@ RuleBasedCollator::internalGetShortDefinitionString(const char *locale, appendSubtag(result, 'Z', subtag, length, errorCode); if(U_FAILURE(errorCode)) { return 0; } - if(result.length() <= capacity) { - uprv_memcpy(buffer, result.data(), result.length()); - } - return u_terminateChars(buffer, capacity, result.length(), &errorCode); + return result.extract(buffer, capacity, errorCode); } UBool diff --git a/contrib/libs/icu/i18n/scriptset.cpp b/contrib/libs/icu/i18n/scriptset.cpp index 18a3b263b7..6a1db8c01c 100644 --- a/contrib/libs/icu/i18n/scriptset.cpp +++ b/contrib/libs/icu/i18n/scriptset.cpp @@ -44,13 +44,13 @@ ScriptSet & ScriptSet::operator =(const ScriptSet &other) { return *this; } -UBool ScriptSet::operator == (const ScriptSet &other) const { +bool ScriptSet::operator == (const ScriptSet &other) const { for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { if (bits[i] != other.bits[i]) { - return FALSE; + return false; } } - return TRUE; + return true; } UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { diff --git a/contrib/libs/icu/i18n/scriptset.h b/contrib/libs/icu/i18n/scriptset.h index a41ab737a6..51980ab7b3 100644 --- a/contrib/libs/icu/i18n/scriptset.h +++ b/contrib/libs/icu/i18n/scriptset.h @@ 
-41,8 +41,8 @@ class U_I18N_API ScriptSet: public UMemory { ScriptSet(const ScriptSet &other); ~ScriptSet(); - UBool operator == (const ScriptSet &other) const; - UBool operator != (const ScriptSet &other) const {return !(*this == other);} + bool operator == (const ScriptSet &other) const; + bool operator != (const ScriptSet &other) const {return !(*this == other);} ScriptSet & operator = (const ScriptSet &other); UBool test(UScriptCode script, UErrorCode &status) const; @@ -51,7 +51,7 @@ class U_I18N_API ScriptSet: public UMemory { ScriptSet &reset(UScriptCode script, UErrorCode &status); ScriptSet &intersect(const ScriptSet &other); ScriptSet &intersect(UScriptCode script, UErrorCode &status); - UBool intersects(const ScriptSet &other) const; // Sets contain at least one script in commmon. + UBool intersects(const ScriptSet &other) const; // Sets contain at least one script in common. UBool contains(const ScriptSet &other) const; // All set bits in other are also set in this. ScriptSet &setAll(); diff --git a/contrib/libs/icu/i18n/search.cpp b/contrib/libs/icu/i18n/search.cpp index f944b68455..9e559bcc71 100644 --- a/contrib/libs/icu/i18n/search.cpp +++ b/contrib/libs/icu/i18n/search.cpp @@ -178,10 +178,10 @@ const UnicodeString & SearchIterator::getText(void) const // operator overloading ---------------------------------------------- -UBool SearchIterator::operator==(const SearchIterator &that) const +bool SearchIterator::operator==(const SearchIterator &that) const { if (this == &that) { - return TRUE; + return true; } return (m_breakiterator_ == that.m_breakiterator_ && m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch && diff --git a/contrib/libs/icu/i18n/selfmt.cpp b/contrib/libs/icu/i18n/selfmt.cpp index 47e53d75de..bb18e84ef6 100644 --- a/contrib/libs/icu/i18n/selfmt.cpp +++ b/contrib/libs/icu/i18n/selfmt.cpp @@ -164,19 +164,19 @@ SelectFormat::operator=(const SelectFormat& other) { return *this; } -UBool +bool SelectFormat::operator==(const 
Format& other) const { if (this == &other) { - return TRUE; + return true; } if (!Format::operator==(other)) { - return FALSE; + return false; } const SelectFormat& o = (const SelectFormat&)other; return msgPattern == o.msgPattern; } -UBool +bool SelectFormat::operator!=(const Format& other) const { return !operator==(other); } diff --git a/contrib/libs/icu/i18n/simpletz.cpp b/contrib/libs/icu/i18n/simpletz.cpp index 7972e2bb04..d9b0cd8e1e 100644 --- a/contrib/libs/icu/i18n/simpletz.cpp +++ b/contrib/libs/icu/i18n/simpletz.cpp @@ -42,7 +42,7 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleTimeZone) // Use only for decodeStartRule() and decodeEndRule() where the year is not -// available. Set February to 29 days to accomodate rules with that date +// available. Set February to 29 days to accommodate rules with that date // and day-of-week-on-or-before-that-date mode (DOW_LE_DOM_MODE). // The compareToRule() method adjusts to February 28 in non-leap years. // @@ -231,7 +231,7 @@ SimpleTimeZone::operator=(const SimpleTimeZone &right) // ------------------------------------- -UBool +bool SimpleTimeZone::operator==(const TimeZone& that) const { return ((this == &that) || @@ -289,7 +289,7 @@ SimpleTimeZone::setStartYear(int32_t year) * month, regardless of what day of the week it is (e.g., (-2, 0) is the * next-to-last day of the month). * <li>If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the - * first specified day of the week on or after the specfied day of the month. + * first specified day of the week on or after the specified day of the month. * (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month * [or the 15th itself if the 15th is a Sunday].) 
* <li>If dayOfWeek and DayOfWeekInMonth are both negative, they specify the @@ -509,8 +509,10 @@ SimpleTimeZone::getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, } void -SimpleTimeZone::getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, - int32_t& rawOffsetGMT, int32_t& savingsDST, UErrorCode& status) const { +SimpleTimeZone::getOffsetFromLocal(UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, int32_t& rawOffsetGMT, + int32_t& savingsDST, UErrorCode& status) const +{ if (U_FAILURE(status)) { return; } @@ -583,7 +585,7 @@ SimpleTimeZone::compareToRule(int8_t month, int8_t monthLen, int8_t prevMonthLen dayOfWeek = (int8_t)(1 + (dayOfWeek % 7)); // dayOfWeek is one-based if (dayOfMonth > monthLen) { dayOfMonth = 1; - /* When incrementing the month, it is desirible to overflow + /* When incrementing the month, it is desirable to overflow * from DECEMBER to DECEMBER+1, since we use the result to * compare against a real month. Wraparound of the value * leads to bug 4173604. */ diff --git a/contrib/libs/icu/i18n/smpdtfmt.cpp b/contrib/libs/icu/i18n/smpdtfmt.cpp index d704642b05..91748d82f9 100644 --- a/contrib/libs/icu/i18n/smpdtfmt.cpp +++ b/contrib/libs/icu/i18n/smpdtfmt.cpp @@ -54,6 +54,7 @@ #include "unicode/udisplaycontext.h" #include "unicode/brkiter.h" #include "unicode/rbnf.h" +#include "unicode/dtptngen.h" #include "uresimp.h" #include "olsontz.h" #include "patternprops.h" @@ -63,6 +64,7 @@ #include "uassert.h" #include "cmemory.h" #include "umutex.h" +#include "mutex.h" #include <float.h> #include "smpdtfst.h" #include "sharednumberformat.h" @@ -226,10 +228,17 @@ static const int32_t gFieldRangeBias[] = { }; // When calendar uses hebr numbering (i.e. 
he@calendar=hebrew), -// offset the years within the current millenium down to 1-999 +// offset the years within the current millennium down to 1-999 static const int32_t HEBREW_CAL_CUR_MILLENIUM_START_YEAR = 5000; static const int32_t HEBREW_CAL_CUR_MILLENIUM_END_YEAR = 6000; +/** + * Maximum range for detecting daylight offset of a time zone when parsed time zone + * string indicates it's daylight saving time, but the detected time zone does not + * observe daylight saving time at the parsed date. + */ +static const double MAX_DAYLIGHT_DETECTION_RANGE = 30*365*24*60*60*1000.0; + static UMutex LOCK; UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat) @@ -586,11 +595,29 @@ SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other) fLocale = other.fLocale; // TimeZoneFormat can now be set independently via setter. - // If it is NULL, it will be lazily initialized from locale + // If it is NULL, it will be lazily initialized from locale. delete fTimeZoneFormat; - fTimeZoneFormat = NULL; - if (other.fTimeZoneFormat) { - fTimeZoneFormat = new TimeZoneFormat(*other.fTimeZoneFormat); + fTimeZoneFormat = nullptr; + TimeZoneFormat *otherTZFormat; + { + // Synchronization is required here, when accessing other.fTimeZoneFormat, + // because another thread may be concurrently executing other.tzFormat(), + // a logically const function that lazily creates other.fTimeZoneFormat. + // + // Without synchronization, reordered memory writes could allow us + // to see a non-null fTimeZoneFormat before the object itself was + // fully initialized. In case of a race, it doesn't matter whether + // we see a null or a fully initialized other.fTimeZoneFormat, + // only that we avoid seeing a partially initialized object. + // + // Once initialized, no const function can modify fTimeZoneFormat, + // meaning that once we have safely grabbed the other.fTimeZoneFormat + // pointer, continued synchronization is not required to use it. 
+ Mutex m(&LOCK); + otherTZFormat = other.fTimeZoneFormat; + } + if (otherTZFormat) { + fTimeZoneFormat = new TimeZoneFormat(*otherTZFormat); } #if !UCONFIG_NO_BREAK_ITERATION @@ -631,7 +658,7 @@ SimpleDateFormat::clone() const //---------------------------------------------------------------------- -UBool +bool SimpleDateFormat::operator==(const Format& other) const { if (DateFormat::operator==(other)) { @@ -646,10 +673,16 @@ SimpleDateFormat::operator==(const Format& other) const fHaveDefaultCentury == that->fHaveDefaultCentury && fDefaultCenturyStart == that->fDefaultCenturyStart); } - return FALSE; + return false; } //---------------------------------------------------------------------- +static const UChar* timeSkeletons[4] = { + u"jmmsszzzz", // kFull + u"jmmssz", // kLong + u"jmmss", // kMedium + u"jmm", // kShort +}; void SimpleDateFormat::construct(EStyle timeStyle, EStyle dateStyle, @@ -714,35 +747,75 @@ void SimpleDateFormat::construct(EStyle timeStyle, fDateOverride.setToBogus(); fTimeOverride.setToBogus(); + UnicodeString timePattern; + if (timeStyle >= kFull && timeStyle <= kShort) { + const char* baseLocID = locale.getBaseName(); + if (baseLocID[0]!=0 && uprv_strcmp(baseLocID,"und")!=0) { + UErrorCode useStatus = U_ZERO_ERROR; + Locale baseLoc(baseLocID); + Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus)); + if (U_SUCCESS(useStatus) && validLoc!=baseLoc) { + bool useDTPG = false; + const char* baseReg = baseLoc.getCountry(); // empty string if no region + if ((baseReg[0]!=0 && uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0) + || uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) { + // use DTPG if + // * baseLoc has a region and validLoc does not have the same one (or has none), OR + // * validLoc has a different language code than baseLoc + useDTPG = true; + } + if (useDTPG) { + // The standard time formats may have the wrong time cycle, because: + // the valid locale differs in important 
ways (region, language) from + // the base locale. + // We could *also* check whether they do actually have a mismatch with + // the time cycle preferences for the region, but that is a lot more + // work for little or no additional benefit, since just going ahead + // and always synthesizing the time format as per the following should + // create a locale-appropriate pattern with cycle that matches the + // region preferences anyway. + LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstanceNoStdPat(locale, useStatus)); + if (U_SUCCESS(useStatus)) { + UnicodeString timeSkeleton(TRUE, timeSkeletons[timeStyle], -1); + timePattern = dtpg->getBestPattern(timeSkeleton, useStatus); + } + } + } + } + } + // if the pattern should include both date and time information, use the date/time // pattern string as a guide to tell use how to glue together the appropriate date // and time pattern strings. if ((timeStyle != kNone) && (dateStyle != kNone)) { - currentBundle.adoptInstead( - ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); - if (U_FAILURE(status)) { - status = U_INVALID_FORMAT_ERROR; - return; - } - switch (ures_getType(currentBundle.getAlias())) { - case URES_STRING: { - resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); - break; - } - case URES_ARRAY: { - resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); - ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); - fTimeOverride.setTo(TRUE, ovrStr, ovrStrLen); - break; - } - default: { + UnicodeString tempus1(timePattern); + if (tempus1.length() == 0) { + currentBundle.adoptInstead( + ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); + if (U_FAILURE(status)) { status = U_INVALID_FORMAT_ERROR; return; } - } + switch (ures_getType(currentBundle.getAlias())) { + case URES_STRING: { + resStr = ures_getString(currentBundle.getAlias(), &resStrLen, 
&status); + break; + } + case URES_ARRAY: { + resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); + ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); + fTimeOverride.setTo(TRUE, ovrStr, ovrStrLen); + break; + } + default: { + status = U_INVALID_FORMAT_ERROR; + return; + } + } - UnicodeString tempus1(TRUE, resStr, resStrLen); + tempus1.setTo(TRUE, resStr, resStrLen); + } currentBundle.adoptInstead( ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, NULL, &status)); @@ -784,29 +857,32 @@ void SimpleDateFormat::construct(EStyle timeStyle, // pattern string from the resources // setTo() - see DateFormatSymbols::assignArray comments else if (timeStyle != kNone) { - currentBundle.adoptInstead( - ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); - if (U_FAILURE(status)) { - status = U_INVALID_FORMAT_ERROR; - return; - } - switch (ures_getType(currentBundle.getAlias())) { - case URES_STRING: { - resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); - break; - } - case URES_ARRAY: { - resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); - ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); - fDateOverride.setTo(TRUE, ovrStr, ovrStrLen); - break; - } - default: { + fPattern.setTo(timePattern); + if (fPattern.length() == 0) { + currentBundle.adoptInstead( + ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); + if (U_FAILURE(status)) { status = U_INVALID_FORMAT_ERROR; return; } + switch (ures_getType(currentBundle.getAlias())) { + case URES_STRING: { + resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); + break; + } + case URES_ARRAY: { + resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); + ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); + fDateOverride.setTo(TRUE, ovrStr, 
ovrStrLen); + break; + } + default: { + status = U_INVALID_FORMAT_ERROR; + return; + } + } + fPattern.setTo(TRUE, resStr, resStrLen); } - fPattern.setTo(TRUE, resStr, resStrLen); } else if (dateStyle != kNone) { currentBundle.adoptInstead( @@ -848,7 +924,8 @@ Calendar* SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status) { if(!U_FAILURE(status)) { - fCalendar = Calendar::createInstance(adoptZone?adoptZone:TimeZone::createDefault(), locale, status); + fCalendar = Calendar::createInstance( + adoptZone ? adoptZone : TimeZone::forLocaleOrDefault(locale), locale, status); } return fCalendar; } @@ -1797,7 +1874,7 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, } } else { - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } appendTo += zoneString; @@ -1805,7 +1882,10 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, break; case UDAT_QUARTER_FIELD: - if (count >= 4) + if (count >= 5) + _appendSymbol(appendTo, value/3, fSymbols->fNarrowQuarters, + fSymbols->fNarrowQuartersCount); + else if (count == 4) _appendSymbol(appendTo, value/3, fSymbols->fQuarters, fSymbols->fQuartersCount); else if (count == 3) @@ -1816,7 +1896,10 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, break; case UDAT_STANDALONE_QUARTER_FIELD: - if (count >= 4) + if (count >= 5) + _appendSymbol(appendTo, value/3, fSymbols->fStandaloneNarrowQuarters, + fSymbols->fStandaloneNarrowQuartersCount); + else if (count == 4) _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount); else if (count == 3) @@ -2134,7 +2217,7 @@ SimpleDateFormat::zeroPaddingNumber( //---------------------------------------------------------------------- /** - * Return true if the given format character, occuring count + * Return true if the given format character, occurring count * times, represents a numeric field. 
*/ UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) { @@ -2503,10 +2586,10 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& if (btz != NULL) { if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) { btz->getOffsetFromLocal(localMillis, - BasicTimeZone::kStandard, BasicTimeZone::kStandard, raw, dst, status); + UCAL_TZ_LOCAL_STANDARD_FORMER, UCAL_TZ_LOCAL_STANDARD_LATTER, raw, dst, status); } else { btz->getOffsetFromLocal(localMillis, - BasicTimeZone::kDaylight, BasicTimeZone::kDaylight, raw, dst, status); + UCAL_TZ_LOCAL_DAYLIGHT_FORMER, UCAL_TZ_LOCAL_DAYLIGHT_LATTER, raw, dst, status); } } else { // No good way to resolve ambiguous time at transition, @@ -2524,51 +2607,47 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& } else { // tztype == TZTYPE_DST if (dst == 0) { if (btz != NULL) { - UDate time = localMillis + raw; - // We use the nearest daylight saving time rule. - TimeZoneTransition beforeTrs, afterTrs; - UDate beforeT = time, afterT = time; - int32_t beforeSav = 0, afterSav = 0; - UBool beforeTrsAvail, afterTrsAvail; - - // Search for DST rule before or on the time - while (TRUE) { - beforeTrsAvail = btz->getPreviousTransition(beforeT, TRUE, beforeTrs); - if (!beforeTrsAvail) { + // This implementation resolves daylight saving time offset + // closest rule after the given time. 
+ UDate baseTime = localMillis + raw; + UDate time = baseTime; + UDate limit = baseTime + MAX_DAYLIGHT_DETECTION_RANGE; + TimeZoneTransition trs; + UBool trsAvail; + + // Search for DST rule after the given time + while (time < limit) { + trsAvail = btz->getNextTransition(time, FALSE, trs); + if (!trsAvail) { break; } - beforeT = beforeTrs.getTime() - 1; - beforeSav = beforeTrs.getFrom()->getDSTSavings(); - if (beforeSav != 0) { + resolvedSavings = trs.getTo()->getDSTSavings(); + if (resolvedSavings != 0) { break; } + time = trs.getTime(); } - // Search for DST rule after the time - while (TRUE) { - afterTrsAvail = btz->getNextTransition(afterT, FALSE, afterTrs); - if (!afterTrsAvail) { - break; - } - afterT = afterTrs.getTime(); - afterSav = afterTrs.getTo()->getDSTSavings(); - if (afterSav != 0) { - break; + if (resolvedSavings == 0) { + // If no DST rule after the given time was found, search for + // DST rule before. + time = baseTime; + limit = baseTime - MAX_DAYLIGHT_DETECTION_RANGE; + while (time > limit) { + trsAvail = btz->getPreviousTransition(time, TRUE, trs); + if (!trsAvail) { + break; + } + resolvedSavings = trs.getFrom()->getDSTSavings(); + if (resolvedSavings != 0) { + break; + } + time = trs.getTime() - 1; } - } - if (beforeTrsAvail && afterTrsAvail) { - if (time - beforeT > afterT - time) { - resolvedSavings = afterSav; - } else { - resolvedSavings = beforeSav; + if (resolvedSavings == 0) { + resolvedSavings = btz->getDSTSavings(); } - } else if (beforeTrsAvail && beforeSav != 0) { - resolvedSavings = beforeSav; - } else if (afterTrsAvail && afterSav != 0) { - resolvedSavings = afterSav; - } else { - resolvedSavings = btz->getDSTSavings(); } } else { resolvedSavings = tz.getDSTSavings(); @@ -2777,7 +2856,7 @@ UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern, continue; // Do not update p. 
} } - // hack around oldleniency being a bit of a catch-all bucket and we're just adding support specifically for paritial matches + // hack around oldleniency being a bit of a catch-all bucket and we're just adding support specifically for partial matches if(partialMatchLenient && oldLeniency) { break; } @@ -3398,7 +3477,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC return pos.getIndex(); } else { // count >= 3 // i.e., QQQ or QQQQ - // Want to be able to parse both short and long forms. + // Want to be able to parse short, long, and narrow forms. // Try count == 4 first: int32_t newStart = 0; @@ -3412,6 +3491,11 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0) return newStart; } + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { + if ((newStart = matchQuarterString(text, start, UCAL_MONTH, + fSymbols->fNarrowQuarters, fSymbols->fNarrowQuartersCount, cal)) > 0) + return newStart; + } if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) return newStart; // else we allowing parsing as number, below @@ -3444,6 +3528,11 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0) return newStart; } + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { + if ((newStart = matchQuarterString(text, start, UCAL_MONTH, + fSymbols->fStandaloneNarrowQuarters, fSymbols->fStandaloneNarrowQuartersCount, cal)) > 0) + return newStart; + } if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) return newStart; // else we allowing parsing as number, below @@ -4254,19 +4343,10 @@ SimpleDateFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) const // Lazy TimeZoneFormat instantiation, semantically const. 
TimeZoneFormat * SimpleDateFormat::tzFormat(UErrorCode &status) const { - if (fTimeZoneFormat == NULL) { - umtx_lock(&LOCK); - { - if (fTimeZoneFormat == NULL) { - TimeZoneFormat *tzfmt = TimeZoneFormat::createInstance(fLocale, status); - if (U_FAILURE(status)) { - return NULL; - } - - const_cast<SimpleDateFormat *>(this)->fTimeZoneFormat = tzfmt; - } - } - umtx_unlock(&LOCK); + Mutex m(&LOCK); + if (fTimeZoneFormat == nullptr && U_SUCCESS(status)) { + const_cast<SimpleDateFormat *>(this)->fTimeZoneFormat = + TimeZoneFormat::createInstance(fLocale, status); } return fTimeZoneFormat; } diff --git a/contrib/libs/icu/i18n/sortkey.cpp b/contrib/libs/icu/i18n/sortkey.cpp index 12289482ec..441d15d426 100644 --- a/contrib/libs/icu/i18n/sortkey.cpp +++ b/contrib/libs/icu/i18n/sortkey.cpp @@ -20,7 +20,7 @@ // // 6/20/97 helena Java class name change. // 6/23/97 helena Added comments to make code more readable. -// 6/26/98 erm Canged to use byte arrays instead of UnicodeString +// 6/26/98 erm Changed to use byte arrays instead of UnicodeString // 7/31/98 erm hashCode: minimum inc should be 2 not 1, // Cleaned up operator= // 07/12/99 helena HPUX 11 CC port. 
@@ -137,7 +137,7 @@ CollationKey::setToBogus() return *this; } -UBool +bool CollationKey::operator==(const CollationKey& source) const { return getLength() == source.getLength() && diff --git a/contrib/libs/icu/i18n/standardplural.cpp b/contrib/libs/icu/i18n/standardplural.cpp index 0391034b3e..5a6069bf7d 100644 --- a/contrib/libs/icu/i18n/standardplural.cpp +++ b/contrib/libs/icu/i18n/standardplural.cpp @@ -23,7 +23,7 @@ U_NAMESPACE_BEGIN static const char *gKeywords[StandardPlural::COUNT] = { - "zero", "one", "two", "few", "many", "other" + "zero", "one", "two", "few", "many", "other", "=0", "=1" }; const char *StandardPlural::getKeyword(Form p) { @@ -60,21 +60,55 @@ int32_t StandardPlural::indexOrNegativeFromString(const char *keyword) { return ZERO; } break; + case '=': + if (uprv_strcmp(keyword, "0") == 0) { + return EQ_0; + } else if (uprv_strcmp(keyword, "1") == 0) { + return EQ_1; + } + break; + // Also allow "0" and "1" + case '0': + if (*keyword == 0) { + return EQ_0; + } + break; + case '1': + if (*keyword == 0) { + return EQ_1; + } + break; default: break; } return -1; } -static const UChar gZero[] = { 0x7A, 0x65, 0x72, 0x6F }; -static const UChar gOne[] = { 0x6F, 0x6E, 0x65 }; -static const UChar gTwo[] = { 0x74, 0x77, 0x6F }; -static const UChar gFew[] = { 0x66, 0x65, 0x77 }; -static const UChar gMany[] = { 0x6D, 0x61, 0x6E, 0x79 }; -static const UChar gOther[] = { 0x6F, 0x74, 0x68, 0x65, 0x72 }; +static const UChar gZero[] = u"zero"; +static const UChar gOne[] = u"one"; +static const UChar gTwo[] = u"two"; +static const UChar gFew[] = u"few"; +static const UChar gMany[] = u"many"; +static const UChar gOther[] = u"other"; +static const UChar gEq0[] = u"=0"; +static const UChar gEq1[] = u"=1"; int32_t StandardPlural::indexOrNegativeFromString(const UnicodeString &keyword) { switch (keyword.length()) { + case 1: + if (keyword.charAt(0) == '0') { + return EQ_0; + } else if (keyword.charAt(0) == '1') { + return EQ_1; + } + break; + case 2: + if 
(keyword.compare(gEq0, 2) == 0) { + return EQ_0; + } else if (keyword.compare(gEq1, 2) == 0) { + return EQ_1; + } + break; case 3: if (keyword.compare(gOne, 3) == 0) { return ONE; diff --git a/contrib/libs/icu/i18n/standardplural.h b/contrib/libs/icu/i18n/standardplural.h index 33e1d605f6..16593065c8 100644 --- a/contrib/libs/icu/i18n/standardplural.h +++ b/contrib/libs/icu/i18n/standardplural.h @@ -35,6 +35,8 @@ public: FEW, MANY, OTHER, + EQ_0, + EQ_1, COUNT }; diff --git a/contrib/libs/icu/i18n/strmatch.h b/contrib/libs/icu/i18n/strmatch.h index 71ae984951..6d2e392e65 100644 --- a/contrib/libs/icu/i18n/strmatch.h +++ b/contrib/libs/icu/i18n/strmatch.h @@ -78,21 +78,21 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico * Implement UnicodeFunctor * @return a copy of the object. */ - virtual StringMatcher* clone() const; + virtual StringMatcher* clone() const override; /** * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer * and return the pointer. * @return the UnicodeMatcher point. */ - virtual UnicodeMatcher* toMatcher() const; + virtual UnicodeMatcher* toMatcher() const override; /** * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer * and return the pointer. * @return the UnicodeReplacer pointer. */ - virtual UnicodeReplacer* toReplacer() const; + virtual UnicodeReplacer* toReplacer() const override; /** * Implement UnicodeMatcher @@ -109,17 +109,17 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico * considered for matching will be text.charAt(limit-1) in the * forward direction or text.charAt(limit+1) in the backward * direction. - * @param incremental if TRUE, then assume further characters may + * @param incremental if true, then assume further characters may * be inserted at limit and check for partial matching. Otherwise * assume the text as given is complete. * @return a match degree value indicating a full match, a partial - * match, or a mismatch. 
If incremental is FALSE then + * match, or a mismatch. If incremental is false then * U_PARTIAL_MATCH should never be returned. */ virtual UMatchDegree matches(const Replaceable& text, int32_t& offset, int32_t limit, - UBool incremental); + UBool incremental) override; /** * Implement UnicodeMatcher @@ -128,29 +128,29 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico * @return A reference to 'result'. */ virtual UnicodeString& toPattern(UnicodeString& result, - UBool escapeUnprintable = FALSE) const; + UBool escapeUnprintable = false) const override; /** * Implement UnicodeMatcher - * Returns TRUE if this matcher will match a character c, where c + * Returns true if this matcher will match a character c, where c * & 0xFF == v, at offset, in the forward direction (with limit > * offset). This is used by <tt>RuleBasedTransliterator</tt> for * indexing. * @param v the given value - * @return TRUE if this matcher will match a character c, + * @return true if this matcher will match a character c, * where c & 0xFF == v */ - virtual UBool matchesIndexValue(uint8_t v) const; + virtual UBool matchesIndexValue(uint8_t v) const override; /** * Implement UnicodeMatcher */ - virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const override; /** * Implement UnicodeFunctor */ - virtual void setData(const TransliterationRuleData*); + virtual void setData(const TransliterationRuleData*) override; /** * Replace characters in 'text' from 'start' to 'limit' with the @@ -172,7 +172,7 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico virtual int32_t replace(Replaceable& text, int32_t start, int32_t limit, - int32_t& cursor); + int32_t& cursor) override; /** * Returns a string representation of this replacer. If the @@ -181,14 +181,14 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico * replacer that is equal to this one. 
* @param result the string to receive the pattern. Previous * contents will be deleted. - * @param escapeUnprintable if TRUE then convert unprintable + * @param escapeUnprintable if true then convert unprintable * character to their hex escape representations, \\uxxxx or * \\Uxxxxxxxx. Unprintable characters are defined by * Utility.isUnprintable(). * @return a reference to 'result'. */ virtual UnicodeString& toReplacerPattern(UnicodeString& result, - UBool escapeUnprintable) const; + UBool escapeUnprintable) const override; /** * Remove any match data. This must be called before performing a @@ -199,7 +199,7 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -211,7 +211,7 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico * into the given set. * @param toUnionTo the set into which to union the output characters */ - virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const; + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const override; private: diff --git a/contrib/libs/icu/i18n/strrepl.h b/contrib/libs/icu/i18n/strrepl.h index 7f74d0d945..8063626a33 100644 --- a/contrib/libs/icu/i18n/strrepl.h +++ b/contrib/libs/icu/i18n/strrepl.h @@ -111,13 +111,13 @@ class StringReplacer : public UnicodeFunctor, public UnicodeReplacer { /** * Implement UnicodeFunctor */ - virtual StringReplacer* clone() const; + virtual StringReplacer* clone() const override; /** * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer * and return the pointer. 
*/ - virtual UnicodeReplacer* toReplacer() const; + virtual UnicodeReplacer* toReplacer() const override; /** * UnicodeReplacer API @@ -125,23 +125,23 @@ class StringReplacer : public UnicodeFunctor, public UnicodeReplacer { virtual int32_t replace(Replaceable& text, int32_t start, int32_t limit, - int32_t& cursor); + int32_t& cursor) override; /** * UnicodeReplacer API */ virtual UnicodeString& toReplacerPattern(UnicodeString& result, - UBool escapeUnprintable) const; + UBool escapeUnprintable) const override; /** * Implement UnicodeReplacer */ - virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const; + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const override; /** * UnicodeFunctor API */ - virtual void setData(const TransliterationRuleData*); + virtual void setData(const TransliterationRuleData*) override; /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -151,7 +151,7 @@ class StringReplacer : public UnicodeFunctor, public UnicodeReplacer { /** * ICU "poor man's RTTI", returns a UClassID for the actual class. 
*/ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; }; U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/stsearch.cpp b/contrib/libs/icu/i18n/stsearch.cpp index 3e6ed4648b..1bade8fbd1 100644 --- a/contrib/libs/icu/i18n/stsearch.cpp +++ b/contrib/libs/icu/i18n/stsearch.cpp @@ -184,7 +184,7 @@ StringSearch::clone() const { // operator overloading --------------------------------------------- StringSearch & StringSearch::operator=(const StringSearch &that) { - if ((*this) != that) { + if (this != &that) { UErrorCode status = U_ZERO_ERROR; m_text_ = that.m_text_; m_breakiterator_ = that.m_breakiterator_; @@ -205,17 +205,17 @@ StringSearch & StringSearch::operator=(const StringSearch &that) return *this; } -UBool StringSearch::operator==(const SearchIterator &that) const +bool StringSearch::operator==(const SearchIterator &that) const { if (this == &that) { - return TRUE; + return true; } if (SearchIterator::operator ==(that)) { StringSearch &thatsrch = (StringSearch &)that; return (this->m_pattern_ == thatsrch.m_pattern_ && this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); } - return FALSE; + return false; } // public get and set methods ---------------------------------------- @@ -335,7 +335,7 @@ int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) // the flipping direction issue has already been handled // in next() // for boundary check purposes. this will ensure that the - // next match will not preceed the current offset + // next match will not precede the current offset // note search->matchedIndex will always be set to something // in the code m_search_->matchedIndex = position - 1; diff --git a/contrib/libs/icu/i18n/taiwncal.h b/contrib/libs/icu/i18n/taiwncal.h index 01d4d31e80..ab6b6aff09 100644 --- a/contrib/libs/icu/i18n/taiwncal.h +++ b/contrib/libs/icu/i18n/taiwncal.h @@ -91,7 +91,7 @@ public: * @return return a polymorphic copy of this calendar. 
* @internal */ - virtual TaiwanCalendar* clone() const; + virtual TaiwanCalendar* clone() const override; public: /** @@ -104,7 +104,7 @@ public: * same class ID. Objects of other classes have different class IDs. * @internal */ - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; /** * Return the class ID for this class. This is useful only for comparing to a return @@ -125,7 +125,7 @@ public: * @return calendar type * @internal */ - virtual const char * getType() const; + virtual const char * getType() const override; private: TaiwanCalendar(); // default constructor not implemented @@ -139,13 +139,13 @@ private: * @return the extended year * @internal */ - virtual int32_t handleGetExtendedYear(); + virtual int32_t handleGetExtendedYear() override; /** * Subclasses may override this method to compute several fields * specific to each calendar system. * @internal */ - virtual void handleComputeFields(int32_t julianDay, UErrorCode& status); + virtual void handleComputeFields(int32_t julianDay, UErrorCode& status) override; /** * Subclass API for defining limits of different types. 
* @param field one of the field numbers @@ -153,26 +153,26 @@ private: * <code>LEAST_MAXIMUM</code>, or <code>MAXIMUM</code> * @internal */ - virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const override; /** - * Returns TRUE because the Taiwan Calendar does have a default century + * Returns true because the Taiwan Calendar does have a default century * @internal */ - virtual UBool haveDefaultCentury() const; + virtual UBool haveDefaultCentury() const override; /** * Returns the date of the start of the default century * @return start of century - in milliseconds since epoch, 1970 * @internal */ - virtual UDate defaultCenturyStart() const; + virtual UDate defaultCenturyStart() const override; /** * Returns the year in which the default century begins * @internal */ - virtual int32_t defaultCenturyStartYear() const; + virtual int32_t defaultCenturyStartYear() const override; }; U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/timezone.cpp b/contrib/libs/icu/i18n/timezone.cpp index 284334ebf7..8115a45e0a 100644 --- a/contrib/libs/icu/i18n/timezone.cpp +++ b/contrib/libs/icu/i18n/timezone.cpp @@ -311,7 +311,7 @@ void U_CALLCONV initStaticTimeZones() { // be valid even if we can't load the time zone UDataMemory. ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONE, timeZone_cleanup); - // new can't fail below, as we use placement new into staticly allocated space. + // new can't fail below, as we use placement new into statically allocated space. 
new(gRawGMT) SimpleTimeZone(0, UnicodeString(TRUE, GMT_ID, GMT_ID_LENGTH)); new(gRawUNKNOWN) SimpleTimeZone(0, UnicodeString(TRUE, UNKNOWN_ZONE_ID, UNKNOWN_ZONE_ID_LENGTH)); @@ -376,7 +376,7 @@ TimeZone::operator=(const TimeZone &right) // ------------------------------------- -UBool +bool TimeZone::operator==(const TimeZone& that) const { return typeid(*this) == typeid(that) && @@ -445,7 +445,7 @@ TimeZone::createTimeZone(const UnicodeString& ID) if (result == NULL) { U_DEBUG_TZ_MSG(("failed to load time zone with id - falling to Etc/Unknown(GMT)")); const TimeZone& unknown = getUnknown(); - // Unknown zone uses staticly allocated memory, so creation of it can never fail due to OOM. + // Unknown zone uses statically allocated memory, so creation of it can never fail due to OOM. result = unknown.clone(); } return result; @@ -579,6 +579,24 @@ TimeZone::createDefault() // ------------------------------------- +TimeZone* U_EXPORT2 +TimeZone::forLocaleOrDefault(const Locale& locale) +{ + char buffer[ULOC_KEYWORDS_CAPACITY] = ""; + UErrorCode localStatus = U_ZERO_ERROR; + int32_t count = locale.getKeywordValue("timezone", buffer, sizeof(buffer), localStatus); + if (U_FAILURE(localStatus) || localStatus == U_STRING_NOT_TERMINATED_WARNING) { + // the "timezone" keyword exceeds ULOC_KEYWORDS_CAPACITY; ignore and use default. + count = 0; + } + if (count > 0) { + return TimeZone::createTimeZone(UnicodeString(buffer, count, US_INV)); + } + return TimeZone::createDefault(); +} + +// ------------------------------------- + void U_EXPORT2 TimeZone::adoptDefault(TimeZone* zone) { @@ -933,15 +951,15 @@ public: virtual ~TZEnumeration(); - virtual StringEnumeration *clone() const { + virtual StringEnumeration *clone() const override { return new TZEnumeration(*this); } - virtual int32_t count(UErrorCode& status) const { + virtual int32_t count(UErrorCode& status) const override { return U_FAILURE(status) ? 
0 : len; } - virtual const UnicodeString* snext(UErrorCode& status) { + virtual const UnicodeString* snext(UErrorCode& status) override { if (U_SUCCESS(status) && map != NULL && pos < len) { getID(map[pos], status); ++pos; @@ -950,13 +968,13 @@ public: return 0; } - virtual void reset(UErrorCode& /*status*/) { + virtual void reset(UErrorCode& /*status*/) override { pos = 0; } public: static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; }; TZEnumeration::~TZEnumeration() { @@ -977,21 +995,40 @@ TimeZone::createTimeZoneIDEnumeration( } StringEnumeration* U_EXPORT2 +TimeZone::createEnumeration(UErrorCode& status) { + return TZEnumeration::create(UCAL_ZONE_TYPE_ANY, NULL, NULL, status); +} + +StringEnumeration* U_EXPORT2 +TimeZone::createEnumerationForRawOffset(int32_t rawOffset, UErrorCode& status) { + return TZEnumeration::create(UCAL_ZONE_TYPE_ANY, NULL, &rawOffset, status); +} + +StringEnumeration* U_EXPORT2 +TimeZone::createEnumerationForRegion(const char* region, UErrorCode& status) { + return TZEnumeration::create(UCAL_ZONE_TYPE_ANY, region, NULL, status); +} + +// +// Next 3 methods are equivalent to above, but ignores UErrorCode. +// These methods were deprecated in ICU 70. 
+ +StringEnumeration* U_EXPORT2 TimeZone::createEnumeration() { UErrorCode ec = U_ZERO_ERROR; - return TZEnumeration::create(UCAL_ZONE_TYPE_ANY, NULL, NULL, ec); + return createEnumeration(ec); } StringEnumeration* U_EXPORT2 TimeZone::createEnumeration(int32_t rawOffset) { UErrorCode ec = U_ZERO_ERROR; - return TZEnumeration::create(UCAL_ZONE_TYPE_ANY, NULL, &rawOffset, ec); + return createEnumerationForRawOffset(rawOffset, ec); } StringEnumeration* U_EXPORT2 -TimeZone::createEnumeration(const char* country) { +TimeZone::createEnumeration(const char* region) { UErrorCode ec = U_ZERO_ERROR; - return TZEnumeration::create(UCAL_ZONE_TYPE_ANY, country, NULL, ec); + return createEnumerationForRegion(region, ec); } // --------------------------------------- @@ -1226,7 +1263,7 @@ TimeZone::getDisplayName(UBool inDaylight, EDisplayType style, const Locale& loc tzfmt->format(UTZFMT_STYLE_GENERIC_SHORT, *this, date, result, &timeType); break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } // Generic format many use Localized GMT as the final fallback. // When Localized GMT format is used, the result might not be @@ -1254,7 +1291,7 @@ TimeZone::getDisplayName(UBool inDaylight, EDisplayType style, const Locale& loc tzfmt->formatOffsetISO8601Basic(offset, FALSE, FALSE, FALSE, result, status); break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } else { @@ -1269,7 +1306,7 @@ TimeZone::getDisplayName(UBool inDaylight, EDisplayType style, const Locale& loc nameType = inDaylight ? 
UTZNM_SHORT_DAYLIGHT : UTZNM_SHORT_STANDARD; break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } LocalPointer<TimeZoneNames> tznames(TimeZoneNames::createInstance(locale, status)); if (U_FAILURE(status)) { @@ -1660,7 +1697,7 @@ TimeZone::getIDForWindowsID(const UnicodeString& winid, const char* region, Unic winidKey[winKeyLen] = 0; ures_getByKey(zones, winidKey, zones, &tmperr); // use tmperr, because windows mapping might not - // be avaiable by design + // be available by design if (U_FAILURE(tmperr)) { ures_close(zones); return id; @@ -1673,7 +1710,7 @@ TimeZone::getIDForWindowsID(const UnicodeString& winid, const char* region, Unic const UChar *tzids = ures_getStringByKey(zones, region, &len, &tmperr); // use tmperr, because // regional mapping is optional if (U_SUCCESS(tmperr)) { - // first ID delimited by space is the defasult one + // first ID delimited by space is the default one const UChar *end = u_strchr(tzids, (UChar)0x20); if (end == NULL) { id.setTo(tzids, -1); diff --git a/contrib/libs/icu/i18n/titletrn.h b/contrib/libs/icu/i18n/titletrn.h index 4e45ac6f81..8409519818 100644 --- a/contrib/libs/icu/i18n/titletrn.h +++ b/contrib/libs/icu/i18n/titletrn.h @@ -52,12 +52,12 @@ class TitlecaseTransliterator : public CaseMapTransliterator { * Transliterator API. * @return a copy of the object. */ - virtual TitlecaseTransliterator* clone() const; + virtual TitlecaseTransliterator* clone() const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -76,7 +76,7 @@ protected: * pos.contextLimit. Otherwise, assume the text is complete. 
*/ virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, - UBool isIncremental) const; + UBool isIncremental) const override; private: /** diff --git a/contrib/libs/icu/i18n/tmunit.cpp b/contrib/libs/icu/i18n/tmunit.cpp index 910489b178..361aecb92e 100644 --- a/contrib/libs/icu/i18n/tmunit.cpp +++ b/contrib/libs/icu/i18n/tmunit.cpp @@ -94,7 +94,7 @@ TimeUnit::TimeUnit(TimeUnit::UTimeUnitFields timeUnitField) { initTime("second"); break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } diff --git a/contrib/libs/icu/i18n/tmutamt.cpp b/contrib/libs/icu/i18n/tmutamt.cpp index 25464693ea..2753c29374 100644 --- a/contrib/libs/icu/i18n/tmutamt.cpp +++ b/contrib/libs/icu/i18n/tmutamt.cpp @@ -45,7 +45,7 @@ TimeUnitAmount::operator=(const TimeUnitAmount& other) { } -UBool +bool TimeUnitAmount::operator==(const UObject& other) const { return Measure::operator==(other); } diff --git a/contrib/libs/icu/i18n/tmutfmt.cpp b/contrib/libs/icu/i18n/tmutfmt.cpp index 231ea5799c..057bb634eb 100644 --- a/contrib/libs/icu/i18n/tmutfmt.cpp +++ b/contrib/libs/icu/i18n/tmutfmt.cpp @@ -327,7 +327,7 @@ TimeUnitFormat::setup(UErrorCode& err) { } UnicodeString* pluralCount; while ((pluralCount = const_cast<UnicodeString*>(keywords->snext(err))) != NULL) { - pluralCounts.addElement(pluralCount, err); + pluralCounts.addElementX(pluralCount, err); } readFromCurrentLocale(UTMUTFMT_FULL_STYLE, gUnitsTag, pluralCounts, err); checkConsistency(UTMUTFMT_FULL_STYLE, gUnitsTag, err); @@ -362,7 +362,7 @@ struct TimeUnitFormatReadSink : public ResourceSink { virtual ~TimeUnitFormatReadSink(); - virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) { + virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) override { // Skip all put() calls except the first one -- discard all fallback data. 
if (beenHere) { return; diff --git a/contrib/libs/icu/i18n/tolowtrn.h b/contrib/libs/icu/i18n/tolowtrn.h index 2fbfb90e1b..951128ec77 100644 --- a/contrib/libs/icu/i18n/tolowtrn.h +++ b/contrib/libs/icu/i18n/tolowtrn.h @@ -50,12 +50,12 @@ class LowercaseTransliterator : public CaseMapTransliterator { * Transliterator API. * @return a copy of the object. */ - virtual LowercaseTransliterator* clone() const; + virtual LowercaseTransliterator* clone() const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. diff --git a/contrib/libs/icu/i18n/toupptrn.h b/contrib/libs/icu/i18n/toupptrn.h index e96ca8f0ba..755e9edfcb 100644 --- a/contrib/libs/icu/i18n/toupptrn.h +++ b/contrib/libs/icu/i18n/toupptrn.h @@ -50,12 +50,12 @@ class UppercaseTransliterator : public CaseMapTransliterator { * Transliterator API. * @return a copy of the object. */ - virtual UppercaseTransliterator* clone() const; + virtual UppercaseTransliterator* clone() const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. diff --git a/contrib/libs/icu/i18n/translit.cpp b/contrib/libs/icu/i18n/translit.cpp index ef44f42aa6..c7d6b51057 100644 --- a/contrib/libs/icu/i18n/translit.cpp +++ b/contrib/libs/icu/i18n/translit.cpp @@ -170,6 +170,7 @@ Transliterator* Transliterator::clone() const { * Assignment operator. 
*/ Transliterator& Transliterator::operator=(const Transliterator& other) { + if (this == &other) { return *this; } // self-assignment: no-op ID = other.ID; // NUL-terminate the ID string ID.getTerminatedBuffer(); @@ -1092,6 +1093,8 @@ Transliterator::createFromRules(const UnicodeString& ID, } else { UVector transliterators(status); + // TODO ICU-21701 missing U_FAILURE check here. + // Error and nullptr checking through this whole block looks suspect. int32_t passNumber = 1; int32_t limit = parser.idBlockVector.size(); @@ -1107,10 +1110,15 @@ Transliterator::createFromRules(const UnicodeString& ID, delete temp; return nullptr; } - if (temp != NULL && typeid(*temp) != typeid(NullTransliterator)) + if (temp != NULL && typeid(*temp) != typeid(NullTransliterator)) { transliterators.addElement(temp, status); - else + if (U_FAILURE(status)) { + delete temp; + return nullptr; + } + } else { delete temp; + } } } if (!parser.dataVector.isEmpty()) { @@ -1126,6 +1134,13 @@ Transliterator::createFromRules(const UnicodeString& ID, return t; } transliterators.addElement(temprbt, status); + if (U_FAILURE(status)) { + delete temprbt; + return t; + } + // TODO: ICU-21701 the transliterators vector will leak its contents if anything goes wrong. + // Under normal operation, the CompoundTransliterator constructor adopts the + // the contents of the vector. 
} } diff --git a/contrib/libs/icu/i18n/transreg.cpp b/contrib/libs/icu/i18n/transreg.cpp index c412a20079..726ad56f0f 100644 --- a/contrib/libs/icu/i18n/transreg.cpp +++ b/contrib/libs/icu/i18n/transreg.cpp @@ -154,22 +154,23 @@ Transliterator* TransliteratorAlias::create(UParseError& pe, pos = aliasesOrRules.indexOf(noIDBlock, pos + 1); } - UVector transliterators(ec); + UVector transliterators(uprv_deleteUObject, nullptr, ec); UnicodeString idBlock; int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff)); while (blockSeparatorPos >= 0) { aliasesOrRules.extract(0, blockSeparatorPos, idBlock); aliasesOrRules.remove(0, blockSeparatorPos + 1); if (!idBlock.isEmpty()) - transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec); + transliterators.adoptElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec); if (!transes->isEmpty()) - transliterators.addElement(transes->orphanElementAt(0), ec); + transliterators.adoptElement(transes->orphanElementAt(0), ec); blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff)); } if (!aliasesOrRules.isEmpty()) - transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec); + transliterators.adoptElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec); while (!transes->isEmpty()) - transliterators.addElement(transes->orphanElementAt(0), ec); + transliterators.adoptElement(transes->orphanElementAt(0), ec); + transliterators.setDeleter(nullptr); if (U_SUCCESS(ec)) { t = new CompoundTransliterator(ID, transliterators, @@ -186,7 +187,7 @@ Transliterator* TransliteratorAlias::create(UParseError& pe, } break; case RULES: - UPRV_UNREACHABLE; // don't call create() if isRuleBased() returns TRUE! + UPRV_UNREACHABLE_EXIT; // don't call create() if isRuleBased() returns TRUE! 
} return t; } @@ -543,7 +544,7 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) : variantList.setComparer(uhash_compareCaselessUnicodeString); UnicodeString *emptyString = new UnicodeString(); if (emptyString != NULL) { - variantList.addElement(emptyString, status); + variantList.adoptElement(emptyString, status); } availableIDs.setDeleter(uprv_deleteUObject); availableIDs.setComparer(uhash_compareCaselessUnicodeString); @@ -611,6 +612,8 @@ Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID, entry->entryType = TransliteratorEntry::COMPOUND_RBT; entry->compoundFilter = parser.orphanCompoundFilter(); entry->u.dataVector = new UVector(status); + // TODO ICU-21701: missing check for nullptr and failed status. + // Unclear how best to bail out. entry->stringArg.remove(); int32_t limit = parser.idBlockVector.size(); @@ -626,6 +629,9 @@ Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID, if (!parser.dataVector.isEmpty()) { TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); entry->u.dataVector->addElement(data, status); + if (U_FAILURE(status)) { + delete data; + } entry->stringArg += (UChar)0xffff; // use U+FFFF to mark position of RBTs in ID block } } @@ -951,7 +957,7 @@ void TransliteratorRegistry::registerEntry(const UnicodeString& ID, if (newID != NULL) { // NUL-terminate the ID string newID->getTerminatedBuffer(); - availableIDs.addElement(newID, status); + availableIDs.adoptElement(newID, status); } } } else { @@ -992,7 +998,7 @@ void TransliteratorRegistry::registerSTV(const UnicodeString& source, } UnicodeString *variantEntry = new UnicodeString(variant); if (variantEntry != NULL) { - variantList.addElement(variantEntry, status); + variantList.adoptElement(variantEntry, status); if (U_SUCCESS(status)) { variantListIndex = variantList.size() - 1; } @@ -1320,7 +1326,7 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID return t; 
case TransliteratorEntry::COMPOUND_RBT: { - UVector* rbts = new UVector(entry->u.dataVector->size(), status); + UVector* rbts = new UVector(uprv_deleteUObject, nullptr, entry->u.dataVector->size(), status); // Check for null pointer if (rbts == NULL) { status = U_MEMORY_ALLOCATION_ERROR; @@ -1334,12 +1340,13 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID if (tl == 0) status = U_MEMORY_ALLOCATION_ERROR; else - rbts->addElement(tl, status); + rbts->adoptElement(tl, status); } if (U_FAILURE(status)) { delete rbts; return 0; } + rbts->setDeleter(nullptr); aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter); } if (aliasReturn == 0) { @@ -1395,7 +1402,7 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID } return 0; default: - UPRV_UNREACHABLE; // can't get here + UPRV_UNREACHABLE_EXIT; // can't get here } } U_NAMESPACE_END diff --git a/contrib/libs/icu/i18n/transreg.h b/contrib/libs/icu/i18n/transreg.h index 041244e1b0..686e62ae33 100644 --- a/contrib/libs/icu/i18n/transreg.h +++ b/contrib/libs/icu/i18n/transreg.h @@ -69,7 +69,7 @@ class TransliteratorAlias : public UMemory { * it when the registry mutex is NOT held, to prevent deadlock. * It may only be called once. * - * Note: Only call create() if isRuleBased() returns FALSE. + * Note: Only call create() if isRuleBased() returns false. * * This method must be called *outside* of the TransliteratorRegistry * mutex. @@ -77,17 +77,17 @@ class TransliteratorAlias : public UMemory { Transliterator* create(UParseError&, UErrorCode&); /** - * Return TRUE if this alias is rule-based. If so, the caller + * Return true if this alias is rule-based. If so, the caller * must call parse() on it, then call TransliteratorRegistry::reget(). 
*/ UBool isRuleBased() const; /** - * If isRuleBased() returns TRUE, then the caller must call this + * If isRuleBased() returns true, then the caller must call this * method, followed by TransliteratorRegistry::reget(). The latter * method must be called inside the TransliteratorRegistry mutex. * - * Note: Only call parse() if isRuleBased() returns TRUE. + * Note: Only call parse() if isRuleBased() returns true. * * This method must be called *outside* of the TransliteratorRegistry * mutex, because it can instantiate Transliterators embedded in @@ -103,7 +103,7 @@ class TransliteratorAlias : public UMemory { // null, zero, empty. // 2. CompoundRBT // Here ID is the ID, aliasID is the idBlock, trans is the - // contained RBT, and idSplitPoint is the offet in aliasID + // contained RBT, and idSplitPoint is the offset in aliasID // where the contained RBT goes. compoundFilter is the // compound filter, and it is _not_ owned. // 3. Rules @@ -144,7 +144,7 @@ class TransliteratorRegistry : public UMemory { public: /** - * Contructor + * Constructor * @param status Output param set to success/failure code. */ TransliteratorRegistry(UErrorCode& status); @@ -170,7 +170,7 @@ class TransliteratorRegistry : public UMemory { * @param ID the given ID * @param aliasReturn output param to receive TransliteratorAlias; * should be NULL on entry - * @param parseError Struct to recieve information on position + * @param parseError Struct to receive information on position * of error if an error is encountered * @param status Output param set to success/failure code. */ @@ -293,7 +293,7 @@ class TransliteratorRegistry : public UMemory { * Return a registered source specifier. * @param index which specifier to return, from 0 to n-1, where * n = countAvailableSources() - * @param result fill-in paramter to receive the source specifier. + * @param result fill-in parameter to receive the source specifier. * If index is out of range, result will be empty. 
* @return reference to result */ @@ -314,7 +314,7 @@ class TransliteratorRegistry : public UMemory { * @param index which specifier to return, from 0 to n-1, where * n = countAvailableTargets(source) * @param source the source specifier - * @param result fill-in paramter to receive the target specifier. + * @param result fill-in parameter to receive the target specifier. * If source is invalid or if index is out of range, result will * be empty. * @return reference to result @@ -345,7 +345,7 @@ class TransliteratorRegistry : public UMemory { * n = countAvailableVariants(source, target) * @param source the source specifier * @param target the target specifier - * @param result fill-in paramter to receive the variant + * @param result fill-in parameter to receive the variant * specifier. If source is invalid or if target is invalid or if * index is out of range, result will be empty. * @return reference to result @@ -417,11 +417,11 @@ class TransliteratorRegistry : public UMemory { public: Enumeration(const TransliteratorRegistry& reg); virtual ~Enumeration(); - virtual int32_t count(UErrorCode& status) const; - virtual const UnicodeString* snext(UErrorCode& status); - virtual void reset(UErrorCode& status); + virtual int32_t count(UErrorCode& status) const override; + virtual const UnicodeString* snext(UErrorCode& status) override; + virtual void reset(UErrorCode& status) override; static UClassID U_EXPORT2 getStaticClassID(); - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; private: int32_t index; const TransliteratorRegistry& reg; diff --git a/contrib/libs/icu/i18n/tridpars.cpp b/contrib/libs/icu/i18n/tridpars.cpp index 65bfc88063..0ca168e7a3 100644 --- a/contrib/libs/icu/i18n/tridpars.cpp +++ b/contrib/libs/icu/i18n/tridpars.cpp @@ -364,6 +364,8 @@ UBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t d int32_t pos = 0; int32_t withParens = 1; list.removeAllElements(); + UObjectDeleter 
*save = list.setDeleter(_deleteSingleID); + UnicodeSet* filter; globalFilter = NULL; canonID.truncate(0); @@ -392,7 +394,7 @@ UBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t d break; } if (dir == FORWARD) { - list.addElement(single, ec); + list.adoptElement(single, ec); } else { list.insertElementAt(single, 0, ec); } @@ -442,10 +444,10 @@ UBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t d goto FAIL; } + list.setDeleter(save); return TRUE; FAIL: - UObjectDeleter *save = list.setDeleter(_deleteSingleID); list.removeAllElements(); list.setDeleter(save); delete globalFilter; @@ -494,9 +496,8 @@ void TransliteratorIDParser::instantiateList(UVector& list, ec = U_INVALID_ID; goto RETURN; } - tlist.addElement(t, ec); + tlist.adoptElement(t, ec); if (U_FAILURE(ec)) { - delete t; goto RETURN; } } @@ -509,10 +510,7 @@ void TransliteratorIDParser::instantiateList(UVector& list, // Should never happen ec = U_INTERNAL_TRANSLITERATOR_ERROR; } - tlist.addElement(t, ec); - if (U_FAILURE(ec)) { - delete t; - } + tlist.adoptElement(t, ec); } RETURN: @@ -525,9 +523,8 @@ void TransliteratorIDParser::instantiateList(UVector& list, while (tlist.size() > 0) { t = (Transliterator*) tlist.orphanElementAt(0); - list.addElement(t, ec); + list.adoptElement(t, ec); if (U_FAILURE(ec)) { - delete t; list.removeAllElements(); break; } diff --git a/contrib/libs/icu/i18n/tridpars.h b/contrib/libs/icu/i18n/tridpars.h index 5e42f068a4..03d68ccac9 100644 --- a/contrib/libs/icu/i18n/tridpars.h +++ b/contrib/libs/icu/i18n/tridpars.h @@ -222,7 +222,7 @@ class TransliteratorIDParser /* not : public UObject because all methods are sta * @param source the given source. * @param target the given target. * @param variant the given variant - * @param isSourcePresent If TRUE then the source is present. + * @param isSourcePresent If true then the source is present. 
* If the source is not present, ANY will be * given as the source, and isSourcePresent will be null * @return an array of 4 strings: source, target, variant, and diff --git a/contrib/libs/icu/i18n/tzfmt.cpp b/contrib/libs/icu/i18n/tzfmt.cpp index f87f0f67b1..ef3cfad80c 100644 --- a/contrib/libs/icu/i18n/tzfmt.cpp +++ b/contrib/libs/icu/i18n/tzfmt.cpp @@ -270,7 +270,7 @@ GMTOffsetField::isValid(FieldType type, int32_t width) { case SECOND: return (width == 2); default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } return (width > 0); } @@ -482,11 +482,11 @@ TimeZoneFormat::operator=(const TimeZoneFormat& other) { } -UBool +bool TimeZoneFormat::operator==(const Format& other) const { TimeZoneFormat* tzfmt = (TimeZoneFormat*)&other; - UBool isEqual = + bool isEqual = fLocale == tzfmt->fLocale && fGMTPattern == tzfmt->fGMTPattern && fGMTZeroFormat == tzfmt->fGMTZeroFormat @@ -595,7 +595,7 @@ TimeZoneFormat::setGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type, co required = FIELDS_HMS; break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } UVector* patternItems = parseOffsetPattern(pattern, required, status); @@ -1033,7 +1033,7 @@ TimeZoneFormat::parse(UTimeZoneFormatStyle style, const UnicodeString& text, Par break; default: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } int32_t len = 0; @@ -1873,7 +1873,7 @@ TimeZoneFormat::parseOffsetFieldsWithPattern(const UnicodeString& text, int32_t // When TimeZoneFormat parse() is called from SimpleDateFormat, // leading space characters might be truncated. If the first pattern text // starts with such character (e.g. Bidi control), then we need to - // skip the leading space charcters. + // skip the leading space characters. 
if (idx < text.length() && !PatternProps::isWhiteSpace(text.char32At(idx))) { while (len > 0) { UChar32 ch; @@ -2459,7 +2459,7 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re if (itemType != GMTOffsetField::TEXT) { if (GMTOffsetField::isValid(itemType, itemLength)) { GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast<uint8_t>(itemLength), status); - result->addElement(fld, status); + result->addElementX(fld, status); if (U_FAILURE(status)) { break; } @@ -2485,7 +2485,7 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re if (itemType == GMTOffsetField::TEXT) { if (text.length() > 0) { GMTOffsetField* textfld = GMTOffsetField::createText(text, status); - result->addElement(textfld, status); + result->addElementX(textfld, status); if (U_FAILURE(status)) { break; } @@ -2494,7 +2494,7 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re } else { if (GMTOffsetField::isValid(itemType, itemLength)) { GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast<uint8_t>(itemLength), status); - result->addElement(fld, status); + result->addElementX(fld, status); if (U_FAILURE(status)) { break; } @@ -2512,7 +2512,7 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re if (itemType != GMTOffsetField::TEXT) { if (GMTOffsetField::isValid(itemType, itemLength)) { GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast<uint8_t>(itemLength), status); - result->addElement(fld, status); + result->addElementX(fld, status); if (U_FAILURE(status)) { break; } @@ -2532,12 +2532,12 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re if (itemType == GMTOffsetField::TEXT) { if (text.length() > 0) { GMTOffsetField* tfld = GMTOffsetField::createText(text, status); - result->addElement(tfld, status); + result->addElementX(tfld, status); } } else { if 
(GMTOffsetField::isValid(itemType, itemLength)) { GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast<uint8_t>(itemLength), status); - result->addElement(fld, status); + result->addElementX(fld, status); } else { status = U_ILLEGAL_ARGUMENT_ERROR; } @@ -2729,7 +2729,7 @@ public: ZoneIdMatchHandler(); virtual ~ZoneIdMatchHandler(); - UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status); + UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status) override; const UChar* getID(); int32_t getMatchLen(); private: @@ -2780,15 +2780,17 @@ static void U_CALLCONV initZoneIdTrie(UErrorCode &status) { status = U_MEMORY_ALLOCATION_ERROR; return; } - StringEnumeration *tzenum = TimeZone::createEnumeration(); - const UnicodeString *id; - while ((id = tzenum->snext(status)) != NULL) { - const UChar* uid = ZoneMeta::findTimeZoneID(*id); - if (uid) { - gZoneIdTrie->put(uid, const_cast<UChar *>(uid), status); + StringEnumeration *tzenum = TimeZone::createEnumeration(status); + if (U_SUCCESS(status)) { + const UnicodeString *id; + while ((id = tzenum->snext(status)) != NULL) { + const UChar* uid = ZoneMeta::findTimeZoneID(*id); + if (uid) { + gZoneIdTrie->put(uid, const_cast<UChar *>(uid), status); + } } + delete tzenum; } - delete tzenum; } diff --git a/contrib/libs/icu/i18n/tzgnames.cpp b/contrib/libs/icu/i18n/tzgnames.cpp index e056461dc3..ed5f42d7bc 100644 --- a/contrib/libs/icu/i18n/tzgnames.cpp +++ b/contrib/libs/icu/i18n/tzgnames.cpp @@ -195,7 +195,7 @@ public: GNameSearchHandler(uint32_t types); virtual ~GNameSearchHandler(); - UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status); + UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status) override; UVector* getMatches(int32_t& maxMatchLen); private: @@ -244,7 +244,7 @@ GNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node, gmatch->gnameInfo = nameinfo; 
gmatch->matchLength = matchLength; gmatch->timeType = UTZFMT_TIME_TYPE_UNKNOWN; - fResults->addElement(gmatch, status); + fResults->addElementX(gmatch, status); if (U_FAILURE(status)) { uprv_free(gmatch); } else { @@ -328,7 +328,7 @@ private: // --------------------------------------------------- -// TZGNCore - core implmentation of TimeZoneGenericNames +// TZGNCore - core implementation of TimeZoneGenericNames // // TimeZoneGenericNames is parallel to TimeZoneNames, // but handles run-time generated time zone names. @@ -554,7 +554,7 @@ TZGNCore::getGenericLocationName(const UnicodeString& tzCanonicalID) { // If this is not the primary zone in the country, // use the exemplar city name. - // getExemplarLocationName should retur non-empty string + // getExemplarLocationName should return non-empty string // if the time zone is associated with a region UnicodeString city; @@ -1287,7 +1287,7 @@ TimeZoneGenericNames::createInstance(const Locale& locale, UErrorCode& status) { return instance; } -UBool +bool TimeZoneGenericNames::operator==(const TimeZoneGenericNames& other) const { // Just compare if the other object also use the same // ref entry diff --git a/contrib/libs/icu/i18n/tzgnames.h b/contrib/libs/icu/i18n/tzgnames.h index 71d9d84dc0..26112c5db6 100644 --- a/contrib/libs/icu/i18n/tzgnames.h +++ b/contrib/libs/icu/i18n/tzgnames.h @@ -11,7 +11,7 @@ /** * \file - * \brief C API: Time zone generic names classe + * \brief C API: Time zone generic names classes */ #include "unicode/utypes.h" @@ -45,8 +45,8 @@ public: static TimeZoneGenericNames* createInstance(const Locale& locale, UErrorCode& status); - virtual UBool operator==(const TimeZoneGenericNames& other) const; - virtual UBool operator!=(const TimeZoneGenericNames& other) const {return !operator==(other);} + virtual bool operator==(const TimeZoneGenericNames& other) const; + virtual bool operator!=(const TimeZoneGenericNames& other) const {return !operator==(other);} virtual TimeZoneGenericNames* clone() 
const; UnicodeString& getDisplayName(const TimeZone& tz, UTimeZoneGenericNameType type, diff --git a/contrib/libs/icu/i18n/tznames.cpp b/contrib/libs/icu/i18n/tznames.cpp index d789c12363..5c504d01cb 100644 --- a/contrib/libs/icu/i18n/tznames.cpp +++ b/contrib/libs/icu/i18n/tznames.cpp @@ -104,24 +104,24 @@ public: TimeZoneNamesDelegate(const Locale& locale, UErrorCode& status); virtual ~TimeZoneNamesDelegate(); - virtual UBool operator==(const TimeZoneNames& other) const; - virtual UBool operator!=(const TimeZoneNames& other) const {return !operator==(other);} - virtual TimeZoneNamesDelegate* clone() const; + virtual bool operator==(const TimeZoneNames& other) const override; + virtual bool operator!=(const TimeZoneNames& other) const {return !operator==(other);} + virtual TimeZoneNamesDelegate* clone() const override; - StringEnumeration* getAvailableMetaZoneIDs(UErrorCode& status) const; - StringEnumeration* getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCode& status) const; - UnicodeString& getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeString& mzID) const; - UnicodeString& getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const; + StringEnumeration* getAvailableMetaZoneIDs(UErrorCode& status) const override; + StringEnumeration* getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCode& status) const override; + UnicodeString& getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeString& mzID) const override; + UnicodeString& getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const override; - UnicodeString& getMetaZoneDisplayName(const UnicodeString& mzID, UTimeZoneNameType type, UnicodeString& name) const; - UnicodeString& getTimeZoneDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, UnicodeString& name) const; + UnicodeString& getMetaZoneDisplayName(const UnicodeString& mzID, UTimeZoneNameType type, UnicodeString& name) const override; + 
UnicodeString& getTimeZoneDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, UnicodeString& name) const override; - UnicodeString& getExemplarLocationName(const UnicodeString& tzID, UnicodeString& name) const; + UnicodeString& getExemplarLocationName(const UnicodeString& tzID, UnicodeString& name) const override; - void loadAllDisplayNames(UErrorCode& status); - void getDisplayNames(const UnicodeString& tzID, const UTimeZoneNameType types[], int32_t numTypes, UDate date, UnicodeString dest[], UErrorCode& status) const; + void loadAllDisplayNames(UErrorCode& status) override; + void getDisplayNames(const UnicodeString& tzID, const UTimeZoneNameType types[], int32_t numTypes, UDate date, UnicodeString dest[], UErrorCode& status) const override; - MatchInfoCollection* find(const UnicodeString& text, int32_t start, uint32_t types, UErrorCode& status) const; + MatchInfoCollection* find(const UnicodeString& text, int32_t start, uint32_t types, UErrorCode& status) const override; private: TimeZoneNamesDelegate(); TimeZoneNamesCacheEntry* fTZnamesCacheEntry; @@ -219,10 +219,10 @@ TimeZoneNamesDelegate::~TimeZoneNamesDelegate() { umtx_unlock(&gTimeZoneNamesLock); } -UBool +bool TimeZoneNamesDelegate::operator==(const TimeZoneNames& other) const { if (this == &other) { - return TRUE; + return true; } // Just compare if the other object also use the same // cache entry @@ -230,7 +230,7 @@ TimeZoneNamesDelegate::operator==(const TimeZoneNames& other) const { if (rhs) { return fTZnamesCacheEntry == rhs->fTZnamesCacheEntry; } - return FALSE; + return false; } TimeZoneNamesDelegate* @@ -345,7 +345,7 @@ TimeZoneNames::getDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, return name; } -// Empty default implementation, to be overriden in tznames_impl.cpp. +// Empty default implementation, to be overridden in tznames_impl.cpp. 
void TimeZoneNames::loadAllDisplayNames(UErrorCode& /*status*/) { } @@ -419,7 +419,7 @@ TimeZoneNames::MatchInfoCollection::addZone(UTimeZoneNameType nameType, int32_t status = U_MEMORY_ALLOCATION_ERROR; return; } - matches(status)->addElement(matchInfo, status); + matches(status)->addElementX(matchInfo, status); if (U_FAILURE(status)) { delete matchInfo; } @@ -436,7 +436,7 @@ TimeZoneNames::MatchInfoCollection::addMetaZone(UTimeZoneNameType nameType, int3 status = U_MEMORY_ALLOCATION_ERROR; return; } - matches(status)->addElement(matchInfo, status); + matches(status)->addElementX(matchInfo, status); if (U_FAILURE(status)) { delete matchInfo; } diff --git a/contrib/libs/icu/i18n/tznames_impl.cpp b/contrib/libs/icu/i18n/tznames_impl.cpp index 186aaaf74d..d450b74564 100644 --- a/contrib/libs/icu/i18n/tznames_impl.cpp +++ b/contrib/libs/icu/i18n/tznames_impl.cpp @@ -155,12 +155,12 @@ CharacterNode::addValue(void *value, UObjectDeleter *valueDeleter, UErrorCode &s } return; } - values->addElement(fValues, status); + values->addElementX(fValues, status); fValues = values; fHasValuesVector = TRUE; } // Add the new value. - ((UVector *)fValues)->addElement(value, status); + ((UVector *)fValues)->addElementX(value, status); } } @@ -233,7 +233,7 @@ TextTrieMap::put(const UChar *key, void *value, UErrorCode &status) { U_ASSERT(fLazyContents != NULL); UChar *s = const_cast<UChar *>(key); - fLazyContents->addElement(s, status); + fLazyContents->addElementX(s, status); if (U_FAILURE(status)) { if (fValueDeleter) { fValueDeleter((void*) key); @@ -241,7 +241,7 @@ TextTrieMap::put(const UChar *key, void *value, UErrorCode &status) { return; } - fLazyContents->addElement(value, status); + fLazyContents->addElementX(value, status); } void @@ -533,7 +533,7 @@ const UChar *ZNStringPool::get(const UChar *s, UErrorCode &status) { // // ZNStringPool::adopt() Put a string into the hash, but do not copy the string data // into the pool's storage. 
Used for strings from resource bundles, -// which will perisist for the life of the zone string formatter, and +// which will persist for the life of the zone string formatter, and // therefore can be used directly without copying. const UChar *ZNStringPool::adopt(const UChar * s, UErrorCode &status) { const UChar *pooledString; @@ -776,7 +776,7 @@ struct ZNames::ZNamesLoader : public ResourceSink { clear(); ures_getAllItemsWithFallback(zoneStrings, key, *this, localStatus); - // Ignore errors, but propogate possible warnings. + // Ignore errors, but propagate possible warnings. if (U_SUCCESS(localStatus)) { errorCode = localStatus; } @@ -794,7 +794,7 @@ struct ZNames::ZNamesLoader : public ResourceSink { } virtual void put(const char* key, ResourceValue& value, UBool /*noFallback*/, - UErrorCode &errorCode) { + UErrorCode &errorCode) override { ResourceTable namesTable = value.getTable(errorCode); if (U_FAILURE(errorCode)) { return; } for (int32_t i = 0; namesTable.getKeyAndValue(i, key, value); ++i) { @@ -857,10 +857,10 @@ public: MetaZoneIDsEnumeration(UVector* mzIDs); virtual ~MetaZoneIDsEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; - virtual const UnicodeString* snext(UErrorCode& status); - virtual void reset(UErrorCode& status); - virtual int32_t count(UErrorCode& status) const; + virtual UClassID getDynamicClassID(void) const override; + virtual const UnicodeString* snext(UErrorCode& status) override; + virtual void reset(UErrorCode& status) override; + virtual int32_t count(UErrorCode& status) const override; private: int32_t fLen; int32_t fPos; @@ -920,7 +920,7 @@ public: ZNameSearchHandler(uint32_t types); virtual ~ZNameSearchHandler(); - UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status); + UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status) override; TimeZoneNames::MatchInfoCollection* getMatches(int32_t& 
maxMatchLen); private: @@ -1104,13 +1104,13 @@ TimeZoneNamesImpl::cleanup() { } } -UBool +bool TimeZoneNamesImpl::operator==(const TimeZoneNames& other) const { if (this == &other) { - return TRUE; + return true; } // No implementation for now - return FALSE; + return false; } TimeZoneNamesImpl* @@ -1165,7 +1165,7 @@ TimeZoneNamesImpl::_getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCod OlsonToMetaMappingEntry *map = (OlsonToMetaMappingEntry *)mappings->elementAt(i); const UChar *mzID = map->mzid; if (!mzIDs->contains((void *)mzID)) { - mzIDs->addElement((void *)mzID, status); + mzIDs->addElementX((void *)mzID, status); } } if (U_SUCCESS(status)) { @@ -1194,7 +1194,7 @@ TimeZoneNamesImpl::getReferenceZoneID(const UnicodeString& mzID, const char* reg return TimeZoneNamesImpl::_getReferenceZoneID(mzID, region, tzID); } -// static implementaion of getReferenceZoneID +// static implementation of getReferenceZoneID UnicodeString& TimeZoneNamesImpl::_getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) { ZoneMeta::getZoneIdByMetazone(mzID, UnicodeString(region, -1, US_INV), tzID); @@ -1560,7 +1560,7 @@ struct TimeZoneNamesImpl::ZoneStringsLoader : public ResourceSink { } virtual void put(const char *key, ResourceValue &value, UBool noFallback, - UErrorCode &status) { + UErrorCode &status) override { ResourceTable timeZonesTable = value.getTable(status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; timeZonesTable.getKeyAndValue(i, key, value); ++i) { @@ -1891,7 +1891,7 @@ public: TZDBNameSearchHandler(uint32_t types, const char* region); virtual ~TZDBNameSearchHandler(); - UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status); + UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status) override; TimeZoneNames::MatchInfoCollection* getMatches(int32_t& maxMatchLen); private: @@ -2156,13 +2156,13 @@ TZDBTimeZoneNames::TZDBTimeZoneNames(const Locale& locale) 
TZDBTimeZoneNames::~TZDBTimeZoneNames() { } -UBool +bool TZDBTimeZoneNames::operator==(const TimeZoneNames& other) const { if (this == &other) { - return TRUE; + return true; } // No implementation for now - return FALSE; + return false; } TZDBTimeZoneNames* diff --git a/contrib/libs/icu/i18n/tznames_impl.h b/contrib/libs/icu/i18n/tznames_impl.h index 1286eeb80d..d047fa3541 100644 --- a/contrib/libs/icu/i18n/tznames_impl.h +++ b/contrib/libs/icu/i18n/tznames_impl.h @@ -74,7 +74,7 @@ class U_I18N_API ZNStringPool: public UMemory { */ struct CharacterNode { // No constructor or destructor. - // We malloc and free an uninitalized array of CharacterNode objects + // We malloc and free an uninitialized array of CharacterNode objects // and clear and delete them ourselves. void clear(); @@ -92,9 +92,9 @@ struct CharacterNode { UBool fHasValuesVector; UBool fPadding; - // No value: fValues == NULL and fHasValuesVector == FALSE - // One value: fValues == value and fHasValuesVector == FALSE - // >=2 values: fValues == UVector of values and fHasValuesVector == TRUE + // No value: fValues == NULL and fHasValuesVector == false + // One value: fValues == value and fHasValuesVector == false + // >=2 values: fValues == UVector of values and fHasValuesVector == true }; inline UBool CharacterNode::hasValues() const { @@ -173,24 +173,24 @@ public: virtual ~TimeZoneNamesImpl(); - virtual UBool operator==(const TimeZoneNames& other) const; - virtual TimeZoneNamesImpl* clone() const; + virtual bool operator==(const TimeZoneNames& other) const override; + virtual TimeZoneNamesImpl* clone() const override; - StringEnumeration* getAvailableMetaZoneIDs(UErrorCode& status) const; - StringEnumeration* getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCode& status) const; + StringEnumeration* getAvailableMetaZoneIDs(UErrorCode& status) const override; + StringEnumeration* getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCode& status) const override; - UnicodeString& 
getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeString& mzID) const; - UnicodeString& getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const; + UnicodeString& getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeString& mzID) const override; + UnicodeString& getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const override; - UnicodeString& getMetaZoneDisplayName(const UnicodeString& mzID, UTimeZoneNameType type, UnicodeString& name) const; - UnicodeString& getTimeZoneDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, UnicodeString& name) const; + UnicodeString& getMetaZoneDisplayName(const UnicodeString& mzID, UTimeZoneNameType type, UnicodeString& name) const override; + UnicodeString& getTimeZoneDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, UnicodeString& name) const override; - UnicodeString& getExemplarLocationName(const UnicodeString& tzID, UnicodeString& name) const; + UnicodeString& getExemplarLocationName(const UnicodeString& tzID, UnicodeString& name) const override; - TimeZoneNames::MatchInfoCollection* find(const UnicodeString& text, int32_t start, uint32_t types, UErrorCode& status) const; + TimeZoneNames::MatchInfoCollection* find(const UnicodeString& text, int32_t start, uint32_t types, UErrorCode& status) const override; - void loadAllDisplayNames(UErrorCode& status); - void getDisplayNames(const UnicodeString& tzID, const UTimeZoneNameType types[], int32_t numTypes, UDate date, UnicodeString dest[], UErrorCode& status) const; + void loadAllDisplayNames(UErrorCode& status) override; + void getDisplayNames(const UnicodeString& tzID, const UTimeZoneNameType types[], int32_t numTypes, UDate date, UnicodeString dest[], UErrorCode& status) const override; static UnicodeString& getDefaultExemplarLocationName(const UnicodeString& tzID, UnicodeString& name); @@ -235,19 +235,19 @@ public: TZDBTimeZoneNames(const Locale& locale); virtual 
~TZDBTimeZoneNames(); - virtual UBool operator==(const TimeZoneNames& other) const; - virtual TZDBTimeZoneNames* clone() const; + virtual bool operator==(const TimeZoneNames& other) const override; + virtual TZDBTimeZoneNames* clone() const override; - StringEnumeration* getAvailableMetaZoneIDs(UErrorCode& status) const; - StringEnumeration* getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCode& status) const; + StringEnumeration* getAvailableMetaZoneIDs(UErrorCode& status) const override; + StringEnumeration* getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCode& status) const override; - UnicodeString& getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeString& mzID) const; - UnicodeString& getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const; + UnicodeString& getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeString& mzID) const override; + UnicodeString& getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const override; - UnicodeString& getMetaZoneDisplayName(const UnicodeString& mzID, UTimeZoneNameType type, UnicodeString& name) const; - UnicodeString& getTimeZoneDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, UnicodeString& name) const; + UnicodeString& getMetaZoneDisplayName(const UnicodeString& mzID, UTimeZoneNameType type, UnicodeString& name) const override; + UnicodeString& getTimeZoneDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, UnicodeString& name) const override; - TimeZoneNames::MatchInfoCollection* find(const UnicodeString& text, int32_t start, uint32_t types, UErrorCode& status) const; + TimeZoneNames::MatchInfoCollection* find(const UnicodeString& text, int32_t start, uint32_t types, UErrorCode& status) const override; // When TZDBNames for the metazone is not available, this method returns NULL, // but does NOT set U_MISSING_RESOURCE_ERROR to status. 
diff --git a/contrib/libs/icu/i18n/tzrule.cpp b/contrib/libs/icu/i18n/tzrule.cpp index 759a2d4c67..a60fffbe02 100644 --- a/contrib/libs/icu/i18n/tzrule.cpp +++ b/contrib/libs/icu/i18n/tzrule.cpp @@ -53,7 +53,7 @@ TimeZoneRule::operator=(const TimeZoneRule& right) { return *this; } -UBool +bool TimeZoneRule::operator==(const TimeZoneRule& that) const { return ((this == &that) || (typeid(*this) == typeid(that) && @@ -62,7 +62,7 @@ TimeZoneRule::operator==(const TimeZoneRule& that) const { fDSTSavings == that.fDSTSavings)); } -UBool +bool TimeZoneRule::operator!=(const TimeZoneRule& that) const { return !operator==(that); } @@ -120,14 +120,14 @@ InitialTimeZoneRule::operator=(const InitialTimeZoneRule& right) { return *this; } -UBool +bool InitialTimeZoneRule::operator==(const TimeZoneRule& that) const { return ((this == &that) || (typeid(*this) == typeid(that) && TimeZoneRule::operator==(that))); } -UBool +bool InitialTimeZoneRule::operator!=(const TimeZoneRule& that) const { return !operator==(that); } @@ -226,13 +226,13 @@ AnnualTimeZoneRule::operator=(const AnnualTimeZoneRule& right) { return *this; } -UBool +bool AnnualTimeZoneRule::operator==(const TimeZoneRule& that) const { if (this == &that) { - return TRUE; + return true; } if (typeid(*this) != typeid(that)) { - return FALSE; + return false; } AnnualTimeZoneRule *atzr = (AnnualTimeZoneRule*)&that; return (*fDateTimeRule == *(atzr->fDateTimeRule) && @@ -240,7 +240,7 @@ AnnualTimeZoneRule::operator==(const TimeZoneRule& that) const { fEndYear == atzr->fEndYear); } -UBool +bool AnnualTimeZoneRule::operator!=(const TimeZoneRule& that) const { return !operator==(that); } @@ -445,31 +445,31 @@ TimeArrayTimeZoneRule::operator=(const TimeArrayTimeZoneRule& right) { return *this; } -UBool +bool TimeArrayTimeZoneRule::operator==(const TimeZoneRule& that) const { if (this == &that) { - return TRUE; + return true; } - if (typeid(*this) != typeid(that) || TimeZoneRule::operator==(that) == FALSE) { - return FALSE; + if 
(typeid(*this) != typeid(that) || !TimeZoneRule::operator==(that)) { + return false; } TimeArrayTimeZoneRule *tatzr = (TimeArrayTimeZoneRule*)&that; if (fTimeRuleType != tatzr->fTimeRuleType || fNumStartTimes != tatzr->fNumStartTimes) { - return FALSE; + return false; } // Compare start times - UBool res = TRUE; + bool res = true; for (int32_t i = 0; i < fNumStartTimes; i++) { if (fStartTimes[i] != tatzr->fStartTimes[i]) { - res = FALSE; + res = false; break; } } return res; } -UBool +bool TimeArrayTimeZoneRule::operator!=(const TimeZoneRule& that) const { return !operator==(that); } diff --git a/contrib/libs/icu/i18n/tztrans.cpp b/contrib/libs/icu/i18n/tztrans.cpp index 3199b78ea8..900e4be540 100644 --- a/contrib/libs/icu/i18n/tztrans.cpp +++ b/contrib/libs/icu/i18n/tztrans.cpp @@ -63,28 +63,28 @@ TimeZoneTransition::operator=(const TimeZoneTransition& right) { return *this; } -UBool +bool TimeZoneTransition::operator==(const TimeZoneTransition& that) const { if (this == &that) { - return TRUE; + return true; } if (typeid(*this) != typeid(that)) { - return FALSE; + return false; } if (fTime != that.fTime) { - return FALSE; + return false; } if ((fFrom == NULL && that.fFrom == NULL) || (fFrom != NULL && that.fFrom != NULL && *fFrom == *(that.fFrom))) { if ((fTo == NULL && that.fTo == NULL) || (fTo != NULL && that.fTo != NULL && *fTo == *(that.fTo))) { - return TRUE; + return true; } } - return FALSE; + return false; } -UBool +bool TimeZoneTransition::operator!=(const TimeZoneTransition& that) const { return !operator==(that); } diff --git a/contrib/libs/icu/i18n/ucal.cpp b/contrib/libs/icu/i18n/ucal.cpp index 67c51aea27..33f72589c5 100644 --- a/contrib/libs/icu/i18n/ucal.cpp +++ b/contrib/libs/icu/i18n/ucal.cpp @@ -33,8 +33,8 @@ U_NAMESPACE_USE static TimeZone* _createTimeZone(const UChar* zoneID, int32_t len, UErrorCode* ec) { - TimeZone* zone = NULL; - if (ec != NULL && U_SUCCESS(*ec)) { + TimeZone* zone = nullptr; + if (ec != nullptr && U_SUCCESS(*ec)) { // Note 
that if zoneID is invalid, we get back GMT. This odd // behavior is by design and goes back to the JDK. The only // failure we will see is a memory allocation failure. @@ -42,7 +42,7 @@ _createTimeZone(const UChar* zoneID, int32_t len, UErrorCode* ec) { UnicodeString zoneStrID; zoneStrID.setTo((UBool)(len < 0), zoneID, l); /* temporary read-only alias */ zone = TimeZone::createTimeZone(zoneStrID); - if (zone == NULL) { + if (zone == nullptr) { *ec = U_MEMORY_ALLOCATION_ERROR; } } @@ -58,20 +58,20 @@ ucal_openTimeZoneIDEnumeration(USystemTimeZoneType zoneType, const char* region, U_CAPI UEnumeration* U_EXPORT2 ucal_openTimeZones(UErrorCode* ec) { - return uenum_openFromStringEnumeration(TimeZone::createEnumeration(), ec); + return ucal_openTimeZoneIDEnumeration(UCAL_ZONE_TYPE_ANY, nullptr, nullptr, ec); } U_CAPI UEnumeration* U_EXPORT2 ucal_openCountryTimeZones(const char* country, UErrorCode* ec) { - return uenum_openFromStringEnumeration(TimeZone::createEnumeration(country), ec); + return ucal_openTimeZoneIDEnumeration(UCAL_ZONE_TYPE_ANY, country, nullptr, ec); } U_CAPI int32_t U_EXPORT2 ucal_getDefaultTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec) { int32_t len = 0; - if (ec != NULL && U_SUCCESS(*ec)) { + if (ec != nullptr && U_SUCCESS(*ec)) { TimeZone* zone = TimeZone::createDefault(); - if (zone == NULL) { + if (zone == nullptr) { *ec = U_MEMORY_ALLOCATION_ERROR; } else { UnicodeString id; @@ -86,17 +86,17 @@ ucal_getDefaultTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec) { U_CAPI void U_EXPORT2 ucal_setDefaultTimeZone(const UChar* zoneID, UErrorCode* ec) { TimeZone* zone = _createTimeZone(zoneID, -1, ec); - if (zone != NULL) { + if (zone != nullptr) { TimeZone::adoptDefault(zone); } } -U_DRAFT int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 ucal_getHostTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec) { int32_t len = 0; - if (ec != NULL && U_SUCCESS(*ec)) { + if (ec != nullptr && U_SUCCESS(*ec)) { TimeZone *zone = 
TimeZone::detectHostTimeZone(); - if (zone == NULL) { + if (zone == nullptr) { *ec = U_MEMORY_ALLOCATION_ERROR; } else { UnicodeString id; @@ -114,7 +114,7 @@ ucal_getDSTSavings(const UChar* zoneID, UErrorCode* ec) { TimeZone* zone = _createTimeZone(zoneID, -1, ec); if (U_SUCCESS(*ec)) { SimpleTimeZone* stz = dynamic_cast<SimpleTimeZone*>(zone); - if (stz != NULL) { + if (stz != nullptr) { result = stz->getDSTSavings(); } else { // Since there is no getDSTSavings on TimeZone, we use a @@ -219,10 +219,10 @@ ucal_setTimeZone( UCalendar* cal, if(U_FAILURE(*status)) return; - TimeZone* zone = (zoneID==NULL) ? TimeZone::createDefault() + TimeZone* zone = (zoneID==nullptr) ? TimeZone::createDefault() : _createTimeZone(zoneID, len, status); - if (zone != NULL) { + if (zone != nullptr) { ((Calendar*)cal)->adoptTimeZone(zone); } } @@ -255,8 +255,8 @@ ucal_getTimeZoneDisplayName(const UCalendar* cal, const TimeZone& tz = ((Calendar*)cal)->getTimeZone(); UnicodeString id; - if(!(result==NULL && resultLength==0)) { - // NULL destination for pure preflighting: empty dummy string + if (!(result == nullptr && resultLength == 0)) { + // Null destination for pure preflighting: empty dummy string // otherwise, alias the destination buffer id.setTo(result, 0, resultLength); } @@ -298,12 +298,12 @@ ucal_setGregorianChange(UCalendar *cal, UDate date, UErrorCode *pErrorCode) { } Calendar *cpp_cal = (Calendar *)cal; GregorianCalendar *gregocal = dynamic_cast<GregorianCalendar *>(cpp_cal); - // Not if(gregocal == NULL) { + // Not if(gregocal == nullptr) { // because we really want to work only with a GregorianCalendar, not with // its subclasses like BuddhistCalendar. - if (cpp_cal == NULL) { - // We normally don't check "this" pointers for NULL, but this here avoids - // compiler-generated exception-throwing code in case cal == NULL. 
+ if (cpp_cal == nullptr) { + // We normally don't check "this" pointers for nullptr, but this here avoids + // compiler-generated exception-throwing code in case cal == nullptr. *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return; } @@ -321,11 +321,11 @@ ucal_getGregorianChange(const UCalendar *cal, UErrorCode *pErrorCode) { } const Calendar *cpp_cal = (const Calendar *)cal; const GregorianCalendar *gregocal = dynamic_cast<const GregorianCalendar *>(cpp_cal); - // Not if(gregocal == NULL) { + // Not if(gregocal == nullptr) { // see comments in ucal_setGregorianChange(). - if (cpp_cal == NULL) { - // We normally don't check "this" pointers for NULL, but this here avoids - // compiler-generated exception-throwing code in case cal == NULL. + if (cpp_cal == nullptr) { + // We normally don't check "this" pointers for nullptr, but this here avoids + // compiler-generated exception-throwing code in case cal == nullptr. *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; return (UDate)0; } @@ -572,11 +572,11 @@ ucal_getLimit( const UCalendar* cal, U_CAPI const char * U_EXPORT2 ucal_getLocaleByType(const UCalendar *cal, ULocDataLocaleType type, UErrorCode* status) { - if (cal == NULL) { + if (cal == nullptr) { if (U_SUCCESS(*status)) { *status = U_ILLEGAL_ARGUMENT_ERROR; } - return NULL; + return nullptr; } return ((Calendar*)cal)->getLocaleID(type, *status); } @@ -617,7 +617,7 @@ U_CAPI const char * U_EXPORT2 ucal_getType(const UCalendar *cal, UErrorCode* status) { if (U_FAILURE(*status)) { - return NULL; + return nullptr; } return ((Calendar*)cal)->getType(); } @@ -662,8 +662,8 @@ ucal_getFieldDifference(UCalendar* cal, UDate target, static const UEnumeration defaultKeywordValues = { - NULL, - NULL, + nullptr, + nullptr, ulist_close_keyword_values_iterator, ulist_count_keyword_values, uenum_unextDefault, @@ -690,7 +690,7 @@ static const char * const CAL_TYPES[] = { "islamic-umalqura", "islamic-tbla", "islamic-rgsa", - NULL + nullptr }; U_CAPI UEnumeration* U_EXPORT2 @@ -700,16 +700,16 @@ 
ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool (void)ulocimp_getRegionForSupplementalData(locale, TRUE, prefRegion, sizeof(prefRegion), status); // Read preferred calendar values from supplementalData calendarPreference - UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", status); + UResourceBundle *rb = ures_openDirect(nullptr, "supplementalData", status); ures_getByKey(rb, "calendarPreferenceData", rb, status); - UResourceBundle *order = ures_getByKey(rb, prefRegion, NULL, status); - if (*status == U_MISSING_RESOURCE_ERROR && rb != NULL) { + UResourceBundle *order = ures_getByKey(rb, prefRegion, nullptr, status); + if (*status == U_MISSING_RESOURCE_ERROR && rb != nullptr) { *status = U_ZERO_ERROR; - order = ures_getByKey(rb, "001", NULL, status); + order = ures_getByKey(rb, "001", nullptr, status); } // Create a list of calendar type strings - UList *values = NULL; + UList *values = nullptr; if (U_SUCCESS(*status)) { values = ulist_createEmptyList(status); if (U_SUCCESS(*status)) { @@ -717,7 +717,7 @@ ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool int32_t len; const UChar *type = ures_getStringByIndex(order, i, &len, status); char *caltype = (char*)uprv_malloc(len + 1); - if (caltype == NULL) { + if (caltype == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; break; } @@ -732,7 +732,7 @@ ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool if (U_SUCCESS(*status) && !commonlyUsed) { // If not commonlyUsed, add other available values - for (int32_t i = 0; CAL_TYPES[i] != NULL; i++) { + for (int32_t i = 0; CAL_TYPES[i] != nullptr; i++) { if (!ulist_containsString(values, CAL_TYPES[i], (int32_t)uprv_strlen(CAL_TYPES[i]))) { ulist_addItemEndList(values, CAL_TYPES[i], FALSE, status); if (U_FAILURE(*status)) { @@ -743,7 +743,7 @@ ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool } if (U_FAILURE(*status)) { ulist_deleteList(values); - 
values = NULL; + values = nullptr; } } } @@ -751,16 +751,16 @@ ucal_getKeywordValuesForLocale(const char * /* key */, const char* locale, UBool ures_close(order); ures_close(rb); - if (U_FAILURE(*status) || values == NULL) { - return NULL; + if (U_FAILURE(*status) || values == nullptr) { + return nullptr; } // Create string enumeration UEnumeration *en = (UEnumeration*)uprv_malloc(sizeof(UEnumeration)); - if (en == NULL) { + if (en == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; ulist_deleteList(values); - return NULL; + return nullptr; } ulist_resetList(values); memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); @@ -778,7 +778,7 @@ ucal_getTimeZoneTransitionDate(const UCalendar* cal, UTimeZoneTransitionType typ UDate base = ((Calendar*)cal)->getTime(*status); const TimeZone& tz = ((Calendar*)cal)->getTimeZone(); const BasicTimeZone * btz = dynamic_cast<const BasicTimeZone *>(&tz); - if (btz != NULL && U_SUCCESS(*status)) { + if (btz != nullptr && U_SUCCESS(*status)) { TimeZoneTransition tzt; UBool inclusive = (type == UCAL_TZ_TRANSITION_NEXT_INCLUSIVE || type == UCAL_TZ_TRANSITION_PREVIOUS_INCLUSIVE); UBool result = (type == UCAL_TZ_TRANSITION_NEXT || type == UCAL_TZ_TRANSITION_NEXT_INCLUSIVE)? 
@@ -828,4 +828,28 @@ ucal_getTimeZoneIDForWindowsID(const UChar* winid, int32_t len, const char* regi return resultLen; } +U_CAPI void U_EXPORT2 ucal_getTimeZoneOffsetFromLocal( + const UCalendar* cal, + UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t* rawOffset, int32_t* dstOffset, UErrorCode* status) +{ + if (U_FAILURE(*status)) { + return; + } + UDate date = ((Calendar*)cal)->getTime(*status); + if (U_FAILURE(*status)) { + return; + } + const TimeZone& tz = ((Calendar*)cal)->getTimeZone(); + const BasicTimeZone* btz = dynamic_cast<const BasicTimeZone *>(&tz); + if (btz == nullptr) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + btz->getOffsetFromLocal( + date, nonExistingTimeOpt, duplicatedTimeOpt, + *rawOffset, *dstOffset, *status); +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/ucol_imp.h b/contrib/libs/icu/i18n/ucol_imp.h index a251fc461d..f463957fd4 100644 --- a/contrib/libs/icu/i18n/ucol_imp.h +++ b/contrib/libs/icu/i18n/ucol_imp.h @@ -41,10 +41,10 @@ * rules must be equivalent. 
* @param source first collator * @param target second collator - * @return TRUE or FALSE + * @return true or false * @internal ICU 3.0 */ -U_INTERNAL UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 ucol_equals(const UCollator *source, const UCollator *target); /** diff --git a/contrib/libs/icu/i18n/ucol_res.cpp b/contrib/libs/icu/i18n/ucol_res.cpp index aa4027eb87..b277cf3b28 100644 --- a/contrib/libs/icu/i18n/ucol_res.cpp +++ b/contrib/libs/icu/i18n/ucol_res.cpp @@ -623,7 +623,7 @@ public: virtual ~KeywordsSink(); virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { + UErrorCode &errorCode) override { if (U_FAILURE(errorCode)) { return; } ResourceTable collations = value.getTable(errorCode); for (int32_t i = 0; collations.getKeyAndValue(i, key, value); ++i) { diff --git a/contrib/libs/icu/i18n/ucol_sit.cpp b/contrib/libs/icu/i18n/ucol_sit.cpp index 92f332d6d0..4dc81aebcc 100644 --- a/contrib/libs/icu/i18n/ucol_sit.cpp +++ b/contrib/libs/icu/i18n/ucol_sit.cpp @@ -372,10 +372,7 @@ int32_t ucol_sit_dumpSpecs(CollatorSpec *s, char *destination, int32_t capacity, } len += s->entries[i].length(); } else { - len += s->entries[i].length(); - if(len < capacity) { - uprv_strncat(destination,s->entries[i].data(), s->entries[i].length()); - } + len += s->entries[i].extract(destination + len, capacity - len, *status); } } } diff --git a/contrib/libs/icu/i18n/ucsdet.cpp b/contrib/libs/icu/i18n/ucsdet.cpp index 46f69cf90c..63f204d0e1 100644 --- a/contrib/libs/icu/i18n/ucsdet.cpp +++ b/contrib/libs/icu/i18n/ucsdet.cpp @@ -193,7 +193,7 @@ ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *s return CharsetDetector::getAllDetectableCharsets(*status); } -U_DRAFT UEnumeration * U_EXPORT2 +U_CAPI UEnumeration * U_EXPORT2 ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status) { return ((CharsetDetector *)ucsd)->getDetectableCharsets(*status); diff --git a/contrib/libs/icu/i18n/udat.cpp 
b/contrib/libs/icu/i18n/udat.cpp index ab91bcff41..d9549d04c5 100644 --- a/contrib/libs/icu/i18n/udat.cpp +++ b/contrib/libs/icu/i18n/udat.cpp @@ -34,7 +34,7 @@ U_NAMESPACE_USE /** * Verify that fmt is a SimpleDateFormat. Invalid error if not. * @param fmt the UDateFormat, definitely a DateFormat, maybe something else - * @param status error code, will be set to failure if there is a familure or the fmt is NULL. + * @param status error code, will be set to failure if there is a failure or the fmt is NULL. */ static void verifyIsSimpleDateFormat(const UDateFormat* fmt, UErrorCode *status) { if(U_SUCCESS(*status) && @@ -82,19 +82,24 @@ static UCalendarDateFields gDateFieldMapping[] = { UCAL_ZONE_OFFSET, // UDAT_TIMEZONE_ISO_FIELD = 32 (also UCAL_DST_OFFSET) UCAL_ZONE_OFFSET, // UDAT_TIMEZONE_ISO_LOCAL_FIELD = 33 (also UCAL_DST_OFFSET) UCAL_EXTENDED_YEAR, // UDAT_RELATED_YEAR_FIELD = 34 (not an exact match) - UCAL_FIELD_COUNT, // UDAT_FIELD_COUNT = 35 + UCAL_FIELD_COUNT, // UDAT_AM_PM_MIDNIGHT_NOON_FIELD=35 (no match) + UCAL_FIELD_COUNT, // UDAT_FLEXIBLE_DAY_PERIOD_FIELD=36 (no match) + UCAL_FIELD_COUNT, // UDAT_TIME_SEPARATOR_FIELD = 37 (no match) + // UDAT_FIELD_COUNT = 38 as of ICU 67 // UCAL_IS_LEAP_MONTH is not the target of a mapping }; U_CAPI UCalendarDateFields U_EXPORT2 udat_toCalendarDateField(UDateFormatField field) { - return gDateFieldMapping[field]; + static_assert(UDAT_FIELD_COUNT == UPRV_LENGTHOF(gDateFieldMapping), + "UDateFormatField and gDateFieldMapping should have the same number of entries and be kept in sync."); + return (field >= UDAT_ERA_FIELD && field < UPRV_LENGTHOF(gDateFieldMapping))? gDateFieldMapping[field]: UCAL_FIELD_COUNT; } /* For now- one opener. 
*/ static UDateFormatOpener gOpener = NULL; -U_INTERNAL void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_registerOpener(UDateFormatOpener opener, UErrorCode *status) { if(U_FAILURE(*status)) return; @@ -107,7 +112,7 @@ udat_registerOpener(UDateFormatOpener opener, UErrorCode *status) umtx_unlock(NULL); } -U_INTERNAL UDateFormatOpener U_EXPORT2 +U_CAPI UDateFormatOpener U_EXPORT2 udat_unregisterOpener(UDateFormatOpener opener, UErrorCode *status) { if(U_FAILURE(*status)) return NULL; @@ -419,7 +424,7 @@ udat_setLenient( UDateFormat* fmt, ((DateFormat*)fmt)->setLenient(isLenient); } -U_DRAFT UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 udat_getBooleanAttribute(const UDateFormat* fmt, UDateFormatBooleanAttribute attr, UErrorCode* status) @@ -429,7 +434,7 @@ udat_getBooleanAttribute(const UDateFormat* fmt, //return FALSE; } -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_setBooleanAttribute(UDateFormat *fmt, UDateFormatBooleanAttribute attr, UBool newValue, @@ -452,7 +457,7 @@ udat_setCalendar(UDateFormat* fmt, ((DateFormat*)fmt)->setCalendar(*((Calendar*)calendarToSet)); } -U_DRAFT const UNumberFormat* U_EXPORT2 +U_CAPI const UNumberFormat* U_EXPORT2 udat_getNumberFormatForField(const UDateFormat* fmt, UChar field) { UErrorCode status = U_ZERO_ERROR; @@ -467,7 +472,7 @@ udat_getNumberFormat(const UDateFormat* fmt) return (const UNumberFormat*) ((DateFormat*)fmt)->getNumberFormat(); } -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_adoptNumberFormatForFields( UDateFormat* fmt, const UChar* fields, UNumberFormat* numberFormatToSet, @@ -489,7 +494,7 @@ udat_setNumberFormat(UDateFormat* fmt, ((DateFormat*)fmt)->setNumberFormat(*((NumberFormat*)numberFormatToSet)); } -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 udat_adoptNumberFormat( UDateFormat* fmt, UNumberFormat* numberFormatToAdopt) { @@ -699,6 +704,10 @@ udat_getSymbols(const UDateFormat *fmt, res = syms->getQuarters(count, DateFormatSymbols::FORMAT, DateFormatSymbols::ABBREVIATED); break; + case UDAT_NARROW_QUARTERS: 
+ res = syms->getQuarters(count, DateFormatSymbols::FORMAT, DateFormatSymbols::NARROW); + break; + case UDAT_STANDALONE_QUARTERS: res = syms->getQuarters(count, DateFormatSymbols::STANDALONE, DateFormatSymbols::WIDE); break; @@ -707,6 +716,10 @@ udat_getSymbols(const UDateFormat *fmt, res = syms->getQuarters(count, DateFormatSymbols::STANDALONE, DateFormatSymbols::ABBREVIATED); break; + case UDAT_STANDALONE_NARROW_QUARTERS: + res = syms->getQuarters(count, DateFormatSymbols::STANDALONE, DateFormatSymbols::NARROW); + break; + case UDAT_CYCLIC_YEARS_WIDE: res = syms->getYearNames(count, DateFormatSymbols::FORMAT, DateFormatSymbols::WIDE); break; @@ -837,6 +850,10 @@ udat_countSymbols( const UDateFormat *fmt, syms->getQuarters(count, DateFormatSymbols::FORMAT, DateFormatSymbols::ABBREVIATED); break; + case UDAT_NARROW_QUARTERS: + syms->getQuarters(count, DateFormatSymbols::FORMAT, DateFormatSymbols::NARROW); + break; + case UDAT_STANDALONE_QUARTERS: syms->getQuarters(count, DateFormatSymbols::STANDALONE, DateFormatSymbols::WIDE); break; @@ -845,6 +862,10 @@ udat_countSymbols( const UDateFormat *fmt, syms->getQuarters(count, DateFormatSymbols::STANDALONE, DateFormatSymbols::ABBREVIATED); break; + case UDAT_STANDALONE_NARROW_QUARTERS: + syms->getQuarters(count, DateFormatSymbols::STANDALONE, DateFormatSymbols::NARROW); + break; + case UDAT_CYCLIC_YEARS_WIDE: syms->getYearNames(count, DateFormatSymbols::FORMAT, DateFormatSymbols::WIDE); break; @@ -1044,6 +1065,13 @@ public: } static void + setNarrowQuarter(DateFormatSymbols *syms, int32_t index, + const UChar *value, int32_t valueLength, UErrorCode &errorCode) + { + setSymbol(syms->fNarrowQuarters, syms->fNarrowQuartersCount, index, value, valueLength, errorCode); + } + + static void setStandaloneQuarter(DateFormatSymbols *syms, int32_t index, const UChar *value, int32_t valueLength, UErrorCode &errorCode) { @@ -1058,6 +1086,13 @@ public: } static void + setStandaloneNarrowQuarter(DateFormatSymbols *syms, int32_t index, 
+ const UChar *value, int32_t valueLength, UErrorCode &errorCode) + { + setSymbol(syms->fStandaloneNarrowQuarters, syms->fStandaloneNarrowQuartersCount, index, value, valueLength, errorCode); + } + + static void setShortYearNames(DateFormatSymbols *syms, int32_t index, const UChar *value, int32_t valueLength, UErrorCode &errorCode) { @@ -1174,6 +1209,10 @@ udat_setSymbols( UDateFormat *format, DateFormatSymbolsSingleSetter::setShortQuarter(syms, index, value, valueLength, *status); break; + case UDAT_NARROW_QUARTERS: + DateFormatSymbolsSingleSetter::setNarrowQuarter(syms, index, value, valueLength, *status); + break; + case UDAT_STANDALONE_QUARTERS: DateFormatSymbolsSingleSetter::setStandaloneQuarter(syms, index, value, valueLength, *status); break; @@ -1182,6 +1221,10 @@ udat_setSymbols( UDateFormat *format, DateFormatSymbolsSingleSetter::setStandaloneShortQuarter(syms, index, value, valueLength, *status); break; + case UDAT_STANDALONE_NARROW_QUARTERS: + DateFormatSymbolsSingleSetter::setStandaloneNarrowQuarter(syms, index, value, valueLength, *status); + break; + case UDAT_CYCLIC_YEARS_ABBREVIATED: DateFormatSymbolsSingleSetter::setShortYearNames(syms, index, value, valueLength, *status); break; @@ -1242,7 +1285,7 @@ udat_getContext(const UDateFormat* fmt, UDisplayContextType type, UErrorCode* st /** * Verify that fmt is a RelativeDateFormat. Invalid error if not. * @param fmt the UDateFormat, definitely a DateFormat, maybe something else - * @param status error code, will be set to failure if there is a familure or the fmt is NULL. + * @param status error code, will be set to failure if there is a failure or the fmt is NULL. 
*/ static void verifyIsRelativeDateFormat(const UDateFormat* fmt, UErrorCode *status) { if(U_SUCCESS(*status) && diff --git a/contrib/libs/icu/i18n/udateintervalformat.cpp b/contrib/libs/icu/i18n/udateintervalformat.cpp index 388960384b..355744346a 100644 --- a/contrib/libs/icu/i18n/udateintervalformat.cpp +++ b/contrib/libs/icu/i18n/udateintervalformat.cpp @@ -18,6 +18,7 @@ #include "unicode/timezone.h" #include "unicode/locid.h" #include "unicode/unistr.h" +#include "unicode/udisplaycontext.h" #include "formattedval_impl.h" U_NAMESPACE_USE @@ -116,7 +117,7 @@ udtitvfmt_format(const UDateIntervalFormat* formatter, } -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 udtitvfmt_formatToResult( const UDateIntervalFormat* formatter, UDate fromDate, @@ -134,7 +135,7 @@ udtitvfmt_formatToResult( } } -U_DRAFT void U_EXPORT2 +U_CAPI void U_EXPORT2 udtitvfmt_formatCalendarToResult( const UDateIntervalFormat* formatter, UCalendar* fromCalendar, @@ -151,5 +152,25 @@ udtitvfmt_formatCalendarToResult( } } +U_CAPI void U_EXPORT2 +udtitvfmt_setContext(UDateIntervalFormat* formatter, + UDisplayContext value, + UErrorCode* status) { + if (U_FAILURE(*status)) { + return; + } + reinterpret_cast<DateIntervalFormat*>(formatter)->setContext( value, *status ); +} + +U_CAPI UDisplayContext U_EXPORT2 +udtitvfmt_getContext(const UDateIntervalFormat* formatter, + UDisplayContextType type, + UErrorCode* status) { + if (U_FAILURE(*status)) { + return (UDisplayContext)0; + } + return reinterpret_cast<const DateIntervalFormat*>(formatter)->getContext( type, *status ); +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/uitercollationiterator.h b/contrib/libs/icu/i18n/uitercollationiterator.h index 62b6f83419..e0da353f26 100644 --- a/contrib/libs/icu/i18n/uitercollationiterator.h +++ b/contrib/libs/icu/i18n/uitercollationiterator.h @@ -39,22 +39,22 @@ public: virtual ~UIterCollationIterator(); - virtual void resetToOffset(int32_t newOffset); + virtual void 
resetToOffset(int32_t newOffset) override; - virtual int32_t getOffset() const; + virtual int32_t getOffset() const override; - virtual UChar32 nextCodePoint(UErrorCode &errorCode); + virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; - virtual UChar32 previousCodePoint(UErrorCode &errorCode); + virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; protected: - virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); + virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; - virtual UChar handleGetTrailSurrogate(); + virtual UChar handleGetTrailSurrogate() override; - virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; - virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; UCharIterator &iter; }; @@ -71,22 +71,23 @@ public: virtual ~FCDUIterCollationIterator(); - virtual void resetToOffset(int32_t newOffset); + virtual void resetToOffset(int32_t newOffset) override; - virtual int32_t getOffset() const; + virtual int32_t getOffset() const override; - virtual UChar32 nextCodePoint(UErrorCode &errorCode); + virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; - virtual UChar32 previousCodePoint(UErrorCode &errorCode); + virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; protected: - virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); + virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; - virtual UChar handleGetTrailSurrogate(); + virtual UChar handleGetTrailSurrogate() override; - virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); - virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; + + virtual void 
backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; private: /** @@ -96,7 +97,7 @@ private: /** * Extends the FCD text segment forward or normalizes around pos. - * @return TRUE if success + * @return true if success */ UBool nextSegment(UErrorCode &errorCode); @@ -107,7 +108,7 @@ private: /** * Extends the FCD text segment backward or normalizes around pos. - * @return TRUE if success + * @return true if success */ UBool previousSegment(UErrorCode &errorCode); diff --git a/contrib/libs/icu/i18n/ulocdata.cpp b/contrib/libs/icu/i18n/ulocdata.cpp index 7f4e7b9b11..68b9e0cf63 100644 --- a/contrib/libs/icu/i18n/ulocdata.cpp +++ b/contrib/libs/icu/i18n/ulocdata.cpp @@ -172,7 +172,7 @@ ulocdata_getDelimiter(ULocaleData *uld, ULocaleDataDelimiterType type, return 0; } - delimiter = ures_getStringByKey(delimiterBundle, delimiterKeys[type], &len, &localStatus); + delimiter = ures_getStringByKeyWithFallback(delimiterBundle, delimiterKeys[type], &len, &localStatus); ures_close(delimiterBundle); if ( (localStatus == U_USING_DEFAULT_WARNING) && uld->noSubstitute ) { diff --git a/contrib/libs/icu/i18n/umsg.cpp b/contrib/libs/icu/i18n/umsg.cpp index 9a5344e019..c2d5a9a1f5 100644 --- a/contrib/libs/icu/i18n/umsg.cpp +++ b/contrib/libs/icu/i18n/umsg.cpp @@ -68,7 +68,7 @@ u_formatMessage(const char *locale, { va_list ap; int32_t actLen; - //argument checking defered to subsequent method calls + //argument checking deferred to subsequent method calls // start vararg processing va_start(ap, status); @@ -89,7 +89,7 @@ u_vformatMessage( const char *locale, UErrorCode *status) { - //argument checking defered to subsequent method calls + //argument checking deferred to subsequent method calls UMessageFormat *fmt = umsg_open(pattern,patternLength,locale,NULL,status); int32_t retVal = umsg_vformat(fmt,result,resultLength,ap,status); umsg_close(fmt); @@ -108,7 +108,7 @@ u_formatMessageWithError(const char *locale, { va_list ap; int32_t actLen; - //argument checking defered to 
subsequent method calls + //argument checking deferred to subsequent method calls // start vararg processing va_start(ap, status); @@ -130,7 +130,7 @@ u_vformatMessageWithError( const char *locale, UErrorCode *status) { - //argument checking defered to subsequent method calls + //argument checking deferred to subsequent method calls UMessageFormat *fmt = umsg_open(pattern,patternLength,locale,parseError,status); int32_t retVal = umsg_vformat(fmt,result,resultLength,ap,status); umsg_close(fmt); @@ -152,7 +152,7 @@ u_parseMessage( const char *locale, ...) { va_list ap; - //argument checking defered to subsequent method calls + //argument checking deferred to subsequent method calls // start vararg processing va_start(ap, status); @@ -171,7 +171,7 @@ u_vparseMessage(const char *locale, va_list ap, UErrorCode *status) { - //argument checking defered to subsequent method calls + //argument checking deferred to subsequent method calls UMessageFormat *fmt = umsg_open(pattern,patternLength,locale,NULL,status); int32_t count = 0; umsg_vparse(fmt,source,sourceLength,&count,ap,status); @@ -190,7 +190,7 @@ u_parseMessageWithError(const char *locale, { va_list ap; - //argument checking defered to subsequent method calls + //argument checking deferred to subsequent method calls // start vararg processing va_start(ap, status); @@ -209,7 +209,7 @@ u_vparseMessageWithError(const char *locale, UParseError *error, UErrorCode* status) { - //argument checking defered to subsequent method calls + //argument checking deferred to subsequent method calls UMessageFormat *fmt = umsg_open(pattern,patternLength,locale,error,status); int32_t count = 0; umsg_vparse(fmt,source,sourceLength,&count,ap,status); @@ -369,8 +369,8 @@ umsg_format( const UMessageFormat *fmt, { va_list ap; int32_t actLen; - //argument checking defered to last method call umsg_vformat which - //saves time when arguments are valid and we dont care when arguments are not + //argument checking deferred to last method call 
umsg_vformat which + //saves time when arguments are valid and we don't care when arguments are not //since we return an error anyway @@ -463,7 +463,7 @@ umsg_vformat( const UMessageFormat *fmt, default: // Unknown/unsupported argument type. - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } UnicodeString resultStr; @@ -490,8 +490,8 @@ umsg_parse( const UMessageFormat *fmt, ...) { va_list ap; - //argument checking defered to last method call umsg_vparse which - //saves time when arguments are valid and we dont care when arguments are not + //argument checking deferred to last method call umsg_vparse which + //saves time when arguments are valid and we don't care when arguments are not //since we return an error anyway // start vararg processing @@ -590,11 +590,11 @@ umsg_vparse(const UMessageFormat *fmt, // support kObject. When MessageFormat is changed to // understand MeasureFormats, modify this code to do the // right thing. [alan] - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; // better not happen! case Formattable::kArray: - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } } diff --git a/contrib/libs/icu/i18n/unesctrn.h b/contrib/libs/icu/i18n/unesctrn.h index 57dd8d32cf..e905c5bc5f 100644 --- a/contrib/libs/icu/i18n/unesctrn.h +++ b/contrib/libs/icu/i18n/unesctrn.h @@ -77,12 +77,12 @@ class UnescapeTransliterator : public Transliterator { /** * Transliterator API. */ - virtual UnescapeTransliterator* clone() const; + virtual UnescapeTransliterator* clone() const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -101,7 +101,7 @@ class UnescapeTransliterator : public Transliterator { * pos.contextLimit. Otherwise, assume the text is complete. 
*/ virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, - UBool isIncremental) const; + UBool isIncremental) const override; }; diff --git a/contrib/libs/icu/i18n/uni2name.cpp b/contrib/libs/icu/i18n/uni2name.cpp index 41d5c931e4..904da0207b 100644 --- a/contrib/libs/icu/i18n/uni2name.cpp +++ b/contrib/libs/icu/i18n/uni2name.cpp @@ -81,7 +81,7 @@ void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPos return; } - // Accomodate the longest possible name plus padding + // Accommodate the longest possible name plus padding char* buf = (char*) uprv_malloc(maxLen); if (buf == NULL) { offsets.start = offsets.limit; diff --git a/contrib/libs/icu/i18n/uni2name.h b/contrib/libs/icu/i18n/uni2name.h index 99309c8e0f..1e01d78423 100644 --- a/contrib/libs/icu/i18n/uni2name.h +++ b/contrib/libs/icu/i18n/uni2name.h @@ -48,12 +48,12 @@ class UnicodeNameTransliterator : public Transliterator { /** * Transliterator API. */ - virtual UnicodeNameTransliterator* clone() const; + virtual UnicodeNameTransliterator* clone() const override; /** * ICU "poor man's RTTI", returns a UClassID for the actual class. */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. @@ -72,7 +72,7 @@ class UnicodeNameTransliterator : public Transliterator { * pos.contextLimit. Otherwise, assume the text is complete. */ virtual void handleTransliterate(Replaceable& text, UTransPosition& offset, - UBool isIncremental) const; + UBool isIncremental) const override; private: /** diff --git a/contrib/libs/icu/i18n/units_complexconverter.cpp b/contrib/libs/icu/i18n/units_complexconverter.cpp new file mode 100644 index 0000000000..78cefbf7eb --- /dev/null +++ b/contrib/libs/icu/i18n/units_complexconverter.cpp @@ -0,0 +1,268 @@ +// © 2020 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include <cmath> + +#include "cmemory.h" +#include "number_decimalquantity.h" +#include "number_roundingutils.h" +#include "putilimp.h" +#include "uarrsort.h" +#include "uassert.h" +#include "unicode/fmtable.h" +#include "unicode/localpointer.h" +#include "unicode/measunit.h" +#include "unicode/measure.h" +#include "units_complexconverter.h" +#include "units_converter.h" + +U_NAMESPACE_BEGIN +namespace units { +ComplexUnitsConverter::ComplexUnitsConverter(const MeasureUnitImpl &targetUnit, + const ConversionRates &ratesInfo, UErrorCode &status) + : units_(targetUnit.extractIndividualUnitsWithIndices(status)) { + if (U_FAILURE(status)) { + return; + } + U_ASSERT(units_.length() != 0); + + // Just borrowing a pointer to the instance + MeasureUnitImpl *biggestUnit = &units_[0]->unitImpl; + for (int32_t i = 1; i < units_.length(); i++) { + if (UnitsConverter::compareTwoUnits(units_[i]->unitImpl, *biggestUnit, ratesInfo, status) > 0 && + U_SUCCESS(status)) { + biggestUnit = &units_[i]->unitImpl; + } + + if (U_FAILURE(status)) { + return; + } + } + + this->init(*biggestUnit, ratesInfo, status); +} + +ComplexUnitsConverter::ComplexUnitsConverter(StringPiece inputUnitIdentifier, + StringPiece outputUnitsIdentifier, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + MeasureUnitImpl inputUnit = MeasureUnitImpl::forIdentifier(inputUnitIdentifier, status); + MeasureUnitImpl outputUnits = MeasureUnitImpl::forIdentifier(outputUnitsIdentifier, status); + + this->units_ = outputUnits.extractIndividualUnitsWithIndices(status); + U_ASSERT(units_.length() != 0); + + this->init(inputUnit, ConversionRates(status), status); +} + +ComplexUnitsConverter::ComplexUnitsConverter(const MeasureUnitImpl &inputUnit, + const MeasureUnitImpl &outputUnits, + const ConversionRates &ratesInfo, UErrorCode &status) + : 
units_(outputUnits.extractIndividualUnitsWithIndices(status)) { + if (U_FAILURE(status)) { + return; + } + + U_ASSERT(units_.length() != 0); + + this->init(inputUnit, ratesInfo, status); +} + +void ComplexUnitsConverter::init(const MeasureUnitImpl &inputUnit, + const ConversionRates &ratesInfo, + UErrorCode &status) { + // Sorts units in descending order. Therefore, we return -1 if + // the left is bigger than right and so on. + auto descendingCompareUnits = [](const void *context, const void *left, const void *right) { + UErrorCode status = U_ZERO_ERROR; + + const auto *leftPointer = static_cast<const MeasureUnitImplWithIndex *const *>(left); + const auto *rightPointer = static_cast<const MeasureUnitImplWithIndex *const *>(right); + + // Multiply by -1 to sort in descending order + return (-1) * UnitsConverter::compareTwoUnits((**leftPointer).unitImpl, // + (**rightPointer).unitImpl, // + *static_cast<const ConversionRates *>(context), // + status); + }; + + uprv_sortArray(units_.getAlias(), // + units_.length(), // + sizeof units_[0], /* NOTE: we have already asserted that the units_ is not empty.*/ // + descendingCompareUnits, // + &ratesInfo, // + false, // + &status // + ); + + // In case the `outputUnits` are `UMEASURE_UNIT_MIXED` such as `foot+inch`. In this case we need more + // converters to convert from the `inputUnit` to the first unit in the `outputUnits`. Then, a + // converter from the first unit in the `outputUnits` to the second unit and so on. + // For Example: + // - inputUnit is `meter` + // - outputUnits is `foot+inch` + // - Therefore, we need to have two converters: + // 1. a converter from `meter` to `foot` + // 2. a converter from `foot` to `inch` + // - Therefore, if the input is `2 meter`: + // 1. convert `meter` to `foot` --> 2 meter to 6.56168 feet + // 2. convert the residual of 6.56168 feet (0.56168) to inches, which will be (6.74016 + // inches) + // 3. 
then, the final result will be (6 feet and 6.74016 inches) + for (int i = 0, n = units_.length(); i < n; i++) { + if (i == 0) { // first element + unitsConverters_.emplaceBackAndCheckErrorCode(status, inputUnit, units_[i]->unitImpl, + ratesInfo, status); + } else { + unitsConverters_.emplaceBackAndCheckErrorCode(status, units_[i - 1]->unitImpl, + units_[i]->unitImpl, ratesInfo, status); + } + + if (U_FAILURE(status)) { + return; + } + } +} + +UBool ComplexUnitsConverter::greaterThanOrEqual(double quantity, double limit) const { + U_ASSERT(unitsConverters_.length() > 0); + + // First converter converts to the biggest quantity. + double newQuantity = unitsConverters_[0]->convert(quantity); + return newQuantity >= limit; +} + +MaybeStackVector<Measure> ComplexUnitsConverter::convert(double quantity, + icu::number::impl::RoundingImpl *rounder, + UErrorCode &status) const { + // TODO: return an error for "foot-and-foot"? + MaybeStackVector<Measure> result; + int sign = 1; + if (quantity < 0) { + quantity *= -1; + sign = -1; + } + + // For N converters: + // - the first converter converts from the input unit to the largest unit, + // - the following N-2 converters convert to bigger units for which we want integers, + // - the Nth converter (index N-1) converts to the smallest unit, for which + // we keep a double. + MaybeStackArray<int64_t, 5> intValues(unitsConverters_.length() - 1, status); + if (U_FAILURE(status)) { + return result; + } + uprv_memset(intValues.getAlias(), 0, (unitsConverters_.length() - 1) * sizeof(int64_t)); + + for (int i = 0, n = unitsConverters_.length(); i < n; ++i) { + quantity = (*unitsConverters_[i]).convert(quantity); + if (i < n - 1) { + // If quantity is at the limits of double's precision from an + // integer value, we take that integer value. 
+ int64_t flooredQuantity = static_cast<int64_t>(floor(quantity * (1 + DBL_EPSILON))); + if (uprv_isNaN(quantity)) { + // With clang on Linux: floor does not support NaN, resulting in + // a giant negative number. For now, we produce "0 feet, NaN + // inches". TODO(icu-units#131): revisit desired output. + flooredQuantity = 0; + } + intValues[i] = flooredQuantity; + + // Keep the residual of the quantity. + // For example: `3.6 feet`, keep only `0.6 feet` + double remainder = quantity - flooredQuantity; + if (remainder < 0) { + // Because we nudged flooredQuantity up by eps, remainder may be + // negative: we must treat such a remainder as zero. + quantity = 0; + } else { + quantity = remainder; + } + } + } + + applyRounder(intValues, quantity, rounder, status); + + // Initialize empty result. We use a MaybeStackArray directly so we can + // assign pointers - for this privilege we have to take care of cleanup. + MaybeStackArray<Measure *, 4> tmpResult(unitsConverters_.length(), status); + if (U_FAILURE(status)) { + return result; + } + + // Package values into temporary Measure instances in tmpResult: + for (int i = 0, n = unitsConverters_.length(); i < n; ++i) { + if (i < n - 1) { + Formattable formattableQuantity(intValues[i] * sign); + // Measure takes ownership of the MeasureUnit* + MeasureUnit *type = new MeasureUnit(units_[i]->unitImpl.copy(status).build(status)); + tmpResult[units_[i]->index] = new Measure(formattableQuantity, type, status); + } else { // LAST ELEMENT + Formattable formattableQuantity(quantity * sign); + // Measure takes ownership of the MeasureUnit* + MeasureUnit *type = new MeasureUnit(units_[i]->unitImpl.copy(status).build(status)); + tmpResult[units_[i]->index] = new Measure(formattableQuantity, type, status); + } + } + + + // Transfer values into result and return: + for(int32_t i = 0, n = unitsConverters_.length(); i < n; ++i) { + U_ASSERT(tmpResult[i] != nullptr); + result.emplaceBackAndCheckErrorCode(status, *tmpResult[i]); + delete 
tmpResult[i]; + } + + return result; +} + +void ComplexUnitsConverter::applyRounder(MaybeStackArray<int64_t, 5> &intValues, double &quantity, + icu::number::impl::RoundingImpl *rounder, + UErrorCode &status) const { + if (rounder == nullptr) { + // Nothing to do for the quantity. + return; + } + + number::impl::DecimalQuantity decimalQuantity; + decimalQuantity.setToDouble(quantity); + rounder->apply(decimalQuantity, status); + if (U_FAILURE(status)) { + return; + } + quantity = decimalQuantity.toDouble(); + + int32_t lastIndex = unitsConverters_.length() - 1; + if (lastIndex == 0) { + // Only one element, no need to bubble up the carry + return; + } + + // Check if there's a carry, and bubble it back up the resulting intValues. + int64_t carry = static_cast<int64_t>(floor(unitsConverters_[lastIndex]->convertInverse(quantity) * (1 + DBL_EPSILON))); + if (carry <= 0) { + return; + } + quantity -= unitsConverters_[lastIndex]->convert(static_cast<double>(carry)); + intValues[lastIndex - 1] += carry; + + // We don't use the first converter: that one is for the input unit + for (int32_t j = lastIndex - 1; j > 0; j--) { + carry = static_cast<int64_t>(floor(unitsConverters_[j]->convertInverse(static_cast<double>(intValues[j])) * (1 + DBL_EPSILON))); + if (carry <= 0) { + return; + } + intValues[j] -= static_cast<int64_t>(round(unitsConverters_[j]->convert(static_cast<double>(carry)))); + intValues[j - 1] += carry; + } +} + +} // namespace units +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/units_complexconverter.h b/contrib/libs/icu/i18n/units_complexconverter.h new file mode 100644 index 0000000000..5c669b45dd --- /dev/null +++ b/contrib/libs/icu/i18n/units_complexconverter.h @@ -0,0 +1,134 @@ +// © 2020 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING +#ifndef __UNITS_COMPLEXCONVERTER_H__ +#define __UNITS_COMPLEXCONVERTER_H__ + +#include "cmemory.h" +#include "measunit_impl.h" +#include "number_roundingutils.h" +#include "unicode/errorcode.h" +#include "unicode/measure.h" +#include "units_converter.h" +#include "units_data.h" + +U_NAMESPACE_BEGIN + +// Export explicit template instantiations of MaybeStackArray, MemoryPool and +// MaybeStackVector. This is required when building DLLs for Windows. (See +// datefmt.h, collationiterator.h, erarules.h and others for similar examples.) +// +// Note: These need to be outside of the units namespace, or Clang will generate +// a compile error. +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +template class U_I18N_API MaybeStackArray<units::UnitsConverter*, 8>; +template class U_I18N_API MemoryPool<units::UnitsConverter, 8>; +template class U_I18N_API MaybeStackVector<units::UnitsConverter, 8>; +template class U_I18N_API MaybeStackArray<MeasureUnitImpl*, 8>; +template class U_I18N_API MemoryPool<MeasureUnitImpl, 8>; +template class U_I18N_API MaybeStackVector<MeasureUnitImpl, 8>; +template class U_I18N_API MaybeStackArray<MeasureUnit*, 8>; +template class U_I18N_API MemoryPool<MeasureUnit, 8>; +template class U_I18N_API MaybeStackVector<MeasureUnit, 8>; +#endif + +namespace units { + +/** + * Converts from single or compound unit to single, compound or mixed units. + * For example, from `meter` to `foot+inch`. + * + * DESIGN: + * This class uses `UnitsConverter` in order to perform the single converter (i.e. converters from a + * single unit to another single unit). Therefore, `ComplexUnitsConverter` class contains multiple + * instances of the `UnitsConverter` to perform the conversion. 
+ */ +class U_I18N_API ComplexUnitsConverter : public UMemory { + public: + /** + * Constructs `ComplexUnitsConverter` for an `targetUnit` that could be Single, Compound or Mixed. + * In case of: + * 1- Single and Compound units, + * the conversion will not perform anything, the input will be equal to the output. + * 2- Mixed Unit + * the conversion will consider the input is the biggest unit. And will convert it to be spread + * through the target units. For example: if target unit is "inch-and-foot", and the input is 2.5. + * The converter will consider the input value in "foot", because foot is the biggest unit. + * Then, it will convert 2.5 feet to "inch-and-foot". + * + * @param targetUnit could be any units type (single, compound or mixed). + * @param ratesInfo + * @param status + */ + ComplexUnitsConverter(const MeasureUnitImpl &targetUnit, const ConversionRates &ratesInfo, + UErrorCode &status); + /** + * Constructor of `ComplexUnitsConverter`. + * NOTE: + * - inputUnit and outputUnits must be under the same category + * - e.g. meter to feet and inches --> all of them are length units. + * + * @param inputUnit represents the source unit. (should be single or compound unit). + * @param outputUnits represents the output unit. could be any type. (single, compound or mixed). + * @param status + */ + ComplexUnitsConverter(StringPiece inputUnitIdentifier, StringPiece outputUnitsIdentifier, + UErrorCode &status); + + /** + * Constructor of `ComplexUnitsConverter`. + * NOTE: + * - inputUnit and outputUnits must be under the same category + * - e.g. meter to feet and inches --> all of them are length units. + * + * @param inputUnit represents the source unit. (should be single or compound unit). + * @param outputUnits represents the output unit. could be any type. (single, compound or mixed). + * @param ratesInfo a ConversionRates instance containing the unit conversion rates. 
+ * @param status + */ + ComplexUnitsConverter(const MeasureUnitImpl &inputUnit, const MeasureUnitImpl &outputUnits, + const ConversionRates &ratesInfo, UErrorCode &status); + + // Returns true if the specified `quantity` of the `inputUnit`, expressed in terms of the biggest + // unit in the MeasureUnit `outputUnit`, is greater than or equal to `limit`. + // For example, if the input unit is `meter` and the target unit is `foot+inch`. Therefore, this + // function will convert the `quantity` from `meter` to `foot`, then, it will compare the value in + // `foot` with the `limit`. + UBool greaterThanOrEqual(double quantity, double limit) const; + + // Returns outputMeasures which is an array with the corresponding values. + // - E.g. converting meters to feet and inches. + // 1 meter --> 3 feet, 3.3701 inches + // NOTE: + // the smallest element is the only element that could have fractional values. And all + // other elements are floored to the nearest integer + MaybeStackVector<Measure> + convert(double quantity, icu::number::impl::RoundingImpl *rounder, UErrorCode &status) const; + + private: + MaybeStackVector<UnitsConverter> unitsConverters_; + + // Individual units of mixed units, sorted big to small, with indices + // indicating the requested output mixed unit order. + MaybeStackVector<MeasureUnitImplWithIndex> units_; + + // Sorts units_, which must be populated before calling this, and populates + // unitsConverters_. + void init(const MeasureUnitImpl &inputUnit, const ConversionRates &ratesInfo, UErrorCode &status); + + // Applies the rounder to the quantity (last element) and bubble up any carried value to all the + // intValues. + // TODO(ICU-21288): get smarter about precision for mixed units. 
+ void applyRounder(MaybeStackArray<int64_t, 5> &intValues, double &quantity, + icu::number::impl::RoundingImpl *rounder, UErrorCode &status) const; +}; + +} // namespace units +U_NAMESPACE_END + +#endif //__UNITS_COMPLEXCONVERTER_H__ + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/units_converter.cpp b/contrib/libs/icu/i18n/units_converter.cpp new file mode 100644 index 0000000000..7e946e584b --- /dev/null +++ b/contrib/libs/icu/i18n/units_converter.cpp @@ -0,0 +1,633 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "charstr.h" +#include "cmemory.h" +#include "double-conversion-string-to-double.h" +#include "measunit_impl.h" +#include "uassert.h" +#include "unicode/errorcode.h" +#include "unicode/localpointer.h" +#include "unicode/stringpiece.h" +#include "units_converter.h" +#include <algorithm> +#include <cmath> +#include <stdlib.h> +#include <utility> + +U_NAMESPACE_BEGIN +namespace units { + +void U_I18N_API Factor::multiplyBy(const Factor &rhs) { + factorNum *= rhs.factorNum; + factorDen *= rhs.factorDen; + for (int i = 0; i < CONSTANTS_COUNT; i++) { + constantExponents[i] += rhs.constantExponents[i]; + } + + // NOTE + // We need the offset when the source and the target are simple units. e.g. the source is + // celsius and the target is Fahrenheit. Therefore, we just keep the value using `std::max`. + offset = std::max(rhs.offset, offset); +} + +void U_I18N_API Factor::divideBy(const Factor &rhs) { + factorNum *= rhs.factorDen; + factorDen *= rhs.factorNum; + for (int i = 0; i < CONSTANTS_COUNT; i++) { + constantExponents[i] -= rhs.constantExponents[i]; + } + + // NOTE + // We need the offset when the source and the target are simple units. e.g. the source is + // celsius and the target is Fahrenheit. Therefore, we just keep the value using `std::max`. 
+ offset = std::max(rhs.offset, offset); +} + +void U_I18N_API Factor::power(int32_t power) { + // multiply all the constant by the power. + for (int i = 0; i < CONSTANTS_COUNT; i++) { + constantExponents[i] *= power; + } + + bool shouldFlip = power < 0; // This means that after applying the absolute power, we should flip + // the Numerator and Denominator. + + factorNum = std::pow(factorNum, std::abs(power)); + factorDen = std::pow(factorDen, std::abs(power)); + + if (shouldFlip) { + // Flip Numerator and Denominator. + std::swap(factorNum, factorDen); + } +} + +void U_I18N_API Factor::applyPrefix(UMeasurePrefix unitPrefix) { + if (unitPrefix == UMeasurePrefix::UMEASURE_PREFIX_ONE) { + // No need to do anything + return; + } + + int32_t prefixPower = umeas_getPrefixPower(unitPrefix); + double prefixFactor = std::pow((double)umeas_getPrefixBase(unitPrefix), (double)std::abs(prefixPower)); + if (prefixPower >= 0) { + factorNum *= prefixFactor; + } else { + factorDen *= prefixFactor; + } +} + +void U_I18N_API Factor::substituteConstants() { + for (int i = 0; i < CONSTANTS_COUNT; i++) { + if (this->constantExponents[i] == 0) { + continue; + } + + auto absPower = std::abs(this->constantExponents[i]); + Signum powerSig = this->constantExponents[i] < 0 ? Signum::NEGATIVE : Signum::POSITIVE; + double absConstantValue = std::pow(constantsValues[i], absPower); + + if (powerSig == Signum::NEGATIVE) { + this->factorDen *= absConstantValue; + } else { + this->factorNum *= absConstantValue; + } + + this->constantExponents[i] = 0; + } +} + +namespace { + +/* Helpers */ + +using icu::double_conversion::StringToDoubleConverter; + +// TODO: Make this a shared-utility function. +// Returns `double` from a scientific number(i.e. "1", "2.01" or "3.09E+4") +double strToDouble(StringPiece strNum, UErrorCode &status) { + // We are processing well-formed input, so we don't need any special options to + // StringToDoubleConverter. 
+ StringToDoubleConverter converter(0, 0, 0, "", ""); + int32_t count; + double result = converter.StringToDouble(strNum.data(), strNum.length(), &count); + if (count != strNum.length()) { + status = U_INVALID_FORMAT_ERROR; + } + + return result; +} + +// Returns `double` from a scientific number that could has a division sign (i.e. "1", "2.01", "3.09E+4" +// or "2E+2/3") +double strHasDivideSignToDouble(StringPiece strWithDivide, UErrorCode &status) { + int divisionSignInd = -1; + for (int i = 0, n = strWithDivide.length(); i < n; ++i) { + if (strWithDivide.data()[i] == '/') { + divisionSignInd = i; + break; + } + } + + if (divisionSignInd >= 0) { + return strToDouble(strWithDivide.substr(0, divisionSignInd), status) / + strToDouble(strWithDivide.substr(divisionSignInd + 1), status); + } + + return strToDouble(strWithDivide, status); +} + +/* + Adds single factor to a `Factor` object. Single factor means "23^2", "23.3333", "ft2m^3" ...etc. + However, complex factor are not included, such as "ft2m^3*200/3" +*/ +void addFactorElement(Factor &factor, StringPiece elementStr, Signum signum, UErrorCode &status) { + StringPiece baseStr; + StringPiece powerStr; + int32_t power = + 1; // In case the power is not written, then, the power is equal 1 ==> `ft2m^1` == `ft2m` + + // Search for the power part + int32_t powerInd = -1; + for (int32_t i = 0, n = elementStr.length(); i < n; ++i) { + if (elementStr.data()[i] == '^') { + powerInd = i; + break; + } + } + + if (powerInd > -1) { + // There is power + baseStr = elementStr.substr(0, powerInd); + powerStr = elementStr.substr(powerInd + 1); + + power = static_cast<int32_t>(strToDouble(powerStr, status)); + } else { + baseStr = elementStr; + } + + addSingleFactorConstant(baseStr, power, signum, factor, status); +} + +/* + * Extracts `Factor` from a complete string factor. e.g. 
"ft2m^3*1007/cup2m3*3" + */ +Factor extractFactorConversions(StringPiece stringFactor, UErrorCode &status) { + Factor result; + Signum signum = Signum::POSITIVE; + auto factorData = stringFactor.data(); + for (int32_t i = 0, start = 0, n = stringFactor.length(); i < n; i++) { + if (factorData[i] == '*' || factorData[i] == '/') { + StringPiece factorElement = stringFactor.substr(start, i - start); + addFactorElement(result, factorElement, signum, status); + + start = i + 1; // Set `start` to point to the start of the new element. + } else if (i == n - 1) { + // Last element + addFactorElement(result, stringFactor.substr(start, i + 1), signum, status); + } + + if (factorData[i] == '/') { + signum = Signum::NEGATIVE; // Change the signum because we reached the Denominator. + } + } + + return result; +} + +// Load factor for a single source +Factor loadSingleFactor(StringPiece source, const ConversionRates &ratesInfo, UErrorCode &status) { + const auto conversionUnit = ratesInfo.extractConversionInfo(source, status); + if (U_FAILURE(status)) return Factor(); + if (conversionUnit == nullptr) { + status = U_INTERNAL_PROGRAM_ERROR; + return Factor(); + } + + Factor result = extractFactorConversions(conversionUnit->factor.toStringPiece(), status); + result.offset = strHasDivideSignToDouble(conversionUnit->offset.toStringPiece(), status); + + return result; +} + +// Load Factor of a compound source unit. +// In ICU4J, this is a pair of ConversionRates.getFactorToBase() functions. 
+Factor loadCompoundFactor(const MeasureUnitImpl &source, const ConversionRates &ratesInfo, + UErrorCode &status) { + + Factor result; + for (int32_t i = 0, n = source.singleUnits.length(); i < n; i++) { + SingleUnitImpl singleUnit = *source.singleUnits[i]; + + Factor singleFactor = loadSingleFactor(singleUnit.getSimpleUnitID(), ratesInfo, status); + if (U_FAILURE(status)) return result; + + // Prefix before power, because: + // - square-kilometer to square-meter: (1000)^2 + // - square-kilometer to square-foot (approximate): (3.28*1000)^2 + singleFactor.applyPrefix(singleUnit.unitPrefix); + + // Apply the power of the `dimensionality` + singleFactor.power(singleUnit.dimensionality); + + result.multiplyBy(singleFactor); + } + + return result; +} + +/** + * Checks if the source unit and the target unit are simple. For example celsius or fahrenheit. But not + * square-celsius or square-fahrenheit. + * + * NOTE: + * Empty unit means simple unit. + * + * In ICU4J, this is ConversionRates.checkSimpleUnit(). + */ +UBool checkSimpleUnit(const MeasureUnitImpl &unit, UErrorCode &status) { + if (U_FAILURE(status)) return false; + + if (unit.complexity != UMEASURE_UNIT_SINGLE) { + return false; + } + if (unit.singleUnits.length() == 0) { + // Empty units means simple unit. + return true; + } + + auto singleUnit = *(unit.singleUnits[0]); + + if (singleUnit.dimensionality != 1 || singleUnit.unitPrefix != UMEASURE_PREFIX_ONE) { + return false; + } + + return true; +} + +/** + * Extract conversion rate from `source` to `target` + */ +// In ICU4J, this function is partially inlined in the UnitsConverter constructor. +void loadConversionRate(ConversionRate &conversionRate, const MeasureUnitImpl &source, + const MeasureUnitImpl &target, Convertibility unitsState, + const ConversionRates &ratesInfo, UErrorCode &status) { + // Represents the conversion factor from the source to the target. 
+ Factor finalFactor; + + // Represents the conversion factor from the source to the base unit that specified in the conversion + // data which is considered as the root of the source and the target. + Factor sourceToBase = loadCompoundFactor(source, ratesInfo, status); + Factor targetToBase = loadCompoundFactor(target, ratesInfo, status); + + // Merger Factors + finalFactor.multiplyBy(sourceToBase); + if (unitsState == Convertibility::CONVERTIBLE) { + finalFactor.divideBy(targetToBase); + } else if (unitsState == Convertibility::RECIPROCAL) { + finalFactor.multiplyBy(targetToBase); + } else { + status = UErrorCode::U_ARGUMENT_TYPE_MISMATCH; + return; + } + + finalFactor.substituteConstants(); + + conversionRate.factorNum = finalFactor.factorNum; + conversionRate.factorDen = finalFactor.factorDen; + + // This code corresponds to ICU4J's ConversionRates.getOffset(). + // In case of simple units (such as: celsius or fahrenheit), offsets are considered. + if (checkSimpleUnit(source, status) && checkSimpleUnit(target, status)) { + conversionRate.sourceOffset = + sourceToBase.offset * sourceToBase.factorDen / sourceToBase.factorNum; + conversionRate.targetOffset = + targetToBase.offset * targetToBase.factorDen / targetToBase.factorNum; + } + // TODO(icu-units#127): should we consider failure if there's an offset for + // a not-simple-unit? What about kilokelvin / kilocelsius? 
+ + conversionRate.reciprocal = unitsState == Convertibility::RECIPROCAL; +} + +struct UnitIndexAndDimension : UMemory { + int32_t index = 0; + int32_t dimensionality = 0; + + UnitIndexAndDimension(const SingleUnitImpl &singleUnit, int32_t multiplier) { + index = singleUnit.index; + dimensionality = singleUnit.dimensionality * multiplier; + } +}; + +void mergeSingleUnitWithDimension(MaybeStackVector<UnitIndexAndDimension> &unitIndicesWithDimension, + const SingleUnitImpl &shouldBeMerged, int32_t multiplier) { + for (int32_t i = 0; i < unitIndicesWithDimension.length(); i++) { + auto &unitWithIndex = *unitIndicesWithDimension[i]; + if (unitWithIndex.index == shouldBeMerged.index) { + unitWithIndex.dimensionality += shouldBeMerged.dimensionality * multiplier; + return; + } + } + + unitIndicesWithDimension.emplaceBack(shouldBeMerged, multiplier); +} + +void mergeUnitsAndDimensions(MaybeStackVector<UnitIndexAndDimension> &unitIndicesWithDimension, + const MeasureUnitImpl &shouldBeMerged, int32_t multiplier) { + for (int32_t unit_i = 0; unit_i < shouldBeMerged.singleUnits.length(); unit_i++) { + auto singleUnit = *shouldBeMerged.singleUnits[unit_i]; + mergeSingleUnitWithDimension(unitIndicesWithDimension, singleUnit, multiplier); + } +} + +UBool checkAllDimensionsAreZeros(const MaybeStackVector<UnitIndexAndDimension> &dimensionVector) { + for (int32_t i = 0; i < dimensionVector.length(); i++) { + if (dimensionVector[i]->dimensionality != 0) { + return false; + } + } + + return true; +} + +} // namespace + +// Conceptually, this modifies factor: factor *= baseStr^(signum*power). +// +// baseStr must be a known constant or a value that strToDouble() is able to +// parse. 
+void U_I18N_API addSingleFactorConstant(StringPiece baseStr, int32_t power, Signum signum, + Factor &factor, UErrorCode &status) { + if (baseStr == "ft_to_m") { + factor.constantExponents[CONSTANT_FT2M] += power * signum; + } else if (baseStr == "ft2_to_m2") { + factor.constantExponents[CONSTANT_FT2M] += 2 * power * signum; + } else if (baseStr == "ft3_to_m3") { + factor.constantExponents[CONSTANT_FT2M] += 3 * power * signum; + } else if (baseStr == "in3_to_m3") { + factor.constantExponents[CONSTANT_FT2M] += 3 * power * signum; + factor.factorDen *= 12 * 12 * 12; + } else if (baseStr == "gal_to_m3") { + factor.factorNum *= 231; + factor.constantExponents[CONSTANT_FT2M] += 3 * power * signum; + factor.factorDen *= 12 * 12 * 12; + } else if (baseStr == "gal_imp_to_m3") { + factor.constantExponents[CONSTANT_GAL_IMP2M3] += power * signum; + } else if (baseStr == "G") { + factor.constantExponents[CONSTANT_G] += power * signum; + } else if (baseStr == "gravity") { + factor.constantExponents[CONSTANT_GRAVITY] += power * signum; + } else if (baseStr == "lb_to_kg") { + factor.constantExponents[CONSTANT_LB2KG] += power * signum; + } else if (baseStr == "glucose_molar_mass") { + factor.constantExponents[CONSTANT_GLUCOSE_MOLAR_MASS] += power * signum; + } else if (baseStr == "item_per_mole") { + factor.constantExponents[CONSTANT_ITEM_PER_MOLE] += power * signum; + } else if (baseStr == "PI") { + factor.constantExponents[CONSTANT_PI] += power * signum; + } else { + if (signum == Signum::NEGATIVE) { + factor.factorDen *= std::pow(strToDouble(baseStr, status), power); + } else { + factor.factorNum *= std::pow(strToDouble(baseStr, status), power); + } + } +} + +/** + * Extracts the compound base unit of a compound unit (`source`). 
For example, if the source unit is + * `square-mile-per-hour`, the compound base unit will be `square-meter-per-second` + */ +MeasureUnitImpl U_I18N_API extractCompoundBaseUnit(const MeasureUnitImpl &source, + const ConversionRates &conversionRates, + UErrorCode &status) { + + MeasureUnitImpl result; + if (U_FAILURE(status)) return result; + + const auto &singleUnits = source.singleUnits; + for (int i = 0, count = singleUnits.length(); i < count; ++i) { + const auto &singleUnit = *singleUnits[i]; + // Extract `ConversionRateInfo` using the absolute unit. For example: in case of `square-meter`, + // we will use `meter` + const auto rateInfo = + conversionRates.extractConversionInfo(singleUnit.getSimpleUnitID(), status); + if (U_FAILURE(status)) { + return result; + } + if (rateInfo == nullptr) { + status = U_INTERNAL_PROGRAM_ERROR; + return result; + } + + // Multiply the power of the singleUnit by the power of the baseUnit. For example, square-hectare + // must be pow4-meter. (NOTE: hectare --> square-meter) + auto baseUnits = + MeasureUnitImpl::forIdentifier(rateInfo->baseUnit.toStringPiece(), status).singleUnits; + for (int32_t i = 0, baseUnitsCount = baseUnits.length(); i < baseUnitsCount; i++) { + baseUnits[i]->dimensionality *= singleUnit.dimensionality; + // TODO: Deal with SI-prefix + result.appendSingleUnit(*baseUnits[i], status); + + if (U_FAILURE(status)) { + return result; + } + } + } + + return result; +} + +/** + * Determine the convertibility between `source` and `target`. + * For example: + * `meter` and `foot` are `CONVERTIBLE`. + * `meter-per-second` and `second-per-meter` are `RECIPROCAL`. + * `meter` and `pound` are `UNCONVERTIBLE`. + * + * NOTE: + * Only works with SINGLE and COMPOUND units. If one of the units is a + * MIXED unit, an error will occur. For more information, see UMeasureUnitComplexity. 
+ */ +Convertibility U_I18N_API extractConvertibility(const MeasureUnitImpl &source, + const MeasureUnitImpl &target, + const ConversionRates &conversionRates, + UErrorCode &status) { + + if (source.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED || + target.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { + status = U_ARGUMENT_TYPE_MISMATCH; + return UNCONVERTIBLE; + } + + MeasureUnitImpl sourceBaseUnit = extractCompoundBaseUnit(source, conversionRates, status); + MeasureUnitImpl targetBaseUnit = extractCompoundBaseUnit(target, conversionRates, status); + if (U_FAILURE(status)) return UNCONVERTIBLE; + + MaybeStackVector<UnitIndexAndDimension> convertible; + MaybeStackVector<UnitIndexAndDimension> reciprocal; + + mergeUnitsAndDimensions(convertible, sourceBaseUnit, 1); + mergeUnitsAndDimensions(reciprocal, sourceBaseUnit, 1); + + mergeUnitsAndDimensions(convertible, targetBaseUnit, -1); + mergeUnitsAndDimensions(reciprocal, targetBaseUnit, 1); + + if (checkAllDimensionsAreZeros(convertible)) { + return CONVERTIBLE; + } + + if (checkAllDimensionsAreZeros(reciprocal)) { + return RECIPROCAL; + } + + return UNCONVERTIBLE; +} + +UnitsConverter::UnitsConverter(const MeasureUnitImpl &source, const MeasureUnitImpl &target, + const ConversionRates &ratesInfo, UErrorCode &status) + : conversionRate_(source.copy(status), target.copy(status)) { + this->init(ratesInfo, status); +} + +UnitsConverter::UnitsConverter(StringPiece sourceIdentifier, StringPiece targetIdentifier, + UErrorCode &status) + : conversionRate_(MeasureUnitImpl::forIdentifier(sourceIdentifier, status), + MeasureUnitImpl::forIdentifier(targetIdentifier, status)) { + if (U_FAILURE(status)) { + return; + } + + ConversionRates ratesInfo(status); + this->init(ratesInfo, status); +} + +void UnitsConverter::init(const ConversionRates &ratesInfo, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + + if (this->conversionRate_.source.complexity == 
UMeasureUnitComplexity::UMEASURE_UNIT_MIXED || + this->conversionRate_.target.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { + status = U_ARGUMENT_TYPE_MISMATCH; + return; + } + + Convertibility unitsState = extractConvertibility(this->conversionRate_.source, + this->conversionRate_.target, ratesInfo, status); + if (U_FAILURE(status)) return; + if (unitsState == Convertibility::UNCONVERTIBLE) { + status = U_ARGUMENT_TYPE_MISMATCH; + return; + } + + loadConversionRate(conversionRate_, conversionRate_.source, conversionRate_.target, unitsState, + ratesInfo, status); + +} + +int32_t UnitsConverter::compareTwoUnits(const MeasureUnitImpl &firstUnit, + const MeasureUnitImpl &secondUnit, + const ConversionRates &ratesInfo, UErrorCode &status) { + if (U_FAILURE(status)) { + return 0; + } + + if (firstUnit.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED || + secondUnit.complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { + status = U_ARGUMENT_TYPE_MISMATCH; + return 0; + } + + Convertibility unitsState = extractConvertibility(firstUnit, secondUnit, ratesInfo, status); + if (U_FAILURE(status)) { + return 0; + } + + if (unitsState == Convertibility::UNCONVERTIBLE || unitsState == Convertibility::RECIPROCAL) { + status = U_ARGUMENT_TYPE_MISMATCH; + return 0; + } + + // Represents the conversion factor from the firstUnit to the base + // unit that specified in the conversion data which is considered as + // the root of the firstUnit and the secondUnit. 
+ Factor firstUnitToBase = loadCompoundFactor(firstUnit, ratesInfo, status); + Factor secondUnitToBase = loadCompoundFactor(secondUnit, ratesInfo, status); + + firstUnitToBase.substituteConstants(); + secondUnitToBase.substituteConstants(); + + double firstUnitToBaseConversionRate = firstUnitToBase.factorNum / firstUnitToBase.factorDen; + double secondUnitToBaseConversionRate = secondUnitToBase.factorNum / secondUnitToBase.factorDen; + + double diff = firstUnitToBaseConversionRate - secondUnitToBaseConversionRate; + if (diff > 0) { + return 1; + } + + if (diff < 0) { + return -1; + } + + return 0; +} + +double UnitsConverter::convert(double inputValue) const { + double result = + inputValue + conversionRate_.sourceOffset; // Reset the input to the target zero index. + // Convert the quantity to from the source scale to the target scale. + result *= conversionRate_.factorNum / conversionRate_.factorDen; + + result -= conversionRate_.targetOffset; // Set the result to its index. + + if (conversionRate_.reciprocal) { + if (result == 0) { + // TODO: demonstrate the resulting behaviour in tests... and figure + // out desired behaviour. (Theoretical result should be infinity, + // not 0.) + return 0.0; + } + result = 1.0 / result; + } + + return result; +} + +double UnitsConverter::convertInverse(double inputValue) const { + double result = inputValue; + if (conversionRate_.reciprocal) { + if (result == 0) { + // TODO: demonstrate the resulting behaviour in tests... and figure + // out desired behaviour. (Theoretical result should be infinity, + // not 0.) 
+ return 0.0; + } + result = 1.0 / result; + } + result += conversionRate_.targetOffset; + result *= conversionRate_.factorDen / conversionRate_.factorNum; + result -= conversionRate_.sourceOffset; + return result; +} + +ConversionInfo UnitsConverter::getConversionInfo() const { + ConversionInfo result; + result.conversionRate = conversionRate_.factorNum / conversionRate_.factorDen; + result.offset = + (conversionRate_.sourceOffset * (conversionRate_.factorNum / conversionRate_.factorDen)) - + conversionRate_.targetOffset; + result.reciprocal = conversionRate_.reciprocal; + + return result; +} + +} // namespace units +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/units_converter.h b/contrib/libs/icu/i18n/units_converter.h new file mode 100644 index 0000000000..5c002f4147 --- /dev/null +++ b/contrib/libs/icu/i18n/units_converter.h @@ -0,0 +1,220 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING +#ifndef __UNITS_CONVERTER_H__ +#define __UNITS_CONVERTER_H__ + +#include "cmemory.h" +#include "measunit_impl.h" +#include "unicode/errorcode.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" +#include "units_converter.h" +#include "units_data.h" + +U_NAMESPACE_BEGIN +namespace units { + +/* Internal Structure */ + +// Constants corresponding to unitConstants in CLDR's units.xml. +enum Constants { + CONSTANT_FT2M, // ft_to_m + CONSTANT_PI, // PI + CONSTANT_GRAVITY, // Gravity of earth (9.80665 m/s^2), "g". + CONSTANT_G, // Newtonian constant of gravitation, "G". + CONSTANT_GAL_IMP2M3, // Gallon imp to m3 + CONSTANT_LB2KG, // Pound to Kilogram + CONSTANT_GLUCOSE_MOLAR_MASS, + CONSTANT_ITEM_PER_MOLE, + + // Must be the last element. + CONSTANTS_COUNT +}; + +// These values are a hard-coded subset of unitConstants in the units +// resources file. 
A unit test checks that all constants in the resource +// file are at least recognised by the code. Derived constants' values or +// hard-coded derivations are not checked. +// In ICU4J, these constants live in UnitConverter.Factor.getConversionRate(). +static const double constantsValues[CONSTANTS_COUNT] = { + 0.3048, // CONSTANT_FT2M + 411557987.0 / 131002976.0, // CONSTANT_PI + 9.80665, // CONSTANT_GRAVITY + 6.67408E-11, // CONSTANT_G + 0.00454609, // CONSTANT_GAL_IMP2M3 + 0.45359237, // CONSTANT_LB2KG + 180.1557, // CONSTANT_GLUCOSE_MOLAR_MASS + 6.02214076E+23, // CONSTANT_ITEM_PER_MOLE +}; + +typedef enum Signum { + NEGATIVE = -1, + POSITIVE = 1, +} Signum; + +/* Represents a conversion factor */ +struct U_I18N_API Factor { + double factorNum = 1; + double factorDen = 1; + double offset = 0; + bool reciprocal = false; + + // Exponents for the symbolic constants + int32_t constantExponents[CONSTANTS_COUNT] = {}; + + void multiplyBy(const Factor &rhs); + void divideBy(const Factor &rhs); + + // Apply the power to the factor. + void power(int32_t power); + + // Apply SI or binary prefix to the Factor. + void applyPrefix(UMeasurePrefix unitPrefix); + + // Does an in-place substitution of the "symbolic constants" based on + // constantExponents (resetting the exponents). + // + // In ICU4J, see UnitConverter.Factor.getConversionRate(). + void substituteConstants(); +}; + +struct U_I18N_API ConversionInfo { + double conversionRate; + double offset; + bool reciprocal; +}; + +/* + * Adds a single factor element to the `Factor`. e.g "ft3m", "2.333" or "cup2m3". But not "cup2m3^3". + */ +void U_I18N_API addSingleFactorConstant(StringPiece baseStr, int32_t power, Signum sigNum, + Factor &factor, UErrorCode &status); + +/** + * Represents the conversion rate between `source` and `target`. 
+ */ +struct U_I18N_API ConversionRate : public UMemory { + const MeasureUnitImpl source; + const MeasureUnitImpl target; + double factorNum = 1; + double factorDen = 1; + double sourceOffset = 0; + double targetOffset = 0; + bool reciprocal = false; + + ConversionRate(MeasureUnitImpl &&source, MeasureUnitImpl &&target) + : source(std::move(source)), target(std::move(target)) {} +}; + +enum Convertibility { + RECIPROCAL, + CONVERTIBLE, + UNCONVERTIBLE, +}; + +MeasureUnitImpl U_I18N_API extractCompoundBaseUnit(const MeasureUnitImpl &source, + const ConversionRates &conversionRates, + UErrorCode &status); + +/** + * Check if the convertibility between `source` and `target`. + * For example: + * `meter` and `foot` are `CONVERTIBLE`. + * `meter-per-second` and `second-per-meter` are `RECIPROCAL`. + * `meter` and `pound` are `UNCONVERTIBLE`. + * + * NOTE: + * Only works with SINGLE and COMPOUND units. If one of the units is a + * MIXED unit, an error will occur. For more information, see UMeasureUnitComplexity. + */ +Convertibility U_I18N_API extractConvertibility(const MeasureUnitImpl &source, + const MeasureUnitImpl &target, + const ConversionRates &conversionRates, + UErrorCode &status); + +/** + * Converts from a source `MeasureUnit` to a target `MeasureUnit`. + * + * NOTE: + * Only works with SINGLE and COMPOUND units. If one of the units is a + * MIXED unit, an error will occur. For more information, see UMeasureUnitComplexity. + */ +class U_I18N_API UnitsConverter : public UMemory { + public: + /** + * Constructor of `UnitConverter`. + * NOTE: + * - source and target must be under the same category + * - e.g. meter to mile --> both of them are length units. + * NOTE: + * This constructor creates an instance of `ConversionRates` internally. + * + * @param sourceIdentifier represents the source unit identifier. + * @param targetIdentifier represents the target unit identifier. 
+ * @param status + */ + UnitsConverter(StringPiece sourceIdentifier, StringPiece targetIdentifier, UErrorCode &status); + + /** + * Constructor of `UnitConverter`. + * NOTE: + * - source and target must be under the same category + * - e.g. meter to mile --> both of them are length units. + * + * @param source represents the source unit. + * @param target represents the target unit. + * @param ratesInfo Contains all the needed conversion rates. + * @param status + */ + UnitsConverter(const MeasureUnitImpl &source, const MeasureUnitImpl &target, + const ConversionRates &ratesInfo, UErrorCode &status); + + /** + * Compares two single units and returns 1 if the first one is greater, -1 if the second + * one is greater and 0 if they are equal. + * + * NOTE: + * Compares only single units that are convertible. + */ + static int32_t compareTwoUnits(const MeasureUnitImpl &firstUnit, const MeasureUnitImpl &SecondUnit, + const ConversionRates &ratesInfo, UErrorCode &status); + + /** + * Convert a measurement expressed in the source unit to a measurement + * expressed in the target unit. + * + * @param inputValue the value to be converted. + * @return the converted value. + */ + double convert(double inputValue) const; + + /** + * The inverse of convert(): convert a measurement expressed in the target + * unit to a measurement expressed in the source unit. + * + * @param inputValue the value to be converted. + * @return the converted value. + */ + double convertInverse(double inputValue) const; + + ConversionInfo getConversionInfo() const; + + private: + ConversionRate conversionRate_; + + /** + * Initialises the object. 
+ */ + void init(const ConversionRates &ratesInfo, UErrorCode &status); +}; + +} // namespace units +U_NAMESPACE_END + +#endif //__UNITS_CONVERTER_H__ + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/units_data.cpp b/contrib/libs/icu/i18n/units_data.cpp new file mode 100644 index 0000000000..d1d1fc5bc0 --- /dev/null +++ b/contrib/libs/icu/i18n/units_data.cpp @@ -0,0 +1,413 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "cstring.h" +#include "number_decimalquantity.h" +#include "resource.h" +#include "uassert.h" +#include "unicode/unistr.h" +#include "unicode/ures.h" +#include "units_data.h" +#include "uresimp.h" +#include "util.h" +#include <utility> + +U_NAMESPACE_BEGIN +namespace units { + +namespace { + +using icu::number::impl::DecimalQuantity; + +void trimSpaces(CharString& factor, UErrorCode& status){ + CharString trimmed; + for (int i = 0 ; i < factor.length(); i++) { + if (factor[i] == ' ') continue; + + trimmed.append(factor[i], status); + } + + factor = std::move(trimmed); +} + +/** + * A ResourceSink that collects conversion rate information. + * + * This class is for use by ures_getAllItemsWithFallback. + */ +class ConversionRateDataSink : public ResourceSink { + public: + /** + * Constructor. + * @param out The vector to which ConversionRateInfo instances are to be + * added. This vector must outlive the use of the ResourceSink. + */ + explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {} + + /** + * Method for use by `ures_getAllItemsWithFallback`. Adds the unit + * conversion rates that are found in `value` to the output vector. + * + * @param source This string must be "convertUnits": the resource that this + * class supports reading. + * @param value The "convertUnits" resource, containing unit conversion rate + * information. 
+ * @param noFallback Ignored. + * @param status The standard ICU error code output parameter. + */ + void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { + if (U_FAILURE(status)) { return; } + if (uprv_strcmp(source, "convertUnits") != 0) { + // This is very strict, however it is the cheapest way to be sure + // that with `value`, we're looking at the convertUnits table. + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + ResourceTable conversionRateTable = value.getTable(status); + const char *srcUnit; + // We're reusing `value`, which seems to be a common pattern: + for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) { + ResourceTable unitTable = value.getTable(status); + const char *key; + UnicodeString baseUnit = ICU_Utility::makeBogusString(); + UnicodeString factor = ICU_Utility::makeBogusString(); + UnicodeString offset = ICU_Utility::makeBogusString(); + for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) { + if (uprv_strcmp(key, "target") == 0) { + baseUnit = value.getUnicodeString(status); + } else if (uprv_strcmp(key, "factor") == 0) { + factor = value.getUnicodeString(status); + } else if (uprv_strcmp(key, "offset") == 0) { + offset = value.getUnicodeString(status); + } + } + if (U_FAILURE(status)) { return; } + if (baseUnit.isBogus() || factor.isBogus()) { + // We could not find a usable conversion rate: bad resource. + status = U_MISSING_RESOURCE_ERROR; + return; + } + + // We don't have this ConversionRateInfo yet: add it. 
+ ConversionRateInfo *cr = outVector->emplaceBack(); + if (!cr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } else { + cr->sourceUnit.append(srcUnit, status); + cr->baseUnit.appendInvariantChars(baseUnit, status); + cr->factor.appendInvariantChars(factor, status); + trimSpaces(cr->factor, status); + if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status); + } + } + return; + } + + private: + MaybeStackVector<ConversionRateInfo> *outVector; +}; + +bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) { + return a.compareTo(b) < 0; +} + +/** + * A ResourceSink that collects unit preferences information. + * + * This class is for use by ures_getAllItemsWithFallback. + */ +class UnitPreferencesSink : public ResourceSink { + public: + /** + * Constructor. + * @param outPrefs The vector to which UnitPreference instances are to be + * added. This vector must outlive the use of the ResourceSink. + * @param outMetadata The vector to which UnitPreferenceMetadata instances + * are to be added. This vector must outlive the use of the ResourceSink. + */ + explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs, + MaybeStackVector<UnitPreferenceMetadata> *outMetadata) + : preferences(outPrefs), metadata(outMetadata) {} + + /** + * Method for use by `ures_getAllItemsWithFallback`. Adds the unit + * preferences info that are found in `value` to the output vectors. + * + * @param key This string must be "unitPreferenceData": the resource that + * this class supports reading. + * @param value The "unitPreferenceData" resource, containing unit + * preferences data. + * @param noFallback Ignored. + * @param status The standard ICU error code output parameter. Note: if an + * error is returned, outPrefs and outMetadata may be inconsistent.
+ */ + void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { + if (U_FAILURE(status)) { return; } + if (uprv_strcmp(key, "unitPreferenceData") != 0) { + // This is very strict, however it is the cheapest way to be sure + // that with `value`, we're looking at the unitPreferenceData table. + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + // The unitPreferenceData structure (see data/misc/units.txt) contains a + // hierarchy of category/usage/region, within which are a set of + // preferences. Hence three for-loops and another loop for the + // preferences themselves: + ResourceTable unitPreferenceDataTable = value.getTable(status); + const char *category; + for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) { + ResourceTable categoryTable = value.getTable(status); + const char *usage; + for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) { + ResourceTable regionTable = value.getTable(status); + const char *region; + for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) { + // `value` now contains the set of preferences for + // category/usage/region. + ResourceArray unitPrefs = value.getArray(status); + if (U_FAILURE(status)) { return; } + int32_t prefLen = unitPrefs.getSize(); + + // Update metadata for this set of preferences. + UnitPreferenceMetadata *meta = metadata->emplaceBack( + category, usage, region, preferences->length(), prefLen, status); + if (!meta) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (U_FAILURE(status)) { return; } + if (metadata->length() > 1) { + // Verify that unit preferences are sorted and + // without duplicates. + if (!(*(*metadata)[metadata->length() - 2] < + *(*metadata)[metadata->length() - 1])) { + status = U_INVALID_FORMAT_ERROR; + return; + } + } + + // Collect the individual preferences.
+ for (int32_t i = 0; unitPrefs.getValue(i, value); i++) { + UnitPreference *up = preferences->emplaceBack(); + if (!up) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + ResourceTable unitPref = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) { + if (uprv_strcmp(key, "unit") == 0) { + int32_t length; + const UChar *u = value.getString(length, status); + up->unit.appendInvariantChars(u, length, status); + } else if (uprv_strcmp(key, "geq") == 0) { + int32_t length; + const UChar *g = value.getString(length, status); + CharString geq; + geq.appendInvariantChars(g, length, status); + DecimalQuantity dq; + dq.setToDecNumber(geq.data(), status); + up->geq = dq.toDouble(); + } else if (uprv_strcmp(key, "skeleton") == 0) { + up->skeleton = value.getUnicodeString(status); + } + } + } + } + } + } + } + + private: + MaybeStackVector<UnitPreference> *preferences; + MaybeStackVector<UnitPreferenceMetadata> *metadata; +}; + +int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata, + const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage, + bool *foundRegion, UErrorCode &status) { + if (U_FAILURE(status)) { return -1; } + int32_t start = 0; + int32_t end = metadata->length(); + *foundCategory = false; + *foundUsage = false; + *foundRegion = false; + while (start < end) { + int32_t mid = (start + end) / 2; + int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion); + if (cmp < 0) { + start = mid + 1; + } else if (cmp > 0) { + end = mid; + } else { + return mid; + } + } + return -1; +} + +/** + * Finds the UnitPreferenceMetadata instance that matches the given category, + * usage and region: if missing, region falls back to "001", and usage + * repeatedly drops tailing components, eventually trying "default" + * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default"). 
+ * + * @param metadata The full list of UnitPreferenceMetadata instances. + * @param category The category to search for. See getUnitCategory(). + * @param usage The usage for which formatting preferences are needed. If the + * given usage is not known, automatic fallback occurs, see function description + * above. + * @param region The region for which preferences are needed. If there are no + * region-specific preferences, this function automatically falls back to the + * "001" region (global). + * @param status The standard ICU error code output parameter. + * * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR. + * * If fallback to "default" or "001" didn't resolve, status will be + * U_MISSING_RESOURCE_ERROR. + * @return The index into the metadata vector which represents the appropriate + * preferences. If appropriate preferences are not found, -1 is returned. + */ +int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata, + StringPiece category, StringPiece usage, StringPiece region, + UErrorCode &status) { + if (U_FAILURE(status)) { return -1; } + bool foundCategory, foundUsage, foundRegion; + UnitPreferenceMetadata desired(category, usage, region, -1, -1, status); + int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); + if (U_FAILURE(status)) { return -1; } + if (idx >= 0) { return idx; } + if (!foundCategory) { + // TODO: failures can happen if units::getUnitCategory returns a category + // that does not appear in unitPreferenceData. Do we want a unit test that + // checks unitPreferenceData has full coverage of categories? Or just trust + // CLDR?
+ status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + U_ASSERT(foundCategory); + while (!foundUsage) { + int32_t lastDashIdx = desired.usage.lastIndexOf('-'); + if (lastDashIdx > 0) { + desired.usage.truncate(lastDashIdx); + } else if (uprv_strcmp(desired.usage.data(), "default") != 0) { + desired.usage.truncate(0).append("default", status); + } else { + // "default" is not supposed to be missing for any valid category. + status = U_MISSING_RESOURCE_ERROR; + return -1; + } + idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); + if (U_FAILURE(status)) { return -1; } + } + U_ASSERT(foundCategory); + U_ASSERT(foundUsage); + if (!foundRegion) { + if (uprv_strcmp(desired.region.data(), "001") != 0) { + desired.region.truncate(0).append("001", status); + idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); + } + if (!foundRegion) { + // "001" is not supposed to be missing for any valid usage. + status = U_MISSING_RESOURCE_ERROR; + return -1; + } + } + U_ASSERT(foundCategory); + U_ASSERT(foundUsage); + U_ASSERT(foundRegion); + U_ASSERT(idx >= 0); + return idx; +} + +} // namespace + +UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage, + StringPiece region, int32_t prefsOffset, + int32_t prefsCount, UErrorCode &status) { + this->category.append(category, status); + this->usage.append(usage, status); + this->region.append(region, status); + this->prefsOffset = prefsOffset; + this->prefsCount = prefsCount; +} + +int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const { + int32_t cmp = uprv_strcmp(category.data(), other.category.data()); + if (cmp == 0) { + cmp = uprv_strcmp(usage.data(), other.usage.data()); + } + if (cmp == 0) { + cmp = uprv_strcmp(region.data(), other.region.data()); + } + return cmp; +} + +int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory, + bool *foundUsage, bool 
*foundRegion) const { + int32_t cmp = uprv_strcmp(category.data(), other.category.data()); + if (cmp == 0) { + *foundCategory = true; + cmp = uprv_strcmp(usage.data(), other.usage.data()); + } + if (cmp == 0) { + *foundUsage = true; + cmp = uprv_strcmp(region.data(), other.region.data()); + } + if (cmp == 0) { + *foundRegion = true; + } + return cmp; +} + +// TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace? +void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) { + LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); + ConversionRateDataSink sink(&result); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status); +} + +const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source, + UErrorCode &status) const { + for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) { + if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i]; + } + + status = U_INTERNAL_PROGRAM_ERROR; + return nullptr; +} + +U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) { + LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); + UnitPreferencesSink sink(&unitPrefs_, &metadata_); + ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status); +} + +// TODO: make outPreferences const? +// +// TODO: consider replacing `UnitPreference **&outPreferences` with slice class +// of some kind. 
+void U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage, + StringPiece region, + const UnitPreference *const *&outPreferences, + int32_t &preferenceCount, UErrorCode &status) const { + int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status); + if (U_FAILURE(status)) { + outPreferences = nullptr; + preferenceCount = 0; + return; + } + U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`. + const UnitPreferenceMetadata *m = metadata_[idx]; + outPreferences = unitPrefs_.getAlias() + m->prefsOffset; + preferenceCount = m->prefsCount; +} + +} // namespace units +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/units_data.h b/contrib/libs/icu/i18n/units_data.h new file mode 100644 index 0000000000..2c19b9434b --- /dev/null +++ b/contrib/libs/icu/i18n/units_data.h @@ -0,0 +1,213 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING +#ifndef __UNITS_DATA_H__ +#define __UNITS_DATA_H__ + +#include <limits> + +#include "charstr.h" +#include "cmemory.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN +namespace units { + +/** + * Encapsulates "convertUnits" information from units resources, specifying how + * to convert from one unit to another. + * + * Information in this class is still in the form of strings: symbolic constants + * need to be interpreted. Rationale: symbols can cancel out for higher + * precision conversion - going from feet to inches should cancel out the + * `ft_to_m` constant. 
+ */ +class U_I18N_API ConversionRateInfo : public UMemory { + public: + ConversionRateInfo() {} + ConversionRateInfo(StringPiece sourceUnit, StringPiece baseUnit, StringPiece factor, + StringPiece offset, UErrorCode &status) + : sourceUnit(), baseUnit(), factor(), offset() { + this->sourceUnit.append(sourceUnit, status); + this->baseUnit.append(baseUnit, status); + this->factor.append(factor, status); + this->offset.append(offset, status); + } + CharString sourceUnit; + CharString baseUnit; + CharString factor; + CharString offset; +}; + +} // namespace units + +// Export explicit template instantiations of MaybeStackArray, MemoryPool and +// MaybeStackVector. This is required when building DLLs for Windows. (See +// datefmt.h, collationiterator.h, erarules.h and others for similar examples.) +// +// Note: These need to be outside of the units namespace, or Clang will generate +// a compile error. +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +template class U_I18N_API MaybeStackArray<units::ConversionRateInfo*, 8>; +template class U_I18N_API MemoryPool<units::ConversionRateInfo, 8>; +template class U_I18N_API MaybeStackVector<units::ConversionRateInfo, 8>; +#endif + +namespace units { + +/** + * Returns ConversionRateInfo for all supported conversions. + * + * @param result Receives the set of conversion rates. + * @param status Receives status. + */ +void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status); + +/** + * Contains all the supported conversion rates. + */ +class U_I18N_API ConversionRates { + public: + /** + * Constructor + * + * @param status Receives status. + */ + ConversionRates(UErrorCode &status) { getAllConversionRates(conversionInfo_, status); } + + /** + * Returns a pointer to the conversion rate info that match the `source`. + * + * @param source Contains the source. + * @param status Receives status. 
+ */ + const ConversionRateInfo *extractConversionInfo(StringPiece source, UErrorCode &status) const; + + private: + MaybeStackVector<ConversionRateInfo> conversionInfo_; +}; + +// Encapsulates unitPreferenceData information from units resources, specifying +// a sequence of output unit preferences. +struct U_I18N_API UnitPreference : public UMemory { + // Set geq to 1.0 by default + UnitPreference() : geq(1.0) {} + CharString unit; + double geq; + UnicodeString skeleton; +}; + +/** + * Metadata about the preferences in UnitPreferences::unitPrefs_. + * + * This class owns all of its data. + * + * UnitPreferenceMetadata is exported from the `units` namespace, but it should + * only be useful to internal code and unit testing code. + */ +class U_I18N_API UnitPreferenceMetadata : public UMemory { + public: + UnitPreferenceMetadata() {} + // Constructor, makes copies of the parameters passed to it. + UnitPreferenceMetadata(StringPiece category, StringPiece usage, StringPiece region, + int32_t prefsOffset, int32_t prefsCount, UErrorCode &status); + + // Unit category (e.g. "length", "mass", "electric-capacitance"). + CharString category; + // Usage (e.g. "road", "vehicle-fuel", "blood-glucose"). Every category + // should have an entry for "default" usage. TODO(hugovdm): add a test for + // this. + CharString usage; + // Region code (e.g. "US", "CZ", "001"). Every usage should have an entry + // for the "001" region ("world"). TODO(hugovdm): add a test for this. + CharString region; + // Offset into the UnitPreferences::unitPrefs_ list where the relevant + // preferences are found. + int32_t prefsOffset; + // The number of preferences that form this set.
+ int32_t prefsCount; + + int32_t compareTo(const UnitPreferenceMetadata &other) const; + int32_t compareTo(const UnitPreferenceMetadata &other, bool *foundCategory, bool *foundUsage, + bool *foundRegion) const; +}; + +} // namespace units + +// Export explicit template instantiations of MaybeStackArray, MemoryPool and +// MaybeStackVector. This is required when building DLLs for Windows. (See +// datefmt.h, collationiterator.h, erarules.h and others for similar examples.) +// +// Note: These need to be outside of the units namespace, or Clang will generate +// a compile error. +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +template class U_I18N_API MaybeStackArray<units::UnitPreferenceMetadata*, 8>; +template class U_I18N_API MemoryPool<units::UnitPreferenceMetadata, 8>; +template class U_I18N_API MaybeStackVector<units::UnitPreferenceMetadata, 8>; +template class U_I18N_API MaybeStackArray<units::UnitPreference*, 8>; +template class U_I18N_API MemoryPool<units::UnitPreference, 8>; +template class U_I18N_API MaybeStackVector<units::UnitPreference, 8>; +#endif + +namespace units { + +/** + * Unit Preferences information for various locales and usages. + */ +class U_I18N_API UnitPreferences { + public: + /** + * Constructor, loads all the preference data. + * + * @param status Receives status. + */ + UnitPreferences(UErrorCode &status); + + /** + * Returns the set of unit preferences in the particular category that best + * matches the specified usage and region. + * + * If region can't be found, falls back to global (001). If usage can't be + * found, falls back to "default". + * + * @param category The category within which to look up usage and region. + * (TODO(hugovdm): improve docs on how to find the category, once the lookup + * function is added.) + * @param usage The usage parameter. (TODO(hugovdm): improve this + * documentation. Add reference to some list of usages we support.) 
If the + * given usage is not found, the method automatically falls back to + * "default". + * @param region The region whose preferences are desired. If there are no + * specific preferences for the requested region, the method automatically + * falls back to region "001" ("world"). + * @param outPreferences A pointer into an array of preferences: essentially + * an array slice in combination with preferenceCount. + * @param preferenceCount The number of unit preferences that belong to the + * result set. + * @param status Receives status. + * + * TODO(hugovdm): maybe replace `UnitPreference **&outPreferences` with a slice class? + */ + void getPreferencesFor(StringPiece category, StringPiece usage, StringPiece region, + const UnitPreference *const *&outPreferences, int32_t &preferenceCount, + UErrorCode &status) const; + + protected: + // Metadata about the sets of preferences, this is the index for looking up + // preferences in the unitPrefs_ list. + MaybeStackVector<UnitPreferenceMetadata> metadata_; + // All the preferences as a flat list: which usage and region preferences + // are associated with is stored in `metadata_`. + MaybeStackVector<UnitPreference> unitPrefs_; +}; + +} // namespace units +U_NAMESPACE_END + +#endif //__UNITS_DATA_H__ + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/units_router.cpp b/contrib/libs/icu/i18n/units_router.cpp new file mode 100644 index 0000000000..0e6082fae5 --- /dev/null +++ b/contrib/libs/icu/i18n/units_router.cpp @@ -0,0 +1,152 @@ +// © 2020 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "measunit_impl.h" +#include "number_decimalquantity.h" +#include "number_roundingutils.h" +#include "resource.h" +#include "unicode/measure.h" +#include "units_data.h" +#include "units_router.h" +#include <cmath> + +U_NAMESPACE_BEGIN +namespace units { + +using number::Precision; +using number::impl::parseIncrementOption; + +Precision UnitsRouter::parseSkeletonToPrecision(icu::UnicodeString precisionSkeleton, + UErrorCode &status) { + if (U_FAILURE(status)) { + // As a member of UsagePrefsHandler, which is a friend of Precision, we + // get access to the default constructor. + return {}; + } + constexpr int32_t kSkelPrefixLen = 20; + if (!precisionSkeleton.startsWith(UNICODE_STRING_SIMPLE("precision-increment/"))) { + status = U_INVALID_FORMAT_ERROR; + return {}; + } + U_ASSERT(precisionSkeleton[kSkelPrefixLen - 1] == u'/'); + StringSegment segment(precisionSkeleton, false); + segment.adjustOffset(kSkelPrefixLen); + Precision result; + parseIncrementOption(segment, result, status); + return result; +} + +UnitsRouter::UnitsRouter(StringPiece inputUnitIdentifier, StringPiece region, StringPiece usage, + UErrorCode &status) { + this->init(MeasureUnit::forIdentifier(inputUnitIdentifier, status), region, usage, status); +} + +UnitsRouter::UnitsRouter(const MeasureUnit &inputUnit, StringPiece region, StringPiece usage, + UErrorCode &status) { + this->init(std::move(inputUnit), region, usage, status); +} + +void UnitsRouter::init(const MeasureUnit &inputUnit, StringPiece region, StringPiece usage, + UErrorCode &status) { + + if (U_FAILURE(status)) { + return; + } + + // TODO: do we want to pass in ConversionRates and UnitPreferences instead + // of loading in each UnitsRouter instance? (Or make global?) 
+ ConversionRates conversionRates(status); + UnitPreferences prefs(status); + + MeasureUnitImpl inputUnitImpl = MeasureUnitImpl::forMeasureUnitMaybeCopy(inputUnit, status); + MeasureUnitImpl baseUnitImpl = + (extractCompoundBaseUnit(inputUnitImpl, conversionRates, status)); + CharString category = getUnitQuantity(baseUnitImpl, status); + if (U_FAILURE(status)) { + return; + } + + const UnitPreference *const *unitPreferences; + int32_t preferencesCount = 0; + prefs.getPreferencesFor(category.toStringPiece(), usage, region, unitPreferences, preferencesCount, + status); + + for (int i = 0; i < preferencesCount; ++i) { + U_ASSERT(unitPreferences[i] != nullptr); + const auto &preference = *unitPreferences[i]; + + MeasureUnitImpl complexTargetUnitImpl = + MeasureUnitImpl::forIdentifier(preference.unit.data(), status); + if (U_FAILURE(status)) { + return; + } + + UnicodeString precision = preference.skeleton; + + // For now, we only have "precision-increment" in Units Preferences skeleton. + // Therefore, we check if the skeleton starts with "precision-increment" and force the program to + // fail otherwise. + // NOTE: + // It is allowed to have an empty precision. 
+ if (!precision.isEmpty() && !precision.startsWith(u"precision-increment", 19)) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } + + outputUnits_.emplaceBackAndCheckErrorCode(status, + complexTargetUnitImpl.copy(status).build(status)); + converterPreferences_.emplaceBackAndCheckErrorCode(status, inputUnitImpl, complexTargetUnitImpl, + preference.geq, std::move(precision), + conversionRates, status); + + if (U_FAILURE(status)) { + return; + } + } +} + +RouteResult UnitsRouter::route(double quantity, icu::number::impl::RoundingImpl *rounder, UErrorCode &status) const { + // Find the matching preference + const ConverterPreference *converterPreference = nullptr; + for (int32_t i = 0, n = converterPreferences_.length(); i < n; i++) { + converterPreference = converterPreferences_[i]; + if (converterPreference->converter.greaterThanOrEqual(std::abs(quantity) * (1 + DBL_EPSILON), + converterPreference->limit)) { + break; + } + } + U_ASSERT(converterPreference != nullptr); + + // Set up the rounder for this preference's precision + if (rounder != nullptr && rounder->fPrecision.isBogus()) { + if (converterPreference->precision.length() > 0) { + rounder->fPrecision = parseSkeletonToPrecision(converterPreference->precision, status); + } else { + // We use the same rounding mode as COMPACT notation: known to be a + // human-friendly rounding mode: integers, but add a decimal digit + // as needed to ensure we have at least 2 significant digits. + rounder->fPrecision = Precision::integer().withMinDigits(2); + } + } + + return RouteResult(converterPreference->converter.convert(quantity, rounder, status), + converterPreference->targetUnit.copy(status)); +} + +const MaybeStackVector<MeasureUnit> *UnitsRouter::getOutputUnits() const { + // TODO: consider pulling this from converterPreferences_ and dropping + // outputUnits_? 
+ return &outputUnits_; +} + +} // namespace units +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/units_router.h b/contrib/libs/icu/i18n/units_router.h new file mode 100644 index 0000000000..b3300f7e27 --- /dev/null +++ b/contrib/libs/icu/i18n/units_router.h @@ -0,0 +1,166 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING +#ifndef __UNITS_ROUTER_H__ +#define __UNITS_ROUTER_H__ + +#include <limits> + +#include "cmemory.h" +#include "measunit_impl.h" +#include "unicode/measunit.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" +#include "units_complexconverter.h" +#include "units_data.h" + +U_NAMESPACE_BEGIN + +// Forward declarations +class Measure; +namespace number { +class Precision; +} + +namespace units { + +struct RouteResult : UMemory { + // A list of measures: a single measure for single units, multiple measures + // for mixed units. + // + // TODO(icu-units/icu#21): figure out the right mixed unit API. + MaybeStackVector<Measure> measures; + + // The output unit for this RouteResult. This may be a MIXED unit - for + // example: "yard-and-foot-and-inch", for which `measures` will have three + // elements. + MeasureUnitImpl outputUnit; + + RouteResult(MaybeStackVector<Measure> measures, MeasureUnitImpl outputUnit) + : measures(std::move(measures)), outputUnit(std::move(outputUnit)) {} +}; + +/** + * Contains the complex unit converter and the limit, which represents the smallest value that the + * converter should accept. For example, if the converter is converting to `foot+inch` and the limit + * equals 3.0, this means the converter should not convert to a value less than `3.0 feet`. + * + * NOTE: + * if the limit does not have a value (i.e. it is `std::numeric_limits<double>::lowest()`), this means + * there is no limit for the converter.
+ */ +struct ConverterPreference : UMemory { + ComplexUnitsConverter converter; + double limit; + UnicodeString precision; + + // The output unit for this ConverterPreference. This may be a MIXED unit - + // for example: "yard-and-foot-and-inch". + MeasureUnitImpl targetUnit; + + // In case there is no limit, the limit will be -inf. + ConverterPreference(const MeasureUnitImpl &source, const MeasureUnitImpl &complexTarget, + UnicodeString precision, const ConversionRates &ratesInfo, UErrorCode &status) + : ConverterPreference(source, complexTarget, std::numeric_limits<double>::lowest(), precision, + ratesInfo, status) {} + + ConverterPreference(const MeasureUnitImpl &source, const MeasureUnitImpl &complexTarget, + double limit, UnicodeString precision, const ConversionRates &ratesInfo, + UErrorCode &status) + : converter(source, complexTarget, ratesInfo, status), limit(limit), + precision(std::move(precision)), targetUnit(complexTarget.copy(status)) {} +}; + +} // namespace units + +// Export explicit template instantiations of MaybeStackArray, MemoryPool and +// MaybeStackVector. This is required when building DLLs for Windows. (See +// datefmt.h, collationiterator.h, erarules.h and others for similar examples.) +// +// Note: These need to be outside of the units namespace, or Clang will generate +// a compile error. +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +template class U_I18N_API MaybeStackArray<units::ConverterPreference*, 8>; +template class U_I18N_API MemoryPool<units::ConverterPreference, 8>; +template class U_I18N_API MaybeStackVector<units::ConverterPreference, 8>; +#endif + +namespace units { + +/** + * `UnitsRouter` responsible for converting from a single unit (such as `meter` or `meter-per-second`) to + * one of the complex units based on the limits. 
+ * For example: + * if the input is `meter` and the outputs are as follows + * {`foot+inch`, limit: 3.0} + * {`inch` , limit: no value (-inf)} + * This means that if the input in `meter` is greater than or equal to `3.0 feet`, the output will be in + * `foot+inch`, otherwise, the output will be in `inch`. + * + * NOTE: + * the output units and their limits MUST BE in order, for example, if the output units, from the + * previous example, are the following: + * {`inch` , limit: no value (-inf)} + * {`foot+inch`, limit: 3.0} + * IN THIS CASE THE OUTPUT WILL BE ALWAYS IN `inch`. + * + * NOTE: + * the output units and their limits will be extracted from the units preferences database by knowing + * the following: + * - input unit + * - locale + * - usage + * + * DESIGN: + * `UnitsRouter` internally uses `ComplexUnitsConverter` in order to convert the input units to the + * desired complex units and to check the limit too. + */ +class U_I18N_API UnitsRouter { + public: + UnitsRouter(StringPiece inputUnitIdentifier, StringPiece locale, StringPiece usage, + UErrorCode &status); + UnitsRouter(const MeasureUnit &inputUnit, StringPiece locale, StringPiece usage, UErrorCode &status); + + /** + * Performs locale and usage sensitive unit conversion. + * @param quantity The quantity to convert, expressed in terms of inputUnit. + * @param rounder If not null, this RoundingImpl will be used to do rounding + * on the converted value. If the rounder lacks an fPrecision, the + * rounder will be modified to use the preferred precision for the usage + * and locale preference, alternatively with the default precision. + * @param status Receives status. + */ + RouteResult route(double quantity, icu::number::impl::RoundingImpl *rounder, UErrorCode &status) const; + + /** + * Returns the list of possible output units, i.e. the full set of + * preferences, for the localized, usage-specific unit preferences.
+ * + * The returned pointer should be valid for the lifetime of the + * UnitsRouter instance. + */ + const MaybeStackVector<MeasureUnit> *getOutputUnits() const; + + private: + // List of possible output units. TODO: converterPreferences_ now also has + // this data available. Maybe drop outputUnits_ and have getOutputUnits + // construct a the list from data in converterPreferences_ instead? + MaybeStackVector<MeasureUnit> outputUnits_; + + MaybeStackVector<ConverterPreference> converterPreferences_; + + static number::Precision parseSkeletonToPrecision(icu::UnicodeString precisionSkeleton, + UErrorCode &status); + + void init(const MeasureUnit &inputUnit, StringPiece locale, StringPiece usage, UErrorCode &status); +}; + +} // namespace units +U_NAMESPACE_END + +#endif //__UNITS_ROUTER_H__ + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/contrib/libs/icu/i18n/unum.cpp b/contrib/libs/icu/i18n/unum.cpp index cce3db7d0b..7043f7adc1 100644 --- a/contrib/libs/icu/i18n/unum.cpp +++ b/contrib/libs/icu/i18n/unum.cpp @@ -898,7 +898,7 @@ unum_getContext(const UNumberFormat *fmt, UDisplayContextType type, UErrorCode* return ((const NumberFormat*)fmt)->getContext(type, *status); } -U_INTERNAL UFormattable * U_EXPORT2 +U_CAPI UFormattable * U_EXPORT2 unum_parseToUFormattable(const UNumberFormat* fmt, UFormattable *result, const UChar* text, @@ -922,7 +922,7 @@ unum_parseToUFormattable(const UNumberFormat* fmt, return result; } -U_INTERNAL int32_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 unum_formatUFormattable(const UNumberFormat* fmt, const UFormattable *number, UChar *result, diff --git a/contrib/libs/icu/i18n/upluralrules.cpp b/contrib/libs/icu/i18n/upluralrules.cpp index 5119257fd8..73e59a75c4 100644 --- a/contrib/libs/icu/i18n/upluralrules.cpp +++ b/contrib/libs/icu/i18n/upluralrules.cpp @@ -20,6 +20,7 @@ #include "unicode/unumberformatter.h" #include "number_decimalquantity.h" #include "number_utypes.h" +#include "numrange_impl.h" U_NAMESPACE_USE @@ -116,6 +117,25 @@ 
uplrules_selectFormatted(const UPluralRules *uplrules, } U_CAPI int32_t U_EXPORT2 +uplrules_selectForRange(const UPluralRules *uplrules, + const UFormattedNumberRange* urange, + UChar *keyword, int32_t capacity, + UErrorCode *status) +{ + if (U_FAILURE(*status)) { + return 0; + } + if (keyword == NULL ? capacity != 0 : capacity < 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + const number::impl::UFormattedNumberRangeData* impl = + number::impl::validateUFormattedNumberRange(urange, *status); + UnicodeString result = ((PluralRules*)uplrules)->select(impl, *status); + return result.extract(keyword, capacity, *status); +} + +U_CAPI int32_t U_EXPORT2 uplrules_selectWithFormat(const UPluralRules *uplrules, double number, const UNumberFormat *fmt, diff --git a/contrib/libs/icu/i18n/uregex.cpp b/contrib/libs/icu/i18n/uregex.cpp index 7f41918cff..514159e8b7 100644 --- a/contrib/libs/icu/i18n/uregex.cpp +++ b/contrib/libs/icu/i18n/uregex.cpp @@ -1169,7 +1169,7 @@ uregex_replaceAll(URegularExpression *regexp2, uregex_reset(regexp2, 0, status); - // Note: Seperate error code variables for findNext() and appendReplacement() + // Note: Separate error code variables for findNext() and appendReplacement() // are used so that destination buffer overflow errors // in appendReplacement won't stop findNext() from working. 
// appendReplacement() and appendTail() special case incoming buffer @@ -1353,7 +1353,7 @@ int32_t RegexCImpl::appendReplacement(RegularExpression *regexp, } // - // Validate all paramters + // Validate all parameters // if (validateRE(regexp, TRUE, status) == FALSE) { return 0; diff --git a/contrib/libs/icu/i18n/usearch.cpp b/contrib/libs/icu/i18n/usearch.cpp index 8866de7033..80b80fa3b4 100644 --- a/contrib/libs/icu/i18n/usearch.cpp +++ b/contrib/libs/icu/i18n/usearch.cpp @@ -26,17 +26,13 @@ U_NAMESPACE_USE -// don't use Boyer-Moore -// (and if we decide to turn this on again there are several new TODOs that will need to be addressed) -#define BOYER_MOORE 0 - // internal definition --------------------------------------------------- #define LAST_BYTE_MASK_ 0xFF #define SECOND_LAST_BYTE_SHIFT_ 8 #define SUPPLEMENTARY_MIN_VALUE_ 0x10000 -static const Normalizer2Impl *g_nfcImpl = NULL; +static const Normalizer2Impl *g_nfcImpl = nullptr; // internal methods ------------------------------------------------- @@ -48,11 +44,11 @@ static const Normalizer2Impl *g_nfcImpl = NULL; */ static inline void setColEIterOffset(UCollationElements *elems, - int32_t offset) + int32_t offset, + UErrorCode &status) { // Note: Not "fast" any more after the 2013 collation rewrite. // We do not want to expose more internals than necessary. 
- UErrorCode status = U_ZERO_ERROR; ucol_setOffset(elems, offset, &status); } @@ -76,29 +72,10 @@ inline uint32_t getMask(UCollationStrength strength) } } -/** -* @param ce 32-bit collation element -* @return hash code -*/ -static -inline int hashFromCE32(uint32_t ce) -{ - int hc = (int)( - ((((((ce >> 24) * 37) + - (ce >> 16)) * 37) + - (ce >> 8)) * 37) + - ce); - hc %= MAX_TABLE_SIZE_; - if (hc < 0) { - hc += MAX_TABLE_SIZE_; - } - return hc; -} - U_CDECL_BEGIN static UBool U_CALLCONV usearch_cleanup(void) { - g_nfcImpl = NULL; + g_nfcImpl = nullptr; return TRUE; } U_CDECL_END @@ -112,7 +89,7 @@ U_CDECL_END static inline void initializeFCD(UErrorCode *status) { - if (g_nfcImpl == NULL) { + if (g_nfcImpl == nullptr) { g_nfcImpl = Normalizer2Factory::getNFCImpl(*status); ucln_i18n_registerCleanup(UCLN_I18N_USEARCH, usearch_cleanup); } @@ -175,18 +152,18 @@ inline int32_t getCE(const UStringSearch *strsrch, uint32_t sourcece) } /** -* Allocate a memory and returns NULL if it failed. +* Allocate a memory and returns nullptr if it failed. * Internal method, status assumed to be a success. * @param size to allocate * @param status output error if any, caller to check status before calling * method, status assumed to be success when passed in. -* @return newly allocated array, NULL otherwise +* @return newly allocated array, nullptr otherwise */ static inline void * allocateMemory(uint32_t size, UErrorCode *status) { uint32_t *result = (uint32_t *)uprv_malloc(size); - if (result == NULL) { + if (result == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; } return result; @@ -197,7 +174,7 @@ inline void * allocateMemory(uint32_t size, UErrorCode *status) * Creates a new array if we run out of space. The caller will have to * manually deallocate the newly allocated array. * Internal method, status assumed to be success, caller has to check status -* before calling this method. destination not to be NULL and has at least +* before calling this method. 
destination not to be nullptr and has at least * size destinationlength. * @param destination target array * @param offset destination offset to add value @@ -222,7 +199,7 @@ inline int32_t * addTouint32_tArray(int32_t *destination, int32_t *temp = (int32_t *)allocateMemory( sizeof(int32_t) * newlength, status); if (U_FAILURE(*status)) { - return NULL; + return nullptr; } uprv_memcpy(temp, destination, sizeof(int32_t) * (size_t)offset); *destinationlength = newlength; @@ -237,7 +214,7 @@ inline int32_t * addTouint32_tArray(int32_t *destination, * Creates a new array if we run out of space. The caller will have to * manually deallocate the newly allocated array. * Internal method, status assumed to be success, caller has to check status -* before calling this method. destination not to be NULL and has at least +* before calling this method. destination not to be nullptr and has at least * size destinationlength. * @param destination target array * @param offset destination offset to add value @@ -263,7 +240,7 @@ inline int64_t * addTouint64_tArray(int64_t *destination, sizeof(int64_t) * newlength, status); if (U_FAILURE(*status)) { - return NULL; + return nullptr; } uprv_memcpy(temp, destination, sizeof(int64_t) * (size_t)offset); @@ -286,11 +263,9 @@ inline int64_t * addTouint64_tArray(int64_t *destination, * @param strsrch string search data * @param status output error if any, caller to check status before calling * method, status assumed to be success when passed in. 
-* @return total number of expansions */ static -inline uint16_t initializePatternCETable(UStringSearch *strsrch, - UErrorCode *status) +inline void initializePatternCETable(UStringSearch *strsrch, UErrorCode *status) { UPattern *pattern = &(strsrch->pattern); uint32_t cetablesize = INITIAL_ARRAY_SIZE_; @@ -298,7 +273,7 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch, uint32_t patternlength = pattern->textLength; UCollationElements *coleiter = strsrch->utilIter; - if (coleiter == NULL) { + if (coleiter == nullptr) { coleiter = ucol_openElements(strsrch->collator, pattern->text, patternlength, status); // status will be checked in ucol_next(..) later and if it is an @@ -310,7 +285,7 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch, ucol_setText(coleiter, pattern->text, pattern->textLength, status); } if(U_FAILURE(*status)) { - return 0; + return; } if (pattern->ces != cetable && pattern->ces) { @@ -318,7 +293,6 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch, } uint32_t offset = 0; - uint16_t result = 0; int32_t ce; while ((ce = ucol_next(coleiter, status)) != UCOL_NULLORDER && @@ -330,7 +304,7 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch, patternlength - ucol_getOffset(coleiter) + 1, status); if (U_FAILURE(*status)) { - return 0; + return; } offset ++; if (cetable != temp && cetable != pattern->cesBuffer) { @@ -338,14 +312,11 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch, } cetable = temp; } - result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); } cetable[offset] = 0; pattern->ces = cetable; pattern->cesLength = offset; - - return result; } /** @@ -358,11 +329,10 @@ inline uint16_t initializePatternCETable(UStringSearch *strsrch, * @param strsrch string search data * @param status output error if any, caller to check status before calling * method, status assumed to be success when passed in. 
-* @return total number of expansions */ static -inline uint16_t initializePatternPCETable(UStringSearch *strsrch, - UErrorCode *status) +inline void initializePatternPCETable(UStringSearch *strsrch, + UErrorCode *status) { UPattern *pattern = &(strsrch->pattern); uint32_t pcetablesize = INITIAL_ARRAY_SIZE_; @@ -370,26 +340,25 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch, uint32_t patternlength = pattern->textLength; UCollationElements *coleiter = strsrch->utilIter; - if (coleiter == NULL) { + if (coleiter == nullptr) { coleiter = ucol_openElements(strsrch->collator, pattern->text, patternlength, status); - // status will be checked in ucol_next(..) later and if it is an - // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be + // status will be checked in nextProcessed(..) later and if it is an error + // then UCOL_PROCESSED_NULLORDER is returned by nextProcessed(..), so 0 will be // returned. strsrch->utilIter = coleiter; } else { ucol_setText(coleiter, pattern->text, pattern->textLength, status); } if(U_FAILURE(*status)) { - return 0; + return; } - if (pattern->pces != pcetable && pattern->pces != NULL) { + if (pattern->pces != pcetable && pattern->pces != nullptr) { uprv_free(pattern->pces); } uint32_t offset = 0; - uint16_t result = 0; int64_t pce; icu::UCollationPCE iter(coleiter); @@ -397,7 +366,7 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch, // ** Should processed CEs be signed or unsigned? // ** (the rest of the code in this file seems to play fast-and-loose with // ** whether a CE is signed or unsigned. For example, look at routine above this one.) 
- while ((pce = iter.nextProcessed(NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER && + while ((pce = iter.nextProcessed(nullptr, nullptr, status)) != UCOL_PROCESSED_NULLORDER && U_SUCCESS(*status)) { int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize, pce, @@ -405,7 +374,7 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch, status); if (U_FAILURE(*status)) { - return 0; + return; } offset += 1; @@ -415,28 +384,24 @@ inline uint16_t initializePatternPCETable(UStringSearch *strsrch, } pcetable = temp; - //result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1); } pcetable[offset] = 0; pattern->pces = pcetable; pattern->pcesLength = offset; - - return result; } /** * Initializes the pattern struct. -* Internal method, status assumed to be success. * @param strsrch UStringSearch data storage * @param status output error if any, caller to check status before calling * method, status assumed to be success when passed in. -* @return expansionsize the total expansion size of the pattern */ static -inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status) +inline void initializePattern(UStringSearch *strsrch, UErrorCode *status) { - if (U_FAILURE(*status)) { return 0; } + if (U_FAILURE(*status)) { return; } + UPattern *pattern = &(strsrch->pattern); const UChar *patterntext = pattern->text; int32_t length = pattern->textLength; @@ -456,90 +421,19 @@ inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status) } // ** HACK ** - if (strsrch->pattern.pces != NULL) { + if (strsrch->pattern.pces != nullptr) { if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) { uprv_free(strsrch->pattern.pces); } - strsrch->pattern.pces = NULL; - } - - // since intializePattern is an internal method status is a success. - return initializePatternCETable(strsrch, status); -} - -/** -* Initializing shift tables, with the default values. -* If a corresponding default value is 0, the shift table is not set. 
-* @param shift table for forwards shift -* @param backshift table for backwards shift -* @param cetable table containing pattern ce -* @param cesize size of the pattern ces -* @param expansionsize total size of the expansions -* @param defaultforward the default forward value -* @param defaultbackward the default backward value -*/ -static -inline void setShiftTable(int16_t shift[], int16_t backshift[], - int32_t *cetable, int32_t cesize, - int16_t expansionsize, - int16_t defaultforward, - int16_t defaultbackward) -{ - // estimate the value to shift. to do that we estimate the smallest - // number of characters to give the relevant ces, ie approximately - // the number of ces minus their expansion, since expansions can come - // from a character. - int32_t count; - for (count = 0; count < MAX_TABLE_SIZE_; count ++) { - shift[count] = defaultforward; - } - cesize --; // down to the last index - for (count = 0; count < cesize; count ++) { - // number of ces from right of array to the count - int temp = defaultforward - count - 1; - shift[hashFromCE32(cetable[count])] = temp > 1 ? static_cast<int16_t>(temp) : 1; + strsrch->pattern.pces = nullptr; } - shift[hashFromCE32(cetable[cesize])] = 1; - // for ignorables we just shift by one. see test examples. - shift[hashFromCE32(0)] = 1; - for (count = 0; count < MAX_TABLE_SIZE_; count ++) { - backshift[count] = defaultbackward; - } - for (count = cesize; count > 0; count --) { - // the original value count does not seem to work - backshift[hashFromCE32(cetable[count])] = count > expansionsize ? - (int16_t)(count - expansionsize) : 1; - } - backshift[hashFromCE32(cetable[0])] = 1; - backshift[hashFromCE32(0)] = 1; + initializePatternCETable(strsrch, status); } /** -* Building of the pattern collation element list and the boyer moore strsrch -* table. -* The canonical match will only be performed after the default match fails. 
-* For both cases we need to remember the size of the composed and decomposed -* versions of the string. Since the Boyer-Moore shift calculations shifts by -* a number of characters in the text and tries to match the pattern from that -* offset, the shift value can not be too large in case we miss some -* characters. To choose a right shift size, we estimate the NFC form of the -* and use its size as a shift guide. The NFC form should be the small -* possible representation of the pattern. Anyways, we'll err on the smaller -* shift size. Hence the calculation for minlength. -* Canonical match will be performed slightly differently. We'll split the -* pattern into 3 parts, the prefix accents (PA), the middle string bounded by -* the first and last base character (MS), the ending accents (EA). Matches -* will be done on MS first, and only when we match MS then some processing -* will be required for the prefix and end accents in order to determine if -* they match PA and EA. Hence the default shift values -* for the canonical match will take the size of either end's accent into -* consideration. Forwards search will take the end accents into consideration -* for the default shift values and the backwards search will take the prefix -* accents into consideration. -* If pattern has no non-ignorable ce, we return a illegal argument error. -* Internal method, status assumed to be success. +* Initializes the pattern struct and builds the pattern collation element table. * @param strsrch UStringSearch data storage * @param status for output errors if it occurs, status is assumed to be a * success when it is passed in. 
@@ -547,512 +441,64 @@ inline void setShiftTable(int16_t shift[], int16_t backshift[], static inline void initialize(UStringSearch *strsrch, UErrorCode *status) { - int16_t expandlength = initializePattern(strsrch, status); - if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) { - UPattern *pattern = &strsrch->pattern; - int32_t cesize = pattern->cesLength; - - int16_t minlength = cesize > expandlength - ? (int16_t)cesize - expandlength : 1; - pattern->defaultShiftSize = minlength; - setShiftTable(pattern->shift, pattern->backShift, pattern->ces, - cesize, expandlength, minlength, minlength); - return; - } - strsrch->pattern.defaultShiftSize = 0; + initializePattern(strsrch, status); } -#if BOYER_MOORE -/** -* Check to make sure that the match length is at the end of the character by -* using the breakiterator. -* @param strsrch string search data -* @param start target text start offset -* @param end target text end offset -*/ -static -void checkBreakBoundary(const UStringSearch *strsrch, int32_t * /*start*/, - int32_t *end) -{ #if !UCONFIG_NO_BREAK_ITERATION - UBreakIterator *breakiterator = strsrch->search->internalBreakIter; - if (breakiterator) { - int32_t matchend = *end; - //int32_t matchstart = *start; - - if (!ubrk_isBoundary(breakiterator, matchend)) { - *end = ubrk_following(breakiterator, matchend); - } - - /* Check the start of the matched text to make sure it doesn't have any accents - * before it. This code may not be necessary and so it is commented out */ - /*if (!ubrk_isBoundary(breakiterator, matchstart) && !ubrk_isBoundary(breakiterator, matchstart-1)) { - *start = ubrk_preceding(breakiterator, matchstart); - }*/ - } -#endif -} - -/** -* Determine whether the target text in UStringSearch bounded by the offset -* start and end is one or more whole units of text as -* determined by the breakiterator in UStringSearch. 
-* @param strsrch string search data -* @param start target text start offset -* @param end target text end offset -*/ -static -UBool isBreakUnit(const UStringSearch *strsrch, int32_t start, - int32_t end) +// If the caller provided a character breakiterator we'll return that, +// otherwise we lazily create the internal break iterator. +static UBreakIterator* getBreakIterator(UStringSearch *strsrch, UErrorCode &status) { -#if !UCONFIG_NO_BREAK_ITERATION - UBreakIterator *breakiterator = strsrch->search->breakIter; - //TODO: Add here. - if (breakiterator) { - int32_t startindex = ubrk_first(breakiterator); - int32_t endindex = ubrk_last(breakiterator); - - // out-of-range indexes are never boundary positions - if (start < startindex || start > endindex || - end < startindex || end > endindex) { - return FALSE; - } - // otherwise, we can use following() on the position before the - // specified one and return true of the position we get back is the - // one the user specified - UBool result = (start == startindex || - ubrk_following(breakiterator, start - 1) == start) && - (end == endindex || - ubrk_following(breakiterator, end - 1) == end); - if (result) { - // iterates the individual ces - UCollationElements *coleiter = strsrch->utilIter; - const UChar *text = strsrch->search->text + - start; - UErrorCode status = U_ZERO_ERROR; - ucol_setText(coleiter, text, end - start, &status); - for (int32_t count = 0; count < strsrch->pattern.cesLength; - count ++) { - int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); - if (ce == UCOL_IGNORABLE) { - count --; - continue; - } - if (U_FAILURE(status) || ce != strsrch->pattern.ces[count]) { - return FALSE; - } - } - int32_t nextce = ucol_next(coleiter, &status); - while (ucol_getOffset(coleiter) == (end - start) - && getCE(strsrch, nextce) == UCOL_IGNORABLE) { - nextce = ucol_next(coleiter, &status); - } - if (ucol_getOffset(coleiter) == (end - start) - && nextce != UCOL_NULLORDER) { - // extra collation elements at the 
end of the match - return FALSE; - } - } - return result; + if (U_FAILURE(status)) { + return nullptr; } -#endif - return TRUE; -} -/** -* Getting the next base character offset if current offset is an accent, -* or the current offset if the current character contains a base character. -* accents the following base character will be returned -* @param text string -* @param textoffset current offset -* @param textlength length of text string -* @return the next base character or the current offset -* if the current character is contains a base character. -*/ -static -inline int32_t getNextBaseOffset(const UChar *text, - int32_t textoffset, - int32_t textlength) -{ - if (textoffset < textlength) { - int32_t temp = textoffset; - if (getFCD(text, &temp, textlength) >> SECOND_LAST_BYTE_SHIFT_) { - while (temp < textlength) { - int32_t result = temp; - if ((getFCD(text, &temp, textlength) >> - SECOND_LAST_BYTE_SHIFT_) == 0) { - return result; - } - } - return textlength; - } + if (strsrch->search->breakIter != nullptr) { + return strsrch->search->breakIter; } - return textoffset; -} -/** -* Gets the next base character offset depending on the string search pattern -* data -* @param strsrch string search data -* @param textoffset current offset, one offset away from the last character -* to search for. -* @return start index of the next base character or the current offset -* if the current character is contains a base character. 
-*/ -static -inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch, - int32_t textoffset) -{ - int32_t textlength = strsrch->search->textLength; - if (strsrch->pattern.hasSuffixAccents && - textoffset < textlength) { - int32_t temp = textoffset; - const UChar *text = strsrch->search->text; - U16_BACK_1(text, 0, temp); - if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) { - return getNextBaseOffset(text, textoffset, textlength); - } + if (strsrch->search->internalBreakIter != nullptr) { + return strsrch->search->internalBreakIter; } - return textoffset; -} -/** -* Shifting the collation element iterator position forward to prepare for -* a following match. If the last character is a unsafe character, we'll only -* shift by 1 to capture contractions, normalization etc. -* Internal method, status assumed to be success. -* @param text strsrch string search data -* @param textoffset start text position to do search -* @param ce the text ce which failed the match. -* @param patternceindex index of the ce within the pattern ce buffer which -* failed the match -* @return final offset -*/ -static -inline int32_t shiftForward(UStringSearch *strsrch, - int32_t textoffset, - int32_t ce, - int32_t patternceindex) -{ - UPattern *pattern = &(strsrch->pattern); - if (ce != UCOL_NULLORDER) { - int32_t shift = pattern->shift[hashFromCE32(ce)]; - // this is to adjust for characters in the middle of the - // substring for matching that failed. - int32_t adjust = pattern->cesLength - patternceindex; - if (adjust > 1 && shift >= adjust) { - shift -= adjust - 1; - } - textoffset += shift; - } - else { - textoffset += pattern->defaultShiftSize; - } + // Need to create the internal break iterator. 
+ strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, + ucol_getLocaleByType(strsrch->collator, ULOC_VALID_LOCALE, &status), + strsrch->search->text, strsrch->search->textLength, &status); - textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset); - // check for unsafe characters - // * if it is the start or middle of a contraction: to be done after - // a initial match is found - // * thai or lao base consonant character: similar to contraction - // * high surrogate character: similar to contraction - // * next character is a accent: shift to the next base character - return textoffset; + return strsrch->search->internalBreakIter; } -#endif // #if BOYER_MOORE +#endif /** -* sets match not found +* Sets the match result to "not found", regardless of the incoming error status. +* If an error occurs while setting the result, it is reported back. +* * @param strsrch string search data +* @param status for output errors, if they occur. */ static -inline void setMatchNotFound(UStringSearch *strsrch) +inline void setMatchNotFound(UStringSearch *strsrch, UErrorCode &status) { - // this method resets the match result regardless of the error status. + UErrorCode localStatus = U_ZERO_ERROR; + strsrch->search->matchedIndex = USEARCH_DONE; strsrch->search->matchedLength = 0; if (strsrch->search->isForwardSearching) { - setColEIterOffset(strsrch->textIter, strsrch->search->textLength); + setColEIterOffset(strsrch->textIter, strsrch->search->textLength, localStatus); } else { - setColEIterOffset(strsrch->textIter, 0); - } -} - -#if BOYER_MOORE -/** -* Gets the offset to the next safe point in text. -* ie. not the middle of a contraction, swappable characters or supplementary -* characters. 
-* @param collator collation sata -* @param text string to work with -* @param textoffset offset in string -* @param textlength length of text string -* @return offset to the next safe character -*/ -static -inline int32_t getNextSafeOffset(const UCollator *collator, - const UChar *text, - int32_t textoffset, - int32_t textlength) -{ - int32_t result = textoffset; // first contraction character - while (result != textlength && ucol_unsafeCP(text[result], collator)) { - result ++; - } - return result; -} - -/** -* This checks for accents in the potential match started with a . -* composite character. -* This is really painful... we have to check that composite character do not -* have any extra accents. We have to normalize the potential match and find -* the immediate decomposed character before the match. -* The first composite character would have been taken care of by the fcd -* checks in checkForwardExactMatch. -* This is the slow path after the fcd of the first character and -* the last character has been checked by checkForwardExactMatch and we -* determine that the potential match has extra non-ignorable preceding -* ces. -* E.g. looking for \u0301 acute in \u01FA A ring above and acute, -* checkExtraMatchAccent should fail since there is a middle ring in \u01FA -* Note here that accents checking are slow and cautioned in the API docs. -* Internal method, status assumed to be a success, caller should check status -* before calling this method -* @param strsrch string search data -* @param start index of the potential unfriendly composite character -* @param end index of the potential unfriendly composite character -* @param status output error status if any. -* @return TRUE if there is non-ignorable accents before at the beginning -* of the match, FALSE otherwise. 
-*/ - -static -UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start, - int32_t end, - UErrorCode *status) -{ - UBool result = FALSE; - if (strsrch->pattern.hasPrefixAccents) { - int32_t length = end - start; - int32_t offset = 0; - const UChar *text = strsrch->search->text + start; - - U16_FWD_1(text, offset, length); - // we are only concerned with the first composite character - if (unorm_quickCheck(text, offset, UNORM_NFD, status) == UNORM_NO) { - int32_t safeoffset = getNextSafeOffset(strsrch->collator, - text, 0, length); - if (safeoffset != length) { - safeoffset ++; - } - UChar *norm = NULL; - UChar buffer[INITIAL_ARRAY_SIZE_]; - int32_t size = unorm_normalize(text, safeoffset, UNORM_NFD, 0, - buffer, INITIAL_ARRAY_SIZE_, - status); - if (U_FAILURE(*status)) { - return FALSE; - } - if (size >= INITIAL_ARRAY_SIZE_) { - norm = (UChar *)allocateMemory((size + 1) * sizeof(UChar), - status); - // if allocation failed, status will be set to - // U_MEMORY_ALLOCATION_ERROR and unorm_normalize internally - // checks for it. 
- size = unorm_normalize(text, safeoffset, UNORM_NFD, 0, norm, - size, status); - if (U_FAILURE(*status) && norm != NULL) { - uprv_free(norm); - return FALSE; - } - } - else { - norm = buffer; - } - - UCollationElements *coleiter = strsrch->utilIter; - ucol_setText(coleiter, norm, size, status); - uint32_t firstce = strsrch->pattern.ces[0]; - UBool ignorable = TRUE; - uint32_t ce = UCOL_IGNORABLE; - while (U_SUCCESS(*status) && ce != firstce && ce != (uint32_t)UCOL_NULLORDER) { - offset = ucol_getOffset(coleiter); - if (ce != firstce && ce != UCOL_IGNORABLE) { - ignorable = FALSE; - } - ce = ucol_next(coleiter, status); - } - UChar32 codepoint; - U16_PREV(norm, 0, offset, codepoint); - result = !ignorable && (u_getCombiningClass(codepoint) != 0); - - if (norm != buffer) { - uprv_free(norm); - } - } + setColEIterOffset(strsrch->textIter, 0, localStatus); } - return result; -} - -/** -* Used by exact matches, checks if there are accents before the match. -* This is really painful... we have to check that composite characters at -* the start of the matches have to not have any extra accents. -* We check the FCD of the character first, if it starts with an accent and -* the first pattern ce does not match the first ce of the character, we bail. -* Otherwise we try normalizing the first composite -* character and find the immediate decomposed character before the match to -* see if it is an non-ignorable accent. -* Now normalizing the first composite character is enough because we ensure -* that when the match is passed in here with extra beginning ces, the -* first or last ce that match has to occur within the first character. -* E.g. looking for \u0301 acute in \u01FA A ring above and acute, -* checkExtraMatchAccent should fail since there is a middle ring in \u01FA -* Note here that accents checking are slow and cautioned in the API docs. 
-* @param strsrch string search data -* @param start offset -* @param end offset -* @return TRUE if there are accents on either side of the match, -* FALSE otherwise -*/ -static -UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start, - int32_t end) -{ - if (strsrch->pattern.hasPrefixAccents) { - UCollationElements *coleiter = strsrch->textIter; - UErrorCode status = U_ZERO_ERROR; - // we have been iterating forwards previously - uint32_t ignorable = TRUE; - int32_t firstce = strsrch->pattern.ces[0]; - - setColEIterOffset(coleiter, start); - int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); - if (U_FAILURE(status)) { - return TRUE; - } - while (ce != firstce) { - if (ce != UCOL_IGNORABLE) { - ignorable = FALSE; - } - ce = getCE(strsrch, ucol_next(coleiter, &status)); - if (U_FAILURE(status) || ce == UCOL_NULLORDER) { - return TRUE; - } - } - if (!ignorable && inNormBuf(coleiter)) { - // within normalization buffer, discontiguous handled here - return TRUE; - } - - // within text - int32_t temp = start; - // original code - // accent = (getFCD(strsrch->search->text, &temp, - // strsrch->search->textLength) - // >> SECOND_LAST_BYTE_SHIFT_); - // however this code does not work well with VC7 .net in release mode. - // maybe the inlines for getFCD combined with shifting has bugs in - // VC7. anyways this is a work around. 
- UBool accent = getFCD(strsrch->search->text, &temp, - strsrch->search->textLength) > 0xFF; - if (!accent) { - return checkExtraMatchAccents(strsrch, start, end, &status); - } - if (!ignorable) { - return TRUE; - } - if (start > 0) { - temp = start; - U16_BACK_1(strsrch->search->text, 0, temp); - if (getFCD(strsrch->search->text, &temp, - strsrch->search->textLength) & LAST_BYTE_MASK_) { - setColEIterOffset(coleiter, start); - ce = ucol_previous(coleiter, &status); - if (U_FAILURE(status) || - (ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE)) { - return TRUE; - } - } - } + // If an error occurred while setting the result to not found (ex: OOM), + // then we want to report that error back to the caller. + if (U_SUCCESS(status) && U_FAILURE(localStatus)) { + status = localStatus; } - - return FALSE; } /** -* Used by exact matches, checks if there are accents bounding the match. -* Note this is the initial boundary check. If the potential match -* starts or ends with composite characters, the accents in those -* characters will be determined later. -* Not doing backwards iteration here, since discontiguos contraction for -* backwards collation element iterator, use up too many characters. -* E.g. looking for \u030A ring in \u01FA A ring above and acute, -* should fail since there is a acute at the end of \u01FA -* Note here that accents checking are slow and cautioned in the API docs. 
-* @param strsrch string search data -* @param start offset of match -* @param end end offset of the match -* @return TRUE if there are accents on either side of the match, -* FALSE otherwise -*/ -static -UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start, - int32_t end) -{ - if (strsrch->pattern.hasSuffixAccents) { - const UChar *text = strsrch->search->text; - int32_t temp = end; - int32_t textlength = strsrch->search->textLength; - U16_BACK_1(text, 0, temp); - if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) { - int32_t firstce = strsrch->pattern.ces[0]; - UCollationElements *coleiter = strsrch->textIter; - UErrorCode status = U_ZERO_ERROR; - int32_t ce; - setColEIterOffset(coleiter, start); - while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) { - if (U_FAILURE(status) || ce == UCOL_NULLORDER) { - return TRUE; - } - } - int32_t count = 1; - while (count < strsrch->pattern.cesLength) { - if (getCE(strsrch, ucol_next(coleiter, &status)) - == UCOL_IGNORABLE) { - // Thai can give an ignorable here. 
- count --; - } - if (U_FAILURE(status)) { - return TRUE; - } - count ++; - } - - ce = ucol_next(coleiter, &status); - if (U_FAILURE(status)) { - return TRUE; - } - if (ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE) { - ce = getCE(strsrch, ce); - } - if (ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE) { - if (ucol_getOffset(coleiter) <= end) { - return TRUE; - } - if (getFCD(text, &end, textlength) >> SECOND_LAST_BYTE_SHIFT_) { - return TRUE; - } - } - } - } - return FALSE; -} -#endif // #if BOYER_MOORE - -/** * Checks if the offset runs out of the text string * @param offset * @param textlength of the text string @@ -1072,8 +518,7 @@ inline UBool isOutOfBounds(int32_t textlength, int32_t offset) * @return TRUE if identical match is found */ static -inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start, - int32_t end) +inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start, int32_t end) { if (strsrch->strength != UCOL_IDENTICAL) { return TRUE; @@ -1091,1503 +536,6 @@ inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start, return U_SUCCESS(status) && t2 == p2; } -#if BOYER_MOORE -/** -* Checks to see if the match is repeated -* @param strsrch string search data -* @param start new match start index -* @param end new match end index -* @return TRUE if the the match is repeated, FALSE otherwise -*/ -static -inline UBool checkRepeatedMatch(UStringSearch *strsrch, - int32_t start, - int32_t end) -{ - int32_t lastmatchindex = strsrch->search->matchedIndex; - UBool result; - if (lastmatchindex == USEARCH_DONE) { - return FALSE; - } - if (strsrch->search->isForwardSearching) { - result = start <= lastmatchindex; - } - else { - result = start >= lastmatchindex; - } - if (!result && !strsrch->search->isOverlap) { - if (strsrch->search->isForwardSearching) { - result = start < lastmatchindex + strsrch->search->matchedLength; - } - else { - result = end > lastmatchindex; - } - } - return result; -} - -/** -* Gets the collation 
element iterator's current offset. -* @param coleiter collation element iterator -* @param forwards flag TRUE if we are moving in th forwards direction -* @return current offset -*/ -static -inline int32_t getColElemIterOffset(const UCollationElements *coleiter, - UBool forwards) -{ - int32_t result = ucol_getOffset(coleiter); - // intricacies of the the backwards collation element iterator - if (FALSE && !forwards && inNormBuf(coleiter) && !isFCDPointerNull(coleiter)) { - result ++; - } - return result; -} - -/** -* Checks match for contraction. -* If the match ends with a partial contraction we fail. -* If the match starts too far off (because of backwards iteration) we try to -* chip off the extra characters depending on whether a breakiterator has -* been used. -* Internal method, error assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search data -* @param start offset of potential match, to be modified if necessary -* @param end offset of potential match, to be modified if necessary -* @param status output error status if any -* @return TRUE if match passes the contraction test, FALSE otherwise -*/ - -static -UBool checkNextExactContractionMatch(UStringSearch *strsrch, - int32_t *start, - int32_t *end, UErrorCode *status) -{ - UCollationElements *coleiter = strsrch->textIter; - int32_t textlength = strsrch->search->textLength; - int32_t temp = *start; - const UCollator *collator = strsrch->collator; - const UChar *text = strsrch->search->text; - // This part checks if either ends of the match contains potential - // contraction. If so we'll have to iterate through them - // The start contraction needs to be checked since ucol_previous dumps - // all characters till the first safe character into the buffer. - // *start + 1 is used to test for the unsafe characters instead of *start - // because ucol_prev takes all unsafe characters till the first safe - // character ie *start. 
so by testing *start + 1, we can estimate if - // excess prefix characters has been included in the potential search - // results. - if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) || - (*start + 1 < textlength - && ucol_unsafeCP(text[*start + 1], collator))) { - int32_t expansion = getExpansionPrefix(coleiter); - UBool expandflag = expansion > 0; - setColEIterOffset(coleiter, *start); - while (expansion > 0) { - // getting rid of the redundant ce, caused by setOffset. - // since backward contraction/expansion may have extra ces if we - // are in the normalization buffer, hasAccentsBeforeMatch would - // have taken care of it. - // E.g. the character \u01FA will have an expansion of 3, but if - // we are only looking for acute and ring \u030A and \u0301, we'll - // have to skip the first ce in the expansion buffer. - ucol_next(coleiter, status); - if (U_FAILURE(*status)) { - return FALSE; - } - if (ucol_getOffset(coleiter) != temp) { - *start = temp; - temp = ucol_getOffset(coleiter); - } - expansion --; - } - - int32_t *patternce = strsrch->pattern.ces; - int32_t patterncelength = strsrch->pattern.cesLength; - int32_t count = 0; - while (count < patterncelength) { - int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); - if (ce == UCOL_IGNORABLE) { - continue; - } - if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { - *start = temp; - temp = ucol_getOffset(coleiter); - } - if (U_FAILURE(*status) || ce != patternce[count]) { - (*end) ++; - *end = getNextUStringSearchBaseOffset(strsrch, *end); - return FALSE; - } - count ++; - } - } - return TRUE; -} - -/** -* Checks and sets the match information if found. -* Checks -* <ul> -* <li> the potential match does not repeat the previous match -* <li> boundaries are correct -* <li> exact matches has no extra accents -* <li> identical matchesb -* <li> potential match does not end in the middle of a contraction -* <\ul> -* Otherwise the offset will be shifted to the next character. 
-* Internal method, status assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search data -* @param textoffset offset in the collation element text. the returned value -* will be the truncated end offset of the match or the new start -* search offset. -* @param status output error status if any -* @return TRUE if the match is valid, FALSE otherwise -*/ -static -inline UBool checkNextExactMatch(UStringSearch *strsrch, - int32_t *textoffset, UErrorCode *status) -{ - UCollationElements *coleiter = strsrch->textIter; - int32_t start = getColElemIterOffset(coleiter, FALSE); - - if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) { - return FALSE; - } - - // this totally matches, however we need to check if it is repeating - if (!isBreakUnit(strsrch, start, *textoffset) || - checkRepeatedMatch(strsrch, start, *textoffset) || - hasAccentsBeforeMatch(strsrch, start, *textoffset) || - !checkIdentical(strsrch, start, *textoffset) || - hasAccentsAfterMatch(strsrch, start, *textoffset)) { - - (*textoffset) ++; - *textoffset = getNextUStringSearchBaseOffset(strsrch, *textoffset); - return FALSE; - } - - //Add breakiterator boundary check for primary strength search. - if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) { - checkBreakBoundary(strsrch, &start, textoffset); - } - - // totally match, we will get rid of the ending ignorables. 
- strsrch->search->matchedIndex = start; - strsrch->search->matchedLength = *textoffset - start; - return TRUE; -} - -/** -* Getting the previous base character offset, or the current offset if the -* current character is a base character -* @param text string -* @param textoffset one offset after the current character -* @return the offset of the next character after the base character or the first -* composed character with accents -*/ -static -inline int32_t getPreviousBaseOffset(const UChar *text, - int32_t textoffset) -{ - if (textoffset > 0) { - for (;;) { - int32_t result = textoffset; - U16_BACK_1(text, 0, textoffset); - int32_t temp = textoffset; - uint16_t fcd = getFCD(text, &temp, result); - if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) { - if (fcd & LAST_BYTE_MASK_) { - return textoffset; - } - return result; - } - if (textoffset == 0) { - return 0; - } - } - } - return textoffset; -} - -/** -* Getting the indexes of the accents that are not blocked in the argument -* accent array -* @param accents array of accents in nfd terminated by a 0. -* @param accentsindex array of indexes of the accents that are not blocked -*/ -static -inline int getUnblockedAccentIndex(UChar *accents, int32_t *accentsindex) -{ - int32_t index = 0; - int32_t length = u_strlen(accents); - UChar32 codepoint = 0; - int cclass = 0; - int result = 0; - int32_t temp; - while (index < length) { - temp = index; - U16_NEXT(accents, index, length, codepoint); - if (u_getCombiningClass(codepoint) != cclass) { - cclass = u_getCombiningClass(codepoint); - accentsindex[result] = temp; - result ++; - } - } - accentsindex[result] = length; - return result; -} - -/** -* Appends 3 UChar arrays to a destination array. -* Creates a new array if we run out of space. The caller will have to -* manually deallocate the newly allocated array. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. 
destination not to be NULL and has at least -* size destinationlength. -* @param destination target array -* @param destinationlength target array size, returning the appended length -* @param source1 null-terminated first array -* @param source2 second array -* @param source2length length of second array -* @param source3 null-terminated third array -* @param status error status if any -* @return new destination array, destination if there was no new allocation -*/ -static -inline UChar * addToUCharArray( UChar *destination, - int32_t *destinationlength, - const UChar *source1, - const UChar *source2, - int32_t source2length, - const UChar *source3, - UErrorCode *status) -{ - int32_t source1length = source1 ? u_strlen(source1) : 0; - int32_t source3length = source3 ? u_strlen(source3) : 0; - if (*destinationlength < source1length + source2length + source3length + - 1) - { - destination = (UChar *)allocateMemory( - (source1length + source2length + source3length + 1) * sizeof(UChar), - status); - // if error allocating memory, status will be - // U_MEMORY_ALLOCATION_ERROR - if (U_FAILURE(*status)) { - *destinationlength = 0; - return NULL; - } - } - if (source1length != 0) { - u_memcpy(destination, source1, source1length); - } - if (source2length != 0) { - uprv_memcpy(destination + source1length, source2, - sizeof(UChar) * source2length); - } - if (source3length != 0) { - uprv_memcpy(destination + source1length + source2length, source3, - sizeof(UChar) * source3length); - } - *destinationlength = source1length + source2length + source3length; - return destination; -} - -/** -* Running through a collation element iterator to see if the contents matches -* pattern in string search data -* @param strsrch string search data -* @param coleiter collation element iterator -* @return TRUE if a match if found, FALSE otherwise -*/ -static -inline UBool checkCollationMatch(const UStringSearch *strsrch, - UCollationElements *coleiter) -{ - int patternceindex = 
strsrch->pattern.cesLength; - int32_t *patternce = strsrch->pattern.ces; - UErrorCode status = U_ZERO_ERROR; - while (patternceindex > 0) { - int32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); - if (ce == UCOL_IGNORABLE) { - continue; - } - if (U_FAILURE(status) || ce != *patternce) { - return FALSE; - } - patternce ++; - patternceindex --; - } - return TRUE; -} - -/** -* Rearranges the front accents to try matching. -* Prefix accents in the text will be grouped according to their combining -* class and the groups will be mixed and matched to try find the perfect -* match with the pattern. -* So for instance looking for "\u0301" in "\u030A\u0301\u0325" -* step 1: split "\u030A\u0301" into 6 other type of potential accent substrings -* "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325", -* "\u0301\u0325". -* step 2: check if any of the generated substrings matches the pattern. -* Internal method, status is assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search match -* @param start first offset of the accents to start searching -* @param end start of the last accent set -* @param status output error status if any -* @return USEARCH_DONE if a match is not found, otherwise return the starting -* offset of the match. Note this start includes all preceding accents. -*/ -static -int32_t doNextCanonicalPrefixMatch(UStringSearch *strsrch, - int32_t start, - int32_t end, - UErrorCode *status) -{ - const UChar *text = strsrch->search->text; - int32_t textlength = strsrch->search->textLength; - int32_t tempstart = start; - - if ((getFCD(text, &tempstart, textlength) & LAST_BYTE_MASK_) == 0) { - // die... 
failed at a base character - return USEARCH_DONE; - } - - int32_t offset = getNextBaseOffset(text, tempstart, textlength); - start = getPreviousBaseOffset(text, tempstart); - - UChar accents[INITIAL_ARRAY_SIZE_]; - // normalizing the offensive string - unorm_normalize(text + start, offset - start, UNORM_NFD, 0, accents, - INITIAL_ARRAY_SIZE_, status); - if (U_FAILURE(*status)) { - return USEARCH_DONE; - } - - int32_t accentsindex[INITIAL_ARRAY_SIZE_]; - int32_t accentsize = getUnblockedAccentIndex(accents, - accentsindex); - int32_t count = (2 << (accentsize - 1)) - 1; - UChar buffer[INITIAL_ARRAY_SIZE_]; - UCollationElements *coleiter = strsrch->utilIter; - while (U_SUCCESS(*status) && count > 0) { - UChar *rearrange = strsrch->canonicalPrefixAccents; - // copy the base characters - for (int k = 0; k < accentsindex[0]; k ++) { - *rearrange ++ = accents[k]; - } - // forming all possible canonical rearrangement by dropping - // sets of accents - for (int i = 0; i <= accentsize - 1; i ++) { - int32_t mask = 1 << (accentsize - i - 1); - if (count & mask) { - for (int j = accentsindex[i]; j < accentsindex[i + 1]; j ++) { - *rearrange ++ = accents[j]; - } - } - } - *rearrange = 0; - int32_t matchsize = INITIAL_ARRAY_SIZE_; - UChar *match = addToUCharArray(buffer, &matchsize, - strsrch->canonicalPrefixAccents, - strsrch->search->text + offset, - end - offset, - strsrch->canonicalSuffixAccents, - status); - - // if status is a failure, ucol_setText does nothing. - // run the collator iterator through this match - ucol_setText(coleiter, match, matchsize, status); - if (U_SUCCESS(*status)) { - if (checkCollationMatch(strsrch, coleiter)) { - if (match != buffer) { - uprv_free(match); - } - return start; - } - } - count --; - } - return USEARCH_DONE; -} - -/** -* Gets the offset to the safe point in text before textoffset. -* ie. not the middle of a contraction, swappable characters or supplementary -* characters. 
-* @param collator collation sata -* @param text string to work with -* @param textoffset offset in string -* @param textlength length of text string -* @return offset to the previous safe character -*/ -static -inline uint32_t getPreviousSafeOffset(const UCollator *collator, - const UChar *text, - int32_t textoffset) -{ - int32_t result = textoffset; // first contraction character - while (result != 0 && ucol_unsafeCP(text[result - 1], collator)) { - result --; - } - if (result != 0) { - // the first contraction character is consider unsafe here - result --; - } - return result; -} - -/** -* Cleaning up after we passed the safe zone -* @param strsrch string search data -* @param safetext safe text array -* @param safebuffer safe text buffer -* @param coleiter collation element iterator for safe text -*/ -static -inline void cleanUpSafeText(const UStringSearch *strsrch, UChar *safetext, - UChar *safebuffer) -{ - if (safetext != safebuffer && safetext != strsrch->canonicalSuffixAccents) - { - uprv_free(safetext); - } -} - -/** -* Take the rearranged end accents and tries matching. If match failed at -* a separate preceding set of accents (separated from the rearranged on by -* at least a base character) then we rearrange the preceding accents and -* tries matching again. -* We allow skipping of the ends of the accent set if the ces do not match. -* However if the failure is found before the accent set, it fails. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search data -* @param textoffset of the start of the rearranged accent -* @param status output error status if any -* @return USEARCH_DONE if a match is not found, otherwise return the starting -* offset of the match. Note this start includes all preceding accents. 
-*/ -static -int32_t doNextCanonicalSuffixMatch(UStringSearch *strsrch, - int32_t textoffset, - UErrorCode *status) -{ - const UChar *text = strsrch->search->text; - const UCollator *collator = strsrch->collator; - int32_t safelength = 0; - UChar *safetext; - int32_t safetextlength; - UChar safebuffer[INITIAL_ARRAY_SIZE_]; - UCollationElements *coleiter = strsrch->utilIter; - int32_t safeoffset = textoffset; - - if (textoffset != 0 && ucol_unsafeCP(strsrch->canonicalSuffixAccents[0], - collator)) { - safeoffset = getPreviousSafeOffset(collator, text, textoffset); - safelength = textoffset - safeoffset; - safetextlength = INITIAL_ARRAY_SIZE_; - safetext = addToUCharArray(safebuffer, &safetextlength, NULL, - text + safeoffset, safelength, - strsrch->canonicalSuffixAccents, - status); - } - else { - safetextlength = u_strlen(strsrch->canonicalSuffixAccents); - safetext = strsrch->canonicalSuffixAccents; - } - - // if status is a failure, ucol_setText does nothing - ucol_setText(coleiter, safetext, safetextlength, status); - // status checked in loop below - - int32_t *ce = strsrch->pattern.ces; - int32_t celength = strsrch->pattern.cesLength; - int ceindex = celength - 1; - UBool isSafe = TRUE; // indication flag for position in safe zone - - while (ceindex >= 0) { - int32_t textce = ucol_previous(coleiter, status); - if (U_FAILURE(*status)) { - if (isSafe) { - cleanUpSafeText(strsrch, safetext, safebuffer); - } - return USEARCH_DONE; - } - if (textce == UCOL_NULLORDER) { - // check if we have passed the safe buffer - if (coleiter == strsrch->textIter) { - cleanUpSafeText(strsrch, safetext, safebuffer); - return USEARCH_DONE; - } - cleanUpSafeText(strsrch, safetext, safebuffer); - safetext = safebuffer; - coleiter = strsrch->textIter; - setColEIterOffset(coleiter, safeoffset); - // status checked at the start of the loop - isSafe = FALSE; - continue; - } - textce = getCE(strsrch, textce); - if (textce != UCOL_IGNORABLE && textce != ce[ceindex]) { - // do the beginning 
stuff - int32_t failedoffset = getColElemIterOffset(coleiter, FALSE); - if (isSafe && failedoffset >= safelength) { - // alas... no hope. failed at rearranged accent set - cleanUpSafeText(strsrch, safetext, safebuffer); - return USEARCH_DONE; - } - else { - if (isSafe) { - failedoffset += safeoffset; - cleanUpSafeText(strsrch, safetext, safebuffer); - } - - // try rearranging the front accents - int32_t result = doNextCanonicalPrefixMatch(strsrch, - failedoffset, textoffset, status); - if (result != USEARCH_DONE) { - // if status is a failure, ucol_setOffset does nothing - setColEIterOffset(strsrch->textIter, result); - } - if (U_FAILURE(*status)) { - return USEARCH_DONE; - } - return result; - } - } - if (textce == ce[ceindex]) { - ceindex --; - } - } - // set offset here - if (isSafe) { - int32_t result = getColElemIterOffset(coleiter, FALSE); - // sets the text iterator here with the correct expansion and offset - int32_t leftoverces = getExpansionPrefix(coleiter); - cleanUpSafeText(strsrch, safetext, safebuffer); - if (result >= safelength) { - result = textoffset; - } - else { - result += safeoffset; - } - setColEIterOffset(strsrch->textIter, result); - strsrch->textIter->iteratordata_.toReturn = - setExpansionPrefix(strsrch->textIter, leftoverces); - return result; - } - - return ucol_getOffset(coleiter); -} - -/** -* Trying out the substring and sees if it can be a canonical match. -* This will try normalizing the end accents and arranging them into canonical -* equivalents and check their corresponding ces with the pattern ce. -* Suffix accents in the text will be grouped according to their combining -* class and the groups will be mixed and matched to try find the perfect -* match with the pattern. -* So for instance looking for "\u0301" in "\u030A\u0301\u0325" -* step 1: split "\u030A\u0301" into 6 other type of potential accent substrings -* "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325", -* "\u0301\u0325". 
-* step 2: check if any of the generated substrings matches the pattern. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search data -* @param textoffset end offset in the collation element text that ends with -* the accents to be rearranged -* @param status error status if any -* @return TRUE if the match is valid, FALSE otherwise -*/ -static -UBool doNextCanonicalMatch(UStringSearch *strsrch, - int32_t textoffset, - UErrorCode *status) -{ - const UChar *text = strsrch->search->text; - int32_t temp = textoffset; - U16_BACK_1(text, 0, temp); - if ((getFCD(text, &temp, textoffset) & LAST_BYTE_MASK_) == 0) { - UCollationElements *coleiter = strsrch->textIter; - int32_t offset = getColElemIterOffset(coleiter, FALSE); - if (strsrch->pattern.hasPrefixAccents) { - offset = doNextCanonicalPrefixMatch(strsrch, offset, textoffset, - status); - if (U_SUCCESS(*status) && offset != USEARCH_DONE) { - setColEIterOffset(coleiter, offset); - return TRUE; - } - } - return FALSE; - } - - if (!strsrch->pattern.hasSuffixAccents) { - return FALSE; - } - - UChar accents[INITIAL_ARRAY_SIZE_]; - // offset to the last base character in substring to search - int32_t baseoffset = getPreviousBaseOffset(text, textoffset); - // normalizing the offensive string - unorm_normalize(text + baseoffset, textoffset - baseoffset, UNORM_NFD, - 0, accents, INITIAL_ARRAY_SIZE_, status); - // status checked in loop below - - int32_t accentsindex[INITIAL_ARRAY_SIZE_]; - int32_t size = getUnblockedAccentIndex(accents, accentsindex); - - // 2 power n - 1 plus the full set of accents - int32_t count = (2 << (size - 1)) - 1; - while (U_SUCCESS(*status) && count > 0) { - UChar *rearrange = strsrch->canonicalSuffixAccents; - // copy the base characters - for (int k = 0; k < accentsindex[0]; k ++) { - *rearrange ++ = accents[k]; - } - // forming all possible canonical rearrangement by dropping - // sets of accents - for (int i = 0; 
i <= size - 1; i ++) { - int32_t mask = 1 << (size - i - 1); - if (count & mask) { - for (int j = accentsindex[i]; j < accentsindex[i + 1]; j ++) { - *rearrange ++ = accents[j]; - } - } - } - *rearrange = 0; - int32_t offset = doNextCanonicalSuffixMatch(strsrch, baseoffset, - status); - if (offset != USEARCH_DONE) { - return TRUE; // match found - } - count --; - } - return FALSE; -} - -/** -* Gets the previous base character offset depending on the string search -* pattern data -* @param strsrch string search data -* @param textoffset current offset, current character -* @return the offset of the next character after this base character or itself -* if it is a composed character with accents -*/ -static -inline int32_t getPreviousUStringSearchBaseOffset(UStringSearch *strsrch, - int32_t textoffset) -{ - if (strsrch->pattern.hasPrefixAccents && textoffset > 0) { - const UChar *text = strsrch->search->text; - int32_t offset = textoffset; - if (getFCD(text, &offset, strsrch->search->textLength) >> - SECOND_LAST_BYTE_SHIFT_) { - return getPreviousBaseOffset(text, textoffset); - } - } - return textoffset; -} - -/** -* Checks match for contraction. -* If the match ends with a partial contraction we fail. -* If the match starts too far off (because of backwards iteration) we try to -* chip off the extra characters -* Internal method, status assumed to be success, caller has to check status -* before calling this method. 
-* @param strsrch string search data -* @param start offset of potential match, to be modified if necessary -* @param end offset of potential match, to be modified if necessary -* @param status output error status if any -* @return TRUE if match passes the contraction test, FALSE otherwise -*/ -static -UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch, - int32_t *start, - int32_t *end, - UErrorCode *status) -{ - UCollationElements *coleiter = strsrch->textIter; - int32_t textlength = strsrch->search->textLength; - int32_t temp = *start; - const UCollator *collator = strsrch->collator; - const UChar *text = strsrch->search->text; - // This part checks if either ends of the match contains potential - // contraction. If so we'll have to iterate through them - if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) || - (*start + 1 < textlength - && ucol_unsafeCP(text[*start + 1], collator))) { - int32_t expansion = getExpansionPrefix(coleiter); - UBool expandflag = expansion > 0; - setColEIterOffset(coleiter, *start); - while (expansion > 0) { - // getting rid of the redundant ce, caused by setOffset. - // since backward contraction/expansion may have extra ces if we - // are in the normalization buffer, hasAccentsBeforeMatch would - // have taken care of it. - // E.g. the character \u01FA will have an expansion of 3, but if - // we are only looking for acute and ring \u030A and \u0301, we'll - // have to skip the first ce in the expansion buffer. 
- ucol_next(coleiter, status); - if (U_FAILURE(*status)) { - return FALSE; - } - if (ucol_getOffset(coleiter) != temp) { - *start = temp; - temp = ucol_getOffset(coleiter); - } - expansion --; - } - - int32_t *patternce = strsrch->pattern.ces; - int32_t patterncelength = strsrch->pattern.cesLength; - int32_t count = 0; - int32_t textlength = strsrch->search->textLength; - while (count < patterncelength) { - int32_t ce = getCE(strsrch, ucol_next(coleiter, status)); - // status checked below, note that if status is a failure - // ucol_next returns UCOL_NULLORDER - if (ce == UCOL_IGNORABLE) { - continue; - } - if (expandflag && count == 0 && ucol_getOffset(coleiter) != temp) { - *start = temp; - temp = ucol_getOffset(coleiter); - } - - if (count == 0 && ce != patternce[0]) { - // accents may have extra starting ces, this occurs when a - // pure accent pattern is matched without rearrangement - // text \u0325\u0300 and looking for \u0300 - int32_t expected = patternce[0]; - if (getFCD(text, start, textlength) & LAST_BYTE_MASK_) { - ce = getCE(strsrch, ucol_next(coleiter, status)); - while (U_SUCCESS(*status) && ce != expected && - ce != UCOL_NULLORDER && - ucol_getOffset(coleiter) <= *end) { - ce = getCE(strsrch, ucol_next(coleiter, status)); - } - } - } - if (U_FAILURE(*status) || ce != patternce[count]) { - (*end) ++; - *end = getNextUStringSearchBaseOffset(strsrch, *end); - return FALSE; - } - count ++; - } - } - return TRUE; -} - -/** -* Checks and sets the match information if found. -* Checks -* <ul> -* <li> the potential match does not repeat the previous match -* <li> boundaries are correct -* <li> potential match does not end in the middle of a contraction -* <li> identical matches -* <\ul> -* Otherwise the offset will be shifted to the next character. -* Internal method, status assumed to be success, caller has to check the -* status before calling this method. -* @param strsrch string search data -* @param textoffset offset in the collation element text. 
the returned value -* will be the truncated end offset of the match or the new start -* search offset. -* @param status output error status if any -* @return TRUE if the match is valid, FALSE otherwise -*/ -static -inline UBool checkNextCanonicalMatch(UStringSearch *strsrch, - int32_t *textoffset, - UErrorCode *status) -{ - // to ensure that the start and ends are not composite characters - UCollationElements *coleiter = strsrch->textIter; - // if we have a canonical accent match - if ((strsrch->pattern.hasSuffixAccents && - strsrch->canonicalSuffixAccents[0]) || - (strsrch->pattern.hasPrefixAccents && - strsrch->canonicalPrefixAccents[0])) { - strsrch->search->matchedIndex = getPreviousUStringSearchBaseOffset( - strsrch, - ucol_getOffset(coleiter)); - strsrch->search->matchedLength = *textoffset - - strsrch->search->matchedIndex; - return TRUE; - } - - int32_t start = getColElemIterOffset(coleiter, FALSE); - if (!checkNextCanonicalContractionMatch(strsrch, &start, textoffset, - status) || U_FAILURE(*status)) { - return FALSE; - } - - start = getPreviousUStringSearchBaseOffset(strsrch, start); - // this totally matches, however we need to check if it is repeating - if (checkRepeatedMatch(strsrch, start, *textoffset) || - !isBreakUnit(strsrch, start, *textoffset) || - !checkIdentical(strsrch, start, *textoffset)) { - (*textoffset) ++; - *textoffset = getNextBaseOffset(strsrch->search->text, *textoffset, - strsrch->search->textLength); - return FALSE; - } - - strsrch->search->matchedIndex = start; - strsrch->search->matchedLength = *textoffset - start; - return TRUE; -} - -/** -* Shifting the collation element iterator position forward to prepare for -* a preceding match. If the first character is a unsafe character, we'll only -* shift by 1 to capture contractions, normalization etc. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. 
-* @param text strsrch string search data -* @param textoffset start text position to do search -* @param ce the text ce which failed the match. -* @param patternceindex index of the ce within the pattern ce buffer which -* failed the match -* @return final offset -*/ -static -inline int32_t reverseShift(UStringSearch *strsrch, - int32_t textoffset, - int32_t ce, - int32_t patternceindex) -{ - if (strsrch->search->isOverlap) { - if (textoffset != strsrch->search->textLength) { - textoffset --; - } - else { - textoffset -= strsrch->pattern.defaultShiftSize; - } - } - else { - if (ce != UCOL_NULLORDER) { - int32_t shift = strsrch->pattern.backShift[hashFromCE32(ce)]; - - // this is to adjust for characters in the middle of the substring - // for matching that failed. - int32_t adjust = patternceindex; - if (adjust > 1 && shift > adjust) { - shift -= adjust - 1; - } - textoffset -= shift; - } - else { - textoffset -= strsrch->pattern.defaultShiftSize; - } - } - textoffset = getPreviousUStringSearchBaseOffset(strsrch, textoffset); - return textoffset; -} - -/** -* Checks match for contraction. -* If the match starts with a partial contraction we fail. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. 
-* @param strsrch string search data -* @param start offset of potential match, to be modified if necessary -* @param end offset of potential match, to be modified if necessary -* @param status output error status if any -* @return TRUE if match passes the contraction test, FALSE otherwise -*/ -static -UBool checkPreviousExactContractionMatch(UStringSearch *strsrch, - int32_t *start, - int32_t *end, UErrorCode *status) -{ - UCollationElements *coleiter = strsrch->textIter; - int32_t textlength = strsrch->search->textLength; - int32_t temp = *end; - const UCollator *collator = strsrch->collator; - const UChar *text = strsrch->search->text; - // This part checks if either if the start of the match contains potential - // contraction. If so we'll have to iterate through them - // Since we used ucol_next while previously looking for the potential - // match, this guarantees that our end will not be a partial contraction, - // or a partial supplementary character. - if (*start < textlength && ucol_unsafeCP(text[*start], collator)) { - int32_t expansion = getExpansionSuffix(coleiter); - UBool expandflag = expansion > 0; - setColEIterOffset(coleiter, *end); - while (U_SUCCESS(*status) && expansion > 0) { - // getting rid of the redundant ce - // since forward contraction/expansion may have extra ces - // if we are in the normalization buffer, hasAccentsBeforeMatch - // would have taken care of it. - // E.g. 
the character \u01FA will have an expansion of 3, but if - // we are only looking for A ring A\u030A, we'll have to skip the - // last ce in the expansion buffer - ucol_previous(coleiter, status); - if (U_FAILURE(*status)) { - return FALSE; - } - if (ucol_getOffset(coleiter) != temp) { - *end = temp; - temp = ucol_getOffset(coleiter); - } - expansion --; - } - - int32_t *patternce = strsrch->pattern.ces; - int32_t patterncelength = strsrch->pattern.cesLength; - int32_t count = patterncelength; - while (count > 0) { - int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); - // status checked below, note that if status is a failure - // ucol_previous returns UCOL_NULLORDER - if (ce == UCOL_IGNORABLE) { - continue; - } - if (expandflag && count == 0 && - getColElemIterOffset(coleiter, FALSE) != temp) { - *end = temp; - temp = ucol_getOffset(coleiter); - } - if (U_FAILURE(*status) || ce != patternce[count - 1]) { - (*start) --; - *start = getPreviousBaseOffset(text, *start); - return FALSE; - } - count --; - } - } - return TRUE; -} - -/** -* Checks and sets the match information if found. -* Checks -* <ul> -* <li> the current match does not repeat the last match -* <li> boundaries are correct -* <li> exact matches has no extra accents -* <li> identical matches -* <\ul> -* Otherwise the offset will be shifted to the preceding character. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search data -* @param collator -* @param coleiter collation element iterator -* @param text string -* @param textoffset offset in the collation element text. the returned value -* will be the truncated start offset of the match or the new start -* search offset. 
-* @param status output error status if any -* @return TRUE if the match is valid, FALSE otherwise -*/ -static -inline UBool checkPreviousExactMatch(UStringSearch *strsrch, - int32_t *textoffset, - UErrorCode *status) -{ - // to ensure that the start and ends are not composite characters - int32_t end = ucol_getOffset(strsrch->textIter); - if (!checkPreviousExactContractionMatch(strsrch, textoffset, &end, status) - || U_FAILURE(*status)) { - return FALSE; - } - - // this totally matches, however we need to check if it is repeating - // the old match - if (checkRepeatedMatch(strsrch, *textoffset, end) || - !isBreakUnit(strsrch, *textoffset, end) || - hasAccentsBeforeMatch(strsrch, *textoffset, end) || - !checkIdentical(strsrch, *textoffset, end) || - hasAccentsAfterMatch(strsrch, *textoffset, end)) { - (*textoffset) --; - *textoffset = getPreviousBaseOffset(strsrch->search->text, - *textoffset); - return FALSE; - } - - //Add breakiterator boundary check for primary strength search. - if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) { - checkBreakBoundary(strsrch, textoffset, &end); - } - - strsrch->search->matchedIndex = *textoffset; - strsrch->search->matchedLength = end - *textoffset; - return TRUE; -} - -/** -* Rearranges the end accents to try matching. -* Suffix accents in the text will be grouped according to their combining -* class and the groups will be mixed and matched to try find the perfect -* match with the pattern. -* So for instance looking for "\u0301" in "\u030A\u0301\u0325" -* step 1: split "\u030A\u0301" into 6 other type of potential accent substrings -* "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325", -* "\u0301\u0325". -* step 2: check if any of the generated substrings matches the pattern. -* Internal method, status assumed to be success, user has to check status -* before calling this method. 
-* @param strsrch string search match -* @param start offset of the first base character -* @param end start of the last accent set -* @param status only error status if any -* @return USEARCH_DONE if a match is not found, otherwise return the ending -* offset of the match. Note this start includes all following accents. -*/ -static -int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch, - int32_t start, - int32_t end, - UErrorCode *status) -{ - const UChar *text = strsrch->search->text; - int32_t tempend = end; - - U16_BACK_1(text, 0, tempend); - if (!(getFCD(text, &tempend, strsrch->search->textLength) & - LAST_BYTE_MASK_)) { - // die... failed at a base character - return USEARCH_DONE; - } - end = getNextBaseOffset(text, end, strsrch->search->textLength); - - if (U_SUCCESS(*status)) { - UChar accents[INITIAL_ARRAY_SIZE_]; - int32_t offset = getPreviousBaseOffset(text, end); - // normalizing the offensive string - unorm_normalize(text + offset, end - offset, UNORM_NFD, 0, accents, - INITIAL_ARRAY_SIZE_, status); - - int32_t accentsindex[INITIAL_ARRAY_SIZE_]; - int32_t accentsize = getUnblockedAccentIndex(accents, - accentsindex); - int32_t count = (2 << (accentsize - 1)) - 1; - UChar buffer[INITIAL_ARRAY_SIZE_]; - UCollationElements *coleiter = strsrch->utilIter; - while (U_SUCCESS(*status) && count > 0) { - UChar *rearrange = strsrch->canonicalSuffixAccents; - // copy the base characters - for (int k = 0; k < accentsindex[0]; k ++) { - *rearrange ++ = accents[k]; - } - // forming all possible canonical rearrangement by dropping - // sets of accents - for (int i = 0; i <= accentsize - 1; i ++) { - int32_t mask = 1 << (accentsize - i - 1); - if (count & mask) { - for (int j = accentsindex[i]; j < accentsindex[i + 1]; j ++) { - *rearrange ++ = accents[j]; - } - } - } - *rearrange = 0; - int32_t matchsize = INITIAL_ARRAY_SIZE_; - UChar *match = addToUCharArray(buffer, &matchsize, - strsrch->canonicalPrefixAccents, - strsrch->search->text + start, - offset - 
start, - strsrch->canonicalSuffixAccents, - status); - - // run the collator iterator through this match - // if status is a failure ucol_setText does nothing - ucol_setText(coleiter, match, matchsize, status); - if (U_SUCCESS(*status)) { - if (checkCollationMatch(strsrch, coleiter)) { - if (match != buffer) { - uprv_free(match); - } - return end; - } - } - count --; - } - } - return USEARCH_DONE; -} - -/** -* Take the rearranged start accents and tries matching. If match failed at -* a separate following set of accents (separated from the rearranged on by -* at least a base character) then we rearrange the preceding accents and -* tries matching again. -* We allow skipping of the ends of the accent set if the ces do not match. -* However if the failure is found before the accent set, it fails. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search data -* @param textoffset of the ends of the rearranged accent -* @param status output error status if any -* @return USEARCH_DONE if a match is not found, otherwise return the ending -* offset of the match. Note this start includes all following accents. 
-*/ -static -int32_t doPreviousCanonicalPrefixMatch(UStringSearch *strsrch, - int32_t textoffset, - UErrorCode *status) -{ - const UChar *text = strsrch->search->text; - const UCollator *collator = strsrch->collator; - int32_t safelength = 0; - UChar *safetext; - int32_t safetextlength; - UChar safebuffer[INITIAL_ARRAY_SIZE_]; - int32_t safeoffset = textoffset; - - if (textoffset && - ucol_unsafeCP(strsrch->canonicalPrefixAccents[ - u_strlen(strsrch->canonicalPrefixAccents) - 1 - ], collator)) { - safeoffset = getNextSafeOffset(collator, text, textoffset, - strsrch->search->textLength); - safelength = safeoffset - textoffset; - safetextlength = INITIAL_ARRAY_SIZE_; - safetext = addToUCharArray(safebuffer, &safetextlength, - strsrch->canonicalPrefixAccents, - text + textoffset, safelength, - NULL, status); - } - else { - safetextlength = u_strlen(strsrch->canonicalPrefixAccents); - safetext = strsrch->canonicalPrefixAccents; - } - - UCollationElements *coleiter = strsrch->utilIter; - // if status is a failure, ucol_setText does nothing - ucol_setText(coleiter, safetext, safetextlength, status); - // status checked in loop below - - int32_t *ce = strsrch->pattern.ces; - int32_t celength = strsrch->pattern.cesLength; - int ceindex = 0; - UBool isSafe = TRUE; // safe zone indication flag for position - int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents); - - while (ceindex < celength) { - int32_t textce = ucol_next(coleiter, status); - if (U_FAILURE(*status)) { - if (isSafe) { - cleanUpSafeText(strsrch, safetext, safebuffer); - } - return USEARCH_DONE; - } - if (textce == UCOL_NULLORDER) { - // check if we have passed the safe buffer - if (coleiter == strsrch->textIter) { - cleanUpSafeText(strsrch, safetext, safebuffer); - return USEARCH_DONE; - } - cleanUpSafeText(strsrch, safetext, safebuffer); - safetext = safebuffer; - coleiter = strsrch->textIter; - setColEIterOffset(coleiter, safeoffset); - // status checked at the start of the loop - isSafe = FALSE; 
- continue; - } - textce = getCE(strsrch, textce); - if (textce != UCOL_IGNORABLE && textce != ce[ceindex]) { - // do the beginning stuff - int32_t failedoffset = ucol_getOffset(coleiter); - if (isSafe && failedoffset <= prefixlength) { - // alas... no hope. failed at rearranged accent set - cleanUpSafeText(strsrch, safetext, safebuffer); - return USEARCH_DONE; - } - else { - if (isSafe) { - failedoffset = safeoffset - failedoffset; - cleanUpSafeText(strsrch, safetext, safebuffer); - } - - // try rearranging the end accents - int32_t result = doPreviousCanonicalSuffixMatch(strsrch, - textoffset, failedoffset, status); - if (result != USEARCH_DONE) { - // if status is a failure, ucol_setOffset does nothing - setColEIterOffset(strsrch->textIter, result); - } - if (U_FAILURE(*status)) { - return USEARCH_DONE; - } - return result; - } - } - if (textce == ce[ceindex]) { - ceindex ++; - } - } - // set offset here - if (isSafe) { - int32_t result = ucol_getOffset(coleiter); - // sets the text iterator here with the correct expansion and offset - int32_t leftoverces = getExpansionSuffix(coleiter); - cleanUpSafeText(strsrch, safetext, safebuffer); - if (result <= prefixlength) { - result = textoffset; - } - else { - result = textoffset + (safeoffset - result); - } - setColEIterOffset(strsrch->textIter, result); - setExpansionSuffix(strsrch->textIter, leftoverces); - return result; - } - - return ucol_getOffset(coleiter); -} - -/** -* Trying out the substring and sees if it can be a canonical match. -* This will try normalizing the starting accents and arranging them into -* canonical equivalents and check their corresponding ces with the pattern ce. -* Prefix accents in the text will be grouped according to their combining -* class and the groups will be mixed and matched to try find the perfect -* match with the pattern. 
-* So for instance looking for "\u0301" in "\u030A\u0301\u0325" -* step 1: split "\u030A\u0301" into 6 other type of potential accent substrings -* "\u030A", "\u0301", "\u0325", "\u030A\u0301", "\u030A\u0325", -* "\u0301\u0325". -* step 2: check if any of the generated substrings matches the pattern. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search data -* @param textoffset start offset in the collation element text that starts -* with the accents to be rearranged -* @param status output error status if any -* @return TRUE if the match is valid, FALSE otherwise -*/ -static -UBool doPreviousCanonicalMatch(UStringSearch *strsrch, - int32_t textoffset, - UErrorCode *status) -{ - const UChar *text = strsrch->search->text; - int32_t temp = textoffset; - int32_t textlength = strsrch->search->textLength; - if ((getFCD(text, &temp, textlength) >> SECOND_LAST_BYTE_SHIFT_) == 0) { - UCollationElements *coleiter = strsrch->textIter; - int32_t offset = ucol_getOffset(coleiter); - if (strsrch->pattern.hasSuffixAccents) { - offset = doPreviousCanonicalSuffixMatch(strsrch, textoffset, - offset, status); - if (U_SUCCESS(*status) && offset != USEARCH_DONE) { - setColEIterOffset(coleiter, offset); - return TRUE; - } - } - return FALSE; - } - - if (!strsrch->pattern.hasPrefixAccents) { - return FALSE; - } - - UChar accents[INITIAL_ARRAY_SIZE_]; - // offset to the last base character in substring to search - int32_t baseoffset = getNextBaseOffset(text, textoffset, textlength); - // normalizing the offensive string - unorm_normalize(text + textoffset, baseoffset - textoffset, UNORM_NFD, - 0, accents, INITIAL_ARRAY_SIZE_, status); - // status checked in loop - - int32_t accentsindex[INITIAL_ARRAY_SIZE_]; - int32_t size = getUnblockedAccentIndex(accents, accentsindex); - - // 2 power n - 1 plus the full set of accents - int32_t count = (2 << (size - 1)) - 1; - while (U_SUCCESS(*status) && count > 
0) { - UChar *rearrange = strsrch->canonicalPrefixAccents; - // copy the base characters - for (int k = 0; k < accentsindex[0]; k ++) { - *rearrange ++ = accents[k]; - } - // forming all possible canonical rearrangement by dropping - // sets of accents - for (int i = 0; i <= size - 1; i ++) { - int32_t mask = 1 << (size - i - 1); - if (count & mask) { - for (int j = accentsindex[i]; j < accentsindex[i + 1]; j ++) { - *rearrange ++ = accents[j]; - } - } - } - *rearrange = 0; - int32_t offset = doPreviousCanonicalPrefixMatch(strsrch, - baseoffset, status); - if (offset != USEARCH_DONE) { - return TRUE; // match found - } - count --; - } - return FALSE; -} - -/** -* Checks match for contraction. -* If the match starts with a partial contraction we fail. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search data -* @param start offset of potential match, to be modified if necessary -* @param end offset of potential match, to be modified if necessary -* @param status only error status if any -* @return TRUE if match passes the contraction test, FALSE otherwise -*/ -static -UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch, - int32_t *start, - int32_t *end, UErrorCode *status) -{ - UCollationElements *coleiter = strsrch->textIter; - int32_t textlength = strsrch->search->textLength; - int32_t temp = *end; - const UCollator *collator = strsrch->collator; - const UChar *text = strsrch->search->text; - // This part checks if either if the start of the match contains potential - // contraction. If so we'll have to iterate through them - // Since we used ucol_next while previously looking for the potential - // match, this guarantees that our end will not be a partial contraction, - // or a partial supplementary character. 
- if (*start < textlength && ucol_unsafeCP(text[*start], collator)) { - int32_t expansion = getExpansionSuffix(coleiter); - UBool expandflag = expansion > 0; - setColEIterOffset(coleiter, *end); - while (expansion > 0) { - // getting rid of the redundant ce - // since forward contraction/expansion may have extra ces - // if we are in the normalization buffer, hasAccentsBeforeMatch - // would have taken care of it. - // E.g. the character \u01FA will have an expansion of 3, but if - // we are only looking for A ring A\u030A, we'll have to skip the - // last ce in the expansion buffer - ucol_previous(coleiter, status); - if (U_FAILURE(*status)) { - return FALSE; - } - if (ucol_getOffset(coleiter) != temp) { - *end = temp; - temp = ucol_getOffset(coleiter); - } - expansion --; - } - - int32_t *patternce = strsrch->pattern.ces; - int32_t patterncelength = strsrch->pattern.cesLength; - int32_t count = patterncelength; - while (count > 0) { - int32_t ce = getCE(strsrch, ucol_previous(coleiter, status)); - // status checked below, note that if status is a failure - // ucol_previous returns UCOL_NULLORDER - if (ce == UCOL_IGNORABLE) { - continue; - } - if (expandflag && count == 0 && - getColElemIterOffset(coleiter, FALSE) != temp) { - *end = temp; - temp = ucol_getOffset(coleiter); - } - if (count == patterncelength && - ce != patternce[patterncelength - 1]) { - // accents may have extra starting ces, this occurs when a - // pure accent pattern is matched without rearrangement - int32_t expected = patternce[patterncelength - 1]; - U16_BACK_1(text, 0, *end); - if (getFCD(text, end, textlength) & LAST_BYTE_MASK_) { - ce = getCE(strsrch, ucol_previous(coleiter, status)); - while (U_SUCCESS(*status) && ce != expected && - ce != UCOL_NULLORDER && - ucol_getOffset(coleiter) <= *start) { - ce = getCE(strsrch, ucol_previous(coleiter, status)); - } - } - } - if (U_FAILURE(*status) || ce != patternce[count - 1]) { - (*start) --; - *start = getPreviousBaseOffset(text, *start); - 
return FALSE; - } - count --; - } - } - return TRUE; -} - -/** -* Checks and sets the match information if found. -* Checks -* <ul> -* <li> the potential match does not repeat the previous match -* <li> boundaries are correct -* <li> potential match does not end in the middle of a contraction -* <li> identical matches -* <\ul> -* Otherwise the offset will be shifted to the next character. -* Internal method, status assumed to be success, caller has to check status -* before calling this method. -* @param strsrch string search data -* @param textoffset offset in the collation element text. the returned value -* will be the truncated start offset of the match or the new start -* search offset. -* @param status only error status if any -* @return TRUE if the match is valid, FALSE otherwise -*/ -static -inline UBool checkPreviousCanonicalMatch(UStringSearch *strsrch, - int32_t *textoffset, - UErrorCode *status) -{ - // to ensure that the start and ends are not composite characters - UCollationElements *coleiter = strsrch->textIter; - // if we have a canonical accent match - if ((strsrch->pattern.hasSuffixAccents && - strsrch->canonicalSuffixAccents[0]) || - (strsrch->pattern.hasPrefixAccents && - strsrch->canonicalPrefixAccents[0])) { - strsrch->search->matchedIndex = *textoffset; - strsrch->search->matchedLength = - getNextUStringSearchBaseOffset(strsrch, - getColElemIterOffset(coleiter, FALSE)) - - *textoffset; - return TRUE; - } - - int32_t end = ucol_getOffset(coleiter); - if (!checkPreviousCanonicalContractionMatch(strsrch, textoffset, &end, - status) || - U_FAILURE(*status)) { - return FALSE; - } - - end = getNextUStringSearchBaseOffset(strsrch, end); - // this totally matches, however we need to check if it is repeating - if (checkRepeatedMatch(strsrch, *textoffset, end) || - !isBreakUnit(strsrch, *textoffset, end) || - !checkIdentical(strsrch, *textoffset, end)) { - (*textoffset) --; - *textoffset = getPreviousBaseOffset(strsrch->search->text, - *textoffset); - 
return FALSE; - } - - strsrch->search->matchedIndex = *textoffset; - strsrch->search->matchedLength = end - *textoffset; - return TRUE; -} -#endif // #if BOYER_MOORE - // constructors and destructor ------------------------------------------- U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern, @@ -2599,12 +547,12 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern, UErrorCode *status) { if (U_FAILURE(*status)) { - return NULL; + return nullptr; } #if UCONFIG_NO_BREAK_ITERATION - if (breakiter != NULL) { + if (breakiter != nullptr) { *status = U_UNSUPPORTED_ERROR; - return NULL; + return nullptr; } #endif if (locale) { @@ -2615,11 +563,11 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern, patternlength, text, textlength, collator, breakiter, status); - if (result == NULL || U_FAILURE(*status)) { + if (result == nullptr || U_FAILURE(*status)) { if (collator) { ucol_close(collator); } - return NULL; + return nullptr; } else { result->ownCollator = TRUE; @@ -2627,7 +575,7 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern, return result; } *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; + return nullptr; } U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( @@ -2640,29 +588,29 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( UErrorCode *status) { if (U_FAILURE(*status)) { - return NULL; + return nullptr; } #if UCONFIG_NO_BREAK_ITERATION - if (breakiter != NULL) { + if (breakiter != nullptr) { *status = U_UNSUPPORTED_ERROR; - return NULL; + return nullptr; } #endif - if (pattern == NULL || text == NULL || collator == NULL) { + if (pattern == nullptr || text == nullptr || collator == nullptr) { *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; + return nullptr; } // string search does not really work when numeric collation is turned on if(ucol_getAttribute(collator, UCOL_NUMERIC_COLLATION, status) == UCOL_ON) { *status = U_UNSUPPORTED_ERROR; - return NULL; + return nullptr; } 
if (U_SUCCESS(*status)) { initializeFCD(status); if (U_FAILURE(*status)) { - return NULL; + return nullptr; } UStringSearch *result; @@ -2674,13 +622,13 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( } if (textlength <= 0 || patternlength <= 0) { *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; + return nullptr; } result = (UStringSearch *)uprv_malloc(sizeof(UStringSearch)); - if (result == NULL) { + if (result == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; + return nullptr; } result->collator = collator; @@ -2695,14 +643,14 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( if (U_FAILURE(*status)) { uprv_free(result); - return NULL; + return nullptr; } result->search = (USearch *)uprv_malloc(sizeof(USearch)); - if (result->search == NULL) { + if (result->search == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; uprv_free(result); - return NULL; + return nullptr; } result->search->text = text; @@ -2710,12 +658,12 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( result->pattern.text = pattern; result->pattern.textLength = patternlength; - result->pattern.ces = NULL; - result->pattern.pces = NULL; + result->pattern.ces = nullptr; + result->pattern.pces = nullptr; result->search->breakIter = breakiter; #if !UCONFIG_NO_BREAK_ITERATION - result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status); + result->search->internalBreakIter = nullptr; // Lazily created. 
if (breakiter) { ubrk_setText(breakiter, text, textlength, status); } @@ -2724,13 +672,13 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( result->ownCollator = FALSE; result->search->matchedLength = 0; result->search->matchedIndex = USEARCH_DONE; - result->utilIter = NULL; + result->utilIter = nullptr; result->textIter = ucol_openElements(collator, text, textlength, status); - result->textProcessedIter = NULL; + result->textProcessedIter = nullptr; if (U_FAILURE(*status)) { usearch_close(result); - return NULL; + return nullptr; } result->search->isOverlap = FALSE; @@ -2743,12 +691,12 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( if (U_FAILURE(*status)) { usearch_close(result); - return NULL; + return nullptr; } return result; } - return NULL; + return nullptr; } U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch) @@ -2759,7 +707,7 @@ U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch) uprv_free(strsrch->pattern.ces); } - if (strsrch->pattern.pces != NULL && + if (strsrch->pattern.pces != nullptr && strsrch->pattern.pces != strsrch->pattern.pcesBuffer) { uprv_free(strsrch->pattern.pces); } @@ -2773,7 +721,7 @@ U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch) } #if !UCONFIG_NO_BREAK_ITERATION - if (strsrch->search->internalBreakIter) { + if (strsrch->search->internalBreakIter != nullptr) { ubrk_close(strsrch->search->internalBreakIter); } #endif @@ -2787,9 +735,9 @@ namespace { UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) { if (U_FAILURE(*status)) { return FALSE; } - if (strsrch->textProcessedIter == NULL) { + if (strsrch->textProcessedIter == nullptr) { strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter); - if (strsrch->textProcessedIter == NULL) { + if (strsrch->textProcessedIter == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; return FALSE; } @@ -2804,7 +752,7 @@ UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) { // set and get methods 
-------------------------------------------------- U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, - int32_t position, + int32_t position, UErrorCode *status) { if (U_SUCCESS(*status) && strsrch) { @@ -2812,7 +760,7 @@ U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, *status = U_INDEX_OUTOFBOUNDS_ERROR; } else { - setColEIterOffset(strsrch->textIter, position); + setColEIterOffset(strsrch->textIter, position, *status); } strsrch->search->matchedIndex = USEARCH_DONE; strsrch->search->matchedLength = 0; @@ -2832,10 +780,10 @@ U_CAPI int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch) return USEARCH_DONE; } -U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch, - USearchAttribute attribute, - USearchAttributeValue value, - UErrorCode *status) +U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch, + USearchAttribute attribute, + USearchAttributeValue value, + UErrorCode *status) { if (U_SUCCESS(*status) && strsrch) { switch (attribute) @@ -2895,7 +843,7 @@ U_CAPI USearchAttributeValue U_EXPORT2 usearch_getAttribute( U_CAPI int32_t U_EXPORT2 usearch_getMatchedStart( const UStringSearch *strsrch) { - if (strsrch == NULL) { + if (strsrch == nullptr) { return USEARCH_DONE; } return strsrch->search->matchedIndex; @@ -2910,8 +858,8 @@ U_CAPI int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch, if (U_FAILURE(*status)) { return USEARCH_DONE; } - if (strsrch == NULL || resultCapacity < 0 || (resultCapacity > 0 && - result == NULL)) { + if (strsrch == nullptr || resultCapacity < 0 || (resultCapacity > 0 && + result == nullptr)) { *status = U_ILLEGAL_ARGUMENT_ERROR; return USEARCH_DONE; } @@ -2964,7 +912,7 @@ usearch_getBreakIterator(const UStringSearch *strsrch) if (strsrch) { return strsrch->search->breakIter; } - return NULL; + return nullptr; } #endif @@ -2975,7 +923,7 @@ U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch, UErrorCode *status) { if (U_SUCCESS(*status)) { - if 
(strsrch == NULL || text == NULL || textlength < -1 || + if (strsrch == nullptr || text == nullptr || textlength < -1 || textlength == 0) { *status = U_ILLEGAL_ARGUMENT_ERROR; } @@ -2990,11 +938,13 @@ U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch, strsrch->search->matchedLength = 0; strsrch->search->reset = TRUE; #if !UCONFIG_NO_BREAK_ITERATION - if (strsrch->search->breakIter != NULL) { + if (strsrch->search->breakIter != nullptr) { ubrk_setText(strsrch->search->breakIter, text, textlength, status); } - ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status); + if (strsrch->search->internalBreakIter != nullptr) { + ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status); + } #endif } } @@ -3007,7 +957,7 @@ U_CAPI const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch, *length = strsrch->search->textLength; return strsrch->search->text; } - return NULL; + return nullptr; } U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch, @@ -3015,17 +965,17 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch, UErrorCode *status) { if (U_SUCCESS(*status)) { - if (collator == NULL) { + if (collator == nullptr) { *status = U_ILLEGAL_ARGUMENT_ERROR; return; } if (strsrch) { delete strsrch->textProcessedIter; - strsrch->textProcessedIter = NULL; + strsrch->textProcessedIter = nullptr; ucol_closeElements(strsrch->textIter); ucol_closeElements(strsrch->utilIter); - strsrch->textIter = strsrch->utilIter = NULL; + strsrch->textIter = strsrch->utilIter = nullptr; if (strsrch->ownCollator && (strsrch->collator != collator)) { ucol_close((UCollator *)strsrch->collator); strsrch->ownCollator = FALSE; @@ -3034,9 +984,10 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch, strsrch->strength = ucol_getStrength(collator); strsrch->ceMask = getMask(strsrch->strength); #if !UCONFIG_NO_BREAK_ITERATION - ubrk_close(strsrch->search->internalBreakIter); - 
strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status), - strsrch->search->text, strsrch->search->textLength, status); + if (strsrch->search->internalBreakIter != nullptr) { + ubrk_close(strsrch->search->internalBreakIter); + strsrch->search->internalBreakIter = nullptr; // Lazily created. + } #endif // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT strsrch->toShift = @@ -3069,7 +1020,7 @@ U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch) if (strsrch) { return (UCollator *)strsrch->collator; } - return NULL; + return nullptr; } U_CAPI void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch, @@ -3078,7 +1029,7 @@ U_CAPI void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch, UErrorCode *status) { if (U_SUCCESS(*status)) { - if (strsrch == NULL || pattern == NULL) { + if (strsrch == nullptr || pattern == nullptr) { *status = U_ILLEGAL_ARGUMENT_ERROR; } else { @@ -3098,19 +1049,19 @@ U_CAPI void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch, U_CAPI const UChar* U_EXPORT2 usearch_getPattern(const UStringSearch *strsrch, - int32_t *length) + int32_t *length) { if (strsrch) { *length = strsrch->pattern.textLength; return strsrch->pattern.text; } - return NULL; + return nullptr; } -// miscellanous methods -------------------------------------------------- +// miscellaneous methods -------------------------------------------------- U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch, - UErrorCode *status) + UErrorCode *status) { if (strsrch && U_SUCCESS(*status)) { strsrch->search->isForwardSearching = TRUE; @@ -3123,8 +1074,8 @@ U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch, } U_CAPI int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch, - int32_t position, - UErrorCode *status) + int32_t position, + UErrorCode *status) { if (strsrch && U_SUCCESS(*status)) { strsrch->search->isForwardSearching = TRUE; @@ -3138,7 +1089,7 
@@ U_CAPI int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch, } U_CAPI int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch, - UErrorCode *status) + UErrorCode *status) { if (strsrch && U_SUCCESS(*status)) { strsrch->search->isForwardSearching = FALSE; @@ -3151,8 +1102,8 @@ U_CAPI int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch, } U_CAPI int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch, - int32_t position, - UErrorCode *status) + int32_t position, + UErrorCode *status) { if (strsrch && U_SUCCESS(*status)) { strsrch->search->isForwardSearching = FALSE; @@ -3188,7 +1139,7 @@ U_CAPI int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch, * element iterator before using this method. */ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, - UErrorCode *status) + UErrorCode *status) { if (U_SUCCESS(*status) && strsrch) { // note offset is either equivalent to the start of the previous match @@ -3198,26 +1149,14 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, search->reset = FALSE; int32_t textlength = search->textLength; if (search->isForwardSearching) { -#if BOYER_MOORE - if (offset == textlength - || (!search->isOverlap && - (offset + strsrch->pattern.defaultShiftSize > textlength || - (search->matchedIndex != USEARCH_DONE && - offset + search->matchedLength >= textlength)))) { - // not enough characters to match - setMatchNotFound(strsrch); - return USEARCH_DONE; - } -#else if (offset == textlength || (! search->isOverlap && (search->matchedIndex != USEARCH_DONE && offset + search->matchedLength > textlength))) { // not enough characters to match - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return USEARCH_DONE; } -#endif } else { // switching direction. 
@@ -3243,7 +1182,7 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, } search->matchedLength = 0; - setColEIterOffset(strsrch->textIter, search->matchedIndex); + setColEIterOffset(strsrch->textIter, search->matchedIndex, *status); // status checked below if (search->matchedIndex == textlength) { search->matchedIndex = USEARCH_DONE; @@ -3262,7 +1201,7 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, } else { // for boundary check purposes. this will ensure that the - // next match will not preceed the current offset + // next match will not precede the current offset // note search->matchedIndex will always be set to something // in the code search->matchedIndex = offset - 1; @@ -3281,13 +1220,11 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, return USEARCH_DONE; } -#if !BOYER_MOORE if (search->matchedIndex == USEARCH_DONE) { ucol_setOffset(strsrch->textIter, search->textLength, status); } else { ucol_setOffset(strsrch->textIter, search->matchedIndex, status); } -#endif return search->matchedIndex; } @@ -3296,7 +1233,7 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, } U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch, - UErrorCode *status) + UErrorCode *status) { if (U_SUCCESS(*status) && strsrch) { int32_t offset; @@ -3305,7 +1242,7 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch, offset = search->textLength; search->isForwardSearching = FALSE; search->reset = FALSE; - setColEIterOffset(strsrch->textIter, offset); + setColEIterOffset(strsrch->textIter, offset, *status); } else { offset = usearch_getOffset(strsrch); @@ -3324,24 +1261,13 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch, } } else { -#if BOYER_MOORE - if (offset == 0 || matchedindex == 0 || - (!search->isOverlap && - (offset < strsrch->pattern.defaultShiftSize || - (matchedindex != USEARCH_DONE && - matchedindex < strsrch->pattern.defaultShiftSize)))) { - // not enough characters to 
match - setMatchNotFound(strsrch); - return USEARCH_DONE; - } -#else + // Could check pattern length, but the // linear search will do the right thing if (offset == 0 || matchedindex == 0) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return USEARCH_DONE; } -#endif } if (U_SUCCESS(*status)) { @@ -3349,12 +1275,12 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch, search->matchedIndex = (matchedindex == USEARCH_DONE ? offset : matchedindex); if (search->matchedIndex == 0) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); // status checked below } else { // move by codepoints U16_BACK_1(search->text, 0, search->matchedIndex); - setColEIterOffset(strsrch->textIter, search->matchedIndex); + setColEIterOffset(strsrch->textIter, search->matchedIndex, *status); // status checked below search->matchedLength = 0; } @@ -3509,7 +1435,7 @@ CEIBuffer::CEIBuffer(UStringSearch *ss, UErrorCode *status) { if (bufSize>DEFAULT_CEBUFFER_SIZE) { buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI)); - if (buf == NULL) { + if (buf == nullptr) { *status = U_MEMORY_ALLOCATION_ERROR; } } @@ -3544,12 +1470,13 @@ const CEI *CEIBuffer::get(int32_t index) { // Verify that it is the next one in sequence, which is all // that is allowed. if (index != limitIx) { - U_ASSERT(FALSE); - // TODO: In ICU 64 the above assert was changed to use UPRV_UNREACHABLE instead - // which unconditionally calls abort(). However, there were cases where this was - // being hit. This change is reverted for now, restoring the existing behavior. + UPRV_UNREACHABLE_ASSERT; + // TODO: In ICU 64 the above was changed from U_ASSERT to UPRV_UNREACHABLE, + // which unconditionally called abort(). However, there were cases in which it + // was being hit, so it was changed back to U_ASSERT per ICU-20680. In ICU 70, + // we now use the new UPRV_UNREACHABLE_ASSERT to better indicate the situation. // ICU-20792 tracks the follow-up work/further investigation on this. 
- return NULL; + return nullptr; } // Manage the circular CE buffer indexing @@ -3586,12 +1513,13 @@ const CEI *CEIBuffer::getPrevious(int32_t index) { // Verify that it is the next one in sequence, which is all // that is allowed. if (index != limitIx) { - U_ASSERT(FALSE); - // TODO: In ICU 64 the above assert was changed to use UPRV_UNREACHABLE instead - // which unconditionally calls abort(). However, there were cases where this was - // being hit. This change is reverted for now, restoring the existing behavior. + UPRV_UNREACHABLE_ASSERT; + // TODO: In ICU 64 the above was changed from U_ASSERT to UPRV_UNREACHABLE, + // which unconditionally called abort(). However, there were cases in which it + // was being hit, so it was changed back to U_ASSERT per ICU-20680. In ICU 70, + // we now use the new UPRV_UNREACHABLE_ASSERT to better indicate the situation. // ICU-20792 tracks the follow-up work/further investigation on this. - return NULL; + return nullptr; } // Manage the circular CE buffer indexing @@ -3626,7 +1554,10 @@ U_NAMESPACE_END * has an external break iterator, use that. Otherwise use the internal character * break iterator. 
*/ -static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) { +static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex, UErrorCode &status) { + if (U_FAILURE(status)) { + return startIndex; + } #if 0 const UChar *text = strsrch->search->text; int32_t textLen = strsrch->search->textLength; @@ -3665,17 +1596,12 @@ static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) { } return indexOfLastCharChecked; #elif !UCONFIG_NO_BREAK_ITERATION - UBreakIterator *breakiterator = strsrch->search->breakIter; - - if (breakiterator == NULL) { - breakiterator = strsrch->search->internalBreakIter; - } - - if (breakiterator != NULL) { - return ubrk_following(breakiterator, startIndex); + UBreakIterator *breakiterator = getBreakIterator(strsrch, status); + if (U_FAILURE(status)) { + return startIndex; } - return startIndex; + return ubrk_following(breakiterator, startIndex); #else // **** or should we use the original code? **** return startIndex; @@ -3688,7 +1614,10 @@ static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) { * has an external break iterator, test using that, otherwise test * using the internal character break iterator. 
*/ -static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) { +static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index, UErrorCode &status) { + if (U_FAILURE(status)) { + return TRUE; + } #if 0 const UChar *text = strsrch->search->text; int32_t textLen = strsrch->search->textLength; @@ -3716,13 +1645,12 @@ static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) { UBool combining = !(gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR); return !combining; #elif !UCONFIG_NO_BREAK_ITERATION - UBreakIterator *breakiterator = strsrch->search->breakIter; - - if (breakiterator == NULL) { - breakiterator = strsrch->search->internalBreakIter; + UBreakIterator *breakiterator = getBreakIterator(strsrch, status); + if (U_FAILURE(status)) { + return TRUE; } - return (breakiterator != NULL && ubrk_isBoundary(breakiterator, index)); + return ubrk_isBoundary(breakiterator, index); #else // **** or use the original code? **** return TRUE; @@ -3730,12 +1658,15 @@ static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) { } #if 0 -static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end) +static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end, UErrorCode &status) { -#if !UCONFIG_NO_BREAK_ITERATION - UBreakIterator *breakiterator = strsrch->search->breakIter; + if (U_FAILURE(status)) { + return TRUE; + } - if (breakiterator != NULL) { +#if !UCONFIG_NO_BREAK_ITERATION + UBreakIterator *breakiterator = getBreakIterator(strsrch, status); + if (U_SUCCESS(status)) { int32_t startindex = ubrk_first(breakiterator); int32_t endindex = ubrk_last(breakiterator); @@ -3813,10 +1744,6 @@ static UCompareCEsResult compareCE64s(int64_t targCE, int64_t patCE, int16_t com return U_CE_MATCH; } -#if BOYER_MOORE -// TODO: #if BOYER_MOORE, need 32-bit version of compareCE64s -#endif - namespace { UChar32 codePointAt(const USearch &search, int32_t index) { @@ -3852,7 +1779,7 @@ 
U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, // TODO: reject search patterns beginning with a combining char. #ifdef USEARCH_DEBUG - if (getenv("USEARCH_DEBUG") != NULL) { + if (getenv("USEARCH_DEBUG") != nullptr) { printf("Pattern CEs\n"); for (int ii=0; ii<strsrch->pattern.cesLength; ii++) { printf(" %8x", strsrch->pattern.ces[ii]); @@ -3867,21 +1794,26 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, if(strsrch->pattern.cesLength == 0 || startIdx < 0 || startIdx > strsrch->search->textLength || - strsrch->pattern.ces == NULL) { + strsrch->pattern.ces == nullptr) { *status = U_ILLEGAL_ARGUMENT_ERROR; return FALSE; } - if (strsrch->pattern.pces == NULL) { + if (strsrch->pattern.pces == nullptr) { initializePatternPCETable(strsrch, status); } ucol_setOffset(strsrch->textIter, startIdx, status); CEIBuffer ceb(strsrch, status); + // An out-of-memory (OOM) failure can occur in the initializePatternPCETable function + // or CEIBuffer constructor above, so we need to check the status. + if (U_FAILURE(*status)) { + return FALSE; + } int32_t targetIx = 0; - const CEI *targetCEI = NULL; + const CEI *targetCEI = nullptr; int32_t patIx; UBool found; @@ -3900,7 +1832,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, // 2. An int64_t CE weight is determined for each resulting unit (high 16 bits are primary strength, next // 16 bits are secondary, next 16 (the high 16 bits of the low 32-bit half) are tertiary. Any of these // fields that are for strengths below that of the collator are set to 0. If this makes the int64_t - // CE weight 0 (as for a combining diacritic with secondary weight when the collator strentgh is primary), + // CE weight 0 (as for a combining diacritic with secondary weight when the collator strength is primary), // then the CE is deleted, so the following code sees only CEs that are relevant. // For each CE, the lowIndex and highIndex correspond to where this CE begins and ends in the original text. 
// If lowIndex==highIndex, either the CE resulted from an expansion/decomposition of one of the original text @@ -3915,9 +1847,9 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, int64_t patCE = 0; // For targetIx > 0, this ceb.get gets a CE that is as far back in the ring buffer // (compared to the last CE fetched for the previous targetIx value) as we need to go - // for this targetIx value, so if it is non-NULL then other ceb.get calls should be OK. + // for this targetIx value, so if it is non-nullptr then other ceb.get calls should be OK. const CEI *firstCEI = ceb.get(targetIx); - if (firstCEI == NULL) { + if (firstCEI == nullptr) { *status = U_INTERNAL_PROGRAM_ERROR; found = FALSE; break; @@ -3946,7 +1878,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, } targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far - if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) { + if (!found && ((targetCEI == nullptr) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) { // No match at this targetIx. Try again at the next. continue; } @@ -4018,9 +1950,12 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, // to something else. // This type of match should be rejected for not completely consuming a // combining sequence. - if (!isBreakBoundary(strsrch, mStart)) { + if (!isBreakBoundary(strsrch, mStart, *status)) { found = FALSE; } + if (U_FAILURE(*status)) { + break; + } // Check for the start of the match being within an Collation Element Expansion, // meaning that the first char of the match is only partially matched. 
@@ -4044,10 +1979,10 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, // tests in any case) // * the match limit is a normalization boundary UBool allowMidclusterMatch = FALSE; - if (strsrch->search->text != NULL && strsrch->search->textLength > maxLimit) { + if (strsrch->search->text != nullptr && strsrch->search->textLength > maxLimit) { allowMidclusterMatch = - strsrch->search->breakIter == NULL && - nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 && + strsrch->search->breakIter == nullptr && + nextCEI != nullptr && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 && maxLimit >= lastCEI->highIndex && nextCEI->highIndex > maxLimit && (strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) || strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit))); @@ -4061,7 +1996,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, // * do NOT require that match limit be on a breakIter boundary // Advance the match end position to the first acceptable match boundary. - // This advances the index over any combining charcters. + // This advances the index over any combining characters. mLimit = maxLimit; if (minLimit < maxLimit) { // When the last CE's low index is same with its high index, the CE is likely @@ -4070,10 +2005,10 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, // at the break boundary, move the position to the next boundary will result // incorrect match length when there are ignorable characters exist between // the position and the next character produces CE(s). See ticket#8482. 
- if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit)) { + if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit, *status)) { mLimit = minLimit; } else { - int32_t nba = nextBoundaryAfter(strsrch, minLimit); + int32_t nba = nextBoundaryAfter(strsrch, minLimit, *status); // Note that we can have nba < maxLimit && nba >= minLImit, in which // case we want to set mLimit to nba regardless of allowMidclusterMatch // (i.e. we back off mLimit to the previous breakIterator boundary). @@ -4083,8 +2018,12 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, } } + if (U_FAILURE(*status)) { + break; + } + #ifdef USEARCH_DEBUG - if (getenv("USEARCH_DEBUG") != NULL) { + if (getenv("USEARCH_DEBUG") != nullptr) { printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit); } #endif @@ -4096,9 +2035,12 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, found = FALSE; } - if (!isBreakBoundary(strsrch, mLimit)) { + if (!isBreakBoundary(strsrch, mLimit, *status)) { found = FALSE; } + if (U_FAILURE(*status)) { + break; + } } if (! checkIdentical(strsrch, mStart, mLimit)) { @@ -4111,7 +2053,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, } #ifdef USEARCH_DEBUG - if (getenv("USEARCH_DEBUG") != NULL) { + if (getenv("USEARCH_DEBUG") != nullptr) { printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx); int32_t lastToPrint = ceb.limitIx+2; for (int ii=ceb.firstIx; ii<lastToPrint; ii++) { @@ -4123,16 +2065,21 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, // All Done. Store back the match bounds to the caller. // + + if (U_FAILURE(*status)) { + found = FALSE; // No match if a failure occured. 
+ } + if (found==FALSE) { mLimit = -1; mStart = -1; } - if (matchStart != NULL) { + if (matchStart != nullptr) { *matchStart= mStart; } - if (matchLimit != NULL) { + if (matchLimit != nullptr) { *matchLimit = mLimit; } @@ -4152,7 +2099,7 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, // TODO: reject search patterns beginning with a combining char. #ifdef USEARCH_DEBUG - if (getenv("USEARCH_DEBUG") != NULL) { + if (getenv("USEARCH_DEBUG") != nullptr) { printf("Pattern CEs\n"); for (int ii=0; ii<strsrch->pattern.cesLength; ii++) { printf(" %8x", strsrch->pattern.ces[ii]); @@ -4162,17 +2109,17 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, #endif // Input parameter sanity check. - // TODO: should input indicies clip to the text length + // TODO: should input indices clip to the text length // in the same way that UText does. - if(strsrch->pattern.cesLength == 0 || + if(strsrch->pattern.cesLength == 0 || startIdx < 0 || startIdx > strsrch->search->textLength || - strsrch->pattern.ces == NULL) { + strsrch->pattern.ces == nullptr) { *status = U_ILLEGAL_ARGUMENT_ERROR; return FALSE; } - if (strsrch->pattern.pces == NULL) { + if (strsrch->pattern.pces == nullptr) { initializePatternPCETable(strsrch, status); } @@ -4189,8 +2136,11 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, * consider for the match. 
*/ if (startIdx < strsrch->search->textLength) { - UBreakIterator *bi = strsrch->search->internalBreakIter; - int32_t next = ubrk_following(bi, startIdx); + UBreakIterator *breakiterator = getBreakIterator(strsrch, *status); + if (U_FAILURE(*status)) { + return FALSE; + } + int32_t next = ubrk_following(breakiterator, startIdx); ucol_setOffset(strsrch->textIter, next, status); @@ -4203,8 +2153,12 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, ucol_setOffset(strsrch->textIter, startIdx, status); } + // An out-of-memory (OOM) failure can occur above, so we need to check the status. + if (U_FAILURE(*status)) { + return FALSE; + } - const CEI *targetCEI = NULL; + const CEI *targetCEI = nullptr; int32_t patIx; UBool found; @@ -4227,9 +2181,9 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, found = TRUE; // For targetIx > limitIx, this ceb.getPrevious gets a CE that is as far back in the ring buffer // (compared to the last CE fetched for the previous targetIx value) as we need to go - // for this targetIx value, so if it is non-NULL then other ceb.getPrevious calls should be OK. + // for this targetIx value, so if it is non-nullptr then other ceb.getPrevious calls should be OK. const CEI *lastCEI = ceb.getPrevious(targetIx); - if (lastCEI == NULL) { + if (lastCEI == nullptr) { *status = U_INTERNAL_PROGRAM_ERROR; found = FALSE; break; @@ -4260,7 +2214,7 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, } } - if (!found && ((targetCEI == NULL) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) { + if (!found && ((targetCEI == nullptr) || (targetCEI->ce != UCOL_PROCESSED_NULLORDER))) { // No match at this targetIx. Try again at the next. continue; } @@ -4285,9 +2239,12 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, // to something else. // This type of match should be rejected for not completely consuming a // combining sequence. 
- if (!isBreakBoundary(strsrch, mStart)) { + if (!isBreakBoundary(strsrch, mStart, *status)) { found = FALSE; } + if (U_FAILURE(*status)) { + break; + } // Look at the high index of the first CE in the match. If it's the same as the // low index, the first CE in the match is in the middle of an expansion. @@ -4327,10 +2284,10 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, // tests in any case) // * the match limit is a normalization boundary UBool allowMidclusterMatch = FALSE; - if (strsrch->search->text != NULL && strsrch->search->textLength > maxLimit) { + if (strsrch->search->text != nullptr && strsrch->search->textLength > maxLimit) { allowMidclusterMatch = - strsrch->search->breakIter == NULL && - nextCEI != NULL && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 && + strsrch->search->breakIter == nullptr && + nextCEI != nullptr && (((nextCEI->ce) >> 32) & 0xFFFF0000UL) != 0 && maxLimit >= lastCEI->highIndex && nextCEI->highIndex > maxLimit && (strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) || strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit))); @@ -4346,7 +2303,7 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, // Advance the match end position to the first acceptable match boundary. // This advances the index over any combining characters. if (minLimit < maxLimit) { - int32_t nba = nextBoundaryAfter(strsrch, minLimit); + int32_t nba = nextBoundaryAfter(strsrch, minLimit, *status); // Note that we can have nba < maxLimit && nba >= minLImit, in which // case we want to set mLimit to nba regardless of allowMidclusterMatch // (i.e. we back off mLimit to the previous breakIterator boundary). 
@@ -4363,9 +2320,12 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, } // Make sure the end of the match is on a break boundary - if (!isBreakBoundary(strsrch, mLimit)) { + if (!isBreakBoundary(strsrch, mLimit, *status)) { found = FALSE; } + if (U_FAILURE(*status)) { + break; + } } } else { @@ -4373,12 +2333,12 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, // The maximum position is detected by boundary after // the last non-ignorable CE. Combining sequence // across the start index will be truncated. - int32_t nba = nextBoundaryAfter(strsrch, minLimit); + int32_t nba = nextBoundaryAfter(strsrch, minLimit, *status); mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx; } #ifdef USEARCH_DEBUG - if (getenv("USEARCH_DEBUG") != NULL) { + if (getenv("USEARCH_DEBUG") != nullptr) { printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit); } #endif @@ -4394,7 +2354,7 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, } #ifdef USEARCH_DEBUG - if (getenv("USEARCH_DEBUG") != NULL) { + if (getenv("USEARCH_DEBUG") != nullptr) { printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx); int32_t lastToPrint = ceb.limitIx+2; for (int ii=ceb.firstIx; ii<lastToPrint; ii++) { @@ -4406,16 +2366,21 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, // All Done. Store back the match bounds to the caller. // + + if (U_FAILURE(*status)) { + found = FALSE; // No match if a failure occured. 
+ } + if (found==FALSE) { mLimit = -1; mStart = -1; } - if (matchStart != NULL) { + if (matchStart != nullptr) { *matchStart= mStart; } - if (matchLimit != NULL) { + if (matchLimit != nullptr) { *matchLimit = mLimit; } @@ -4427,102 +2392,10 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status) { if (U_FAILURE(*status)) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } -#if BOYER_MOORE - UCollationElements *coleiter = strsrch->textIter; - int32_t textlength = strsrch->search->textLength; - int32_t *patternce = strsrch->pattern.ces; - int32_t patterncelength = strsrch->pattern.cesLength; - int32_t textoffset = ucol_getOffset(coleiter); - - // status used in setting coleiter offset, since offset is checked in - // shiftForward before setting the coleiter offset, status never - // a failure - textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, - patterncelength); - while (textoffset <= textlength) - { - uint32_t patternceindex = patterncelength - 1; - int32_t targetce; - UBool found = FALSE; - int32_t lastce = UCOL_NULLORDER; - - setColEIterOffset(coleiter, textoffset); - - for (;;) { - // finding the last pattern ce match, imagine composite characters - // for example: search for pattern A in text \u00C0 - // we'll have to skip \u0300 the grave first before we get to A - targetce = ucol_previous(coleiter, status); - if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) { - found = FALSE; - break; - } - targetce = getCE(strsrch, targetce); - if (targetce == UCOL_IGNORABLE && inNormBuf(coleiter)) { - // this is for the text \u0315\u0300 that requires - // normalization and pattern \u0300, where \u0315 is ignorable - continue; - } - if (lastce == UCOL_NULLORDER || lastce == UCOL_IGNORABLE) { - lastce = targetce; - } - // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s - if (targetce == 
patternce[patternceindex]) { - // the first ce can be a contraction - found = TRUE; - break; - } - if (!hasExpansion(coleiter)) { - found = FALSE; - break; - } - } - - //targetce = lastce; - - while (found && patternceindex > 0) { - lastce = targetce; - targetce = ucol_previous(coleiter, status); - if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) { - found = FALSE; - break; - } - targetce = getCE(strsrch, targetce); - if (targetce == UCOL_IGNORABLE) { - continue; - } - - patternceindex --; - // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s - found = found && targetce == patternce[patternceindex]; - } - - targetce = lastce; - - if (!found) { - if (U_FAILURE(*status)) { - break; - } - textoffset = shiftForward(strsrch, textoffset, lastce, - patternceindex); - // status checked at loop. - patternceindex = patterncelength; - continue; - } - - if (checkNextExactMatch(strsrch, &textoffset, status)) { - // status checked in ucol_setOffset - setColEIterOffset(coleiter, strsrch->search->matchedIndex); - return TRUE; - } - } - setMatchNotFound(strsrch); - return FALSE; -#else int32_t textOffset = ucol_getOffset(strsrch->textIter); int32_t start = -1; int32_t end = -1; @@ -4532,112 +2405,18 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status) strsrch->search->matchedLength = end - start; return TRUE; } else { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } -#endif } UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status) { if (U_FAILURE(*status)) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } -#if BOYER_MOORE - UCollationElements *coleiter = strsrch->textIter; - int32_t textlength = strsrch->search->textLength; - int32_t *patternce = strsrch->pattern.ces; - int32_t patterncelength = strsrch->pattern.cesLength; - int32_t textoffset = ucol_getOffset(coleiter); - UBool hasPatternAccents = - strsrch->pattern.hasSuffixAccents 
|| strsrch->pattern.hasPrefixAccents; - - textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, - patterncelength); - strsrch->canonicalPrefixAccents[0] = 0; - strsrch->canonicalSuffixAccents[0] = 0; - - while (textoffset <= textlength) - { - int32_t patternceindex = patterncelength - 1; - int32_t targetce; - UBool found = FALSE; - int32_t lastce = UCOL_NULLORDER; - - setColEIterOffset(coleiter, textoffset); - - for (;;) { - // finding the last pattern ce match, imagine composite characters - // for example: search for pattern A in text \u00C0 - // we'll have to skip \u0300 the grave first before we get to A - targetce = ucol_previous(coleiter, status); - if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) { - found = FALSE; - break; - } - targetce = getCE(strsrch, targetce); - if (lastce == UCOL_NULLORDER || lastce == UCOL_IGNORABLE) { - lastce = targetce; - } - // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s - if (targetce == patternce[patternceindex]) { - // the first ce can be a contraction - found = TRUE; - break; - } - if (!hasExpansion(coleiter)) { - found = FALSE; - break; - } - } - - while (found && patternceindex > 0) { - targetce = ucol_previous(coleiter, status); - if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) { - found = FALSE; - break; - } - targetce = getCE(strsrch, targetce); - if (targetce == UCOL_IGNORABLE) { - continue; - } - - patternceindex --; - // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s - found = found && targetce == patternce[patternceindex]; - } - - // initializing the rearranged accent array - if (hasPatternAccents && !found) { - strsrch->canonicalPrefixAccents[0] = 0; - strsrch->canonicalSuffixAccents[0] = 0; - if (U_FAILURE(*status)) { - break; - } - found = doNextCanonicalMatch(strsrch, textoffset, status); - } - - if (!found) { - if (U_FAILURE(*status)) { - break; - } - textoffset = shiftForward(strsrch, textoffset, lastce, - patternceindex); - // 
status checked at loop - patternceindex = patterncelength; - continue; - } - - if (checkNextCanonicalMatch(strsrch, &textoffset, status)) { - setColEIterOffset(coleiter, strsrch->search->matchedIndex); - return TRUE; - } - } - setMatchNotFound(strsrch); - return FALSE; -#else int32_t textOffset = ucol_getOffset(strsrch->textIter); int32_t start = -1; int32_t end = -1; @@ -4647,114 +2426,18 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status) strsrch->search->matchedLength = end - start; return TRUE; } else { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } -#endif } UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status) { if (U_FAILURE(*status)) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } -#if BOYER_MOORE - UCollationElements *coleiter = strsrch->textIter; - int32_t *patternce = strsrch->pattern.ces; - int32_t patterncelength = strsrch->pattern.cesLength; - int32_t textoffset = ucol_getOffset(coleiter); - - // shifting it check for setting offset - // if setOffset is called previously or there was no previous match, we - // leave the offset as it is. - if (strsrch->search->matchedIndex != USEARCH_DONE) { - textoffset = strsrch->search->matchedIndex; - } - - textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER, - patterncelength); - - while (textoffset >= 0) - { - int32_t patternceindex = 1; - int32_t targetce; - UBool found = FALSE; - int32_t firstce = UCOL_NULLORDER; - - // if status is a failure, ucol_setOffset does nothing - setColEIterOffset(coleiter, textoffset); - - for (;;) { - // finding the first pattern ce match, imagine composite - // characters. 
for example: search for pattern \u0300 in text - // \u00C0, we'll have to skip A first before we get to - // \u0300 the grave accent - targetce = ucol_next(coleiter, status); - if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) { - found = FALSE; - break; - } - targetce = getCE(strsrch, targetce); - if (firstce == UCOL_NULLORDER || firstce == UCOL_IGNORABLE) { - firstce = targetce; - } - if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) { - continue; - } - // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s - if (targetce == patternce[0]) { - found = TRUE; - break; - } - if (!hasExpansion(coleiter)) { - // checking for accents in composite character - found = FALSE; - break; - } - } - - //targetce = firstce; - - while (found && (patternceindex < patterncelength)) { - firstce = targetce; - targetce = ucol_next(coleiter, status); - if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) { - found = FALSE; - break; - } - targetce = getCE(strsrch, targetce); - if (targetce == UCOL_IGNORABLE) { - continue; - } - - // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s - found = found && targetce == patternce[patternceindex]; - patternceindex ++; - } - - targetce = firstce; - - if (!found) { - if (U_FAILURE(*status)) { - break; - } - - textoffset = reverseShift(strsrch, textoffset, targetce, - patternceindex); - patternceindex = 0; - continue; - } - - if (checkPreviousExactMatch(strsrch, &textoffset, status)) { - setColEIterOffset(coleiter, textoffset); - return TRUE; - } - } - setMatchNotFound(strsrch); - return FALSE; -#else int32_t textOffset; if (strsrch->search->isOverlap) { @@ -4764,18 +2447,18 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status) // move the start position at the end of possible match initializePatternPCETable(strsrch, status); if (!initTextProcessedIter(strsrch, status)) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return 
FALSE; } for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) { - int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status); + int64_t pce = strsrch->textProcessedIter->nextProcessed(nullptr, nullptr, status); if (pce == UCOL_PROCESSED_NULLORDER) { // at the end of the text break; } } if (U_FAILURE(*status)) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } textOffset = ucol_getOffset(strsrch->textIter); @@ -4792,122 +2475,19 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status) strsrch->search->matchedLength = end - start; return TRUE; } else { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } -#endif } UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, UErrorCode *status) { if (U_FAILURE(*status)) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } -#if BOYER_MOORE - UCollationElements *coleiter = strsrch->textIter; - int32_t *patternce = strsrch->pattern.ces; - int32_t patterncelength = strsrch->pattern.cesLength; - int32_t textoffset = ucol_getOffset(coleiter); - UBool hasPatternAccents = - strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; - - // shifting it check for setting offset - // if setOffset is called previously or there was no previous match, we - // leave the offset as it is. - if (strsrch->search->matchedIndex != USEARCH_DONE) { - textoffset = strsrch->search->matchedIndex; - } - - textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER, - patterncelength); - strsrch->canonicalPrefixAccents[0] = 0; - strsrch->canonicalSuffixAccents[0] = 0; - - while (textoffset >= 0) - { - int32_t patternceindex = 1; - int32_t targetce; - UBool found = FALSE; - int32_t firstce = UCOL_NULLORDER; - - setColEIterOffset(coleiter, textoffset); - for (;;) { - // finding the first pattern ce match, imagine composite - // characters. 
for example: search for pattern \u0300 in text - // \u00C0, we'll have to skip A first before we get to - // \u0300 the grave accent - targetce = ucol_next(coleiter, status); - if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) { - found = FALSE; - break; - } - targetce = getCE(strsrch, targetce); - if (firstce == UCOL_NULLORDER || firstce == UCOL_IGNORABLE) { - firstce = targetce; - } - - // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s - if (targetce == patternce[0]) { - // the first ce can be a contraction - found = TRUE; - break; - } - if (!hasExpansion(coleiter)) { - // checking for accents in composite character - found = FALSE; - break; - } - } - - targetce = firstce; - - while (found && patternceindex < patterncelength) { - targetce = ucol_next(coleiter, status); - if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) { - found = FALSE; - break; - } - targetce = getCE(strsrch, targetce); - if (targetce == UCOL_IGNORABLE) { - continue; - } - - // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s - found = found && targetce == patternce[patternceindex]; - patternceindex ++; - } - - // initializing the rearranged accent array - if (hasPatternAccents && !found) { - strsrch->canonicalPrefixAccents[0] = 0; - strsrch->canonicalSuffixAccents[0] = 0; - if (U_FAILURE(*status)) { - break; - } - found = doPreviousCanonicalMatch(strsrch, textoffset, status); - } - - if (!found) { - if (U_FAILURE(*status)) { - break; - } - textoffset = reverseShift(strsrch, textoffset, targetce, - patternceindex); - patternceindex = 0; - continue; - } - - if (checkPreviousCanonicalMatch(strsrch, &textoffset, status)) { - setColEIterOffset(coleiter, textoffset); - return TRUE; - } - } - setMatchNotFound(strsrch); - return FALSE; -#else int32_t textOffset; if (strsrch->search->isOverlap) { @@ -4917,18 +2497,18 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, // move the start position at the end of possible 
match initializePatternPCETable(strsrch, status); if (!initTextProcessedIter(strsrch, status)) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) { - int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status); + int64_t pce = strsrch->textProcessedIter->nextProcessed(nullptr, nullptr, status); if (pce == UCOL_PROCESSED_NULLORDER) { // at the end of the text break; } } if (U_FAILURE(*status)) { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } textOffset = ucol_getOffset(strsrch->textIter); @@ -4945,10 +2525,9 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, strsrch->search->matchedLength = end - start; return TRUE; } else { - setMatchNotFound(strsrch); + setMatchNotFound(strsrch, *status); return FALSE; } -#endif } #endif /* #if !UCONFIG_NO_COLLATION */ diff --git a/contrib/libs/icu/i18n/uspoof.cpp b/contrib/libs/icu/i18n/uspoof.cpp index 33682389f5..dd4618baa7 100644 --- a/contrib/libs/icu/i18n/uspoof.cpp +++ b/contrib/libs/icu/i18n/uspoof.cpp @@ -82,63 +82,66 @@ void U_CALLCONV initializeStatics(UErrorCode &status) { u"\\u04FF\\u0510-\\u0529\\u052E\\u052F\\u0531-\\u0556\\u0559\\u0561-\\u0586" u"\\u05B4\\u05D0-\\u05EA\\u05EF-\\u05F2\\u0620-\\u063F\\u0641-\\u0655\\u0660-" u"\\u0669\\u0670-\\u0672\\u0674\\u0679-\\u068D\\u068F-\\u06A0\\u06A2-\\u06D3" - u"\\u06D5\\u06E5\\u06E6\\u06EE-\\u06FC\\u06FF\\u0750-\\u07B1\\u08A0-\\u08AC" - u"\\u08B2\\u08B6-\\u08C7\\u0901-\\u094D\\u094F\\u0950\\u0956\\u0957\\u0960-" - u"\\u0963\\u0966-\\u096F\\u0971-\\u0977\\u0979-\\u097F\\u0981-\\u0983\\u0985-" - u"\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9" - u"\\u09BC-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CE\\u09D7\\u09E0-\\u09E3\\u09E6-" - u"\\u09F1\\u09FE\\u0A01-\\u0A03\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-\\u0A28" - u"\\u0A2A-\\u0A30\\u0A32\\u0A35\\u0A38\\u0A39\\u0A3C\\u0A3E-\\u0A42\\u0A47" - 
u"\\u0A48\\u0A4B-\\u0A4D\\u0A5C\\u0A66-\\u0A74\\u0A81-\\u0A83\\u0A85-\\u0A8D" - u"\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9" - u"\\u0ABC-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD\\u0AD0\\u0AE0-\\u0AE3\\u0AE6-" - u"\\u0AEF\\u0AFA-\\u0AFF\\u0B01-\\u0B03\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13-" - u"\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B3C-\\u0B43\\u0B47" - u"\\u0B48\\u0B4B-\\u0B4D\\u0B55-\\u0B57\\u0B5F-\\u0B61\\u0B66-\\u0B6F\\u0B71" - u"\\u0B82\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A" - u"\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BBE-" - u"\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD0\\u0BD7\\u0BE6-\\u0BEF\\u0C01-" - u"\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D-" - u"\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C60\\u0C61\\u0C66-" - u"\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8" - u"\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD" - u"\\u0CD5\\u0CD6\\u0CE0-\\u0CE3\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02" - u"\\u0D03\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-" - u"\\u0D48\\u0D4A-\\u0D4E\\u0D54-\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-" - u"\\u0D7F\\u0D82\\u0D83\\u0D85-\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-" - u"\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6" - u"\\u0DD8-\\u0DDE\\u0DF2\\u0E01-\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-" - u"\\u0E59\\u0E81\\u0E82\\u0E84\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-" - u"\\u0EB2\\u0EB4-\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9" - u"\\u0EDE\\u0EDF\\u0F00\\u0F20-\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-" - u"\\u0F47\\u0F49-\\u0F4C\\u0F4E-\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-" - u"\\u0F68\\u0F6A-\\u0F6C\\u0F71\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84" - u"\\u0F86-\\u0F92\\u0F94-\\u0F97\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6" - 
u"\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-" - u"\\u109D\\u10C7\\u10CD\\u10D0-\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-" - u"\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288" - u"\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-" - u"\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-" - u"\\u135F\\u1380-\\u138F\\u1780-\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-" - u"\\u17CA\\u17D2\\u17D7\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF" - u"\\u1E00-\\u1E99\\u1E9E\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-" - u"\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70" - u"\\u1F72\\u1F74\\u1F76\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA" - u"\\u1FBC\\u1FC2-\\u1FC4\\u1FC6-\\u1FC8\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-" - u"\\u1FDA\\u1FE0-\\u1FE2\\u1FE4-\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8" - u"\\u1FFA\\u1FFC\\u2D27\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE" - u"\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6" - u"\\u2DD8-\\u2DDE\\u3005-\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E" - u"\\u30A1-\\u30FA\\u30FC-\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-" - u"\\u4DBF\\u4E00-\\u9FFC\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793" - u"\\uA7AA\\uA7AE\\uA7B8\\uA7B9\\uA7C2-\\uA7CA\\uA9E7-\\uA9FE\\uAA60-\\uAA76" - u"\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26" - u"\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13" - u"\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301\\U00011303" - u"\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B150-\\U0001B152" - u"\\U0001B164-\\U0001B167\\U00020000-\\U0002A6DD\\U0002A700-\\U0002B734" - u"\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0" - u"\\U00030000-\\U0003134A]"; + 
u"\\u06D5\\u06E5\\u06E6\\u06EE-\\u06FC\\u06FF\\u0750-\\u07B1\\u0870-\\u0887" + u"\\u0889-\\u088E\\u08A0-\\u08AC\\u08B2\\u08B5-\\u08C9\\u0901-\\u094D\\u094F" + u"\\u0950\\u0956\\u0957\\u0960-\\u0963\\u0966-\\u096F\\u0971-\\u0977\\u0979-" + u"\\u097F\\u0981-\\u0983\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-" + u"\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BC-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CE" + u"\\u09D7\\u09E0-\\u09E3\\u09E6-\\u09F1\\u09FE\\u0A01-\\u0A03\\u0A05-\\u0A0A" + u"\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A35\\u0A38\\u0A39" + u"\\u0A3C\\u0A3E-\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A5C\\u0A66-\\u0A74" + u"\\u0A81-\\u0A83\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0" + u"\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABC-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD" + u"\\u0AD0\\u0AE0-\\u0AE3\\u0AE6-\\u0AEF\\u0AFA-\\u0AFF\\u0B01-\\u0B03\\u0B05-" + u"\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-" + u"\\u0B39\\u0B3C-\\u0B43\\u0B47\\u0B48\\u0B4B-\\u0B4D\\u0B55-\\u0B57\\u0B5F-" + u"\\u0B61\\u0B66-\\u0B6F\\u0B71\\u0B82\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90" + u"\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-" + u"\\u0BAA\\u0BAE-\\u0BB9\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD0" + u"\\u0BD7\\u0BE6-\\u0BEF\\u0C01-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-" + u"\\u0C33\\u0C35-\\u0C39\\u0C3C-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55" + u"\\u0C56\\u0C5D\\u0C60\\u0C61\\u0C66-\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-" + u"\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-" + u"\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5\\u0CD6\\u0CDD\\u0CE0-\\u0CE3" + u"\\u0CE6-\\u0CEF\\u0CF1\\u0CF2\\u0D00\\u0D02\\u0D03\\u0D05-\\u0D0C\\u0D0E-" + u"\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4E\\u0D54-" + u"\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D82\\u0D83\\u0D85-" + u"\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD" + 
u"\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDE\\u0DF2\\u0E01-" + u"\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59\\u0E81\\u0E82\\u0E84" + u"\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-\\u0EB2\\u0EB4-\\u0EBD\\u0EC0-" + u"\\u0EC4\\u0EC6\\u0EC8-\\u0ECD\\u0ED0-\\u0ED9\\u0EDE\\u0EDF\\u0F00\\u0F20-" + u"\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-\\u0F4C\\u0F4E-" + u"\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-\\u0F6C\\u0F71" + u"\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84\\u0F86-\\u0F92\\u0F94-\\u0F97" + u"\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8" + u"\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10C7\\u10CD\\u10D0-" + u"\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-\\u124D\\u1250-" + u"\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0" + u"\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-" + u"\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-\\u138F\\u1780-" + u"\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CD\\u17D0\\u17D2\\u17D7" + u"\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF\\u1E00-\\u1E99\\u1E9E" + u"\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D" + u"\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76" + u"\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4" + u"\\u1FC6-\\u1FC8\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2" + u"\\u1FE4-\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA\\u1FFC\\u2D27" + u"\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-" + u"\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u3005-" + u"\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E\\u30A1-\\u30FA\\u30FC-" + u"\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-\\u4DBF\\u4E00-\\u9FFF" + u"\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793\\uA7AA\\uA7AE\\uA7B8" + 
u"\\uA7B9\\uA7C0-\\uA7CA\\uA7D0\\uA7D1\\uA7D3\\uA7D5-\\uA7D9\\uA9E7-\\uA9FE" + u"\\uAA60-\\uAA76\\uAA7A-\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16" + u"\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F" + u"\\uFA11\\uFA13\\uFA14\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301" + u"\\U00011303\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B11F-" + u"\\U0001B122\\U0001B150-\\U0001B152\\U0001B164-\\U0001B167\\U0001DF00-" + u"\\U0001DF1E\\U0001E7E0-\\U0001E7E6\\U0001E7E8-\\U0001E7EB\\U0001E7ED" + u"\\U0001E7EE\\U0001E7F0-\\U0001E7FE\\U00020000-\\U0002A6DF\\U0002A700-" + u"\\U0002B738\\U0002B740-\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-" + u"\\U0002EBE0\\U00030000-\\U0003134A]"; gRecommendedSet = new UnicodeSet(UnicodeString(recommendedPat), status); if (gRecommendedSet == NULL) { @@ -483,7 +486,7 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc, // and for definitions of the types (single, whole, mixed-script) of confusables. // We only care about a few of the check flags. Ignore the others. - // If no tests relavant to this function have been specified, return an error. + // If no tests relevant to this function have been specified, return an error. // TODO: is this really the right thing to do? It's probably an error on the caller's part, // but logically we would just return 0 (no error). if ((This->fChecks & USPOOF_CONFUSABLE) == 0) { @@ -603,7 +606,7 @@ int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* c gNfdNormalizer->normalize(id, nfdText, *status); int32_t nfdLength = nfdText.length(); - // scan for more than one occurence of the same non-spacing mark + // scan for more than one occurrence of the same non-spacing mark // in a sequence of non-spacing marks. 
int32_t i; UChar32 c; diff --git a/contrib/libs/icu/i18n/uspoof_conf.cpp b/contrib/libs/icu/i18n/uspoof_conf.cpp index 1a7cdad60a..04081cabfb 100644 --- a/contrib/libs/icu/i18n/uspoof_conf.cpp +++ b/contrib/libs/icu/i18n/uspoof_conf.cpp @@ -15,7 +15,7 @@ * created on: 2009Jan05 (refactoring earlier files) * created by: Andy Heninger * -* Internal classes for compililing confusable data into its binary (runtime) form. +* Internal classes for compiling confusable data into its binary (runtime) form. */ #include "unicode/utypes.h" @@ -113,7 +113,7 @@ SPUString *SPUStringPool::getByIndex(int32_t index) { // by code point order. // Conforms to the type signature for a USortComparator in uvector.h -static int8_t U_CALLCONV SPUStringCompare(UHashTok left, UHashTok right) { +static int32_t U_CALLCONV SPUStringCompare(UHashTok left, UHashTok right) { const SPUString *sL = const_cast<const SPUString *>( static_cast<SPUString *>(left.pointer)); const SPUString *sR = const_cast<const SPUString *>( @@ -145,7 +145,7 @@ SPUString *SPUStringPool::addString(UnicodeString *src, UErrorCode &status) { return NULL; } uhash_put(fHash, src, hashedString, &status); - fVec->addElement(hashedString, status); + fVec->addElementX(hashedString, status); } return hashedString; } diff --git a/contrib/libs/icu/i18n/uspoof_conf.h b/contrib/libs/icu/i18n/uspoof_conf.h index cfa80e7ca7..600d7ea42a 100644 --- a/contrib/libs/icu/i18n/uspoof_conf.h +++ b/contrib/libs/icu/i18n/uspoof_conf.h @@ -110,7 +110,7 @@ class ConfusabledataBuilder : public UMemory { // Add an entry to the key and value tables being built // input: data from SLTable, MATable, etc. - // outut: entry added to fKeyVec and fValueVec + // output: entry added to fKeyVec and fValueVec void addKeyEntry(UChar32 keyChar, // The key character UHashtable *table, // The table, one of SATable, MATable, etc. int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc. 
diff --git a/contrib/libs/icu/i18n/uspoof_impl.cpp b/contrib/libs/icu/i18n/uspoof_impl.cpp index 88245b7f8f..b283d81321 100644 --- a/contrib/libs/icu/i18n/uspoof_impl.cpp +++ b/contrib/libs/icu/i18n/uspoof_impl.cpp @@ -729,7 +729,7 @@ void *SpoofData::reserveSpace(int32_t numBytes, UErrorCode &status) { return NULL; } if (!fDataOwned) { - UPRV_UNREACHABLE; + UPRV_UNREACHABLE_EXIT; } numBytes = (numBytes + 15) & ~15; // Round up to a multiple of 16 diff --git a/contrib/libs/icu/i18n/uspoof_impl.h b/contrib/libs/icu/i18n/uspoof_impl.h index b111d4b16a..e75ae262bd 100644 --- a/contrib/libs/icu/i18n/uspoof_impl.h +++ b/contrib/libs/icu/i18n/uspoof_impl.h @@ -8,7 +8,7 @@ * * uspoof_impl.h * -* Implemenation header for spoof detection +* Implementation header for spoof detection * */ @@ -31,7 +31,7 @@ U_NAMESPACE_BEGIN -// The maximium length (in UTF-16 UChars) of the skeleton replacement string resulting from +// The maximum length (in UTF-16 UChars) of the skeleton replacement string resulting from // a single input code point. This is function of the unicode.org data. #define USPOOF_MAX_SKELETON_EXPANSION 20 @@ -93,7 +93,7 @@ public: static UChar32 ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status); static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; + virtual UClassID getDynamicClassID(void) const override; // // Data Members @@ -157,7 +157,7 @@ public: // // String Table: // The strings table contains all of the value strings (those of length two or greater) -// concatentated together into one long UChar (UTF-16) array. +// concatenated together into one long UChar (UTF-16) array. // // There is no nul character or other mark between adjacent strings. // @@ -222,7 +222,7 @@ class SpoofData: public UMemory { SpoofData(const void *serializedData, int32_t length, UErrorCode &status); // Check raw Spoof Data Version compatibility. - // Return TRUE it looks good. + // Return true it looks good. 
UBool validateDataVersion(UErrorCode &status) const; ~SpoofData(); // Destructor not normally used. @@ -248,7 +248,7 @@ class SpoofData: public UMemory { // Get the confusable skeleton transform for a single code point. // The result is a string with a length between 1 and 18 as of Unicode 9. // This is the main public endpoint for this class. - // @return The length in UTF-16 code units of the substition string. + // @return The length in UTF-16 code units of the substitution string. int32_t confusableLookup(UChar32 inChar, UnicodeString &dest) const; // Get the number of confusable entries in this SpoofData. @@ -301,7 +301,7 @@ struct SpoofDataHeader { int32_t fMagic; // (0x3845fdef) uint8_t fFormatVersion[4]; // Data Format. Same as the value in struct UDataInfo // if there is one associated with this data. - int32_t fLength; // Total lenght in bytes of this spoof data, + int32_t fLength; // Total length in bytes of this spoof data, // including all sections, not just the header. // The following four sections refer to data representing the confusable data diff --git a/contrib/libs/icu/i18n/usrchimp.h b/contrib/libs/icu/i18n/usrchimp.h index 88b2e217db..13d825f73b 100644 --- a/contrib/libs/icu/i18n/usrchimp.h +++ b/contrib/libs/icu/i18n/usrchimp.h @@ -43,7 +43,7 @@ #define isContinuation(CE) (((CE) & UCOL_CONTINUATION_MARKER) == UCOL_CONTINUATION_MARKER) /** - * This indicates an error has occured during processing or there are no more CEs + * This indicates an error has occurred during processing or there are no more CEs * to be returned. */ #define UCOL_PROCESSED_NULLORDER ((int64_t)U_INT64_MAX) @@ -101,7 +101,7 @@ public: * @param ixHigh a pointer to an int32_t to receive the iterator index after fetching the CE. * @param status A pointer to an UErrorCode to receive any errors. 
* @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER - * if an error has occured or if the end of string has been reached + * if an error has occurred or if the end of string has been reached */ int64_t nextProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *status); /** @@ -110,11 +110,11 @@ public: * * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE * @param ixHigh A pointer to an int32_t to receiver the iterator index before fetching the CE - * @param status A pointer to an UErrorCode to receive any errors. Noteably + * @param status A pointer to an UErrorCode to receive any errors. Notably * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack * buffer has been exhausted. * @return The previous collation elements ordering, otherwise returns - * UCOL_PROCESSED_NULLORDER if an error has occured or if the start of + * UCOL_PROCESSED_NULLORDER if an error has occurred or if the start of * string has been reached. */ int64_t previousProcessed(int32_t *ixLow, int32_t *ixHigh, UErrorCode *status); @@ -127,7 +127,6 @@ private: U_NAMESPACE_END #define INITIAL_ARRAY_SIZE_ 256 -#define MAX_TABLE_SIZE_ 257 struct USearch { // required since collation element iterator does not have a getText API @@ -136,8 +135,8 @@ struct USearch { UBool isOverlap; UBool isCanonicalMatch; int16_t elementComparisonType; - UBreakIterator *internalBreakIter; //internal character breakiterator - UBreakIterator *breakIter; + UBreakIterator *internalBreakIter; // internal character breakiterator, lazily created. 
+ UBreakIterator *breakIter; // caller provided character breakiterator // value USEARCH_DONE is the default value // if we are not at the start of the text or the end of the text, // depending on the iteration direction and matchedIndex is USEARCH_DONE @@ -160,9 +159,6 @@ struct UPattern { int64_t pcesBuffer[INITIAL_ARRAY_SIZE_]; UBool hasPrefixAccents; UBool hasSuffixAccents; - int16_t defaultShiftSize; - int16_t shift[MAX_TABLE_SIZE_]; - int16_t backShift[MAX_TABLE_SIZE_]; }; struct UStringSearch { @@ -182,8 +178,6 @@ struct UStringSearch { uint32_t ceMask; uint32_t variableTop; UBool toShift; - UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_]; - UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_]; }; /** @@ -206,7 +200,7 @@ struct UStringSearch { * the text "\u00e6" * @param strsrch string search data * @param status error status if any -* @return TRUE if an exact match is found, FALSE otherwise +* @return true if an exact match is found, false otherwise */ U_CFUNC UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status); @@ -217,7 +211,7 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status); * of beginning and ending accents if it overlaps that region. * @param strsrch string search data * @param status error status if any -* @return TRUE if a canonical match is found, FALSE otherwise +* @return true if a canonical match is found, false otherwise */ U_CFUNC UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status); @@ -227,7 +221,7 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status); * Comments follows from handleNextExact * @param strsrch string search data * @param status error status if any -* @return True if a exact math is found, FALSE otherwise. +* @return True if a exact math is found, false otherwise. 
*/ U_CFUNC UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status); @@ -238,7 +232,7 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status); * of beginning and ending accents if it overlaps that region. * @param strsrch string search data * @param status error status if any -* @return TRUE if a canonical match is found, FALSE otherwise +* @return true if a canonical match is found, false otherwise */ U_CFUNC UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, diff --git a/contrib/libs/icu/i18n/utf16collationiterator.cpp b/contrib/libs/icu/i18n/utf16collationiterator.cpp index 7598b0ee52..f1bdfabe73 100644 --- a/contrib/libs/icu/i18n/utf16collationiterator.cpp +++ b/contrib/libs/icu/i18n/utf16collationiterator.cpp @@ -37,9 +37,9 @@ UTF16CollationIterator::UTF16CollationIterator(const UTF16CollationIterator &oth UTF16CollationIterator::~UTF16CollationIterator() {} -UBool +bool UTF16CollationIterator::operator==(const CollationIterator &other) const { - if(!CollationIterator::operator==(other)) { return FALSE; } + if(!CollationIterator::operator==(other)) { return false; } const UTF16CollationIterator &o = static_cast<const UTF16CollationIterator &>(other); // Compare the iterator state but not the text: Assume that the caller does that. return (pos - start) == (o.pos - o.start); @@ -171,14 +171,14 @@ FCDUTF16CollationIterator::FCDUTF16CollationIterator(const FCDUTF16CollationIter FCDUTF16CollationIterator::~FCDUTF16CollationIterator() {} -UBool +bool FCDUTF16CollationIterator::operator==(const CollationIterator &other) const { // Skip the UTF16CollationIterator and call its parent. - if(!CollationIterator::operator==(other)) { return FALSE; } + if(!CollationIterator::operator==(other)) { return false; } const FCDUTF16CollationIterator &o = static_cast<const FCDUTF16CollationIterator &>(other); // Compare the iterator state but not the text: Assume that the caller does that. 
- if(checkDir != o.checkDir) { return FALSE; } - if(checkDir == 0 && (start == segmentStart) != (o.start == o.segmentStart)) { return FALSE; } + if(checkDir != o.checkDir) { return false; } + if(checkDir == 0 && (start == segmentStart) != (o.start == o.segmentStart)) { return false; } if(checkDir != 0 || start == segmentStart) { return (pos - rawStart) == (o.pos - o.rawStart); } else { diff --git a/contrib/libs/icu/i18n/utf16collationiterator.h b/contrib/libs/icu/i18n/utf16collationiterator.h index fd3a05e9ef..6db70511b0 100644 --- a/contrib/libs/icu/i18n/utf16collationiterator.h +++ b/contrib/libs/icu/i18n/utf16collationiterator.h @@ -42,11 +42,11 @@ public: virtual ~UTF16CollationIterator(); - virtual UBool operator==(const CollationIterator &other) const; + virtual bool operator==(const CollationIterator &other) const override; - virtual void resetToOffset(int32_t newOffset); + virtual void resetToOffset(int32_t newOffset) override; - virtual int32_t getOffset() const; + virtual int32_t getOffset() const override; void setText(const UChar *s, const UChar *lim) { reset(); @@ -54,9 +54,9 @@ public: limit = lim; } - virtual UChar32 nextCodePoint(UErrorCode &errorCode); + virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; - virtual UChar32 previousCodePoint(UErrorCode &errorCode); + virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; protected: // Copy constructor only for subclasses which set the pointers. 
@@ -64,15 +64,15 @@ protected: : CollationIterator(other), start(NULL), pos(NULL), limit(NULL) {} - virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); + virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; - virtual UChar handleGetTrailSurrogate(); + virtual UChar handleGetTrailSurrogate() override; - virtual UBool foundNULTerminator(); + virtual UBool foundNULTerminator() override; - virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; - virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; // UTF-16 string pointers. // limit can be NULL for NUL-terminated strings. @@ -95,24 +95,24 @@ public: virtual ~FCDUTF16CollationIterator(); - virtual UBool operator==(const CollationIterator &other) const; + virtual bool operator==(const CollationIterator &other) const override; - virtual void resetToOffset(int32_t newOffset); + virtual void resetToOffset(int32_t newOffset) override; - virtual int32_t getOffset() const; + virtual int32_t getOffset() const override; - virtual UChar32 nextCodePoint(UErrorCode &errorCode); + virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; - virtual UChar32 previousCodePoint(UErrorCode &errorCode); + virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; protected: - virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); + virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; - virtual UBool foundNULTerminator(); + virtual UBool foundNULTerminator() override; - virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; - virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void backwardNumCodePoints(int32_t num, UErrorCode 
&errorCode) override; private: /** @@ -125,7 +125,7 @@ private: /** * Extend the FCD text segment forward or normalize around pos. * To be called when checkDir > 0 && pos != limit. - * @return TRUE if success, checkDir == 0 and pos != limit + * @return true if success, checkDir == 0 and pos != limit */ UBool nextSegment(UErrorCode &errorCode); @@ -139,7 +139,7 @@ private: /** * Extend the FCD text segment backward or normalize around pos. * To be called when checkDir < 0 && pos != start. - * @return TRUE if success, checkDir == 0 and pos != start + * @return true if success, checkDir == 0 and pos != start */ UBool previousSegment(UErrorCode &errorCode); diff --git a/contrib/libs/icu/i18n/utf8collationiterator.h b/contrib/libs/icu/i18n/utf8collationiterator.h index 9a3ec45aeb..09cfce4369 100644 --- a/contrib/libs/icu/i18n/utf8collationiterator.h +++ b/contrib/libs/icu/i18n/utf8collationiterator.h @@ -40,13 +40,13 @@ public: virtual ~UTF8CollationIterator(); - virtual void resetToOffset(int32_t newOffset); + virtual void resetToOffset(int32_t newOffset) override; - virtual int32_t getOffset() const; + virtual int32_t getOffset() const override; - virtual UChar32 nextCodePoint(UErrorCode &errorCode); + virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; - virtual UChar32 previousCodePoint(UErrorCode &errorCode); + virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; protected: /** @@ -54,20 +54,20 @@ protected: * together with a bogus code point. The caller will ignore that code point. * * Special values may be returned for surrogate code points, which are also illegal in UTF-8, - * but the caller will treat them like U+FFFD because forbidSurrogateCodePoints() returns TRUE. + * but the caller will treat them like U+FFFD because forbidSurrogateCodePoints() returns true. * * Valid lead surrogates are returned from inside a normalized text segment, * where handleGetTrailSurrogate() will return the matching trail surrogate. 
*/ - virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); + virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; - virtual UBool foundNULTerminator(); + virtual UBool foundNULTerminator() override; - virtual UBool forbidSurrogateCodePoints() const; + virtual UBool forbidSurrogateCodePoints() const override; - virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; - virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; const uint8_t *u8; int32_t pos; @@ -87,24 +87,24 @@ public: virtual ~FCDUTF8CollationIterator(); - virtual void resetToOffset(int32_t newOffset); + virtual void resetToOffset(int32_t newOffset) override; - virtual int32_t getOffset() const; + virtual int32_t getOffset() const override; - virtual UChar32 nextCodePoint(UErrorCode &errorCode); + virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; - virtual UChar32 previousCodePoint(UErrorCode &errorCode); + virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; protected: - virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); + virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; - virtual UChar handleGetTrailSurrogate(); + virtual UChar handleGetTrailSurrogate() override; - virtual UBool foundNULTerminator(); + virtual UBool foundNULTerminator() override; - virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; - virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); + virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; private: UBool nextHasLccc() const; @@ -117,7 +117,7 @@ private: /** * Extends the FCD text segment forward or normalizes around pos. 
- * @return TRUE if success + * @return true if success */ UBool nextSegment(UErrorCode &errorCode); @@ -128,7 +128,7 @@ private: /** * Extends the FCD text segment backward or normalizes around pos. - * @return TRUE if success + * @return true if success */ UBool previousSegment(UErrorCode &errorCode); diff --git a/contrib/libs/icu/i18n/utrans.cpp b/contrib/libs/icu/i18n/utrans.cpp index 29013ead12..1cbba81d65 100644 --- a/contrib/libs/icu/i18n/utrans.cpp +++ b/contrib/libs/icu/i18n/utrans.cpp @@ -52,13 +52,13 @@ public: virtual void handleReplaceBetween(int32_t start, int32_t limit, - const UnicodeString& text); + const UnicodeString& text) override; virtual void extractBetween(int32_t start, int32_t limit, - UnicodeString& target) const; + UnicodeString& target) const override; - virtual void copy(int32_t start, int32_t limit, int32_t dest); + virtual void copy(int32_t start, int32_t limit, int32_t dest) override; // virtual Replaceable *clone() const { return NULL; } same as default @@ -67,7 +67,7 @@ public: * * @draft ICU 2.2 */ - virtual UClassID getDynamicClassID() const; + virtual UClassID getDynamicClassID() const override; /** * ICU "poor man's RTTI", returns a UClassID for this class. 
@@ -78,11 +78,11 @@ public: protected: - virtual int32_t getLength() const; + virtual int32_t getLength() const override; - virtual UChar getCharAt(int32_t offset) const; + virtual UChar getCharAt(int32_t offset) const override; - virtual UChar32 getChar32At(int32_t offset) const; + virtual UChar32 getChar32At(int32_t offset) const override; }; UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue) diff --git a/contrib/libs/icu/i18n/vtzone.cpp b/contrib/libs/icu/i18n/vtzone.cpp index 36e5834523..9111e08848 100644 --- a/contrib/libs/icu/i18n/vtzone.cpp +++ b/contrib/libs/icu/i18n/vtzone.cpp @@ -240,7 +240,7 @@ static UDate parseDateTimeString(const UnicodeString& str, int32_t offset, UErro break; } if (str.charAt(8) != 0x0054) { - // charcter "T" must be used for separating date and time + // character "T" must be used for separating date and time break; } if (length == 16) { @@ -376,7 +376,7 @@ static void getDefaultTZName(const UnicodeString &tzid, UBool isDST, UnicodeStri * dow day of week in BYDAY, or 0 when not found * wim day of week ordinal number in BYDAY, or 0 when not found * dom an array of day of month - * domCount number of availble days in dom (domCount is specifying the size of dom on input) + * domCount number of available days in dom (domCount is specifying the size of dom on input) * until time defined by UNTIL attribute or MIN_MILLIS if not available */ static void parseRRULE(const UnicodeString& rrule, int32_t& month, int32_t& dow, int32_t& wim, @@ -479,7 +479,7 @@ static void parseRRULE(const UnicodeString& rrule, int32_t& month, int32_t& dow, goto rruleParseError; } } else if (attr.compare(ICAL_BYMONTHDAY, -1) == 0) { - // Note: BYMONTHDAY may contain multiple days delimitted by comma + // Note: BYMONTHDAY may contain multiple days delimited by comma // // A value of BYMONTHDAY could be negative, for example, -1 means // the last day in a month @@ -663,7 +663,7 @@ static TimeZoneRule* createRuleByRRULE(const UnicodeString& zonename, int rawOff 
goto unsupportedRRule; } } - // If ealier month, go through days to find the earliest day + // If earlier month, go through days to find the earliest day if (tmp_month == earliestMonth) { for (j = 0; j < tmp_daysCount; j++) { tmp_days[j] = tmp_days[j] > 0 ? tmp_days[j] : MONTHLENGTH[tmp_month] + tmp_days[j] + 1; @@ -881,7 +881,7 @@ static DateTimeRule *toWallTimeRule(const DateTimeRule *rule, int32_t rawOffset, } /* - * Minumum implementations of stream writer/reader, writing/reading + * Minimum implementations of stream writer/reader, writing/reading * UnicodeString. For now, we do not want to introduce the dependency * on the ICU I/O stream in this module. But we want to keep the code * equivalent to the ICU4J implementation, which utilizes java.io.Writer/ @@ -983,7 +983,7 @@ VTimeZone::VTimeZone(const VTimeZone& source) if (U_SUCCESS(status)) { for (int32_t i = 0; i < size; i++) { UnicodeString *line = (UnicodeString*)source.vtzlines->elementAt(i); - vtzlines->addElement(line->clone(), status); + vtzlines->addElementX(line->clone(), status); if (U_FAILURE(status)) { break; } @@ -1028,7 +1028,7 @@ VTimeZone::operator=(const VTimeZone& right) { if (vtzlines != nullptr && U_SUCCESS(status)) { for (int32_t i = 0; i < size; i++) { UnicodeString *line = (UnicodeString*)right.vtzlines->elementAt(i); - vtzlines->addElement(line->clone(), status); + vtzlines->addElementX(line->clone(), status); if (U_FAILURE(status)) { break; } @@ -1047,13 +1047,13 @@ VTimeZone::operator=(const VTimeZone& right) { return *this; } -UBool +bool VTimeZone::operator==(const TimeZone& that) const { if (this == &that) { - return TRUE; + return true; } if (typeid(*this) != typeid(that) || !BasicTimeZone::operator==(that)) { - return FALSE; + return false; } VTimeZone *vtz = (VTimeZone*)&that; if (*tz == *(vtz->tz) @@ -1061,17 +1061,15 @@ VTimeZone::operator==(const TimeZone& that) const { && lastmod == vtz->lastmod /* && olsonzid = that.olsonzid */ /* && icutzver = that.icutzver */) { - return 
TRUE; + return true; } - return FALSE; + return false; } -#if __cplusplus > 201703L -UBool +bool VTimeZone::operator!=(const TimeZone& that) const { return !operator==(that); } -#endif VTimeZone* VTimeZone::createVTimeZoneByID(const UnicodeString& ID) { @@ -1219,6 +1217,12 @@ VTimeZone::getOffset(UDate date, UBool local, int32_t& rawOffset, return tz->getOffset(date, local, rawOffset, dstOffset, status); } +void VTimeZone::getOffsetFromLocal(UDate date, UTimeZoneLocalOption nonExistingTimeOpt, + UTimeZoneLocalOption duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const { + tz->getOffsetFromLocal(date, nonExistingTimeOpt, duplicatedTimeOpt, rawOffset, dstOffset, status); +} + void VTimeZone::setRawOffset(int32_t offsetMillis) { tz->setRawOffset(offsetMillis); @@ -1289,7 +1293,7 @@ VTimeZone::load(VTZReader& reader, UErrorCode& status) { if (U_FAILURE(status)) { goto cleanupVtzlines; } - vtzlines->addElement(element.getAlias(), status); + vtzlines->addElementX(element.getAlias(), status); if (U_FAILURE(status)) { goto cleanupVtzlines; } @@ -1311,7 +1315,7 @@ VTimeZone::load(VTZReader& reader, UErrorCode& status) { if (U_FAILURE(status)) { goto cleanupVtzlines; } - vtzlines->addElement(element.getAlias(), status); + vtzlines->addElementX(element.getAlias(), status); if (U_FAILURE(status)) { goto cleanupVtzlines; } @@ -1334,7 +1338,7 @@ VTimeZone::load(VTZReader& reader, UErrorCode& status) { if (U_FAILURE(status)) { goto cleanupVtzlines; } - vtzlines->addElement(element.getAlias(), status); + vtzlines->addElementX(element.getAlias(), status); if (U_FAILURE(status)) { goto cleanupVtzlines; } @@ -1348,7 +1352,7 @@ VTimeZone::load(VTZReader& reader, UErrorCode& status) { if (U_FAILURE(status)) { goto cleanupVtzlines; } - vtzlines->addElement(element.getAlias(), status); + vtzlines->addElementX(element.getAlias(), status); if (U_FAILURE(status)) { goto cleanupVtzlines; } @@ -1523,7 +1527,7 @@ VTimeZone::parse(UErrorCode& status) { if (dstr 
== nullptr) { status = U_MEMORY_ALLOCATION_ERROR; } else { - dates->addElement(dstr, status); + dates->addElementX(dstr, status); } if (U_FAILURE(status)) { goto cleanupParse; @@ -1540,7 +1544,7 @@ VTimeZone::parse(UErrorCode& status) { if (U_FAILURE(status)) { goto cleanupParse; } - dates->addElement(element.getAlias(), status); + dates->addElementX(element.getAlias(), status); if (U_FAILURE(status)) { goto cleanupParse; } @@ -1605,7 +1609,7 @@ VTimeZone::parse(UErrorCode& status) { if (startAvail && actualStart < firstStart) { // save from offset information for the earliest rule firstStart = actualStart; - // If this is STD, assume the time before this transtion + // If this is STD, assume the time before this transition // is DST when the difference is 1 hour. This might not be // accurate, but VTIMEZONE data does not have such info. if (dstSavings > 0) { @@ -1622,7 +1626,7 @@ VTimeZone::parse(UErrorCode& status) { } } } - rules->addElement(rule, status); + rules->addElementX(rule, status); if (U_FAILURE(status)) { goto cleanupParse; } @@ -1728,7 +1732,7 @@ VTimeZone::parse(UErrorCode& status) { goto cleanupParse; } rules->removeElementAt(finalRuleIdx); - rules->addElement(newRule, status); + rules->addElementX(newRule, status); if (U_FAILURE(status)) { delete newRule; goto cleanupParse; @@ -1805,7 +1809,7 @@ VTimeZone::write(VTZWriter& writer, UErrorCode& status) const { icutzprop.append(u'['); icutzprop.append(icutzver); icutzprop.append(u']'); - customProps.addElement(&icutzprop, status); + customProps.addElementX(&icutzprop, status); } writeZone(writer, *tz, &customProps, status); } @@ -1858,7 +1862,7 @@ VTimeZone::write(UDate start, VTZWriter& writer, UErrorCode& status) const { icutzprop->append(ICU_TZINFO_PARTIAL, -1); appendMillis(start, *icutzprop); icutzprop->append((UChar)0x005D/*']'*/); - customProps.addElement(icutzprop, status); + customProps.addElementX(icutzprop, status); if (U_FAILURE(status)) { delete icutzprop; goto cleanupWritePartial; @@ 
-1872,10 +1876,7 @@ cleanupWritePartial: delete initial; } if (transitionRules != nullptr) { - while (!transitionRules->isEmpty()) { - TimeZoneRule *tr = (TimeZoneRule*)transitionRules->orphanElementAt(0); - delete tr; - } + U_ASSERT(transitionRules->hasDeleter()); delete transitionRules; } } @@ -1893,23 +1894,25 @@ VTimeZone::writeSimple(UDate time, VTZWriter& writer, UErrorCode& status) const InitialTimeZoneRule *initial = nullptr; AnnualTimeZoneRule *std = nullptr, *dst = nullptr; getSimpleRulesNear(time, initial, std, dst, status); + LocalPointer<InitialTimeZoneRule> lpInitial(initial); + LocalPointer<AnnualTimeZoneRule> lpStd(std); + LocalPointer<AnnualTimeZoneRule> lpDst(dst); if (U_SUCCESS(status)) { // Create a RuleBasedTimeZone with the subset rule getID(tzid); - RuleBasedTimeZone rbtz(tzid, initial); - if (std != nullptr && dst != nullptr) { - rbtz.addTransitionRule(std, status); - rbtz.addTransitionRule(dst, status); + RuleBasedTimeZone rbtz(tzid, lpInitial.orphan()); + if (lpStd.isValid() && lpDst.isValid()) { + rbtz.addTransitionRule(lpStd.orphan(), status); + rbtz.addTransitionRule(lpDst.orphan(), status); } if (U_FAILURE(status)) { - goto cleanupWriteSimple; + return; } if (olsonzid.length() > 0 && icutzver.length() > 0) { - UnicodeString *icutzprop = new UnicodeString(ICU_TZINFO_PROP); - if (icutzprop == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - goto cleanupWriteSimple; + LocalPointer<UnicodeString> icutzprop(new UnicodeString(ICU_TZINFO_PROP), status); + if (U_FAILURE(status)) { + return; } icutzprop->append(olsonzid); icutzprop->append((UChar)0x005B/*'['*/); @@ -1917,26 +1920,10 @@ VTimeZone::writeSimple(UDate time, VTZWriter& writer, UErrorCode& status) const icutzprop->append(ICU_TZINFO_SIMPLE, -1); appendMillis(time, *icutzprop); icutzprop->append((UChar)0x005D/*']'*/); - customProps.addElement(icutzprop, status); - if (U_FAILURE(status)) { - delete icutzprop; - goto cleanupWriteSimple; - } + 
customProps.adoptElement(icutzprop.orphan(), status); } writeZone(writer, rbtz, &customProps, status); } - return; - -cleanupWriteSimple: - if (initial != nullptr) { - delete initial; - } - if (std != nullptr) { - delete std; - } - if (dst != nullptr) { - delete dst; - } } void @@ -2517,7 +2504,7 @@ VTimeZone::writeZonePropsByDOW_LEQ_DOM(VTZWriter& writer, UBool isDst, const Uni writeZonePropsByDOW(writer, isDst, zonename, fromOffset, toOffset, month, -1*((MONTHLENGTH[month] - dayOfMonth)/7 + 1), dayOfWeek, startTime, untilTime, status); } else if (month == UCAL_FEBRUARY && dayOfMonth == 29) { - // Specical case for February + // Special case for February writeZonePropsByDOW(writer, isDst, zonename, fromOffset, toOffset, UCAL_FEBRUARY, -1, dayOfWeek, startTime, untilTime, status); } else { @@ -2651,7 +2638,7 @@ VTimeZone::endZoneProps(VTZWriter& writer, UBool isDst, UErrorCode& status) cons } /* - * Write the beggining part of RRULE line + * Write the beginning part of RRULE line */ void VTimeZone::beginRRULE(VTZWriter& writer, int32_t month, UErrorCode& status) const { diff --git a/contrib/libs/icu/i18n/vzone.h b/contrib/libs/icu/i18n/vzone.h index 17df92e17b..d1b6d8d820 100644 --- a/contrib/libs/icu/i18n/vzone.h +++ b/contrib/libs/icu/i18n/vzone.h @@ -91,7 +91,7 @@ vzone_equals(const VZone* zone1, const VZone* zone2); * @param zone, the vzone to use * @param url Receives the RFC2445 TZURL property value. * @param urlLength, length of the url - * @return TRUE if TZURL attribute is available and value is set. + * @return true if TZURL attribute is available and value is set. */ U_CAPI UBool U_EXPORT2 vzone_getTZURL(VZone* zone, UChar* & url, int32_t & urlLength); @@ -112,7 +112,7 @@ vzone_setTZURL(VZone* zone, UChar* url, int32_t urlLength); * is not set. * @param zone, the vzone to use * @param lastModified Receives the last modified date. - * @return TRUE if lastModified attribute is available and value is set. 
+ * @return true if lastModified attribute is available and value is set. */ U_CAPI UBool U_EXPORT2 vzone_getLastModified(VZone* zone, UDate& lastModified); @@ -136,7 +136,7 @@ U_CAPI void U_EXPORT2 vzone_write(VZone* zone, UChar* & result, int32_t & resultLength, UErrorCode& status); /** - * Writes RFC2445 VTIMEZONE data for this time zone applicalbe + * Writes RFC2445 VTIMEZONE data for this time zone applicable * for dates after the specified start time. * @param zone, the vzone to use * @param start The start date. @@ -148,7 +148,7 @@ U_CAPI void U_EXPORT2 vzone_writeFromStart(VZone* zone, UDate start, UChar* & result, int32_t & resultLength, UErrorCode& status); /** - * Writes RFC2445 VTIMEZONE data applicalbe for the specified date. + * Writes RFC2445 VTIMEZONE data applicable for the specified date. * Some common iCalendar implementations can only handle a single time * zone property or a pair of standard and daylight time properties using * BYDAY rule with day of week (such as BYDAY=1SUN). This method produce @@ -283,7 +283,7 @@ vzone_useDaylightTime(VZone* zone); * @return true if the given date is in daylight savings time, * false, otherwise. */ -U_INTERNAL UBool U_EXPORT2 +U_CAPI UBool U_EXPORT2 vzone_inDaylightTime(VZone* zone, UDate date, UErrorCode& status); /** @@ -303,7 +303,7 @@ vzone_hasSameRules(VZone* zone, const VZone* other); * @param base The base time. * @param inclusive Whether the base time is inclusive or not. * @param result Receives the first transition after the base time. - * @return TRUE if the transition is found. + * @return true if the transition is found. */ U_CAPI UBool U_EXPORT2 vzone_getNextTransition(VZone* zone, UDate base, UBool inclusive, ZTrans* result); @@ -314,7 +314,7 @@ vzone_getNextTransition(VZone* zone, UDate base, UBool inclusive, ZTrans* result * @param base The base time. * @param inclusive Whether the base time is inclusive or not. * @param result Receives the most recent transition before the base time. 
- * @return TRUE if the transition is found. + * @return true if the transition is found. */ U_CAPI UBool U_EXPORT2 vzone_getPreviousTransition(VZone* zone, UDate base, UBool inclusive, ZTrans* result); diff --git a/contrib/libs/icu/i18n/windtfmt.cpp b/contrib/libs/icu/i18n/windtfmt.cpp index bcf272bc61..f6a990ea29 100644 --- a/contrib/libs/icu/i18n/windtfmt.cpp +++ b/contrib/libs/icu/i18n/windtfmt.cpp @@ -193,6 +193,7 @@ Win32DateFormat::~Win32DateFormat() Win32DateFormat &Win32DateFormat::operator=(const Win32DateFormat &other) { + if (this == &other) { return *this; } // self-assignment: no-op // The following handles fCalendar DateFormat::operator=(other); diff --git a/contrib/libs/icu/i18n/winnmfmt.cpp b/contrib/libs/icu/i18n/winnmfmt.cpp index 72da1be28b..8b2a9a4f95 100644 --- a/contrib/libs/icu/i18n/winnmfmt.cpp +++ b/contrib/libs/icu/i18n/winnmfmt.cpp @@ -268,6 +268,7 @@ Win32NumberFormat::~Win32NumberFormat() Win32NumberFormat &Win32NumberFormat::operator=(const Win32NumberFormat &other) { + if (this == &other) { return *this; } // self-assignment: no-op NumberFormat::operator=(other); this->fCurrency = other.fCurrency; diff --git a/contrib/libs/icu/i18n/wintzimpl.cpp b/contrib/libs/icu/i18n/wintzimpl.cpp index 433ed4c293..a6d9330063 100644 --- a/contrib/libs/icu/i18n/wintzimpl.cpp +++ b/contrib/libs/icu/i18n/wintzimpl.cpp @@ -145,7 +145,7 @@ static UBool getWindowsTimeZoneInfo(TIME_ZONE_INFORMATION *zoneInfo, const UChar } /* - * Given the timezone icuid, fill in zoneInfo by calling auxillary functions that creates a timezone and extract the + * Given the timezone icuid, fill in zoneInfo by calling auxiliary functions that creates a timezone and extract the * information to put into zoneInfo. This includes bias and standard time date and daylight saving date. 
*/ U_CAPI UBool U_EXPORT2 diff --git a/contrib/libs/icu/i18n/zonemeta.cpp b/contrib/libs/icu/i18n/zonemeta.cpp index 72c590f424..b8afa4760f 100644 --- a/contrib/libs/icu/i18n/zonemeta.cpp +++ b/contrib/libs/icu/i18n/zonemeta.cpp @@ -477,11 +477,11 @@ ZoneMeta::getCanonicalCountry(const UnicodeString &tzid, UnicodeString &country, UErrorCode ec = U_ZERO_ERROR; if (singleZone) { if (!gSingleZoneCountries->contains((void*)region)) { - gSingleZoneCountries->addElement((void*)region, ec); + gSingleZoneCountries->addElementX((void*)region, ec); } } else { if (!gMultiZonesCountries->contains((void*)region)) { - gMultiZonesCountries->addElement((void*)region, ec); + gMultiZonesCountries->addElementX((void*)region, ec); } } } @@ -696,7 +696,7 @@ ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) { } } - mzMappings->addElement(entry, status); + mzMappings->addElementX(entry, status); if (U_FAILURE(status)) { break; } @@ -801,7 +801,7 @@ static void U_CALLCONV initAvailableMetaZoneIDs () { uMzID[len] = 0; UnicodeString *usMzID = new UnicodeString(uMzID); if (uhash_get(gMetaZoneIDTable, usMzID) == NULL) { - gMetaZoneIDs->addElement((void *)uMzID, status); + gMetaZoneIDs->addElementX((void *)uMzID, status); uhash_put(gMetaZoneIDTable, (void *)usMzID, (void *)uMzID, &status); } else { uprv_free(uMzID); diff --git a/contrib/libs/icu/i18n/zonemeta.h b/contrib/libs/icu/i18n/zonemeta.h index 9dbcc878a2..f21399342b 100644 --- a/contrib/libs/icu/i18n/zonemeta.h +++ b/contrib/libs/icu/i18n/zonemeta.h @@ -40,7 +40,7 @@ public: /** * Return the canonical id for this tzid defined by CLDR, which might be the id itself. - * This overload method returns a persistent const UChar*, which is guranteed to persist + * This overload method returns a persistent const UChar*, which is guaranteed to persist * (a pointer to a resource). If the given system tzid is not known, U_ILLEGAL_ARGUMENT_ERROR * is set in the status. 
* @param tzid Zone ID @@ -50,7 +50,7 @@ public: static const UChar* U_EXPORT2 getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status); /* - * Conveninent method returning CLDR canonical ID for the given time zone + * Convenient method returning CLDR canonical ID for the given time zone */ static const UChar* U_EXPORT2 getCanonicalCLDRID(const TimeZone& tz); @@ -59,7 +59,7 @@ public: * is not associated with a country, return bogus string. * @param tzid Zone ID * @param country [output] Country code - * @param isPrimary [output] TRUE if the zone is the primary zone for the country + * @param isPrimary [output] true if the zone is the primary zone for the country * @return A reference to the result country */ static UnicodeString& U_EXPORT2 getCanonicalCountry(const UnicodeString &tzid, UnicodeString &country, UBool *isPrimary = NULL); |