contrib/libs/icu/i18n/numparse_currency.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189

// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT

#include "numparse_types.h"
#include "numparse_currency.h"
#include "ucurrimp.h"
#include "unicode/errorcode.h"
#include "numparse_utils.h"
#include "string_segment.h"

using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;


CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs,
                                                 parse_flags_t parseFlags, UErrorCode& status)
        : fCurrency1(currencySymbols.getCurrencySymbol(status)),
          fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
          fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)),
          afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
          beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
          fLocaleName(dfs.getLocale().getName(), -1, status) {
    utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());

    // Pre-load the long names for the current locale and currency
    // if we are parsing without the full currency data.
    if (!fUseFullCurrencyData) {
        for (int32_t i=0; i<StandardPlural::COUNT; i++) {
            auto plural = static_cast<StandardPlural::Form>(i);
            fLocalLongNames[i] = currencySymbols.getPluralName(plural, status);
        }
    }

    // TODO: Figure out how to make this faster and re-enable.
    // Computing the "lead code points" set for fastpathing is too slow to use in production.
    // See https://unicode-org.atlassian.net/browse/ICU-13584
//    // Compute the full set of characters that could be the first in a currency to allow for
//    // efficient smoke test.
//    fLeadCodePoints.add(fCurrency1.char32At(0));
//    fLeadCodePoints.add(fCurrency2.char32At(0));
//    fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
//    uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
//    // Always apply case mapping closure for currencies
//    fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
//    fLeadCodePoints.freeze();
}

bool
CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    if (result.currencyCode[0] != 0) {
        return false;
    }

    // Try to match a currency spacing separator.
    int32_t initialOffset = segment.getOffset();
    bool maybeMore = false;
    if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
        int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
        if (overlap == beforeSuffixInsert.length()) {
            segment.adjustOffset(overlap);
            // Note: let currency spacing be a weak match. Don't update chars consumed.
        }
        maybeMore = maybeMore || overlap == segment.length();
    }

    // Match the currency string, and reset if we didn't find one.
    maybeMore = maybeMore || matchCurrency(segment, result, status);
    if (result.currencyCode[0] == 0) {
        segment.setOffset(initialOffset);
        return maybeMore;
    }

    // Try to match a currency spacing separator.
    if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
        int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
        if (overlap == afterPrefixInsert.length()) {
            segment.adjustOffset(overlap);
            // Note: let currency spacing be a weak match. Don't update chars consumed.
        }
        maybeMore = maybeMore || overlap == segment.length();
    }

    return maybeMore;
}

bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
                                            UErrorCode& status) const {
    bool maybeMore = false;

    int32_t overlap1;
    if (!fCurrency1.isEmpty()) {
        overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
    } else {
        overlap1 = -1;
    }
    maybeMore = maybeMore || overlap1 == segment.length();
    if (overlap1 == fCurrency1.length()) {
        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
        segment.adjustOffset(overlap1);
        result.setCharsConsumed(segment);
        return maybeMore;
    }

    int32_t overlap2;
    if (!fCurrency2.isEmpty()) {
        // ISO codes should be accepted case-insensitive.
        // https://unicode-org.atlassian.net/browse/ICU-13696
        overlap2 = segment.getCommonPrefixLength(fCurrency2);
    } else {
        overlap2 = -1;
    }
    maybeMore = maybeMore || overlap2 == segment.length();
    if (overlap2 == fCurrency2.length()) {
        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
        segment.adjustOffset(overlap2);
        result.setCharsConsumed(segment);
        return maybeMore;
    }

    if (fUseFullCurrencyData) {
        // Use the full currency data.
        // NOTE: This call site should be improved with #13584.
        const UnicodeString segmentString = segment.toTempUnicodeString();

        // Try to parse the currency
        ParsePosition ppos(0);
        int32_t partialMatchLen = 0;
        uprv_parseCurrency(
                fLocaleName.data(),
                segmentString,
                ppos,
                UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME
                &partialMatchLen,
                result.currencyCode,
                status);
        maybeMore = maybeMore || partialMatchLen == segment.length();

        if (U_SUCCESS(status) && ppos.getIndex() != 0) {
            // Complete match.
            // NOTE: The currency code should already be saved in the ParsedNumber.
            segment.adjustOffset(ppos.getIndex());
            result.setCharsConsumed(segment);
            return maybeMore;
        }

    } else {
        // Use the locale long names.
        int32_t longestFullMatch = 0;
        for (int32_t i=0; i<StandardPlural::COUNT; i++) {
            const UnicodeString& name = fLocalLongNames[i];
            int32_t overlap = segment.getCommonPrefixLength(name);
            if (overlap == name.length() && name.length() > longestFullMatch) {
                longestFullMatch = name.length();
            }
            maybeMore = maybeMore || overlap > 0;
        }
        if (longestFullMatch > 0) {
            utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
            segment.adjustOffset(longestFullMatch);
            result.setCharsConsumed(segment);
            return maybeMore;
        }
    }

    // No match found.
    return maybeMore;
}

bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const {
    // TODO: See constructor
    return true;
    //return segment.startsWith(fLeadCodePoints);
}

UnicodeString CombinedCurrencyMatcher::toString() const {
    return u"<CombinedCurrencyMatcher>";
}


#endif /* #if !UCONFIG_NO_FORMATTING */