contrib/libs/icu/i18n/number_patternstring.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339

// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING
#ifndef __NUMBER_PATTERNSTRING_H__
#define __NUMBER_PATTERNSTRING_H__


#include <cstdint>
#include "unicode/unum.h"
#include "unicode/unistr.h"
#include "number_types.h"
#include "number_decimalquantity.h"
#include "number_decimfmtprops.h"
#include "number_affixutils.h"

U_NAMESPACE_BEGIN
namespace number::impl {

// Forward declaration
class PatternParser;

// Note: the order of fields in this enum matters for parsing.
enum PatternSignType {
    /** Render using normal positive subpattern rules */
    PATTERN_SIGN_TYPE_POS,
    /** Render using rules to force the display of a plus sign */
    PATTERN_SIGN_TYPE_POS_SIGN,
    /** Render using negative subpattern rules */
    PATTERN_SIGN_TYPE_NEG,
    /** Count for looping over the possibilities */
    PATTERN_SIGN_TYPE_COUNT
};

// Exported as U_I18N_API because it is a public member field of exported ParsedSubpatternInfo
struct U_I18N_API Endpoints {
    int32_t start = 0;
    int32_t end = 0;
};

// Exported as U_I18N_API because it is a public member field of exported ParsedPatternInfo
struct U_I18N_API ParsedSubpatternInfo {
    uint64_t groupingSizes = 0x0000ffffffff0000L;
    int32_t integerLeadingHashSigns = 0;
    int32_t integerTrailingHashSigns = 0;
    int32_t integerNumerals = 0;
    int32_t integerAtSigns = 0;
    int32_t integerTotal = 0; // for convenience
    int32_t fractionNumerals = 0;
    int32_t fractionHashSigns = 0;
    int32_t fractionTotal = 0; // for convenience
    bool hasDecimal = false;
    int32_t widthExceptAffixes = 0;
    // Note: NullableValue causes issues here with std::move.
    bool hasPadding = false;
    UNumberFormatPadPosition paddingLocation = UNUM_PAD_BEFORE_PREFIX;
    DecimalQuantity rounding;
    bool exponentHasPlusSign = false;
    int32_t exponentZeros = 0;
    bool hasPercentSign = false;
    bool hasPerMilleSign = false;
    bool hasCurrencySign = false;
    bool hasCurrencyDecimal = false;
    bool hasMinusSign = false;
    bool hasPlusSign = false;

    Endpoints prefixEndpoints;
    Endpoints suffixEndpoints;
    Endpoints paddingEndpoints;
};

// Exported as U_I18N_API because it is needed for the unit test PatternStringTest
struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemory {
    UnicodeString pattern;
    ParsedSubpatternInfo positive;
    ParsedSubpatternInfo negative;

    ParsedPatternInfo()
            : state(this->pattern), currentSubpattern(nullptr) {}

    ~ParsedPatternInfo() override = default;

    // Need to declare this explicitly because of the destructor
    ParsedPatternInfo& operator=(ParsedPatternInfo&& src) noexcept = default;

    static int32_t getLengthFromEndpoints(const Endpoints& endpoints);

    char16_t charAt(int32_t flags, int32_t index) const override;

    int32_t length(int32_t flags) const override;

    UnicodeString getString(int32_t flags) const override;

    bool positiveHasPlusSign() const override;

    bool hasNegativeSubpattern() const override;

    bool negativeHasMinusSign() const override;

    bool hasCurrencySign() const override;

    bool containsSymbolType(AffixPatternType type, UErrorCode& status) const override;

    bool hasBody() const override;

    bool currencyAsDecimal() const override;

  private:
    struct U_I18N_API ParserState {
        const UnicodeString& pattern; // reference to the parent
        int32_t offset = 0;

        explicit ParserState(const UnicodeString& _pattern)
                : pattern(_pattern) {}

        ParserState& operator=(ParserState&& src) noexcept {
            // Leave pattern reference alone; it will continue to point to the same place in memory,
            // which gets overwritten by ParsedPatternInfo's implicit move assignment.
            offset = src.offset;
            return *this;
        }

        /** Returns the next code point, or -1 if string is too short. */
        UChar32 peek();

        /** Returns the code point after the next code point, or -1 if string is too short. */
        UChar32 peek2();

        /** Returns the next code point and then steps forward. */
        UChar32 next();

        // TODO: We don't currently do anything with the message string.
        // This method is here as a shell for Java compatibility.
        inline void toParseException(const char16_t* message) { (void) message; }
    } state;

    // NOTE: In Java, these are written as pure functions.
    // In C++, they're written as methods.
    // The behavior is the same.

    // Mutable transient pointer:
    ParsedSubpatternInfo* currentSubpattern;

    // In Java, "negative == null" tells us whether or not we had a negative subpattern.
    // In C++, we need to remember in another boolean.
    bool fHasNegativeSubpattern = false;

    const Endpoints& getEndpoints(int32_t flags) const;

    /** Run the recursive descent parser. */
    void consumePattern(const UnicodeString& patternString, UErrorCode& status);

    void consumeSubpattern(UErrorCode& status);

    void consumePadding(PadPosition paddingLocation, UErrorCode& status);

    void consumeAffix(Endpoints& endpoints, UErrorCode& status);

    void consumeLiteral(UErrorCode& status);

    void consumeFormat(UErrorCode& status);

    void consumeIntegerFormat(UErrorCode& status);

    void consumeFractionFormat(UErrorCode& status);

    void consumeExponent(UErrorCode& status);

    friend class PatternParser;
};

enum IgnoreRounding {
    IGNORE_ROUNDING_NEVER = 0, IGNORE_ROUNDING_IF_CURRENCY = 1, IGNORE_ROUNDING_ALWAYS = 2
};

class U_I18N_API PatternParser {
  public:
    /**
     * Runs the recursive descent parser on the given pattern string, returning a data structure with raw information
     * about the pattern string.
     *
     * <p>
     * To obtain a more useful form of the data, consider using {@link #parseToProperties} instead.
     *
     * TODO: Change argument type to const char16_t* instead of UnicodeString?
     *
     * @param patternString
     *            The LDML decimal format pattern (Excel-style pattern) to parse.
     * @return The results of the parse.
     */
    static void parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
                                   UErrorCode& status);

    /**
     * Parses a pattern string into a new property bag.
     *
     * @param pattern
     *            The pattern string, like "#,##0.00"
     * @param ignoreRounding
     *            Whether to leave out rounding information (minFrac, maxFrac, and rounding increment) when parsing the
     *            pattern. This may be desirable if a custom rounding mode, such as CurrencyUsage, is to be used
     *            instead.
     * @return A property bag object.
     * @throws IllegalArgumentException
     *             If there is a syntax error in the pattern string.
     */
    static DecimalFormatProperties parseToProperties(const UnicodeString& pattern,
                                                     IgnoreRounding ignoreRounding, UErrorCode& status);

    static DecimalFormatProperties parseToProperties(const UnicodeString& pattern, UErrorCode& status);

    /**
     * Parses a pattern string into an existing property bag. All properties that can be encoded into a pattern string
     * will be overwritten with either their default value or with the value coming from the pattern string. Properties
     * that cannot be encoded into a pattern string, such as rounding mode, are not modified.
     *
     * @param pattern
     *            The pattern string, like "#,##0.00"
     * @param properties
     *            The property bag object to overwrite.
     * @param ignoreRounding
     *            See {@link #parseToProperties(String pattern, int ignoreRounding)}.
     * @throws IllegalArgumentException
     *             If there was a syntax error in the pattern string.
     */
    static void parseToExistingProperties(const UnicodeString& pattern,
                                          DecimalFormatProperties& properties,
                                          IgnoreRounding ignoreRounding, UErrorCode& status);

  private:
    static void parseToExistingPropertiesImpl(const UnicodeString& pattern,
                                              DecimalFormatProperties& properties,
                                              IgnoreRounding ignoreRounding, UErrorCode& status);

    /** Finalizes the temporary data stored in the ParsedPatternInfo to the Properties. */
    static void patternInfoToProperties(DecimalFormatProperties& properties,
                                        ParsedPatternInfo& patternInfo, IgnoreRounding _ignoreRounding,
                                        UErrorCode& status);
};

class U_I18N_API PatternStringUtils {
  public:
    /**
     * Determine whether a given roundingIncrement should be ignored for formatting
     * based on the current maxFrac value (maximum fraction digits). For example a
     * roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac
     * is 2 or more. Note that roundingIncrements are rounded up in significance, so
     * a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e.
     * it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of
     * 0.005 is treated like 0.001 for significance).
     *
     * This test is needed for both NumberPropertyMapper::oldToNew and 
     * PatternStringUtils::propertiesToPatternString. In Java it cannot be
     * exported by NumberPropertyMapper (package private) so it is in
     * PatternStringUtils, do the same in C.
     *
     * @param roundIncr
     *            The roundingIncrement to be checked. Must be non-zero.
     * @param maxFrac
     *            The current maximum fraction digits value.
     * @return true if roundIncr should be ignored for formatting.
     */
     static bool ignoreRoundingIncrement(double roundIncr, int32_t maxFrac);

    /**
     * Creates a pattern string from a property bag.
     *
     * <p>
     * Since pattern strings support only a subset of the functionality available in a property bag, a new property bag
     * created from the string returned by this function may not be the same as the original property bag.
     *
     * @param properties
     *            The property bag to serialize.
     * @return A pattern string approximately serializing the property bag.
     */
    static UnicodeString propertiesToPatternString(const DecimalFormatProperties& properties,
                                                   UErrorCode& status);


    /**
     * Converts a pattern between standard notation and localized notation. Localized notation means that instead of
     * using generic placeholders in the pattern, you use the corresponding locale-specific characters instead. For
     * example, in locale <em>fr-FR</em>, the period in the pattern "0.000" means "decimal" in standard notation (as it
     * does in every other locale), but it means "grouping" in localized notation.
     *
     * <p>
     * A greedy string-substitution strategy is used to substitute locale symbols. If two symbols are ambiguous or have
     * the same prefix, the result is not well-defined.
     *
     * <p>
     * Locale symbols are not allowed to contain the ASCII quote character.
     *
     * <p>
     * This method is provided for backwards compatibility and should not be used in any new code.
     *
     * TODO(C++): This method is not yet implemented.
     *
     * @param input
     *            The pattern to convert.
     * @param symbols
     *            The symbols corresponding to the localized pattern.
     * @param toLocalized
     *            true to convert from standard to localized notation; false to convert from localized to standard
     *            notation.
     * @return The pattern expressed in the other notation.
     */
    static UnicodeString convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
                                          bool toLocalized, UErrorCode& status);

    /**
     * This method contains the heart of the logic for rendering LDML affix strings. It handles
     * sign-always-shown resolution, whether to use the positive or negative subpattern, permille
     * substitution, and plural forms for CurrencyPluralInfo.
     */
    static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
                                           PatternSignType patternSignType,
                                           bool approximately,
                                           StandardPlural::Form plural,
                                           bool perMilleReplacesPercent,
                                           bool dropCurrencySymbols,
                                           UnicodeString& output);

    static PatternSignType resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum);

  private:
    /** @return The number of chars inserted. */
    static int escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
                                   UErrorCode& status);
};

} // namespace number::impl
U_NAMESPACE_END


#endif //__NUMBER_PATTERNSTRING_H__

#endif /* #if !UCONFIG_NO_FORMATTING */