aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/include/unicode/messageformat2.h
blob: c5459f042f40f87a135bba13de8965fa7c8e332d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
// © 2024 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#ifndef MESSAGEFORMAT2_H
#define MESSAGEFORMAT2_H

#if U_SHOW_CPLUSPLUS_API

#if !UCONFIG_NO_FORMATTING

#if !UCONFIG_NO_MF2

/**
 * \file
 * \brief C++ API: Formats messages using the draft MessageFormat 2.0.
 */

#include "unicode/messageformat2_arguments.h"
#include "unicode/messageformat2_data_model.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/unistr.h"

#ifndef U_HIDE_DEPRECATED_API

U_NAMESPACE_BEGIN

namespace message2 {

    class Environment;
    class MessageContext;
    class ResolvedSelector;
    class StaticErrors;

    /**
     * <p>MessageFormatter is a Technical Preview API implementing MessageFormat 2.0.
     *
     * <p>See <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md">the
     * description of the syntax with examples and use cases</a> and the corresponding
     * <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf">ABNF</a> grammar.</p>
     *
     * The MessageFormatter class is mutable and movable. It is not copyable.
     * (It is mutable because if it has a custom function registry, the registry may include
     * `FormatterFactory` objects implementing custom formatters, which are allowed to contain
     * mutable state.)
     *
     * @internal ICU 75 technology preview
     * @deprecated This API is for technology preview only.
     */
    class U_I18N_API MessageFormatter : public UObject {
        // Note: This class does not currently inherit from the existing
        // `Format` class.
    public:
        /**
         * Move assignment operator:
         * The source MessageFormatter will be left in a valid but undefined state.
         *
         * @internal ICU 75 technology preview
         * @deprecated This API is for technology preview only.
         */
        MessageFormatter& operator=(MessageFormatter&&) noexcept;
        /**
         * Destructor.
         *
         * @internal ICU 75 technology preview
         * @deprecated This API is for technology preview only.
         */
        virtual ~MessageFormatter();

        /**
         * Formats the message to a string, using the data model that was previously set or parsed,
         * and the given `arguments` object.
         *
         * @param arguments Reference to message arguments
         * @param status    Input/output error code used to indicate syntax errors, data model
         *                  errors, resolution errors, formatting errors, selection errors, as well
         *                  as other errors (such as memory allocation failures). Partial output
         *                  is still provided in the presence of most error types.
         * @return          The string result of formatting the message with the given arguments.
         *
         * @internal ICU 75 technology preview
         * @deprecated This API is for technology preview only.
         */
        UnicodeString formatToString(const MessageArguments& arguments, UErrorCode &status);

        /**
         * Not yet implemented; formats the message to a `FormattedMessage` object,
         * using the data model that was previously set or parsed,
         * and the given `arguments` object.
         *
         * @param arguments Reference to message arguments
         * @param status    Input/output error code used to indicate syntax errors, data model
         *                  errors, resolution errors, formatting errors, selection errors, as well
         *                  as other errors (such as memory allocation failures). Partial output
         *                  is still provided in the presence of most error types.
         * @return          The `FormattedMessage` representing the formatted message.
         *
         * @internal ICU 75 technology preview
         * @deprecated This API is for technology preview only.
         */
        FormattedMessage format(const MessageArguments& arguments, UErrorCode &status) const {
            (void) arguments;
            if (U_SUCCESS(status)) {
                status = U_UNSUPPORTED_ERROR;
            }
            return FormattedMessage(status);
        }

        /**
         * Accesses the locale that this `MessageFormatter` object was created with.
         *
         * @return A reference to the locale.
         *
         * @internal ICU 75 technology preview
         * @deprecated This API is for technology preview only.
         */
        const Locale& getLocale() const { return locale; }

        /**
         * Serializes the data model as a string in MessageFormat 2.0 syntax.
         *
         * @return result    A string representation of the data model.
         *                   The string is a valid MessageFormat 2.0 message.
         *
         * @internal ICU 75 technology preview
         * @deprecated This API is for technology preview only.
         */
        UnicodeString getPattern() const;

        /**
         * Accesses the data model referred to by this
         * `MessageFormatter` object.
         *
         * @return A reference to the data model.
         *
         * @internal ICU 75 technology preview
         * @deprecated This API is for technology preview only.
         */
        const MFDataModel& getDataModel() const;

        /**
         * Used in conjunction with the
         * MessageFormatter::Builder::setErrorHandlingBehavior() method.
         *
         * @internal ICU 76 technology preview
         * @deprecated This API is for technology preview only.
         */
        typedef enum UMFErrorHandlingBehavior {
            /**
             * Suppress errors and return best-effort output.
             *
             * @internal ICU 76 technology preview
             * @deprecated This API is for technology preview only.
             */
            U_MF_BEST_EFFORT = 0,
            /**
             * Signal all MessageFormat errors using the UErrorCode
             * argument.
             *
             * @internal ICU 76 technology preview
             * @deprecated This API is for technology preview only.
             */
            U_MF_STRICT
        } UMFErrorHandlingBehavior;

        /**
         * The mutable Builder class allows each part of the MessageFormatter to be initialized
         * separately; calling its `build()` method yields an immutable MessageFormatter.
         *
         * Not copyable or movable.
         */
        class U_I18N_API Builder : public UObject {
        private:
            friend class MessageFormatter;

            // The pattern to be parsed to generate the formatted message
            UnicodeString pattern;
            bool hasPattern = false;
            bool hasDataModel = false;
            // The data model to be used to generate the formatted message
            // Initialized either by `setDataModel()`, or by the parser
            // through a call to `setPattern()`
            MFDataModel dataModel;
            // Normalized representation of the pattern;
            // ignored if `setPattern()` wasn't called
            UnicodeString normalizedInput;
            // Errors (internal representation of parse errors)
            // Ignored if `setPattern()` wasn't called
            StaticErrors* errors;
            Locale locale;
            // Not owned
            const MFFunctionRegistry* customMFFunctionRegistry;
            // Error behavior; see comment in `MessageFormatter` class
            bool signalErrors = false;

            void clearState();
        public:
            /**
             * Sets the locale to use for formatting.
             *
             * @param locale The desired locale.
             * @return       A reference to the builder.
             *
             * @internal ICU 75 technology preview
             * @deprecated This API is for technology preview only.
             */
            Builder& setLocale(const Locale& locale);
            /**
             * Sets the pattern (contents of the message) and parses it
             * into a data model. If a data model was
             * previously set, it is removed.
             *
             * @param pattern A string in MessageFormat 2.0 syntax.
             * @param parseError Struct to receive information on the position
             *                   of an error within the pattern.
             * @param status    Input/output error code. If the
             *                  pattern cannot be parsed, set to failure code.
             * @return       A reference to the builder.
             *
             * @internal ICU 75 technology preview
             * @deprecated This API is for technology preview only.
             */
            Builder& setPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& status);
            /**
             * Sets a custom function registry.
             *
             * @param functionRegistry Reference to the function registry to use.
             *        `functionRegistry` is not copied,
             *        and the caller must ensure its lifetime contains
             *        the lifetime of the `MessageFormatter` object built by this
             *        builder.
             * @return       A reference to the builder.
             *
             * @internal ICU 75 technology preview
             * @deprecated This API is for technology preview only.
             */
            Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry);
            /**
             * Sets a data model. If a pattern was previously set, it is removed.
             *
             * @param dataModel Data model to format. Passed by move.
             * @return       A reference to the builder.
             *
             * @internal ICU 75 technology preview
             * @deprecated This API is for technology preview only.
             */
            Builder& setDataModel(MFDataModel&& dataModel);
            /**
             * Set the error handling behavior for this formatter.
             *
             * "Strict" error behavior means that that formatting methods
             * will set their UErrorCode arguments to signal MessageFormat
             * data model, resolution, and runtime errors. Syntax errors are
             * always signaled.
             *
             * "Best effort" error behavior means that MessageFormat errors are
             * suppressed:  formatting methods will _not_ set their
             * UErrorCode arguments to signal MessageFormat data model,
             * resolution, or runtime errors. Best-effort output
             * will be returned. Syntax errors are always signaled.
             * This is the default behavior.
             *
             * @param type An enum with type UMFErrorHandlingBehavior;
             *             if type == `U_MF_STRICT`, then
             *             errors are handled strictly.
             *             If type == `U_MF_BEST_EFFORT`, then
             *             best-effort output is returned.
             *
             * The default is to suppress all MessageFormat errors
             * and return best-effort output.
             *
             * @return       A reference to the builder.
             *
             * @internal ICU 76 technology preview
             * @deprecated This API is for technology preview only.
             */
            Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
            /**
             * Constructs a new immutable MessageFormatter using the pattern or data model
             * that was previously set, and the locale (if it was previously set)
             * or default locale (otherwise).
             *
             * The builder object (`this`) can still be used after calling `build()`.
             *
             * @param status    Input/output error code.  If neither the pattern
             *                  nor the data model is set, set to failure code.
             * @return          The new MessageFormatter object
             *
             * @internal ICU 75 technology preview
             * @deprecated This API is for technology preview only.
             */
            MessageFormatter build(UErrorCode& status) const;
            /**
             * Default constructor.
             * Returns a Builder with the default locale and with no
             * data model or pattern set. Either `setPattern()`
             * or `setDataModel()` has to be called before calling `build()`.
             *
             * @param status    Input/output error code.
             *
             * @internal ICU 75 technology preview
             * @deprecated This API is for technology preview only.
             */
            Builder(UErrorCode& status);
            /**
             * Destructor.
             *
             * @internal ICU 75 technology preview
             * @deprecated This API is for technology preview only.
             */
            virtual ~Builder();
        }; // class MessageFormatter::Builder

        // TODO: Shouldn't be public; only used for testing
        /**
         * Returns a string consisting of the input with optional spaces removed.
         *
         * @return        A normalized string representation of the input
         *
         * @internal ICU 75 technology preview
         * @deprecated This API is for technology preview only.
         */
        const UnicodeString& getNormalizedPattern() const { return normalizedInput; }

    private:
        friend class Builder;
        friend class MessageContext;

        MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);

        MessageFormatter() = delete; // default constructor not implemented

        // Do not define default assignment operator
        const MessageFormatter &operator=(const MessageFormatter &) = delete;

        ResolvedSelector resolveVariables(const Environment& env, const data_model::Operand&, MessageContext&, UErrorCode &) const;
        ResolvedSelector resolveVariables(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode &) const;

        // Selection methods

        // Takes a vector of FormattedPlaceholders
        void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
        // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
        void filterVariants(const UVector&, UVector&, UErrorCode&) const;
        // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
        void sortVariants(const UVector&, UVector&, UErrorCode&) const;
        // Takes a vector of strings (input) and a vector of strings (output)
        void matchSelectorKeys(const UVector&, MessageContext&, ResolvedSelector&& rv, UVector&, UErrorCode&) const;
        // Takes a vector of FormattedPlaceholders (input),
        // and a vector of vectors of strings (output)
        void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;

        // Formatting methods
        [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
        void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
        // Formats a call to a formatting function
        // Dispatches on argument type
        [[nodiscard]] FormattedPlaceholder evalFormatterCall(FormattedPlaceholder&& argument,
                                                       MessageContext& context,
                                                       UErrorCode& status) const;
        // Dispatches on function name
        [[nodiscard]] FormattedPlaceholder evalFormatterCall(const FunctionName& functionName,
                                                       FormattedPlaceholder&& argument,
                                                       FunctionOptions&& options,
                                                       MessageContext& context,
                                                       UErrorCode& status) const;
        // Formats an expression that appears as a selector
        ResolvedSelector formatSelectorExpression(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode&) const;
        // Formats an expression that appears in a pattern or as the definition of a local variable
        [[nodiscard]] FormattedPlaceholder formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const;
        [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
        [[nodiscard]] FormattedPlaceholder formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
        [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
        void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;

        // Function registry methods
        bool hasCustomMFFunctionRegistry() const {
            return (customMFFunctionRegistry != nullptr);
        }

        // Precondition: custom function registry exists
        // Note: this is non-const because the values in the MFFunctionRegistry are mutable
        // (a FormatterFactory can have mutable state)
        const MFFunctionRegistry& getCustomMFFunctionRegistry() const;

        bool isCustomFormatter(const FunctionName&) const;
        FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
        bool isBuiltInSelector(const FunctionName&) const;
        bool isBuiltInFormatter(const FunctionName&) const;
        bool isCustomSelector(const FunctionName&) const;
        const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
        bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
        bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
        const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;

        Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
        Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
        bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;

        // Checking for resolution errors
        void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
        void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
        void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
        void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;

        void initErrors(UErrorCode&);
        void clearErrors() const;
        void cleanup() noexcept;

        // The locale this MessageFormatter was created with
        /* const */ Locale locale;

        // Registry for built-in functions
        MFFunctionRegistry standardMFFunctionRegistry;
        // Registry for custom functions; may be null if no custom registry supplied
        // Note: this is *not* owned by the MessageFormatter object
        // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
        // while also not requiring the function registry to be deeply-copyable. Making the
        // function registry copyable would impose a requirement on any implementations
        // of the FormatterFactory and SelectorFactory interfaces to implement a custom
        // clone() method, which is necessary to avoid sharing between copies of the
        // function registry (and thus double-frees)
        // Not deeply immutable (the values in the function registry are mutable,
        // as a FormatterFactory can have mutable state
        const MFFunctionRegistry* customMFFunctionRegistry;

        // Data model, representing the parsed message
        MFDataModel dataModel;

        // Normalized version of the input string (optional whitespace removed)
        UnicodeString normalizedInput;

        // Errors -- only used while parsing and checking for data model errors; then
        // the MessageContext keeps track of errors
        // Must be a raw pointer to avoid including the internal header file
        // defining StaticErrors
        // Owned by `this`
        StaticErrors* errors = nullptr;

        // Error handling behavior.
        // If true, then formatting methods set their UErrorCode arguments
        // to signal MessageFormat errors, and no useful output is returned.
        // If false, then MessageFormat errors are not signaled and the
        // formatting methods return best-effort output.
        // The default is false.
        bool signalErrors = false;
    }; // class MessageFormatter

} // namespace message2

U_NAMESPACE_END

#endif // U_HIDE_DEPRECATED_API

#endif /* #if !UCONFIG_NO_MF2 */

#endif /* #if !UCONFIG_NO_FORMATTING */

#endif /* U_SHOW_CPLUSPLUS_API */

#endif // MESSAGEFORMAT2_H

// eof