1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
|
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1997-2015, International Business Machines Corporation and others.
* All Rights Reserved.
*******************************************************************************
*/
#ifndef RBNF_H
#define RBNF_H
#include "unicode/utypes.h"
#if U_SHOW_CPLUSPLUS_API
/**
* \file
* \brief C++ API: Rule Based Number Format
*/
/**
* \def U_HAVE_RBNF
* This will be 0 if RBNF support is not included in ICU
* and 1 if it is.
*
* @stable ICU 2.4
*/
#if UCONFIG_NO_FORMATTING
#define U_HAVE_RBNF 0
#else
#define U_HAVE_RBNF 1
#include "unicode/dcfmtsym.h"
#include "unicode/fmtable.h"
#include "unicode/locid.h"
#include "unicode/numfmt.h"
#include "unicode/unistr.h"
#include "unicode/strenum.h"
#include "unicode/brkiter.h"
#include "unicode/upluralrules.h"
U_NAMESPACE_BEGIN
// Forward declarations: these classes are introduced here as incomplete types only.
class NFRule;
class NFRuleSet;
class LocalizationInfo;
class PluralFormat;
class RuleBasedCollator;
/**
 * Identifiers for the predefined rulesets.
 *
 * @stable ICU 2.2
 */
enum URBNFRuleSetTag {
    /**
     * Selects the predefined ruleset that spells out numeric values in words.
     * @stable ICU 2.2
     */
    URBNF_SPELLOUT = 0,
    /**
     * Selects the predefined ruleset that produces the ordinal form of a number.
     * @stable ICU 2.2
     */
    URBNF_ORDINAL = 1,
    /**
     * Selects the predefined ruleset that formats a value as a duration in
     * hours, minutes, and seconds.
     * @stable ICU 2.2
     */
    URBNF_DURATION = 2,
    /**
     * Selects the predefined ruleset for various non-place-value numbering systems.
     * WARNING: The same resource contains rule sets for a variety of different
     * numbering systems. Call setDefaultRuleSet() on the formatter to choose the
     * actual numbering system.
     * @stable ICU 2.2
     */
    URBNF_NUMBERING_SYSTEM = 3,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal URBNFRuleSetTag value.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    URBNF_COUNT
#endif  // U_HIDE_DEPRECATED_API
};
/**
* The RuleBasedNumberFormat class formats numbers according to a set of rules. This number formatter is
* typically used for spelling out numeric values in words (e.g., 25,376 as
* "twenty-five thousand three hundred seventy-six" or "vingt-cinq mille trois
* cents soixante-seize" or
* "fünfundzwanzigtausenddreihundertsechsundsiebzig"), but can also be used for
* other complicated formatting tasks, such as formatting a number of seconds as hours,
* minutes and seconds (e.g., 3,730 as "1:02:10").
*
* <p>The resources contain three predefined formatters for each locale: spellout, which
* spells out a value in words (123 is "one hundred twenty-three"); ordinal, which
* appends an ordinal suffix to the end of a numeral (123 is "123rd"); and
* duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
* "2:03"). The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
* by supplying programmer-defined rule sets.</p>
*
* <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
* that is either passed to the constructor as a <tt>UnicodeString</tt> or loaded from a resource
* bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
* Each rule has a string of output text and a value or range of values it is applicable to.
* In a typical spellout rule set, the first twenty rules are the words for the numbers from
* 0 to 19:</p>
*
* <pre>zero; one; two; three; four; five; six; seven; eight; nine;
* ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
*
* <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
* we only have to supply the words for the multiples of 10:</p>
*
* <pre> 20: twenty[->>];
* 30: thirty[->>];
* 40: forty[->>];
* 50: fifty[->>];
* 60: sixty[->>];
* 70: seventy[->>];
* 80: eighty[->>];
* 90: ninety[->>];</pre>
*
* <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
* rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
* to all numbers from its own base value to one less than the next rule's base value. The
* ">>" token is called a <em>substitution</em> and tells the formatter to
* isolate the number's ones digit, format it using this same set of rules, and place the
* result at the position of the ">>" token. Text in brackets is omitted if
* the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
* is "twenty-four," not "twenty four").</p>
*
* <p>For even larger numbers, we can actually look up several parts of the number in the
* list:</p>
*
* <pre>100: << hundred[ >>];</pre>
*
* <p>The "<<" represents a new kind of substitution. The << isolates
* the hundreds digit (and any digits to its left), formats it using this same rule set, and
* places the result where the "<<" was. Notice also that the meaning of
* >> has changed: it now refers to both the tens and the ones digits. The meaning of
* both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
* which is the highest power of 10 that is less than or equal to the base value (the user
* can change this). To fill in the substitutions, the formatter divides the number being
* formatted by the divisor. The integral quotient is used to fill in the <<
* substitution, and the remainder is used to fill in the >> substitution. The meaning
* of the brackets changes similarly: text in brackets is omitted if the value being
* formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
* if a substitution is filled in with text that includes another substitution, that
* substitution is also filled in.</p>
*
* <p>This rule covers values up to 999, at which point we add another rule:</p>
*
* <pre>1000: << thousand[ >>];</pre>
*
* <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
* base value is a higher power of 10, changing the rule's divisor. This rule can actually be
* used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
*
* <pre> 1,000,000: << million[ >>];
* 1,000,000,000: << billion[ >>];
* 1,000,000,000,000: << trillion[ >>];
* 1,000,000,000,000,000: OUT OF RANGE!;</pre>
*
* <p>Commas, periods, and spaces can be used in the base values to improve legibility and
* are ignored by the rule parser. The last rule in the list is customarily treated as an
* "overflow rule," applying to everything from its base value on up, and often (as
* in this example) being used to print out an error message or default representation.
* Notice also that the size of the major groupings in large numbers is controlled by the
* spacing of the rules: because in English we group numbers by thousand, the higher rules
* are separated from each other by a factor of 1,000.</p>
*
* <p>To see how these rules actually work in practice, consider the following example:
* Formatting 25,340 with this rule set would work like this:</p>
*
* <table border="0" width="100%">
* <tr>
* <td><strong><< thousand >></strong></td>
* <td>[the rule whose base value is 1,000 is applicable to 25,340]</td>
* </tr>
* <tr>
* <td><strong>twenty->></strong> thousand >></td>
* <td>[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
* </tr>
* <tr>
* <td>twenty-<strong>five</strong> thousand >></td>
* <td>[25 mod 10 is 5. The rule for 5 is "five."]</td>
* </tr>
* <tr>
* <td>twenty-five thousand <strong><< hundred >></strong></td>
* <td>[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
* </tr>
* <tr>
* <td>twenty-five thousand <strong>three</strong> hundred >></td>
* <td>[340 over 100 is 3. The rule for 3 is "three."]</td>
* </tr>
* <tr>
* <td>twenty-five thousand three hundred <strong>forty</strong></td>
* <td>[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
* evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
* </tr>
* </table>
*
* <p>The above syntax suffices only to format positive integers. To format negative numbers,
* we add a special rule:</p>
*
* <pre>-x: minus >>;</pre>
*
* <p>This is called a <em>negative-number rule,</em> and is identified by "-x"
* where the base value would be. This rule is used to format all negative numbers. The
* >> token here means "find the number's absolute value, format it with these
* rules, and put the result here."</p>
*
* <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
* parts:</p>
*
* <pre>x.x: << point >>;</pre>
*
* <p>This rule is used for all positive non-integers (negative non-integers pass through the
* negative-number rule first and then through this rule). Here, the << token refers to
* the number's integral part, and the >> to the number's fractional part. The
* fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
* formatted as "one hundred twenty-three point four five six").</p>
*
* <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
*
* <p>There is actually much more flexibility built into the rule language than the
* description above shows. A formatter may own multiple rule sets, which can be selected by
* the caller, and which can use each other to fill in their substitutions. Substitutions can
* also be filled in with digits, using a DecimalFormat object. There is syntax that can be
* used to alter a rule's divisor in various ways. And there is provision for much more
* flexible fraction handling. A complete description of the rule syntax follows:</p>
*
* <hr>
*
* <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
* sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
* set name must begin with a % sign. Rule sets with names that begin with a single % sign
* are <em>public:</em> the caller can specify that they be used to format and parse numbers.
* Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
* of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
*
* <p>The user can also specify a special "rule set" named <tt>%%lenient-parse</tt>.
* The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
* description which is used to define equivalences for lenient parsing. For more information
* on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
* see <tt>setLenientParse()</tt>. <em>Note:</em> symbols that have syntactic meaning
* in collation rules, such as '&', have no particular meaning when appearing outside
* of the <tt>lenient-parse</tt> rule set.</p>
*
* <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
* Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
* These parameters are controlled by the description syntax, which consists of a <em>rule
* descriptor,</em> a colon, and a <em>rule body.</em></p>
*
* <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
* name of a token):</p>
*
* <table border="0" width="100%">
* <tr>
* <td><em>bv</em>:</td>
* <td><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
* number expressed using ASCII digits. <em>bv</em> may contain spaces, periods, and commas,
* which are ignored. The rule's divisor is the highest power of 10 less than or equal to
* the base value.</td>
* </tr>
* <tr>
* <td><em>bv</em>/<em>rad</em>:</td>
* <td><em>bv</em> specifies the rule's base value. The rule's divisor is the
* highest power of <em>rad</em> less than or equal to the base value.</td>
* </tr>
* <tr>
* <td><em>bv</em>>:</td>
* <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
* let the radix be 10, and the exponent be the highest exponent of the radix that yields a
* result less than or equal to the base value. Every > character after the base value
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
* </tr>
* <tr>
* <td><em>bv</em>/<em>rad</em>>:</td>
* <td><em>bv</em> specifies the rule's base value. To calculate the divisor,
* let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
* yields a result less than or equal to the base value. Every > character after the radix
* decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
* raised to the power of the exponent; otherwise, the divisor is 1.</td>
* </tr>
* <tr>
* <td>-x:</td>
* <td>The rule is a negative-number rule.</td>
* </tr>
* <tr>
* <td>x.x:</td>
* <td>The rule is an <em>improper fraction rule</em>. If the full stop in
* the middle of the rule name is replaced with the decimal point
* that is used in the language or DecimalFormatSymbols, then that rule will
* have precedence when formatting and parsing this rule. For example, some
* languages use the comma, and can thus be written as x,x instead. For example,
* you can use "x.x: << point >>;x,x: << comma >>;" to
* handle the decimal point that matches the language's natural spelling of
* the punctuation of either the full stop or comma.</td>
* </tr>
* <tr>
* <td>0.x:</td>
* <td>The rule is a <em>proper fraction rule</em>. If the full stop in
* the middle of the rule name is replaced with the decimal point
* that is used in the language or DecimalFormatSymbols, then that rule will
* have precedence when formatting and parsing this rule. For example, some
* languages use the comma, and can thus be written as 0,x instead. For example,
* you can use "0.x: point >>;0,x: comma >>;" to
* handle the decimal point that matches the language's natural spelling of
* the punctuation of either the full stop or comma.</td>
* </tr>
* <tr>
* <td>x.0:</td>
* <td>The rule is a <em>default rule</em>. If the full stop in
* the middle of the rule name is replaced with the decimal point
* that is used in the language or DecimalFormatSymbols, then that rule will
* have precedence when formatting and parsing this rule. For example, some
* languages use the comma, and can thus be written as x,0 instead. For example,
* you can use "x.0: << point;x,0: << comma;" to
* handle the decimal point that matches the language's natural spelling of
* the punctuation of either the full stop or comma.</td>
* </tr>
* <tr>
* <td>Inf:</td>
* <td>The rule for infinity.</td>
* </tr>
* <tr>
* <td>NaN:</td>
* <td>The rule for an IEEE 754 NaN (not a number).</td>
* </tr>
* <tr>
* <td><em>nothing</em></td>
* <td>If the rule's rule descriptor is left out, the base value is one plus the
* preceding rule's base value (or zero if this is the first rule in the list) in a normal
* rule set. In a fraction rule set, the base value is the same as the preceding rule's
* base value.</td>
* </tr>
* </table>
*
* <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
* on whether it is used to format a number's integral part (or the whole number) or a
* number's fractional part. Using a rule set to format a rule's fractional part makes it a
* fraction rule set.</p>
*
* <p>Which rule is used to format a number is defined according to one of the following
* algorithms: If the rule set is a regular rule set, do the following:
*
* <ul>
* <li>If the rule set includes a default rule (and the number was passed in as a <tt>double</tt>),
* use the default rule. (If the number being formatted was passed in as a <tt>long</tt>,
* the default rule is ignored.)</li>
* <li>If the number is negative, use the negative-number rule.</li>
* <li>If the number has a fractional part and is greater than 1, use the improper fraction
* rule.</li>
* <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
* rule.</li>
* <li>Binary-search the rule list for the rule with the highest base value less than or equal
* to the number. If that rule has two substitutions, its base value is not an even multiple
* of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
* rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
* </ul>
*
* <p>If the rule set is a fraction rule set, do the following:
*
* <ul>
* <li>Ignore negative-number and fraction rules.</li>
* <li>For each rule in the list, multiply the number being formatted (which will always be
* between 0 and 1) by the rule's base value. Keep track of the distance between the result
* and the nearest integer.</li>
* <li>Use the rule that produced the result closest to zero in the above calculation. In the
* event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
* to try each rule's base value as a possible denominator of a fraction. Whichever
* denominator produces the fraction closest in value to the number being formatted wins.) If
* the rule following the matching rule has the same base value, use it if the numerator of
* the fraction is anything other than 1; if the numerator is 1, use the original matching
* rule. (This is to allow singular and plural forms of the rule text without a lot of extra
* hassle.)</li>
* </ul>
*
* <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
* may include zero, one, or two <em>substitution tokens,</em> and a range of text in
* brackets. The brackets denote optional text (and may also include one or both
* substitutions). The exact meanings of the substitution tokens, and under what conditions
* optional text is omitted, depend on the syntax of the substitution token and the context.
* The rest of the text in a rule body is literal text that is output when the rule matches
* the number being formatted.</p>
*
* <p>A substitution token begins and ends with a <em>token character.</em> The token
* character and the context together specify a mathematical operation to be performed on the
* number being formatted. An optional <em>substitution descriptor </em>specifies how the
* value resulting from that operation is used to fill in the substitution. The position of
* the substitution token in the rule body specifies the location of the resultant text in
* the original rule text.</p>
*
* <p>The meanings of the substitution token characters are as follows:</p>
*
* <table border="0" width="100%">
* <tr>
* <td>>></td>
* <td>in normal rule</td>
* <td>Divide the number by the rule's divisor and format the remainder</td>
* </tr>
* <tr>
* <td></td>
* <td>in negative-number rule</td>
* <td>Find the absolute value of the number and format the result</td>
* </tr>
* <tr>
* <td></td>
* <td>in fraction or default rule</td>
* <td>Isolate the number's fractional part and format it.</td>
* </tr>
* <tr>
* <td></td>
* <td>in rule in fraction rule set</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td>>>></td>
* <td>in normal rule</td>
* <td>Divide the number by the rule's divisor and format the remainder,
* but bypass the normal rule-selection process and just use the
* rule that precedes this one in this rule list.</td>
* </tr>
* <tr>
* <td></td>
* <td>in all other rules</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td><<</td>
* <td>in normal rule</td>
* <td>Divide the number by the rule's divisor and format the quotient</td>
* </tr>
* <tr>
* <td></td>
* <td>in negative-number rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td></td>
* <td>in fraction or default rule</td>
* <td>Isolate the number's integral part and format it.</td>
* </tr>
* <tr>
* <td></td>
* <td>in rule in fraction rule set</td>
* <td>Multiply the number by the rule's base value and format the result.</td>
* </tr>
* <tr>
* <td>==</td>
* <td>in all rule sets</td>
* <td>Format the number unchanged</td>
* </tr>
* <tr>
* <td>[]</td>
* <td>in normal rule</td>
* <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
* </tr>
* <tr>
* <td></td>
* <td>in negative-number rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td></td>
* <td>in improper-fraction rule</td>
* <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
* x.x rule and a 0.x rule)</td>
* </tr>
* <tr>
* <td></td>
* <td>in default rule</td>
* <td>Omit the optional text if the number is an integer (same as specifying both an x.x
* rule and an x.0 rule)</td>
* </tr>
* <tr>
* <td></td>
* <td>in proper-fraction rule</td>
* <td>Not allowed.</td>
* </tr>
* <tr>
* <td></td>
* <td>in rule in fraction rule set</td>
* <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
* </tr>
* <tr>
* <td width="37">$(cardinal,<i>plural syntax</i>)$</td>
* <td width="23"></td>
* <td width="165" valign="top">in all rule sets</td>
* <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
* exponent of the base value for the specified locale, which is normally equivalent to the << value.
* This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated
* as the same base value for parsing.</td>
* </tr>
* <tr>
* <td width="37">$(ordinal,<i>plural syntax</i>)$</td>
* <td width="23"></td>
* <td width="165" valign="top">in all rule sets</td>
* <td>This provides the ability to choose a word based on the number divided by the radix to the power of the
* exponent of the base value for the specified locale, which is normally equivalent to the << value.
* This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated
* as the same base value for parsing.</td>
* </tr>
* </table>
*
* <p>The substitution descriptor (i.e., the text between the token characters) may take one
* of three forms:</p>
*
* <table border="0" width="100%">
* <tr>
* <td>a rule set name</td>
* <td>Perform the mathematical operation on the number, and format the result using the
* named rule set.</td>
* </tr>
* <tr>
* <td>a DecimalFormat pattern</td>
* <td>Perform the mathematical operation on the number, and format the result using a
* DecimalFormat with the specified pattern. The pattern must begin with 0 or #.</td>
* </tr>
* <tr>
* <td>nothing</td>
* <td>Perform the mathematical operation on the number, and format the result using the rule
* set containing the current rule, except:
* <ul>
* <li>You can't have an empty substitution descriptor with a == substitution.</li>
* <li>If you omit the substitution descriptor in a >> substitution in a fraction rule,
* format the result one digit at a time using the rule set containing the current rule.</li>
* <li>If you omit the substitution descriptor in a << substitution in a rule in a
* fraction rule set, format the result using the default rule set for this formatter.</li>
* </ul>
* </td>
* </tr>
* </table>
*
* <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
* descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
* the apostrophe is ignored, but all text after it becomes significant (this is how you can
* have a rule's rule text begin with whitespace). There is no escape function: the semicolon
* is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
* names. The characters beginning a substitution token are always treated as the beginning
* of a substitution token.</p>
*
* <p>See the resource data and the demo program for annotated examples of real rule sets
* using these features.</p>
*
* <p><em>User subclasses are not supported.</em> While clients may write
* subclasses, such code will not necessarily work and will not be
* guaranteed to work stably from release to release.
*
* <p><b>Localizations</b></p>
* <p>Constructors are available that allow the specification of localizations for the
* public rule sets (and also allow more control over what public rule sets are available).
* Localization data is represented as a textual description. The description represents
* an array of arrays of strings. The first element is an array of the public rule set names;
* each of these must be one of the public rule set names that appear in the rules. Only
* names in this array will be treated as public rule set names by the API. Each subsequent
* element is an array of localizations of these names. The first element of one of these
* subarrays is the locale name, and the remaining elements are localizations of the
* public rule set names, in the same order as they were listed in the first array.</p>
* <p>In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used
* to separate elements of an array. Whitespace is ignored, unless quoted.</p>
* <p>For example:<pre>
* < < %foo, %bar, %baz >,
* < en, Foo, Bar, Baz >,
* < fr, 'le Foo', 'le Bar', 'le Baz' >,
* < zh, \\u7532, \\u4e59, \\u4e19 > >
* </pre></p>
* @author Richard Gillam
* @see NumberFormat
* @see DecimalFormat
* @see PluralFormat
* @see PluralRules
* @stable ICU 2.0
*/
class U_I18N_API RuleBasedNumberFormat : public NumberFormat {
public:
//-----------------------------------------------------------------------
// constructors
//-----------------------------------------------------------------------
/**
* Creates a RuleBasedNumberFormat that behaves according to the description
* passed in. The formatter uses the default locale.
* @param rules A description of the formatter's desired behavior.
* See the class documentation for a complete explanation of the description
* syntax.
* @param perror The parse error if an error was encountered.
* @param status The status indicating whether the constructor succeeded.
* @stable ICU 3.2
*/
RuleBasedNumberFormat(const UnicodeString& rules, UParseError& perror, UErrorCode& status);
/**
* Creates a RuleBasedNumberFormat that behaves according to the description
* passed in. The formatter uses the default locale.
* <p>
* The localizations data provides information about the public
* rule sets and their localized display names for different
* locales. The first element in the list is an array of the names
* of the public rule sets. The first element in this array is
* the initial default ruleset. The remaining elements in the
* list are arrays of localizations of the names of the public
* rule sets. Each of these is one longer than the initial array,
* with the first String being the ULocale ID, and the remaining
* Strings being the localizations of the rule set names, in the
* same order as the initial array. Arrays are nullptr-terminated.
* <p>
* In this API the localizations are passed as a single textual description;
* see the "Localizations" section of the class documentation for its syntax.
* @param rules A description of the formatter's desired behavior.
* See the class documentation for a complete explanation of the description
* syntax.
* @param localizations a list of localizations for the rule set
* names in the description. These will be copied by the constructor.
* @param perror The parse error if an error was encountered.
* @param status The status indicating whether the constructor succeeded.
* @stable ICU 3.2
*/
RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
UParseError& perror, UErrorCode& status);
/**
* Creates a RuleBasedNumberFormat that behaves according to the rules
* passed in. The formatter uses the specified locale to determine the
* characters to use when formatting numerals, and to define equivalences
* for lenient parsing.
* @param rules The formatter rules.
* See the class documentation for a complete explanation of the rule
* syntax.
* @param locale A locale that governs which characters are used for
* formatting values in numerals and which characters are equivalent in
* lenient parsing.
* @param perror The parse error if an error was encountered.
* @param status The status indicating whether the constructor succeeded.
* @stable ICU 2.0
*/
RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale,
UParseError& perror, UErrorCode& status);
/**
* Creates a RuleBasedNumberFormat that behaves according to the description
* passed in. The formatter uses the specified locale.
* <p>
* The localizations data provides information about the public
* rule sets and their localized display names for different
* locales. The first element in the list is an array of the names
* of the public rule sets. The first element in this array is
* the initial default ruleset. The remaining elements in the
* list are arrays of localizations of the names of the public
* rule sets. Each of these is one longer than the initial array,
* with the first String being the ULocale ID, and the remaining
* Strings being the localizations of the rule set names, in the
* same order as the initial array. Arrays are nullptr-terminated.
* <p>
* In this API the localizations are passed as a single textual description;
* see the "Localizations" section of the class documentation for its syntax.
* @param rules A description of the formatter's desired behavior.
* See the class documentation for a complete explanation of the description
* syntax.
* @param localizations a list of localizations for the rule set
* names in the description. These will be copied by the constructor.
* @param locale A locale that governs which characters are used for
* formatting values in numerals and which characters are equivalent in
* lenient parsing.
* @param perror The parse error if an error was encountered.
* @param status The status indicating whether the constructor succeeded.
* @stable ICU 3.2
*/
RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations,
const Locale& locale, UParseError& perror, UErrorCode& status);
/**
* Creates a RuleBasedNumberFormat from a predefined ruleset. The selector
* code chooses among four possible predefined formats: spellout, ordinal,
* duration, and numbering system.
* @param tag A selector code specifying which kind of formatter to create for that
* locale. There are four legal values: URBNF_SPELLOUT, which creates a formatter that
* spells out a value in words in the desired language, URBNF_ORDINAL, which attaches
* an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
* URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds always rounding down,
* and URBNF_NUMBERING_SYSTEM, which is used to invoke rules for alternate numbering
* systems such as the Hebrew numbering system, or for Roman Numerals, etc.
* NOTE: If you use URBNF_NUMBERING_SYSTEM, you must also call setDefaultRuleSet() to
* specify the exact numbering system you want to use. If you want the default numbering system
* for the locale, call NumberFormat::createInstance() instead of creating a RuleBasedNumberFormat directly.
* @param locale The locale for the formatter.
* @param status The status indicating whether the constructor succeeded.
* @stable ICU 2.0
*/
RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status);
//-----------------------------------------------------------------------
// boilerplate
//-----------------------------------------------------------------------
/**
* Copy constructor
* @param rhs the object to be copied from.
* @stable ICU 2.6
*/
RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs);
/**
* Assignment operator
* @param rhs the object to be copied from.
* @return a reference to this object.
* @stable ICU 2.6
*/
RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs);
/**
* Release memory allocated for a RuleBasedNumberFormat when you are finished with it.
* @stable ICU 2.6
*/
virtual ~RuleBasedNumberFormat();
/**
* Clone this object polymorphically. The caller is responsible
* for deleting the result when done.
* @return A copy of the object.
* @stable ICU 2.6
*/
virtual RuleBasedNumberFormat* clone() const override;
/**
* Return true if the given Format objects are semantically equal.
* Objects of different subclasses are considered unequal.
* @param other the object to be compared with.
* @return true if the given Format objects are semantically equal.
* @stable ICU 2.6
*/
virtual bool operator==(const Format& other) const override;
//-----------------------------------------------------------------------
// public API functions
//-----------------------------------------------------------------------
/**
* Return the rules that were provided to the RuleBasedNumberFormat.
* @return the result String that was passed in
* @stable ICU 2.0
*/
virtual UnicodeString getRules() const;
/**
* Return the number of public rule set names.
* @return the number of public rule set names.
* @stable ICU 2.0
*/
virtual int32_t getNumberOfRuleSetNames() const;
/**
* Return the name of the index'th public ruleSet. If index is not valid,
* the function returns null.
* NOTE(review): the return type is a UnicodeString value, so "null" cannot be
* literal here; presumably an empty or bogus string is returned. Confirm
* against the implementation.
* @param index the index of the ruleset
* @return the name of the index'th public ruleSet.
* @stable ICU 2.0
*/
virtual UnicodeString getRuleSetName(int32_t index) const;
/**
* Return the number of locales for which we have localized rule set display names.
* @return the number of locales for which we have localized rule set display names.
* @stable ICU 3.2
*/
virtual int32_t getNumberOfRuleSetDisplayNameLocales(void) const;
/**
* Return the index'th display name locale.
* @param index the index of the locale
* @param status set to a failure code when this function fails
* @return the locale
* @see #getNumberOfRuleSetDisplayNameLocales
* @stable ICU 3.2
*/
virtual Locale getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const;
/**
* Return the display name of the index'th rule set for the provided locale. Indices are in
* the same order as those used by getRuleSetName. The locale is matched against the locales for
* which there is display name data, using normal fallback rules. If no locale matches,
* the default display name is returned. (The default display names are the internal rule
* set names minus the leading '%'.)
* @param index the index of the rule set
* @param locale the locale (returned by getRuleSetDisplayNameLocale) for which the localized
* display name is desired
* @return the display name for the given index, which might be bogus if there is an error
* @see #getRuleSetName
* @stable ICU 3.2
*/
virtual UnicodeString getRuleSetDisplayName(int32_t index,
const Locale& locale = Locale::getDefault());
/**
* Return the rule set display name for the provided rule set and locale.
* The locale is matched against the locales for which there is display name data, using
* normal fallback rules. If no locale matches, the default display name is returned.
* @param ruleSetName the name of the rule set whose display name is desired
* @param locale the locale for which the localized display name is desired
* @return the display name for the rule set
* @stable ICU 3.2
* @see #getRuleSetDisplayName
*/
virtual UnicodeString getRuleSetDisplayName(const UnicodeString& ruleSetName,
const Locale& locale = Locale::getDefault());
// Keep the NumberFormat::format overloads that are not redeclared below
// visible through this class (they would otherwise be hidden by name).
using NumberFormat::format;
/**
* Formats the specified 32-bit number using the default ruleset.
* @param number The number to format.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @return A textual representation of the number.
* @stable ICU 2.0
*/
virtual UnicodeString& format(int32_t number,
UnicodeString& toAppendTo,
FieldPosition& pos) const override;
/**
* Formats the specified 64-bit number using the default ruleset.
* @param number The number to format.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @return A textual representation of the number.
* @stable ICU 2.1
*/
virtual UnicodeString& format(int64_t number,
UnicodeString& toAppendTo,
FieldPosition& pos) const override;
/**
* Formats the specified number using the default ruleset.
* @param number The number to format.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @return A textual representation of the number.
* @stable ICU 2.0
*/
virtual UnicodeString& format(double number,
UnicodeString& toAppendTo,
FieldPosition& pos) const override;
/**
* Formats the specified 32-bit number using the named ruleset.
* @param number The number to format.
* @param ruleSetName The name of the rule set to format the number with.
* This must be the name of a valid public rule set for this formatter.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @param status the status
* @return A textual representation of the number.
* @stable ICU 2.0
*/
virtual UnicodeString& format(int32_t number,
const UnicodeString& ruleSetName,
UnicodeString& toAppendTo,
FieldPosition& pos,
UErrorCode& status) const;
/**
* Formats the specified 64-bit number using the named ruleset.
* @param number The number to format.
* @param ruleSetName The name of the rule set to format the number with.
* This must be the name of a valid public rule set for this formatter.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @param status the status
* @return A textual representation of the number.
* @stable ICU 2.1
*/
virtual UnicodeString& format(int64_t number,
const UnicodeString& ruleSetName,
UnicodeString& toAppendTo,
FieldPosition& pos,
UErrorCode& status) const;
/**
* Formats the specified number using the named ruleset.
* @param number The number to format.
* @param ruleSetName The name of the rule set to format the number with.
* This must be the name of a valid public rule set for this formatter.
* @param toAppendTo the string that will hold the (appended) result
* @param pos the fieldposition
* @param status the status
* @return A textual representation of the number.
* @stable ICU 2.0
*/
virtual UnicodeString& format(double number,
const UnicodeString& ruleSetName,
UnicodeString& toAppendTo,
FieldPosition& pos,
UErrorCode& status) const;
protected:
/**
* Format a decimal number.
* The number is a DecimalQuantity wrapper onto a floating point decimal number.
* The default implementation in NumberFormat converts the decimal number
* to a double and formats that. Subclasses of NumberFormat that want
* to specifically handle big decimal numbers must override this method.
* class DecimalFormat does so.
*
* @param number The number, a DecimalQuantity format Decimal Floating Point.
* @param appendTo Output parameter to receive result.
* Result is appended to existing contents.
* @param pos On input: an alignment field, if desired.
* On output: the offsets of the alignment field.
* @param status Output param filled with success/failure status.
* @return Reference to 'appendTo' parameter.
* @internal
*/
virtual UnicodeString& format(const number::impl::DecimalQuantity &number,
UnicodeString& appendTo,
FieldPosition& pos,
UErrorCode& status) const override;
public:
// Keep the NumberFormat::parse overloads that are not redeclared below
// visible through this class (they would otherwise be hidden by name).
using NumberFormat::parse;
/**
* Parses the specified string, beginning at the specified position, according
* to this formatter's rules. This will match the string against all of the
* formatter's public rule sets and return the value corresponding to the longest
* parseable substring. This function's behavior is affected by the lenient
* parse mode.
* @param text The string to parse
* @param result the result of the parse, either a double or a long.
* @param parsePosition On entry, contains the position of the first character
* in "text" to examine. On exit, has been updated to contain the position
* of the first character in "text" that wasn't consumed by the parse.
* @see #setLenient
* @stable ICU 2.0
*/
virtual void parse(const UnicodeString& text,
Formattable& result,
ParsePosition& parsePosition) const override;
#if !UCONFIG_NO_COLLATION
/**
* Turns lenient parse mode on and off.
*
* When in lenient parse mode, the formatter uses a Collator for parsing the text.
* Only primary differences are treated as significant. This means that case
* differences, accent differences, alternate spellings of the same letter
* (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
* matching the text. In many cases, numerals will be accepted in place of words
* or phrases as well.
*
* For example, all of the following will correctly parse as 255 in English in
* lenient-parse mode:
* <br>"two hundred fifty-five"
* <br>"two hundred fifty five"
* <br>"TWO HUNDRED FIFTY-FIVE"
* <br>"twohundredfiftyfive"
* <br>"2 hundred fifty-5"
*
* The Collator used is determined by the locale that was
* passed to this object on construction. The description passed to this object
* on construction may supply additional collation rules that are appended to the
* end of the default collator for the locale, enabling additional equivalences
* (such as adding more ignorable characters or permitting spelled-out version of
* symbols; see the demo program for examples).
*
* It's important to emphasize that even strict parsing is relatively lenient: it
* will accept some text that it won't produce as output. In English, for example,
* it will correctly parse "two hundred zero" and "fifteen hundred".
*
* @param enabled If true, turns lenient-parse mode on; if false, turns it off.
* @see RuleBasedCollator
* @stable ICU 2.0
*/
virtual void setLenient(UBool enabled) override;
/**
* Returns true if lenient-parse mode is turned on. Lenient parsing is off
* by default.
* @return true if lenient-parse mode is turned on.
* @see #setLenient
* @stable ICU 2.0
*/
virtual inline UBool isLenient(void) const override;
#endif
/**
* Override the default rule set to use. If ruleSetName is null, reset
* to the initial default rule set. If the rule set is not a public rule set name,
* U_ILLEGAL_ARGUMENT_ERROR is returned in status.
* NOTE(review): ruleSetName is a reference and cannot literally be null;
* presumably an empty (or bogus) string is what triggers the reset. Confirm
* against the implementation.
* @param ruleSetName the name of the rule set, or null to reset the initial default.
* @param status set to failure code when a problem occurs.
* @stable ICU 2.6
*/
virtual void setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status);
/**
* Return the name of the current default rule set. If the current rule set is
* not public, returns a bogus (and empty) UnicodeString.
* @return the name of the current default rule set
* @stable ICU 3.0
*/
virtual UnicodeString getDefaultRuleSetName() const;
/**
* Set a particular UDisplayContext value in the formatter, such as
* UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see
* NumberFormat.
* @param value The UDisplayContext value to set.
* @param status Input/output status. If at entry this indicates a failure
* status, the function will do nothing; otherwise this will be
* updated with any new status from the function.
* @stable ICU 53
*/
virtual void setContext(UDisplayContext value, UErrorCode& status) override;
/**
* Get the rounding mode.
* @return A rounding mode
* @stable ICU 60
*/
virtual ERoundingMode getRoundingMode(void) const override;
/**
* Set the rounding mode.
* @param roundingMode A rounding mode
* @stable ICU 60
*/
virtual void setRoundingMode(ERoundingMode roundingMode) override;
public:
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
*
* @return the UClassID for this class.
* @stable ICU 2.8
*/
static UClassID U_EXPORT2 getStaticClassID(void);
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*
* @return the UClassID for the actual class of this object.
* @stable ICU 2.8
*/
virtual UClassID getDynamicClassID(void) const override;
/**
* Sets the decimal format symbols, which is generally not changed
* by the programmer or user. The formatter takes ownership of
* symbolsToAdopt; the client must not delete it.
*
* @param symbolsToAdopt DecimalFormatSymbols to be adopted.
* @stable ICU 49
*/
virtual void adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt);
/**
* Sets the decimal format symbols, which is generally not changed
* by the programmer or user. A clone of the symbols is created and
* the symbols is _not_ adopted; the client is still responsible for
* deleting it.
*
* @param symbols DecimalFormatSymbols.
* @stable ICU 49
*/
virtual void setDecimalFormatSymbols(const DecimalFormatSymbols& symbols);
private:
RuleBasedNumberFormat() = delete; // no default constructor: explicitly deleted
// this will ref the localizations if they are not nullptr
// caller must deref to get adoption
RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* localizations,
const Locale& locale, UParseError& perror, UErrorCode& status);
// Internal helpers; their definitions are not in this header.
void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status);
void initCapitalizationContextInfo(const Locale& thelocale);
void dispose();
void stripWhitespace(UnicodeString& src);
void initDefaultRuleSet();
NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const;
/* friend access */
friend class NFSubstitution;
friend class NFRule;
friend class NFRuleSet;
friend class FractionalPartSubstitution;
// Private accessors; as private members they are reachable only from this
// class and the friend classes declared above.
inline NFRuleSet * getDefaultRuleSet() const;
const RuleBasedCollator * getCollator() const;
DecimalFormatSymbols * initializeDecimalFormatSymbols(UErrorCode &status);
const DecimalFormatSymbols * getDecimalFormatSymbols() const;
NFRule * initializeDefaultInfinityRule(UErrorCode &status);
const NFRule * getDefaultInfinityRule() const;
NFRule * initializeDefaultNaNRule(UErrorCode &status);
const NFRule * getDefaultNaNRule() const;
PluralFormat *createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const;
UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult, UErrorCode& status) const;
// Private format overloads that take the rule set object directly rather than its name.
UnicodeString& format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const;
void format(double number, NFRuleSet& rs, UnicodeString& toAppendTo, UErrorCode& status) const;
private:
// Data members. NOTE(review): ownership and lifetime of the raw pointers below
// are not visible from this header; confirm against the implementation.
NFRuleSet **fRuleSets;
UnicodeString* ruleSetDescriptions;
int32_t numRuleSets;
NFRuleSet *defaultRuleSet; // value returned by getDefaultRuleSet()
Locale locale;
RuleBasedCollator* collator;
DecimalFormatSymbols* decimalFormatSymbols;
NFRule *defaultInfinityRule;
NFRule *defaultNaNRule;
ERoundingMode fRoundingMode;
UBool lenient; // value returned by isLenient()
UnicodeString* lenientParseRules;
LocalizationInfo* localizations;
UnicodeString originalDescription;
UBool capitalizationInfoSet;
UBool capitalizationForUIListMenu;
UBool capitalizationForStandAlone;
BreakIterator* capitalizationBrkIter;
};
// ---------------
#if !UCONFIG_NO_COLLATION
/**
 * Inline definition of isLenient(): reports the stored lenient-parse flag.
 * @return the current value of the `lenient` member.
 */
inline UBool RuleBasedNumberFormat::isLenient() const
{
    return lenient;
}
#endif
/**
 * Inline definition of the private getDefaultRuleSet() accessor.
 * @return the current value of the `defaultRuleSet` member.
 */
inline NFRuleSet* RuleBasedNumberFormat::getDefaultRuleSet() const
{
    return defaultRuleSet;
}
U_NAMESPACE_END
/* U_HAVE_RBNF */
#endif
#endif /* U_SHOW_CPLUSPLUS_API */
/* RBNF_H */
#endif
|