/*
*******************************************************************************
* Copyright (C) 2007-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*

* File PLURFMT.H
********************************************************************************
*/

12 #ifndef PLURFMT
13 #define PLURFMT
14 
15 #include "unicode/utypes.h"
16 
17 /**
18  * \file
19  * \brief C++ API: PluralFormat object
20  */
21 
22 #if !UCONFIG_NO_FORMATTING
23 
24 #include "unicode/messagepattern.h"
25 #include "unicode/numfmt.h"
26 #include "unicode/plurrule.h"
27 
28 U_NAMESPACE_BEGIN
29 
30 class Hashtable;
31 class NFRule;
32 
/**
 * <p>
 * <code>PluralFormat</code> supports the creation of internationalized
 * messages with plural inflection. It is based on <i>plural
 * selection</i>, i.e. the caller specifies messages for each
 * plural case that can appear in the user's language and the
 * <code>PluralFormat</code> selects the appropriate message based on
 * the number.
 * </p>
 * <h4>The Problem of Plural Forms in Internationalized Messages</h4>
 * <p>
 * Different languages have different ways to inflect
 * plurals. Creating internationalized messages that include plural
 * forms is only feasible when the framework is able to handle plural
 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code>
 * doesn't handle this well, because it attaches a number interval to
 * each message and selects the message whose interval contains a
 * given number. This can only handle a finite number of
 * intervals. But in some languages, like Polish, one plural case
 * applies to infinitely many intervals (e.g., the plural case applies to
 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or
 * 14). Thus <code>ChoiceFormat</code> is not adequate.
 * </p><p>
 * <code>PluralFormat</code> deals with this by breaking the problem
 * into two parts:
 * <ul>
 * <li>It uses <code>PluralRules</code> that can define more complex
 *     conditions for a plural case than just a single interval. These plural
 *     rules define both what plural cases exist in a language, and to
 *     which numbers these cases apply.
 * <li>It provides predefined plural rules for many languages. Thus, the programmer
 *     need not worry about the plural cases of a language and
 *     does not have to define the plural cases; they can simply
 *     use the predefined keywords. The whole plural formatting of messages can
 *     be done using localized patterns from resource bundles. For predefined plural
 *     rules, see the CLDR <i>Language Plural Rules</i> page at
 *    http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
 * </ul>
 * </p>
 * <h4>Usage of <code>PluralFormat</code></h4>
 * <p>Note: Typically, plural formatting is done via <code>MessageFormat</code>
 * with a <code>plural</code> argument type,
 * rather than using a stand-alone <code>PluralFormat</code>.
 * </p><p>
 * This discussion assumes that you use <code>PluralFormat</code> with
 * a predefined set of plural rules. You can create one using one of
 * the constructors that takes a <code>locale</code> object. To
 * specify the message pattern, you can either pass it to the
 * constructor or set it explicitly using the
 * <code>applyPattern()</code> method. The <code>format()</code>
 * method takes a number object and selects the message of the
 * matching plural case. This message will be returned.
 * </p>
 * <h5>Patterns and Their Interpretation</h5>
 * <p>
 * The pattern text defines the message output for each plural case of the
 * specified locale. Syntax:
 * <pre>
 * pluralStyle = [offsetValue] (selector '{' message '}')+
 * offsetValue = "offset:" number
 * selector = explicitValue | keyword
 * explicitValue = '=' number  // adjacent, no white space in between
 * keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
 * message: see {@link MessageFormat}
 * </pre>
 * Pattern_White_Space between syntax elements is ignored, except
 * between the {curly braces} and their sub-message,
 * and between the '=' and the number of an explicitValue.
 *
 * </p><p>
 * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
 * 'other'. You always have to define a message text for the default plural case
 * <code>other</code> which is contained in every rule set.
 * If you do not specify a message text for a particular plural case, the
 * message text of the plural case <code>other</code> gets assigned to this
 * plural case.
 * </p><p>
 * When formatting, the input number is first matched against the explicitValue clauses.
 * If there is no exact-number match, then a keyword is selected by calling
 * the <code>PluralRules</code> with the input number <em>minus the offset</em>.
 * (The offset defaults to 0 if it is omitted from the pattern string.)
 * If there is no clause with that keyword, then the "other" clause is returned.
 * </p><p>
 * An unquoted pound sign (<code>#</code>) in the selected sub-message
 * itself (i.e., outside of arguments nested in the sub-message)
 * is replaced by the input number minus the offset.
 * The number-minus-offset value is formatted using a
 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
 * need special number formatting, you have to use a <code>MessageFormat</code>
 * and explicitly specify a <code>NumberFormat</code> argument.
 * <strong>Note:</strong> That argument is formatted without subtracting the offset!
 * If you need a custom format and have a non-zero offset, then you need to pass the
 * number-minus-offset value as a separate parameter.
 * </p>
 * For a usage example, see the {@link MessageFormat} class documentation.
 *
 * <h4>Defining Custom Plural Rules</h4>
 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can
 * create a <code>PluralRules</code> object and pass it to
 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this
 * constructor, this locale will be used to format the number in the message
 * texts.
 * </p><p>
 * For more information about <code>PluralRules</code>, see
 * {@link PluralRules}.
 * </p>
 *
 * ported from Java
 * @stable ICU 4.0
 */

144 class U_I18N_API PluralFormat : public Format {
145 public:
146 
147     /**
148      * Creates a new cardinal-number <code>PluralFormat</code> for the default locale.
149      * This locale will be used to get the set of plural rules and for standard
150      * number formatting.
151      * @param status  output param set to success/failure code on exit, which
152      *                must not indicate a failure before the function call.
153      * @stable ICU 4.0
154      */
155     PluralFormat(UErrorCode& status);
156 
157     /**
158      * Creates a new cardinal-number <code>PluralFormat</code> for a given locale.
159      * @param locale the <code>PluralFormat</code> will be configured with
160      *               rules for this locale. This locale will also be used for
161      *               standard number formatting.
162      * @param status output param set to success/failure code on exit, which
163      *               must not indicate a failure before the function call.
164      * @stable ICU 4.0
165      */
166     PluralFormat(const Locale& locale, UErrorCode& status);
167 
168     /**
169      * Creates a new <code>PluralFormat</code> for a given set of rules.
170      * The standard number formatting will be done using the default locale.
171      * @param rules   defines the behavior of the <code>PluralFormat</code>
172      *                object.
173      * @param status  output param set to success/failure code on exit, which
174      *                must not indicate a failure before the function call.
175      * @stable ICU 4.0
176      */
177     PluralFormat(const PluralRules& rules, UErrorCode& status);
178 
179     /**
180      * Creates a new <code>PluralFormat</code> for a given set of rules.
181      * The standard number formatting will be done using the given locale.
182      * @param locale  the default number formatting will be done using this
183      *                locale.
184      * @param rules   defines the behavior of the <code>PluralFormat</code>
185      *                object.
186      * @param status  output param set to success/failure code on exit, which
187      *                must not indicate a failure before the function call.
188      * @stable ICU 4.0
189 	 * <p>
190 	 * <h4>Sample code</h4>
191 	 * \snippet samples/plurfmtsample/plurfmtsample.cpp PluralFormatExample1
192 	 * \snippet samples/plurfmtsample/plurfmtsample.cpp PluralFormatExample
193 	 * <p>
194      */
195     PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status);
196 
197     /**
198      * Creates a new <code>PluralFormat</code> for the plural type.
199      * The standard number formatting will be done using the given locale.
200      * @param locale  the default number formatting will be done using this
201      *                locale.
202      * @param type    The plural type (e.g., cardinal or ordinal).
203      * @param status  output param set to success/failure code on exit, which
204      *                must not indicate a failure before the function call.
205      * @stable ICU 50
206      */
207     PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status);
208 
209     /**
210      * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string.
211      * The default locale will be used to get the set of plural rules and for
212      * standard number formatting.
213      * @param  pattern the pattern for this <code>PluralFormat</code>.
214      *                 errors are returned to status if the pattern is invalid.
215      * @param status   output param set to success/failure code on exit, which
216      *                 must not indicate a failure before the function call.
217      * @stable ICU 4.0
218      */
219     PluralFormat(const UnicodeString& pattern, UErrorCode& status);
220 
221     /**
222      * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string and
223      * locale.
224      * The locale will be used to get the set of plural rules and for
225      * standard number formatting.
226      * @param locale   the <code>PluralFormat</code> will be configured with
227      *                 rules for this locale. This locale will also be used for
228      *                 standard number formatting.
229      * @param pattern  the pattern for this <code>PluralFormat</code>.
230      *                 errors are returned to status if the pattern is invalid.
231      * @param status   output param set to success/failure code on exit, which
232      *                 must not indicate a failure before the function call.
233      * @stable ICU 4.0
234      */
235     PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status);
236 
237     /**
238      * Creates a new <code>PluralFormat</code> for a given set of rules, a
239      * pattern and a locale.
240      * @param rules    defines the behavior of the <code>PluralFormat</code>
241      *                 object.
242      * @param pattern  the pattern for this <code>PluralFormat</code>.
243      *                 errors are returned to status if the pattern is invalid.
244      * @param status   output param set to success/failure code on exit, which
245      *                 must not indicate a failure before the function call.
246      * @stable ICU 4.0
247      */
248     PluralFormat(const PluralRules& rules,
249                  const UnicodeString& pattern,
250                  UErrorCode& status);
251 
252     /**
253      * Creates a new <code>PluralFormat</code> for a given set of rules, a
254      * pattern and a locale.
255      * @param locale  the <code>PluralFormat</code> will be configured with
256      *                rules for this locale. This locale will also be used for
257      *                standard number formatting.
258      * @param rules   defines the behavior of the <code>PluralFormat</code>
259      *                object.
260      * @param pattern the pattern for this <code>PluralFormat</code>.
261      *                errors are returned to status if the pattern is invalid.
262      * @param status  output param set to success/failure code on exit, which
263      *                must not indicate a failure before the function call.
264      * @stable ICU 4.0
265      */
266     PluralFormat(const Locale& locale,
267                  const PluralRules& rules,
268                  const UnicodeString& pattern,
269                  UErrorCode& status);
270 
271     /**
272      * Creates a new <code>PluralFormat</code> for a plural type, a
273      * pattern and a locale.
274      * @param locale  the <code>PluralFormat</code> will be configured with
275      *                rules for this locale. This locale will also be used for
276      *                standard number formatting.
277      * @param type    The plural type (e.g., cardinal or ordinal).
278      * @param pattern the pattern for this <code>PluralFormat</code>.
279      *                errors are returned to status if the pattern is invalid.
280      * @param status  output param set to success/failure code on exit, which
281      *                must not indicate a failure before the function call.
282      * @stable ICU 50
283      */
284     PluralFormat(const Locale& locale,
285                  UPluralType type,
286                  const UnicodeString& pattern,
287                  UErrorCode& status);
288 
289     /**
290       * copy constructor.
291       * @stable ICU 4.0
292       */
293     PluralFormat(const PluralFormat& other);
294 
295     /**
296      * Destructor.
297      * @stable ICU 4.0
298      */
299     virtual ~PluralFormat();
300 
301     /**
302      * Sets the pattern used by this plural format.
303      * The method parses the pattern and creates a map of format strings
304      * for the plural rules.
305      * Patterns and their interpretation are specified in the class description.
306      *
307      * @param pattern the pattern for this plural format
308      *                errors are returned to status if the pattern is invalid.
309      * @param status  output param set to success/failure code on exit, which
310      *                must not indicate a failure before the function call.
311      * @stable ICU 4.0
312      */
313     void applyPattern(const UnicodeString& pattern, UErrorCode& status);
314 
315 
316     using Format::format;
317 
318     /**
319      * Formats a plural message for a given number.
320      *
321      * @param number  a number for which the plural message should be formatted
322      *                for. If no pattern has been applied to this
323      *                <code>PluralFormat</code> object yet, the formatted number
324      *                will be returned.
325      * @param status  output param set to success/failure code on exit, which
326      *                must not indicate a failure before the function call.
327      * @return        the string containing the formatted plural message.
328      * @stable ICU 4.0
329      */
330     UnicodeString format(int32_t number, UErrorCode& status) const;
331 
332     /**
333      * Formats a plural message for a given number.
334      *
335      * @param number  a number for which the plural message should be formatted
336      *                for. If no pattern has been applied to this
337      *                PluralFormat object yet, the formatted number
338      *                will be returned.
339      * @param status  output param set to success or failure code on exit, which
340      *                must not indicate a failure before the function call.
341      * @return        the string containing the formatted plural message.
342      * @stable ICU 4.0
343      */
344     UnicodeString format(double number, UErrorCode& status) const;
345 
346     /**
347      * Formats a plural message for a given number.
348      *
349      * @param number   a number for which the plural message should be formatted
350      *                 for. If no pattern has been applied to this
351      *                 <code>PluralFormat</code> object yet, the formatted number
352      *                 will be returned.
353      * @param appendTo output parameter to receive result.
354      *                 result is appended to existing contents.
355      * @param pos      On input: an alignment field, if desired.
356      *                 On output: the offsets of the alignment field.
357      * @param status   output param set to success/failure code on exit, which
358      *                 must not indicate a failure before the function call.
359      * @return         the string containing the formatted plural message.
360      * @stable ICU 4.0
361      */
362     UnicodeString& format(int32_t number,
363                           UnicodeString& appendTo,
364                           FieldPosition& pos,
365                           UErrorCode& status) const;
366 
367     /**
368      * Formats a plural message for a given number.
369      *
370      * @param number   a number for which the plural message should be formatted
371      *                 for. If no pattern has been applied to this
372      *                 PluralFormat object yet, the formatted number
373      *                 will be returned.
374      * @param appendTo output parameter to receive result.
375      *                 result is appended to existing contents.
376      * @param pos      On input: an alignment field, if desired.
377      *                 On output: the offsets of the alignment field.
378      * @param status   output param set to success/failure code on exit, which
379      *                 must not indicate a failure before the function call.
380      * @return         the string containing the formatted plural message.
381      * @stable ICU 4.0
382      */
383     UnicodeString& format(double number,
384                           UnicodeString& appendTo,
385                           FieldPosition& pos,
386                           UErrorCode& status) const;
387 
388 #ifndef U_HIDE_DEPRECATED_API
389     /**
390      * Sets the locale used by this <code>PluraFormat</code> object.
391      * Note: Calling this method resets this <code>PluraFormat</code> object,
392      *     i.e., a pattern that was applied previously will be removed,
393      *     and the NumberFormat is set to the default number format for
394      *     the locale.  The resulting format behaves the same as one
395      *     constructed from {@link #PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status)}
396      *     with UPLURAL_TYPE_CARDINAL.
397      * @param locale  the <code>locale</code> to use to configure the formatter.
398      * @param status  output param set to success/failure code on exit, which
399      *                must not indicate a failure before the function call.
400      * @deprecated ICU 50 This method clears the pattern and might create
401      *             a different kind of PluralRules instance;
402      *             use one of the constructors to create a new instance instead.
403      */
404     void setLocale(const Locale& locale, UErrorCode& status);
405 #endif  /* U_HIDE_DEPRECATED_API */
406 
407     /**
408       * Sets the number format used by this formatter.  You only need to
409       * call this if you want a different number format than the default
410       * formatter for the locale.
411       * @param format  the number format to use.
412       * @param status  output param set to success/failure code on exit, which
413       *                must not indicate a failure before the function call.
414       * @stable ICU 4.0
415       */
416     void setNumberFormat(const NumberFormat* format, UErrorCode& status);
417 
418     /**
419        * Assignment operator
420        *
421        * @param other    the PluralFormat object to copy from.
422        * @stable ICU 4.0
423        */
424     PluralFormat& operator=(const PluralFormat& other);
425 
426     /**
427       * Return true if another object is semantically equal to this one.
428       *
429       * @param other    the PluralFormat object to be compared with.
430       * @return         true if other is semantically equal to this.
431       * @stable ICU 4.0
432       */
433     virtual UBool operator==(const Format& other) const;
434 
435     /**
436      * Return true if another object is semantically unequal to this one.
437      *
438      * @param other    the PluralFormat object to be compared with.
439      * @return         true if other is semantically unequal to this.
440      * @stable ICU 4.0
441      */
442     virtual UBool operator!=(const Format& other) const;
443 
444     /**
445      * Clones this Format object polymorphically.  The caller owns the
446      * result and should delete it when done.
447      * @stable ICU 4.0
448      */
449     virtual Format* clone(void) const;
450 
451    /**
452     * Formats a plural message for a number taken from a Formattable object.
453     *
454     * @param obj       The object containing a number for which the
455     *                  plural message should be formatted.
456     *                  The object must be of a numeric type.
457     * @param appendTo  output parameter to receive result.
458     *                  Result is appended to existing contents.
459     * @param pos       On input: an alignment field, if desired.
460     *                  On output: the offsets of the alignment field.
461     * @param status    output param filled with success/failure status.
462     * @return          Reference to 'appendTo' parameter.
463     * @stable ICU 4.0
464     */
465    UnicodeString& format(const Formattable& obj,
466                          UnicodeString& appendTo,
467                          FieldPosition& pos,
468                          UErrorCode& status) const;
469 
470    /**
471     * Returns the pattern from applyPattern() or constructor().
472     *
473     * @param  appendTo  output parameter to receive result.
474      *                  Result is appended to existing contents.
475     * @return the UnicodeString with inserted pattern.
476     * @stable ICU 4.0
477     */
478    UnicodeString& toPattern(UnicodeString& appendTo);
479 
480    /**
481     * This method is not yet supported by <code>PluralFormat</code>.
482     * <P>
483     * Before calling, set parse_pos.index to the offset you want to start
484     * parsing at in the source. After calling, parse_pos.index is the end of
485     * the text you parsed. If error occurs, index is unchanged.
486     * <P>
487     * When parsing, leading whitespace is discarded (with a successful parse),
488     * while trailing whitespace is left as is.
489     * <P>
490     * See Format::parseObject() for more.
491     *
492     * @param source    The string to be parsed into an object.
493     * @param result    Formattable to be set to the parse result.
494     *                  If parse fails, return contents are undefined.
495     * @param parse_pos The position to start parsing at. Upon return
496     *                  this param is set to the position after the
497     *                  last character successfully parsed. If the
498     *                  source is not parsed successfully, this param
499     *                  will remain unchanged.
500     * @stable ICU 4.0
501     */
502    virtual void parseObject(const UnicodeString& source,
503                             Formattable& result,
504                             ParsePosition& parse_pos) const;
505 
506     /**
507      * ICU "poor man's RTTI", returns a UClassID for this class.
508      *
509      * @stable ICU 4.0
510      *
511      */
512     static UClassID U_EXPORT2 getStaticClassID(void);
513 
514     /**
515      * ICU "poor man's RTTI", returns a UClassID for the actual class.
516      *
517      * @stable ICU 4.0
518      */
519      virtual UClassID getDynamicClassID() const;
520 
521 #if (defined(__xlC__) && (__xlC__ < 0x0C00)) || (U_PLATFORM == U_PF_OS390) || (U_PLATFORM ==U_PF_OS400)
522 // Work around a compiler bug on xlC 11.1 on AIX 7.1 that would
523 // prevent PluralSelectorAdapter from implementing private PluralSelector.
524 // xlC error message:
525 // 1540-0300 (S) The "private" member "class icu_49::PluralFormat::PluralSelector" cannot be accessed.
526 public:
527 #else
528 private:
529 #endif
530      /**
531       * @internal
532       */
533     class U_I18N_API PluralSelector : public UMemory {
534       public:
535         virtual ~PluralSelector();
536         /**
537          * Given a number, returns the appropriate PluralFormat keyword.
538          *
539          * @param context worker object for the selector.
540          * @param number The number to be plural-formatted.
541          * @param ec Error code.
542          * @return The selected PluralFormat keyword.
543          * @internal
544          */
545         virtual UnicodeString select(void *context, double number, UErrorCode& ec) const = 0;
546     };
547 
548     /**
549      * @internal
550      */
551     class U_I18N_API PluralSelectorAdapter : public PluralSelector {
552       public:
PluralSelectorAdapter()553         PluralSelectorAdapter() : pluralRules(NULL) {
554         }
555 
556         virtual ~PluralSelectorAdapter();
557 
558         virtual UnicodeString select(void *context, double number, UErrorCode& /*ec*/) const; /**< @internal */
559 
560         void reset();
561 
562         PluralRules* pluralRules;
563     };
564 
565 #if defined(__xlC__)
566 // End of xlC bug workaround, keep remaining definitions private.
567 private:
568 #endif
569     Locale  locale;
570     MessagePattern msgPattern;
571     NumberFormat*  numberFormat;
572     double offset;
573     PluralSelectorAdapter pluralRulesWrapper;
574 
575     PluralFormat();   // default constructor not implemented
576     void init(const PluralRules* rules, UPluralType type, UErrorCode& status);
577     /**
578      * Copies dynamically allocated values (pointer fields).
579      * Others are copied using their copy constructors and assignment operators.
580      */
581     void copyObjects(const PluralFormat& other);
582 
583     UnicodeString& format(const Formattable& numberObject, double number,
584                           UnicodeString& appendTo,
585                           FieldPosition& pos,
586                           UErrorCode& status) const; /**< @internal */
587 
588     /**
589      * Finds the PluralFormat sub-message for the given number, or the "other" sub-message.
590      * @param pattern A MessagePattern.
591      * @param partIndex the index of the first PluralFormat argument style part.
592      * @param selector the PluralSelector for mapping the number (minus offset) to a keyword.
593      * @param context worker object for the selector.
594      * @param number a number to be matched to one of the PluralFormat argument's explicit values,
595      *        or mapped via the PluralSelector.
596      * @param ec ICU error code.
597      * @return the sub-message start part index.
598      */
599     static int32_t findSubMessage(
600          const MessagePattern& pattern, int32_t partIndex,
601          const PluralSelector& selector, void *context, double number, UErrorCode& ec); /**< @internal */
602 
603     void parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner,
604         Formattable& result, FieldPosition& pos) const;
605 
606     friend class MessageFormat;
607     friend class NFRule;
608 };
609 
610 U_NAMESPACE_END
611 
612 #endif /* #if !UCONFIG_NO_FORMATTING */
613 
614 #endif // _PLURFMT
615 //eof
616