1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 * Copyright (C) 2007-2013, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************************
7 *
8 * File MSGFMT.H
9 *
10 * Modification History:
11 *
12 *   Date        Name        Description
13 *   02/19/97    aliu        Converted from java.
14 *   03/20/97    helena      Finished first cut of implementation.
15 *   07/22/98    stephen     Removed operator!= (defined in Format)
16 *   08/19/2002  srl         Removing Javaisms
17 *******************************************************************************/
18 
19 #ifndef MSGFMT_H
20 #define MSGFMT_H
21 
22 #include "unicode/utypes.h"
23 
24 /**
25  * \file
26  * \brief C++ API: Formats messages in a language-neutral way.
27  */
28 
29 #if !UCONFIG_NO_FORMATTING
30 
31 #include "unicode/format.h"
32 #include "unicode/locid.h"
33 #include "unicode/messagepattern.h"
34 #include "unicode/parseerr.h"
35 #include "unicode/plurfmt.h"
36 #include "unicode/plurrule.h"
37 
38 U_CDECL_BEGIN
39 // Forward declaration.
40 struct UHashtable;
41 typedef struct UHashtable UHashtable; /**< @internal */
42 U_CDECL_END
43 
44 U_NAMESPACE_BEGIN
45 
46 class AppendableWrapper;
47 class DateFormat;
48 class NumberFormat;
49 
50 /**
51  * <p>MessageFormat prepares strings for display to users,
52  * with optional arguments (variables/placeholders).
53  * The arguments can occur in any order, which is necessary for translation
54  * into languages with different grammars.
55  *
56  * <p>A MessageFormat is constructed from a <em>pattern</em> string
57  * with arguments in {curly braces} which will be replaced by formatted values.
58  *
59  * <p><code>MessageFormat</code> differs from the other <code>Format</code>
60  * classes in that you create a <code>MessageFormat</code> object with one
61  * of its constructors (not with a <code>createInstance</code> style factory
62  * method). Factory methods aren't necessary because <code>MessageFormat</code>
63  * itself doesn't implement locale-specific behavior. Any locale-specific
64  * behavior is defined by the pattern that you provide and the
65  * subformats used for inserted arguments.
66  *
67  * <p>Arguments can be named (using identifiers) or numbered (using small ASCII-digit integers).
68  * Some of the API methods work only with argument numbers; if the pattern has named arguments,
69  * they set a failure status or leave this object unchanged (see {@link #usesNamedArguments()}).
70  *
71  * <p>An argument might not specify any format type. In this case,
72  * a Number value is formatted with a default (for the locale) NumberFormat,
73  * a Date value is formatted with a default (for the locale) DateFormat,
74  * and for any other value its toString() value is used.
75  *
76  * <p>An argument might specify a "simple" type for which the specified
77  * Format object is created, cached and used.
78  *
79  * <p>An argument might have a "complex" type with nested MessageFormat sub-patterns.
80  * During formatting, one of these sub-messages is selected according to the argument value
81  * and recursively formatted.
82  *
83  * <p>After construction, a custom Format object can be set for
84  * a top-level argument, overriding the default formatting and parsing behavior
85  * for that argument.
86  * However, custom formatting can be achieved more simply by writing
87  * a typeless argument in the pattern string
88  * and supplying it with a preformatted string value.
89  *
90  * <p>When formatting, MessageFormat takes a collection of argument values
91  * and writes an output string.
92  * The argument values may be passed as an array
93  * (when the pattern contains only numbered arguments)
94  * or as an array of names and an array of arguments (which works for both named
95  * and numbered arguments).
96  *
97  * <p>Each argument is matched with one of the input values by array index or argument name
98  * and formatted according to its pattern specification
99  * (or using a custom Format object if one was set).
100  * A numbered pattern argument is matched with an argument name that contains that number
101  * as an ASCII-decimal-digit string (without leading zero).
102  *
103  * <h4><a name="patterns">Patterns and Their Interpretation</a></h4>
104  *
105  * <code>MessageFormat</code> uses patterns of the following form:
106  * <pre>
107  * message = messageText (argument messageText)*
108  * argument = noneArg | simpleArg | complexArg
109  * complexArg = choiceArg | pluralArg | selectArg | selectordinalArg
110  *
111  * noneArg = '{' argNameOrNumber '}'
112  * simpleArg = '{' argNameOrNumber ',' argType [',' argStyle] '}'
113  * choiceArg = '{' argNameOrNumber ',' "choice" ',' choiceStyle '}'
114  * pluralArg = '{' argNameOrNumber ',' "plural" ',' pluralStyle '}'
115  * selectArg = '{' argNameOrNumber ',' "select" ',' selectStyle '}'
116  * selectordinalArg = '{' argNameOrNumber ',' "selectordinal" ',' pluralStyle '}'
117  *
118  * choiceStyle: see {@link ChoiceFormat}
119  * pluralStyle: see {@link PluralFormat}
120  * selectStyle: see {@link SelectFormat}
121  *
122  * argNameOrNumber = argName | argNumber
123  * argName = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
124  * argNumber = '0' | ('1'..'9' ('0'..'9')*)
125  *
126  * argType = "number" | "date" | "time" | "spellout" | "ordinal" | "duration"
127  * argStyle = "short" | "medium" | "long" | "full" | "integer" | "currency" | "percent" | argStyleText
128  * </pre>
129  *
130  * <ul>
131  *   <li>messageText can contain quoted literal strings including syntax characters.
132  *       A quoted literal string begins with an ASCII apostrophe and a syntax character
133  *       (usually a {curly brace}) and continues until the next single apostrophe.
134  *       A double ASCII apostrophe inside or outside of a quoted string represents
135  *       one literal apostrophe.
136  *   <li>Quotable syntax characters are the {curly braces} in all messageText parts,
137  *       plus the '#' sign in a messageText immediately inside a pluralStyle,
138  *       and the '|' symbol in a messageText immediately inside a choiceStyle.
139  *   <li>See also {@link #UMessagePatternApostropheMode}
140  *   <li>In argStyleText, every single ASCII apostrophe begins and ends quoted literal text,
141  *       and unquoted {curly braces} must occur in matched pairs.
142  * </ul>
143  *
144  * <p>Recommendation: Use the real apostrophe (single quote) character
145  * \htmlonly&#x2019;\endhtmlonly (U+2019) for
146  * human-readable text, and use the ASCII apostrophe ' (U+0027)
147  * only in program syntax, like quoting in MessageFormat.
148  * See the annotations for U+0027 Apostrophe in The Unicode Standard.
149  *
150  * <p>The <code>choice</code> argument type is deprecated.
151  * Use <code>plural</code> arguments for proper plural selection,
152  * and <code>select</code> arguments for simple selection among a fixed set of choices.
153  *
154  * <p>The <code>argType</code> and <code>argStyle</code> values are used to create
155  * a <code>Format</code> instance for the format element. The following
156  * table shows how the values map to Format instances. Combinations not
157  * shown in the table are illegal. Any <code>argStyleText</code> must
158  * be a valid pattern string for the Format subclass used.
159  *
160  * <p><table border=1>
161  *    <tr>
162  *       <th>argType
163  *       <th>argStyle
164  *       <th>resulting Format object
165  *    <tr>
166  *       <td colspan=2><i>(none)</i>
167  *       <td><code>null</code>
168  *    <tr>
169  *       <td rowspan=5><code>number</code>
170  *       <td><i>(none)</i>
171  *       <td><code>NumberFormat.createInstance(getLocale(), status)</code>
172  *    <tr>
173  *       <td><code>integer</code>
174  *       <td><code>NumberFormat.createInstance(getLocale(), kNumberStyle, status)</code>
175  *    <tr>
176  *       <td><code>currency</code>
177  *       <td><code>NumberFormat.createCurrencyInstance(getLocale(), status)</code>
178  *    <tr>
179  *       <td><code>percent</code>
180  *       <td><code>NumberFormat.createPercentInstance(getLocale(), status)</code>
181  *    <tr>
182  *       <td><i>argStyleText</i>
183  *       <td><code>new DecimalFormat(argStyleText, new DecimalFormatSymbols(getLocale(), status), status)</code>
184  *    <tr>
185  *       <td rowspan=6><code>date</code>
186  *       <td><i>(none)</i>
187  *       <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
188  *    <tr>
189  *       <td><code>short</code>
190  *       <td><code>DateFormat.createDateInstance(kShort, getLocale(), status)</code>
191  *    <tr>
192  *       <td><code>medium</code>
193  *       <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
194  *    <tr>
195  *       <td><code>long</code>
196  *       <td><code>DateFormat.createDateInstance(kLong, getLocale(), status)</code>
197  *    <tr>
198  *       <td><code>full</code>
199  *       <td><code>DateFormat.createDateInstance(kFull, getLocale(), status)</code>
200  *    <tr>
201  *       <td><i>argStyleText</i>
202  *       <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
203  *    <tr>
204  *       <td rowspan=6><code>time</code>
205  *       <td><i>(none)</i>
206  *       <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
207  *    <tr>
208  *       <td><code>short</code>
209  *       <td><code>DateFormat.createTimeInstance(kShort, getLocale(), status)</code>
210  *    <tr>
211  *       <td><code>medium</code>
212  *       <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
213  *    <tr>
214  *       <td><code>long</code>
215  *       <td><code>DateFormat.createTimeInstance(kLong, getLocale(), status)</code>
216  *    <tr>
217  *       <td><code>full</code>
218  *       <td><code>DateFormat.createTimeInstance(kFull, getLocale(), status)</code>
219  *    <tr>
220  *       <td><i>argStyleText</i>
221  *       <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
222  *    <tr>
223  *       <td><code>spellout</code>
224  *       <td><i>argStyleText (optional)</i>
225  *       <td><code>new RuleBasedNumberFormat(URBNF_SPELLOUT, getLocale(), status)
226  *           <br/>&nbsp;&nbsp;&nbsp;&nbsp;.setDefaultRuleset(argStyleText, status);</code>
227  *    <tr>
228  *       <td><code>ordinal</code>
229  *       <td><i>argStyleText (optional)</i>
230  *       <td><code>new RuleBasedNumberFormat(URBNF_ORDINAL, getLocale(), status)
231  *           <br/>&nbsp;&nbsp;&nbsp;&nbsp;.setDefaultRuleset(argStyleText, status);</code>
232  *    <tr>
233  *       <td><code>duration</code>
234  *       <td><i>argStyleText (optional)</i>
235  *       <td><code>new RuleBasedNumberFormat(URBNF_DURATION, getLocale(), status)
236  *           <br/>&nbsp;&nbsp;&nbsp;&nbsp;.setDefaultRuleset(argStyleText, status);</code>
237  * </table>
238  * <p>
239  *
240  * <h4>Usage Information</h4>
241  *
242  * <p>Here are some examples of usage:
243  * Example 1:
244  *
245  * <pre>
246  * \code
247  *     UErrorCode success = U_ZERO_ERROR;
248  *     GregorianCalendar cal(success);
249  *     Formattable arguments[] = {
250  *         7L,
251  *         Formattable( (Date) cal.getTime(success), Formattable::kIsDate),
252  *         "a disturbance in the Force"
253  *     };
254  *
255  *     UnicodeString result;
256  *     MessageFormat::format(
257  *          "At {1,time} on {1,date}, there was {2} on planet {0,number}.",
258  *          arguments, 3, result, success );
259  *
260  *     cout << "result: " << result << endl;
261  *     //<output>: At 4:34:20 PM on 23-Mar-98, there was a disturbance
262  *     //             in the Force on planet 7.
263  * \endcode
264  * </pre>
265  *
266  * Typically, the message format will come from resources, and the
267  * arguments will be dynamically set at runtime.
268  *
269  * <p>Example 2:
270  *
271  * <pre>
272  *  \code
273  *     success = U_ZERO_ERROR;
274  *     Formattable testArgs[] = {3L, "MyDisk"};
275  *
276  *     MessageFormat form(
277  *         "The disk \"{1}\" contains {0} file(s).", success );
278  *
279  *     UnicodeString string;
280  *     FieldPosition fpos = 0;
281  *     cout << "format: " << form.format(testArgs, 2, string, fpos, success ) << endl;
282  *
283  *     // output, with different testArgs:
284  *     // output: The disk "MyDisk" contains 0 file(s).
285  *     // output: The disk "MyDisk" contains 1 file(s).
286  *     // output: The disk "MyDisk" contains 1,273 file(s).
287  *  \endcode
288  *  </pre>
289  *
290  *
291  * <p>For messages that include plural forms, you can use a plural argument:
292  * <pre>
293  * \code
294  *  success = U_ZERO_ERROR;
295  *  MessageFormat msgFmt(
296  *       "{num_files, plural, "
297  *       "=0{There are no files on disk \"{disk_name}\".}"
298  *       "=1{There is one file on disk \"{disk_name}\".}"
299  *       "other{There are # files on disk \"{disk_name}\".}}",
300  *      Locale("en"),
301  *      success);
302  *  FieldPosition fpos = 0;
303  *  Formattable testArgs[] = {0L, "MyDisk"};
304  *  UnicodeString testArgsNames[] = {"num_files", "disk_name"};
305  *  UnicodeString result;
306  *  cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success);
307  *  testArgs[0] = 3L;
308  *  cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success);
309  * \endcode
310  * <em>output</em>:
311  * There are no files on disk "MyDisk".
312  * There are 3 files on disk "MyDisk".
313  * </pre>
314  * See {@link PluralFormat} and {@link PluralRules} for details.
315  *
316  * <h4><a name="synchronization">Synchronization</a></h4>
317  *
318  * <p>MessageFormats are not synchronized.
319  * It is recommended to create separate format instances for each thread.
320  * If multiple threads access a format concurrently, it must be synchronized
321  * externally.
322  *
323  * @stable ICU 2.0
324  */
325 class U_I18N_API MessageFormat : public Format {
326 public:
327 #ifndef U_HIDE_OBSOLETE_API
328     /**
329      * Enum type for kMaxFormat.
330      * @obsolete ICU 3.0.  The 10-argument limit was removed as of ICU 2.6,
331      * rendering this enum type obsolete.
332      */
333     enum EFormatNumber {
334         /**
335          * The former maximum number of arguments (10); no longer enforced.
336          * @obsolete ICU 3.0.  The 10-argument limit was removed as of ICU 2.6,
337          * rendering this constant obsolete.
338          */
339         kMaxFormat = 10
340     };
341 #endif  /* U_HIDE_OBSOLETE_API */
342 
343     /**
344      * Constructs a new MessageFormat using the given pattern and the
345      * default locale.
346      *
347      * @param pattern   Pattern used to construct object.
348      * @param status    Input/output error code.  If the
349      *                  pattern cannot be parsed, set to failure code.
350      * @stable ICU 2.0
351      */
352     MessageFormat(const UnicodeString& pattern,
353                   UErrorCode &status);
354 
355     /**
356      * Constructs a new MessageFormat using the given pattern and locale.
357      * @param pattern   Pattern used to construct object.
358      * @param newLocale The locale to use for formatting dates and numbers.
359      * @param status    Input/output error code.  If the
360      *                  pattern cannot be parsed, set to failure code.
361      * @stable ICU 2.0
362      */
363     MessageFormat(const UnicodeString& pattern,
364                   const Locale& newLocale,
365                         UErrorCode& status);
366     /**
367      * Constructs a new MessageFormat using the given pattern and locale.
368      * @param pattern   Pattern used to construct object.
369      * @param newLocale The locale to use for formatting dates and numbers.
370      * @param parseError Struct to receive information on the position
371      *                   of an error within the pattern.
372      * @param status    Input/output error code.  If the
373      *                  pattern cannot be parsed, set to failure code.
374      * @stable ICU 2.0
375      */
376     MessageFormat(const UnicodeString& pattern,
377                   const Locale& newLocale,
378                   UParseError& parseError,
379                   UErrorCode& status);
380     /**
381      * Constructs a new MessageFormat from an existing one.
382      * @stable ICU 2.0
383      */
384     MessageFormat(const MessageFormat&);
385 
386     /**
387      * Assignment operator.
388      * @stable ICU 2.0
389      */
390     const MessageFormat& operator=(const MessageFormat&);
391 
392     /**
393      * Destructor.
394      * @stable ICU 2.0
395      */
396     virtual ~MessageFormat();
397 
398     /**
399      * Clones this Format object polymorphically.  The caller owns the
400      * result and should delete it when done.
401      * @stable ICU 2.0
402      */
403     virtual Format* clone(void) const;
404 
405     /**
406      * Returns true if the given Format objects are semantically equal.
407      * Objects of different subclasses are considered unequal.
408      * @param other  the object to be compared with.
409      * @return       true if the given Format objects are semantically equal.
410      * @stable ICU 2.0
411      */
412     virtual UBool operator==(const Format& other) const;
413 
414     /**
415      * Sets the locale to be used for creating argument Format objects.
416      * @param theLocale    the new locale value to be set.
417      * @stable ICU 2.0
418      */
419     virtual void setLocale(const Locale& theLocale);
420 
421     /**
422      * Gets the locale used for creating argument Format objects
423      * (the counterpart of setLocale()).
424      * @return    the locale of the object.
425      * @stable ICU 2.0
426      */
427     virtual const Locale& getLocale(void) const;
428 
429     /**
430      * Applies the given pattern string to this message format.
431      *
432      * @param pattern   The pattern to be applied.
433      * @param status    Input/output error code.  If the
434      *                  pattern cannot be parsed, set to failure code.
435      * @stable ICU 2.0
436      */
437     virtual void applyPattern(const UnicodeString& pattern,
438                               UErrorCode& status);
439     /**
440      * Applies the given pattern string to this message format.
441      *
442      * @param pattern    The pattern to be applied.
443      * @param parseError Struct to receive information on the position
444      *                   of an error within the pattern.
445      * @param status    Input/output error code.  If the
446      *                  pattern cannot be parsed, set to failure code.
447      * @stable ICU 2.0
448      */
449     virtual void applyPattern(const UnicodeString& pattern,
450                              UParseError& parseError,
451                              UErrorCode& status);
452 
453     /**
454      * Sets the UMessagePatternApostropheMode and the pattern used by this message format.
455      * Parses the pattern and caches Format objects for simple argument types.
456      * Patterns and their interpretation are specified in the
457      * <a href="#patterns">class description</a>.
458      * <p>
459      * This method is best used only once on a given object to avoid confusion about the mode,
460      * and after constructing the object with an empty pattern string to minimize overhead.
461      *
462      * @param pattern    The pattern to be applied.
463      * @param aposMode   The new apostrophe mode.
464      * @param parseError Struct to receive information on the position
465      *                   of an error within the pattern.
466      *                   Can be NULL.
467      * @param status    Input/output error code.  If the
468      *                  pattern cannot be parsed, set to failure code.
469      * @stable ICU 4.8
470      */
471     virtual void applyPattern(const UnicodeString& pattern,
472                               UMessagePatternApostropheMode aposMode,
473                               UParseError* parseError,
474                               UErrorCode& status);
475 
476     /**
477      * @return this instance's UMessagePatternApostropheMode, as reported by its msgPattern member.
478      * @stable ICU 4.8
479      */
480     UMessagePatternApostropheMode getApostropheMode() const {
481         return msgPattern.getApostropheMode();
482     }
483 
484     /**
485      * Returns a pattern that can be used to recreate this object.
486      *
487      * @param appendTo  Output parameter to receive the pattern.
488      *                  Result is appended to existing contents.
489      * @return          Reference to 'appendTo' parameter.
490      * @stable ICU 2.0
491      */
492     virtual UnicodeString& toPattern(UnicodeString& appendTo) const;
493 
494     /**
495      * Sets subformats.
496      * See the class description about format numbering.
497      * The caller should not delete the Format objects after this call.
498      * <EM>The array formatsToAdopt is not itself adopted.</EM> Its
499      * ownership is retained by the caller. If the call fails because
500      * memory cannot be allocated, then the formats will be deleted
501      * by this method, and this object will remain unchanged.
502      *
503      * <p>If this format uses named arguments, the new formats are discarded
504      * and this format remains unchanged.
505      *
506      * @stable ICU 2.0
507      * @param formatsToAdopt    the formats to be adopted.
508      * @param count             the size of the array.
509      */
510     virtual void adoptFormats(Format** formatsToAdopt, int32_t count);
511 
512     /**
513      * Sets subformats.
514      * See the class description about format numbering.
515      * Each item in the array is cloned into the internal array.
516      * If the call fails because memory cannot be allocated, then this
517      * object will remain unchanged.
518      *
519      * <p>If this format uses named arguments, the new formats are discarded
520      * and this format remains unchanged.
521      *
522      * @stable ICU 2.0
523      * @param newFormats the new formats to be set.
524      * @param cnt        the size of the array.
525      */
526     virtual void setFormats(const Format** newFormats, int32_t cnt);
527 
528 
529     /**
530      * Sets one subformat.
531      * See the class description about format numbering.
532      * The caller should not delete the Format object after this call.
533      * If the number is over the number of formats already set,
534      * the item will be deleted and ignored.
535      *
536      * <p>If this format uses named arguments, the new format is discarded
537      * and this format remains unchanged.
538      *
539      * @stable ICU 2.0
540      * @param formatNumber     index of the subformat.
541      * @param formatToAdopt    the format to be adopted.
542      */
543     virtual void adoptFormat(int32_t formatNumber, Format* formatToAdopt);
544 
545     /**
546      * Sets one subformat.
547      * See the class description about format numbering.
548      * If the number is over the number of formats already set,
549      * the item will be ignored.
550      * @param formatNumber     index of the subformat.
551      * @param format    the format to be set.
552      * @stable ICU 2.0
553      */
554     virtual void setFormat(int32_t formatNumber, const Format& format);
555 
556     /**
557      * Gets the format names. This function returns the names in a StringEnumeration,
558      * which can be used with getFormat() and setFormat() to copy the subformats
559      * from the current MessageFormat to another.  It is the caller's responsibility
560      * to delete the returned StringEnumeration.
561      * @param status  output param set to success/failure code.
562      * @stable ICU 4.0
563      */
564     virtual StringEnumeration* getFormatNames(UErrorCode& status);
565 
566     /**
567      * Gets subformat pointer for given format name.
568      * This function supports both named and numbered
569      * arguments. If numbered, the formatName is the
570      * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
571      * The returned Format object should not be deleted by the caller,
572      * nor should the pointer be handed to any other object.  The pointer and its
573      * contents remain valid only until the next call to any method
574      * of this class is made with this object.
575      * @param formatName the name or number specifying a format
576      * @param status  output param set to success/failure code.
577      * @stable ICU 4.0
578      */
579     virtual Format* getFormat(const UnicodeString& formatName, UErrorCode& status);
580 
581     /**
582      * Sets one subformat for given format name.
583      * See the class description about format name.
584      * This function supports both named and numbered
585      * arguments-- if numbered, the formatName is the
586      * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
587      * If there is no matched formatName or wrong type,
588      * the item will be ignored.
589      * @param formatName  Name of the subformat.
590      * @param format      the format to be set.
591      * @param status  output param set to success/failure code.
592      * @stable ICU 4.0
593      */
594     virtual void setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status);
595 
596     /**
597      * Sets one subformat for given format name.
598      * See the class description about format name.
599      * This function supports both named and numbered
600      * arguments-- if numbered, the formatName is the
601      * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
602      * If there is no matched formatName or wrong type,
603      * the item will be ignored.
604      * The caller should not delete the Format object after this call.
605      * @param formatName  Name of the subformat.
606      * @param formatToAdopt  Format to be adopted.
607      * @param status      output param set to success/failure code.
608      * @stable ICU 4.0
609      */
610     virtual void adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status);
611 
612     /**
613      * Gets an array of subformats of this object.  The returned array
614      * should not be deleted by the caller, nor should the pointers
615      * within the array.  The array and its contents remain valid only
616      * until the next call to this format. See the class description
617      * about format numbering.
618      *
619      * @param count output parameter to receive the size of the array
620      * @return an array of count Format* objects, or NULL if out of
621      * memory.  Any or all of the array elements may be NULL.
622      * @stable ICU 2.0
623      */
624     virtual const Format** getFormats(int32_t& count) const;
625 
626 
627     using Format::format;
628 
629     /**
630      * Formats the given array of arguments into a user-readable string.
631      * Does not take ownership of the Formattable* array or its contents.
632      *
633      * <p>If this format uses named arguments, appendTo is unchanged and
634      * status is set to U_ILLEGAL_ARGUMENT_ERROR.
635      *
636      * @param source    An array of objects to be formatted.
637      * @param count     The number of elements of 'source'.
638      * @param appendTo  Output parameter to receive result.
639      *                  Result is appended to existing contents.
640      * @param ignore    Not used; inherited from base class API.
641      * @param status    Input/output error code.  If the
642      *                  pattern cannot be parsed, set to failure code.
643      * @return          Reference to 'appendTo' parameter.
644      * @stable ICU 2.0
645      */
646     UnicodeString& format(const Formattable* source,
647                           int32_t count,
648                           UnicodeString& appendTo,
649                           FieldPosition& ignore,
650                           UErrorCode& status) const;
651 
652     /**
653      * Formats the given array of arguments into a user-readable string
654      * using the given pattern.
655      *
656      * <p>If this format uses named arguments, appendTo is unchanged and
657      * status is set to U_ILLEGAL_ARGUMENT_ERROR.
658      *
659      * @param pattern   The pattern.
660      * @param arguments An array of objects to be formatted.
661      * @param count     The number of elements of 'arguments'.
662      * @param appendTo  Output parameter to receive result.
663      *                  Result is appended to existing contents.
664      * @param status    Input/output error code.  If the
665      *                  pattern cannot be parsed, set to failure code.
666      * @return          Reference to 'appendTo' parameter.
667      * @stable ICU 2.0
668      */
669     static UnicodeString& format(const UnicodeString& pattern,
670                                  const Formattable* arguments,
671                                  int32_t count,
672                                  UnicodeString& appendTo,
673                                  UErrorCode& status);
674 
675     /**
676      * Formats the given array of arguments into a user-readable
677      * string.  The array must be stored within a single Formattable
678      * object of type kArray. If the Formattable object type is not of
679      * type kArray, then returns a failing UErrorCode.
680      *
681      * <p>If this format uses named arguments, appendTo is unchanged and
682      * status is set to U_ILLEGAL_ARGUMENT_ERROR.
683      *
684      * @param obj       A Formattable of type kArray containing
685      *                  arguments to be formatted.
686      * @param appendTo  Output parameter to receive result.
687      *                  Result is appended to existing contents.
688      * @param pos       On input: an alignment field, if desired.
689      *                  On output: the offsets of the alignment field.
690      * @param status    Input/output error code.  If the
691      *                  pattern cannot be parsed, set to failure code.
692      * @return          Reference to 'appendTo' parameter.
693      * @stable ICU 2.0
694      */
695     virtual UnicodeString& format(const Formattable& obj,
696                                   UnicodeString& appendTo,
697                                   FieldPosition& pos,
698                                   UErrorCode& status) const;
699 
700     /**
701      * Formats the given array of arguments, each identified by the argument name at the
702      * same index, into a user-readable string. This function supports both named and numbered
703      * arguments-- if numbered, the argument name is the
704      * corresponding UnicodeString (e.g. "0", "1", "2"...).
705      *
706      * @param argumentNames argument name array
707      * @param arguments An array of objects to be formatted.
708      * @param count     The number of elements of 'argumentNames' and
709      *                  arguments.  The number of argumentNames and arguments
710      *                  must be the same.
711      * @param appendTo  Output parameter to receive result.
712      *                  Result is appended to existing contents.
713      * @param status    Input/output error code.  If the
714      *                  pattern cannot be parsed, set to failure code.
715      * @return          Reference to 'appendTo' parameter.
716      * @stable ICU 4.0
717      */
718     UnicodeString& format(const UnicodeString* argumentNames,
719                           const Formattable* arguments,
720                           int32_t count,
721                           UnicodeString& appendTo,
722                           UErrorCode& status) const;
723     /**
724      * Parses the given string into an array of output arguments.
725      *
726      * @param source    String to be parsed.
727      * @param pos       On input, starting position for parse. On output,
728      *                  final position after parse.  Unchanged if parse
729      *                  fails.
730      * @param count     Output parameter to receive the number of arguments
731      *                  parsed.
732      * @return an array of parsed arguments.  The caller owns both
733      * the array and its contents.
734      * @stable ICU 2.0
735      */
736     virtual Formattable* parse(const UnicodeString& source,
737                                ParsePosition& pos,
738                                int32_t& count) const;
739 
740     /**
741      * Parses the given string into an array of output arguments.
742      *
743      * <p>If this format uses named arguments, status is set to
744      * U_ARGUMENT_TYPE_MISMATCH.
745      *
746      * @param source    String to be parsed.
747      * @param count     Output param to receive size of returned array.
748      * @param status    Input/output error code.  If the
749      *                  pattern cannot be parsed, set to failure code.
750      * @return an array of parsed arguments.  The caller owns both
751      * the array and its contents. Returns NULL if status is not U_ZERO_ERROR.
752      *
753      * @stable ICU 2.0
754      */
755     virtual Formattable* parse(const UnicodeString& source,
756                                int32_t& count,
757                                UErrorCode& status) const;
758 
759     /**
760      * Parses the given string into an array of output arguments
761      * stored within a single Formattable of type kArray.
762      *
763      * @param source    The string to be parsed into an object.
764      * @param result    Formattable to be set to the parse result.
765      *                  If parse fails, return contents are undefined.
766      * @param pos       On input, starting position for parse. On output,
767      *                  final position after parse.  Unchanged if parse
768      *                  fails.
769      * @stable ICU 2.0
770      */
771     virtual void parseObject(const UnicodeString& source,
772                              Formattable& result,
773                              ParsePosition& pos) const;
774 
775     /**
776      * Convert an 'apostrophe-friendly' pattern into a standard
777      * pattern.  Standard patterns treat all apostrophes as
778      * quotes, which is problematic in some languages, e.g.
779      * French, where apostrophe is commonly used.  This utility
780      * assumes that only an unpaired apostrophe immediately before
781      * a brace is a true quote.  Other unpaired apostrophes are paired,
782      * and the resulting standard pattern string is returned.
783      *
784      * <p><b>Note</b> it is not guaranteed that the returned pattern
785      * is indeed a valid pattern.  The only effect is to convert
786      * between patterns having different quoting semantics.
787      *
788      * @param pattern the 'apostrophe-friendly' patttern to convert
789      * @param status    Input/output error code.  If the pattern
790      *                  cannot be parsed, the failure code is set.
791      * @return the standard equivalent of the original pattern
792      * @stable ICU 3.4
793      */
794     static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern,
795         UErrorCode& status);
796 
797 
798     /**
799      * Returns true if this MessageFormat uses named arguments,
800      * and false otherwise.  See class description.
801      *
802      * @return true if named arguments are used.
803      * @stable ICU 4.0
804      */
805     UBool usesNamedArguments() const;
806 
807 
808 #ifndef U_HIDE_INTERNAL_API
809     /**
810      * This API is for ICU internal use only.
811      * Please do not use it.
812      *
813      * Returns argument types count in the parsed pattern.
814      * Used to distinguish pattern "{0} d" and "d".
815      *
816      * @return           The number of formattable types in the pattern
817      * @internal
818      */
819     int32_t getArgTypeCount() const;
820 #endif  /* U_HIDE_INTERNAL_API */
821 
822     /**
823      * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
824      * This method is to implement a simple version of RTTI, since not all
825      * C++ compilers support genuine RTTI.  Polymorphic operator==() and
826      * clone() methods call this method.
827      *
828      * @return          The class ID for this object. All objects of a
829      *                  given class have the same class ID.  Objects of
830      *                  other classes have different class IDs.
831      * @stable ICU 2.0
832      */
833     virtual UClassID getDynamicClassID(void) const;
834 
835     /**
836      * Return the class ID for this class.  This is useful only for
837      * comparing to a return value from getDynamicClassID().  For example:
838      * <pre>
839      * .   Base* polymorphic_pointer = createPolymorphicObject();
840      * .   if (polymorphic_pointer->getDynamicClassID() ==
841      * .      Derived::getStaticClassID()) ...
842      * </pre>
843      * @return          The class ID for all objects of this class.
844      * @stable ICU 2.0
845      */
846     static UClassID U_EXPORT2 getStaticClassID(void);
847 
848 #ifndef U_HIDE_INTERNAL_API
849     /**
850      * Compares two Format objects. This is used for constructing the hash
851      * tables.
852      *
853      * @param left pointer to a Format object. Must not be NULL.
854      * @param right pointer to a Format object. Must not be NULL.
855      *
856      * @return whether the two objects are the same
857      * @internal
858      */
859     static UBool equalFormats(const void* left, const void* right);
860 #endif  /* U_HIDE_INTERNAL_API */
861 
862 private:
863 
864     Locale              fLocale;
865     MessagePattern      msgPattern;
866     Format**            formatAliases; // see getFormats
867     int32_t             formatAliasesCapacity;
868 
869     MessageFormat(); // default constructor not implemented
870 
871      /**
872       * This provider helps defer instantiation of a PluralRules object
873       * until we actually need to select a keyword.
874       * For example, if the number matches an explicit-value selector like "=1"
875       * we do not need any PluralRules.
876       */
877     class U_I18N_API PluralSelectorProvider : public PluralFormat::PluralSelector {
878     public:
879         PluralSelectorProvider(const MessageFormat &mf, UPluralType type);
880         virtual ~PluralSelectorProvider();
881         virtual UnicodeString select(void *ctx, double number, UErrorCode& ec) const;
882 
883         void reset();
884     private:
885         const MessageFormat &msgFormat;
886         PluralRules* rules;
887         UPluralType type;
888     };
889 
890     /**
891      * A MessageFormat formats an array of arguments.  Each argument
892      * has an expected type, based on the pattern.  For example, if
893      * the pattern contains the subformat "{3,number,integer}", then
894      * we expect argument 3 to have type Formattable::kLong.  This
895      * array needs to grow dynamically if the MessageFormat is
896      * modified.
897      */
898     Formattable::Type* argTypes;
899     int32_t            argTypeCount;
900     int32_t            argTypeCapacity;
901 
902     /**
903      * TRUE if there are different argTypes for the same argument.
904      * This only matters when the MessageFormat is used in the plain C (umsg_xxx) API
905      * where the pattern argTypes determine how the va_arg list is read.
906      */
907     UBool hasArgTypeConflicts;
908 
909     // Variable-size array management
910     UBool allocateArgTypes(int32_t capacity, UErrorCode& status);
911 
912     /**
913      * Default Format objects used when no format is specified and a
914      * numeric or date argument is formatted.  These are volatile
915      * cache objects maintained only for performance.  They do not
916      * participate in operator=(), copy constructor(), nor
917      * operator==().
918      */
919     NumberFormat* defaultNumberFormat;
920     DateFormat*   defaultDateFormat;
921 
922     UHashtable* cachedFormatters;
923     UHashtable* customFormatArgStarts;
924 
925     PluralSelectorProvider pluralProvider;
926     PluralSelectorProvider ordinalProvider;
927 
928     /**
929      * Method to retrieve default formats (or NULL on failure).
930      * These are semantically const, but may modify *this.
931      */
932     const NumberFormat* getDefaultNumberFormat(UErrorCode&) const;
933     const DateFormat*   getDefaultDateFormat(UErrorCode&) const;
934 
935     /**
936      * Finds the word s, in the keyword list and returns the located index.
937      * @param s the keyword to be searched for.
938      * @param list the list of keywords to be searched with.
939      * @return the index of the list which matches the keyword s.
940      */
941     static int32_t findKeyword( const UnicodeString& s,
942                                 const UChar * const *list);
943 
944     /**
945      * Thin wrapper around the format(... AppendableWrapper ...) variant.
946      * Wraps the destination UnicodeString into an AppendableWrapper and
947      * supplies default values for some other parameters.
948      */
949     UnicodeString& format(const Formattable* arguments,
950                           const UnicodeString *argumentNames,
951                           int32_t cnt,
952                           UnicodeString& appendTo,
953                           FieldPosition* pos,
954                           UErrorCode& status) const;
955 
956     /**
957      * Formats the arguments and writes the result into the
958      * AppendableWrapper, updates the field position.
959      *
960      * @param msgStart      Index to msgPattern part to start formatting from.
961      * @param plNumber      NULL except when formatting a plural argument sub-message
962      *                      where a '#' is replaced by the format string for this number.
963      * @param arguments     The formattable objects array. (Must not be NULL.)
964      * @param argumentNames NULL if numbered values are used. Otherwise the same
965      *                      length as "arguments", and each entry is the name of the
966      *                      corresponding argument in "arguments".
967      * @param cnt           The length of arguments (and of argumentNames if that is not NULL).
968      * @param appendTo      Output parameter to receive the result.
969      *                      The result string is appended to existing contents.
970      * @param pos           Field position status.
971      * @param success       The error code status.
972      */
973     void format(int32_t msgStart,
974                 const void *plNumber,
975                 const Formattable* arguments,
976                 const UnicodeString *argumentNames,
977                 int32_t cnt,
978                 AppendableWrapper& appendTo,
979                 FieldPosition* pos,
980                 UErrorCode& success) const;
981 
982     UnicodeString getArgName(int32_t partIndex);
983 
984     void setArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
985 
986     void setCustomArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
987 
988     int32_t nextTopLevelArgStart(int32_t partIndex) const;
989 
990     UBool argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber);
991 
992     void cacheExplicitFormats(UErrorCode& status);
993 
994     Format* createAppropriateFormat(UnicodeString& type,
995                                     UnicodeString& style,
996                                     Formattable::Type& formattableType,
997                                     UParseError& parseError,
998                                     UErrorCode& ec);
999 
1000     const Formattable* getArgFromListByName(const Formattable* arguments,
1001                                             const UnicodeString *argumentNames,
1002                                             int32_t cnt, UnicodeString& name) const;
1003 
1004     Formattable* parse(int32_t msgStart,
1005                        const UnicodeString& source,
1006                        ParsePosition& pos,
1007                        int32_t& count,
1008                        UErrorCode& ec) const;
1009 
1010     FieldPosition* updateMetaData(AppendableWrapper& dest, int32_t prevLength,
1011                                   FieldPosition* fp, const Formattable* argId) const;
1012 
1013     /**
1014      * Finds the "other" sub-message.
1015      * @param partIndex the index of the first PluralFormat argument style part.
1016      * @return the "other" sub-message start part index.
1017      */
1018     int32_t findOtherSubMessage(int32_t partIndex) const;
1019 
1020     /**
1021      * Returns the ARG_START index of the first occurrence of the plural number in a sub-message.
1022      * Returns -1 if it is a REPLACE_NUMBER.
1023      * Returns 0 if there is neither.
1024      */
1025     int32_t findFirstPluralNumberArg(int32_t msgStart, const UnicodeString &argName) const;
1026 
1027     Format* getCachedFormatter(int32_t argumentNumber) const;
1028 
1029     UnicodeString getLiteralStringUntilNextArgument(int32_t from) const;
1030 
1031     void copyObjects(const MessageFormat& that, UErrorCode& ec);
1032 
1033     void formatComplexSubMessage(int32_t msgStart,
1034                                  const void *plNumber,
1035                                  const Formattable* arguments,
1036                                  const UnicodeString *argumentNames,
1037                                  int32_t cnt,
1038                                  AppendableWrapper& appendTo,
1039                                  UErrorCode& success) const;
1040 
1041     /**
1042      * Convenience method that ought to be in NumberFormat
1043      */
1044     NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const;
1045 
1046     /**
1047      * Returns array of argument types in the parsed pattern
1048      * for use in C API.  Only for the use of umsg_vformat().  Not
1049      * for public consumption.
1050      * @param listCount  Output parameter to receive the size of array
1051      * @return           The array of formattable types in the pattern
1052      */
getArgTypeList(int32_t & listCount)1053     const Formattable::Type* getArgTypeList(int32_t& listCount) const {
1054         listCount = argTypeCount;
1055         return argTypes;
1056     }
1057 
1058     /**
1059      * Resets the internal MessagePattern, and other associated caches.
1060      */
1061     void resetPattern();
1062 
1063     /**
1064      * A DummyFormatter that we use solely to store a NULL value. UHash does
1065      * not support storing NULL values.
1066      */
1067     class U_I18N_API DummyFormat : public Format {
1068     public:
1069         virtual UBool operator==(const Format&) const;
1070         virtual Format* clone() const;
1071         virtual UnicodeString& format(const Formattable& obj,
1072                               UnicodeString& appendTo,
1073                               UErrorCode& status) const;
1074         virtual UnicodeString& format(const Formattable&,
1075                                       UnicodeString& appendTo,
1076                                       FieldPosition&,
1077                                       UErrorCode& status) const;
1078         virtual UnicodeString& format(const Formattable& obj,
1079                                       UnicodeString& appendTo,
1080                                       FieldPositionIterator* posIter,
1081                                       UErrorCode& status) const;
1082         virtual void parseObject(const UnicodeString&,
1083                                  Formattable&,
1084                                  ParsePosition&) const;
1085     };
1086 
1087     friend class MessageFormatAdapter; // getFormatTypeList() access
1088 };
1089 
1090 U_NAMESPACE_END
1091 
1092 #endif /* #if !UCONFIG_NO_FORMATTING */
1093 
#endif // MSGFMT_H
1095 //eof
1096