1 /*
2 * Copyright (C) 2007-2013, International Business Machines Corporation and
3 * others. All Rights Reserved.
4 ********************************************************************************
5 *
6 * File MSGFMT.H
7 *
8 * Modification History:
9 *
10 *   Date        Name        Description
11 *   02/19/97    aliu        Converted from java.
12 *   03/20/97    helena      Finished first cut of implementation.
13 *   07/22/98    stephen     Removed operator!= (defined in Format)
14 *   08/19/2002  srl         Removing Javaisms
15 *******************************************************************************/
16 
17 #ifndef MSGFMT_H
18 #define MSGFMT_H
19 
20 #include "unicode/utypes.h"
21 
22 /**
23  * \file
24  * \brief C++ API: Formats messages in a language-neutral way.
25  */
26 
27 #if !UCONFIG_NO_FORMATTING
28 
29 #include "unicode/format.h"
30 #include "unicode/locid.h"
31 #include "unicode/messagepattern.h"
32 #include "unicode/parseerr.h"
33 #include "unicode/plurfmt.h"
34 #include "unicode/plurrule.h"
35 
36 U_CDECL_BEGIN
37 // Forward declaration of the C struct UHashtable and its typedef.
38 struct UHashtable;
39 typedef struct UHashtable UHashtable; /**< @internal */
40 U_CDECL_END
41 
42 U_NAMESPACE_BEGIN
43 
44 class AppendableWrapper;  // forward declaration only
45 class DateFormat;         // forward declaration only
46 class NumberFormat;       // forward declaration only
47 
48 /**
49  * <p>MessageFormat prepares strings for display to users,
50  * with optional arguments (variables/placeholders).
51  * The arguments can occur in any order, which is necessary for translation
52  * into languages with different grammars.
53  *
54  * <p>A MessageFormat is constructed from a <em>pattern</em> string
55  * with arguments in {curly braces} which will be replaced by formatted values.
56  *
57  * <p><code>MessageFormat</code> differs from the other <code>Format</code>
58  * classes in that you create a <code>MessageFormat</code> object with one
59  * of its constructors (not with a <code>createInstance</code> style factory
60  * method). Factory methods aren't necessary because <code>MessageFormat</code>
61  * itself doesn't implement locale-specific behavior. Any locale-specific
62  * behavior is defined by the pattern that you provide and the
63  * subformats used for inserted arguments.
64  *
65  * <p>Arguments can be named (using identifiers) or numbered (using small ASCII-digit integers).
66  * Some of the API methods work only with argument numbers and fail (by setting an error
67  * code or discarding their input) if the pattern has named arguments (see {@link #usesNamedArguments()}).
68  *
69  * <p>An argument might not specify any format type. In this case,
70  * a Number value is formatted with a default (for the locale) NumberFormat,
71  * a Date value is formatted with a default (for the locale) DateFormat,
72  * and for any other value its toString() value is used.
73  *
74  * <p>An argument might specify a "simple" type for which the specified
75  * Format object is created, cached and used.
76  *
77  * <p>An argument might have a "complex" type with nested MessageFormat sub-patterns.
78  * During formatting, one of these sub-messages is selected according to the argument value
79  * and recursively formatted.
80  *
81  * <p>After construction, a custom Format object can be set for
82  * a top-level argument, overriding the default formatting and parsing behavior
83  * for that argument.
84  * However, custom formatting can be achieved more simply by writing
85  * a typeless argument in the pattern string
86  * and supplying it with a preformatted string value.
87  *
88  * <p>When formatting, MessageFormat takes a collection of argument values
89  * and writes an output string.
90  * The argument values may be passed as an array
91  * (when the pattern contains only numbered arguments)
92  * or as an array of names and an array of arguments (which works for both named
93  * and numbered arguments).
94  *
95  * <p>Each argument is matched with one of the input values by array index or argument name
96  * and formatted according to its pattern specification
97  * (or using a custom Format object if one was set).
98  * A numbered pattern argument is matched with an argument name that contains that number
99  * as an ASCII-decimal-digit string (without leading zero).
100  *
101  * <h4><a name="patterns">Patterns and Their Interpretation</a></h4>
102  *
103  * <code>MessageFormat</code> uses patterns of the following form:
104  * <pre>
105  * message = messageText (argument messageText)*
106  * argument = noneArg | simpleArg | complexArg
107  * complexArg = choiceArg | pluralArg | selectArg | selectordinalArg
108  *
109  * noneArg = '{' argNameOrNumber '}'
110  * simpleArg = '{' argNameOrNumber ',' argType [',' argStyle] '}'
111  * choiceArg = '{' argNameOrNumber ',' "choice" ',' choiceStyle '}'
112  * pluralArg = '{' argNameOrNumber ',' "plural" ',' pluralStyle '}'
113  * selectArg = '{' argNameOrNumber ',' "select" ',' selectStyle '}'
114  * selectordinalArg = '{' argNameOrNumber ',' "selectordinal" ',' pluralStyle '}'
115  *
116  * choiceStyle: see {@link ChoiceFormat}
117  * pluralStyle: see {@link PluralFormat}
118  * selectStyle: see {@link SelectFormat}
119  *
120  * argNameOrNumber = argName | argNumber
121  * argName = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
122  * argNumber = '0' | ('1'..'9' ('0'..'9')*)
123  *
124  * argType = "number" | "date" | "time" | "spellout" | "ordinal" | "duration"
125  * argStyle = "short" | "medium" | "long" | "full" | "integer" | "currency" | "percent" | argStyleText
126  * </pre>
127  *
128  * <ul>
129  *   <li>messageText can contain quoted literal strings including syntax characters.
130  *       A quoted literal string begins with an ASCII apostrophe and a syntax character
131  *       (usually a {curly brace}) and continues until the next single apostrophe.
132  *       A double ASCII apostrophe inside or outside of a quoted string represents
133  *       one literal apostrophe.
134  *   <li>Quotable syntax characters are the {curly braces} in all messageText parts,
135  *       plus the '#' sign in a messageText immediately inside a pluralStyle,
136  *       and the '|' symbol in a messageText immediately inside a choiceStyle.
137  *   <li>See also {@link #UMessagePatternApostropheMode}
138  *   <li>In argStyleText, every single ASCII apostrophe begins and ends quoted literal text,
139  *       and unquoted {curly braces} must occur in matched pairs.
140  * </ul>
141  *
142  * <p>Recommendation: Use the real apostrophe (single quote) character
143  * \htmlonly&#x2019;\endhtmlonly (U+2019) for
144  * human-readable text, and use the ASCII apostrophe ' (U+0027)
145  * only in program syntax, like quoting in MessageFormat.
146  * See the annotations for U+0027 Apostrophe in The Unicode Standard.
147  *
148  * <p>The <code>choice</code> argument type is deprecated.
149  * Use <code>plural</code> arguments for proper plural selection,
150  * and <code>select</code> arguments for simple selection among a fixed set of choices.
151  *
152  * <p>The <code>argType</code> and <code>argStyle</code> values are used to create
153  * a <code>Format</code> instance for the format element. The following
154  * table shows how the values map to Format instances. Combinations not
155  * shown in the table are illegal. Any <code>argStyleText</code> must
156  * be a valid pattern string for the Format subclass used.
157  *
158  * <p><table border=1>
159  *    <tr>
160  *       <th>argType
161  *       <th>argStyle
162  *       <th>resulting Format object
163  *    <tr>
164  *       <td colspan=2><i>(none)</i>
165  *       <td><code>null</code>
166  *    <tr>
167  *       <td rowspan=5><code>number</code>
168  *       <td><i>(none)</i>
169  *       <td><code>NumberFormat.createInstance(getLocale(), status)</code>
170  *    <tr>
171  *       <td><code>integer</code>
172  *       <td><code>NumberFormat.createInstance(getLocale(), kNumberStyle, status)</code>
173  *    <tr>
174  *       <td><code>currency</code>
175  *       <td><code>NumberFormat.createCurrencyInstance(getLocale(), status)</code>
176  *    <tr>
177  *       <td><code>percent</code>
178  *       <td><code>NumberFormat.createPercentInstance(getLocale(), status)</code>
179  *    <tr>
180  *       <td><i>argStyleText</i>
181  *       <td><code>new DecimalFormat(argStyleText, new DecimalFormatSymbols(getLocale(), status), status)</code>
182  *    <tr>
183  *       <td rowspan=6><code>date</code>
184  *       <td><i>(none)</i>
185  *       <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
186  *    <tr>
187  *       <td><code>short</code>
188  *       <td><code>DateFormat.createDateInstance(kShort, getLocale(), status)</code>
189  *    <tr>
190  *       <td><code>medium</code>
191  *       <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
192  *    <tr>
193  *       <td><code>long</code>
194  *       <td><code>DateFormat.createDateInstance(kLong, getLocale(), status)</code>
195  *    <tr>
196  *       <td><code>full</code>
197  *       <td><code>DateFormat.createDateInstance(kFull, getLocale(), status)</code>
198  *    <tr>
199  *       <td><i>argStyleText</i>
200  *       <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
201  *    <tr>
202  *       <td rowspan=6><code>time</code>
203  *       <td><i>(none)</i>
204  *       <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
205  *    <tr>
206  *       <td><code>short</code>
207  *       <td><code>DateFormat.createTimeInstance(kShort, getLocale(), status)</code>
208  *    <tr>
209  *       <td><code>medium</code>
210  *       <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
211  *    <tr>
212  *       <td><code>long</code>
213  *       <td><code>DateFormat.createTimeInstance(kLong, getLocale(), status)</code>
214  *    <tr>
215  *       <td><code>full</code>
216  *       <td><code>DateFormat.createTimeInstance(kFull, getLocale(), status)</code>
217  *    <tr>
218  *       <td><i>argStyleText</i>
219  *       <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
220  *    <tr>
221  *       <td><code>spellout</code>
222  *       <td><i>argStyleText (optional)</i>
223  *       <td><code>new RuleBasedNumberFormat(URBNF_SPELLOUT, getLocale(), status)
224  *           <br/>&nbsp;&nbsp;&nbsp;&nbsp;.setDefaultRuleset(argStyleText, status);</code>
225  *    <tr>
226  *       <td><code>ordinal</code>
227  *       <td><i>argStyleText (optional)</i>
228  *       <td><code>new RuleBasedNumberFormat(URBNF_ORDINAL, getLocale(), status)
229  *           <br/>&nbsp;&nbsp;&nbsp;&nbsp;.setDefaultRuleset(argStyleText, status);</code>
230  *    <tr>
231  *       <td><code>duration</code>
232  *       <td><i>argStyleText (optional)</i>
233  *       <td><code>new RuleBasedNumberFormat(URBNF_DURATION, getLocale(), status)
234  *           <br/>&nbsp;&nbsp;&nbsp;&nbsp;.setDefaultRuleset(argStyleText, status);</code>
235  * </table>
236  * <p>
237  *
238  * <h4>Usage Information</h4>
239  *
240  * <p>Here are some examples of usage:
241  * Example 1:
242  *
243  * <pre>
244  * \code
245  *     UErrorCode success = U_ZERO_ERROR;
246  *     GregorianCalendar cal(success);
247  *     Formattable arguments[] = {
248  *         7L,
249  *         Formattable( (Date) cal.getTime(success), Formattable::kIsDate),
250  *         "a disturbance in the Force"
251  *     };
252  *
253  *     UnicodeString result;
254  *     MessageFormat::format(
255  *          "At {1,time} on {1,date}, there was {2} on planet {0,number}.",
256  *          arguments, 3, result, success );
257  *
258  *     cout << "result: " << result << endl;
259  *     //<output>: At 4:34:20 PM on 23-Mar-98, there was a disturbance
260  *     //             in the Force on planet 7.
261  * \endcode
262  * </pre>
263  *
264  * Typically, the message format will come from resources, and the
265  * arguments will be dynamically set at runtime.
266  *
267  * <p>Example 2:
268  *
269  * <pre>
270  *  \code
271  *     success = U_ZERO_ERROR;
272  *     Formattable testArgs[] = {3L, "MyDisk"};
273  *
274  *     MessageFormat form(
275  *         "The disk \"{1}\" contains {0} file(s).", success );
276  *
277  *     UnicodeString string;
278  *     FieldPosition fpos = 0;
279  *     cout << "format: " << form.format(testArgs, 2, string, fpos, success ) << endl;
280  *
281  *     // output, with different testArgs:
282  *     // output: The disk "MyDisk" contains 0 file(s).
283  *     // output: The disk "MyDisk" contains 1 file(s).
284  *     // output: The disk "MyDisk" contains 1,273 file(s).
285  *  \endcode
286  *  </pre>
287  *
288  *
289  * <p>For messages that include plural forms, you can use a plural argument:
290  * <pre>
291  * \code
292  *  success = U_ZERO_ERROR;
293  *  MessageFormat msgFmt(
294  *       "{num_files, plural, "
295  *       "=0{There are no files on disk \"{disk_name}\".}"
296  *       "=1{There is one file on disk \"{disk_name}\".}"
297  *       "other{There are # files on disk \"{disk_name}\".}}",
298  *      Locale("en"),
299  *      success);
300  *  FieldPosition fpos = 0;
301  *  Formattable testArgs[] = {0L, "MyDisk"};
302  *  UnicodeString testArgsNames[] = {"num_files", "disk_name"};
303  *  UnicodeString result;
304  *  cout << msgFmt.format(testArgsNames, testArgs, 2, result, success);
305  *  testArgs[0] = 3L;
306  *  cout << msgFmt.format(testArgsNames, testArgs, 2, result, success);
307  * \endcode
308  * <em>output</em>:
309  * There are no files on disk "MyDisk".
310  * There are 3 files on disk "MyDisk".
311  * </pre>
312  * See {@link PluralFormat} and {@link PluralRules} for details.
313  *
314  * <h4><a name="synchronization">Synchronization</a></h4>
315  *
316  * <p>MessageFormats are not synchronized.
317  * It is recommended to create separate format instances for each thread.
318  * If multiple threads access a format concurrently, it must be synchronized
319  * externally.
320  *
321  * @stable ICU 2.0
322  */
323 class U_I18N_API MessageFormat : public Format {
324 public:
325 #ifndef U_HIDE_OBSOLETE_API
326     /**
327      * Enum type that holds only the obsolete constant kMaxFormat.
328      * @obsolete ICU 3.0.  The 10-argument limit was removed as of ICU 2.6,
329      * rendering this enum type obsolete.
330      */
331     enum EFormatNumber {
332         /**
333          * The former maximum number of arguments (10); no longer a limit.
334          * @obsolete ICU 3.0.  The 10-argument limit was removed as of ICU 2.6,
335          * rendering this constant obsolete.
336          */
337         kMaxFormat = 10
338     };
339 #endif  /* U_HIDE_OBSOLETE_API */
340 
341     /**
342      * Constructs a new MessageFormat using the given pattern and the
343      * default locale.
344      *
345      * @param pattern   Pattern used to construct object.
346      * @param status    Input/output error code.  If the
347      *                  pattern cannot be parsed, set to failure code.
348      * @stable ICU 2.0
349      */
350     MessageFormat(const UnicodeString& pattern,
351                   UErrorCode &status);
352 
353     /**
354      * Constructs a new MessageFormat using the given pattern and locale.
355      * @param pattern   Pattern used to construct object.
356      * @param newLocale The locale to use for formatting dates and numbers.
357      * @param status    Input/output error code.  If the
358      *                  pattern cannot be parsed, set to failure code.
359      * @stable ICU 2.0
360      */
361     MessageFormat(const UnicodeString& pattern,
362                   const Locale& newLocale,
363                         UErrorCode& status);
364     /**
365      * Constructs a new MessageFormat using the given pattern and locale.
366      * @param pattern   Pattern used to construct object.
367      * @param newLocale The locale to use for formatting dates and numbers.
368      * @param parseError Struct to receive information on the position
369      *                   of an error within the pattern.
370      * @param status    Input/output error code.  If the
371      *                  pattern cannot be parsed, set to failure code.
372      * @stable ICU 2.0
373      */
374     MessageFormat(const UnicodeString& pattern,
375                   const Locale& newLocale,
376                   UParseError& parseError,
377                   UErrorCode& status);
378     /**
379      * Constructs a new MessageFormat from an existing one.
380      * @stable ICU 2.0
381      */
382     MessageFormat(const MessageFormat&);
383 
384     /**
385      * Assignment operator.
386      * @stable ICU 2.0
387      */
388     const MessageFormat& operator=(const MessageFormat&);
389 
390     /**
391      * Destructor.
392      * @stable ICU 2.0
393      */
394     virtual ~MessageFormat();
395 
396     /**
397      * Clones this Format object polymorphically.  The caller owns the
398      * result and should delete it when done.
399      * @stable ICU 2.0
400      */
401     virtual Format* clone(void) const;
402 
403     /**
404      * Returns true if the given Format objects are semantically equal.
405      * Objects of different subclasses are considered unequal.
406      * @param other  the object to be compared with.
407      * @return       true if the given Format objects are semantically equal.
408      * @stable ICU 2.0
409      */
410     virtual UBool operator==(const Format& other) const;
411 
412     /**
413      * Sets the locale to be used for creating argument Format objects.
414      * @param theLocale    the new locale value to be set.
415      * @stable ICU 2.0
416      */
417     virtual void setLocale(const Locale& theLocale);
418 
419     /**
420      * Gets the locale that this object uses when creating
421      * argument Format objects.
422      * @return    the locale of the object.
423      * @stable ICU 2.0
424      */
425     virtual const Locale& getLocale(void) const;
426 
427     /**
428      * Applies the given pattern string to this message format.
429      *
430      * @param pattern   The pattern to be applied.
431      * @param status    Input/output error code.  If the
432      *                  pattern cannot be parsed, set to failure code.
433      * @stable ICU 2.0
434      */
435     virtual void applyPattern(const UnicodeString& pattern,
436                               UErrorCode& status);
437     /**
438      * Applies the given pattern string to this message format.
439      *
440      * @param pattern    The pattern to be applied.
441      * @param parseError Struct to receive information on the position
442      *                   of an error within the pattern.
443      * @param status    Input/output error code.  If the
444      *                  pattern cannot be parsed, set to failure code.
445      * @stable ICU 2.0
446      */
447     virtual void applyPattern(const UnicodeString& pattern,
448                              UParseError& parseError,
449                              UErrorCode& status);
450 
451     /**
452      * Sets the UMessagePatternApostropheMode and the pattern used by this message format.
453      * Parses the pattern and caches Format objects for simple argument types.
454      * Patterns and their interpretation are specified in the
455      * <a href="#patterns">class description</a>.
456      * <p>
457      * This method is best used only once on a given object to avoid confusion about the mode,
458      * and after constructing the object with an empty pattern string to minimize overhead.
459      *
460      * @param pattern    The pattern to be applied.
461      * @param aposMode   The new apostrophe mode.
462      * @param parseError Struct to receive information on the position
463      *                   of an error within the pattern.
464      *                   Can be NULL.
465      * @param status    Input/output error code.  If the
466      *                  pattern cannot be parsed, set to failure code.
467      * @stable ICU 4.8
468      */
469     virtual void applyPattern(const UnicodeString& pattern,
470                               UMessagePatternApostropheMode aposMode,
471                               UParseError* parseError,
472                               UErrorCode& status);
473 
474     /**
475      * @return this instance's UMessagePatternApostropheMode, as reported by msgPattern.
476      * @stable ICU 4.8
477      */
478     UMessagePatternApostropheMode getApostropheMode() const {
479         return msgPattern.getApostropheMode();
480     }
481 
482     /**
483      * Returns a pattern that can be used to recreate this object.
484      *
485      * @param appendTo  Output parameter to receive the pattern.
486      *                  Result is appended to existing contents.
487      * @return          Reference to 'appendTo' parameter.
488      * @stable ICU 2.0
489      */
490     virtual UnicodeString& toPattern(UnicodeString& appendTo) const;
491 
492     /**
493      * Sets subformats.
494      * See the class description about format numbering.
495      * The caller should not delete the Format objects after this call.
496      * <EM>The array formatsToAdopt is not itself adopted.</EM> Its
497      * ownership is retained by the caller. If the call fails because
498      * memory cannot be allocated, then the formats will be deleted
499      * by this method, and this object will remain unchanged.
500      *
501      * <p>If this format uses named arguments, the new formats are discarded
502      * and this format remains unchanged.
503      *
504      * @stable ICU 2.0
505      * @param formatsToAdopt    the formats to be adopted.
506      * @param count             the number of elements in formatsToAdopt.
507      */
508     virtual void adoptFormats(Format** formatsToAdopt, int32_t count);
509 
510     /**
511      * Sets subformats.
512      * See the class description about format numbering.
513      * Each item in the array is cloned into the internal array.
514      * If the call fails because memory cannot be allocated, then this
515      * object will remain unchanged.
516      *
517      * <p>If this format uses named arguments, the new formats are discarded
518      * and this format remains unchanged.
519      *
520      * @stable ICU 2.0
521      * @param newFormats the new formats to be set.
522      * @param cnt        the number of elements in newFormats.
523      */
524     virtual void setFormats(const Format** newFormats, int32_t cnt);
525 
526 
527     /**
528      * Sets one subformat.
529      * See the class description about format numbering.
530      * The caller should not delete the Format object after this call.
531      * If the number is over the number of formats already set,
532      * the item will be deleted and ignored.
533      *
534      * <p>If this format uses named arguments, the new format is discarded
535      * and this format remains unchanged.
536      *
537      * @stable ICU 2.0
538      * @param formatNumber     index of the subformat.
539      * @param formatToAdopt    the format to be adopted.
540      */
541     virtual void adoptFormat(int32_t formatNumber, Format* formatToAdopt);
542 
543     /**
544      * Sets one subformat.
545      * See the class description about format numbering.
546      * If the number is over the number of formats already set,
547      * the item will be ignored.
548      * @param formatNumber     index of the subformat.
549      * @param format    the format to be set.
550      * @stable ICU 2.0
551      */
552     virtual void setFormat(int32_t formatNumber, const Format& format);
553 
554     /**
555      * Gets format names. This function returns the format names as a StringEnumeration
556      * which can be used with getFormat() and setFormat() to export the subformats
557      * from the current MessageFormat to another.  It is the caller's responsibility
558      * to delete the returned StringEnumeration.
559      * @param status  output param set to success/failure code.
560      * @stable ICU 4.0
561      */
562     virtual StringEnumeration* getFormatNames(UErrorCode& status);
563 
564     /**
565      * Gets subformat pointer for given format name.
566      * This function supports both named and numbered
567      * arguments. If numbered, the formatName is the
568      * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
569      * The returned Format object must not be deleted by the caller.
570      * The pointer and the object it refers to remain valid only
571      * until the next call to any method of this class is made
572      * with this object.
573      * @param formatName the name or number specifying a format
574      * @param status  output param set to success/failure code.
575      * @stable ICU 4.0
576      */
577     virtual Format* getFormat(const UnicodeString& formatName, UErrorCode& status);
578 
579     /**
580      * Sets one subformat for given format name.
581      * See the class description about format name.
582      * This function supports both named and numbered
583      * arguments-- if numbered, the formatName is the
584      * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
585      * If there is no matched formatName or wrong type,
586      * the item will be ignored.
587      * @param formatName  Name of the subformat.
588      * @param format      the format to be set.
589      * @param status  output param set to success/failure code.
590      * @stable ICU 4.0
591      */
592     virtual void setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status);
593 
594     /**
595      * Sets one subformat for given format name.
596      * See the class description about format name.
597      * This function supports both named and numbered
598      * arguments-- if numbered, the formatName is the
599      * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
600      * If there is no matched formatName or wrong type,
601      * the item will be ignored.
602      * The caller should not delete the Format object after this call.
603      * @param formatName  Name of the subformat.
604      * @param formatToAdopt  Format to be adopted.
605      * @param status      output param set to success/failure code.
606      * @stable ICU 4.0
607      */
608     virtual void adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status);
609 
610     /**
611      * Gets an array of subformats of this object.  The returned array
612      * should not be deleted by the caller, nor should the pointers
613      * within the array.  The array and its contents remain valid only
614      * until the next call to this format. See the class description
615      * about format numbering.
616      *
617      * @param count output parameter to receive the size of the array
618      * @return an array of count Format* objects, or NULL if out of
619      * memory.  Any or all of the array elements may be NULL.
620      * @stable ICU 2.0
621      */
622     virtual const Format** getFormats(int32_t& count) const;
623 
624 
625     using Format::format;
626 
627     /**
628      * Formats the given array of arguments into a user-readable string.
629      * Does not take ownership of the Formattable* array or its contents.
630      *
631      * <p>If this format uses named arguments, appendTo is unchanged and
632      * status is set to U_ILLEGAL_ARGUMENT_ERROR.
633      *
634      * @param source    An array of objects to be formatted.
635      * @param count     The number of elements of 'source'.
636      * @param appendTo  Output parameter to receive result.
637      *                  Result is appended to existing contents.
638      * @param ignore    Not used; inherited from base class API.
639      * @param status    Input/output error code.  If the
640      *                  pattern cannot be parsed, set to failure code.
641      * @return          Reference to 'appendTo' parameter.
642      * @stable ICU 2.0
643      */
644     UnicodeString& format(const Formattable* source,
645                           int32_t count,
646                           UnicodeString& appendTo,
647                           FieldPosition& ignore,
648                           UErrorCode& status) const;
649 
650     /**
651      * Formats the given array of arguments into a user-readable string
652      * using the given pattern.
653      *
654      * <p>If the pattern uses named arguments, appendTo is unchanged and
655      * status is set to U_ILLEGAL_ARGUMENT_ERROR.
656      *
657      * @param pattern   The pattern.
658      * @param arguments An array of objects to be formatted.
659      * @param count     The number of elements of 'arguments'.
660      * @param appendTo  Output parameter to receive result.
661      *                  Result is appended to existing contents.
662      * @param status    Input/output error code.  If the
663      *                  pattern cannot be parsed, set to failure code.
664      * @return          Reference to 'appendTo' parameter.
665      * @stable ICU 2.0
666      */
667     static UnicodeString& format(const UnicodeString& pattern,
668                                  const Formattable* arguments,
669                                  int32_t count,
670                                  UnicodeString& appendTo,
671                                  UErrorCode& status);
672 
673     /**
674      * Formats the given array of arguments into a user-readable
675      * string.  The array must be stored within a single Formattable
676      * object of type kArray. If the Formattable object type is not of
677      * type kArray, then returns a failing UErrorCode.
678      *
679      * <p>If this format uses named arguments, appendTo is unchanged and
680      * status is set to U_ILLEGAL_ARGUMENT_ERROR.
681      *
682      * @param obj       A Formattable of type kArray containing
683      *                  arguments to be formatted.
684      * @param appendTo  Output parameter to receive result.
685      *                  Result is appended to existing contents.
686      * @param pos       On input: an alignment field, if desired.
687      *                  On output: the offsets of the alignment field.
688      * @param status    Input/output error code.  If the
689      *                  pattern cannot be parsed, set to failure code.
690      * @return          Reference to 'appendTo' parameter.
691      * @stable ICU 2.0
692      */
693     virtual UnicodeString& format(const Formattable& obj,
694                                   UnicodeString& appendTo,
695                                   FieldPosition& pos,
696                                   UErrorCode& status) const;
697 
698     /**
699      * Formats the given array of arguments into a user-readable string,
700      * matching them by name. This function supports both named and numbered
701      * arguments-- if numbered, the argument name is the
702      * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
703      *
704      * @param argumentNames argument name array
705      * @param arguments An array of objects to be formatted.
706      * @param count     The number of elements of 'argumentNames' and
707      *                  arguments.  The number of argumentNames and arguments
708      *                  must be the same.
709      * @param appendTo  Output parameter to receive result.
710      *                  Result is appended to existing contents.
711      * @param status    Input/output error code.  If the
712      *                  pattern cannot be parsed, set to failure code.
713      * @return          Reference to 'appendTo' parameter.
714      * @stable ICU 4.0
715      */
716     UnicodeString& format(const UnicodeString* argumentNames,
717                           const Formattable* arguments,
718                           int32_t count,
719                           UnicodeString& appendTo,
720                           UErrorCode& status) const;
721     /**
722      * Parses the given string into an array of output arguments.
723      *
724      * @param source    String to be parsed.
725      * @param pos       On input, starting position for parse. On output,
726      *                  final position after parse.  Unchanged if parse
727      *                  fails.
728      * @param count     Output parameter to receive the number of arguments
729      *                  parsed.
730      * @return an array of parsed arguments.  The caller owns both
731      * the array and its contents.
732      * @stable ICU 2.0
733      */
734     virtual Formattable* parse(const UnicodeString& source,
735                                ParsePosition& pos,
736                                int32_t& count) const;
737 
738     /**
739      * Parses the given string into an array of output arguments.
740      *
741      * <p>If this format uses named arguments, status is set to
742      * U_ARGUMENT_TYPE_MISMATCH.
743      *
744      * @param source    String to be parsed.
745      * @param count     Output param to receive size of returned array.
746      * @param status    Input/output error code.  If the
747      *                  pattern cannot be parsed, set to failure code.
748      * @return an array of parsed arguments.  The caller owns both
749      * the array and its contents. Returns NULL if status is not U_ZERO_ERROR.
750      *
751      * @stable ICU 2.0
752      */
753     virtual Formattable* parse(const UnicodeString& source,
754                                int32_t& count,
755                                UErrorCode& status) const;
756 
757     /**
758      * Parses the given string into an array of output arguments
759      * stored within a single Formattable of type kArray.
760      *
761      * @param source    The string to be parsed into an object.
762      * @param result    Formattable to be set to the parse result.
763      *                  If parse fails, return contents are undefined.
764      * @param pos       On input, starting position for parse. On output,
765      *                  final position after parse.  Unchanged if parse
766      *                  fails.
767      * @stable ICU 2.0
768      */
769     virtual void parseObject(const UnicodeString& source,
770                              Formattable& result,
771                              ParsePosition& pos) const;
772 
773     /**
774      * Convert an 'apostrophe-friendly' pattern into a standard
775      * pattern.  Standard patterns treat all apostrophes as
776      * quotes, which is problematic in some languages, e.g.
777      * French, where apostrophe is commonly used.  This utility
778      * assumes that only an unpaired apostrophe immediately before
779      * a brace is a true quote.  Other unpaired apostrophes are paired,
780      * and the resulting standard pattern string is returned.
781      *
782      * <p><b>Note</b> it is not guaranteed that the returned pattern
783      * is indeed a valid pattern.  The only effect is to convert
784      * between patterns having different quoting semantics.
785      *
786      * @param pattern the 'apostrophe-friendly' pattern to convert
787      * @param status    Input/output error code.  If the pattern
788      *                  cannot be parsed, the failure code is set.
789      * @return the standard equivalent of the original pattern
790      * @stable ICU 3.4
791      */
792     static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern,
793         UErrorCode& status);
794 
795 
796     /**
797      * Returns true if this MessageFormat uses named arguments,
798      * and false otherwise.  See class description.
799      *
800      * @return true if named arguments are used.
801      * @stable ICU 4.0
802      */
803     UBool usesNamedArguments() const;
804 
805 
806 #ifndef U_HIDE_INTERNAL_API
807     /**
808      * This API is for ICU internal use only.
809      * Please do not use it.
810      *
811      * Returns argument types count in the parsed pattern.
812      * Used to distinguish pattern "{0} d" and "d".
813      *
814      * @return           The number of formattable types in the pattern
815      * @internal
816      */
817     int32_t getArgTypeCount() const;
818 #endif  /* U_HIDE_INTERNAL_API */
819 
820     /**
821      * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
822      * This method is to implement a simple version of RTTI, since not all
823      * C++ compilers support genuine RTTI.  Polymorphic operator==() and
824      * clone() methods call this method.
825      *
826      * @return          The class ID for this object. All objects of a
827      *                  given class have the same class ID.  Objects of
828      *                  other classes have different class IDs.
829      * @stable ICU 2.0
830      */
831     virtual UClassID getDynamicClassID(void) const;
832 
833     /**
834      * Return the class ID for this class.  This is useful only for
835      * comparing to a return value from getDynamicClassID().  For example:
836      * <pre>
837      * .   Base* polymorphic_pointer = createPolymorphicObject();
838      * .   if (polymorphic_pointer->getDynamicClassID() ==
839      * .      Derived::getStaticClassID()) ...
840      * </pre>
841      * @return          The class ID for all objects of this class.
842      * @stable ICU 2.0
843      */
844     static UClassID U_EXPORT2 getStaticClassID(void);
845 
846 #ifndef U_HIDE_INTERNAL_API
847     /**
848      * Compares two Format objects. This is used for constructing the hash
849      * tables.
850      *
851      * @param left pointer to a Format object. Must not be NULL.
852      * @param right pointer to a Format object. Must not be NULL.
853      *
854      * @return whether the two objects are the same
855      * @internal
856      */
857     static UBool equalFormats(const void* left, const void* right);
858 #endif  /* U_HIDE_INTERNAL_API */
859 
860 private:
861 
862     Locale              fLocale;
863     MessagePattern      msgPattern;
864     Format**            formatAliases; // see getFormats
865     int32_t             formatAliasesCapacity;
866 
867     MessageFormat(); // default constructor not implemented
868 
869      /**
870       * This provider helps defer instantiation of a PluralRules object
871       * until we actually need to select a keyword.
872       * For example, if the number matches an explicit-value selector like "=1"
873       * we do not need any PluralRules.
874       */
875     class U_I18N_API PluralSelectorProvider : public PluralFormat::PluralSelector {
876     public:
877         PluralSelectorProvider(const MessageFormat &mf, UPluralType type);  // NOTE(review): mf/type presumably initialize msgFormat/type below -- confirm in the implementation
878         virtual ~PluralSelectorProvider();
879         virtual UnicodeString select(void *ctx, double number, UErrorCode& ec) const;  // keyword selection for 'number'; presumably overrides PluralFormat::PluralSelector::select() -- confirm
880 
881         void reset();  // NOTE(review): presumably discards the lazily created rules -- confirm in the implementation
882     private:
883         const MessageFormat &msgFormat;  // reference member (no copy is held); it also makes this class non-assignable
884         PluralRules* rules;  // presumably the PluralRules object whose instantiation this provider defers -- confirm
885         UPluralType type;  // plural type passed at construction (MessageFormat keeps separate plural and ordinal providers)
886     };
887 
888     /**
889      * A MessageFormat formats an array of arguments.  Each argument
890      * has an expected type, based on the pattern.  For example, if
891      * the pattern contains the subformat "{3,number,integer}", then
892      * we expect argument 3 to have type Formattable::kLong.  This
893      * array needs to grow dynamically if the MessageFormat is
894      * modified.
895      */
896     Formattable::Type* argTypes;
897     int32_t            argTypeCount;
898     int32_t            argTypeCapacity;
899 
900     /**
901      * TRUE if there are different argTypes for the same argument.
902      * This only matters when the MessageFormat is used in the plain C (umsg_xxx) API
903      * where the pattern argTypes determine how the va_arg list is read.
904      */
905     UBool hasArgTypeConflicts;
906 
907     // Variable-size array management
908     UBool allocateArgTypes(int32_t capacity, UErrorCode& status);
909 
910     /**
911      * Default Format objects used when no format is specified and a
912      * numeric or date argument is formatted.  These are volatile
913      * cache objects maintained only for performance.  They do not
914      * participate in operator=(), copy constructor(), nor
915      * operator==().
916      */
917     NumberFormat* defaultNumberFormat;
918     DateFormat*   defaultDateFormat;
919 
920     UHashtable* cachedFormatters;
921     UHashtable* customFormatArgStarts;
922 
923     PluralSelectorProvider pluralProvider;
924     PluralSelectorProvider ordinalProvider;
925 
926     /**
927      * Method to retrieve default formats (or NULL on failure).
928      * These are semantically const, but may modify *this.
929      */
930     const NumberFormat* getDefaultNumberFormat(UErrorCode&) const;
931     const DateFormat*   getDefaultDateFormat(UErrorCode&) const;
932 
933     /**
934      * Finds the word s, in the keyword list and returns the located index.
935      * @param s the keyword to be searched for.
936      * @param list the list of keywords to be searched with.
937      * @return the index of the list which matches the keyword s.
938      */
939     static int32_t findKeyword( const UnicodeString& s,
940                                 const UChar * const *list);
941 
942     /**
943      * Thin wrapper around the format(... AppendableWrapper ...) variant.
944      * Wraps the destination UnicodeString into an AppendableWrapper and
945      * supplies default values for some other parameters.
946      */
947     UnicodeString& format(const Formattable* arguments,
948                           const UnicodeString *argumentNames,
949                           int32_t cnt,
950                           UnicodeString& appendTo,
951                           FieldPosition* pos,
952                           UErrorCode& status) const;
953 
954     /**
955      * Formats the arguments and writes the result into the
956      * AppendableWrapper, updates the field position.
957      *
958      * @param msgStart      Index to msgPattern part to start formatting from.
959      * @param plNumber      NULL except when formatting a plural argument sub-message
960      *                      where a '#' is replaced by the format string for this number.
961      * @param arguments     The formattable objects array. (Must not be NULL.)
962      * @param argumentNames NULL if numbered values are used. Otherwise the same
963      *                      length as "arguments", and each entry is the name of the
964      *                      corresponding argument in "arguments".
965      * @param cnt           The length of arguments (and of argumentNames if that is not NULL).
966      * @param appendTo      Output parameter to receive the result.
967      *                      The result string is appended to existing contents.
968      * @param pos           Field position status.
969      * @param success       The error code status.
970      */
971     void format(int32_t msgStart,
972                 const void *plNumber,
973                 const Formattable* arguments,
974                 const UnicodeString *argumentNames,
975                 int32_t cnt,
976                 AppendableWrapper& appendTo,
977                 FieldPosition* pos,
978                 UErrorCode& success) const;
979 
980     UnicodeString getArgName(int32_t partIndex);
981 
982     void setArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
983 
984     void setCustomArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
985 
986     int32_t nextTopLevelArgStart(int32_t partIndex) const;
987 
988     UBool argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber);
989 
990     void cacheExplicitFormats(UErrorCode& status);
991 
992     Format* createAppropriateFormat(UnicodeString& type,
993                                     UnicodeString& style,
994                                     Formattable::Type& formattableType,
995                                     UParseError& parseError,
996                                     UErrorCode& ec);
997 
998     const Formattable* getArgFromListByName(const Formattable* arguments,
999                                             const UnicodeString *argumentNames,
1000                                             int32_t cnt, UnicodeString& name) const;
1001 
1002     Formattable* parse(int32_t msgStart,
1003                        const UnicodeString& source,
1004                        ParsePosition& pos,
1005                        int32_t& count,
1006                        UErrorCode& ec) const;
1007 
1008     FieldPosition* updateMetaData(AppendableWrapper& dest, int32_t prevLength,
1009                                   FieldPosition* fp, const Formattable* argId) const;
1010 
1011     /**
1012      * Finds the "other" sub-message.
1013      * @param partIndex the index of the first PluralFormat argument style part.
1014      * @return the "other" sub-message start part index.
1015      */
1016     int32_t findOtherSubMessage(int32_t partIndex) const;
1017 
1018     /**
1019      * Returns the ARG_START index of the first occurrence of the plural number in a sub-message.
1020      * Returns -1 if it is a REPLACE_NUMBER.
1021      * Returns 0 if there is neither.
1022      */
1023     int32_t findFirstPluralNumberArg(int32_t msgStart, const UnicodeString &argName) const;
1024 
1025     Format* getCachedFormatter(int32_t argumentNumber) const;
1026 
1027     UnicodeString getLiteralStringUntilNextArgument(int32_t from) const;
1028 
1029     void copyObjects(const MessageFormat& that, UErrorCode& ec);
1030 
1031     void formatComplexSubMessage(int32_t msgStart,
1032                                  const void *plNumber,
1033                                  const Formattable* arguments,
1034                                  const UnicodeString *argumentNames,
1035                                  int32_t cnt,
1036                                  AppendableWrapper& appendTo,
1037                                  UErrorCode& success) const;
1038 
1039     /**
1040      * Convenience method that ought to be in NumberFormat
1041      */
1042     NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const;
1043 
1044     /**
1045      * Returns array of argument types in the parsed pattern
1046      * for use in C API.  Only for the use of umsg_vformat().  Not
1047      * for public consumption.
1048      * @param listCount  Output parameter to receive the size of array
1049      * @return           The array of formattable types in the pattern
1050      */
getArgTypeList(int32_t & listCount)1051     const Formattable::Type* getArgTypeList(int32_t& listCount) const {
1052         listCount = argTypeCount;
1053         return argTypes;
1054     }
1055 
1056     /**
1057      * Resets the internal MessagePattern, and other associated caches.
1058      */
1059     void resetPattern();
1060 
1061     /**
1062      * A dummy Format that we use solely as a stand-in for a NULL value. UHash
1063      * does not support storing NULL values.
1064      */
1065     class U_I18N_API DummyFormat : public Format {
1066     public:
1067         virtual UBool operator==(const Format&) const;
1068         virtual Format* clone() const;
1069         virtual UnicodeString& format(const Formattable& obj,  // overload 1: no field-position reporting
1070                               UnicodeString& appendTo,
1071                               UErrorCode& status) const;
1072         virtual UnicodeString& format(const Formattable&,  // overload 2: takes a FieldPosition&
1073                                       UnicodeString& appendTo,
1074                                       FieldPosition&,
1075                                       UErrorCode& status) const;
1076         virtual UnicodeString& format(const Formattable& obj,  // overload 3: takes a FieldPositionIterator*
1077                                       UnicodeString& appendTo,
1078                                       FieldPositionIterator* posIter,
1079                                       UErrorCode& status) const;
1080         virtual void parseObject(const UnicodeString&,  // NOTE(review): all parameters unnamed; presumably unused by the implementation -- confirm
1081                                  Formattable&,
1082                                  ParsePosition&) const;
1083     };
1084 
1085     friend class MessageFormatAdapter; // getArgTypeList() access
1086 };
1087 
1088 U_NAMESPACE_END
1089 
1090 #endif /* #if !UCONFIG_NO_FORMATTING */
1091 
1092 #endif // MSGFMT_H
1093 //eof
1094