1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 8 * File PLURFMT.H 9 ******************************************************************************** 10 */ 11 12 #ifndef PLURFMT 13 #define PLURFMT 14 15 #include "unicode/utypes.h" 16 17 /** 18 * \file 19 * \brief C++ API: PluralFormat object 20 */ 21 22 #if !UCONFIG_NO_FORMATTING 23 24 #include "unicode/messagepattern.h" 25 #include "unicode/numfmt.h" 26 #include "unicode/plurrule.h" 27 28 U_NAMESPACE_BEGIN 29 30 class Hashtable; 31 class NFRule; 32 33 /** 34 * <p> 35 * <code>PluralFormat</code> supports the creation of internationalized 36 * messages with plural inflection. It is based on <i>plural 37 * selection</i>, i.e. the caller specifies messages for each 38 * plural case that can appear in the user's language and the 39 * <code>PluralFormat</code> selects the appropriate message based on 40 * the number. 41 * </p> 42 * <h4>The Problem of Plural Forms in Internationalized Messages</h4> 43 * <p> 44 * Different languages have different ways to inflect 45 * plurals. Creating internationalized messages that include plural 46 * forms is only feasible when the framework is able to handle plural 47 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code> 48 * doesn't handle this well, because it attaches a number interval to 49 * each message and selects the message whose interval contains a 50 * given number. This can only handle a finite number of 51 * intervals. But in some languages, like Polish, one plural case 52 * applies to infinitely many intervals (e.g., the plural case applies to 53 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or 54 * 14). Thus <code>ChoiceFormat</code> is not adequate. 55 * </p><p> 56 * <code>PluralFormat</code> deals with this by breaking the problem 57 * into two parts: 58 * <ul> 59 * <li>It uses <code>PluralRules</code> that can define more complex 60 * conditions for a plural case than just a single interval. These plural 61 * rules define both what plural cases exist in a language, and to 62 * which numbers these cases apply. 63 * <li>It provides predefined plural rules for many languages. Thus, the programmer 64 * need not worry about the plural cases of a language and 65 * does not have to define the plural cases; they can simply 66 * use the predefined keywords. The whole plural formatting of messages can 67 * be done using localized patterns from resource bundles. For predefined plural 68 * rules, see the CLDR <i>Language Plural Rules</i> page at 69 * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html 70 * </ul> 71 * </p> 72 * <h4>Usage of <code>PluralFormat</code></h4> 73 * <p>Note: Typically, plural formatting is done via <code>MessageFormat</code> 74 * with a <code>plural</code> argument type, 75 * rather than using a stand-alone <code>PluralFormat</code>. 76 * </p><p> 77 * This discussion assumes that you use <code>PluralFormat</code> with 78 * a predefined set of plural rules. You can create one using one of 79 * the constructors that takes a <code>locale</code> object. To 80 * specify the message pattern, you can either pass it to the 81 * constructor or set it explicitly using the 82 * <code>applyPattern()</code> method. The <code>format()</code> 83 * method takes a number object and selects the message of the 84 * matching plural case. This message will be returned. 85 * </p> 86 * <h5>Patterns and Their Interpretation</h5> 87 * <p> 88 * The pattern text defines the message output for each plural case of the 89 * specified locale. Syntax: 90 * <pre> 91 * pluralStyle = [offsetValue] (selector '{' message '}')+ 92 * offsetValue = "offset:" number 93 * selector = explicitValue | keyword 94 * explicitValue = '=' number // adjacent, no white space in between 95 * keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+ 96 * message: see {@link MessageFormat} 97 * </pre> 98 * Pattern_White_Space between syntax elements is ignored, except 99 * between the {curly braces} and their sub-message, 100 * and between the '=' and the number of an explicitValue. 101 * 102 * </p><p> 103 * There are 6 predefined casekeyword in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and 104 * 'other'. You always have to define a message text for the default plural case 105 * <code>other</code> which is contained in every rule set. 106 * If you do not specify a message text for a particular plural case, the 107 * message text of the plural case <code>other</code> gets assigned to this 108 * plural case. 109 * </p><p> 110 * When formatting, the input number is first matched against the explicitValue clauses. 111 * If there is no exact-number match, then a keyword is selected by calling 112 * the <code>PluralRules</code> with the input number <em>minus the offset</em>. 113 * (The offset defaults to 0 if it is omitted from the pattern string.) 114 * If there is no clause with that keyword, then the "other" clauses is returned. 115 * </p><p> 116 * An unquoted pound sign (<code>#</code>) in the selected sub-message 117 * itself (i.e., outside of arguments nested in the sub-message) 118 * is replaced by the input number minus the offset. 119 * The number-minus-offset value is formatted using a 120 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you 121 * need special number formatting, you have to use a <code>MessageFormat</code> 122 * and explicitly specify a <code>NumberFormat</code> argument. 123 * <strong>Note:</strong> That argument is formatting without subtracting the offset! 124 * If you need a custom format and have a non-zero offset, then you need to pass the 125 * number-minus-offset value as a separate parameter. 126 * </p> 127 * For a usage example, see the {@link MessageFormat} class documentation. 128 * 129 * <h4>Defining Custom Plural Rules</h4> 130 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can 131 * create a <code>PluralRules</code> object and pass it to 132 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this 133 * constructor, this locale will be used to format the number in the message 134 * texts. 135 * </p><p> 136 * For more information about <code>PluralRules</code>, see 137 * {@link PluralRules}. 138 * </p> 139 * 140 * ported from Java 141 * @stable ICU 4.0 142 */ 143 144 class U_I18N_API PluralFormat : public Format { 145 public: 146 147 /** 148 * Creates a new cardinal-number <code>PluralFormat</code> for the default locale. 149 * This locale will be used to get the set of plural rules and for standard 150 * number formatting. 151 * @param status output param set to success/failure code on exit, which 152 * must not indicate a failure before the function call. 153 * @stable ICU 4.0 154 */ 155 PluralFormat(UErrorCode& status); 156 157 /** 158 * Creates a new cardinal-number <code>PluralFormat</code> for a given locale. 159 * @param locale the <code>PluralFormat</code> will be configured with 160 * rules for this locale. This locale will also be used for 161 * standard number formatting. 162 * @param status output param set to success/failure code on exit, which 163 * must not indicate a failure before the function call. 164 * @stable ICU 4.0 165 */ 166 PluralFormat(const Locale& locale, UErrorCode& status); 167 168 /** 169 * Creates a new <code>PluralFormat</code> for a given set of rules. 170 * The standard number formatting will be done using the default locale. 171 * @param rules defines the behavior of the <code>PluralFormat</code> 172 * object. 173 * @param status output param set to success/failure code on exit, which 174 * must not indicate a failure before the function call. 175 * @stable ICU 4.0 176 */ 177 PluralFormat(const PluralRules& rules, UErrorCode& status); 178 179 /** 180 * Creates a new <code>PluralFormat</code> for a given set of rules. 181 * The standard number formatting will be done using the given locale. 182 * @param locale the default number formatting will be done using this 183 * locale. 184 * @param rules defines the behavior of the <code>PluralFormat</code> 185 * object. 186 * @param status output param set to success/failure code on exit, which 187 * must not indicate a failure before the function call. 188 * @stable ICU 4.0 189 * <p> 190 * <h4>Sample code</h4> 191 * \snippet samples/plurfmtsample/plurfmtsample.cpp PluralFormatExample1 192 * \snippet samples/plurfmtsample/plurfmtsample.cpp PluralFormatExample 193 * <p> 194 */ 195 PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status); 196 197 /** 198 * Creates a new <code>PluralFormat</code> for the plural type. 199 * The standard number formatting will be done using the given locale. 200 * @param locale the default number formatting will be done using this 201 * locale. 202 * @param type The plural type (e.g., cardinal or ordinal). 203 * @param status output param set to success/failure code on exit, which 204 * must not indicate a failure before the function call. 205 * @stable ICU 50 206 */ 207 PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status); 208 209 /** 210 * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string. 211 * The default locale will be used to get the set of plural rules and for 212 * standard number formatting. 213 * @param pattern the pattern for this <code>PluralFormat</code>. 214 * errors are returned to status if the pattern is invalid. 215 * @param status output param set to success/failure code on exit, which 216 * must not indicate a failure before the function call. 217 * @stable ICU 4.0 218 */ 219 PluralFormat(const UnicodeString& pattern, UErrorCode& status); 220 221 /** 222 * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string and 223 * locale. 224 * The locale will be used to get the set of plural rules and for 225 * standard number formatting. 226 * @param locale the <code>PluralFormat</code> will be configured with 227 * rules for this locale. This locale will also be used for 228 * standard number formatting. 229 * @param pattern the pattern for this <code>PluralFormat</code>. 230 * errors are returned to status if the pattern is invalid. 231 * @param status output param set to success/failure code on exit, which 232 * must not indicate a failure before the function call. 233 * @stable ICU 4.0 234 */ 235 PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status); 236 237 /** 238 * Creates a new <code>PluralFormat</code> for a given set of rules, a 239 * pattern and a locale. 240 * @param rules defines the behavior of the <code>PluralFormat</code> 241 * object. 242 * @param pattern the pattern for this <code>PluralFormat</code>. 243 * errors are returned to status if the pattern is invalid. 244 * @param status output param set to success/failure code on exit, which 245 * must not indicate a failure before the function call. 246 * @stable ICU 4.0 247 */ 248 PluralFormat(const PluralRules& rules, 249 const UnicodeString& pattern, 250 UErrorCode& status); 251 252 /** 253 * Creates a new <code>PluralFormat</code> for a given set of rules, a 254 * pattern and a locale. 255 * @param locale the <code>PluralFormat</code> will be configured with 256 * rules for this locale. This locale will also be used for 257 * standard number formatting. 258 * @param rules defines the behavior of the <code>PluralFormat</code> 259 * object. 260 * @param pattern the pattern for this <code>PluralFormat</code>. 261 * errors are returned to status if the pattern is invalid. 262 * @param status output param set to success/failure code on exit, which 263 * must not indicate a failure before the function call. 264 * @stable ICU 4.0 265 */ 266 PluralFormat(const Locale& locale, 267 const PluralRules& rules, 268 const UnicodeString& pattern, 269 UErrorCode& status); 270 271 /** 272 * Creates a new <code>PluralFormat</code> for a plural type, a 273 * pattern and a locale. 274 * @param locale the <code>PluralFormat</code> will be configured with 275 * rules for this locale. This locale will also be used for 276 * standard number formatting. 277 * @param type The plural type (e.g., cardinal or ordinal). 278 * @param pattern the pattern for this <code>PluralFormat</code>. 279 * errors are returned to status if the pattern is invalid. 280 * @param status output param set to success/failure code on exit, which 281 * must not indicate a failure before the function call. 282 * @stable ICU 50 283 */ 284 PluralFormat(const Locale& locale, 285 UPluralType type, 286 const UnicodeString& pattern, 287 UErrorCode& status); 288 289 /** 290 * copy constructor. 291 * @stable ICU 4.0 292 */ 293 PluralFormat(const PluralFormat& other); 294 295 /** 296 * Destructor. 297 * @stable ICU 4.0 298 */ 299 virtual ~PluralFormat(); 300 301 /** 302 * Sets the pattern used by this plural format. 303 * The method parses the pattern and creates a map of format strings 304 * for the plural rules. 305 * Patterns and their interpretation are specified in the class description. 306 * 307 * @param pattern the pattern for this plural format 308 * errors are returned to status if the pattern is invalid. 309 * @param status output param set to success/failure code on exit, which 310 * must not indicate a failure before the function call. 311 * @stable ICU 4.0 312 */ 313 void applyPattern(const UnicodeString& pattern, UErrorCode& status); 314 315 316 using Format::format; 317 318 /** 319 * Formats a plural message for a given number. 320 * 321 * @param number a number for which the plural message should be formatted 322 * for. If no pattern has been applied to this 323 * <code>PluralFormat</code> object yet, the formatted number 324 * will be returned. 325 * @param status output param set to success/failure code on exit, which 326 * must not indicate a failure before the function call. 327 * @return the string containing the formatted plural message. 328 * @stable ICU 4.0 329 */ 330 UnicodeString format(int32_t number, UErrorCode& status) const; 331 332 /** 333 * Formats a plural message for a given number. 334 * 335 * @param number a number for which the plural message should be formatted 336 * for. If no pattern has been applied to this 337 * PluralFormat object yet, the formatted number 338 * will be returned. 339 * @param status output param set to success or failure code on exit, which 340 * must not indicate a failure before the function call. 341 * @return the string containing the formatted plural message. 342 * @stable ICU 4.0 343 */ 344 UnicodeString format(double number, UErrorCode& status) const; 345 346 /** 347 * Formats a plural message for a given number. 348 * 349 * @param number a number for which the plural message should be formatted 350 * for. If no pattern has been applied to this 351 * <code>PluralFormat</code> object yet, the formatted number 352 * will be returned. 353 * @param appendTo output parameter to receive result. 354 * result is appended to existing contents. 355 * @param pos On input: an alignment field, if desired. 356 * On output: the offsets of the alignment field. 357 * @param status output param set to success/failure code on exit, which 358 * must not indicate a failure before the function call. 359 * @return the string containing the formatted plural message. 360 * @stable ICU 4.0 361 */ 362 UnicodeString& format(int32_t number, 363 UnicodeString& appendTo, 364 FieldPosition& pos, 365 UErrorCode& status) const; 366 367 /** 368 * Formats a plural message for a given number. 369 * 370 * @param number a number for which the plural message should be formatted 371 * for. If no pattern has been applied to this 372 * PluralFormat object yet, the formatted number 373 * will be returned. 374 * @param appendTo output parameter to receive result. 375 * result is appended to existing contents. 376 * @param pos On input: an alignment field, if desired. 377 * On output: the offsets of the alignment field. 378 * @param status output param set to success/failure code on exit, which 379 * must not indicate a failure before the function call. 380 * @return the string containing the formatted plural message. 381 * @stable ICU 4.0 382 */ 383 UnicodeString& format(double number, 384 UnicodeString& appendTo, 385 FieldPosition& pos, 386 UErrorCode& status) const; 387 388 #ifndef U_HIDE_DEPRECATED_API 389 /** 390 * Sets the locale used by this <code>PluraFormat</code> object. 391 * Note: Calling this method resets this <code>PluraFormat</code> object, 392 * i.e., a pattern that was applied previously will be removed, 393 * and the NumberFormat is set to the default number format for 394 * the locale. The resulting format behaves the same as one 395 * constructed from {@link #PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status)} 396 * with UPLURAL_TYPE_CARDINAL. 397 * @param locale the <code>locale</code> to use to configure the formatter. 398 * @param status output param set to success/failure code on exit, which 399 * must not indicate a failure before the function call. 400 * @deprecated ICU 50 This method clears the pattern and might create 401 * a different kind of PluralRules instance; 402 * use one of the constructors to create a new instance instead. 403 */ 404 void setLocale(const Locale& locale, UErrorCode& status); 405 #endif /* U_HIDE_DEPRECATED_API */ 406 407 /** 408 * Sets the number format used by this formatter. You only need to 409 * call this if you want a different number format than the default 410 * formatter for the locale. 411 * @param format the number format to use. 412 * @param status output param set to success/failure code on exit, which 413 * must not indicate a failure before the function call. 414 * @stable ICU 4.0 415 */ 416 void setNumberFormat(const NumberFormat* format, UErrorCode& status); 417 418 /** 419 * Assignment operator 420 * 421 * @param other the PluralFormat object to copy from. 422 * @stable ICU 4.0 423 */ 424 PluralFormat& operator=(const PluralFormat& other); 425 426 /** 427 * Return true if another object is semantically equal to this one. 428 * 429 * @param other the PluralFormat object to be compared with. 430 * @return true if other is semantically equal to this. 431 * @stable ICU 4.0 432 */ 433 virtual UBool operator==(const Format& other) const; 434 435 /** 436 * Return true if another object is semantically unequal to this one. 437 * 438 * @param other the PluralFormat object to be compared with. 439 * @return true if other is semantically unequal to this. 440 * @stable ICU 4.0 441 */ 442 virtual UBool operator!=(const Format& other) const; 443 444 /** 445 * Clones this Format object polymorphically. The caller owns the 446 * result and should delete it when done. 447 * @stable ICU 4.0 448 */ 449 virtual Format* clone(void) const; 450 451 /** 452 * Formats a plural message for a number taken from a Formattable object. 453 * 454 * @param obj The object containing a number for which the 455 * plural message should be formatted. 456 * The object must be of a numeric type. 457 * @param appendTo output parameter to receive result. 458 * Result is appended to existing contents. 459 * @param pos On input: an alignment field, if desired. 460 * On output: the offsets of the alignment field. 461 * @param status output param filled with success/failure status. 462 * @return Reference to 'appendTo' parameter. 463 * @stable ICU 4.0 464 */ 465 UnicodeString& format(const Formattable& obj, 466 UnicodeString& appendTo, 467 FieldPosition& pos, 468 UErrorCode& status) const; 469 470 /** 471 * Returns the pattern from applyPattern() or constructor(). 472 * 473 * @param appendTo output parameter to receive result. 474 * Result is appended to existing contents. 475 * @return the UnicodeString with inserted pattern. 476 * @stable ICU 4.0 477 */ 478 UnicodeString& toPattern(UnicodeString& appendTo); 479 480 /** 481 * This method is not yet supported by <code>PluralFormat</code>. 482 * <P> 483 * Before calling, set parse_pos.index to the offset you want to start 484 * parsing at in the source. After calling, parse_pos.index is the end of 485 * the text you parsed. If error occurs, index is unchanged. 486 * <P> 487 * When parsing, leading whitespace is discarded (with a successful parse), 488 * while trailing whitespace is left as is. 489 * <P> 490 * See Format::parseObject() for more. 491 * 492 * @param source The string to be parsed into an object. 493 * @param result Formattable to be set to the parse result. 494 * If parse fails, return contents are undefined. 495 * @param parse_pos The position to start parsing at. Upon return 496 * this param is set to the position after the 497 * last character successfully parsed. If the 498 * source is not parsed successfully, this param 499 * will remain unchanged. 500 * @stable ICU 4.0 501 */ 502 virtual void parseObject(const UnicodeString& source, 503 Formattable& result, 504 ParsePosition& parse_pos) const; 505 506 /** 507 * ICU "poor man's RTTI", returns a UClassID for this class. 508 * 509 * @stable ICU 4.0 510 * 511 */ 512 static UClassID U_EXPORT2 getStaticClassID(void); 513 514 /** 515 * ICU "poor man's RTTI", returns a UClassID for the actual class. 516 * 517 * @stable ICU 4.0 518 */ 519 virtual UClassID getDynamicClassID() const; 520 521 #if (defined(__xlC__) && (__xlC__ < 0x0C00)) || (U_PLATFORM == U_PF_OS390) || (U_PLATFORM ==U_PF_OS400) 522 // Work around a compiler bug on xlC 11.1 on AIX 7.1 that would 523 // prevent PluralSelectorAdapter from implementing private PluralSelector. 524 // xlC error message: 525 // 1540-0300 (S) The "private" member "class icu_49::PluralFormat::PluralSelector" cannot be accessed. 526 public: 527 #else 528 private: 529 #endif 530 /** 531 * @internal 532 */ 533 class U_I18N_API PluralSelector : public UMemory { 534 public: 535 virtual ~PluralSelector(); 536 /** 537 * Given a number, returns the appropriate PluralFormat keyword. 538 * 539 * @param context worker object for the selector. 540 * @param number The number to be plural-formatted. 541 * @param ec Error code. 542 * @return The selected PluralFormat keyword. 543 * @internal 544 */ 545 virtual UnicodeString select(void *context, double number, UErrorCode& ec) const = 0; 546 }; 547 548 /** 549 * @internal 550 */ 551 class U_I18N_API PluralSelectorAdapter : public PluralSelector { 552 public: PluralSelectorAdapter()553 PluralSelectorAdapter() : pluralRules(NULL) { 554 } 555 556 virtual ~PluralSelectorAdapter(); 557 558 virtual UnicodeString select(void *context, double number, UErrorCode& /*ec*/) const; /**< @internal */ 559 560 void reset(); 561 562 PluralRules* pluralRules; 563 }; 564 565 #if defined(__xlC__) 566 // End of xlC bug workaround, keep remaining definitions private. 567 private: 568 #endif 569 Locale locale; 570 MessagePattern msgPattern; 571 NumberFormat* numberFormat; 572 double offset; 573 PluralSelectorAdapter pluralRulesWrapper; 574 575 PluralFormat(); // default constructor not implemented 576 void init(const PluralRules* rules, UPluralType type, UErrorCode& status); 577 /** 578 * Copies dynamically allocated values (pointer fields). 579 * Others are copied using their copy constructors and assignment operators. 580 */ 581 void copyObjects(const PluralFormat& other); 582 583 UnicodeString& format(const Formattable& numberObject, double number, 584 UnicodeString& appendTo, 585 FieldPosition& pos, 586 UErrorCode& status) const; /**< @internal */ 587 588 /** 589 * Finds the PluralFormat sub-message for the given number, or the "other" sub-message. 590 * @param pattern A MessagePattern. 591 * @param partIndex the index of the first PluralFormat argument style part. 592 * @param selector the PluralSelector for mapping the number (minus offset) to a keyword. 593 * @param context worker object for the selector. 594 * @param number a number to be matched to one of the PluralFormat argument's explicit values, 595 * or mapped via the PluralSelector. 596 * @param ec ICU error code. 597 * @return the sub-message start part index. 598 */ 599 static int32_t findSubMessage( 600 const MessagePattern& pattern, int32_t partIndex, 601 const PluralSelector& selector, void *context, double number, UErrorCode& ec); /**< @internal */ 602 603 void parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, 604 Formattable& result, FieldPosition& pos) const; 605 606 friend class MessageFormat; 607 friend class NFRule; 608 }; 609 610 U_NAMESPACE_END 611 612 #endif /* #if !UCONFIG_NO_FORMATTING */ 613 614 #endif // _PLURFMT 615 //eof 616