1 /* 2 ******************************************************************************* 3 * Copyright (C) 2007-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 package com.ibm.icu.simple; 9 10 import java.io.IOException; 11 import java.io.ObjectInputStream; 12 import java.text.NumberFormat; 13 import java.text.ParsePosition; 14 import java.util.Locale; 15 import java.util.Map; 16 17 import com.ibm.icu.simple.PluralRules.FixedDecimal; 18 import com.ibm.icu.simple.PluralRules.PluralType; 19 import com.ibm.icu.text.MessagePattern; 20 21 /** 22 * <p> 23 * <code>PluralFormat</code> supports the creation of internationalized 24 * messages with plural inflection. It is based on <i>plural 25 * selection</i>, i.e. the caller specifies messages for each 26 * plural case that can appear in the user's language and the 27 * <code>PluralFormat</code> selects the appropriate message based on 28 * the number. 29 * </p> 30 * <h4>The Problem of Plural Forms in Internationalized Messages</h4> 31 * <p> 32 * Different languages have different ways to inflect 33 * plurals. Creating internationalized messages that include plural 34 * forms is only feasible when the framework is able to handle plural 35 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code> 36 * doesn't handle this well, because it attaches a number interval to 37 * each message and selects the message whose interval contains a 38 * given number. This can only handle a finite number of 39 * intervals. But in some languages, like Polish, one plural case 40 * applies to infinitely many intervals (e.g., the paucal case applies to 41 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or 42 * 14). Thus <code>ChoiceFormat</code> is not adequate. 43 * </p><p> 44 * <code>PluralFormat</code> deals with this by breaking the problem 45 * into two parts: 46 * <ul> 47 * <li>It uses <code>PluralRules</code> that can define more complex 48 * conditions for a plural case than just a single interval. These plural 49 * rules define both what plural cases exist in a language, and to 50 * which numbers these cases apply. 51 * <li>It provides predefined plural rules for many languages. Thus, the programmer 52 * need not worry about the plural cases of a language and 53 * does not have to define the plural cases; they can simply 54 * use the predefined keywords. The whole plural formatting of messages can 55 * be done using localized patterns from resource bundles. For predefined plural 56 * rules, see the CLDR <i>Language Plural Rules</i> page at 57 * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html 58 * </ul> 59 * </p> 60 * <h4>Usage of <code>PluralFormat</code></h4> 61 * <p>Note: Typically, plural formatting is done via <code>MessageFormat</code> 62 * with a <code>plural</code> argument type, 63 * rather than using a stand-alone <code>PluralFormat</code>. 64 * </p><p> 65 * This discussion assumes that you use <code>PluralFormat</code> with 66 * a predefined set of plural rules. You can create one using one of 67 * the constructors that takes a <code>ULocale</code> object. To 68 * specify the message pattern, you can either pass it to the 69 * constructor or set it explicitly using the 70 * <code>applyPattern()</code> method. The <code>format()</code> 71 * method takes a number object and selects the message of the 72 * matching plural case. This message will be returned. 73 * </p> 74 * <h5>Patterns and Their Interpretation</h5> 75 * <p> 76 * The pattern text defines the message output for each plural case of the 77 * specified locale. Syntax: 78 * <blockquote><pre> 79 * pluralStyle = [offsetValue] (selector '{' message '}')+ 80 * offsetValue = "offset:" number 81 * selector = explicitValue | keyword 82 * explicitValue = '=' number // adjacent, no white space in between 83 * keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+ 84 * message: see {@link MessageFormat} 85 * </pre></blockquote> 86 * Pattern_White_Space between syntax elements is ignored, except 87 * between the {curly braces} and their sub-message, 88 * and between the '=' and the number of an explicitValue. 89 * 90 * </p><p> 91 * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and 92 * 'other'. You always have to define a message text for the default plural case 93 * "<code>other</code>" which is contained in every rule set. 94 * If you do not specify a message text for a particular plural case, the 95 * message text of the plural case "<code>other</code>" gets assigned to this 96 * plural case. 97 * </p><p> 98 * When formatting, the input number is first matched against the explicitValue clauses. 99 * If there is no exact-number match, then a keyword is selected by calling 100 * the <code>PluralRules</code> with the input number <em>minus the offset</em>. 101 * (The offset defaults to 0 if it is omitted from the pattern string.) 102 * If there is no clause with that keyword, then the "other" clauses is returned. 103 * </p><p> 104 * An unquoted pound sign (<code>#</code>) in the selected sub-message 105 * itself (i.e., outside of arguments nested in the sub-message) 106 * is replaced by the input number minus the offset. 107 * The number-minus-offset value is formatted using a 108 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you 109 * need special number formatting, you have to use a <code>MessageFormat</code> 110 * and explicitly specify a <code>NumberFormat</code> argument. 111 * <strong>Note:</strong> That argument is formatting without subtracting the offset! 112 * If you need a custom format and have a non-zero offset, then you need to pass the 113 * number-minus-offset value as a separate parameter. 114 * </p> 115 * For a usage example, see the {@link MessageFormat} class documentation. 116 * 117 * <h4>Defining Custom Plural Rules</h4> 118 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can 119 * create a <code>PluralRules</code> object and pass it to 120 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this 121 * constructor, this locale will be used to format the number in the message 122 * texts. 123 * </p><p> 124 * For more information about <code>PluralRules</code>, see 125 * {@link PluralRules}. 126 * </p> 127 * 128 * @author tschumann (Tim Schumann) 129 * @stable ICU 3.8 130 */ 131 public class PluralFormat /* extends UFormat */ { 132 private static final long serialVersionUID = 1L; 133 134 /** 135 * The locale used for standard number formatting and getting the predefined 136 * plural rules (if they were not defined explicitely). 137 * @serial 138 */ 139 private Locale locale_ = null; 140 141 /** 142 * The plural rules used for plural selection. 143 * @serial 144 */ 145 private PluralRules pluralRules = null; 146 147 /** 148 * The applied pattern string. 149 * @serial 150 */ 151 private String pattern = null; 152 153 /** 154 * The MessagePattern which contains the parsed structure of the pattern string. 155 */ 156 transient private MessagePattern msgPattern; 157 158 /** 159 * Obsolete with use of MessagePattern since ICU 4.8. Used to be: 160 * The format messages for each plural case. It is a mapping: 161 * <code>String</code>(plural case keyword) --> <code>String</code> 162 * (message for this plural case). 163 * @serial 164 */ 165 private Map<String, String> parsedValues = null; 166 167 /** 168 * This <code>NumberFormat</code> is used for the standard formatting of 169 * the number inserted into the message. 170 * @serial 171 */ 172 private NumberFormat numberFormat = null; 173 174 /** 175 * The offset to subtract before invoking plural rules. 176 */ 177 transient private double offset = 0; 178 179 /** 180 * Creates a new cardinal-number <code>PluralFormat</code> for the default <code>FORMAT</code> locale. 181 * This locale will be used to get the set of plural rules and for standard 182 * number formatting. 183 * @see Category#FORMAT 184 * @stable ICU 3.8 185 */ PluralFormat()186 public PluralFormat() { 187 init(null, PluralType.CARDINAL, Locale.getDefault()); // Category.FORMAT 188 } 189 190 /** 191 * Creates a new cardinal-number <code>PluralFormat</code> for a given locale. 192 * @param locale the <code>PluralFormat</code> will be configured with 193 * rules for this locale. This locale will also be used for standard 194 * number formatting. 195 * @stable ICU 3.8 196 */ PluralFormat(Locale locale)197 public PluralFormat(Locale locale) { 198 init(null, PluralType.CARDINAL, locale); 199 } 200 201 /** 202 * Creates a new <code>PluralFormat</code> for the plural type. 203 * The standard number formatting will be done using the given locale. 204 * @param locale the default number formatting will be done using this 205 * locale. 206 * @param type The plural type (e.g., cardinal or ordinal). 207 * @stable ICU 50 208 */ PluralFormat(Locale locale, PluralType type)209 public PluralFormat(Locale locale, PluralType type) { 210 init(null, type, locale); 211 } 212 213 /* 214 * Initializes the <code>PluralRules</code> object. 215 * Postcondition:<br/> 216 * <code>ulocale</code> : is <code>locale</code><br/> 217 * <code>pluralRules</code>: if <code>rules</code> != <code>null</code> 218 * it's set to rules, otherwise it is the 219 * predefined plural rule set for the locale 220 * <code>ulocale</code>.<br/> 221 * <code>parsedValues</code>: is <code>null</code><br/> 222 * <code>pattern</code>: is <code>null</code><br/> 223 * <code>numberFormat</code>: a <code>NumberFormat</code> for the locale 224 * <code>ulocale</code>. 225 */ init(PluralRules rules, PluralType type, Locale locale)226 private void init(PluralRules rules, PluralType type, Locale locale) { 227 locale_ = locale; 228 pluralRules = (rules == null) ? PluralRules.forLocale(locale, type) 229 : rules; 230 resetPattern(); 231 numberFormat = NumberFormat.getInstance(locale); 232 } 233 resetPattern()234 private void resetPattern() { 235 pattern = null; 236 if(msgPattern != null) { 237 msgPattern.clear(); 238 } 239 offset = 0; 240 } 241 242 /** 243 * Sets the pattern used by this plural format. 244 * The method parses the pattern and creates a map of format strings 245 * for the plural rules. 246 * Patterns and their interpretation are specified in the class description. 247 * 248 * @param pattern the pattern for this plural format. 249 * @throws IllegalArgumentException if the pattern is invalid. 250 * @stable ICU 3.8 251 */ applyPattern(String pattern)252 public void applyPattern(String pattern) { 253 this.pattern = pattern; 254 if (msgPattern == null) { 255 msgPattern = new MessagePattern(); 256 } 257 try { 258 msgPattern.parsePluralStyle(pattern); 259 offset = msgPattern.getPluralOffset(0); 260 } catch(RuntimeException e) { 261 resetPattern(); 262 throw e; 263 } 264 } 265 266 /** 267 * Returns the pattern for this PluralFormat. 268 * 269 * @return the pattern string 270 * @stable ICU 4.2 271 */ toPattern()272 public String toPattern() { 273 return pattern; 274 } 275 276 /** 277 * Finds the PluralFormat sub-message for the given number, or the "other" sub-message. 278 * @param pattern A MessagePattern. 279 * @param partIndex the index of the first PluralFormat argument style part. 280 * @param selector the PluralSelector for mapping the number (minus offset) to a keyword. 281 * @param context worker object for the selector. 282 * @param number a number to be matched to one of the PluralFormat argument's explicit values, 283 * or mapped via the PluralSelector. 284 * @return the sub-message start part index. 285 */ findSubMessage( MessagePattern pattern, int partIndex, PluralSelector selector, Object context, double number)286 /*package*/ static int findSubMessage( 287 MessagePattern pattern, int partIndex, 288 PluralSelector selector, Object context, double number) { 289 int count=pattern.countParts(); 290 double offset; 291 MessagePattern.Part part=pattern.getPart(partIndex); 292 if(part.getType().hasNumericValue()) { 293 offset=pattern.getNumericValue(part); 294 ++partIndex; 295 } else { 296 offset=0; 297 } 298 // The keyword is null until we need to match against a non-explicit, not-"other" value. 299 // Then we get the keyword from the selector. 300 // (In other words, we never call the selector if we match against an explicit value, 301 // or if the only non-explicit keyword is "other".) 302 String keyword=null; 303 // When we find a match, we set msgStart>0 and also set this boolean to true 304 // to avoid matching the keyword again (duplicates are allowed) 305 // while we continue to look for an explicit-value match. 306 boolean haveKeywordMatch=false; 307 // msgStart is 0 until we find any appropriate sub-message. 308 // We remember the first "other" sub-message if we have not seen any 309 // appropriate sub-message before. 310 // We remember the first matching-keyword sub-message if we have not seen 311 // one of those before. 312 // (The parser allows [does not check for] duplicate keywords. 313 // We just have to make sure to take the first one.) 314 // We avoid matching the keyword twice by also setting haveKeywordMatch=true 315 // at the first keyword match. 316 // We keep going until we find an explicit-value match or reach the end of the plural style. 317 int msgStart=0; 318 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples 319 // until ARG_LIMIT or end of plural-only pattern. 320 do { 321 part=pattern.getPart(partIndex++); 322 MessagePattern.Part.Type type=part.getType(); 323 if(type==MessagePattern.Part.Type.ARG_LIMIT) { 324 break; 325 } 326 assert type==MessagePattern.Part.Type.ARG_SELECTOR; 327 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message 328 if(pattern.getPartType(partIndex).hasNumericValue()) { 329 // explicit value like "=2" 330 part=pattern.getPart(partIndex++); 331 if(number==pattern.getNumericValue(part)) { 332 // matches explicit value 333 return partIndex; 334 } 335 } else if(!haveKeywordMatch) { 336 // plural keyword like "few" or "other" 337 // Compare "other" first and call the selector if this is not "other". 338 if(pattern.partSubstringMatches(part, "other")) { 339 if(msgStart==0) { 340 msgStart=partIndex; 341 if(keyword!=null && keyword.equals("other")) { 342 // This is the first "other" sub-message, 343 // and the selected keyword is also "other". 344 // Do not match "other" again. 345 haveKeywordMatch=true; 346 } 347 } 348 } else { 349 if(keyword==null) { 350 keyword=selector.select(context, number-offset); 351 if(msgStart!=0 && keyword.equals("other")) { 352 // We have already seen an "other" sub-message. 353 // Do not match "other" again. 354 haveKeywordMatch=true; 355 // Skip keyword matching but do getLimitPartIndex(). 356 } 357 } 358 if(!haveKeywordMatch && pattern.partSubstringMatches(part, keyword)) { 359 // keyword matches 360 msgStart=partIndex; 361 // Do not match this keyword again. 362 haveKeywordMatch=true; 363 } 364 } 365 } 366 partIndex=pattern.getLimitPartIndex(partIndex); 367 } while(++partIndex<count); 368 return msgStart; 369 } 370 371 /** 372 * Interface for selecting PluralFormat keywords for numbers. 373 * The PluralRules class was intended to implement this interface, 374 * but there is no public API that uses a PluralSelector, 375 * only MessageFormat and PluralFormat have PluralSelector implementations. 376 * Therefore, PluralRules is not marked to implement this non-public interface, 377 * to avoid confusing users. 378 * @internal 379 */ 380 /*package*/ interface PluralSelector { 381 /** 382 * Given a number, returns the appropriate PluralFormat keyword. 383 * 384 * @param context worker object for the selector. 385 * @param number The number to be plural-formatted. 386 * @return The selected PluralFormat keyword. 387 */ select(Object context, double number)388 public String select(Object context, double number); 389 } 390 391 // See PluralSelector: 392 // We could avoid this adapter class if we made PluralSelector public 393 // (or at least publicly visible) and had PluralRules implement PluralSelector. 394 private final class PluralSelectorAdapter implements PluralSelector { select(Object context, double number)395 public String select(Object context, double number) { 396 FixedDecimal dec = (FixedDecimal) context; 397 assert dec.source == number; 398 return pluralRules.select(dec); 399 } 400 } 401 transient private PluralSelectorAdapter pluralRulesWrapper = new PluralSelectorAdapter(); 402 403 /** 404 * This method is not yet supported by <code>PluralFormat</code>. 405 * @param text the string to be parsed. 406 * @param parsePosition defines the position where parsing is to begin, 407 * and upon return, the position where parsing left off. If the position 408 * has not changed upon return, then parsing failed. 409 * @return nothing because this method is not yet implemented. 410 * @throws UnsupportedOperationException will always be thrown by this method. 411 * @stable ICU 3.8 412 */ parse(String text, ParsePosition parsePosition)413 public Number parse(String text, ParsePosition parsePosition) { 414 throw new UnsupportedOperationException(); 415 } 416 417 /** 418 * This method is not yet supported by <code>PluralFormat</code>. 419 * @param source the string to be parsed. 420 * @param pos defines the position where parsing is to begin, 421 * and upon return, the position where parsing left off. If the position 422 * has not changed upon return, then parsing failed. 423 * @return nothing because this method is not yet implemented. 424 * @throws UnsupportedOperationException will always be thrown by this method. 425 * @stable ICU 3.8 426 */ parseObject(String source, ParsePosition pos)427 public Object parseObject(String source, ParsePosition pos) { 428 throw new UnsupportedOperationException(); 429 } 430 431 /** 432 * Returns true if this equals the provided PluralFormat. 433 * @param rhs the PluralFormat to compare against 434 * @return true if this equals rhs 435 * @stable ICU 3.8 436 */ equals(PluralFormat rhs)437 public boolean equals(PluralFormat rhs) { 438 return equals((Object)rhs); 439 } 440 441 /** 442 * {@inheritDoc} 443 * @stable ICU 3.8 444 */ 445 @Override hashCode()446 public int hashCode() { 447 return pluralRules.hashCode() ^ parsedValues.hashCode(); 448 } 449 450 /** 451 * {@inheritDoc} 452 * @stable ICU 3.8 453 */ 454 @Override toString()455 public String toString() { 456 StringBuilder buf = new StringBuilder(); 457 buf.append("locale=" + locale_); 458 buf.append(", rules='" + pluralRules + "'"); 459 buf.append(", pattern='" + pattern + "'"); 460 buf.append(", format='" + numberFormat + "'"); 461 return buf.toString(); 462 } 463 readObject(ObjectInputStream in)464 private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { 465 in.defaultReadObject(); 466 pluralRulesWrapper = new PluralSelectorAdapter(); 467 // Ignore the parsedValues from an earlier class version (before ICU 4.8) 468 // and rebuild the msgPattern. 469 parsedValues = null; 470 if (pattern != null) { 471 applyPattern(pattern); 472 } 473 } 474 } 475