1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * created on: 2010aug21 9 * created by: Markus W. Scherer 10 */ 11 12 package com.ibm.icu.text; 13 14 import java.util.ArrayList; 15 import java.util.Locale; 16 17 import com.ibm.icu.impl.ICUConfig; 18 import com.ibm.icu.impl.PatternProps; 19 import com.ibm.icu.util.Freezable; 20 import com.ibm.icu.util.ICUCloneNotSupportedException; 21 22 //Note: Minimize ICU dependencies, only use a very small part of the ICU core. 23 //In particular, do not depend on *Format classes. 24 25 /** 26 * Parses and represents ICU MessageFormat patterns. 27 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 28 * Used in the implementations of those classes as well as in tools 29 * for message validation, translation and format conversion. 30 * <p> 31 * The parser handles all syntax relevant for identifying message arguments. 32 * This includes "complex" arguments whose style strings contain 33 * nested MessageFormat pattern substrings. 34 * For "simple" arguments (with no nested MessageFormat pattern substrings), 35 * the argument style is not parsed any further. 36 * <p> 37 * The parser handles named and numbered message arguments and allows both in one message. 38 * <p> 39 * Once a pattern has been parsed successfully, iterate through the parsed data 40 * with countParts(), getPart() and related methods. 41 * <p> 42 * The data logically represents a parse tree, but is stored and accessed 43 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 44 * Arguments and nested messages are best handled via recursion. 
45 * For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns 46 * the index of the corresponding _LIMIT "part". 47 * <p> 48 * List of "parts": 49 * <pre> 50 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 51 * argument = noneArg | simpleArg | complexArg 52 * complexArg = choiceArg | pluralArg | selectArg 53 * 54 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 55 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 56 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 57 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 58 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 59 * 60 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 61 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 62 * selectStyle = (ARG_SELECTOR message)+ 63 * </pre> 64 * <ul> 65 * <li>Literal output text is not represented directly by "parts" but accessed 66 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 67 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 68 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 69 * the less-than-or-equal-to sign (U+2264). 70 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 71 * The optional numeric Part between each (ARG_SELECTOR, message) pair 72 * is the value of an explicit-number selector like "=2", 73 * otherwise the selector is a non-numeric identifier. 74 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 75 * </ul> 76 * <p> 77 * This class is not intended for public subclassing. 
78 * 79 * @stable ICU 4.8 80 * @author Markus Scherer 81 */ 82 public final class MessagePattern implements Cloneable, Freezable<MessagePattern> { 83 /** 84 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 85 * The default is DOUBLE_OPTIONAL unless overridden via ICUConfig 86 * (/com/ibm/icu/ICUConfig.properties). 87 * <p> 88 * A pair of adjacent apostrophes always results in a single apostrophe in the output, 89 * even when the pair is between two single, text-quoting apostrophes. 90 * <p> 91 * The following table shows examples of desired MessageFormat.format() output 92 * with the pattern strings that yield that output. 93 * 94 * <table> 95 * <tr> 96 * <th>Desired output</th> 97 * <th>DOUBLE_OPTIONAL</th> 98 * <th>DOUBLE_REQUIRED</th> 99 * </tr> 100 * <tr> 101 * <td>I see {many}</td> 102 * <td>I see '{many}'</td> 103 * <td>(same)</td> 104 * </tr> 105 * <tr> 106 * <td>I said {'Wow!'}</td> 107 * <td>I said '{''Wow!''}'</td> 108 * <td>(same)</td> 109 * </tr> 110 * <tr> 111 * <td>I don't know</td> 112 * <td>I don't know OR<br> I don''t know</td> 113 * <td>I don''t know</td> 114 * </tr> 115 * </table> 116 * @stable ICU 4.8 117 */ 118 public enum ApostropheMode { 119 /** 120 * A literal apostrophe is represented by 121 * either a single or a double apostrophe pattern character. 122 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 123 * if it immediately precedes a curly brace {}, 124 * or a pipe symbol | if inside a choice format, 125 * or a pound symbol # if inside a plural format. 126 * <p> 127 * This is the default behavior starting with ICU 4.8. 128 * @stable ICU 4.8 129 */ 130 DOUBLE_OPTIONAL, 131 /** 132 * A literal apostrophe must be represented by 133 * a double apostrophe pattern character. 134 * A single apostrophe always starts quoted literal text. 135 * <p> 136 * This is the behavior of ICU 4.6 and earlier, and of {@link java.text.MessageFormat}. 
137 * @stable ICU 4.8 138 */ 139 DOUBLE_REQUIRED 140 } 141 142 /** 143 * Constructs an empty MessagePattern with default ApostropheMode. 144 * @stable ICU 4.8 145 */ MessagePattern()146 public MessagePattern() { 147 aposMode=defaultAposMode; 148 } 149 150 /** 151 * Constructs an empty MessagePattern. 152 * @param mode Explicit ApostropheMode. 153 * @stable ICU 4.8 154 */ MessagePattern(ApostropheMode mode)155 public MessagePattern(ApostropheMode mode) { 156 aposMode=mode; 157 } 158 159 /** 160 * Constructs a MessagePattern with default ApostropheMode and 161 * parses the MessageFormat pattern string. 162 * @param pattern a MessageFormat pattern string 163 * @throws IllegalArgumentException for syntax errors in the pattern string 164 * @throws IndexOutOfBoundsException if certain limits are exceeded 165 * (e.g., argument number too high, argument name too long, etc.) 166 * @throws NumberFormatException if a number could not be parsed 167 * @stable ICU 4.8 168 */ MessagePattern(String pattern)169 public MessagePattern(String pattern) { 170 aposMode=defaultAposMode; 171 parse(pattern); 172 } 173 174 /** 175 * Parses a MessageFormat pattern string. 176 * @param pattern a MessageFormat pattern string 177 * @return this 178 * @throws IllegalArgumentException for syntax errors in the pattern string 179 * @throws IndexOutOfBoundsException if certain limits are exceeded 180 * (e.g., argument number too high, argument name too long, etc.) 181 * @throws NumberFormatException if a number could not be parsed 182 * @stable ICU 4.8 183 */ parse(String pattern)184 public MessagePattern parse(String pattern) { 185 preParse(pattern); 186 parseMessage(0, 0, 0, ArgType.NONE); 187 postParse(); 188 return this; 189 } 190 191 /** 192 * Parses a ChoiceFormat pattern string. 
193 * @param pattern a ChoiceFormat pattern string 194 * @return this 195 * @throws IllegalArgumentException for syntax errors in the pattern string 196 * @throws IndexOutOfBoundsException if certain limits are exceeded 197 * (e.g., argument number too high, argument name too long, etc.) 198 * @throws NumberFormatException if a number could not be parsed 199 * @stable ICU 4.8 200 */ parseChoiceStyle(String pattern)201 public MessagePattern parseChoiceStyle(String pattern) { 202 preParse(pattern); 203 parseChoiceStyle(0, 0); 204 postParse(); 205 return this; 206 } 207 208 /** 209 * Parses a PluralFormat pattern string. 210 * @param pattern a PluralFormat pattern string 211 * @return this 212 * @throws IllegalArgumentException for syntax errors in the pattern string 213 * @throws IndexOutOfBoundsException if certain limits are exceeded 214 * (e.g., argument number too high, argument name too long, etc.) 215 * @throws NumberFormatException if a number could not be parsed 216 * @stable ICU 4.8 217 */ parsePluralStyle(String pattern)218 public MessagePattern parsePluralStyle(String pattern) { 219 preParse(pattern); 220 parsePluralOrSelectStyle(ArgType.PLURAL, 0, 0); 221 postParse(); 222 return this; 223 } 224 225 /** 226 * Parses a SelectFormat pattern string. 227 * @param pattern a SelectFormat pattern string 228 * @return this 229 * @throws IllegalArgumentException for syntax errors in the pattern string 230 * @throws IndexOutOfBoundsException if certain limits are exceeded 231 * (e.g., argument number too high, argument name too long, etc.) 232 * @throws NumberFormatException if a number could not be parsed 233 * @stable ICU 4.8 234 */ parseSelectStyle(String pattern)235 public MessagePattern parseSelectStyle(String pattern) { 236 preParse(pattern); 237 parsePluralOrSelectStyle(ArgType.SELECT, 0, 0); 238 postParse(); 239 return this; 240 } 241 242 /** 243 * Clears this MessagePattern. 244 * countParts() will return 0. 
245 * @stable ICU 4.8 246 */ clear()247 public void clear() { 248 // Mostly the same as preParse(). 249 if(isFrozen()) { 250 throw new UnsupportedOperationException( 251 "Attempt to clear() a frozen MessagePattern instance."); 252 } 253 msg=null; 254 hasArgNames=hasArgNumbers=false; 255 needsAutoQuoting=false; 256 parts.clear(); 257 if(numericValues!=null) { 258 numericValues.clear(); 259 } 260 } 261 262 /** 263 * Clears this MessagePattern and sets the ApostropheMode. 264 * countParts() will return 0. 265 * @param mode The new ApostropheMode. 266 * @stable ICU 4.8 267 */ clearPatternAndSetApostropheMode(ApostropheMode mode)268 public void clearPatternAndSetApostropheMode(ApostropheMode mode) { 269 clear(); 270 aposMode=mode; 271 } 272 273 /** 274 * @param other another object to compare with. 275 * @return true if this object is equivalent to the other one. 276 * @stable ICU 4.8 277 */ 278 @Override equals(Object other)279 public boolean equals(Object other) { 280 if(this==other) { 281 return true; 282 } 283 if(other==null || getClass()!=other.getClass()) { 284 return false; 285 } 286 MessagePattern o=(MessagePattern)other; 287 return 288 aposMode.equals(o.aposMode) && 289 (msg==null ? o.msg==null : msg.equals(o.msg)) && 290 parts.equals(o.parts); 291 // No need to compare numericValues if msg and parts are the same. 292 } 293 294 /** 295 * {@inheritDoc} 296 * @stable ICU 4.8 297 */ 298 @Override hashCode()299 public int hashCode() { 300 return (aposMode.hashCode()*37+(msg!=null ? msg.hashCode() : 0))*37+parts.hashCode(); 301 } 302 303 /** 304 * @return this instance's ApostropheMode. 
305 * @stable ICU 4.8 306 */ getApostropheMode()307 public ApostropheMode getApostropheMode() { 308 return aposMode; 309 } 310 311 /** 312 * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED 313 * @internal 314 */ jdkAposMode()315 /* package */ boolean jdkAposMode() { 316 return aposMode == ApostropheMode.DOUBLE_REQUIRED; 317 } 318 319 /** 320 * @return the parsed pattern string (null if none was parsed). 321 * @stable ICU 4.8 322 */ getPatternString()323 public String getPatternString() { 324 return msg; 325 } 326 327 /** 328 * Does the parsed pattern have named arguments like {first_name}? 329 * @return true if the parsed pattern has at least one named argument. 330 * @stable ICU 4.8 331 */ hasNamedArguments()332 public boolean hasNamedArguments() { 333 return hasArgNames; 334 } 335 336 /** 337 * Does the parsed pattern have numbered arguments like {2}? 338 * @return true if the parsed pattern has at least one numbered argument. 339 * @stable ICU 4.8 340 */ hasNumberedArguments()341 public boolean hasNumberedArguments() { 342 return hasArgNumbers; 343 } 344 345 /** 346 * {@inheritDoc} 347 * @stable ICU 4.8 348 */ 349 @Override toString()350 public String toString() { 351 return msg; 352 } 353 354 /** 355 * Validates and parses an argument name or argument number string. 356 * An argument name must be a "pattern identifier", that is, it must contain 357 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 358 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 359 * @param name Input string. 360 * @return >=0 if the name is a valid number, 361 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 362 * ARG_NAME_NOT_VALID (-2) if it is neither. 
363 * @stable ICU 4.8 364 */ validateArgumentName(String name)365 public static int validateArgumentName(String name) { 366 if(!PatternProps.isIdentifier(name)) { 367 return ARG_NAME_NOT_VALID; 368 } 369 return parseArgNumber(name, 0, name.length()); 370 } 371 372 /** 373 * Return value from {@link #validateArgumentName(String)} for when 374 * the string is a valid "pattern identifier" but not a number. 375 * @stable ICU 4.8 376 */ 377 public static final int ARG_NAME_NOT_NUMBER=-1; 378 379 /** 380 * Return value from {@link #validateArgumentName(String)} for when 381 * the string is invalid. 382 * It might not be a valid "pattern identifier", 383 * or it have only ASCII digits but there is a leading zero or the number is too large. 384 * @stable ICU 4.8 385 */ 386 public static final int ARG_NAME_NOT_VALID=-2; 387 388 /** 389 * Returns a version of the parsed pattern string where each ASCII apostrophe 390 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 391 * <p> 392 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 393 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 394 * @return the deep-auto-quoted version of the parsed pattern string. 395 * @see MessageFormat#autoQuoteApostrophe(String) 396 * @stable ICU 4.8 397 */ autoQuoteApostropheDeep()398 public String autoQuoteApostropheDeep() { 399 if(!needsAutoQuoting) { 400 return msg; 401 } 402 StringBuilder modified=null; 403 // Iterate backward so that the insertion indexes do not change. 
404 int count=countParts(); 405 for(int i=count; i>0;) { 406 Part part; 407 if((part=getPart(--i)).getType()==Part.Type.INSERT_CHAR) { 408 if(modified==null) { 409 modified=new StringBuilder(msg.length()+10).append(msg); 410 } 411 modified.insert(part.index, (char)part.value); 412 } 413 } 414 if(modified==null) { 415 return msg; 416 } else { 417 return modified.toString(); 418 } 419 } 420 421 /** 422 * Returns the number of "parts" created by parsing the pattern string. 423 * Returns 0 if no pattern has been parsed or clear() was called. 424 * @return the number of pattern parts. 425 * @stable ICU 4.8 426 */ countParts()427 public int countParts() { 428 return parts.size(); 429 } 430 431 /** 432 * Gets the i-th pattern "part". 433 * @param i The index of the Part data. (0..countParts()-1) 434 * @return the i-th pattern "part". 435 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range 436 * @stable ICU 4.8 437 */ getPart(int i)438 public Part getPart(int i) { 439 return parts.get(i); 440 } 441 442 /** 443 * Returns the Part.Type of the i-th pattern "part". 444 * Convenience method for getPart(i).getType(). 445 * @param i The index of the Part data. (0..countParts()-1) 446 * @return The Part.Type of the i-th Part. 447 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range 448 * @stable ICU 4.8 449 */ getPartType(int i)450 public Part.Type getPartType(int i) { 451 return parts.get(i).type; 452 } 453 454 /** 455 * Returns the pattern index of the specified pattern "part". 456 * Convenience method for getPart(partIndex).getIndex(). 457 * @param partIndex The index of the Part data. (0..countParts()-1) 458 * @return The pattern index of this Part. 
459 * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range 460 * @stable ICU 4.8 461 */ getPatternIndex(int partIndex)462 public int getPatternIndex(int partIndex) { 463 return parts.get(partIndex).index; 464 } 465 466 /** 467 * Returns the substring of the pattern string indicated by the Part. 468 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 469 * @param part a part of this MessagePattern. 470 * @return the substring associated with part. 471 * @stable ICU 4.8 472 */ getSubstring(Part part)473 public String getSubstring(Part part) { 474 int index=part.index; 475 return msg.substring(index, index+part.length); 476 } 477 478 /** 479 * Compares the part's substring with the input string s. 480 * @param part a part of this MessagePattern. 481 * @param s a string. 482 * @return true if getSubstring(part).equals(s). 483 * @stable ICU 4.8 484 */ partSubstringMatches(Part part, String s)485 public boolean partSubstringMatches(Part part, String s) { 486 return part.length == s.length() && msg.regionMatches(part.index, s, 0, part.length); 487 } 488 489 /** 490 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 491 * @param part a part of this MessagePattern. 492 * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part. 493 * @stable ICU 4.8 494 */ getNumericValue(Part part)495 public double getNumericValue(Part part) { 496 Part.Type type=part.type; 497 if(type==Part.Type.ARG_INT) { 498 return part.value; 499 } else if(type==Part.Type.ARG_DOUBLE) { 500 return numericValues.get(part.value); 501 } else { 502 return NO_NUMERIC_VALUE; 503 } 504 } 505 506 /** 507 * Special value that is returned by getNumericValue(Part) when no 508 * numeric value is defined for a part. 
509 * @see #getNumericValue 510 * @stable ICU 4.8 511 */ 512 public static final double NO_NUMERIC_VALUE=-123456789; 513 514 /** 515 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 516 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 517 * @return the "offset:" value. 518 * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range 519 * @stable ICU 4.8 520 */ getPluralOffset(int pluralStart)521 public double getPluralOffset(int pluralStart) { 522 Part part=parts.get(pluralStart); 523 if(part.type.hasNumericValue()) { 524 return getNumericValue(part); 525 } else { 526 return 0; 527 } 528 } 529 530 /** 531 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 532 * @param start The index of some Part data (0..countParts()-1); 533 * this Part should be of Type ARG_START or MSG_START. 534 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 535 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 536 * @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range 537 * @stable ICU 4.8 538 */ getLimitPartIndex(int start)539 public int getLimitPartIndex(int start) { 540 int limit=parts.get(start).limitPartIndex; 541 if(limit<start) { 542 return start; 543 } 544 return limit; 545 } 546 547 /** 548 * A message pattern "part", representing a pattern parsing event. 549 * There is a part for the start and end of a message or argument, 550 * for quoting and escaping of and with ASCII apostrophes, 551 * and for syntax elements of "complex" arguments. 552 * @stable ICU 4.8 553 */ 554 public static final class Part { Part(Type t, int i, int l, int v)555 private Part(Type t, int i, int l, int v) { 556 type=t; 557 index=i; 558 length=(char)l; 559 value=(short)v; 560 } 561 562 /** 563 * Returns the type of this part. 564 * @return the part type. 
565 * @stable ICU 4.8 566 */ getType()567 public Type getType() { 568 return type; 569 } 570 571 /** 572 * Returns the pattern string index associated with this Part. 573 * @return this part's pattern string index. 574 * @stable ICU 4.8 575 */ getIndex()576 public int getIndex() { 577 return index; 578 } 579 580 /** 581 * Returns the length of the pattern substring associated with this Part. 582 * This is 0 for some parts. 583 * @return this part's pattern substring length. 584 * @stable ICU 4.8 585 */ getLength()586 public int getLength() { 587 return length; 588 } 589 590 /** 591 * Returns the pattern string limit (exclusive-end) index associated with this Part. 592 * Convenience method for getIndex()+getLength(). 593 * @return this part's pattern string limit index, same as getIndex()+getLength(). 594 * @stable ICU 4.8 595 */ getLimit()596 public int getLimit() { 597 return index+length; 598 } 599 600 /** 601 * Returns a value associated with this part. 602 * See the documentation of each part type for details. 603 * @return the part value. 604 * @stable ICU 4.8 605 */ getValue()606 public int getValue() { 607 return value; 608 } 609 610 /** 611 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 612 * otherwise ArgType.NONE. 613 * @return the argument type for this part. 614 * @stable ICU 4.8 615 */ getArgType()616 public ArgType getArgType() { 617 Type type=getType(); 618 if(type==Type.ARG_START || type==Type.ARG_LIMIT) { 619 return argTypes[value]; 620 } else { 621 return ArgType.NONE; 622 } 623 } 624 625 /** 626 * Part type constants. 627 * @stable ICU 4.8 628 */ 629 public enum Type { 630 /** 631 * Start of a message pattern (main or nested). 632 * The length is 0 for the top-level message 633 * and for a choice argument sub-message, otherwise 1 for the '{'. 634 * The value indicates the nesting level, starting with 0 for the main message. 635 * <p> 636 * There is always a later MSG_LIMIT part. 
637 * @stable ICU 4.8 638 */ 639 MSG_START, 640 /** 641 * End of a message pattern (main or nested). 642 * The length is 0 for the top-level message and 643 * the last sub-message of a choice argument, 644 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 645 * The value indicates the nesting level, starting with 0 for the main message. 646 * @stable ICU 4.8 647 */ 648 MSG_LIMIT, 649 /** 650 * Indicates a substring of the pattern string which is to be skipped when formatting. 651 * For example, an apostrophe that begins or ends quoted text 652 * would be indicated with such a part. 653 * The value is undefined and currently always 0. 654 * @stable ICU 4.8 655 */ 656 SKIP_SYNTAX, 657 /** 658 * Indicates that a syntax character needs to be inserted for auto-quoting. 659 * The length is 0. 660 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 661 * @stable ICU 4.8 662 */ 663 INSERT_CHAR, 664 /** 665 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 666 * When formatting, replace this part's substring with the 667 * (value-offset) for the plural argument value. 668 * The value is undefined and currently always 0. 669 * @stable ICU 4.8 670 */ 671 REPLACE_NUMBER, 672 /** 673 * Start of an argument. 674 * The length is 1 for the '{'. 675 * The value is the ordinal value of the ArgType. Use getArgType(). 676 * <p> 677 * This part is followed by either an ARG_NUMBER or ARG_NAME, 678 * followed by optional argument sub-parts (see ArgType constants) 679 * and finally an ARG_LIMIT part. 680 * @stable ICU 4.8 681 */ 682 ARG_START, 683 /** 684 * End of an argument. 685 * The length is 1 for the '}'. 686 * The value is the ordinal value of the ArgType. Use getArgType(). 687 * @stable ICU 4.8 688 */ 689 ARG_LIMIT, 690 /** 691 * The argument number, provided by the value. 692 * @stable ICU 4.8 693 */ 694 ARG_NUMBER, 695 /** 696 * The argument name. 697 * The value is undefined and currently always 0. 
698 * @stable ICU 4.8 699 */ 700 ARG_NAME, 701 /** 702 * The argument type. 703 * The value is undefined and currently always 0. 704 * @stable ICU 4.8 705 */ 706 ARG_TYPE, 707 /** 708 * The argument style text. 709 * The value is undefined and currently always 0. 710 * @stable ICU 4.8 711 */ 712 ARG_STYLE, 713 /** 714 * A selector substring in a "complex" argument style. 715 * The value is undefined and currently always 0. 716 * @stable ICU 4.8 717 */ 718 ARG_SELECTOR, 719 /** 720 * An integer value, for example the offset or an explicit selector value 721 * in a PluralFormat style. 722 * The part value is the integer value. 723 * @stable ICU 4.8 724 */ 725 ARG_INT, 726 /** 727 * A numeric value, for example the offset or an explicit selector value 728 * in a PluralFormat style. 729 * The part value is an index into an internal array of numeric values; 730 * use getNumericValue(). 731 * @stable ICU 4.8 732 */ 733 ARG_DOUBLE; 734 735 /** 736 * Indicates whether this part has a numeric value. 737 * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}. 738 * @return true if this part has a numeric value. 739 * @stable ICU 4.8 740 */ hasNumericValue()741 public boolean hasNumericValue() { 742 return this==ARG_INT || this==ARG_DOUBLE; 743 } 744 } 745 746 /** 747 * @return a string representation of this part. 748 * @stable ICU 4.8 749 */ 750 @Override toString()751 public String toString() { 752 String valueString=(type==Type.ARG_START || type==Type.ARG_LIMIT) ? 753 getArgType().name() : Integer.toString(value); 754 return type.name()+"("+valueString+")@"+index; 755 } 756 757 /** 758 * @param other another object to compare with. 759 * @return true if this object is equivalent to the other one. 
760 * @stable ICU 4.8 761 */ 762 @Override equals(Object other)763 public boolean equals(Object other) { 764 if(this==other) { 765 return true; 766 } 767 if(other==null || getClass()!=other.getClass()) { 768 return false; 769 } 770 Part o=(Part)other; 771 return 772 type.equals(o.type) && 773 index==o.index && 774 length==o.length && 775 value==o.value && 776 limitPartIndex==o.limitPartIndex; 777 } 778 779 /** 780 * {@inheritDoc} 781 * @stable ICU 4.8 782 */ 783 @Override hashCode()784 public int hashCode() { 785 return ((type.hashCode()*37+index)*37+length)*37+value; 786 } 787 788 private static final int MAX_LENGTH=0xffff; 789 private static final int MAX_VALUE=Short.MAX_VALUE; 790 791 // Some fields are not final because they are modified during pattern parsing. 792 // After pattern parsing, the parts are effectively immutable. 793 private final Type type; 794 private final int index; 795 private final char length; 796 private short value; 797 private int limitPartIndex; 798 } 799 800 /** 801 * Argument type constants. 802 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 803 * 804 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 805 * with a nesting level one greater than the surrounding message. 806 * @stable ICU 4.8 807 */ 808 public enum ArgType { 809 /** 810 * The argument has no specified type. 811 * @stable ICU 4.8 812 */ 813 NONE, 814 /** 815 * The argument has a "simple" type which is provided by the ARG_TYPE part. 816 * An ARG_STYLE part might follow that. 817 * @stable ICU 4.8 818 */ 819 SIMPLE, 820 /** 821 * The argument is a ChoiceFormat with one or more 822 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 823 * @stable ICU 4.8 824 */ 825 CHOICE, 826 /** 827 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 828 * (e.g., offset:1) 829 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 
830 * If the selector has an explicit value (e.g., =2), then 831 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 832 * Otherwise the message immediately follows the ARG_SELECTOR. 833 * @stable ICU 4.8 834 */ 835 PLURAL, 836 /** 837 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 838 * @stable ICU 4.8 839 */ 840 SELECT, 841 /** 842 * The argument is an ordinal-number PluralFormat 843 * with the same style parts sequence and semantics as {@link ArgType#PLURAL}. 844 * @stable ICU 50 845 */ 846 SELECTORDINAL; 847 848 /** 849 * @return true if the argument type has a plural style part sequence and semantics, 850 * for example {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL}. 851 * @stable ICU 50 852 */ hasPluralStyle()853 public boolean hasPluralStyle() { 854 return this == PLURAL || this == SELECTORDINAL; 855 } 856 } 857 858 /** 859 * Creates and returns a copy of this object. 860 * @return a copy of this object (or itself if frozen). 861 * @stable ICU 4.8 862 */ 863 @Override clone()864 public Object clone() { 865 if(isFrozen()) { 866 return this; 867 } else { 868 return cloneAsThawed(); 869 } 870 } 871 872 /** 873 * Creates and returns an unfrozen copy of this object. 874 * @return a copy of this object. 875 * @stable ICU 4.8 876 */ 877 @Override 878 @SuppressWarnings("unchecked") cloneAsThawed()879 public MessagePattern cloneAsThawed() { 880 MessagePattern newMsg; 881 try { 882 newMsg=(MessagePattern)super.clone(); 883 } catch (CloneNotSupportedException e) { 884 throw new ICUCloneNotSupportedException(e); 885 } 886 newMsg.parts=(ArrayList<Part>)parts.clone(); 887 if(numericValues!=null) { 888 newMsg.numericValues=(ArrayList<Double>)numericValues.clone(); 889 } 890 newMsg.frozen=false; 891 return newMsg; 892 } 893 894 /** 895 * Freezes this object, making it immutable and thread-safe. 
* @return this
     * @stable ICU 4.8
     */
    @Override
    public MessagePattern freeze() {
        frozen=true;
        return this;
    }

    /**
     * Determines whether this object is frozen (immutable) or not.
     * @return true if this object is frozen.
     * @stable ICU 4.8
     */
    @Override
    public boolean isFrozen() {
        return frozen;
    }

    /**
     * Resets the parse state and stores the new pattern string before parsing starts.
     * @param pattern the pattern string that is about to be parsed
     * @throws UnsupportedOperationException if this object is frozen
     */
    private void preParse(String pattern) {
        if(isFrozen()) {
            throw new UnsupportedOperationException(
                "Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance.");
        }
        msg=pattern;
        hasArgNames=hasArgNumbers=false;
        needsAutoQuoting=false;
        parts.clear();
        if(numericValues!=null) {
            numericValues.clear();
        }
    }

    /**
     * Hook that the public parse methods call after parsing; currently empty.
     */
    private void postParse() {
        // Nothing to be done currently.
    }

    /**
     * Parses one message (top-level or nested) starting at index and records its parts,
     * from a MSG_START part to the matching MSG_LIMIT part.
     * @param index pattern index where the message starts
     * @param msgStartLength length recorded for the MSG_START part; the index is advanced by it
     * @param nestingLevel nesting level recorded as the value of the MSG_START and MSG_LIMIT parts
     * @param parentType type of the enclosing argument; enables '|' handling for CHOICE
     *        and '#' handling for plural-style types
     * @return the pattern index at which the caller continues: after the closing '}',
     *         or at the '}' or '|' terminator when parentType is CHOICE,
     *         or the pattern length when the end of the pattern was reached
     * @throws IndexOutOfBoundsException if nestingLevel exceeds Part.MAX_VALUE
     * @throws IllegalArgumentException if the pattern ends inside a nested message,
     *         unless inTopLevelChoiceMessage() allows it
     */
    private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) {
        if(nestingLevel>Part.MAX_VALUE) {
            throw new IndexOutOfBoundsException();
        }
        // Remember where this message's MSG_START part is; it is passed to addLimitPart() below.
        int msgStart=parts.size();
        addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel);
        index+=msgStartLength;
        while(index<msg.length()) {
            // Note: index already points past c from here on.
            char c=msg.charAt(index++);
            if(c=='\'') {
                if(index==msg.length()) {
                    // The apostrophe is the last character in the pattern.
                    // Add a Part for auto-quoting.
                    addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                    needsAutoQuoting=true;
                } else {
                    c=msg.charAt(index);
                    if(c=='\'') {
                        // double apostrophe, skip the second one
                        addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                    } else if(
                        aposMode==ApostropheMode.DOUBLE_REQUIRED ||
                        c=='{' || c=='}' ||
                        (parentType==ArgType.CHOICE && c=='|') ||
                        (parentType.hasPluralStyle() && c=='#')
                    ) {
                        // skip the quote-starting apostrophe
                        addPart(Part.Type.SKIP_SYNTAX, index-1, 1, 0);
                        // find the end of the quoted literal text
                        for(;;) {
                            index=msg.indexOf('\'', index+1);
                            if(index>=0) {
                                if((index+1)<msg.length() && msg.charAt(index+1)=='\'') {
                                    // double apostrophe inside quoted literal text
                                    // still encodes a single apostrophe, skip the second one
                                    addPart(Part.Type.SKIP_SYNTAX, ++index, 1, 0);
                                } else {
                                    // skip the quote-ending apostrophe
                                    addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                                    break;
                                }
                            } else {
                                // The quoted text reaches to the end of the message.
                                index=msg.length();
                                // Add a Part for auto-quoting.
                                addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                                needsAutoQuoting=true;
                                break;
                            }
                        }
                    } else {
                        // Interpret the apostrophe as literal text.
                        // Add a Part for auto-quoting.
                        addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                        needsAutoQuoting=true;
                    }
                }
            } else if(parentType.hasPluralStyle() && c=='#') {
                // The unquoted # in a plural message fragment will be replaced
                // with the (number-offset).
                addPart(Part.Type.REPLACE_NUMBER, index-1, 1, 0);
            } else if(c=='{') {
                // Nested argument: parseArg() returns the index at which to continue.
                index=parseArg(index-1, 1, nestingLevel);
            } else if((nestingLevel>0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) {
                // Finish the message before the terminator.
                // In a choice style, report the "}" substring only for the following ARG_LIMIT,
                // not for this MSG_LIMIT.
                int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1;
                addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel);
                if(parentType==ArgType.CHOICE) {
                    // Let the choice style parser see the '}' or '|'.
                    return index-1;
                } else {
                    // continue parsing after the '}'
                    return index;
                }
            }  // else: c is part of literal text
        }
        // The end of the pattern was reached without a terminator for this message.
        if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel);
        return index;
    }

    private int parseArg(int index, int argStartLength, int nestingLevel) {
        int argStart=parts.size();
        ArgType argType=ArgType.NONE;
        addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal());
        int nameIndex=index=skipWhiteSpace(index+argStartLength);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        // parse argument name or number
        index=skipIdentifier(index);
        int number=parseArgNumber(nameIndex, index);
        if(number>=0) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) {
                throw new IndexOutOfBoundsException(
                    "Argument number too large: "+prefix(nameIndex));
            }
            hasArgNumbers=true;
            addPart(Part.Type.ARG_NUMBER, nameIndex, length, number);
        } else if(number==ARG_NAME_NOT_NUMBER) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Argument name too long: "+prefix(nameIndex));
            }
            hasArgNames=true;
            addPart(Part.Type.ARG_NAME, nameIndex, length, 0);
        }
else { // number<-1 (ARG_NAME_NOT_VALID) 1048 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex)); 1049 } 1050 index=skipWhiteSpace(index); 1051 if(index==msg.length()) { 1052 throw new IllegalArgumentException( 1053 "Unmatched '{' braces in message "+prefix()); 1054 } 1055 char c=msg.charAt(index); 1056 if(c=='}') { 1057 // all done 1058 } else if(c!=',') { 1059 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex)); 1060 } else /* ',' */ { 1061 // parse argument type: case-sensitive a-zA-Z 1062 int typeIndex=index=skipWhiteSpace(index+1); 1063 while(index<msg.length() && isArgTypeChar(msg.charAt(index))) { 1064 ++index; 1065 } 1066 int length=index-typeIndex; 1067 index=skipWhiteSpace(index); 1068 if(index==msg.length()) { 1069 throw new IllegalArgumentException( 1070 "Unmatched '{' braces in message "+prefix()); 1071 } 1072 if(length==0 || ((c=msg.charAt(index))!=',' && c!='}')) { 1073 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex)); 1074 } 1075 if(length>Part.MAX_LENGTH) { 1076 throw new IndexOutOfBoundsException( 1077 "Argument type name too long: "+prefix(nameIndex)); 1078 } 1079 argType=ArgType.SIMPLE; 1080 if(length==6) { 1081 // case-insensitive comparisons for complex-type names 1082 if(isChoice(typeIndex)) { 1083 argType=ArgType.CHOICE; 1084 } else if(isPlural(typeIndex)) { 1085 argType=ArgType.PLURAL; 1086 } else if(isSelect(typeIndex)) { 1087 argType=ArgType.SELECT; 1088 } 1089 } else if(length==13) { 1090 if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) { 1091 argType=ArgType.SELECTORDINAL; 1092 } 1093 } 1094 // change the ARG_START type from NONE to argType 1095 parts.get(argStart).value=(short)argType.ordinal(); 1096 if(argType==ArgType.SIMPLE) { 1097 addPart(Part.Type.ARG_TYPE, typeIndex, length, 0); 1098 } 1099 // look for an argument style (pattern) 1100 if(c=='}') { 1101 if(argType!=ArgType.SIMPLE) { 1102 throw new IllegalArgumentException( 1103 "No style field for 
complex argument: "+prefix(nameIndex)); 1104 } 1105 } else /* ',' */ { 1106 ++index; 1107 if(argType==ArgType.SIMPLE) { 1108 index=parseSimpleStyle(index); 1109 } else if(argType==ArgType.CHOICE) { 1110 index=parseChoiceStyle(index, nestingLevel); 1111 } else { 1112 index=parsePluralOrSelectStyle(argType, index, nestingLevel); 1113 } 1114 } 1115 } 1116 // Argument parsing stopped on the '}'. 1117 addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal()); 1118 return index+1; 1119 } 1120 parseSimpleStyle(int index)1121 private int parseSimpleStyle(int index) { 1122 int start=index; 1123 int nestedBraces=0; 1124 while(index<msg.length()) { 1125 char c=msg.charAt(index++); 1126 if(c=='\'') { 1127 // Treat apostrophe as quoting but include it in the style part. 1128 // Find the end of the quoted literal text. 1129 index=msg.indexOf('\'', index); 1130 if(index<0) { 1131 throw new IllegalArgumentException( 1132 "Quoted literal argument style text reaches to the end of the message: "+ 1133 prefix(start)); 1134 } 1135 // skip the quote-ending apostrophe 1136 ++index; 1137 } else if(c=='{') { 1138 ++nestedBraces; 1139 } else if(c=='}') { 1140 if(nestedBraces>0) { 1141 --nestedBraces; 1142 } else { 1143 int length=--index-start; 1144 if(length>Part.MAX_LENGTH) { 1145 throw new IndexOutOfBoundsException( 1146 "Argument style text too long: "+prefix(start)); 1147 } 1148 addPart(Part.Type.ARG_STYLE, start, length, 0); 1149 return index; 1150 } 1151 } // c is part of literal text 1152 } 1153 throw new IllegalArgumentException( 1154 "Unmatched '{' braces in message "+prefix()); 1155 } 1156 parseChoiceStyle(int index, int nestingLevel)1157 private int parseChoiceStyle(int index, int nestingLevel) { 1158 int start=index; 1159 index=skipWhiteSpace(index); 1160 if(index==msg.length() || msg.charAt(index)=='}') { 1161 throw new IllegalArgumentException( 1162 "Missing choice argument pattern in "+prefix()); 1163 } 1164 for(;;) { 1165 // The choice argument style contains 
|-separated (number, separator, message) triples. 1166 // Parse the number. 1167 int numberIndex=index; 1168 index=skipDouble(index); 1169 int length=index-numberIndex; 1170 if(length==0) { 1171 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start)); 1172 } 1173 if(length>Part.MAX_LENGTH) { 1174 throw new IndexOutOfBoundsException( 1175 "Choice number too long: "+prefix(numberIndex)); 1176 } 1177 parseDouble(numberIndex, index, true); // adds ARG_INT or ARG_DOUBLE 1178 // Parse the separator. 1179 index=skipWhiteSpace(index); 1180 if(index==msg.length()) { 1181 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start)); 1182 } 1183 char c=msg.charAt(index); 1184 if(!(c=='#' || c=='<' || c=='\u2264')) { // U+2264 is <= 1185 throw new IllegalArgumentException( 1186 "Expected choice separator (#<\u2264) instead of '"+c+ 1187 "' in choice pattern "+prefix(start)); 1188 } 1189 addPart(Part.Type.ARG_SELECTOR, index, 1, 0); 1190 // Parse the message fragment. 1191 index=parseMessage(++index, 0, nestingLevel+1, ArgType.CHOICE); 1192 // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length(). 1193 if(index==msg.length()) { 1194 return index; 1195 } 1196 if(msg.charAt(index)=='}') { 1197 if(!inMessageFormatPattern(nestingLevel)) { 1198 throw new IllegalArgumentException( 1199 "Bad choice pattern syntax: "+prefix(start)); 1200 } 1201 return index; 1202 } // else the terminator is '|' 1203 index=skipWhiteSpace(index+1); 1204 } 1205 } 1206 parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel)1207 private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) { 1208 int start=index; 1209 boolean isEmpty=true; 1210 boolean hasOther=false; 1211 for(;;) { 1212 // First, collect the selector looking for a small set of terminators. 1213 // It would be a little faster to consider the syntax of each possible 1214 // token right here, but that makes the code too complicated. 
1215 index=skipWhiteSpace(index); 1216 boolean eos=index==msg.length(); 1217 if(eos || msg.charAt(index)=='}') { 1218 if(eos==inMessageFormatPattern(nestingLevel)) { 1219 throw new IllegalArgumentException( 1220 "Bad "+ 1221 argType.toString().toLowerCase(Locale.ENGLISH)+ 1222 " pattern syntax: "+prefix(start)); 1223 } 1224 if(!hasOther) { 1225 throw new IllegalArgumentException( 1226 "Missing 'other' keyword in "+ 1227 argType.toString().toLowerCase(Locale.ENGLISH)+ 1228 " pattern in "+prefix()); 1229 } 1230 return index; 1231 } 1232 int selectorIndex=index; 1233 if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') { 1234 // explicit-value plural selector: =double 1235 index=skipDouble(index+1); 1236 int length=index-selectorIndex; 1237 if(length==1) { 1238 throw new IllegalArgumentException( 1239 "Bad "+ 1240 argType.toString().toLowerCase(Locale.ENGLISH)+ 1241 " pattern syntax: "+prefix(start)); 1242 } 1243 if(length>Part.MAX_LENGTH) { 1244 throw new IndexOutOfBoundsException( 1245 "Argument selector too long: "+prefix(selectorIndex)); 1246 } 1247 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0); 1248 parseDouble(selectorIndex+1, index, false); // adds ARG_INT or ARG_DOUBLE 1249 } else { 1250 index=skipIdentifier(index); 1251 int length=index-selectorIndex; 1252 if(length==0) { 1253 throw new IllegalArgumentException( 1254 "Bad "+ 1255 argType.toString().toLowerCase(Locale.ENGLISH)+ 1256 " pattern syntax: "+prefix(start)); 1257 } 1258 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range. 
1259 if( argType.hasPluralStyle() && length==6 && index<msg.length() && 1260 msg.regionMatches(selectorIndex, "offset:", 0, 7) 1261 ) { 1262 // plural offset, not a selector 1263 if(!isEmpty) { 1264 throw new IllegalArgumentException( 1265 "Plural argument 'offset:' (if present) must precede key-message pairs: "+ 1266 prefix(start)); 1267 } 1268 // allow whitespace between offset: and its value 1269 int valueIndex=skipWhiteSpace(index+1); // The ':' is at index. 1270 index=skipDouble(valueIndex); 1271 if(index==valueIndex) { 1272 throw new IllegalArgumentException( 1273 "Missing value for plural 'offset:' "+prefix(start)); 1274 } 1275 if((index-valueIndex)>Part.MAX_LENGTH) { 1276 throw new IndexOutOfBoundsException( 1277 "Plural offset value too long: "+prefix(valueIndex)); 1278 } 1279 parseDouble(valueIndex, index, false); // adds ARG_INT or ARG_DOUBLE 1280 isEmpty=false; 1281 continue; // no message fragment after the offset 1282 } else { 1283 // normal selector word 1284 if(length>Part.MAX_LENGTH) { 1285 throw new IndexOutOfBoundsException( 1286 "Argument selector too long: "+prefix(selectorIndex)); 1287 } 1288 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0); 1289 if(msg.regionMatches(selectorIndex, "other", 0, length)) { 1290 hasOther=true; 1291 } 1292 } 1293 } 1294 1295 // parse the message fragment following the selector 1296 index=skipWhiteSpace(index); 1297 if(index==msg.length() || msg.charAt(index)!='{') { 1298 throw new IllegalArgumentException( 1299 "No message fragment after "+ 1300 argType.toString().toLowerCase(Locale.ENGLISH)+ 1301 " selector: "+prefix(selectorIndex)); 1302 } 1303 index=parseMessage(index, 1, nestingLevel+1, argType); 1304 isEmpty=false; 1305 } 1306 } 1307 1308 /** 1309 * Validates and parses an argument name or argument number string. 1310 * This internal method assumes that the input substring is a "pattern identifier". 
1311 * @return >=0 if the name is a valid number, 1312 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 1313 * ARG_NAME_NOT_VALID (-2) if it is neither. 1314 * @see #validateArgumentName(String) 1315 */ parseArgNumber(CharSequence s, int start, int limit)1316 private static int parseArgNumber(CharSequence s, int start, int limit) { 1317 // If the identifier contains only ASCII digits, then it is an argument _number_ 1318 // and must not have leading zeros (except "0" itself). 1319 // Otherwise it is an argument _name_. 1320 if(start>=limit) { 1321 return ARG_NAME_NOT_VALID; 1322 } 1323 int number; 1324 // Defer numeric errors until we know there are only digits. 1325 boolean badNumber; 1326 char c=s.charAt(start++); 1327 if(c=='0') { 1328 if(start==limit) { 1329 return 0; 1330 } else { 1331 number=0; 1332 badNumber=true; // leading zero 1333 } 1334 } else if('1'<=c && c<='9') { 1335 number=c-'0'; 1336 badNumber=false; 1337 } else { 1338 return ARG_NAME_NOT_NUMBER; 1339 } 1340 while(start<limit) { 1341 c=s.charAt(start++); 1342 if('0'<=c && c<='9') { 1343 if(number>=Integer.MAX_VALUE/10) { 1344 badNumber=true; // overflow 1345 } 1346 number=number*10+(c-'0'); 1347 } else { 1348 return ARG_NAME_NOT_NUMBER; 1349 } 1350 } 1351 // There are only ASCII digits. 1352 if(badNumber) { 1353 return ARG_NAME_NOT_VALID; 1354 } else { 1355 return number; 1356 } 1357 } 1358 parseArgNumber(int start, int limit)1359 private int parseArgNumber(int start, int limit) { 1360 return parseArgNumber(msg, start, limit); 1361 } 1362 1363 /** 1364 * Parses a number from the specified message substring. 
1365 * @param start start index into the message string 1366 * @param limit limit index into the message string, must be start<limit 1367 * @param allowInfinity true if U+221E is allowed (for ChoiceFormat) 1368 */ parseDouble(int start, int limit, boolean allowInfinity)1369 private void parseDouble(int start, int limit, boolean allowInfinity) { 1370 assert start<limit; 1371 // fake loop for easy exit and single throw statement 1372 for(;;) { 1373 // fast path for small integers and infinity 1374 int value=0; 1375 int isNegative=0; // not boolean so that we can easily add it to value 1376 int index=start; 1377 char c=msg.charAt(index++); 1378 if(c=='-') { 1379 isNegative=1; 1380 if(index==limit) { 1381 break; // no number 1382 } 1383 c=msg.charAt(index++); 1384 } else if(c=='+') { 1385 if(index==limit) { 1386 break; // no number 1387 } 1388 c=msg.charAt(index++); 1389 } 1390 if(c==0x221e) { // infinity 1391 if(allowInfinity && index==limit) { 1392 addArgDoublePart( 1393 isNegative!=0 ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY, 1394 start, limit-start); 1395 return; 1396 } else { 1397 break; 1398 } 1399 } 1400 // try to parse the number as a small integer but fall back to a double 1401 while('0'<=c && c<='9') { 1402 value=value*10+(c-'0'); 1403 if(value>(Part.MAX_VALUE+isNegative)) { 1404 break; // not a small-enough integer 1405 } 1406 if(index==limit) { 1407 addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value); 1408 return; 1409 } 1410 c=msg.charAt(index++); 1411 } 1412 // Let Double.parseDouble() throw a NumberFormatException. 
1413 double numericValue=Double.parseDouble(msg.substring(start, limit)); 1414 addArgDoublePart(numericValue, start, limit-start); 1415 return; 1416 } 1417 throw new NumberFormatException( 1418 "Bad syntax for numeric value: "+msg.substring(start, limit)); 1419 } 1420 1421 /** 1422 * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes 1423 * according to JDK pattern behavior. 1424 * @internal 1425 */ 1426 /* package */ static void appendReducedApostrophes(String s, int start, int limit, 1427 StringBuilder sb) { 1428 int doubleApos=-1; 1429 for(;;) { 1430 int i=s.indexOf('\'', start); 1431 if(i<0 || i>=limit) { 1432 sb.append(s, start, limit); 1433 break; 1434 } 1435 if(i==doubleApos) { 1436 // Double apostrophe at start-1 and start==i, append one. 1437 sb.append('\''); 1438 ++start; 1439 doubleApos=-1; 1440 } else { 1441 // Append text between apostrophes and skip this one. 1442 sb.append(s, start, i); 1443 doubleApos=start=i+1; 1444 } 1445 } 1446 } 1447 1448 private int skipWhiteSpace(int index) { 1449 return PatternProps.skipWhiteSpace(msg, index); 1450 } 1451 1452 private int skipIdentifier(int index) { 1453 return PatternProps.skipIdentifier(msg, index); 1454 } 1455 1456 /** 1457 * Skips a sequence of characters that could occur in a double value. 1458 * Does not fully parse or validate the value. 1459 */ 1460 private int skipDouble(int index) { 1461 while(index<msg.length()) { 1462 char c=msg.charAt(index); 1463 // U+221E: Allow the infinity symbol, for ChoiceFormat patterns. 
1464 if((c<'0' && "+-.".indexOf(c)<0) || (c>'9' && c!='e' && c!='E' && c!=0x221e)) { 1465 break; 1466 } 1467 ++index; 1468 } 1469 return index; 1470 } 1471 1472 private static boolean isArgTypeChar(int c) { 1473 return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); 1474 } 1475 1476 private boolean isChoice(int index) { 1477 char c; 1478 return 1479 ((c=msg.charAt(index++))=='c' || c=='C') && 1480 ((c=msg.charAt(index++))=='h' || c=='H') && 1481 ((c=msg.charAt(index++))=='o' || c=='O') && 1482 ((c=msg.charAt(index++))=='i' || c=='I') && 1483 ((c=msg.charAt(index++))=='c' || c=='C') && 1484 ((c=msg.charAt(index))=='e' || c=='E'); 1485 } 1486 1487 private boolean isPlural(int index) { 1488 char c; 1489 return 1490 ((c=msg.charAt(index++))=='p' || c=='P') && 1491 ((c=msg.charAt(index++))=='l' || c=='L') && 1492 ((c=msg.charAt(index++))=='u' || c=='U') && 1493 ((c=msg.charAt(index++))=='r' || c=='R') && 1494 ((c=msg.charAt(index++))=='a' || c=='A') && 1495 ((c=msg.charAt(index))=='l' || c=='L'); 1496 } 1497 1498 private boolean isSelect(int index) { 1499 char c; 1500 return 1501 ((c=msg.charAt(index++))=='s' || c=='S') && 1502 ((c=msg.charAt(index++))=='e' || c=='E') && 1503 ((c=msg.charAt(index++))=='l' || c=='L') && 1504 ((c=msg.charAt(index++))=='e' || c=='E') && 1505 ((c=msg.charAt(index++))=='c' || c=='C') && 1506 ((c=msg.charAt(index))=='t' || c=='T'); 1507 } 1508 1509 private boolean isOrdinal(int index) { 1510 char c; 1511 return 1512 ((c=msg.charAt(index++))=='o' || c=='O') && 1513 ((c=msg.charAt(index++))=='r' || c=='R') && 1514 ((c=msg.charAt(index++))=='d' || c=='D') && 1515 ((c=msg.charAt(index++))=='i' || c=='I') && 1516 ((c=msg.charAt(index++))=='n' || c=='N') && 1517 ((c=msg.charAt(index++))=='a' || c=='A') && 1518 ((c=msg.charAt(index))=='l' || c=='L'); 1519 } 1520 1521 /** 1522 * @return true if we are inside a MessageFormat (sub-)pattern, 1523 * as opposed to inside a top-level choice/plural/select pattern. 
1524 */ 1525 private boolean inMessageFormatPattern(int nestingLevel) { 1526 return nestingLevel>0 || parts.get(0).type==Part.Type.MSG_START; 1527 } 1528 1529 /** 1530 * @return true if we are in a MessageFormat sub-pattern 1531 * of a top-level ChoiceFormat pattern. 1532 */ 1533 private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) { 1534 return 1535 nestingLevel==1 && 1536 parentType==ArgType.CHOICE && 1537 parts.get(0).type!=Part.Type.MSG_START; 1538 } 1539 1540 private void addPart(Part.Type type, int index, int length, int value) { 1541 parts.add(new Part(type, index, length, value)); 1542 } 1543 1544 private void addLimitPart(int start, Part.Type type, int index, int length, int value) { 1545 parts.get(start).limitPartIndex=parts.size(); 1546 addPart(type, index, length, value); 1547 } 1548 1549 private void addArgDoublePart(double numericValue, int start, int length) { 1550 int numericIndex; 1551 if(numericValues==null) { 1552 numericValues=new ArrayList<Double>(); 1553 numericIndex=0; 1554 } else { 1555 numericIndex=numericValues.size(); 1556 if(numericIndex>Part.MAX_VALUE) { 1557 throw new IndexOutOfBoundsException("Too many numeric values"); 1558 } 1559 } 1560 numericValues.add(numericValue); 1561 addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex); 1562 } 1563 1564 private static final int MAX_PREFIX_LENGTH=24; 1565 1566 /** 1567 * Returns a prefix of s.substring(start). Used for Exception messages. 1568 * @param s 1569 * @param start start index in s 1570 * @return s.substring(start) or a prefix of that 1571 */ 1572 private static String prefix(String s, int start) { 1573 StringBuilder prefix=new StringBuilder(MAX_PREFIX_LENGTH+20); 1574 if(start==0) { 1575 prefix.append("\""); 1576 } else { 1577 prefix.append("[at pattern index ").append(start).append("] \""); 1578 } 1579 int substringLength=s.length()-start; 1580 if(substringLength<=MAX_PREFIX_LENGTH) { 1581 prefix.append(start==0 ? 
s : s.substring(start)); 1582 } else { 1583 int limit=start+MAX_PREFIX_LENGTH-4; 1584 if(Character.isHighSurrogate(s.charAt(limit-1))) { 1585 // remove lead surrogate from the end of the prefix 1586 --limit; 1587 } 1588 prefix.append(s, start, limit).append(" ..."); 1589 } 1590 return prefix.append("\"").toString(); 1591 } 1592 1593 private static String prefix(String s) { 1594 return prefix(s, 0); 1595 } 1596 1597 private String prefix(int start) { 1598 return prefix(msg, start); 1599 } 1600 1601 private String prefix() { 1602 return prefix(msg, 0); 1603 } 1604 1605 private ApostropheMode aposMode; 1606 private String msg; 1607 private ArrayList<Part> parts=new ArrayList<Part>(); 1608 private ArrayList<Double> numericValues; 1609 private boolean hasArgNames; 1610 private boolean hasArgNumbers; 1611 private boolean needsAutoQuoting; 1612 private volatile boolean frozen; 1613 1614 private static final ApostropheMode defaultAposMode= 1615 ApostropheMode.valueOf( 1616 ICUConfig.get("com.ibm.icu.text.MessagePattern.ApostropheMode", "DOUBLE_OPTIONAL")); 1617 1618 private static final ArgType[] argTypes=ArgType.values(); 1619 } 1620