1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2016, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * created on: 2010aug21 7 * created by: Markus W. Scherer 8 */ 9 10 package com.ibm.icu.text; 11 12 import java.util.ArrayList; 13 import java.util.Locale; 14 15 import com.ibm.icu.impl.ICUConfig; 16 import com.ibm.icu.impl.PatternProps; 17 import com.ibm.icu.util.Freezable; 18 import com.ibm.icu.util.ICUCloneNotSupportedException; 19 20 //Note: Minimize ICU dependencies, only use a very small part of the ICU core. 21 //In particular, do not depend on *Format classes. 22 23 /** 24 * Parses and represents ICU MessageFormat patterns. 25 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 26 * Used in the implementations of those classes as well as in tools 27 * for message validation, translation and format conversion. 28 * <p> 29 * The parser handles all syntax relevant for identifying message arguments. 30 * This includes "complex" arguments whose style strings contain 31 * nested MessageFormat pattern substrings. 32 * For "simple" arguments (with no nested MessageFormat pattern substrings), 33 * the argument style is not parsed any further. 34 * <p> 35 * The parser handles named and numbered message arguments and allows both in one message. 36 * <p> 37 * Once a pattern has been parsed successfully, iterate through the parsed data 38 * with countParts(), getPart() and related methods. 39 * <p> 40 * The data logically represents a parse tree, but is stored and accessed 41 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 42 * Arguments and nested messages are best handled via recursion. 43 * For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns 44 * the index of the corresponding _LIMIT "part". 
45 * <p> 46 * List of "parts": 47 * <pre> 48 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 49 * argument = noneArg | simpleArg | complexArg 50 * complexArg = choiceArg | pluralArg | selectArg 51 * 52 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 53 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 54 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 55 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 56 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 57 * 58 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 59 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 60 * selectStyle = (ARG_SELECTOR message)+ 61 * </pre> 62 * <ul> 63 * <li>Literal output text is not represented directly by "parts" but accessed 64 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 65 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 66 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 67 * the less-than-or-equal-to sign (U+2264). 68 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 69 * The optional numeric Part between each (ARG_SELECTOR, message) pair 70 * is the value of an explicit-number selector like "=2", 71 * otherwise the selector is a non-numeric identifier. 72 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 73 * </ul> 74 * <p> 75 * This class is not intended for public subclassing. 76 * 77 * @stable ICU 4.8 78 * @author Markus Scherer 79 */ 80 public final class MessagePattern implements Cloneable, Freezable<MessagePattern> { 81 /** 82 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 
     * The default is DOUBLE_OPTIONAL unless overridden via ICUConfig
     * (/com/ibm/icu/ICUConfig.properties).
     * <p>
     * A pair of adjacent apostrophes always results in a single apostrophe in the output,
     * even when the pair is between two single, text-quoting apostrophes.
     * <p>
     * The following table shows examples of desired MessageFormat.format() output
     * with the pattern strings that yield that output.
     *
     * <table>
     *   <tr>
     *     <th>Desired output</th>
     *     <th>DOUBLE_OPTIONAL</th>
     *     <th>DOUBLE_REQUIRED</th>
     *   </tr>
     *   <tr>
     *     <td>I see {many}</td>
     *     <td>I see '{many}'</td>
     *     <td>(same)</td>
     *   </tr>
     *   <tr>
     *     <td>I said {'Wow!'}</td>
     *     <td>I said '{''Wow!''}'</td>
     *     <td>(same)</td>
     *   </tr>
     *   <tr>
     *     <td>I don't know</td>
     *     <td>I don't know OR<br> I don''t know</td>
     *     <td>I don''t know</td>
     *   </tr>
     * </table>
     * @stable ICU 4.8
     */
    public enum ApostropheMode {
        /**
         * A literal apostrophe is represented by
         * either a single or a double apostrophe pattern character.
         * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
         * if it immediately precedes a curly brace ({ or }),
         * or a pipe symbol | if inside a choice format,
         * or a pound symbol # if inside a plural format
         * (any argument type for which {@link ArgType#hasPluralStyle()} is true).
         * <p>
         * This is the default behavior starting with ICU 4.8.
         * @stable ICU 4.8
         */
        DOUBLE_OPTIONAL,
        /**
         * A literal apostrophe must be represented by
         * a double apostrophe pattern character.
         * A single apostrophe always starts quoted literal text.
         * <p>
         * This is the behavior of ICU 4.6 and earlier, and of {@link java.text.MessageFormat}.
         * @stable ICU 4.8
         */
        DOUBLE_REQUIRED
    }

    /**
     * Constructs an empty MessagePattern with default ApostropheMode.
142 * @stable ICU 4.8 143 */ MessagePattern()144 public MessagePattern() { 145 aposMode=defaultAposMode; 146 } 147 148 /** 149 * Constructs an empty MessagePattern. 150 * @param mode Explicit ApostropheMode. 151 * @stable ICU 4.8 152 */ MessagePattern(ApostropheMode mode)153 public MessagePattern(ApostropheMode mode) { 154 aposMode=mode; 155 } 156 157 /** 158 * Constructs a MessagePattern with default ApostropheMode and 159 * parses the MessageFormat pattern string. 160 * @param pattern a MessageFormat pattern string 161 * @throws IllegalArgumentException for syntax errors in the pattern string 162 * @throws IndexOutOfBoundsException if certain limits are exceeded 163 * (e.g., argument number too high, argument name too long, etc.) 164 * @throws NumberFormatException if a number could not be parsed 165 * @stable ICU 4.8 166 */ MessagePattern(String pattern)167 public MessagePattern(String pattern) { 168 aposMode=defaultAposMode; 169 parse(pattern); 170 } 171 172 /** 173 * Parses a MessageFormat pattern string. 174 * @param pattern a MessageFormat pattern string 175 * @return this 176 * @throws IllegalArgumentException for syntax errors in the pattern string 177 * @throws IndexOutOfBoundsException if certain limits are exceeded 178 * (e.g., argument number too high, argument name too long, etc.) 179 * @throws NumberFormatException if a number could not be parsed 180 * @stable ICU 4.8 181 */ parse(String pattern)182 public MessagePattern parse(String pattern) { 183 preParse(pattern); 184 parseMessage(0, 0, 0, ArgType.NONE); 185 postParse(); 186 return this; 187 } 188 189 /** 190 * Parses a ChoiceFormat pattern string. 191 * @param pattern a ChoiceFormat pattern string 192 * @return this 193 * @throws IllegalArgumentException for syntax errors in the pattern string 194 * @throws IndexOutOfBoundsException if certain limits are exceeded 195 * (e.g., argument number too high, argument name too long, etc.) 
196 * @throws NumberFormatException if a number could not be parsed 197 * @stable ICU 4.8 198 */ parseChoiceStyle(String pattern)199 public MessagePattern parseChoiceStyle(String pattern) { 200 preParse(pattern); 201 parseChoiceStyle(0, 0); 202 postParse(); 203 return this; 204 } 205 206 /** 207 * Parses a PluralFormat pattern string. 208 * @param pattern a PluralFormat pattern string 209 * @return this 210 * @throws IllegalArgumentException for syntax errors in the pattern string 211 * @throws IndexOutOfBoundsException if certain limits are exceeded 212 * (e.g., argument number too high, argument name too long, etc.) 213 * @throws NumberFormatException if a number could not be parsed 214 * @stable ICU 4.8 215 */ parsePluralStyle(String pattern)216 public MessagePattern parsePluralStyle(String pattern) { 217 preParse(pattern); 218 parsePluralOrSelectStyle(ArgType.PLURAL, 0, 0); 219 postParse(); 220 return this; 221 } 222 223 /** 224 * Parses a SelectFormat pattern string. 225 * @param pattern a SelectFormat pattern string 226 * @return this 227 * @throws IllegalArgumentException for syntax errors in the pattern string 228 * @throws IndexOutOfBoundsException if certain limits are exceeded 229 * (e.g., argument number too high, argument name too long, etc.) 230 * @throws NumberFormatException if a number could not be parsed 231 * @stable ICU 4.8 232 */ parseSelectStyle(String pattern)233 public MessagePattern parseSelectStyle(String pattern) { 234 preParse(pattern); 235 parsePluralOrSelectStyle(ArgType.SELECT, 0, 0); 236 postParse(); 237 return this; 238 } 239 240 /** 241 * Clears this MessagePattern. 242 * countParts() will return 0. 243 * @stable ICU 4.8 244 */ clear()245 public void clear() { 246 // Mostly the same as preParse(). 
247 if(isFrozen()) { 248 throw new UnsupportedOperationException( 249 "Attempt to clear() a frozen MessagePattern instance."); 250 } 251 msg=null; 252 hasArgNames=hasArgNumbers=false; 253 needsAutoQuoting=false; 254 parts.clear(); 255 if(numericValues!=null) { 256 numericValues.clear(); 257 } 258 } 259 260 /** 261 * Clears this MessagePattern and sets the ApostropheMode. 262 * countParts() will return 0. 263 * @param mode The new ApostropheMode. 264 * @stable ICU 4.8 265 */ clearPatternAndSetApostropheMode(ApostropheMode mode)266 public void clearPatternAndSetApostropheMode(ApostropheMode mode) { 267 clear(); 268 aposMode=mode; 269 } 270 271 /** 272 * @param other another object to compare with. 273 * @return true if this object is equivalent to the other one. 274 * @stable ICU 4.8 275 */ 276 @Override equals(Object other)277 public boolean equals(Object other) { 278 if(this==other) { 279 return true; 280 } 281 if(other==null || getClass()!=other.getClass()) { 282 return false; 283 } 284 MessagePattern o=(MessagePattern)other; 285 return 286 aposMode.equals(o.aposMode) && 287 (msg==null ? o.msg==null : msg.equals(o.msg)) && 288 parts.equals(o.parts); 289 // No need to compare numericValues if msg and parts are the same. 290 } 291 292 /** 293 * {@inheritDoc} 294 * @stable ICU 4.8 295 */ 296 @Override hashCode()297 public int hashCode() { 298 return (aposMode.hashCode()*37+(msg!=null ? msg.hashCode() : 0))*37+parts.hashCode(); 299 } 300 301 /** 302 * @return this instance's ApostropheMode. 303 * @stable ICU 4.8 304 */ getApostropheMode()305 public ApostropheMode getApostropheMode() { 306 return aposMode; 307 } 308 309 /** 310 * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED 311 * @internal 312 */ jdkAposMode()313 /* package */ boolean jdkAposMode() { 314 return aposMode == ApostropheMode.DOUBLE_REQUIRED; 315 } 316 317 /** 318 * @return the parsed pattern string (null if none was parsed). 
319 * @stable ICU 4.8 320 */ getPatternString()321 public String getPatternString() { 322 return msg; 323 } 324 325 /** 326 * Does the parsed pattern have named arguments like {first_name}? 327 * @return true if the parsed pattern has at least one named argument. 328 * @stable ICU 4.8 329 */ hasNamedArguments()330 public boolean hasNamedArguments() { 331 return hasArgNames; 332 } 333 334 /** 335 * Does the parsed pattern have numbered arguments like {2}? 336 * @return true if the parsed pattern has at least one numbered argument. 337 * @stable ICU 4.8 338 */ hasNumberedArguments()339 public boolean hasNumberedArguments() { 340 return hasArgNumbers; 341 } 342 343 /** 344 * {@inheritDoc} 345 * @stable ICU 4.8 346 */ 347 @Override toString()348 public String toString() { 349 return msg; 350 } 351 352 /** 353 * Validates and parses an argument name or argument number string. 354 * An argument name must be a "pattern identifier", that is, it must contain 355 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 356 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 357 * @param name Input string. 358 * @return >=0 if the name is a valid number, 359 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 360 * ARG_NAME_NOT_VALID (-2) if it is neither. 361 * @stable ICU 4.8 362 */ validateArgumentName(String name)363 public static int validateArgumentName(String name) { 364 if(!PatternProps.isIdentifier(name)) { 365 return ARG_NAME_NOT_VALID; 366 } 367 return parseArgNumber(name, 0, name.length()); 368 } 369 370 /** 371 * Return value from {@link #validateArgumentName(String)} for when 372 * the string is a valid "pattern identifier" but not a number. 373 * @stable ICU 4.8 374 */ 375 public static final int ARG_NAME_NOT_NUMBER=-1; 376 377 /** 378 * Return value from {@link #validateArgumentName(String)} for when 379 * the string is invalid. 
380 * It might not be a valid "pattern identifier", 381 * or it have only ASCII digits but there is a leading zero or the number is too large. 382 * @stable ICU 4.8 383 */ 384 public static final int ARG_NAME_NOT_VALID=-2; 385 386 /** 387 * Returns a version of the parsed pattern string where each ASCII apostrophe 388 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 389 * <p> 390 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 391 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 392 * @return the deep-auto-quoted version of the parsed pattern string. 393 * @see MessageFormat#autoQuoteApostrophe(String) 394 * @stable ICU 4.8 395 */ autoQuoteApostropheDeep()396 public String autoQuoteApostropheDeep() { 397 if(!needsAutoQuoting) { 398 return msg; 399 } 400 StringBuilder modified=null; 401 // Iterate backward so that the insertion indexes do not change. 402 int count=countParts(); 403 for(int i=count; i>0;) { 404 Part part; 405 if((part=getPart(--i)).getType()==Part.Type.INSERT_CHAR) { 406 if(modified==null) { 407 modified=new StringBuilder(msg.length()+10).append(msg); 408 } 409 modified.insert(part.index, (char)part.value); 410 } 411 } 412 if(modified==null) { 413 return msg; 414 } else { 415 return modified.toString(); 416 } 417 } 418 419 /** 420 * Returns the number of "parts" created by parsing the pattern string. 421 * Returns 0 if no pattern has been parsed or clear() was called. 422 * @return the number of pattern parts. 423 * @stable ICU 4.8 424 */ countParts()425 public int countParts() { 426 return parts.size(); 427 } 428 429 /** 430 * Gets the i-th pattern "part". 431 * @param i The index of the Part data. (0..countParts()-1) 432 * @return the i-th pattern "part". 
433 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range 434 * @stable ICU 4.8 435 */ getPart(int i)436 public Part getPart(int i) { 437 return parts.get(i); 438 } 439 440 /** 441 * Returns the Part.Type of the i-th pattern "part". 442 * Convenience method for getPart(i).getType(). 443 * @param i The index of the Part data. (0..countParts()-1) 444 * @return The Part.Type of the i-th Part. 445 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range 446 * @stable ICU 4.8 447 */ getPartType(int i)448 public Part.Type getPartType(int i) { 449 return parts.get(i).type; 450 } 451 452 /** 453 * Returns the pattern index of the specified pattern "part". 454 * Convenience method for getPart(partIndex).getIndex(). 455 * @param partIndex The index of the Part data. (0..countParts()-1) 456 * @return The pattern index of this Part. 457 * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range 458 * @stable ICU 4.8 459 */ getPatternIndex(int partIndex)460 public int getPatternIndex(int partIndex) { 461 return parts.get(partIndex).index; 462 } 463 464 /** 465 * Returns the substring of the pattern string indicated by the Part. 466 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 467 * @param part a part of this MessagePattern. 468 * @return the substring associated with part. 469 * @stable ICU 4.8 470 */ getSubstring(Part part)471 public String getSubstring(Part part) { 472 int index=part.index; 473 return msg.substring(index, index+part.length); 474 } 475 476 /** 477 * Compares the part's substring with the input string s. 478 * @param part a part of this MessagePattern. 479 * @param s a string. 480 * @return true if getSubstring(part).equals(s). 
481 * @stable ICU 4.8 482 */ partSubstringMatches(Part part, String s)483 public boolean partSubstringMatches(Part part, String s) { 484 return msg.regionMatches(part.index, s, 0, part.length); 485 } 486 487 /** 488 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 489 * @param part a part of this MessagePattern. 490 * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part. 491 * @stable ICU 4.8 492 */ getNumericValue(Part part)493 public double getNumericValue(Part part) { 494 Part.Type type=part.type; 495 if(type==Part.Type.ARG_INT) { 496 return part.value; 497 } else if(type==Part.Type.ARG_DOUBLE) { 498 return numericValues.get(part.value); 499 } else { 500 return NO_NUMERIC_VALUE; 501 } 502 } 503 504 /** 505 * Special value that is returned by getNumericValue(Part) when no 506 * numeric value is defined for a part. 507 * @see #getNumericValue 508 * @stable ICU 4.8 509 */ 510 public static final double NO_NUMERIC_VALUE=-123456789; 511 512 /** 513 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 514 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 515 * @return the "offset:" value. 516 * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range 517 * @stable ICU 4.8 518 */ getPluralOffset(int pluralStart)519 public double getPluralOffset(int pluralStart) { 520 Part part=parts.get(pluralStart); 521 if(part.type.hasNumericValue()) { 522 return getNumericValue(part); 523 } else { 524 return 0; 525 } 526 } 527 528 /** 529 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 530 * @param start The index of some Part data (0..countParts()-1); 531 * this Part should be of Type ARG_START or MSG_START. 532 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 533 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 
534 * @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range 535 * @stable ICU 4.8 536 */ getLimitPartIndex(int start)537 public int getLimitPartIndex(int start) { 538 int limit=parts.get(start).limitPartIndex; 539 if(limit<start) { 540 return start; 541 } 542 return limit; 543 } 544 545 /** 546 * A message pattern "part", representing a pattern parsing event. 547 * There is a part for the start and end of a message or argument, 548 * for quoting and escaping of and with ASCII apostrophes, 549 * and for syntax elements of "complex" arguments. 550 * @stable ICU 4.8 551 */ 552 public static final class Part { Part(Type t, int i, int l, int v)553 private Part(Type t, int i, int l, int v) { 554 type=t; 555 index=i; 556 length=(char)l; 557 value=(short)v; 558 } 559 560 /** 561 * Returns the type of this part. 562 * @return the part type. 563 * @stable ICU 4.8 564 */ getType()565 public Type getType() { 566 return type; 567 } 568 569 /** 570 * Returns the pattern string index associated with this Part. 571 * @return this part's pattern string index. 572 * @stable ICU 4.8 573 */ getIndex()574 public int getIndex() { 575 return index; 576 } 577 578 /** 579 * Returns the length of the pattern substring associated with this Part. 580 * This is 0 for some parts. 581 * @return this part's pattern substring length. 582 * @stable ICU 4.8 583 */ getLength()584 public int getLength() { 585 return length; 586 } 587 588 /** 589 * Returns the pattern string limit (exclusive-end) index associated with this Part. 590 * Convenience method for getIndex()+getLength(). 591 * @return this part's pattern string limit index, same as getIndex()+getLength(). 592 * @stable ICU 4.8 593 */ getLimit()594 public int getLimit() { 595 return index+length; 596 } 597 598 /** 599 * Returns a value associated with this part. 600 * See the documentation of each part type for details. 601 * @return the part value. 
602 * @stable ICU 4.8 603 */ getValue()604 public int getValue() { 605 return value; 606 } 607 608 /** 609 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 610 * otherwise ArgType.NONE. 611 * @return the argument type for this part. 612 * @stable ICU 4.8 613 */ getArgType()614 public ArgType getArgType() { 615 Type type=getType(); 616 if(type==Type.ARG_START || type==Type.ARG_LIMIT) { 617 return argTypes[value]; 618 } else { 619 return ArgType.NONE; 620 } 621 } 622 623 /** 624 * Part type constants. 625 * @stable ICU 4.8 626 */ 627 public enum Type { 628 /** 629 * Start of a message pattern (main or nested). 630 * The length is 0 for the top-level message 631 * and for a choice argument sub-message, otherwise 1 for the '{'. 632 * The value indicates the nesting level, starting with 0 for the main message. 633 * <p> 634 * There is always a later MSG_LIMIT part. 635 * @stable ICU 4.8 636 */ 637 MSG_START, 638 /** 639 * End of a message pattern (main or nested). 640 * The length is 0 for the top-level message and 641 * the last sub-message of a choice argument, 642 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 643 * The value indicates the nesting level, starting with 0 for the main message. 644 * @stable ICU 4.8 645 */ 646 MSG_LIMIT, 647 /** 648 * Indicates a substring of the pattern string which is to be skipped when formatting. 649 * For example, an apostrophe that begins or ends quoted text 650 * would be indicated with such a part. 651 * The value is undefined and currently always 0. 652 * @stable ICU 4.8 653 */ 654 SKIP_SYNTAX, 655 /** 656 * Indicates that a syntax character needs to be inserted for auto-quoting. 657 * The length is 0. 658 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 659 * @stable ICU 4.8 660 */ 661 INSERT_CHAR, 662 /** 663 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 
664 * When formatting, replace this part's substring with the 665 * (value-offset) for the plural argument value. 666 * The value is undefined and currently always 0. 667 * @stable ICU 4.8 668 */ 669 REPLACE_NUMBER, 670 /** 671 * Start of an argument. 672 * The length is 1 for the '{'. 673 * The value is the ordinal value of the ArgType. Use getArgType(). 674 * <p> 675 * This part is followed by either an ARG_NUMBER or ARG_NAME, 676 * followed by optional argument sub-parts (see ArgType constants) 677 * and finally an ARG_LIMIT part. 678 * @stable ICU 4.8 679 */ 680 ARG_START, 681 /** 682 * End of an argument. 683 * The length is 1 for the '}'. 684 * The value is the ordinal value of the ArgType. Use getArgType(). 685 * @stable ICU 4.8 686 */ 687 ARG_LIMIT, 688 /** 689 * The argument number, provided by the value. 690 * @stable ICU 4.8 691 */ 692 ARG_NUMBER, 693 /** 694 * The argument name. 695 * The value is undefined and currently always 0. 696 * @stable ICU 4.8 697 */ 698 ARG_NAME, 699 /** 700 * The argument type. 701 * The value is undefined and currently always 0. 702 * @stable ICU 4.8 703 */ 704 ARG_TYPE, 705 /** 706 * The argument style text. 707 * The value is undefined and currently always 0. 708 * @stable ICU 4.8 709 */ 710 ARG_STYLE, 711 /** 712 * A selector substring in a "complex" argument style. 713 * The value is undefined and currently always 0. 714 * @stable ICU 4.8 715 */ 716 ARG_SELECTOR, 717 /** 718 * An integer value, for example the offset or an explicit selector value 719 * in a PluralFormat style. 720 * The part value is the integer value. 721 * @stable ICU 4.8 722 */ 723 ARG_INT, 724 /** 725 * A numeric value, for example the offset or an explicit selector value 726 * in a PluralFormat style. 727 * The part value is an index into an internal array of numeric values; 728 * use getNumericValue(). 729 * @stable ICU 4.8 730 */ 731 ARG_DOUBLE; 732 733 /** 734 * Indicates whether this part has a numeric value. 
735 * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}. 736 * @return true if this part has a numeric value. 737 * @stable ICU 4.8 738 */ hasNumericValue()739 public boolean hasNumericValue() { 740 return this==ARG_INT || this==ARG_DOUBLE; 741 } 742 } 743 744 /** 745 * @return a string representation of this part. 746 * @stable ICU 4.8 747 */ 748 @Override toString()749 public String toString() { 750 String valueString=(type==Type.ARG_START || type==Type.ARG_LIMIT) ? 751 getArgType().name() : Integer.toString(value); 752 return type.name()+"("+valueString+")@"+index; 753 } 754 755 /** 756 * @param other another object to compare with. 757 * @return true if this object is equivalent to the other one. 758 * @stable ICU 4.8 759 */ 760 @Override equals(Object other)761 public boolean equals(Object other) { 762 if(this==other) { 763 return true; 764 } 765 if(other==null || getClass()!=other.getClass()) { 766 return false; 767 } 768 Part o=(Part)other; 769 return 770 type.equals(o.type) && 771 index==o.index && 772 length==o.length && 773 value==o.value && 774 limitPartIndex==o.limitPartIndex; 775 } 776 777 /** 778 * {@inheritDoc} 779 * @stable ICU 4.8 780 */ 781 @Override hashCode()782 public int hashCode() { 783 return ((type.hashCode()*37+index)*37+length)*37+value; 784 } 785 786 private static final int MAX_LENGTH=0xffff; 787 private static final int MAX_VALUE=Short.MAX_VALUE; 788 789 // Some fields are not final because they are modified during pattern parsing. 790 // After pattern parsing, the parts are effectively immutable. 791 private final Type type; 792 private final int index; 793 private final char length; 794 private short value; 795 private int limitPartIndex; 796 } 797 798 /** 799 * Argument type constants. 800 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 
801 * 802 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 803 * with a nesting level one greater than the surrounding message. 804 * @stable ICU 4.8 805 */ 806 public enum ArgType { 807 /** 808 * The argument has no specified type. 809 * @stable ICU 4.8 810 */ 811 NONE, 812 /** 813 * The argument has a "simple" type which is provided by the ARG_TYPE part. 814 * An ARG_STYLE part might follow that. 815 * @stable ICU 4.8 816 */ 817 SIMPLE, 818 /** 819 * The argument is a ChoiceFormat with one or more 820 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 821 * @stable ICU 4.8 822 */ 823 CHOICE, 824 /** 825 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 826 * (e.g., offset:1) 827 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 828 * If the selector has an explicit value (e.g., =2), then 829 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 830 * Otherwise the message immediately follows the ARG_SELECTOR. 831 * @stable ICU 4.8 832 */ 833 PLURAL, 834 /** 835 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 836 * @stable ICU 4.8 837 */ 838 SELECT, 839 /** 840 * The argument is an ordinal-number PluralFormat 841 * with the same style parts sequence and semantics as {@link ArgType#PLURAL}. 842 * @stable ICU 50 843 */ 844 SELECTORDINAL; 845 846 /** 847 * @return true if the argument type has a plural style part sequence and semantics, 848 * for example {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL}. 849 * @stable ICU 50 850 */ hasPluralStyle()851 public boolean hasPluralStyle() { 852 return this == PLURAL || this == SELECTORDINAL; 853 } 854 } 855 856 /** 857 * Creates and returns a copy of this object. 858 * @return a copy of this object (or itself if frozen). 
859 * @stable ICU 4.8 860 */ 861 @Override clone()862 public Object clone() { 863 if(isFrozen()) { 864 return this; 865 } else { 866 return cloneAsThawed(); 867 } 868 } 869 870 /** 871 * Creates and returns an unfrozen copy of this object. 872 * @return a copy of this object. 873 * @stable ICU 4.8 874 */ 875 @SuppressWarnings("unchecked") cloneAsThawed()876 public MessagePattern cloneAsThawed() { 877 MessagePattern newMsg; 878 try { 879 newMsg=(MessagePattern)super.clone(); 880 } catch (CloneNotSupportedException e) { 881 throw new ICUCloneNotSupportedException(e); 882 } 883 newMsg.parts=(ArrayList<Part>)parts.clone(); 884 if(numericValues!=null) { 885 newMsg.numericValues=(ArrayList<Double>)numericValues.clone(); 886 } 887 newMsg.frozen=false; 888 return newMsg; 889 } 890 891 /** 892 * Freezes this object, making it immutable and thread-safe. 893 * @return this 894 * @stable ICU 4.8 895 */ freeze()896 public MessagePattern freeze() { 897 frozen=true; 898 return this; 899 } 900 901 /** 902 * Determines whether this object is frozen (immutable) or not. 903 * @return true if this object is frozen. 904 * @stable ICU 4.8 905 */ isFrozen()906 public boolean isFrozen() { 907 return frozen; 908 } 909 preParse(String pattern)910 private void preParse(String pattern) { 911 if(isFrozen()) { 912 throw new UnsupportedOperationException( 913 "Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance."); 914 } 915 msg=pattern; 916 hasArgNames=hasArgNumbers=false; 917 needsAutoQuoting=false; 918 parts.clear(); 919 if(numericValues!=null) { 920 numericValues.clear(); 921 } 922 } 923 postParse()924 private void postParse() { 925 // Nothing to be done currently. 
    }

    /**
     * Parses one message (top-level or nested) starting at index and records its parts,
     * from MSG_START through MSG_LIMIT.
     *
     * @param index pattern string index where the message starts
     *        (at the '{' if msgStartLength is 1)
     * @param msgStartLength length recorded for the MSG_START part; parsing resumes after it
     * @param nestingLevel nesting level stored as the value of the MSG_START/MSG_LIMIT parts
     * @param parentType type of the enclosing argument (ArgType.NONE for the call from parse());
     *        decides whether '|' and '#' are treated as syntax
     * @return the pattern index at which the caller continues:
     *         after the closing '}', or at the '}' or '|' terminator when parentType is CHOICE,
     *         or msg.length() when the end of the pattern is reached
     * @throws IndexOutOfBoundsException if nestingLevel exceeds Part.MAX_VALUE
     * @throws IllegalArgumentException if the pattern ends inside a nested message
     *         that is not a top-level choice message
     */
    private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) {
        if(nestingLevel>Part.MAX_VALUE) {
            throw new IndexOutOfBoundsException();
        }
        // Remember where MSG_START is stored so that the matching MSG_LIMIT can be linked to it.
        int msgStart=parts.size();
        addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel);
        index+=msgStartLength;
        while(index<msg.length()) {
            // Note: index already points past c from here on.
            char c=msg.charAt(index++);
            if(c=='\'') {
                if(index==msg.length()) {
                    // The apostrophe is the last character in the pattern.
                    // Add a Part for auto-quoting.
                    addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                    needsAutoQuoting=true;
                } else {
                    c=msg.charAt(index);
                    if(c=='\'') {
                        // double apostrophe, skip the second one
                        addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                    } else if(
                        aposMode==ApostropheMode.DOUBLE_REQUIRED ||
                        c=='{' || c=='}' ||
                        (parentType==ArgType.CHOICE && c=='|') ||
                        (parentType.hasPluralStyle() && c=='#')
                    ) {
                        // skip the quote-starting apostrophe
                        addPart(Part.Type.SKIP_SYNTAX, index-1, 1, 0);
                        // find the end of the quoted literal text
                        for(;;) {
                            index=msg.indexOf('\'', index+1);
                            if(index>=0) {
                                if((index+1)<msg.length() && msg.charAt(index+1)=='\'') {
                                    // double apostrophe inside quoted literal text
                                    // still encodes a single apostrophe, skip the second one
                                    addPart(Part.Type.SKIP_SYNTAX, ++index, 1, 0);
                                } else {
                                    // skip the quote-ending apostrophe
                                    addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                                    break;
                                }
                            } else {
                                // The quoted text reaches to the end of the message.
                                index=msg.length();
                                // Add a Part for auto-quoting.
                                addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                                needsAutoQuoting=true;
                                break;
                            }
                        }
                    } else {
                        // Interpret the apostrophe as literal text.
                        // Add a Part for auto-quoting.
                        addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                        needsAutoQuoting=true;
                    }
                }
            } else if(parentType.hasPluralStyle() && c=='#') {
                // The unquoted # in a plural message fragment will be replaced
                // with the (number-offset).
                addPart(Part.Type.REPLACE_NUMBER, index-1, 1, 0);
            } else if(c=='{') {
                // Nested argument: parseArg() returns the index at which to continue.
                index=parseArg(index-1, 1, nestingLevel);
            } else if((nestingLevel>0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) {
                // Finish the message before the terminator.
                // In a choice style, report the "}" substring only for the following ARG_LIMIT,
                // not for this MSG_LIMIT.
                int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1;
                addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel);
                if(parentType==ArgType.CHOICE) {
                    // Let the choice style parser see the '}' or '|'.
                    return index-1;
                } else {
                    // continue parsing after the '}'
                    return index;
                }
            }  // else: c is part of literal text
        }
        // End of the pattern string: only acceptable at nesting level 0
        // or inside a top-level choice message.
        if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel);
        return index;
    }

    private int parseArg(int index, int argStartLength, int nestingLevel) {
        int argStart=parts.size();
        ArgType argType=ArgType.NONE;
        addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal());
        int nameIndex=index=skipWhiteSpace(index+argStartLength);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        // parse argument name or number
        index=skipIdentifier(index);
        int number=parseArgNumber(nameIndex, index);
        if(number>=0) {
            int length=index-nameIndex;
if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) { 1029 throw new IndexOutOfBoundsException( 1030 "Argument number too large: "+prefix(nameIndex)); 1031 } 1032 hasArgNumbers=true; 1033 addPart(Part.Type.ARG_NUMBER, nameIndex, length, number); 1034 } else if(number==ARG_NAME_NOT_NUMBER) { 1035 int length=index-nameIndex; 1036 if(length>Part.MAX_LENGTH) { 1037 throw new IndexOutOfBoundsException( 1038 "Argument name too long: "+prefix(nameIndex)); 1039 } 1040 hasArgNames=true; 1041 addPart(Part.Type.ARG_NAME, nameIndex, length, 0); 1042 } else { // number<-1 (ARG_NAME_NOT_VALID) 1043 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex)); 1044 } 1045 index=skipWhiteSpace(index); 1046 if(index==msg.length()) { 1047 throw new IllegalArgumentException( 1048 "Unmatched '{' braces in message "+prefix()); 1049 } 1050 char c=msg.charAt(index); 1051 if(c=='}') { 1052 // all done 1053 } else if(c!=',') { 1054 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex)); 1055 } else /* ',' */ { 1056 // parse argument type: case-sensitive a-zA-Z 1057 int typeIndex=index=skipWhiteSpace(index+1); 1058 while(index<msg.length() && isArgTypeChar(msg.charAt(index))) { 1059 ++index; 1060 } 1061 int length=index-typeIndex; 1062 index=skipWhiteSpace(index); 1063 if(index==msg.length()) { 1064 throw new IllegalArgumentException( 1065 "Unmatched '{' braces in message "+prefix()); 1066 } 1067 if(length==0 || ((c=msg.charAt(index))!=',' && c!='}')) { 1068 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex)); 1069 } 1070 if(length>Part.MAX_LENGTH) { 1071 throw new IndexOutOfBoundsException( 1072 "Argument type name too long: "+prefix(nameIndex)); 1073 } 1074 argType=ArgType.SIMPLE; 1075 if(length==6) { 1076 // case-insensitive comparisons for complex-type names 1077 if(isChoice(typeIndex)) { 1078 argType=ArgType.CHOICE; 1079 } else if(isPlural(typeIndex)) { 1080 argType=ArgType.PLURAL; 1081 } else if(isSelect(typeIndex)) { 
1082 argType=ArgType.SELECT; 1083 } 1084 } else if(length==13) { 1085 if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) { 1086 argType=ArgType.SELECTORDINAL; 1087 } 1088 } 1089 // change the ARG_START type from NONE to argType 1090 parts.get(argStart).value=(short)argType.ordinal(); 1091 if(argType==ArgType.SIMPLE) { 1092 addPart(Part.Type.ARG_TYPE, typeIndex, length, 0); 1093 } 1094 // look for an argument style (pattern) 1095 if(c=='}') { 1096 if(argType!=ArgType.SIMPLE) { 1097 throw new IllegalArgumentException( 1098 "No style field for complex argument: "+prefix(nameIndex)); 1099 } 1100 } else /* ',' */ { 1101 ++index; 1102 if(argType==ArgType.SIMPLE) { 1103 index=parseSimpleStyle(index); 1104 } else if(argType==ArgType.CHOICE) { 1105 index=parseChoiceStyle(index, nestingLevel); 1106 } else { 1107 index=parsePluralOrSelectStyle(argType, index, nestingLevel); 1108 } 1109 } 1110 } 1111 // Argument parsing stopped on the '}'. 1112 addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal()); 1113 return index+1; 1114 } 1115 parseSimpleStyle(int index)1116 private int parseSimpleStyle(int index) { 1117 int start=index; 1118 int nestedBraces=0; 1119 while(index<msg.length()) { 1120 char c=msg.charAt(index++); 1121 if(c=='\'') { 1122 // Treat apostrophe as quoting but include it in the style part. 1123 // Find the end of the quoted literal text. 
1124 index=msg.indexOf('\'', index); 1125 if(index<0) { 1126 throw new IllegalArgumentException( 1127 "Quoted literal argument style text reaches to the end of the message: "+ 1128 prefix(start)); 1129 } 1130 // skip the quote-ending apostrophe 1131 ++index; 1132 } else if(c=='{') { 1133 ++nestedBraces; 1134 } else if(c=='}') { 1135 if(nestedBraces>0) { 1136 --nestedBraces; 1137 } else { 1138 int length=--index-start; 1139 if(length>Part.MAX_LENGTH) { 1140 throw new IndexOutOfBoundsException( 1141 "Argument style text too long: "+prefix(start)); 1142 } 1143 addPart(Part.Type.ARG_STYLE, start, length, 0); 1144 return index; 1145 } 1146 } // c is part of literal text 1147 } 1148 throw new IllegalArgumentException( 1149 "Unmatched '{' braces in message "+prefix()); 1150 } 1151 parseChoiceStyle(int index, int nestingLevel)1152 private int parseChoiceStyle(int index, int nestingLevel) { 1153 int start=index; 1154 index=skipWhiteSpace(index); 1155 if(index==msg.length() || msg.charAt(index)=='}') { 1156 throw new IllegalArgumentException( 1157 "Missing choice argument pattern in "+prefix()); 1158 } 1159 for(;;) { 1160 // The choice argument style contains |-separated (number, separator, message) triples. 1161 // Parse the number. 1162 int numberIndex=index; 1163 index=skipDouble(index); 1164 int length=index-numberIndex; 1165 if(length==0) { 1166 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start)); 1167 } 1168 if(length>Part.MAX_LENGTH) { 1169 throw new IndexOutOfBoundsException( 1170 "Choice number too long: "+prefix(numberIndex)); 1171 } 1172 parseDouble(numberIndex, index, true); // adds ARG_INT or ARG_DOUBLE 1173 // Parse the separator. 
1174 index=skipWhiteSpace(index); 1175 if(index==msg.length()) { 1176 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start)); 1177 } 1178 char c=msg.charAt(index); 1179 if(!(c=='#' || c=='<' || c=='\u2264')) { // U+2264 is <= 1180 throw new IllegalArgumentException( 1181 "Expected choice separator (#<\u2264) instead of '"+c+ 1182 "' in choice pattern "+prefix(start)); 1183 } 1184 addPart(Part.Type.ARG_SELECTOR, index, 1, 0); 1185 // Parse the message fragment. 1186 index=parseMessage(++index, 0, nestingLevel+1, ArgType.CHOICE); 1187 // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length(). 1188 if(index==msg.length()) { 1189 return index; 1190 } 1191 if(msg.charAt(index)=='}') { 1192 if(!inMessageFormatPattern(nestingLevel)) { 1193 throw new IllegalArgumentException( 1194 "Bad choice pattern syntax: "+prefix(start)); 1195 } 1196 return index; 1197 } // else the terminator is '|' 1198 index=skipWhiteSpace(index+1); 1199 } 1200 } 1201 parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel)1202 private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) { 1203 int start=index; 1204 boolean isEmpty=true; 1205 boolean hasOther=false; 1206 for(;;) { 1207 // First, collect the selector looking for a small set of terminators. 1208 // It would be a little faster to consider the syntax of each possible 1209 // token right here, but that makes the code too complicated. 
1210 index=skipWhiteSpace(index); 1211 boolean eos=index==msg.length(); 1212 if(eos || msg.charAt(index)=='}') { 1213 if(eos==inMessageFormatPattern(nestingLevel)) { 1214 throw new IllegalArgumentException( 1215 "Bad "+ 1216 argType.toString().toLowerCase(Locale.ENGLISH)+ 1217 " pattern syntax: "+prefix(start)); 1218 } 1219 if(!hasOther) { 1220 throw new IllegalArgumentException( 1221 "Missing 'other' keyword in "+ 1222 argType.toString().toLowerCase(Locale.ENGLISH)+ 1223 " pattern in "+prefix()); 1224 } 1225 return index; 1226 } 1227 int selectorIndex=index; 1228 if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') { 1229 // explicit-value plural selector: =double 1230 index=skipDouble(index+1); 1231 int length=index-selectorIndex; 1232 if(length==1) { 1233 throw new IllegalArgumentException( 1234 "Bad "+ 1235 argType.toString().toLowerCase(Locale.ENGLISH)+ 1236 " pattern syntax: "+prefix(start)); 1237 } 1238 if(length>Part.MAX_LENGTH) { 1239 throw new IndexOutOfBoundsException( 1240 "Argument selector too long: "+prefix(selectorIndex)); 1241 } 1242 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0); 1243 parseDouble(selectorIndex+1, index, false); // adds ARG_INT or ARG_DOUBLE 1244 } else { 1245 index=skipIdentifier(index); 1246 int length=index-selectorIndex; 1247 if(length==0) { 1248 throw new IllegalArgumentException( 1249 "Bad "+ 1250 argType.toString().toLowerCase(Locale.ENGLISH)+ 1251 " pattern syntax: "+prefix(start)); 1252 } 1253 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range. 
1254 if( argType.hasPluralStyle() && length==6 && index<msg.length() && 1255 msg.regionMatches(selectorIndex, "offset:", 0, 7) 1256 ) { 1257 // plural offset, not a selector 1258 if(!isEmpty) { 1259 throw new IllegalArgumentException( 1260 "Plural argument 'offset:' (if present) must precede key-message pairs: "+ 1261 prefix(start)); 1262 } 1263 // allow whitespace between offset: and its value 1264 int valueIndex=skipWhiteSpace(index+1); // The ':' is at index. 1265 index=skipDouble(valueIndex); 1266 if(index==valueIndex) { 1267 throw new IllegalArgumentException( 1268 "Missing value for plural 'offset:' "+prefix(start)); 1269 } 1270 if((index-valueIndex)>Part.MAX_LENGTH) { 1271 throw new IndexOutOfBoundsException( 1272 "Plural offset value too long: "+prefix(valueIndex)); 1273 } 1274 parseDouble(valueIndex, index, false); // adds ARG_INT or ARG_DOUBLE 1275 isEmpty=false; 1276 continue; // no message fragment after the offset 1277 } else { 1278 // normal selector word 1279 if(length>Part.MAX_LENGTH) { 1280 throw new IndexOutOfBoundsException( 1281 "Argument selector too long: "+prefix(selectorIndex)); 1282 } 1283 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0); 1284 if(msg.regionMatches(selectorIndex, "other", 0, length)) { 1285 hasOther=true; 1286 } 1287 } 1288 } 1289 1290 // parse the message fragment following the selector 1291 index=skipWhiteSpace(index); 1292 if(index==msg.length() || msg.charAt(index)!='{') { 1293 throw new IllegalArgumentException( 1294 "No message fragment after "+ 1295 argType.toString().toLowerCase(Locale.ENGLISH)+ 1296 " selector: "+prefix(selectorIndex)); 1297 } 1298 index=parseMessage(index, 1, nestingLevel+1, argType); 1299 isEmpty=false; 1300 } 1301 } 1302 1303 /** 1304 * Validates and parses an argument name or argument number string. 1305 * This internal method assumes that the input substring is a "pattern identifier". 
1306 * @return >=0 if the name is a valid number, 1307 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 1308 * ARG_NAME_NOT_VALID (-2) if it is neither. 1309 * @see #validateArgumentName(String) 1310 */ parseArgNumber(CharSequence s, int start, int limit)1311 private static int parseArgNumber(CharSequence s, int start, int limit) { 1312 // If the identifier contains only ASCII digits, then it is an argument _number_ 1313 // and must not have leading zeros (except "0" itself). 1314 // Otherwise it is an argument _name_. 1315 if(start>=limit) { 1316 return ARG_NAME_NOT_VALID; 1317 } 1318 int number; 1319 // Defer numeric errors until we know there are only digits. 1320 boolean badNumber; 1321 char c=s.charAt(start++); 1322 if(c=='0') { 1323 if(start==limit) { 1324 return 0; 1325 } else { 1326 number=0; 1327 badNumber=true; // leading zero 1328 } 1329 } else if('1'<=c && c<='9') { 1330 number=c-'0'; 1331 badNumber=false; 1332 } else { 1333 return ARG_NAME_NOT_NUMBER; 1334 } 1335 while(start<limit) { 1336 c=s.charAt(start++); 1337 if('0'<=c && c<='9') { 1338 if(number>=Integer.MAX_VALUE/10) { 1339 badNumber=true; // overflow 1340 } 1341 number=number*10+(c-'0'); 1342 } else { 1343 return ARG_NAME_NOT_NUMBER; 1344 } 1345 } 1346 // There are only ASCII digits. 1347 if(badNumber) { 1348 return ARG_NAME_NOT_VALID; 1349 } else { 1350 return number; 1351 } 1352 } 1353 parseArgNumber(int start, int limit)1354 private int parseArgNumber(int start, int limit) { 1355 return parseArgNumber(msg, start, limit); 1356 } 1357 1358 /** 1359 * Parses a number from the specified message substring. 
1360 * @param start start index into the message string 1361 * @param limit limit index into the message string, must be start<limit 1362 * @param allowInfinity true if U+221E is allowed (for ChoiceFormat) 1363 */ parseDouble(int start, int limit, boolean allowInfinity)1364 private void parseDouble(int start, int limit, boolean allowInfinity) { 1365 assert start<limit; 1366 // fake loop for easy exit and single throw statement 1367 for(;;) { 1368 // fast path for small integers and infinity 1369 int value=0; 1370 int isNegative=0; // not boolean so that we can easily add it to value 1371 int index=start; 1372 char c=msg.charAt(index++); 1373 if(c=='-') { 1374 isNegative=1; 1375 if(index==limit) { 1376 break; // no number 1377 } 1378 c=msg.charAt(index++); 1379 } else if(c=='+') { 1380 if(index==limit) { 1381 break; // no number 1382 } 1383 c=msg.charAt(index++); 1384 } 1385 if(c==0x221e) { // infinity 1386 if(allowInfinity && index==limit) { 1387 addArgDoublePart( 1388 isNegative!=0 ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY, 1389 start, limit-start); 1390 return; 1391 } else { 1392 break; 1393 } 1394 } 1395 // try to parse the number as a small integer but fall back to a double 1396 while('0'<=c && c<='9') { 1397 value=value*10+(c-'0'); 1398 if(value>(Part.MAX_VALUE+isNegative)) { 1399 break; // not a small-enough integer 1400 } 1401 if(index==limit) { 1402 addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value); 1403 return; 1404 } 1405 c=msg.charAt(index++); 1406 } 1407 // Let Double.parseDouble() throw a NumberFormatException. 
1408 double numericValue=Double.parseDouble(msg.substring(start, limit)); 1409 addArgDoublePart(numericValue, start, limit-start); 1410 return; 1411 } 1412 throw new NumberFormatException( 1413 "Bad syntax for numeric value: "+msg.substring(start, limit)); 1414 } 1415 1416 /** 1417 * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes 1418 * according to JDK pattern behavior. 1419 * @internal 1420 */ 1421 /* package */ static void appendReducedApostrophes(String s, int start, int limit, 1422 StringBuilder sb) { 1423 int doubleApos=-1; 1424 for(;;) { 1425 int i=s.indexOf('\'', start); 1426 if(i<0 || i>=limit) { 1427 sb.append(s, start, limit); 1428 break; 1429 } 1430 if(i==doubleApos) { 1431 // Double apostrophe at start-1 and start==i, append one. 1432 sb.append('\''); 1433 ++start; 1434 doubleApos=-1; 1435 } else { 1436 // Append text between apostrophes and skip this one. 1437 sb.append(s, start, i); 1438 doubleApos=start=i+1; 1439 } 1440 } 1441 } 1442 1443 private int skipWhiteSpace(int index) { 1444 return PatternProps.skipWhiteSpace(msg, index); 1445 } 1446 1447 private int skipIdentifier(int index) { 1448 return PatternProps.skipIdentifier(msg, index); 1449 } 1450 1451 /** 1452 * Skips a sequence of characters that could occur in a double value. 1453 * Does not fully parse or validate the value. 1454 */ 1455 private int skipDouble(int index) { 1456 while(index<msg.length()) { 1457 char c=msg.charAt(index); 1458 // U+221E: Allow the infinity symbol, for ChoiceFormat patterns. 
1459 if((c<'0' && "+-.".indexOf(c)<0) || (c>'9' && c!='e' && c!='E' && c!=0x221e)) { 1460 break; 1461 } 1462 ++index; 1463 } 1464 return index; 1465 } 1466 1467 private static boolean isArgTypeChar(int c) { 1468 return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); 1469 } 1470 1471 private boolean isChoice(int index) { 1472 char c; 1473 return 1474 ((c=msg.charAt(index++))=='c' || c=='C') && 1475 ((c=msg.charAt(index++))=='h' || c=='H') && 1476 ((c=msg.charAt(index++))=='o' || c=='O') && 1477 ((c=msg.charAt(index++))=='i' || c=='I') && 1478 ((c=msg.charAt(index++))=='c' || c=='C') && 1479 ((c=msg.charAt(index))=='e' || c=='E'); 1480 } 1481 1482 private boolean isPlural(int index) { 1483 char c; 1484 return 1485 ((c=msg.charAt(index++))=='p' || c=='P') && 1486 ((c=msg.charAt(index++))=='l' || c=='L') && 1487 ((c=msg.charAt(index++))=='u' || c=='U') && 1488 ((c=msg.charAt(index++))=='r' || c=='R') && 1489 ((c=msg.charAt(index++))=='a' || c=='A') && 1490 ((c=msg.charAt(index))=='l' || c=='L'); 1491 } 1492 1493 private boolean isSelect(int index) { 1494 char c; 1495 return 1496 ((c=msg.charAt(index++))=='s' || c=='S') && 1497 ((c=msg.charAt(index++))=='e' || c=='E') && 1498 ((c=msg.charAt(index++))=='l' || c=='L') && 1499 ((c=msg.charAt(index++))=='e' || c=='E') && 1500 ((c=msg.charAt(index++))=='c' || c=='C') && 1501 ((c=msg.charAt(index))=='t' || c=='T'); 1502 } 1503 1504 private boolean isOrdinal(int index) { 1505 char c; 1506 return 1507 ((c=msg.charAt(index++))=='o' || c=='O') && 1508 ((c=msg.charAt(index++))=='r' || c=='R') && 1509 ((c=msg.charAt(index++))=='d' || c=='D') && 1510 ((c=msg.charAt(index++))=='i' || c=='I') && 1511 ((c=msg.charAt(index++))=='n' || c=='N') && 1512 ((c=msg.charAt(index++))=='a' || c=='A') && 1513 ((c=msg.charAt(index))=='l' || c=='L'); 1514 } 1515 1516 /** 1517 * @return true if we are inside a MessageFormat (sub-)pattern, 1518 * as opposed to inside a top-level choice/plural/select pattern. 
1519 */ 1520 private boolean inMessageFormatPattern(int nestingLevel) { 1521 return nestingLevel>0 || parts.get(0).type==Part.Type.MSG_START; 1522 } 1523 1524 /** 1525 * @return true if we are in a MessageFormat sub-pattern 1526 * of a top-level ChoiceFormat pattern. 1527 */ 1528 private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) { 1529 return 1530 nestingLevel==1 && 1531 parentType==ArgType.CHOICE && 1532 parts.get(0).type!=Part.Type.MSG_START; 1533 } 1534 1535 private void addPart(Part.Type type, int index, int length, int value) { 1536 parts.add(new Part(type, index, length, value)); 1537 } 1538 1539 private void addLimitPart(int start, Part.Type type, int index, int length, int value) { 1540 parts.get(start).limitPartIndex=parts.size(); 1541 addPart(type, index, length, value); 1542 } 1543 1544 private void addArgDoublePart(double numericValue, int start, int length) { 1545 int numericIndex; 1546 if(numericValues==null) { 1547 numericValues=new ArrayList<Double>(); 1548 numericIndex=0; 1549 } else { 1550 numericIndex=numericValues.size(); 1551 if(numericIndex>Part.MAX_VALUE) { 1552 throw new IndexOutOfBoundsException("Too many numeric values"); 1553 } 1554 } 1555 numericValues.add(numericValue); 1556 addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex); 1557 } 1558 1559 private static final int MAX_PREFIX_LENGTH=24; 1560 1561 /** 1562 * Returns a prefix of s.substring(start). Used for Exception messages. 1563 * @param s 1564 * @param start start index in s 1565 * @return s.substring(start) or a prefix of that 1566 */ 1567 private static String prefix(String s, int start) { 1568 StringBuilder prefix=new StringBuilder(MAX_PREFIX_LENGTH+20); 1569 if(start==0) { 1570 prefix.append("\""); 1571 } else { 1572 prefix.append("[at pattern index ").append(start).append("] \""); 1573 } 1574 int substringLength=s.length()-start; 1575 if(substringLength<=MAX_PREFIX_LENGTH) { 1576 prefix.append(start==0 ? 
s : s.substring(start)); 1577 } else { 1578 int limit=start+MAX_PREFIX_LENGTH-4; 1579 if(Character.isHighSurrogate(s.charAt(limit-1))) { 1580 // remove lead surrogate from the end of the prefix 1581 --limit; 1582 } 1583 prefix.append(s, start, limit).append(" ..."); 1584 } 1585 return prefix.append("\"").toString(); 1586 } 1587 1588 private static String prefix(String s) { 1589 return prefix(s, 0); 1590 } 1591 1592 private String prefix(int start) { 1593 return prefix(msg, start); 1594 } 1595 1596 private String prefix() { 1597 return prefix(msg, 0); 1598 } 1599 1600 private ApostropheMode aposMode; 1601 private String msg; 1602 private ArrayList<Part> parts=new ArrayList<Part>(); 1603 private ArrayList<Double> numericValues; 1604 private boolean hasArgNames; 1605 private boolean hasArgNumbers; 1606 private boolean needsAutoQuoting; 1607 private volatile boolean frozen; 1608 1609 private static final ApostropheMode defaultAposMode= 1610 ApostropheMode.valueOf( 1611 ICUConfig.get("com.ibm.icu.text.MessagePattern.ApostropheMode", "DOUBLE_OPTIONAL")); 1612 1613 private static final ArgType[] argTypes=ArgType.values(); 1614 } 1615