1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010-2016, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   created on: 2010aug21
7 *   created by: Markus W. Scherer
8 */
9 
10 package com.ibm.icu.text;
11 
12 import java.util.ArrayList;
13 import java.util.Locale;
14 
15 import com.ibm.icu.impl.ICUConfig;
16 import com.ibm.icu.impl.PatternProps;
17 import com.ibm.icu.util.Freezable;
18 import com.ibm.icu.util.ICUCloneNotSupportedException;
19 
20 //Note: Minimize ICU dependencies, only use a very small part of the ICU core.
21 //In particular, do not depend on *Format classes.
22 
23 /**
24  * Parses and represents ICU MessageFormat patterns.
25  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
26  * Used in the implementations of those classes as well as in tools
27  * for message validation, translation and format conversion.
28  * <p>
29  * The parser handles all syntax relevant for identifying message arguments.
30  * This includes "complex" arguments whose style strings contain
31  * nested MessageFormat pattern substrings.
32  * For "simple" arguments (with no nested MessageFormat pattern substrings),
33  * the argument style is not parsed any further.
34  * <p>
35  * The parser handles named and numbered message arguments and allows both in one message.
36  * <p>
37  * Once a pattern has been parsed successfully, iterate through the parsed data
38  * with countParts(), getPart() and related methods.
39  * <p>
40  * The data logically represents a parse tree, but is stored and accessed
41  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
42  * Arguments and nested messages are best handled via recursion.
43  * For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns
44  * the index of the corresponding _LIMIT "part".
45  * <p>
46  * List of "parts":
47  * <pre>
48  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
49  * argument = noneArg | simpleArg | complexArg
50  * complexArg = choiceArg | pluralArg | selectArg
51  *
52  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
53  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
54  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
55  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
56  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
57  *
58  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
59  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
60  * selectStyle = (ARG_SELECTOR message)+
61  * </pre>
62  * <ul>
63  *   <li>Literal output text is not represented directly by "parts" but accessed
64  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
65  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
66  *   <li>In the choiceStyle, the ARG_SELECTOR has the '&lt;', the '#' or
67  *       the less-than-or-equal-to sign (U+2264).
68  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
69  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
70  *       is the value of an explicit-number selector like "=2",
71  *       otherwise the selector is a non-numeric identifier.
72  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
73  * </ul>
74  * <p>
75  * This class is not intended for public subclassing.
76  *
77  * @stable ICU 4.8
78  * @author Markus Scherer
79  */
80 public final class MessagePattern implements Cloneable, Freezable<MessagePattern> {
81     /**
82      * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
83      * The default is DOUBLE_OPTIONAL unless overridden via ICUConfig
84      * (/com/ibm/icu/ICUConfig.properties).
85      * <p>
86      * A pair of adjacent apostrophes always results in a single apostrophe in the output,
87      * even when the pair is between two single, text-quoting apostrophes.
88      * <p>
89      * The following table shows examples of desired MessageFormat.format() output
90      * with the pattern strings that yield that output.
91      *
92      * <table>
93      *   <tr>
94      *     <th>Desired output</th>
95      *     <th>DOUBLE_OPTIONAL</th>
96      *     <th>DOUBLE_REQUIRED</th>
97      *   </tr>
98      *   <tr>
99      *     <td>I see {many}</td>
100      *     <td>I see '{many}'</td>
101      *     <td>(same)</td>
102      *   </tr>
103      *   <tr>
104      *     <td>I said {'Wow!'}</td>
105      *     <td>I said '{''Wow!''}'</td>
106      *     <td>(same)</td>
107      *   </tr>
108      *   <tr>
109      *     <td>I don't know</td>
110      *     <td>I don't know OR<br> I don''t know</td>
111      *     <td>I don''t know</td>
112      *   </tr>
113      * </table>
114      * @stable ICU 4.8
115      */
116     public enum ApostropheMode {
117         /**
118          * A literal apostrophe is represented by
119          * either a single or a double apostrophe pattern character.
120          * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
121          * if it immediately precedes a curly brace {},
122          * or a pipe symbol | if inside a choice format,
123          * or a pound symbol # if inside a plural format.
124          * <p>
125          * This is the default behavior starting with ICU 4.8.
126          * @stable ICU 4.8
127          */
128         DOUBLE_OPTIONAL,
129         /**
130          * A literal apostrophe must be represented by
131          * a double apostrophe pattern character.
132          * A single apostrophe always starts quoted literal text.
133          * <p>
134          * This is the behavior of ICU 4.6 and earlier, and of {@link java.text.MessageFormat}.
135          * @stable ICU 4.8
136          */
137         DOUBLE_REQUIRED
138     }
139 
140     /**
141      * Constructs an empty MessagePattern with default ApostropheMode.
142      * @stable ICU 4.8
143      */
MessagePattern()144     public MessagePattern() {
145         aposMode=defaultAposMode;
146     }
147 
148     /**
149      * Constructs an empty MessagePattern.
150      * @param mode Explicit ApostropheMode.
151      * @stable ICU 4.8
152      */
MessagePattern(ApostropheMode mode)153     public MessagePattern(ApostropheMode mode) {
154         aposMode=mode;
155     }
156 
157     /**
158      * Constructs a MessagePattern with default ApostropheMode and
159      * parses the MessageFormat pattern string.
160      * @param pattern a MessageFormat pattern string
161      * @throws IllegalArgumentException for syntax errors in the pattern string
162      * @throws IndexOutOfBoundsException if certain limits are exceeded
163      *         (e.g., argument number too high, argument name too long, etc.)
164      * @throws NumberFormatException if a number could not be parsed
165      * @stable ICU 4.8
166      */
MessagePattern(String pattern)167     public MessagePattern(String pattern) {
168         aposMode=defaultAposMode;
169         parse(pattern);
170     }
171 
172     /**
173      * Parses a MessageFormat pattern string.
174      * @param pattern a MessageFormat pattern string
175      * @return this
176      * @throws IllegalArgumentException for syntax errors in the pattern string
177      * @throws IndexOutOfBoundsException if certain limits are exceeded
178      *         (e.g., argument number too high, argument name too long, etc.)
179      * @throws NumberFormatException if a number could not be parsed
180      * @stable ICU 4.8
181      */
parse(String pattern)182     public MessagePattern parse(String pattern) {
183         preParse(pattern);
184         parseMessage(0, 0, 0, ArgType.NONE);
185         postParse();
186         return this;
187     }
188 
189     /**
190      * Parses a ChoiceFormat pattern string.
191      * @param pattern a ChoiceFormat pattern string
192      * @return this
193      * @throws IllegalArgumentException for syntax errors in the pattern string
194      * @throws IndexOutOfBoundsException if certain limits are exceeded
195      *         (e.g., argument number too high, argument name too long, etc.)
196      * @throws NumberFormatException if a number could not be parsed
197      * @stable ICU 4.8
198      */
parseChoiceStyle(String pattern)199     public MessagePattern parseChoiceStyle(String pattern) {
200         preParse(pattern);
201         parseChoiceStyle(0, 0);
202         postParse();
203         return this;
204     }
205 
206     /**
207      * Parses a PluralFormat pattern string.
208      * @param pattern a PluralFormat pattern string
209      * @return this
210      * @throws IllegalArgumentException for syntax errors in the pattern string
211      * @throws IndexOutOfBoundsException if certain limits are exceeded
212      *         (e.g., argument number too high, argument name too long, etc.)
213      * @throws NumberFormatException if a number could not be parsed
214      * @stable ICU 4.8
215      */
parsePluralStyle(String pattern)216     public MessagePattern parsePluralStyle(String pattern) {
217         preParse(pattern);
218         parsePluralOrSelectStyle(ArgType.PLURAL, 0, 0);
219         postParse();
220         return this;
221     }
222 
223     /**
224      * Parses a SelectFormat pattern string.
225      * @param pattern a SelectFormat pattern string
226      * @return this
227      * @throws IllegalArgumentException for syntax errors in the pattern string
228      * @throws IndexOutOfBoundsException if certain limits are exceeded
229      *         (e.g., argument number too high, argument name too long, etc.)
230      * @throws NumberFormatException if a number could not be parsed
231      * @stable ICU 4.8
232      */
parseSelectStyle(String pattern)233     public MessagePattern parseSelectStyle(String pattern) {
234         preParse(pattern);
235         parsePluralOrSelectStyle(ArgType.SELECT, 0, 0);
236         postParse();
237         return this;
238     }
239 
240     /**
241      * Clears this MessagePattern.
242      * countParts() will return 0.
243      * @stable ICU 4.8
244      */
clear()245     public void clear() {
246         // Mostly the same as preParse().
247         if(isFrozen()) {
248             throw new UnsupportedOperationException(
249                 "Attempt to clear() a frozen MessagePattern instance.");
250         }
251         msg=null;
252         hasArgNames=hasArgNumbers=false;
253         needsAutoQuoting=false;
254         parts.clear();
255         if(numericValues!=null) {
256             numericValues.clear();
257         }
258     }
259 
260     /**
261      * Clears this MessagePattern and sets the ApostropheMode.
262      * countParts() will return 0.
263      * @param mode The new ApostropheMode.
264      * @stable ICU 4.8
265      */
clearPatternAndSetApostropheMode(ApostropheMode mode)266     public void clearPatternAndSetApostropheMode(ApostropheMode mode) {
267         clear();
268         aposMode=mode;
269     }
270 
271     /**
272      * @param other another object to compare with.
273      * @return true if this object is equivalent to the other one.
274      * @stable ICU 4.8
275      */
276     @Override
equals(Object other)277     public boolean equals(Object other) {
278         if(this==other) {
279             return true;
280         }
281         if(other==null || getClass()!=other.getClass()) {
282             return false;
283         }
284         MessagePattern o=(MessagePattern)other;
285         return
286             aposMode.equals(o.aposMode) &&
287             (msg==null ? o.msg==null : msg.equals(o.msg)) &&
288             parts.equals(o.parts);
289         // No need to compare numericValues if msg and parts are the same.
290     }
291 
292     /**
293      * {@inheritDoc}
294      * @stable ICU 4.8
295      */
296     @Override
hashCode()297     public int hashCode() {
298         return (aposMode.hashCode()*37+(msg!=null ? msg.hashCode() : 0))*37+parts.hashCode();
299     }
300 
301     /**
302      * @return this instance's ApostropheMode.
303      * @stable ICU 4.8
304      */
getApostropheMode()305     public ApostropheMode getApostropheMode() {
306         return aposMode;
307     }
308 
309     /**
310      * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED
311      * @internal
312      */
jdkAposMode()313     /* package */ boolean jdkAposMode() {
314         return aposMode == ApostropheMode.DOUBLE_REQUIRED;
315     }
316 
317     /**
318      * @return the parsed pattern string (null if none was parsed).
319      * @stable ICU 4.8
320      */
getPatternString()321     public String getPatternString() {
322         return msg;
323     }
324 
325     /**
326      * Does the parsed pattern have named arguments like {first_name}?
327      * @return true if the parsed pattern has at least one named argument.
328      * @stable ICU 4.8
329      */
hasNamedArguments()330     public boolean hasNamedArguments() {
331         return hasArgNames;
332     }
333 
334     /**
335      * Does the parsed pattern have numbered arguments like {2}?
336      * @return true if the parsed pattern has at least one numbered argument.
337      * @stable ICU 4.8
338      */
hasNumberedArguments()339     public boolean hasNumberedArguments() {
340         return hasArgNumbers;
341     }
342 
343     /**
344      * {@inheritDoc}
345      * @stable ICU 4.8
346      */
347     @Override
toString()348     public String toString() {
349         return msg;
350     }
351 
352     /**
353      * Validates and parses an argument name or argument number string.
354      * An argument name must be a "pattern identifier", that is, it must contain
355      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
356      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
357      * @param name Input string.
358      * @return &gt;=0 if the name is a valid number,
359      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
360      *         ARG_NAME_NOT_VALID (-2) if it is neither.
361      * @stable ICU 4.8
362      */
validateArgumentName(String name)363     public static int validateArgumentName(String name) {
364         if(!PatternProps.isIdentifier(name)) {
365             return ARG_NAME_NOT_VALID;
366         }
367         return parseArgNumber(name, 0, name.length());
368     }
369 
370     /**
371      * Return value from {@link #validateArgumentName(String)} for when
372      * the string is a valid "pattern identifier" but not a number.
373      * @stable ICU 4.8
374      */
375     public static final int ARG_NAME_NOT_NUMBER=-1;
376 
377     /**
378      * Return value from {@link #validateArgumentName(String)} for when
379      * the string is invalid.
380      * It might not be a valid "pattern identifier",
     * or it might have only ASCII digits but there is a leading zero or the number is too large.
382      * @stable ICU 4.8
383      */
384     public static final int ARG_NAME_NOT_VALID=-2;
385 
386     /**
387      * Returns a version of the parsed pattern string where each ASCII apostrophe
388      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
389      * <p>
390      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
391      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
392      * @return the deep-auto-quoted version of the parsed pattern string.
393      * @see MessageFormat#autoQuoteApostrophe(String)
394      * @stable ICU 4.8
395      */
autoQuoteApostropheDeep()396     public String autoQuoteApostropheDeep() {
397         if(!needsAutoQuoting) {
398             return msg;
399         }
400         StringBuilder modified=null;
401         // Iterate backward so that the insertion indexes do not change.
402         int count=countParts();
403         for(int i=count; i>0;) {
404             Part part;
405             if((part=getPart(--i)).getType()==Part.Type.INSERT_CHAR) {
406                 if(modified==null) {
407                     modified=new StringBuilder(msg.length()+10).append(msg);
408                 }
409                 modified.insert(part.index, (char)part.value);
410             }
411         }
412         if(modified==null) {
413             return msg;
414         } else {
415             return modified.toString();
416         }
417     }
418 
419     /**
420      * Returns the number of "parts" created by parsing the pattern string.
421      * Returns 0 if no pattern has been parsed or clear() was called.
422      * @return the number of pattern parts.
423      * @stable ICU 4.8
424      */
countParts()425     public int countParts() {
426         return parts.size();
427     }
428 
429     /**
430      * Gets the i-th pattern "part".
431      * @param i The index of the Part data. (0..countParts()-1)
432      * @return the i-th pattern "part".
433      * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
434      * @stable ICU 4.8
435      */
getPart(int i)436     public Part getPart(int i) {
437         return parts.get(i);
438     }
439 
440     /**
441      * Returns the Part.Type of the i-th pattern "part".
442      * Convenience method for getPart(i).getType().
443      * @param i The index of the Part data. (0..countParts()-1)
444      * @return The Part.Type of the i-th Part.
445      * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
446      * @stable ICU 4.8
447      */
getPartType(int i)448     public Part.Type getPartType(int i) {
449         return parts.get(i).type;
450     }
451 
452     /**
453      * Returns the pattern index of the specified pattern "part".
454      * Convenience method for getPart(partIndex).getIndex().
455      * @param partIndex The index of the Part data. (0..countParts()-1)
456      * @return The pattern index of this Part.
457      * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range
458      * @stable ICU 4.8
459      */
getPatternIndex(int partIndex)460     public int getPatternIndex(int partIndex) {
461         return parts.get(partIndex).index;
462     }
463 
464     /**
465      * Returns the substring of the pattern string indicated by the Part.
466      * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
467      * @param part a part of this MessagePattern.
468      * @return the substring associated with part.
469      * @stable ICU 4.8
470      */
getSubstring(Part part)471     public String getSubstring(Part part) {
472         int index=part.index;
473         return msg.substring(index, index+part.length);
474     }
475 
476     /**
477      * Compares the part's substring with the input string s.
478      * @param part a part of this MessagePattern.
479      * @param s a string.
480      * @return true if getSubstring(part).equals(s).
481      * @stable ICU 4.8
482      */
partSubstringMatches(Part part, String s)483     public boolean partSubstringMatches(Part part, String s) {
484         return msg.regionMatches(part.index, s, 0, part.length);
485     }
486 
487     /**
488      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
489      * @param part a part of this MessagePattern.
490      * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part.
491      * @stable ICU 4.8
492      */
getNumericValue(Part part)493     public double getNumericValue(Part part) {
494         Part.Type type=part.type;
495         if(type==Part.Type.ARG_INT) {
496             return part.value;
497         } else if(type==Part.Type.ARG_DOUBLE) {
498             return numericValues.get(part.value);
499         } else {
500             return NO_NUMERIC_VALUE;
501         }
502     }
503 
504     /**
505      * Special value that is returned by getNumericValue(Part) when no
506      * numeric value is defined for a part.
507      * @see #getNumericValue
508      * @stable ICU 4.8
509      */
510     public static final double NO_NUMERIC_VALUE=-123456789;
511 
512     /**
513      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
514      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
515      * @return the "offset:" value.
516      * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range
517      * @stable ICU 4.8
518      */
getPluralOffset(int pluralStart)519     public double getPluralOffset(int pluralStart) {
520         Part part=parts.get(pluralStart);
521         if(part.type.hasNumericValue()) {
522             return getNumericValue(part);
523         } else {
524             return 0;
525         }
526     }
527 
528     /**
529      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
530      * @param start The index of some Part data (0..countParts()-1);
531      *        this Part should be of Type ARG_START or MSG_START.
532      * @return The first i&gt;start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
533      *         or start itself if getPartType(msgStart)!=ARG|MSG_START.
534      * @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range
535      * @stable ICU 4.8
536      */
getLimitPartIndex(int start)537     public int getLimitPartIndex(int start) {
538         int limit=parts.get(start).limitPartIndex;
539         if(limit<start) {
540             return start;
541         }
542         return limit;
543     }
544 
545     /**
546      * A message pattern "part", representing a pattern parsing event.
547      * There is a part for the start and end of a message or argument,
548      * for quoting and escaping of and with ASCII apostrophes,
549      * and for syntax elements of "complex" arguments.
550      * @stable ICU 4.8
551      */
552     public static final class Part {
Part(Type t, int i, int l, int v)553         private Part(Type t, int i, int l, int v) {
554             type=t;
555             index=i;
556             length=(char)l;
557             value=(short)v;
558         }
559 
560         /**
561          * Returns the type of this part.
562          * @return the part type.
563          * @stable ICU 4.8
564          */
getType()565         public Type getType() {
566             return type;
567         }
568 
569         /**
570          * Returns the pattern string index associated with this Part.
571          * @return this part's pattern string index.
572          * @stable ICU 4.8
573          */
getIndex()574         public int getIndex() {
575             return index;
576         }
577 
578         /**
579          * Returns the length of the pattern substring associated with this Part.
580          * This is 0 for some parts.
581          * @return this part's pattern substring length.
582          * @stable ICU 4.8
583          */
getLength()584         public int getLength() {
585             return length;
586         }
587 
588         /**
589          * Returns the pattern string limit (exclusive-end) index associated with this Part.
590          * Convenience method for getIndex()+getLength().
591          * @return this part's pattern string limit index, same as getIndex()+getLength().
592          * @stable ICU 4.8
593          */
getLimit()594         public int getLimit() {
595             return index+length;
596         }
597 
598         /**
599          * Returns a value associated with this part.
600          * See the documentation of each part type for details.
601          * @return the part value.
602          * @stable ICU 4.8
603          */
getValue()604         public int getValue() {
605             return value;
606         }
607 
608         /**
609          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
610          * otherwise ArgType.NONE.
611          * @return the argument type for this part.
612          * @stable ICU 4.8
613          */
getArgType()614         public ArgType getArgType() {
615             Type type=getType();
616             if(type==Type.ARG_START || type==Type.ARG_LIMIT) {
617                 return argTypes[value];
618             } else {
619                 return ArgType.NONE;
620             }
621         }
622 
623         /**
624          * Part type constants.
625          * @stable ICU 4.8
626          */
627         public enum Type {
628             /**
629              * Start of a message pattern (main or nested).
630              * The length is 0 for the top-level message
631              * and for a choice argument sub-message, otherwise 1 for the '{'.
632              * The value indicates the nesting level, starting with 0 for the main message.
633              * <p>
634              * There is always a later MSG_LIMIT part.
635              * @stable ICU 4.8
636              */
637             MSG_START,
638             /**
639              * End of a message pattern (main or nested).
640              * The length is 0 for the top-level message and
641              * the last sub-message of a choice argument,
642              * otherwise 1 for the '}' or (in a choice argument style) the '|'.
643              * The value indicates the nesting level, starting with 0 for the main message.
644              * @stable ICU 4.8
645              */
646             MSG_LIMIT,
647             /**
648              * Indicates a substring of the pattern string which is to be skipped when formatting.
649              * For example, an apostrophe that begins or ends quoted text
650              * would be indicated with such a part.
651              * The value is undefined and currently always 0.
652              * @stable ICU 4.8
653              */
654             SKIP_SYNTAX,
655             /**
656              * Indicates that a syntax character needs to be inserted for auto-quoting.
657              * The length is 0.
658              * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
659              * @stable ICU 4.8
660              */
661             INSERT_CHAR,
662             /**
663              * Indicates a syntactic (non-escaped) # symbol in a plural variant.
664              * When formatting, replace this part's substring with the
665              * (value-offset) for the plural argument value.
666              * The value is undefined and currently always 0.
667              * @stable ICU 4.8
668              */
669             REPLACE_NUMBER,
670             /**
671              * Start of an argument.
672              * The length is 1 for the '{'.
673              * The value is the ordinal value of the ArgType. Use getArgType().
674              * <p>
675              * This part is followed by either an ARG_NUMBER or ARG_NAME,
676              * followed by optional argument sub-parts (see ArgType constants)
677              * and finally an ARG_LIMIT part.
678              * @stable ICU 4.8
679              */
680             ARG_START,
681             /**
682              * End of an argument.
683              * The length is 1 for the '}'.
684              * The value is the ordinal value of the ArgType. Use getArgType().
685              * @stable ICU 4.8
686              */
687             ARG_LIMIT,
688             /**
689              * The argument number, provided by the value.
690              * @stable ICU 4.8
691              */
692             ARG_NUMBER,
693             /**
694              * The argument name.
695              * The value is undefined and currently always 0.
696              * @stable ICU 4.8
697              */
698             ARG_NAME,
699             /**
700              * The argument type.
701              * The value is undefined and currently always 0.
702              * @stable ICU 4.8
703              */
704             ARG_TYPE,
705             /**
706              * The argument style text.
707              * The value is undefined and currently always 0.
708              * @stable ICU 4.8
709              */
710             ARG_STYLE,
711             /**
712              * A selector substring in a "complex" argument style.
713              * The value is undefined and currently always 0.
714              * @stable ICU 4.8
715              */
716             ARG_SELECTOR,
717             /**
718              * An integer value, for example the offset or an explicit selector value
719              * in a PluralFormat style.
720              * The part value is the integer value.
721              * @stable ICU 4.8
722              */
723             ARG_INT,
724             /**
725              * A numeric value, for example the offset or an explicit selector value
726              * in a PluralFormat style.
727              * The part value is an index into an internal array of numeric values;
728              * use getNumericValue().
729              * @stable ICU 4.8
730              */
731             ARG_DOUBLE;
732 
733             /**
734              * Indicates whether this part has a numeric value.
735              * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}.
736              * @return true if this part has a numeric value.
737              * @stable ICU 4.8
738              */
hasNumericValue()739             public boolean hasNumericValue() {
740                 return this==ARG_INT || this==ARG_DOUBLE;
741             }
742         }
743 
744         /**
745          * @return a string representation of this part.
746          * @stable ICU 4.8
747          */
748         @Override
toString()749         public String toString() {
750             String valueString=(type==Type.ARG_START || type==Type.ARG_LIMIT) ?
751                 getArgType().name() : Integer.toString(value);
752             return type.name()+"("+valueString+")@"+index;
753         }
754 
755         /**
756          * @param other another object to compare with.
757          * @return true if this object is equivalent to the other one.
758          * @stable ICU 4.8
759          */
760         @Override
equals(Object other)761         public boolean equals(Object other) {
762             if(this==other) {
763                 return true;
764             }
765             if(other==null || getClass()!=other.getClass()) {
766                 return false;
767             }
768             Part o=(Part)other;
769             return
770                 type.equals(o.type) &&
771                 index==o.index &&
772                 length==o.length &&
773                 value==o.value &&
774                 limitPartIndex==o.limitPartIndex;
775         }
776 
777         /**
778          * {@inheritDoc}
779          * @stable ICU 4.8
780          */
781         @Override
hashCode()782         public int hashCode() {
783             return ((type.hashCode()*37+index)*37+length)*37+value;
784         }
785 
786         private static final int MAX_LENGTH=0xffff;
787         private static final int MAX_VALUE=Short.MAX_VALUE;
788 
789         // Some fields are not final because they are modified during pattern parsing.
790         // After pattern parsing, the parts are effectively immutable.
791         private final Type type;
792         private final int index;
793         private final char length;
794         private short value;
795         private int limitPartIndex;
796     }
797 
798     /**
799      * Argument type constants.
800      * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
801      *
802      * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
803      * with a nesting level one greater than the surrounding message.
804      * @stable ICU 4.8
805      */
806     public enum ArgType {
807         /**
808          * The argument has no specified type.
809          * @stable ICU 4.8
810          */
811         NONE,
812         /**
813          * The argument has a "simple" type which is provided by the ARG_TYPE part.
814          * An ARG_STYLE part might follow that.
815          * @stable ICU 4.8
816          */
817         SIMPLE,
818         /**
819          * The argument is a ChoiceFormat with one or more
820          * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
821          * @stable ICU 4.8
822          */
823         CHOICE,
824         /**
825          * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
826          * (e.g., offset:1)
827          * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
828          * If the selector has an explicit value (e.g., =2), then
829          * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
830          * Otherwise the message immediately follows the ARG_SELECTOR.
831          * @stable ICU 4.8
832          */
833         PLURAL,
834         /**
835          * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
836          * @stable ICU 4.8
837          */
838         SELECT,
839         /**
840          * The argument is an ordinal-number PluralFormat
841          * with the same style parts sequence and semantics as {@link ArgType#PLURAL}.
842          * @stable ICU 50
843          */
844         SELECTORDINAL;
845 
846         /**
847          * @return true if the argument type has a plural style part sequence and semantics,
848          * for example {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL}.
849          * @stable ICU 50
850          */
hasPluralStyle()851         public boolean hasPluralStyle() {
852             return this == PLURAL || this == SELECTORDINAL;
853         }
854     }
855 
856     /**
857      * Creates and returns a copy of this object.
858      * @return a copy of this object (or itself if frozen).
859      * @stable ICU 4.8
860      */
861     @Override
clone()862     public Object clone() {
863         if(isFrozen()) {
864             return this;
865         } else {
866             return cloneAsThawed();
867         }
868     }
869 
870     /**
871      * Creates and returns an unfrozen copy of this object.
872      * @return a copy of this object.
873      * @stable ICU 4.8
874      */
875     @SuppressWarnings("unchecked")
cloneAsThawed()876     public MessagePattern cloneAsThawed() {
877         MessagePattern newMsg;
878         try {
879             newMsg=(MessagePattern)super.clone();
880         } catch (CloneNotSupportedException e) {
881             throw new ICUCloneNotSupportedException(e);
882         }
883         newMsg.parts=(ArrayList<Part>)parts.clone();
884         if(numericValues!=null) {
885             newMsg.numericValues=(ArrayList<Double>)numericValues.clone();
886         }
887         newMsg.frozen=false;
888         return newMsg;
889     }
890 
891     /**
892      * Freezes this object, making it immutable and thread-safe.
893      * @return this
894      * @stable ICU 4.8
895      */
freeze()896     public MessagePattern freeze() {
897         frozen=true;
898         return this;
899     }
900 
901     /**
902      * Determines whether this object is frozen (immutable) or not.
903      * @return true if this object is frozen.
904      * @stable ICU 4.8
905      */
isFrozen()906     public boolean isFrozen() {
907         return frozen;
908     }
909 
preParse(String pattern)910     private void preParse(String pattern) {
911         if(isFrozen()) {
912             throw new UnsupportedOperationException(
913                 "Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance.");
914         }
915         msg=pattern;
916         hasArgNames=hasArgNumbers=false;
917         needsAutoQuoting=false;
918         parts.clear();
919         if(numericValues!=null) {
920             numericValues.clear();
921         }
922     }
923 
    /**
     * Counterpart of preParse(String).
     * Intentionally empty: there is currently no work to be done after parsing.
     */
    private void postParse() {
        // Nothing to be done currently.
    }
927 
    /**
     * Parses one message (top-level or nested) and adds its parts,
     * delimited by a MSG_START and a MSG_LIMIT part whose value is the nesting level.
     * Handles apostrophe quoting, the '#' number placeholder in plural-style messages,
     * nested arguments (via parseArg()) and the message terminators '}' and '|'.
     * @param index index into the pattern string where the message starts
     * @param msgStartLength length of the MSG_START part; parsing continues after it
     * @param nestingLevel nesting level of this message; '}' terminates the message only if this is &gt;0
     * @param parentType type of the enclosing argument;
     *        CHOICE makes '|' a terminator, a plural style makes '#' special
     * @return the index at which the caller continues:
     *         the index of the terminator ('}' or '|') for a CHOICE parent,
     *         the index after the '}' for other nested messages,
     *         or the pattern length if the end of the pattern was reached
     * @throws IndexOutOfBoundsException if nestingLevel exceeds Part.MAX_VALUE
     * @throws IllegalArgumentException if the pattern ends inside a nested message
     *         that is not a sub-message of a top-level choice pattern
     */
    private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) {
        if(nestingLevel>Part.MAX_VALUE) {
            throw new IndexOutOfBoundsException();
        }
        // Remember where MSG_START is so that the matching MSG_LIMIT can be linked to it.
        int msgStart=parts.size();
        addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel);
        index+=msgStartLength;
        while(index<msg.length()) {
            // Note: index already points behind c for the rest of the loop body.
            char c=msg.charAt(index++);
            if(c=='\'') {
                if(index==msg.length()) {
                    // The apostrophe is the last character in the pattern.
                    // Add a Part for auto-quoting.
                    addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                    needsAutoQuoting=true;
                } else {
                    c=msg.charAt(index);
                    if(c=='\'') {
                        // double apostrophe, skip the second one
                        addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                    } else if(
                        aposMode==ApostropheMode.DOUBLE_REQUIRED ||
                        c=='{' || c=='}' ||
                        (parentType==ArgType.CHOICE && c=='|') ||
                        (parentType.hasPluralStyle() && c=='#')
                    ) {
                        // The apostrophe starts quoted literal text.
                        // skip the quote-starting apostrophe
                        addPart(Part.Type.SKIP_SYNTAX, index-1, 1, 0);
                        // find the end of the quoted literal text
                        for(;;) {
                            index=msg.indexOf('\'', index+1);
                            if(index>=0) {
                                if((index+1)<msg.length() && msg.charAt(index+1)=='\'') {
                                    // double apostrophe inside quoted literal text
                                    // still encodes a single apostrophe, skip the second one
                                    addPart(Part.Type.SKIP_SYNTAX, ++index, 1, 0);
                                } else {
                                    // skip the quote-ending apostrophe
                                    addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                                    break;
                                }
                            } else {
                                // The quoted text reaches to the end of the message.
                                index=msg.length();
                                // Add a Part for auto-quoting.
                                addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                                needsAutoQuoting=true;
                                break;
                            }
                        }
                    } else {
                        // Interpret the apostrophe as literal text.
                        // Add a Part for auto-quoting.
                        addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                        needsAutoQuoting=true;
                    }
                }
            } else if(parentType.hasPluralStyle() && c=='#') {
                // The unquoted # in a plural message fragment will be replaced
                // with the (number-offset).
                addPart(Part.Type.REPLACE_NUMBER, index-1, 1, 0);
            } else if(c=='{') {
                // Nested argument; parseArg() gets the index of the '{'.
                index=parseArg(index-1, 1, nestingLevel);
            } else if((nestingLevel>0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) {
                // Finish the message before the terminator.
                // In a choice style, report the "}" substring only for the following ARG_LIMIT,
                // not for this MSG_LIMIT.
                int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1;
                addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel);
                if(parentType==ArgType.CHOICE) {
                    // Let the choice style parser see the '}' or '|'.
                    return index-1;
                } else {
                    // continue parsing after the '}'
                    return index;
                }
            }  // else: c is part of literal text
        }
        // The end of the pattern string was reached without a terminator.
        if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel);
        return index;
    }
1013 
    /**
     * Parses one argument and adds its parts.
     * Adds ARG_START, then ARG_NUMBER or ARG_NAME, then (for a simple type) ARG_TYPE,
     * then the style parts added by parseSimpleStyle(), parseChoiceStyle() or
     * parsePluralOrSelectStyle(), and finally ARG_LIMIT for the closing '}'.
     * The value of the ARG_START and ARG_LIMIT parts is the ordinal of the ArgType.
     * @param index index into the pattern string where the argument starts
     *        (parseMessage() passes the index of the '{')
     * @param argStartLength length of the ARG_START part; parsing continues after it
     * @param nestingLevel nesting level of the message that contains the argument
     * @return the index after the argument-terminating '}'
     * @throws IllegalArgumentException if the pattern ends inside the argument
     *         or the argument syntax is not valid
     * @throws IndexOutOfBoundsException if the argument number is too large
     *         or the argument name or type name is too long for a Part
     */
    private int parseArg(int index, int argStartLength, int nestingLevel) {
        // Remember where ARG_START is: Its value is patched once the type is known,
        // and the ARG_LIMIT is linked to it.
        int argStart=parts.size();
        ArgType argType=ArgType.NONE;
        addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal());
        int nameIndex=index=skipWhiteSpace(index+argStartLength);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        // parse argument name or number
        index=skipIdentifier(index);
        int number=parseArgNumber(nameIndex, index);
        if(number>=0) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) {
                throw new IndexOutOfBoundsException(
                    "Argument number too large: "+prefix(nameIndex));
            }
            hasArgNumbers=true;
            addPart(Part.Type.ARG_NUMBER, nameIndex, length, number);
        } else if(number==ARG_NAME_NOT_NUMBER) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Argument name too long: "+prefix(nameIndex));
            }
            hasArgNames=true;
            addPart(Part.Type.ARG_NAME, nameIndex, length, 0);
        } else {  // number<-1 (ARG_NAME_NOT_VALID)
            throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
        }
        index=skipWhiteSpace(index);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        char c=msg.charAt(index);
        if(c=='}') {
            // all done
        } else if(c!=',') {
            throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
        } else /* ',' */ {
            // parse argument type: case-sensitive a-zA-Z
            int typeIndex=index=skipWhiteSpace(index+1);
            while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
                ++index;
            }
            int length=index-typeIndex;
            index=skipWhiteSpace(index);
            if(index==msg.length()) {
                throw new IllegalArgumentException(
                    "Unmatched '{' braces in message "+prefix());
            }
            // Note: This condition also sets c to the character after the type name.
            if(length==0 || ((c=msg.charAt(index))!=',' && c!='}')) {
                throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
            }
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Argument type name too long: "+prefix(nameIndex));
            }
            // Any type name other than the complex-type keywords below is a simple type.
            argType=ArgType.SIMPLE;
            if(length==6) {
                // case-insensitive comparisons for complex-type names
                if(isChoice(typeIndex)) {
                    argType=ArgType.CHOICE;
                } else if(isPlural(typeIndex)) {
                    argType=ArgType.PLURAL;
                } else if(isSelect(typeIndex)) {
                    argType=ArgType.SELECT;
                }
            } else if(length==13) {
                // "select" followed by "ordinal"
                if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
                    argType=ArgType.SELECTORDINAL;
                }
            }
            // change the ARG_START type from NONE to argType
            parts.get(argStart).value=(short)argType.ordinal();
            if(argType==ArgType.SIMPLE) {
                addPart(Part.Type.ARG_TYPE, typeIndex, length, 0);
            }
            // look for an argument style (pattern)
            if(c=='}') {
                if(argType!=ArgType.SIMPLE) {
                    throw new IllegalArgumentException(
                        "No style field for complex argument: "+prefix(nameIndex));
                }
            } else /* ',' */ {
                ++index;
                if(argType==ArgType.SIMPLE) {
                    index=parseSimpleStyle(index);
                } else if(argType==ArgType.CHOICE) {
                    index=parseChoiceStyle(index, nestingLevel);
                } else {
                    index=parsePluralOrSelectStyle(argType, index, nestingLevel);
                }
            }
        }
        // Argument parsing stopped on the '}'.
        addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal());
        return index+1;
    }
1115 
parseSimpleStyle(int index)1116     private int parseSimpleStyle(int index) {
1117         int start=index;
1118         int nestedBraces=0;
1119         while(index<msg.length()) {
1120             char c=msg.charAt(index++);
1121             if(c=='\'') {
1122                 // Treat apostrophe as quoting but include it in the style part.
1123                 // Find the end of the quoted literal text.
1124                 index=msg.indexOf('\'', index);
1125                 if(index<0) {
1126                     throw new IllegalArgumentException(
1127                         "Quoted literal argument style text reaches to the end of the message: "+
1128                         prefix(start));
1129                 }
1130                 // skip the quote-ending apostrophe
1131                 ++index;
1132             } else if(c=='{') {
1133                 ++nestedBraces;
1134             } else if(c=='}') {
1135                 if(nestedBraces>0) {
1136                     --nestedBraces;
1137                 } else {
1138                     int length=--index-start;
1139                     if(length>Part.MAX_LENGTH) {
1140                         throw new IndexOutOfBoundsException(
1141                             "Argument style text too long: "+prefix(start));
1142                     }
1143                     addPart(Part.Type.ARG_STYLE, start, length, 0);
1144                     return index;
1145                 }
1146             }  // c is part of literal text
1147         }
1148         throw new IllegalArgumentException(
1149             "Unmatched '{' braces in message "+prefix());
1150     }
1151 
parseChoiceStyle(int index, int nestingLevel)1152     private int parseChoiceStyle(int index, int nestingLevel) {
1153         int start=index;
1154         index=skipWhiteSpace(index);
1155         if(index==msg.length() || msg.charAt(index)=='}') {
1156             throw new IllegalArgumentException(
1157                 "Missing choice argument pattern in "+prefix());
1158         }
1159         for(;;) {
1160             // The choice argument style contains |-separated (number, separator, message) triples.
1161             // Parse the number.
1162             int numberIndex=index;
1163             index=skipDouble(index);
1164             int length=index-numberIndex;
1165             if(length==0) {
1166                 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
1167             }
1168             if(length>Part.MAX_LENGTH) {
1169                 throw new IndexOutOfBoundsException(
1170                     "Choice number too long: "+prefix(numberIndex));
1171             }
1172             parseDouble(numberIndex, index, true);  // adds ARG_INT or ARG_DOUBLE
1173             // Parse the separator.
1174             index=skipWhiteSpace(index);
1175             if(index==msg.length()) {
1176                 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
1177             }
1178             char c=msg.charAt(index);
1179             if(!(c=='#' || c=='<' || c=='\u2264')) {  // U+2264 is <=
1180                 throw new IllegalArgumentException(
1181                     "Expected choice separator (#<\u2264) instead of '"+c+
1182                     "' in choice pattern "+prefix(start));
1183             }
1184             addPart(Part.Type.ARG_SELECTOR, index, 1, 0);
1185             // Parse the message fragment.
1186             index=parseMessage(++index, 0, nestingLevel+1, ArgType.CHOICE);
1187             // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
1188             if(index==msg.length()) {
1189                 return index;
1190             }
1191             if(msg.charAt(index)=='}') {
1192                 if(!inMessageFormatPattern(nestingLevel)) {
1193                     throw new IllegalArgumentException(
1194                         "Bad choice pattern syntax: "+prefix(start));
1195                 }
1196                 return index;
1197             }  // else the terminator is '|'
1198             index=skipWhiteSpace(index+1);
1199         }
1200     }
1201 
parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel)1202     private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) {
1203         int start=index;
1204         boolean isEmpty=true;
1205         boolean hasOther=false;
1206         for(;;) {
1207             // First, collect the selector looking for a small set of terminators.
1208             // It would be a little faster to consider the syntax of each possible
1209             // token right here, but that makes the code too complicated.
1210             index=skipWhiteSpace(index);
1211             boolean eos=index==msg.length();
1212             if(eos || msg.charAt(index)=='}') {
1213                 if(eos==inMessageFormatPattern(nestingLevel)) {
1214                     throw new IllegalArgumentException(
1215                         "Bad "+
1216                         argType.toString().toLowerCase(Locale.ENGLISH)+
1217                         " pattern syntax: "+prefix(start));
1218                 }
1219                 if(!hasOther) {
1220                     throw new IllegalArgumentException(
1221                         "Missing 'other' keyword in "+
1222                         argType.toString().toLowerCase(Locale.ENGLISH)+
1223                         " pattern in "+prefix());
1224                 }
1225                 return index;
1226             }
1227             int selectorIndex=index;
1228             if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') {
1229                 // explicit-value plural selector: =double
1230                 index=skipDouble(index+1);
1231                 int length=index-selectorIndex;
1232                 if(length==1) {
1233                     throw new IllegalArgumentException(
1234                         "Bad "+
1235                         argType.toString().toLowerCase(Locale.ENGLISH)+
1236                         " pattern syntax: "+prefix(start));
1237                 }
1238                 if(length>Part.MAX_LENGTH) {
1239                     throw new IndexOutOfBoundsException(
1240                         "Argument selector too long: "+prefix(selectorIndex));
1241                 }
1242                 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1243                 parseDouble(selectorIndex+1, index, false);  // adds ARG_INT or ARG_DOUBLE
1244             } else {
1245                 index=skipIdentifier(index);
1246                 int length=index-selectorIndex;
1247                 if(length==0) {
1248                     throw new IllegalArgumentException(
1249                         "Bad "+
1250                         argType.toString().toLowerCase(Locale.ENGLISH)+
1251                         " pattern syntax: "+prefix(start));
1252                 }
1253                 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
1254                 if( argType.hasPluralStyle() && length==6 && index<msg.length() &&
1255                     msg.regionMatches(selectorIndex, "offset:", 0, 7)
1256                 ) {
1257                     // plural offset, not a selector
1258                     if(!isEmpty) {
1259                         throw new IllegalArgumentException(
1260                             "Plural argument 'offset:' (if present) must precede key-message pairs: "+
1261                             prefix(start));
1262                     }
1263                     // allow whitespace between offset: and its value
1264                     int valueIndex=skipWhiteSpace(index+1);  // The ':' is at index.
1265                     index=skipDouble(valueIndex);
1266                     if(index==valueIndex) {
1267                         throw new IllegalArgumentException(
1268                             "Missing value for plural 'offset:' "+prefix(start));
1269                     }
1270                     if((index-valueIndex)>Part.MAX_LENGTH) {
1271                         throw new IndexOutOfBoundsException(
1272                             "Plural offset value too long: "+prefix(valueIndex));
1273                     }
1274                     parseDouble(valueIndex, index, false);  // adds ARG_INT or ARG_DOUBLE
1275                     isEmpty=false;
1276                     continue;  // no message fragment after the offset
1277                 } else {
1278                     // normal selector word
1279                     if(length>Part.MAX_LENGTH) {
1280                         throw new IndexOutOfBoundsException(
1281                             "Argument selector too long: "+prefix(selectorIndex));
1282                     }
1283                     addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1284                     if(msg.regionMatches(selectorIndex, "other", 0, length)) {
1285                         hasOther=true;
1286                     }
1287                 }
1288             }
1289 
1290             // parse the message fragment following the selector
1291             index=skipWhiteSpace(index);
1292             if(index==msg.length() || msg.charAt(index)!='{') {
1293                 throw new IllegalArgumentException(
1294                     "No message fragment after "+
1295                     argType.toString().toLowerCase(Locale.ENGLISH)+
1296                     " selector: "+prefix(selectorIndex));
1297             }
1298             index=parseMessage(index, 1, nestingLevel+1, argType);
1299             isEmpty=false;
1300         }
1301     }
1302 
1303     /**
1304      * Validates and parses an argument name or argument number string.
1305      * This internal method assumes that the input substring is a "pattern identifier".
1306      * @return &gt;=0 if the name is a valid number,
1307      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
1308      *         ARG_NAME_NOT_VALID (-2) if it is neither.
1309      * @see #validateArgumentName(String)
1310      */
parseArgNumber(CharSequence s, int start, int limit)1311     private static int parseArgNumber(CharSequence s, int start, int limit) {
1312         // If the identifier contains only ASCII digits, then it is an argument _number_
1313         // and must not have leading zeros (except "0" itself).
1314         // Otherwise it is an argument _name_.
1315         if(start>=limit) {
1316             return ARG_NAME_NOT_VALID;
1317         }
1318         int number;
1319         // Defer numeric errors until we know there are only digits.
1320         boolean badNumber;
1321         char c=s.charAt(start++);
1322         if(c=='0') {
1323             if(start==limit) {
1324                 return 0;
1325             } else {
1326                 number=0;
1327                 badNumber=true;  // leading zero
1328             }
1329         } else if('1'<=c && c<='9') {
1330             number=c-'0';
1331             badNumber=false;
1332         } else {
1333             return ARG_NAME_NOT_NUMBER;
1334         }
1335         while(start<limit) {
1336             c=s.charAt(start++);
1337             if('0'<=c && c<='9') {
1338                 if(number>=Integer.MAX_VALUE/10) {
1339                     badNumber=true;  // overflow
1340                 }
1341                 number=number*10+(c-'0');
1342             } else {
1343                 return ARG_NAME_NOT_NUMBER;
1344             }
1345         }
1346         // There are only ASCII digits.
1347         if(badNumber) {
1348             return ARG_NAME_NOT_VALID;
1349         } else {
1350             return number;
1351         }
1352     }
1353 
    /**
     * Validates and parses an argument name or number in the current pattern string.
     * Delegates to parseArgNumber(CharSequence, int, int) with the pattern string.
     * @param start start index into the pattern string
     * @param limit limit index into the pattern string
     * @return the result of the static parseArgNumber() for msg[start, limit[
     */
    private int parseArgNumber(int start, int limit) {
        return parseArgNumber(msg, start, limit);
    }
1357 
1358     /**
1359      * Parses a number from the specified message substring.
1360      * @param start start index into the message string
1361      * @param limit limit index into the message string, must be start<limit
1362      * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
1363      */
parseDouble(int start, int limit, boolean allowInfinity)1364     private void parseDouble(int start, int limit, boolean allowInfinity) {
1365         assert start<limit;
1366         // fake loop for easy exit and single throw statement
1367         for(;;) {
1368             // fast path for small integers and infinity
1369             int value=0;
1370             int isNegative=0;  // not boolean so that we can easily add it to value
1371             int index=start;
1372             char c=msg.charAt(index++);
1373             if(c=='-') {
1374                 isNegative=1;
1375                 if(index==limit) {
1376                     break;  // no number
1377                 }
1378                 c=msg.charAt(index++);
1379             } else if(c=='+') {
1380                 if(index==limit) {
1381                     break;  // no number
1382                 }
1383                 c=msg.charAt(index++);
1384             }
1385             if(c==0x221e) {  // infinity
1386                 if(allowInfinity && index==limit) {
1387                     addArgDoublePart(
1388                         isNegative!=0 ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY,
1389                         start, limit-start);
1390                     return;
1391                 } else {
1392                     break;
1393                 }
1394             }
1395             // try to parse the number as a small integer but fall back to a double
1396             while('0'<=c && c<='9') {
1397                 value=value*10+(c-'0');
1398                 if(value>(Part.MAX_VALUE+isNegative)) {
1399                     break;  // not a small-enough integer
1400                 }
1401                 if(index==limit) {
1402                     addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value);
1403                     return;
1404                 }
1405                 c=msg.charAt(index++);
1406             }
1407             // Let Double.parseDouble() throw a NumberFormatException.
1408             double numericValue=Double.parseDouble(msg.substring(start, limit));
1409             addArgDoublePart(numericValue, start, limit-start);
1410             return;
1411         }
1412         throw new NumberFormatException(
1413             "Bad syntax for numeric value: "+msg.substring(start, limit));
1414     }
1415 
1416     /**
1417      * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
1418      * according to JDK pattern behavior.
1419      * @internal
1420      */
1421     /* package */ static void appendReducedApostrophes(String s, int start, int limit,
1422                                                        StringBuilder sb) {
1423         int doubleApos=-1;
1424         for(;;) {
1425             int i=s.indexOf('\'', start);
1426             if(i<0 || i>=limit) {
1427                 sb.append(s, start, limit);
1428                 break;
1429             }
1430             if(i==doubleApos) {
1431                 // Double apostrophe at start-1 and start==i, append one.
1432                 sb.append('\'');
1433                 ++start;
1434                 doubleApos=-1;
1435             } else {
1436                 // Append text between apostrophes and skip this one.
1437                 sb.append(s, start, i);
1438                 doubleApos=start=i+1;
1439             }
1440         }
1441     }
1442 
    /**
     * Skips white space in the pattern string.
     * Delegates to PatternProps.skipWhiteSpace() with the pattern string.
     * @param index start index into the pattern string
     * @return the index returned by PatternProps.skipWhiteSpace(); presumably the index of
     *         the first non-white-space character at or after index, or the pattern length
     *         (TODO confirm against PatternProps)
     */
    private int skipWhiteSpace(int index) {
        return PatternProps.skipWhiteSpace(msg, index);
    }
1446 
    /**
     * Skips a "pattern identifier" in the pattern string.
     * Delegates to PatternProps.skipIdentifier() with the pattern string.
     * @param index start index into the pattern string
     * @return the index returned by PatternProps.skipIdentifier(); presumably the index
     *         just behind the identifier, which equals index if there is none
     *         (TODO confirm against PatternProps)
     */
    private int skipIdentifier(int index) {
        return PatternProps.skipIdentifier(msg, index);
    }
1450 
1451     /**
1452      * Skips a sequence of characters that could occur in a double value.
1453      * Does not fully parse or validate the value.
1454      */
1455     private int skipDouble(int index) {
1456         while(index<msg.length()) {
1457             char c=msg.charAt(index);
1458             // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
1459             if((c<'0' && "+-.".indexOf(c)<0) || (c>'9' && c!='e' && c!='E' && c!=0x221e)) {
1460                 break;
1461             }
1462             ++index;
1463         }
1464         return index;
1465     }
1466 
1467     private static boolean isArgTypeChar(int c) {
1468         return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
1469     }
1470 
1471     private boolean isChoice(int index) {
1472         char c;
1473         return
1474             ((c=msg.charAt(index++))=='c' || c=='C') &&
1475             ((c=msg.charAt(index++))=='h' || c=='H') &&
1476             ((c=msg.charAt(index++))=='o' || c=='O') &&
1477             ((c=msg.charAt(index++))=='i' || c=='I') &&
1478             ((c=msg.charAt(index++))=='c' || c=='C') &&
1479             ((c=msg.charAt(index))=='e' || c=='E');
1480     }
1481 
1482     private boolean isPlural(int index) {
1483         char c;
1484         return
1485             ((c=msg.charAt(index++))=='p' || c=='P') &&
1486             ((c=msg.charAt(index++))=='l' || c=='L') &&
1487             ((c=msg.charAt(index++))=='u' || c=='U') &&
1488             ((c=msg.charAt(index++))=='r' || c=='R') &&
1489             ((c=msg.charAt(index++))=='a' || c=='A') &&
1490             ((c=msg.charAt(index))=='l' || c=='L');
1491     }
1492 
1493     private boolean isSelect(int index) {
1494         char c;
1495         return
1496             ((c=msg.charAt(index++))=='s' || c=='S') &&
1497             ((c=msg.charAt(index++))=='e' || c=='E') &&
1498             ((c=msg.charAt(index++))=='l' || c=='L') &&
1499             ((c=msg.charAt(index++))=='e' || c=='E') &&
1500             ((c=msg.charAt(index++))=='c' || c=='C') &&
1501             ((c=msg.charAt(index))=='t' || c=='T');
1502     }
1503 
1504     private boolean isOrdinal(int index) {
1505         char c;
1506         return
1507             ((c=msg.charAt(index++))=='o' || c=='O') &&
1508             ((c=msg.charAt(index++))=='r' || c=='R') &&
1509             ((c=msg.charAt(index++))=='d' || c=='D') &&
1510             ((c=msg.charAt(index++))=='i' || c=='I') &&
1511             ((c=msg.charAt(index++))=='n' || c=='N') &&
1512             ((c=msg.charAt(index++))=='a' || c=='A') &&
1513             ((c=msg.charAt(index))=='l' || c=='L');
1514     }
1515 
1516     /**
1517      * @return true if we are inside a MessageFormat (sub-)pattern,
1518      *         as opposed to inside a top-level choice/plural/select pattern.
1519      */
1520     private boolean inMessageFormatPattern(int nestingLevel) {
1521         return nestingLevel>0 || parts.get(0).type==Part.Type.MSG_START;
1522     }
1523 
1524     /**
1525      * @return true if we are in a MessageFormat sub-pattern
1526      *         of a top-level ChoiceFormat pattern.
1527      */
1528     private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) {
1529         return
1530             nestingLevel==1 &&
1531             parentType==ArgType.CHOICE &&
1532             parts.get(0).type!=Part.Type.MSG_START;
1533     }
1534 
1535     private void addPart(Part.Type type, int index, int length, int value) {
1536         parts.add(new Part(type, index, length, value));
1537     }
1538 
1539     private void addLimitPart(int start, Part.Type type, int index, int length, int value) {
1540         parts.get(start).limitPartIndex=parts.size();
1541         addPart(type, index, length, value);
1542     }
1543 
1544     private void addArgDoublePart(double numericValue, int start, int length) {
1545         int numericIndex;
1546         if(numericValues==null) {
1547             numericValues=new ArrayList<Double>();
1548             numericIndex=0;
1549         } else {
1550             numericIndex=numericValues.size();
1551             if(numericIndex>Part.MAX_VALUE) {
1552                 throw new IndexOutOfBoundsException("Too many numeric values");
1553             }
1554         }
1555         numericValues.add(numericValue);
1556         addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex);
1557     }
1558 
1559     private static final int MAX_PREFIX_LENGTH=24;
1560 
1561     /**
1562      * Returns a prefix of s.substring(start). Used for Exception messages.
1563      * @param s
1564      * @param start start index in s
1565      * @return s.substring(start) or a prefix of that
1566      */
1567     private static String prefix(String s, int start) {
1568         StringBuilder prefix=new StringBuilder(MAX_PREFIX_LENGTH+20);
1569         if(start==0) {
1570             prefix.append("\"");
1571         } else {
1572             prefix.append("[at pattern index ").append(start).append("] \"");
1573         }
1574         int substringLength=s.length()-start;
1575         if(substringLength<=MAX_PREFIX_LENGTH) {
1576             prefix.append(start==0 ? s : s.substring(start));
1577         } else {
1578             int limit=start+MAX_PREFIX_LENGTH-4;
1579             if(Character.isHighSurrogate(s.charAt(limit-1))) {
1580                 // remove lead surrogate from the end of the prefix
1581                 --limit;
1582             }
1583             prefix.append(s, start, limit).append(" ...");
1584         }
1585         return prefix.append("\"").toString();
1586     }
1587 
1588     private static String prefix(String s) {
1589         return prefix(s, 0);
1590     }
1591 
1592     private String prefix(int start) {
1593         return prefix(msg, start);
1594     }
1595 
1596     private String prefix() {
1597         return prefix(msg, 0);
1598     }
1599 
    // Apostrophe-handling mode of this instance.
    // NOTE(review): presumably initialized from defaultAposMode or a constructor argument -- confirm.
    private ApostropheMode aposMode;
    // The pattern string; the skip*() and is*() helpers and prefix(int) index into it.
    private String msg;
    // The parts in the order in which addPart() appended them.
    // The type of element 0 is checked for MSG_START by inMessageFormatPattern()
    // and inTopLevelChoiceMessage().
    private ArrayList<Part> parts=new ArrayList<Part>();
    // Numeric values for ARG_DOUBLE parts; such a part's value is an index into this list.
    // May be null: addArgDoublePart() allocates the list on demand.
    private ArrayList<Double> numericValues;
    // NOTE(review): presumably set while parsing when a named argument is seen -- confirm.
    private boolean hasArgNames;
    // NOTE(review): presumably set while parsing when a numbered argument is seen -- confirm.
    private boolean hasArgNumbers;
    // NOTE(review): presumably tracks whether the pattern needs apostrophe auto-quoting -- confirm.
    private boolean needsAutoQuoting;
    // NOTE(review): presumably backs the Freezable implementation
    // (the file imports com.ibm.icu.util.Freezable) -- confirm.
    private volatile boolean frozen;

    // Default apostrophe mode, resolved once during class initialization from the ICUConfig
    // property "com.ibm.icu.text.MessagePattern.ApostropheMode".
    // The second argument to ICUConfig.get() is presumably the fallback
    // when the property is not configured -- confirm in ICUConfig.
    private static final ApostropheMode defaultAposMode=
        ApostropheMode.valueOf(
            ICUConfig.get("com.ibm.icu.text.MessagePattern.ApostropheMode", "DOUBLE_OPTIONAL"));

    // Result of ArgType.values(), fetched once and kept for reuse.
    private static final ArgType[] argTypes=ArgType.values();
1614 }
1615