1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   created on: 2010aug21
9 *   created by: Markus W. Scherer
10 */
11 
12 package com.ibm.icu.text;
13 
14 import java.util.ArrayList;
15 import java.util.Locale;
16 
17 import com.ibm.icu.impl.ICUConfig;
18 import com.ibm.icu.impl.PatternProps;
19 import com.ibm.icu.util.Freezable;
20 import com.ibm.icu.util.ICUCloneNotSupportedException;
21 
22 //Note: Minimize ICU dependencies, only use a very small part of the ICU core.
23 //In particular, do not depend on *Format classes.
24 
25 /**
26  * Parses and represents ICU MessageFormat patterns.
27  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
28  * Used in the implementations of those classes as well as in tools
29  * for message validation, translation and format conversion.
30  * <p>
31  * The parser handles all syntax relevant for identifying message arguments.
32  * This includes "complex" arguments whose style strings contain
33  * nested MessageFormat pattern substrings.
34  * For "simple" arguments (with no nested MessageFormat pattern substrings),
35  * the argument style is not parsed any further.
36  * <p>
37  * The parser handles named and numbered message arguments and allows both in one message.
38  * <p>
39  * Once a pattern has been parsed successfully, iterate through the parsed data
40  * with countParts(), getPart() and related methods.
41  * <p>
42  * The data logically represents a parse tree, but is stored and accessed
43  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
44  * Arguments and nested messages are best handled via recursion.
45  * For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns
46  * the index of the corresponding _LIMIT "part".
47  * <p>
48  * List of "parts":
49  * <pre>
50  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
51  * argument = noneArg | simpleArg | complexArg
52  * complexArg = choiceArg | pluralArg | selectArg
53  *
54  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
55  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
56  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
57  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
58  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
59  *
60  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
61  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
62  * selectStyle = (ARG_SELECTOR message)+
63  * </pre>
64  * <ul>
65  *   <li>Literal output text is not represented directly by "parts" but accessed
66  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
67  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
68  *   <li>In the choiceStyle, the ARG_SELECTOR has the '&lt;', the '#' or
69  *       the less-than-or-equal-to sign (U+2264).
70  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
71  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
72  *       is the value of an explicit-number selector like "=2",
73  *       otherwise the selector is a non-numeric identifier.
74  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
75  * </ul>
76  * <p>
77  * This class is not intended for public subclassing.
78  *
79  * @stable ICU 4.8
80  * @author Markus Scherer
81  */
82 public final class MessagePattern implements Cloneable, Freezable<MessagePattern> {
    /**
     * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
     * The default is DOUBLE_OPTIONAL unless overridden via ICUConfig
     * (/com/ibm/icu/ICUConfig.properties).
     * <p>
     * A pair of adjacent apostrophes always results in a single apostrophe in the output,
     * even when the pair is between two single, text-quoting apostrophes.
     * <p>
     * The following table shows examples of desired MessageFormat.format() output
     * with the pattern strings that yield that output.
     * An entry of "(same)" in the DOUBLE_REQUIRED column means that the pattern string
     * is the same as the one listed in the DOUBLE_OPTIONAL column.
     *
     * <table>
     *   <tr>
     *     <th>Desired output</th>
     *     <th>DOUBLE_OPTIONAL</th>
     *     <th>DOUBLE_REQUIRED</th>
     *   </tr>
     *   <tr>
     *     <td>I see {many}</td>
     *     <td>I see '{many}'</td>
     *     <td>(same)</td>
     *   </tr>
     *   <tr>
     *     <td>I said {'Wow!'}</td>
     *     <td>I said '{''Wow!''}'</td>
     *     <td>(same)</td>
     *   </tr>
     *   <tr>
     *     <td>I don't know</td>
     *     <td>I don't know OR<br> I don''t know</td>
     *     <td>I don''t know</td>
     *   </tr>
     * </table>
     * @stable ICU 4.8
     */
    public enum ApostropheMode {
        /**
         * A literal apostrophe is represented by
         * either a single or a double apostrophe pattern character.
         * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
         * if it immediately precedes a curly brace (either { or }),
         * or a pipe symbol | if inside a choice format,
         * or a pound symbol # if inside a plural format.
         * <p>
         * This is the default behavior starting with ICU 4.8.
         * @stable ICU 4.8
         */
        DOUBLE_OPTIONAL,
        /**
         * A literal apostrophe must be represented by
         * a double apostrophe pattern character.
         * A single apostrophe always starts quoted literal text.
         * <p>
         * This is the behavior of ICU 4.6 and earlier, and of {@link java.text.MessageFormat}.
         * @stable ICU 4.8
         */
        DOUBLE_REQUIRED
    }
141 
142     /**
143      * Constructs an empty MessagePattern with default ApostropheMode.
144      * @stable ICU 4.8
145      */
MessagePattern()146     public MessagePattern() {
147         aposMode=defaultAposMode;
148     }
149 
150     /**
151      * Constructs an empty MessagePattern.
152      * @param mode Explicit ApostropheMode.
153      * @stable ICU 4.8
154      */
MessagePattern(ApostropheMode mode)155     public MessagePattern(ApostropheMode mode) {
156         aposMode=mode;
157     }
158 
159     /**
160      * Constructs a MessagePattern with default ApostropheMode and
161      * parses the MessageFormat pattern string.
162      * @param pattern a MessageFormat pattern string
163      * @throws IllegalArgumentException for syntax errors in the pattern string
164      * @throws IndexOutOfBoundsException if certain limits are exceeded
165      *         (e.g., argument number too high, argument name too long, etc.)
166      * @throws NumberFormatException if a number could not be parsed
167      * @stable ICU 4.8
168      */
MessagePattern(String pattern)169     public MessagePattern(String pattern) {
170         aposMode=defaultAposMode;
171         parse(pattern);
172     }
173 
174     /**
175      * Parses a MessageFormat pattern string.
176      * @param pattern a MessageFormat pattern string
177      * @return this
178      * @throws IllegalArgumentException for syntax errors in the pattern string
179      * @throws IndexOutOfBoundsException if certain limits are exceeded
180      *         (e.g., argument number too high, argument name too long, etc.)
181      * @throws NumberFormatException if a number could not be parsed
182      * @stable ICU 4.8
183      */
parse(String pattern)184     public MessagePattern parse(String pattern) {
185         preParse(pattern);
186         parseMessage(0, 0, 0, ArgType.NONE);
187         postParse();
188         return this;
189     }
190 
191     /**
192      * Parses a ChoiceFormat pattern string.
193      * @param pattern a ChoiceFormat pattern string
194      * @return this
195      * @throws IllegalArgumentException for syntax errors in the pattern string
196      * @throws IndexOutOfBoundsException if certain limits are exceeded
197      *         (e.g., argument number too high, argument name too long, etc.)
198      * @throws NumberFormatException if a number could not be parsed
199      * @stable ICU 4.8
200      */
parseChoiceStyle(String pattern)201     public MessagePattern parseChoiceStyle(String pattern) {
202         preParse(pattern);
203         parseChoiceStyle(0, 0);
204         postParse();
205         return this;
206     }
207 
208     /**
209      * Parses a PluralFormat pattern string.
210      * @param pattern a PluralFormat pattern string
211      * @return this
212      * @throws IllegalArgumentException for syntax errors in the pattern string
213      * @throws IndexOutOfBoundsException if certain limits are exceeded
214      *         (e.g., argument number too high, argument name too long, etc.)
215      * @throws NumberFormatException if a number could not be parsed
216      * @stable ICU 4.8
217      */
parsePluralStyle(String pattern)218     public MessagePattern parsePluralStyle(String pattern) {
219         preParse(pattern);
220         parsePluralOrSelectStyle(ArgType.PLURAL, 0, 0);
221         postParse();
222         return this;
223     }
224 
225     /**
226      * Parses a SelectFormat pattern string.
227      * @param pattern a SelectFormat pattern string
228      * @return this
229      * @throws IllegalArgumentException for syntax errors in the pattern string
230      * @throws IndexOutOfBoundsException if certain limits are exceeded
231      *         (e.g., argument number too high, argument name too long, etc.)
232      * @throws NumberFormatException if a number could not be parsed
233      * @stable ICU 4.8
234      */
parseSelectStyle(String pattern)235     public MessagePattern parseSelectStyle(String pattern) {
236         preParse(pattern);
237         parsePluralOrSelectStyle(ArgType.SELECT, 0, 0);
238         postParse();
239         return this;
240     }
241 
242     /**
243      * Clears this MessagePattern.
244      * countParts() will return 0.
245      * @stable ICU 4.8
246      */
clear()247     public void clear() {
248         // Mostly the same as preParse().
249         if(isFrozen()) {
250             throw new UnsupportedOperationException(
251                 "Attempt to clear() a frozen MessagePattern instance.");
252         }
253         msg=null;
254         hasArgNames=hasArgNumbers=false;
255         needsAutoQuoting=false;
256         parts.clear();
257         if(numericValues!=null) {
258             numericValues.clear();
259         }
260     }
261 
262     /**
263      * Clears this MessagePattern and sets the ApostropheMode.
264      * countParts() will return 0.
265      * @param mode The new ApostropheMode.
266      * @stable ICU 4.8
267      */
clearPatternAndSetApostropheMode(ApostropheMode mode)268     public void clearPatternAndSetApostropheMode(ApostropheMode mode) {
269         clear();
270         aposMode=mode;
271     }
272 
273     /**
274      * @param other another object to compare with.
275      * @return true if this object is equivalent to the other one.
276      * @stable ICU 4.8
277      */
278     @Override
equals(Object other)279     public boolean equals(Object other) {
280         if(this==other) {
281             return true;
282         }
283         if(other==null || getClass()!=other.getClass()) {
284             return false;
285         }
286         MessagePattern o=(MessagePattern)other;
287         return
288             aposMode.equals(o.aposMode) &&
289             (msg==null ? o.msg==null : msg.equals(o.msg)) &&
290             parts.equals(o.parts);
291         // No need to compare numericValues if msg and parts are the same.
292     }
293 
294     /**
295      * {@inheritDoc}
296      * @stable ICU 4.8
297      */
298     @Override
hashCode()299     public int hashCode() {
300         return (aposMode.hashCode()*37+(msg!=null ? msg.hashCode() : 0))*37+parts.hashCode();
301     }
302 
303     /**
304      * @return this instance's ApostropheMode.
305      * @stable ICU 4.8
306      */
getApostropheMode()307     public ApostropheMode getApostropheMode() {
308         return aposMode;
309     }
310 
311     /**
312      * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED
313      * @internal
314      */
jdkAposMode()315     /* package */ boolean jdkAposMode() {
316         return aposMode == ApostropheMode.DOUBLE_REQUIRED;
317     }
318 
319     /**
320      * @return the parsed pattern string (null if none was parsed).
321      * @stable ICU 4.8
322      */
getPatternString()323     public String getPatternString() {
324         return msg;
325     }
326 
327     /**
328      * Does the parsed pattern have named arguments like {first_name}?
329      * @return true if the parsed pattern has at least one named argument.
330      * @stable ICU 4.8
331      */
hasNamedArguments()332     public boolean hasNamedArguments() {
333         return hasArgNames;
334     }
335 
336     /**
337      * Does the parsed pattern have numbered arguments like {2}?
338      * @return true if the parsed pattern has at least one numbered argument.
339      * @stable ICU 4.8
340      */
hasNumberedArguments()341     public boolean hasNumberedArguments() {
342         return hasArgNumbers;
343     }
344 
345     /**
346      * {@inheritDoc}
347      * @stable ICU 4.8
348      */
349     @Override
toString()350     public String toString() {
351         return msg;
352     }
353 
354     /**
355      * Validates and parses an argument name or argument number string.
356      * An argument name must be a "pattern identifier", that is, it must contain
357      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
358      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
359      * @param name Input string.
360      * @return &gt;=0 if the name is a valid number,
361      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
362      *         ARG_NAME_NOT_VALID (-2) if it is neither.
363      * @stable ICU 4.8
364      */
validateArgumentName(String name)365     public static int validateArgumentName(String name) {
366         if(!PatternProps.isIdentifier(name)) {
367             return ARG_NAME_NOT_VALID;
368         }
369         return parseArgNumber(name, 0, name.length());
370     }
371 
    /**
     * Return value from {@link #validateArgumentName(String)} for when
     * the string is a valid "pattern identifier" but not a number,
     * that is, it does not consist only of ASCII digits.
     * @stable ICU 4.8
     */
    public static final int ARG_NAME_NOT_NUMBER=-1;
378 
    /**
     * Return value from {@link #validateArgumentName(String)} for when
     * the string is invalid.
     * It might not be a valid "pattern identifier",
     * or it might have only ASCII digits but with a leading zero or a value that is too large.
     * @stable ICU 4.8
     */
    public static final int ARG_NAME_NOT_VALID=-2;
387 
388     /**
389      * Returns a version of the parsed pattern string where each ASCII apostrophe
390      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
391      * <p>
392      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
393      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
394      * @return the deep-auto-quoted version of the parsed pattern string.
395      * @see MessageFormat#autoQuoteApostrophe(String)
396      * @stable ICU 4.8
397      */
autoQuoteApostropheDeep()398     public String autoQuoteApostropheDeep() {
399         if(!needsAutoQuoting) {
400             return msg;
401         }
402         StringBuilder modified=null;
403         // Iterate backward so that the insertion indexes do not change.
404         int count=countParts();
405         for(int i=count; i>0;) {
406             Part part;
407             if((part=getPart(--i)).getType()==Part.Type.INSERT_CHAR) {
408                 if(modified==null) {
409                     modified=new StringBuilder(msg.length()+10).append(msg);
410                 }
411                 modified.insert(part.index, (char)part.value);
412             }
413         }
414         if(modified==null) {
415             return msg;
416         } else {
417             return modified.toString();
418         }
419     }
420 
421     /**
422      * Returns the number of "parts" created by parsing the pattern string.
423      * Returns 0 if no pattern has been parsed or clear() was called.
424      * @return the number of pattern parts.
425      * @stable ICU 4.8
426      */
countParts()427     public int countParts() {
428         return parts.size();
429     }
430 
431     /**
432      * Gets the i-th pattern "part".
433      * @param i The index of the Part data. (0..countParts()-1)
434      * @return the i-th pattern "part".
435      * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
436      * @stable ICU 4.8
437      */
getPart(int i)438     public Part getPart(int i) {
439         return parts.get(i);
440     }
441 
442     /**
443      * Returns the Part.Type of the i-th pattern "part".
444      * Convenience method for getPart(i).getType().
445      * @param i The index of the Part data. (0..countParts()-1)
446      * @return The Part.Type of the i-th Part.
447      * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
448      * @stable ICU 4.8
449      */
getPartType(int i)450     public Part.Type getPartType(int i) {
451         return parts.get(i).type;
452     }
453 
454     /**
455      * Returns the pattern index of the specified pattern "part".
456      * Convenience method for getPart(partIndex).getIndex().
457      * @param partIndex The index of the Part data. (0..countParts()-1)
458      * @return The pattern index of this Part.
459      * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range
460      * @stable ICU 4.8
461      */
getPatternIndex(int partIndex)462     public int getPatternIndex(int partIndex) {
463         return parts.get(partIndex).index;
464     }
465 
466     /**
467      * Returns the substring of the pattern string indicated by the Part.
468      * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
469      * @param part a part of this MessagePattern.
470      * @return the substring associated with part.
471      * @stable ICU 4.8
472      */
getSubstring(Part part)473     public String getSubstring(Part part) {
474         int index=part.index;
475         return msg.substring(index, index+part.length);
476     }
477 
478     /**
479      * Compares the part's substring with the input string s.
480      * @param part a part of this MessagePattern.
481      * @param s a string.
482      * @return true if getSubstring(part).equals(s).
483      * @stable ICU 4.8
484      */
partSubstringMatches(Part part, String s)485     public boolean partSubstringMatches(Part part, String s) {
486         return part.length == s.length() && msg.regionMatches(part.index, s, 0, part.length);
487     }
488 
489     /**
490      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
491      * @param part a part of this MessagePattern.
492      * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part.
493      * @stable ICU 4.8
494      */
getNumericValue(Part part)495     public double getNumericValue(Part part) {
496         Part.Type type=part.type;
497         if(type==Part.Type.ARG_INT) {
498             return part.value;
499         } else if(type==Part.Type.ARG_DOUBLE) {
500             return numericValues.get(part.value);
501         } else {
502             return NO_NUMERIC_VALUE;
503         }
504     }
505 
    /**
     * Special value that is returned by getNumericValue(Part) when no
     * numeric value is defined for a part, that is, when the part's type
     * is neither ARG_INT nor ARG_DOUBLE.
     * @see #getNumericValue(Part)
     * @stable ICU 4.8
     */
    public static final double NO_NUMERIC_VALUE=-123456789;
513 
514     /**
515      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
516      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
517      * @return the "offset:" value.
518      * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range
519      * @stable ICU 4.8
520      */
getPluralOffset(int pluralStart)521     public double getPluralOffset(int pluralStart) {
522         Part part=parts.get(pluralStart);
523         if(part.type.hasNumericValue()) {
524             return getNumericValue(part);
525         } else {
526             return 0;
527         }
528     }
529 
530     /**
531      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
532      * @param start The index of some Part data (0..countParts()-1);
533      *        this Part should be of Type ARG_START or MSG_START.
534      * @return The first i&gt;start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
535      *         or start itself if getPartType(msgStart)!=ARG|MSG_START.
536      * @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range
537      * @stable ICU 4.8
538      */
getLimitPartIndex(int start)539     public int getLimitPartIndex(int start) {
540         int limit=parts.get(start).limitPartIndex;
541         if(limit<start) {
542             return start;
543         }
544         return limit;
545     }
546 
547     /**
548      * A message pattern "part", representing a pattern parsing event.
549      * There is a part for the start and end of a message or argument,
550      * for quoting and escaping of and with ASCII apostrophes,
551      * and for syntax elements of "complex" arguments.
552      * @stable ICU 4.8
553      */
554     public static final class Part {
Part(Type t, int i, int l, int v)555         private Part(Type t, int i, int l, int v) {
556             type=t;
557             index=i;
558             length=(char)l;
559             value=(short)v;
560         }
561 
562         /**
563          * Returns the type of this part.
564          * @return the part type.
565          * @stable ICU 4.8
566          */
getType()567         public Type getType() {
568             return type;
569         }
570 
571         /**
572          * Returns the pattern string index associated with this Part.
573          * @return this part's pattern string index.
574          * @stable ICU 4.8
575          */
getIndex()576         public int getIndex() {
577             return index;
578         }
579 
580         /**
581          * Returns the length of the pattern substring associated with this Part.
582          * This is 0 for some parts.
583          * @return this part's pattern substring length.
584          * @stable ICU 4.8
585          */
getLength()586         public int getLength() {
587             return length;
588         }
589 
590         /**
591          * Returns the pattern string limit (exclusive-end) index associated with this Part.
592          * Convenience method for getIndex()+getLength().
593          * @return this part's pattern string limit index, same as getIndex()+getLength().
594          * @stable ICU 4.8
595          */
getLimit()596         public int getLimit() {
597             return index+length;
598         }
599 
600         /**
601          * Returns a value associated with this part.
602          * See the documentation of each part type for details.
603          * @return the part value.
604          * @stable ICU 4.8
605          */
getValue()606         public int getValue() {
607             return value;
608         }
609 
610         /**
611          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
612          * otherwise ArgType.NONE.
613          * @return the argument type for this part.
614          * @stable ICU 4.8
615          */
getArgType()616         public ArgType getArgType() {
617             Type type=getType();
618             if(type==Type.ARG_START || type==Type.ARG_LIMIT) {
619                 return argTypes[value];
620             } else {
621                 return ArgType.NONE;
622             }
623         }
624 
625         /**
626          * Part type constants.
627          * @stable ICU 4.8
628          */
629         public enum Type {
630             /**
631              * Start of a message pattern (main or nested).
632              * The length is 0 for the top-level message
633              * and for a choice argument sub-message, otherwise 1 for the '{'.
634              * The value indicates the nesting level, starting with 0 for the main message.
635              * <p>
636              * There is always a later MSG_LIMIT part.
637              * @stable ICU 4.8
638              */
639             MSG_START,
640             /**
641              * End of a message pattern (main or nested).
642              * The length is 0 for the top-level message and
643              * the last sub-message of a choice argument,
644              * otherwise 1 for the '}' or (in a choice argument style) the '|'.
645              * The value indicates the nesting level, starting with 0 for the main message.
646              * @stable ICU 4.8
647              */
648             MSG_LIMIT,
649             /**
650              * Indicates a substring of the pattern string which is to be skipped when formatting.
651              * For example, an apostrophe that begins or ends quoted text
652              * would be indicated with such a part.
653              * The value is undefined and currently always 0.
654              * @stable ICU 4.8
655              */
656             SKIP_SYNTAX,
657             /**
658              * Indicates that a syntax character needs to be inserted for auto-quoting.
659              * The length is 0.
660              * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
661              * @stable ICU 4.8
662              */
663             INSERT_CHAR,
664             /**
665              * Indicates a syntactic (non-escaped) # symbol in a plural variant.
666              * When formatting, replace this part's substring with the
667              * (value-offset) for the plural argument value.
668              * The value is undefined and currently always 0.
669              * @stable ICU 4.8
670              */
671             REPLACE_NUMBER,
672             /**
673              * Start of an argument.
674              * The length is 1 for the '{'.
675              * The value is the ordinal value of the ArgType. Use getArgType().
676              * <p>
677              * This part is followed by either an ARG_NUMBER or ARG_NAME,
678              * followed by optional argument sub-parts (see ArgType constants)
679              * and finally an ARG_LIMIT part.
680              * @stable ICU 4.8
681              */
682             ARG_START,
683             /**
684              * End of an argument.
685              * The length is 1 for the '}'.
686              * The value is the ordinal value of the ArgType. Use getArgType().
687              * @stable ICU 4.8
688              */
689             ARG_LIMIT,
690             /**
691              * The argument number, provided by the value.
692              * @stable ICU 4.8
693              */
694             ARG_NUMBER,
695             /**
696              * The argument name.
697              * The value is undefined and currently always 0.
698              * @stable ICU 4.8
699              */
700             ARG_NAME,
701             /**
702              * The argument type.
703              * The value is undefined and currently always 0.
704              * @stable ICU 4.8
705              */
706             ARG_TYPE,
707             /**
708              * The argument style text.
709              * The value is undefined and currently always 0.
710              * @stable ICU 4.8
711              */
712             ARG_STYLE,
713             /**
714              * A selector substring in a "complex" argument style.
715              * The value is undefined and currently always 0.
716              * @stable ICU 4.8
717              */
718             ARG_SELECTOR,
719             /**
720              * An integer value, for example the offset or an explicit selector value
721              * in a PluralFormat style.
722              * The part value is the integer value.
723              * @stable ICU 4.8
724              */
725             ARG_INT,
726             /**
727              * A numeric value, for example the offset or an explicit selector value
728              * in a PluralFormat style.
729              * The part value is an index into an internal array of numeric values;
730              * use getNumericValue().
731              * @stable ICU 4.8
732              */
733             ARG_DOUBLE;
734 
735             /**
736              * Indicates whether this part has a numeric value.
737              * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}.
738              * @return true if this part has a numeric value.
739              * @stable ICU 4.8
740              */
hasNumericValue()741             public boolean hasNumericValue() {
742                 return this==ARG_INT || this==ARG_DOUBLE;
743             }
744         }
745 
746         /**
747          * @return a string representation of this part.
748          * @stable ICU 4.8
749          */
750         @Override
toString()751         public String toString() {
752             String valueString=(type==Type.ARG_START || type==Type.ARG_LIMIT) ?
753                 getArgType().name() : Integer.toString(value);
754             return type.name()+"("+valueString+")@"+index;
755         }
756 
757         /**
758          * @param other another object to compare with.
759          * @return true if this object is equivalent to the other one.
760          * @stable ICU 4.8
761          */
762         @Override
equals(Object other)763         public boolean equals(Object other) {
764             if(this==other) {
765                 return true;
766             }
767             if(other==null || getClass()!=other.getClass()) {
768                 return false;
769             }
770             Part o=(Part)other;
771             return
772                 type.equals(o.type) &&
773                 index==o.index &&
774                 length==o.length &&
775                 value==o.value &&
776                 limitPartIndex==o.limitPartIndex;
777         }
778 
779         /**
780          * {@inheritDoc}
781          * @stable ICU 4.8
782          */
783         @Override
hashCode()784         public int hashCode() {
785             return ((type.hashCode()*37+index)*37+length)*37+value;
786         }
787 
        // Largest part length that can be stored: the length field is a char.
        private static final int MAX_LENGTH=0xffff;
        // Largest part value that can be stored: the value field is a short.
        private static final int MAX_VALUE=Short.MAX_VALUE;

        // Some fields are not final because they are modified during pattern parsing.
        // After pattern parsing, the parts are effectively immutable.
        private final Type type;
        private final int index;
        // Stored as a char; the parser checks lengths against MAX_LENGTH before adding a part.
        private final char length;
        // Not final: parseArg() overwrites the ARG_START value once the argument type is known.
        private short value;
        // Not final: presumably filled in when the matching limit part is added (see addLimitPart()).
        private int limitPartIndex;
798     }
799 
800     /**
801      * Argument type constants.
802      * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
803      *
804      * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
805      * with a nesting level one greater than the surrounding message.
806      * @stable ICU 4.8
807      */
808     public enum ArgType {
809         /**
810          * The argument has no specified type.
811          * @stable ICU 4.8
812          */
813         NONE,
814         /**
815          * The argument has a "simple" type which is provided by the ARG_TYPE part.
816          * An ARG_STYLE part might follow that.
817          * @stable ICU 4.8
818          */
819         SIMPLE,
820         /**
821          * The argument is a ChoiceFormat with one or more
822          * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
823          * @stable ICU 4.8
824          */
825         CHOICE,
826         /**
827          * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
828          * (e.g., offset:1)
829          * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
830          * If the selector has an explicit value (e.g., =2), then
831          * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
832          * Otherwise the message immediately follows the ARG_SELECTOR.
833          * @stable ICU 4.8
834          */
835         PLURAL,
836         /**
837          * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
838          * @stable ICU 4.8
839          */
840         SELECT,
841         /**
842          * The argument is an ordinal-number PluralFormat
843          * with the same style parts sequence and semantics as {@link ArgType#PLURAL}.
844          * @stable ICU 50
845          */
846         SELECTORDINAL;
847 
848         /**
849          * @return true if the argument type has a plural style part sequence and semantics,
850          * for example {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL}.
851          * @stable ICU 50
852          */
hasPluralStyle()853         public boolean hasPluralStyle() {
854             return this == PLURAL || this == SELECTORDINAL;
855         }
856     }
857 
858     /**
859      * Creates and returns a copy of this object.
860      * @return a copy of this object (or itself if frozen).
861      * @stable ICU 4.8
862      */
863     @Override
clone()864     public Object clone() {
865         if(isFrozen()) {
866             return this;
867         } else {
868             return cloneAsThawed();
869         }
870     }
871 
872     /**
873      * Creates and returns an unfrozen copy of this object.
874      * @return a copy of this object.
875      * @stable ICU 4.8
876      */
877     @Override
878     @SuppressWarnings("unchecked")
cloneAsThawed()879     public MessagePattern cloneAsThawed() {
880         MessagePattern newMsg;
881         try {
882             newMsg=(MessagePattern)super.clone();
883         } catch (CloneNotSupportedException e) {
884             throw new ICUCloneNotSupportedException(e);
885         }
886         newMsg.parts=(ArrayList<Part>)parts.clone();
887         if(numericValues!=null) {
888             newMsg.numericValues=(ArrayList<Double>)numericValues.clone();
889         }
890         newMsg.frozen=false;
891         return newMsg;
892     }
893 
894     /**
895      * Freezes this object, making it immutable and thread-safe.
896      * @return this
897      * @stable ICU 4.8
898      */
899     @Override
freeze()900     public MessagePattern freeze() {
901         frozen=true;
902         return this;
903     }
904 
905     /**
906      * Determines whether this object is frozen (immutable) or not.
907      * @return true if this object is frozen.
908      * @stable ICU 4.8
909      */
910     @Override
isFrozen()911     public boolean isFrozen() {
912         return frozen;
913     }
914 
preParse(String pattern)915     private void preParse(String pattern) {
916         if(isFrozen()) {
917             throw new UnsupportedOperationException(
918                 "Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance.");
919         }
920         msg=pattern;
921         hasArgNames=hasArgNumbers=false;
922         needsAutoQuoting=false;
923         parts.clear();
924         if(numericValues!=null) {
925             numericValues.clear();
926         }
927     }
928 
    /**
     * Counterpart to preParse(): a hook for work that follows parsing.
     * It currently does nothing.
     */
    private void postParse() {
        // Nothing to be done currently.
    }
932 
    /**
     * Parses a message or nested message fragment and records its parts,
     * from MSG_START through the matching MSG_LIMIT.
     * Apostrophe quoting, '#' number placeholders (plural styles only)
     * and nested arguments are handled while scanning the message text.
     *
     * @param index pattern index where the message starts
     * @param msgStartLength length of the MSG_START substring; it is skipped before scanning
     * @param nestingLevel nesting depth, stored as the value of the MSG_START and MSG_LIMIT parts
     * @param parentType type of the enclosing argument; CHOICE makes '|' a terminator,
     *        and plural-style types make an unquoted '#' a REPLACE_NUMBER part
     * @return the index after the terminating '}'; for a CHOICE parent the index of the
     *         terminating '}' or '|' itself; or the pattern length if the end was reached
     * @throws IndexOutOfBoundsException if nestingLevel is greater than Part.MAX_VALUE
     * @throws IllegalArgumentException if the pattern ends inside a nested message
     *         for which inTopLevelChoiceMessage() is false
     */
    private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) {
        if(nestingLevel>Part.MAX_VALUE) {
            throw new IndexOutOfBoundsException();
        }
        // Remember where MSG_START goes so that addLimitPart() can refer back to it.
        int msgStart=parts.size();
        addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel);
        index+=msgStartLength;
        while(index<msg.length()) {
            // index is already past c while c is being examined.
            char c=msg.charAt(index++);
            if(c=='\'') {
                if(index==msg.length()) {
                    // The apostrophe is the last character in the pattern.
                    // Add a Part for auto-quoting.
                    addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                    needsAutoQuoting=true;
                } else {
                    // Look at the character after the apostrophe without consuming it.
                    c=msg.charAt(index);
                    if(c=='\'') {
                        // double apostrophe, skip the second one
                        addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                    } else if(
                        aposMode==ApostropheMode.DOUBLE_REQUIRED ||
                        c=='{' || c=='}' ||
                        (parentType==ArgType.CHOICE && c=='|') ||
                        (parentType.hasPluralStyle() && c=='#')
                    ) {
                        // skip the quote-starting apostrophe
                        addPart(Part.Type.SKIP_SYNTAX, index-1, 1, 0);
                        // find the end of the quoted literal text
                        for(;;) {
                            // Search after the character at index, which is already known
                            // to be part of the quoted text.
                            index=msg.indexOf('\'', index+1);
                            if(index>=0) {
                                if((index+1)<msg.length() && msg.charAt(index+1)=='\'') {
                                    // double apostrophe inside quoted literal text
                                    // still encodes a single apostrophe, skip the second one
                                    addPart(Part.Type.SKIP_SYNTAX, ++index, 1, 0);
                                } else {
                                    // skip the quote-ending apostrophe
                                    addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                                    break;
                                }
                            } else {
                                // The quoted text reaches to the end of the message.
                                index=msg.length();
                                // Add a Part for auto-quoting.
                                addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                                needsAutoQuoting=true;
                                break;
                            }
                        }
                    } else {
                        // Interpret the apostrophe as literal text.
                        // Add a Part for auto-quoting.
                        addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                        needsAutoQuoting=true;
                    }
                }
            } else if(parentType.hasPluralStyle() && c=='#') {
                // The unquoted # in a plural message fragment will be replaced
                // with the (number-offset).
                addPart(Part.Type.REPLACE_NUMBER, index-1, 1, 0);
            } else if(c=='{') {
                // Nested argument: parseArg() returns the index after its closing '}'.
                index=parseArg(index-1, 1, nestingLevel);
            } else if((nestingLevel>0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) {
                // Finish the message before the terminator.
                // In a choice style, report the "}" substring only for the following ARG_LIMIT,
                // not for this MSG_LIMIT.
                int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1;
                addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel);
                if(parentType==ArgType.CHOICE) {
                    // Let the choice style parser see the '}' or '|'.
                    return index-1;
                } else {
                    // continue parsing after the '}'
                    return index;
                }
            }  // else: c is part of literal text
        }
        // The end of the pattern was reached without a terminator.
        if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel);
        return index;
    }
1018 
    /**
     * Parses one argument, from its opening '{' through its closing '}',
     * and records its parts from ARG_START through ARG_LIMIT.
     * The argument consists of a name or number, optionally followed by a type
     * and, after another comma, a style.
     *
     * @param index pattern index of the start of the argument
     * @param argStartLength length of the ARG_START substring
     * @param nestingLevel nesting level of the message that contains this argument
     * @return the index after the closing '}'
     * @throws IllegalArgumentException if the argument syntax is bad, the braces are unmatched,
     *         or a complex argument has no style
     * @throws IndexOutOfBoundsException if the argument number is too large
     *         or the name, number or type substring is longer than Part.MAX_LENGTH
     */
    private int parseArg(int index, int argStartLength, int nestingLevel) {
        int argStart=parts.size();
        // The type is not known yet; the ARG_START value is patched below if a type follows.
        ArgType argType=ArgType.NONE;
        addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal());
        int nameIndex=index=skipWhiteSpace(index+argStartLength);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        // parse argument name or number
        index=skipIdentifier(index);
        int number=parseArgNumber(nameIndex, index);
        if(number>=0) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) {
                throw new IndexOutOfBoundsException(
                    "Argument number too large: "+prefix(nameIndex));
            }
            hasArgNumbers=true;
            addPart(Part.Type.ARG_NUMBER, nameIndex, length, number);
        } else if(number==ARG_NAME_NOT_NUMBER) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Argument name too long: "+prefix(nameIndex));
            }
            hasArgNames=true;
            addPart(Part.Type.ARG_NAME, nameIndex, length, 0);
        } else {  // number<-1 (ARG_NAME_NOT_VALID)
            throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
        }
        index=skipWhiteSpace(index);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        char c=msg.charAt(index);
        if(c=='}') {
            // all done
        } else if(c!=',') {
            throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
        } else /* ',' */ {
            // parse argument type: case-sensitive a-zA-Z
            int typeIndex=index=skipWhiteSpace(index+1);
            while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
                ++index;
            }
            int length=index-typeIndex;
            index=skipWhiteSpace(index);
            if(index==msg.length()) {
                throw new IllegalArgumentException(
                    "Unmatched '{' braces in message "+prefix());
            }
            // The type must be non-empty and be followed by ',' (a style follows) or '}'.
            if(length==0 || ((c=msg.charAt(index))!=',' && c!='}')) {
                throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
            }
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Argument type name too long: "+prefix(nameIndex));
            }
            // Any type name that is not one of the complex-type keywords is a simple type.
            argType=ArgType.SIMPLE;
            if(length==6) {
                // case-insensitive comparisons for complex-type names
                if(isChoice(typeIndex)) {
                    argType=ArgType.CHOICE;
                } else if(isPlural(typeIndex)) {
                    argType=ArgType.PLURAL;
                } else if(isSelect(typeIndex)) {
                    argType=ArgType.SELECT;
                }
            } else if(length==13) {
                // "select" (6 characters) followed by "ordinal" (7 characters)
                if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
                    argType=ArgType.SELECTORDINAL;
                }
            }
            // change the ARG_START type from NONE to argType
            parts.get(argStart).value=(short)argType.ordinal();
            if(argType==ArgType.SIMPLE) {
                addPart(Part.Type.ARG_TYPE, typeIndex, length, 0);
            }
            // look for an argument style (pattern)
            if(c=='}') {
                if(argType!=ArgType.SIMPLE) {
                    throw new IllegalArgumentException(
                        "No style field for complex argument: "+prefix(nameIndex));
                }
            } else /* ',' */ {
                // Skip the comma, then let the style parser matching the type take over.
                ++index;
                if(argType==ArgType.SIMPLE) {
                    index=parseSimpleStyle(index);
                } else if(argType==ArgType.CHOICE) {
                    index=parseChoiceStyle(index, nestingLevel);
                } else {
                    index=parsePluralOrSelectStyle(argType, index, nestingLevel);
                }
            }
        }
        // Argument parsing stopped on the '}'.
        addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal());
        return index+1;
    }
1120 
parseSimpleStyle(int index)1121     private int parseSimpleStyle(int index) {
1122         int start=index;
1123         int nestedBraces=0;
1124         while(index<msg.length()) {
1125             char c=msg.charAt(index++);
1126             if(c=='\'') {
1127                 // Treat apostrophe as quoting but include it in the style part.
1128                 // Find the end of the quoted literal text.
1129                 index=msg.indexOf('\'', index);
1130                 if(index<0) {
1131                     throw new IllegalArgumentException(
1132                         "Quoted literal argument style text reaches to the end of the message: "+
1133                         prefix(start));
1134                 }
1135                 // skip the quote-ending apostrophe
1136                 ++index;
1137             } else if(c=='{') {
1138                 ++nestedBraces;
1139             } else if(c=='}') {
1140                 if(nestedBraces>0) {
1141                     --nestedBraces;
1142                 } else {
1143                     int length=--index-start;
1144                     if(length>Part.MAX_LENGTH) {
1145                         throw new IndexOutOfBoundsException(
1146                             "Argument style text too long: "+prefix(start));
1147                     }
1148                     addPart(Part.Type.ARG_STYLE, start, length, 0);
1149                     return index;
1150                 }
1151             }  // c is part of literal text
1152         }
1153         throw new IllegalArgumentException(
1154             "Unmatched '{' braces in message "+prefix());
1155     }
1156 
    /**
     * Parses a choice style: |-separated (number, separator, message) triples.
     * For each triple this adds an ARG_INT or ARG_DOUBLE part, an ARG_SELECTOR part
     * for the separator, and the parts of the message fragment.
     *
     * @param index pattern index where the choice style starts
     * @param nestingLevel nesting level of the message that contains the choice argument
     * @return the index of the '}' that ends the argument,
     *         or the pattern length if the style reaches to the end of the pattern
     * @throws IllegalArgumentException if the style is empty or has bad syntax
     * @throws IndexOutOfBoundsException if a number substring is longer than Part.MAX_LENGTH
     */
    private int parseChoiceStyle(int index, int nestingLevel) {
        int start=index;
        index=skipWhiteSpace(index);
        if(index==msg.length() || msg.charAt(index)=='}') {
            throw new IllegalArgumentException(
                "Missing choice argument pattern in "+prefix());
        }
        for(;;) {
            // The choice argument style contains |-separated (number, separator, message) triples.
            // Parse the number.
            int numberIndex=index;
            index=skipDouble(index);
            int length=index-numberIndex;
            if(length==0) {
                throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
            }
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Choice number too long: "+prefix(numberIndex));
            }
            parseDouble(numberIndex, index, true);  // adds ARG_INT or ARG_DOUBLE
            // Parse the separator.
            index=skipWhiteSpace(index);
            if(index==msg.length()) {
                throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
            }
            char c=msg.charAt(index);
            if(!(c=='#' || c=='<' || c=='\u2264')) {  // U+2264 is <=
                throw new IllegalArgumentException(
                    "Expected choice separator (#<\u2264) instead of '"+c+
                    "' in choice pattern "+prefix(start));
            }
            addPart(Part.Type.ARG_SELECTOR, index, 1, 0);
            // Parse the message fragment.
            // It starts right after the separator, with an empty MSG_START substring.
            index=parseMessage(++index, 0, nestingLevel+1, ArgType.CHOICE);
            // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
            if(index==msg.length()) {
                return index;
            }
            if(msg.charAt(index)=='}') {
                // A '}' terminator is only valid inside a MessageFormat pattern.
                if(!inMessageFormatPattern(nestingLevel)) {
                    throw new IllegalArgumentException(
                        "Bad choice pattern syntax: "+prefix(start));
                }
                return index;
            }  // else the terminator is '|'
            index=skipWhiteSpace(index+1);
        }
    }
1206 
parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel)1207     private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) {
1208         int start=index;
1209         boolean isEmpty=true;
1210         boolean hasOther=false;
1211         for(;;) {
1212             // First, collect the selector looking for a small set of terminators.
1213             // It would be a little faster to consider the syntax of each possible
1214             // token right here, but that makes the code too complicated.
1215             index=skipWhiteSpace(index);
1216             boolean eos=index==msg.length();
1217             if(eos || msg.charAt(index)=='}') {
1218                 if(eos==inMessageFormatPattern(nestingLevel)) {
1219                     throw new IllegalArgumentException(
1220                         "Bad "+
1221                         argType.toString().toLowerCase(Locale.ENGLISH)+
1222                         " pattern syntax: "+prefix(start));
1223                 }
1224                 if(!hasOther) {
1225                     throw new IllegalArgumentException(
1226                         "Missing 'other' keyword in "+
1227                         argType.toString().toLowerCase(Locale.ENGLISH)+
1228                         " pattern in "+prefix());
1229                 }
1230                 return index;
1231             }
1232             int selectorIndex=index;
1233             if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') {
1234                 // explicit-value plural selector: =double
1235                 index=skipDouble(index+1);
1236                 int length=index-selectorIndex;
1237                 if(length==1) {
1238                     throw new IllegalArgumentException(
1239                         "Bad "+
1240                         argType.toString().toLowerCase(Locale.ENGLISH)+
1241                         " pattern syntax: "+prefix(start));
1242                 }
1243                 if(length>Part.MAX_LENGTH) {
1244                     throw new IndexOutOfBoundsException(
1245                         "Argument selector too long: "+prefix(selectorIndex));
1246                 }
1247                 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1248                 parseDouble(selectorIndex+1, index, false);  // adds ARG_INT or ARG_DOUBLE
1249             } else {
1250                 index=skipIdentifier(index);
1251                 int length=index-selectorIndex;
1252                 if(length==0) {
1253                     throw new IllegalArgumentException(
1254                         "Bad "+
1255                         argType.toString().toLowerCase(Locale.ENGLISH)+
1256                         " pattern syntax: "+prefix(start));
1257                 }
1258                 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
1259                 if( argType.hasPluralStyle() && length==6 && index<msg.length() &&
1260                     msg.regionMatches(selectorIndex, "offset:", 0, 7)
1261                 ) {
1262                     // plural offset, not a selector
1263                     if(!isEmpty) {
1264                         throw new IllegalArgumentException(
1265                             "Plural argument 'offset:' (if present) must precede key-message pairs: "+
1266                             prefix(start));
1267                     }
1268                     // allow whitespace between offset: and its value
1269                     int valueIndex=skipWhiteSpace(index+1);  // The ':' is at index.
1270                     index=skipDouble(valueIndex);
1271                     if(index==valueIndex) {
1272                         throw new IllegalArgumentException(
1273                             "Missing value for plural 'offset:' "+prefix(start));
1274                     }
1275                     if((index-valueIndex)>Part.MAX_LENGTH) {
1276                         throw new IndexOutOfBoundsException(
1277                             "Plural offset value too long: "+prefix(valueIndex));
1278                     }
1279                     parseDouble(valueIndex, index, false);  // adds ARG_INT or ARG_DOUBLE
1280                     isEmpty=false;
1281                     continue;  // no message fragment after the offset
1282                 } else {
1283                     // normal selector word
1284                     if(length>Part.MAX_LENGTH) {
1285                         throw new IndexOutOfBoundsException(
1286                             "Argument selector too long: "+prefix(selectorIndex));
1287                     }
1288                     addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1289                     if(msg.regionMatches(selectorIndex, "other", 0, length)) {
1290                         hasOther=true;
1291                     }
1292                 }
1293             }
1294 
1295             // parse the message fragment following the selector
1296             index=skipWhiteSpace(index);
1297             if(index==msg.length() || msg.charAt(index)!='{') {
1298                 throw new IllegalArgumentException(
1299                     "No message fragment after "+
1300                     argType.toString().toLowerCase(Locale.ENGLISH)+
1301                     " selector: "+prefix(selectorIndex));
1302             }
1303             index=parseMessage(index, 1, nestingLevel+1, argType);
1304             isEmpty=false;
1305         }
1306     }
1307 
1308     /**
1309      * Validates and parses an argument name or argument number string.
1310      * This internal method assumes that the input substring is a "pattern identifier".
1311      * @return &gt;=0 if the name is a valid number,
1312      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
1313      *         ARG_NAME_NOT_VALID (-2) if it is neither.
1314      * @see #validateArgumentName(String)
1315      */
parseArgNumber(CharSequence s, int start, int limit)1316     private static int parseArgNumber(CharSequence s, int start, int limit) {
1317         // If the identifier contains only ASCII digits, then it is an argument _number_
1318         // and must not have leading zeros (except "0" itself).
1319         // Otherwise it is an argument _name_.
1320         if(start>=limit) {
1321             return ARG_NAME_NOT_VALID;
1322         }
1323         int number;
1324         // Defer numeric errors until we know there are only digits.
1325         boolean badNumber;
1326         char c=s.charAt(start++);
1327         if(c=='0') {
1328             if(start==limit) {
1329                 return 0;
1330             } else {
1331                 number=0;
1332                 badNumber=true;  // leading zero
1333             }
1334         } else if('1'<=c && c<='9') {
1335             number=c-'0';
1336             badNumber=false;
1337         } else {
1338             return ARG_NAME_NOT_NUMBER;
1339         }
1340         while(start<limit) {
1341             c=s.charAt(start++);
1342             if('0'<=c && c<='9') {
1343                 if(number>=Integer.MAX_VALUE/10) {
1344                     badNumber=true;  // overflow
1345                 }
1346                 number=number*10+(c-'0');
1347             } else {
1348                 return ARG_NAME_NOT_NUMBER;
1349             }
1350         }
1351         // There are only ASCII digits.
1352         if(badNumber) {
1353             return ARG_NAME_NOT_VALID;
1354         } else {
1355             return number;
1356         }
1357     }
1358 
parseArgNumber(int start, int limit)1359     private int parseArgNumber(int start, int limit) {
1360         return parseArgNumber(msg, start, limit);
1361     }
1362 
    /**
     * Parses a number from the specified message substring
     * and adds an ARG_INT or ARG_DOUBLE part for it.
     * Optionally signed integers whose magnitude fits into a Part value become ARG_INT parts;
     * infinity and everything else that Double.parseDouble() accepts become ARG_DOUBLE parts.
     * @param start start index into the message string
     * @param limit limit index into the message string, must be start<limit
     * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
     * @throws NumberFormatException if the substring is not a valid number,
     *         including when U+221E occurs but is not allowed or is followed by more characters
     */
    private void parseDouble(int start, int limit, boolean allowInfinity) {
        assert start<limit;
        // fake loop for easy exit and single throw statement
        for(;;) {
            // fast path for small integers and infinity
            int value=0;
            int isNegative=0;  // not boolean so that we can easily add it to value
            int index=start;
            char c=msg.charAt(index++);
            if(c=='-') {
                isNegative=1;
                if(index==limit) {
                    break;  // no number
                }
                c=msg.charAt(index++);
            } else if(c=='+') {
                if(index==limit) {
                    break;  // no number
                }
                c=msg.charAt(index++);
            }
            if(c==0x221e) {  // infinity
                // The infinity sign must be the last character of the substring.
                if(allowInfinity && index==limit) {
                    addArgDoublePart(
                        isNegative!=0 ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY,
                        start, limit-start);
                    return;
                } else {
                    break;
                }
            }
            // try to parse the number as a small integer but fall back to a double
            while('0'<=c && c<='9') {
                value=value*10+(c-'0');
                // A negative number may have a magnitude one greater than MAX_VALUE.
                if(value>(Part.MAX_VALUE+isNegative)) {
                    break;  // not a small-enough integer
                }
                if(index==limit) {
                    addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value);
                    return;
                }
                c=msg.charAt(index++);
            }
            // Reached for a too-large integer or when a non-digit follows the digits.
            // Let Double.parseDouble() throw a NumberFormatException.
            double numericValue=Double.parseDouble(msg.substring(start, limit));
            addArgDoublePart(numericValue, start, limit-start);
            return;
        }
        throw new NumberFormatException(
            "Bad syntax for numeric value: "+msg.substring(start, limit));
    }
1420 
1421     /**
1422      * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
1423      * according to JDK pattern behavior.
1424      * @internal
1425      */
1426     /* package */ static void appendReducedApostrophes(String s, int start, int limit,
1427                                                        StringBuilder sb) {
1428         int doubleApos=-1;
1429         for(;;) {
1430             int i=s.indexOf('\'', start);
1431             if(i<0 || i>=limit) {
1432                 sb.append(s, start, limit);
1433                 break;
1434             }
1435             if(i==doubleApos) {
1436                 // Double apostrophe at start-1 and start==i, append one.
1437                 sb.append('\'');
1438                 ++start;
1439                 doubleApos=-1;
1440             } else {
1441                 // Append text between apostrophes and skip this one.
1442                 sb.append(s, start, i);
1443                 doubleApos=start=i+1;
1444             }
1445         }
1446     }
1447 
1448     private int skipWhiteSpace(int index) {
1449         return PatternProps.skipWhiteSpace(msg, index);
1450     }
1451 
1452     private int skipIdentifier(int index) {
1453         return PatternProps.skipIdentifier(msg, index);
1454     }
1455 
1456     /**
1457      * Skips a sequence of characters that could occur in a double value.
1458      * Does not fully parse or validate the value.
1459      */
1460     private int skipDouble(int index) {
1461         while(index<msg.length()) {
1462             char c=msg.charAt(index);
1463             // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
1464             if((c<'0' && "+-.".indexOf(c)<0) || (c>'9' && c!='e' && c!='E' && c!=0x221e)) {
1465                 break;
1466             }
1467             ++index;
1468         }
1469         return index;
1470     }
1471 
1472     private static boolean isArgTypeChar(int c) {
1473         return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
1474     }
1475 
1476     private boolean isChoice(int index) {
1477         char c;
1478         return
1479             ((c=msg.charAt(index++))=='c' || c=='C') &&
1480             ((c=msg.charAt(index++))=='h' || c=='H') &&
1481             ((c=msg.charAt(index++))=='o' || c=='O') &&
1482             ((c=msg.charAt(index++))=='i' || c=='I') &&
1483             ((c=msg.charAt(index++))=='c' || c=='C') &&
1484             ((c=msg.charAt(index))=='e' || c=='E');
1485     }
1486 
1487     private boolean isPlural(int index) {
1488         char c;
1489         return
1490             ((c=msg.charAt(index++))=='p' || c=='P') &&
1491             ((c=msg.charAt(index++))=='l' || c=='L') &&
1492             ((c=msg.charAt(index++))=='u' || c=='U') &&
1493             ((c=msg.charAt(index++))=='r' || c=='R') &&
1494             ((c=msg.charAt(index++))=='a' || c=='A') &&
1495             ((c=msg.charAt(index))=='l' || c=='L');
1496     }
1497 
1498     private boolean isSelect(int index) {
1499         char c;
1500         return
1501             ((c=msg.charAt(index++))=='s' || c=='S') &&
1502             ((c=msg.charAt(index++))=='e' || c=='E') &&
1503             ((c=msg.charAt(index++))=='l' || c=='L') &&
1504             ((c=msg.charAt(index++))=='e' || c=='E') &&
1505             ((c=msg.charAt(index++))=='c' || c=='C') &&
1506             ((c=msg.charAt(index))=='t' || c=='T');
1507     }
1508 
1509     private boolean isOrdinal(int index) {
1510         char c;
1511         return
1512             ((c=msg.charAt(index++))=='o' || c=='O') &&
1513             ((c=msg.charAt(index++))=='r' || c=='R') &&
1514             ((c=msg.charAt(index++))=='d' || c=='D') &&
1515             ((c=msg.charAt(index++))=='i' || c=='I') &&
1516             ((c=msg.charAt(index++))=='n' || c=='N') &&
1517             ((c=msg.charAt(index++))=='a' || c=='A') &&
1518             ((c=msg.charAt(index))=='l' || c=='L');
1519     }
1520 
1521     /**
1522      * @return true if we are inside a MessageFormat (sub-)pattern,
1523      *         as opposed to inside a top-level choice/plural/select pattern.
1524      */
1525     private boolean inMessageFormatPattern(int nestingLevel) {
1526         return nestingLevel>0 || parts.get(0).type==Part.Type.MSG_START;
1527     }
1528 
1529     /**
1530      * @return true if we are in a MessageFormat sub-pattern
1531      *         of a top-level ChoiceFormat pattern.
1532      */
1533     private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) {
1534         return
1535             nestingLevel==1 &&
1536             parentType==ArgType.CHOICE &&
1537             parts.get(0).type!=Part.Type.MSG_START;
1538     }
1539 
1540     private void addPart(Part.Type type, int index, int length, int value) {
1541         parts.add(new Part(type, index, length, value));
1542     }
1543 
1544     private void addLimitPart(int start, Part.Type type, int index, int length, int value) {
1545         parts.get(start).limitPartIndex=parts.size();
1546         addPart(type, index, length, value);
1547     }
1548 
1549     private void addArgDoublePart(double numericValue, int start, int length) {
1550         int numericIndex;
1551         if(numericValues==null) {
1552             numericValues=new ArrayList<Double>();
1553             numericIndex=0;
1554         } else {
1555             numericIndex=numericValues.size();
1556             if(numericIndex>Part.MAX_VALUE) {
1557                 throw new IndexOutOfBoundsException("Too many numeric values");
1558             }
1559         }
1560         numericValues.add(numericValue);
1561         addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex);
1562     }
1563 
1564     private static final int MAX_PREFIX_LENGTH=24;
1565 
1566     /**
1567      * Returns a prefix of s.substring(start). Used for Exception messages.
1568      * @param s
1569      * @param start start index in s
1570      * @return s.substring(start) or a prefix of that
1571      */
1572     private static String prefix(String s, int start) {
1573         StringBuilder prefix=new StringBuilder(MAX_PREFIX_LENGTH+20);
1574         if(start==0) {
1575             prefix.append("\"");
1576         } else {
1577             prefix.append("[at pattern index ").append(start).append("] \"");
1578         }
1579         int substringLength=s.length()-start;
1580         if(substringLength<=MAX_PREFIX_LENGTH) {
1581             prefix.append(start==0 ? s : s.substring(start));
1582         } else {
1583             int limit=start+MAX_PREFIX_LENGTH-4;
1584             if(Character.isHighSurrogate(s.charAt(limit-1))) {
1585                 // remove lead surrogate from the end of the prefix
1586                 --limit;
1587             }
1588             prefix.append(s, start, limit).append(" ...");
1589         }
1590         return prefix.append("\"").toString();
1591     }
1592 
1593     private static String prefix(String s) {
1594         return prefix(s, 0);
1595     }
1596 
1597     private String prefix(int start) {
1598         return prefix(msg, start);
1599     }
1600 
1601     private String prefix() {
1602         return prefix(msg, 0);
1603     }
1604 
    // Apostrophe mode of this pattern object.
    // NOTE(review): assigned outside this section of the file; presumably set at construction -- confirm.
    private ApostropheMode aposMode;
    // The pattern string. The scanning helpers of this class (skipDouble(), isChoice() etc.)
    // read it via msg.length() and msg.charAt().
    private String msg;
    // The parse result as a flat list of Part objects; addPart() appends to it.
    private ArrayList<Part> parts=new ArrayList<Part>();
    // Double values referenced by ARG_DOUBLE parts: the part value is the index into this list.
    // Remains null until addArgDoublePart() stores the first value.
    private ArrayList<Double> numericValues;
    // NOTE(review): the next three flags are maintained outside this section of the file.
    // Presumably they record whether the pattern uses named arguments, numbered arguments,
    // and whether auto-quoting is needed -- confirm against the parsing code.
    private boolean hasArgNames;
    private boolean hasArgNumbers;
    private boolean needsAutoQuoting;
    // volatile: writes to this flag are visible to other threads.
    // NOTE(review): presumably backs the Freezable implementation -- confirm.
    private volatile boolean frozen;

    // Looked up once, when the class is initialized, from the ICUConfig entry
    // "com.ibm.icu.text.MessagePattern.ApostropheMode"; "DOUBLE_OPTIONAL" is passed as the
    // second argument to ICUConfig.get() (presumably the fallback when the entry is missing).
    // The resulting string is converted with ApostropheMode.valueOf().
    private static final ApostropheMode defaultAposMode=
        ApostropheMode.valueOf(
            ICUConfig.get("com.ibm.icu.text.MessagePattern.ApostropheMode", "DOUBLE_OPTIONAL"));

    // Result of ArgType.values(), fetched once and kept for reuse.
    private static final ArgType[] argTypes=ArgType.values();
1619 }
1620