1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */
17 
18 package java.util;
19 
20 import java.io.IOException;
21 import java.io.ObjectInputStream;
22 import java.io.ObjectOutputStream;
23 import java.io.ObjectStreamField;
24 import java.io.Serializable;
25 import java.nio.charset.StandardCharsets;
26 import libcore.icu.ICU;
27 
28 /**
29  * {@code Locale} represents a language/country/variant combination. Locales are used to
30  * alter the presentation of information such as numbers or dates to suit the conventions
31  * in the region they describe.
32  *
33  * <p>The language codes are two-letter lowercase ISO language codes (such as "en") as defined by
34  * <a href="http://en.wikipedia.org/wiki/ISO_639-1">ISO 639-1</a>.
35  * The country codes are two-letter uppercase ISO country codes (such as "US") as defined by
 * <a href="http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2">ISO 3166-1</a>.
37  * The variant codes are unspecified.
38  *
39  * <p>Note that Java uses several deprecated two-letter codes. The Hebrew ("he") language
40  * code is rewritten as "iw", Indonesian ("id") as "in", and Yiddish ("yi") as "ji". This
41  * rewriting happens even if you construct your own {@code Locale} object, not just for
42  * instances returned by the various lookup methods.
43  *
44  * <a name="available_locales"></a><h3>Available locales</h3>
45  * <p>This class' constructors do no error checking. You can create a {@code Locale} for languages
46  * and countries that don't exist, and you can create instances for combinations that don't
47  * exist (such as "de_US" for "German as spoken in the US").
48  *
49  * <p>Note that locale data is not necessarily available for any of the locales pre-defined as
50  * constants in this class except for en_US, which is the only locale Java guarantees is always
51  * available.
52  *
53  * <p>It is also a mistake to assume that all devices have the same locales available.
54  * A device sold in the US will almost certainly support en_US and es_US, but not necessarily
55  * any locales with the same language but different countries (such as en_GB or es_ES),
56  * nor any locales for other languages (such as de_DE). The opposite may well be true for a device
57  * sold in Europe.
58  *
59  * <p>You can use {@link Locale#getDefault} to get an appropriate locale for the <i>user</i> of the
60  * device you're running on, or {@link Locale#getAvailableLocales} to get a list of all the locales
61  * available on the device you're running on.
62  *
63  * <a name="locale_data"></a><h3>Locale data</h3>
64  * <p>Note that locale data comes solely from ICU. User-supplied locale service providers (using
65  * the {@code java.text.spi} or {@code java.util.spi} mechanisms) are not supported.
66  *
67  * <p>Here are the versions of ICU (and the corresponding CLDR and Unicode versions) used in
68  * various Android releases:
69  * <table BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
70  * <tr><td>Android 1.5 (Cupcake)/Android 1.6 (Donut)/Android 2.0 (Eclair)</td>
71  *     <td>ICU 3.8</td>
72  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-5">CLDR 1.5</a></td>
73  *     <td><a href="http://www.unicode.org/versions/Unicode5.0.0/">Unicode 5.0</a></td></tr>
74  * <tr><td>Android 2.2 (Froyo)</td>
75  *     <td>ICU 4.2</td>
76  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-7">CLDR 1.7</a></td>
77  *     <td><a href="http://www.unicode.org/versions/Unicode5.1.0/">Unicode 5.1</a></td></tr>
78  * <tr><td>Android 2.3 (Gingerbread)/Android 3.0 (Honeycomb)</td>
79  *     <td>ICU 4.4</td>
80  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-8">CLDR 1.8</a></td>
81  *     <td><a href="http://www.unicode.org/versions/Unicode5.2.0/">Unicode 5.2</a></td></tr>
82  * <tr><td>Android 4.0 (Ice Cream Sandwich)</td>
83  *     <td><a href="http://site.icu-project.org/download/46">ICU 4.6</a></td>
84  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-9">CLDR 1.9</a></td>
85  *     <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr>
86  * <tr><td>Android 4.1 (Jelly Bean)</td>
87  *     <td><a href="http://site.icu-project.org/download/48">ICU 4.8</a></td>
88  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-2-0">CLDR 2.0</a></td>
89  *     <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr>
90  * <tr><td>Android 4.3 (Jelly Bean MR2)</td>
91  *     <td><a href="http://site.icu-project.org/download/50">ICU 50</a></td>
92  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-22-1">CLDR 22.1</a></td>
93  *     <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr>
94  * <tr><td>Android 4.4 (KitKat)</td>
95  *     <td><a href="http://site.icu-project.org/download/51">ICU 51</a></td>
96  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-23">CLDR 23</a></td>
97  *     <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr>
98  * <tr><td>Android 5.0 (Lollipop)</td>
99  *     <td><a href="http://site.icu-project.org/download/53">ICU 53</a></td>
100  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-25">CLDR 25</a></td>
101  *     <td><a href="http://www.unicode.org/versions/Unicode6.3.0/">Unicode 6.3</a></td></tr>
102  * </table>
103  *
104  * <a name="default_locale"></a><h3>Be wary of the default locale</h3>
105  * <p>Note that there are many convenience methods that automatically use the default locale, but
106  * using them may lead to subtle bugs.
107  *
108  * <p>The default locale is appropriate for tasks that involve presenting data to the user. In
109  * this case, you want to use the user's date/time formats, number
110  * formats, rules for conversion to lowercase, and so on. In this case, it's safe to use the
111  * convenience methods.
112  *
113  * <p>The default locale is <i>not</i> appropriate for machine-readable output. The best choice
114  * there is usually {@code Locale.US}&nbsp;&ndash; this locale is guaranteed to be available on all
115  * devices, and the fact that it has no surprising special cases and is frequently used (especially
116  * for computer-computer communication) means that it tends to be the most efficient choice too.
117  *
118  * <p>A common mistake is to implicitly use the default locale when producing output meant to be
119  * machine-readable. This tends to work on the developer's test devices (especially because so many
120  * developers use en_US), but fails when run on a device whose user is in a more complex locale.
121  *
122  * <p>For example, if you're formatting integers some locales will use non-ASCII decimal
123  * digits. As another example, if you're formatting floating-point numbers some locales will use
124  * {@code ','} as the decimal point and {@code '.'} for digit grouping. That's correct for
125  * human-readable output, but likely to cause problems if presented to another
126  * computer ({@link Double#parseDouble} can't parse such a number, for example).
127  * You should also be wary of the {@link String#toLowerCase} and
128  * {@link String#toUpperCase} overloads that don't take a {@code Locale}: in Turkey, for example,
129  * the characters {@code 'i'} and {@code 'I'} won't be converted to {@code 'I'} and {@code 'i'}.
130  * This is the correct behavior for Turkish text (such as user input), but inappropriate for, say,
131  * HTTP headers.
132  */
133 public final class Locale implements Cloneable, Serializable {
134 
    /** Version identifier for the serialized form of this class. */
    private static final long serialVersionUID = 9149081749638150636L;

    /**
     * Locale constant for en_CA (language "en", country "CA").
     */
    public static final Locale CANADA = new Locale(true, "en", "CA");

    /**
     * Locale constant for fr_CA (language "fr", country "CA").
     */
    public static final Locale CANADA_FRENCH = new Locale(true, "fr", "CA");

    /**
     * Locale constant for zh_CN (language "zh", country "CN").
     */
    public static final Locale CHINA = new Locale(true, "zh", "CN");

    /**
     * Locale constant for zh (language "zh", empty country).
     */
    public static final Locale CHINESE = new Locale(true, "zh", "");

    /**
     * Locale constant for en (language "en", empty country).
     */
    public static final Locale ENGLISH = new Locale(true, "en", "");

    /**
     * Locale constant for fr_FR (language "fr", country "FR").
     */
    public static final Locale FRANCE = new Locale(true, "fr", "FR");

    /**
     * Locale constant for fr (language "fr", empty country).
     */
    public static final Locale FRENCH = new Locale(true, "fr", "");

    /**
     * Locale constant for de (language "de", empty country).
     */
    public static final Locale GERMAN = new Locale(true, "de", "");

    /**
     * Locale constant for de_DE (language "de", country "DE").
     */
    public static final Locale GERMANY = new Locale(true, "de", "DE");

    /**
     * Locale constant for it (language "it", empty country).
     */
    public static final Locale ITALIAN = new Locale(true, "it", "");

    /**
     * Locale constant for it_IT (language "it", country "IT").
     */
    public static final Locale ITALY = new Locale(true, "it", "IT");

    /**
     * Locale constant for ja_JP (language "ja", country "JP").
     */
    public static final Locale JAPAN = new Locale(true, "ja", "JP");

    /**
     * Locale constant for ja (language "ja", empty country).
     */
    public static final Locale JAPANESE = new Locale(true, "ja", "");

    /**
     * Locale constant for ko_KR (language "ko", country "KR").
     */
    public static final Locale KOREA = new Locale(true, "ko", "KR");

    /**
     * Locale constant for ko (language "ko", empty country).
     */
    public static final Locale KOREAN = new Locale(true, "ko", "");

    /**
     * Locale constant for zh_CN. This is a separate instance constructed with the
     * same language and country as {@link #CHINA} and {@link #SIMPLIFIED_CHINESE}.
     */
    public static final Locale PRC = new Locale(true, "zh", "CN");

    /**
     * Locale constant for the root locale. The root locale has an empty language,
     * country, and variant.
     *
     * @since 1.6
     */
    public static final Locale ROOT = new Locale(true, "", "");

    /**
     * Locale constant for zh_CN. This is a separate instance constructed with the
     * same language and country as {@link #CHINA} and {@link #PRC}.
     */
    public static final Locale SIMPLIFIED_CHINESE = new Locale(true, "zh", "CN");

    /**
     * Locale constant for zh_TW (language "zh", country "TW").
     */
    public static final Locale TAIWAN = new Locale(true, "zh", "TW");

    /**
     * Locale constant for zh_TW. This is a separate instance constructed with the
     * same language and country as {@link #TAIWAN}.
     */
    public static final Locale TRADITIONAL_CHINESE = new Locale(true, "zh", "TW");

    /**
     * Locale constant for en_GB (language "en", country "GB").
     */
    public static final Locale UK = new Locale(true, "en", "GB");

    /**
     * Locale constant for en_US (language "en", country "US").
     */
    public static final Locale US = new Locale(true, "en", "US");

    /**
     * BCP-47 extension identifier (or "singleton") for the private
     * use extension.
     *
     * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}.
     *
     * @since 1.7
     */
    public static final char PRIVATE_USE_EXTENSION = 'x';

    /**
     * BCP-47 extension identifier (or "singleton") for the unicode locale extension.
     *
     * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}.
     *
     * @since 1.7
     */
    public static final char UNICODE_LOCALE_EXTENSION = 'u';

    /**
     * ISO 639-3 generic code for undetermined languages. The builder's non-strict
     * language normalization returns this value for a language that is not well formed.
     */
    private static final String UNDETERMINED_LANGUAGE = "und";
274 
275     /**
276      * The current default locale. It is temporarily assigned to US because we
277      * need a default locale to lookup the real default locale.
278      */
279     private static Locale defaultLocale = US;
280 
281     static {
282         String language = System.getProperty("user.language", "en");
283         String region = System.getProperty("user.region", "US");
284         String variant = System.getProperty("user.variant", "");
285         defaultLocale = new Locale(language, region, variant);
286     }
287 
288     /**
289      * A class that helps construct {@link Locale} instances.
290      *
291      * Unlike the public {@code Locale} constructors, the methods of this class
292      * perform much stricter checks on their input.
293      *
294      * Validity checks on the {@code language}, {@code country}, {@code variant}
295      * and {@code extension} values are carried out as per the
296      * <a href="https://tools.ietf.org/html/bcp47">BCP-47</a> specification.
297      *
298      * In addition, we treat the <a href="http://www.unicode.org/reports/tr35/">
299      * Unicode locale extension</a> specially and provide methods to manipulate
300      * the structured state (keywords and attributes) specified therein.
301      *
302      * @since 1.7
303      */
304     public static final class Builder {
305         private String language;
306         private String region;
307         private String variant;
308         private String script;
309 
310         private final Set<String> attributes;
311         private final Map<String, String> keywords;
312         private final Map<Character, String> extensions;
313 
Builder()314         public Builder() {
315             language = region = variant = script = "";
316 
317             // NOTE: We use sorted maps in the builder & the locale class itself
318             // because serialized forms of the unicode locale extension (and
319             // of the extension map itself) are specified to be in alphabetic
320             // order of keys.
321             attributes = new TreeSet<String>();
322             keywords = new TreeMap<String, String>();
323             extensions = new TreeMap<Character, String>();
324         }
325 
326         /**
327          * Sets the locale language. If {@code language} is {@code null} or empty, the
328          * previous value is cleared.
329          *
330          * As per BCP-47, the language must be between 2 and 3 ASCII characters
331          * in length and must only contain characters in the range {@code [a-zA-Z]}.
332          *
333          * This value is usually an <a href="http://www.loc.gov/standards/iso639-2/">
334          * ISO-639-2</a> alpha-2 or alpha-3 code, though no explicit checks are
335          * carried out that it's a valid code in that namespace.
336          *
337          * Values are normalized to lower case.
338          *
339          * Note that we don't support BCP-47 "extlang" languages because they were
340          * only ever used to substitute for a lack of 3 letter language codes.
341          *
342          * @throws IllformedLocaleException if the language was invalid.
343          */
setLanguage(String language)344         public Builder setLanguage(String language) {
345             this.language = normalizeAndValidateLanguage(language, true /* strict */);
346             return this;
347         }
348 
normalizeAndValidateLanguage(String language, boolean strict)349         private static String normalizeAndValidateLanguage(String language, boolean strict) {
350             if (language == null || language.isEmpty()) {
351                 return "";
352             }
353 
354             final String lowercaseLanguage = language.toLowerCase(Locale.ROOT);
355             if (!isValidBcp47Alpha(lowercaseLanguage, 2, 3)) {
356                 if (strict) {
357                     throw new IllformedLocaleException("Invalid language: " + language);
358                 } else {
359                     return UNDETERMINED_LANGUAGE;
360                 }
361             }
362 
363             return lowercaseLanguage;
364         }
365 
366         /**
367          * Set the state of this builder to the parsed contents of the BCP-47 language
368          * tag {@code languageTag}.
369          *
370          * This method is equivalent to a call to {@link #clear} if {@code languageTag}
371          * is {@code null} or empty.
372          *
373          * <b>NOTE:</b> In contrast to {@link Locale#forLanguageTag(String)}, which
374          * simply ignores malformed input, this method will throw an exception if
375          * its input is malformed.
376          *
377          * @throws IllformedLocaleException if {@code languageTag} is not a well formed
378          *         BCP-47 tag.
379          */
setLanguageTag(String languageTag)380         public Builder setLanguageTag(String languageTag) {
381             if (languageTag == null || languageTag.isEmpty()) {
382                 clear();
383                 return this;
384             }
385 
386             final Locale fromIcu = forLanguageTag(languageTag, true /* strict */);
387             // When we ask ICU for strict parsing, it might return a null locale
388             // if the language tag is malformed.
389             if (fromIcu == null) {
390                 throw new IllformedLocaleException("Invalid languageTag: " + languageTag);
391             }
392 
393             setLocale(fromIcu);
394             return this;
395         }
396 
397         /**
398          * Sets the locale region. If {@code region} is {@code null} or empty, the
399          * previous value is cleared.
400          *
401          * As per BCP-47, the region must either be a 2 character ISO-3166-1 code
402          * (each character in the range [a-zA-Z]) OR a 3 digit UN M.49 code.
403          *
404          * Values are normalized to upper case.
405          *
406          * @throws IllformedLocaleException if {@code} region is invalid.
407          */
setRegion(String region)408         public Builder setRegion(String region) {
409             this.region = normalizeAndValidateRegion(region, true /* strict */);
410             return this;
411         }
412 
normalizeAndValidateRegion(String region, boolean strict)413         private static String normalizeAndValidateRegion(String region, boolean strict) {
414             if (region == null || region.isEmpty()) {
415                 return "";
416             }
417 
418             final String uppercaseRegion = region.toUpperCase(Locale.ROOT);
419             if (!isValidBcp47Alpha(uppercaseRegion, 2, 2) &&
420                     !isUnM49AreaCode(uppercaseRegion)) {
421                 if (strict) {
422                     throw new IllformedLocaleException("Invalid region: " + region);
423                 } else {
424                     return "";
425                 }
426             }
427 
428             return uppercaseRegion;
429         }
430 
431         /**
432          * Sets the locale variant. If {@code variant} is {@code null} or empty,
433          * the previous value is cleared.
434          *
435          * The input string my consist of one or more variants separated by
436          * valid separators ('-' or '_').
437          *
438          * As per BCP-47, each variant must be between 5 and 8 alphanumeric characters
439          * in length (each character in the range {@code [a-zA-Z0-9]}) but
440          * can be exactly 4 characters in length if the first character is a digit.
441          *
442          * Note that this is a much stricter interpretation of {@code variant}
443          * than the public {@code Locale} constructors. The latter allowed free form
444          * variants.
445          *
446          * Variants are case sensitive and all separators are normalized to {@code '_'}.
447          *
448          * @throws IllformedLocaleException if {@code} variant is invalid.
449          */
setVariant(String variant)450         public Builder setVariant(String variant) {
451             this.variant = normalizeAndValidateVariant(variant);
452             return this;
453         }
454 
normalizeAndValidateVariant(String variant)455         private static String normalizeAndValidateVariant(String variant) {
456             if (variant == null || variant.isEmpty()) {
457                 return "";
458             }
459 
460             // Note that unlike extensions, we canonicalize to lower case alphabets
461             // and underscores instead of hyphens.
462             final String normalizedVariant = variant.replace('-', '_');
463             String[] subTags = normalizedVariant.split("_");
464 
465             for (String subTag : subTags) {
466                 if (!isValidVariantSubtag(subTag)) {
467                     throw new IllformedLocaleException("Invalid variant: " + variant);
468                 }
469             }
470 
471             return normalizedVariant;
472         }
473 
isValidVariantSubtag(String subTag)474         private static boolean isValidVariantSubtag(String subTag) {
475             // The BCP-47 spec states that :
476             // - Subtags can be between [5, 8] alphanumeric chars in length.
477             // - Subtags that start with a number are allowed to be 4 chars in length.
478             if (subTag.length() >= 5 && subTag.length() <= 8) {
479                 if (isAsciiAlphaNum(subTag)) {
480                     return true;
481                 }
482             } else if (subTag.length() == 4) {
483                 final char firstChar = subTag.charAt(0);
484                 if ((firstChar >= '0' && firstChar <= '9') && isAsciiAlphaNum(subTag)) {
485                     return true;
486                 }
487             }
488 
489             return false;
490         }
491 
492         /**
493          * Sets the locale script. If {@code script} is {@code null} or empty,
494          * the previous value is cleared.
495          *
496          * As per BCP-47, the script must be 4 characters in length, and
497          * each character in the range {@code [a-zA-Z]}.
498          *
499          * A script usually represents a valid ISO 15924 script code, though no
500          * other registry or validity checks are performed.
501          *
502          * Scripts are normalized to title cased values.
503          *
504          * @throws IllformedLocaleException if {@code script} is invalid.
505          */
setScript(String script)506         public Builder setScript(String script) {
507             this.script = normalizeAndValidateScript(script, true /* strict */);
508             return this;
509         }
510 
normalizeAndValidateScript(String script, boolean strict)511         private static String normalizeAndValidateScript(String script, boolean strict) {
512             if (script == null || script.isEmpty()) {
513                 return "";
514             }
515 
516             if (!isValidBcp47Alpha(script, 4, 4)) {
517                 if (strict) {
518                     throw new IllformedLocaleException("Invalid script: " + script);
519                 } else {
520                     return "";
521                 }
522             }
523 
524             return titleCaseAsciiWord(script);
525         }
526 
527         /**
528          * Sets the state of the builder to the {@link Locale} represented by
529          * {@code locale}.
530          *
531          * Note that the locale's language, region and variant are validated as per
532          * the rules specified in {@link #setLanguage}, {@link #setRegion} and
533          * {@link #setVariant}.
534          *
535          * All existing builder state is discarded.
536          *
537          * @throws IllformedLocaleException if {@code locale} is invalid.
538          * @throws NullPointerException if {@code locale} is null.
539          */
setLocale(Locale locale)540         public Builder setLocale(Locale locale) {
541             if (locale == null) {
542                 throw new NullPointerException("locale == null");
543             }
544 
545             // Make copies of the existing values so that we don't partially
546             // update the state if we encounter an error.
547             final String backupLanguage = language;
548             final String backupRegion = region;
549             final String backupVariant = variant;
550 
551             try {
552                 setLanguage(locale.getLanguage());
553                 setRegion(locale.getCountry());
554                 setVariant(locale.getVariant());
555             } catch (IllformedLocaleException ifle) {
556                 language = backupLanguage;
557                 region = backupRegion;
558                 variant = backupVariant;
559 
560                 throw ifle;
561             }
562 
563             // The following values can be set only via the builder class, so
564             // there's no need to normalize them or check their validity.
565 
566             this.script = locale.getScript();
567 
568             extensions.clear();
569             extensions.putAll(locale.extensions);
570 
571             keywords.clear();
572             keywords.putAll(locale.unicodeKeywords);
573 
574             attributes.clear();
575             attributes.addAll(locale.unicodeAttributes);
576 
577             return this;
578         }
579 
580         /**
581          * Adds the specified attribute to the list of attributes in the unicode
582          * locale extension.
583          *
584          * Attributes must be between 3 and 8 characters in length, and each character
585          * must be in the range {@code [a-zA-Z0-9]}.
586          *
587          * Attributes are normalized to lower case values. All added attributes and
588          * keywords are combined to form a complete unicode locale extension on
589          * {@link Locale} objects built by this builder, and accessible via
590          * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION}
591          * key.
592          *
593          * @throws IllformedLocaleException if {@code attribute} is invalid.
594          * @throws NullPointerException if {@code attribute} is null.
595          */
addUnicodeLocaleAttribute(String attribute)596         public Builder addUnicodeLocaleAttribute(String attribute) {
597             if (attribute == null) {
598                 throw new NullPointerException("attribute == null");
599             }
600 
601             final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT);
602             if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) {
603                 throw new IllformedLocaleException("Invalid locale attribute: " + attribute);
604             }
605 
606             attributes.add(lowercaseAttribute);
607 
608             return this;
609         }
610 
611         /**
612          * Removes an attribute from the list of attributes in the unicode locale
613          * extension.
614          *
615          * {@code attribute} must be valid as per the rules specified in
616          * {@link #addUnicodeLocaleAttribute}.
617          *
618          * This method has no effect if {@code attribute} hasn't already been
619          * added.
620          *
621          * @throws IllformedLocaleException if {@code attribute} is invalid.
622          * @throws NullPointerException if {@code attribute} is null.
623          */
removeUnicodeLocaleAttribute(String attribute)624         public Builder removeUnicodeLocaleAttribute(String attribute) {
625             if (attribute == null) {
626                 throw new NullPointerException("attribute == null");
627             }
628 
629             // Weirdly, remove is specified to check whether the attribute
630             // is valid, so we have to perform the full alphanumeric check here.
631             final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT);
632             if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) {
633                 throw new IllformedLocaleException("Invalid locale attribute: " + attribute);
634             }
635 
636             attributes.remove(attribute);
637             return this;
638         }
639 
640         /**
641          * Sets the extension identified by {@code key} to {@code value}.
642          *
643          * {@code key} must be in the range {@code [a-zA-Z0-9]}.
644          *
645          * If {@code value} is {@code null} or empty, the extension is removed.
646          *
647          * In the general case, {@code value} must be a series of subtags separated
648          * by ({@code "-"} or {@code "_"}). Each subtag must be between
649          * 2 and 8 characters in length, and each character in the subtag must be in
650          * the range {@code [a-zA-Z0-9]}.
651          *
652          * <p>
653          * There are two special cases :
654          * <li>
655          *     <ul>
656          *         The unicode locale extension
657          *         ({@code key == 'u'}, {@link Locale#UNICODE_LOCALE_EXTENSION}) : Setting
658          *         the unicode locale extension results in all existing keyword and attribute
659          *         state being replaced by the parsed result of {@code value}. For example,
660          *         {@code  builder.setExtension('u', "baaaz-baaar-fo-baar-ba-baaz")}
661          *         is equivalent to:
662          *         <pre>
663          *             builder.addUnicodeLocaleAttribute("baaaz");
664          *             builder.addUnicodeLocaleAttribute("baaar");
665          *             builder.setUnicodeLocaleKeyword("fo", "baar");
666          *             builder.setUnicodeLocaleKeyword("ba", "baaa");
667          *         </pre>
668          *     </ul>
669          *     <ul>
670          *         The private use extension
671          *         ({@code key == 'x'}, {@link Locale#PRIVATE_USE_EXTENSION}) : Each subtag in a
672          *         private use extension can be between 1 and 8 characters in length (in contrast
673          *         to a minimum length of 2 for all other extensions).
674          *     </ul>
675          * </li>
676          *
677          * @throws IllformedLocaleException if {@code value} is invalid.
678          */
setExtension(char key, String value)679         public Builder setExtension(char key, String value) {
680             if (value == null || value.isEmpty()) {
681                 extensions.remove(key);
682                 return this;
683             }
684 
685             final String normalizedValue = value.toLowerCase(Locale.ROOT).replace('_', '-');
686             final String[] subtags = normalizedValue.split("-");
687 
688             // Lengths for subtags in the private use extension should be [1, 8] chars.
689             // For all other extensions, they should be [2, 8] chars.
690             //
691             // http://www.rfc-editor.org/rfc/bcp/bcp47.txt
692             final int minimumLength = (key == PRIVATE_USE_EXTENSION) ? 1 : 2;
693             for (String subtag : subtags) {
694                 if (!isValidBcp47Alphanum(subtag, minimumLength, 8)) {
695                     throw new IllformedLocaleException(
696                             "Invalid private use extension : " + value);
697                 }
698             }
699 
700             // We need to take special action in the case of unicode extensions,
701             // since we claim to understand their keywords and attributes.
702             if (key == UNICODE_LOCALE_EXTENSION) {
703                 // First clear existing attributes and keywords.
704                 extensions.clear();
705                 attributes.clear();
706 
707                 parseUnicodeExtension(subtags, keywords, attributes);
708             } else {
709                 extensions.put(key, normalizedValue);
710             }
711 
712             return this;
713         }
714 
715         /**
716          * Clears all extensions from this builder. Note that this also implicitly
717          * clears all state related to the unicode locale extension; all attributes
718          * and keywords set by {@link #addUnicodeLocaleAttribute} and
719          * {@link #setUnicodeLocaleKeyword} are cleared.
720          */
clearExtensions()721         public Builder clearExtensions() {
722             extensions.clear();
723             attributes.clear();
724             keywords.clear();
725             return this;
726         }
727 
728         /**
729          * Adds a key / type pair to the list of unicode locale extension keys.
730          *
731          * {@code key} must be 2 characters in length, and each character must be
732          * in the range {@code [a-zA-Z0-9]}.
733          *
734          * {#code type} can either be empty, or a series of one or more subtags
735          * separated by a separator ({@code "-"} or {@code "_"}). Each subtag must
736          * be between 3 and 8 characters in length and each character in the subtag
737          * must be in the range {@code [a-zA-Z0-9]}.
738          *
739          * Note that the type is normalized to lower case, and all separators
740          * are normalized to {@code "-"}. All added attributes and
741          * keywords are combined to form a complete unicode locale extension on
742          * {@link Locale} objects built by this builder, and accessible via
743          * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION}
744          * key.
745          *
746          * @throws IllformedLocaleException if {@code key} or {@code value} are
747          *         invalid.
748          */
setUnicodeLocaleKeyword(String key, String type)749         public Builder setUnicodeLocaleKeyword(String key, String type) {
750             if (key == null) {
751                 throw new NullPointerException("key == null");
752             }
753 
754             if (type == null && keywords != null) {
755                 keywords.remove(key);
756                 return this;
757             }
758 
759             final String lowerCaseKey = key.toLowerCase(Locale.ROOT);
760             // The key must be exactly two alphanumeric characters.
761             if (lowerCaseKey.length() != 2 || !isAsciiAlphaNum(lowerCaseKey)) {
762                 throw new IllformedLocaleException("Invalid unicode locale keyword: " + key);
763             }
764 
765             // The type can be one or more alphanumeric strings of length [3, 8] characters,
766             // separated by a separator char, which is one of "_" or "-". Though the spec
767             // doesn't require it, we normalize all "_" to "-" to make the rest of our
768             // processing easier.
769             final String lowerCaseType = type.toLowerCase(Locale.ROOT).replace("_", "-");
770             if (!isValidTypeList(lowerCaseType)) {
771                 throw new IllformedLocaleException("Invalid unicode locale type: " + type);
772             }
773 
774             // Everything checks out fine, add the <key, type> mapping to the list.
775             keywords.put(lowerCaseKey, lowerCaseType);
776 
777             return this;
778         }
779 
780         /**
781          * Clears all existing state from this builder.
782          */
clear()783         public Builder clear() {
784             clearExtensions();
785             language = region = variant = script = "";
786 
787             return this;
788         }
789 
790         /**
791          * Constructs a locale from the existing state of the builder. Note that this
792          * method is guaranteed to succeed since field validity checks are performed
793          * at the point of setting them.
794          */
build()795         public Locale build() {
796             // NOTE: We need to make a copy of attributes, keywords and extensions
797             // because the RI allows this builder to reused.
798             return new Locale(language, region, variant, script,
799                     attributes, keywords, extensions,
800                     true /* has validated fields */);
801         }
802     }
803 
804     /**
805      * Returns a locale for a given BCP-47 language tag. This method is more
806      * lenient than {@link Builder#setLanguageTag}. For a given language tag, parsing
807      * will proceed up to the first malformed subtag. All subsequent tags are discarded.
808      * Note that language tags use {@code -} rather than {@code _}, for example {@code en-US}.
809      *
810      * @throws NullPointerException if {@code languageTag} is {@code null}.
811      *
812      * @since 1.7
813      */
forLanguageTag(String languageTag)814     public static Locale forLanguageTag(String languageTag) {
815         if (languageTag == null) {
816             throw new NullPointerException("languageTag == null");
817         }
818 
819         return forLanguageTag(languageTag, false /* strict */);
820     }
821 
    // The locale's components as stored by the constructors. All of these fields
    // are transient.
    private transient String countryCode;
    private transient String languageCode;
    private transient String variantCode;
    private transient String scriptCode;

    /* Sorted, Unmodifiable */
    private transient Set<String> unicodeAttributes;
    /* Sorted, Unmodifiable */
    private transient Map<String, String> unicodeKeywords;
    /* Sorted, Unmodifiable */
    private transient Map<Character, String> extensions;

    /**
     * Whether this instance was constructed from a builder. We can make
     * stronger assumptions about the validity of Locale fields if this was
     * constructed by a builder.
     */
    private transient final boolean hasValidatedFields;

    // Cached derived strings. cachedLanguageTag is filled in lazily by
    // toLanguageTag(); the other two are presumably populated the same way by
    // methods outside this section -- TODO confirm.
    private transient String cachedToStringResult;
    private transient String cachedLanguageTag;
    private transient String cachedIcuLocaleId;
844 
845     /**
846      * There's a circular dependency between toLowerCase/toUpperCase and
847      * Locale.US. Work around this by avoiding these methods when constructing
848      * the built-in locales.
849      */
Locale(boolean hasValidatedFields, String lowerCaseLanguageCode, String upperCaseCountryCode)850     private Locale(boolean hasValidatedFields, String lowerCaseLanguageCode,
851             String upperCaseCountryCode) {
852         this.languageCode = lowerCaseLanguageCode;
853         this.countryCode = upperCaseCountryCode;
854         this.variantCode = "";
855         this.scriptCode = "";
856 
857         this.unicodeAttributes = Collections.EMPTY_SET;
858         this.unicodeKeywords = Collections.EMPTY_MAP;
859         this.extensions = Collections.EMPTY_MAP;
860 
861         this.hasValidatedFields = hasValidatedFields;
862     }
863 
864     /**
865      * Constructs a new {@code Locale} using the specified language.
866      */
Locale(String language)867     public Locale(String language) {
868         this(language, "", "", "", Collections.EMPTY_SET, Collections.EMPTY_MAP,
869                 Collections.EMPTY_MAP, false /* has validated fields */);
870     }
871 
872     /**
873      * Constructs a new {@code Locale} using the specified language and country codes.
874      */
Locale(String language, String country)875     public Locale(String language, String country) {
876         this(language, country, "", "", Collections.EMPTY_SET, Collections.EMPTY_MAP,
877                 Collections.EMPTY_MAP, false /* has validated fields */);
878     }
879 
880     /**
881      * Required by libcore.icu.ICU.
882      *
883      * @hide
884      */
Locale(String language, String country, String variant, String scriptCode, Set<String> unicodeAttributes, Map<String, String> unicodeKeywords, Map<Character, String> extensions, boolean hasValidatedFields)885     public Locale(String language, String country, String variant, String scriptCode,
886             /* nonnull */ Set<String> unicodeAttributes,
887             /* nonnull */ Map<String, String> unicodeKeywords,
888             /* nonnull */ Map<Character, String> extensions,
889             boolean hasValidatedFields) {
890         if (language == null || country == null || variant == null) {
891             throw new NullPointerException("language=" + language +
892                     ",country=" + country +
893                     ",variant=" + variant);
894         }
895 
896         if (hasValidatedFields) {
897             this.languageCode = adjustLanguageCode(language);
898             this.countryCode = country;
899             this.variantCode = variant;
900         } else {
901             if (language.isEmpty() && country.isEmpty()) {
902                 languageCode = "";
903                 countryCode = "";
904                 variantCode = variant;
905             } else {
906                 languageCode = adjustLanguageCode(language);
907                 countryCode = country.toUpperCase(Locale.US);
908                 variantCode = variant;
909             }
910         }
911 
912         this.scriptCode = scriptCode;
913 
914         if (hasValidatedFields) {
915             Set<String> attribsCopy = new TreeSet<String>(unicodeAttributes);
916             Map<String, String> keywordsCopy = new TreeMap<String, String>(unicodeKeywords);
917             Map<Character, String> extensionsCopy = new TreeMap<Character, String>(extensions);
918 
919             // We need to transform the list of attributes & keywords set on the
920             // builder to a unicode locale extension. i.e, if we have any keywords
921             // or attributes set, Locale#getExtension('u') should return a well
922             // formed extension.
923             addUnicodeExtensionToExtensionsMap(attribsCopy, keywordsCopy, extensionsCopy);
924 
925             this.unicodeAttributes = Collections.unmodifiableSet(attribsCopy);
926             this.unicodeKeywords = Collections.unmodifiableMap(keywordsCopy);
927             this.extensions = Collections.unmodifiableMap(extensionsCopy);
928         } else {
929             this.unicodeAttributes = unicodeAttributes;
930             this.unicodeKeywords = unicodeKeywords;
931             this.extensions = extensions;
932         }
933 
934         this.hasValidatedFields = hasValidatedFields;
935     }
936 
937     /**
938      * Constructs a new {@code Locale} using the specified language, country,
939      * and variant codes.
940      */
Locale(String language, String country, String variant)941     public Locale(String language, String country, String variant) {
942         this(language, country, variant, "", Collections.EMPTY_SET,
943                 Collections.EMPTY_MAP, Collections.EMPTY_MAP,
944                 false /* has validated fields */);
945     }
946 
clone()947     @Override public Object clone() {
948         try {
949             return super.clone();
950         } catch (CloneNotSupportedException e) {
951             throw new AssertionError(e);
952         }
953     }
954 
955     /**
956      * Returns true if {@code object} is a locale with the same language,
957      * country and variant.
958      */
equals(Object object)959     @Override public boolean equals(Object object) {
960         if (object == this) {
961             return true;
962         }
963         if (object instanceof Locale) {
964             Locale o = (Locale) object;
965             return languageCode.equals(o.languageCode)
966                     && countryCode.equals(o.countryCode)
967                     && variantCode.equals(o.variantCode)
968                     && scriptCode.equals(o.scriptCode)
969                     && extensions.equals(o.extensions);
970 
971         }
972         return false;
973     }
974 
975     /**
976      * Returns the system's installed locales. This array always includes {@code
977      * Locale.US}, and usually several others. Most locale-sensitive classes
978      * offer their own {@code getAvailableLocales} method, which should be
979      * preferred over this general purpose method.
980      *
981      * @see java.text.BreakIterator#getAvailableLocales()
982      * @see java.text.Collator#getAvailableLocales()
983      * @see java.text.DateFormat#getAvailableLocales()
984      * @see java.text.DateFormatSymbols#getAvailableLocales()
985      * @see java.text.DecimalFormatSymbols#getAvailableLocales()
986      * @see java.text.NumberFormat#getAvailableLocales()
987      * @see java.util.Calendar#getAvailableLocales()
988      */
getAvailableLocales()989     public static Locale[] getAvailableLocales() {
990         return ICU.getAvailableLocales();
991     }
992 
993     /**
994      * Returns the country code for this locale, or {@code ""} if this locale
995      * doesn't correspond to a specific country.
996      */
getCountry()997     public String getCountry() {
998         return countryCode;
999     }
1000 
1001     /**
1002      * Returns the user's preferred locale. This may have been overridden for
1003      * this process with {@link #setDefault}.
1004      *
1005      * <p>Since the user's locale changes dynamically, avoid caching this value.
1006      * Instead, use this method to look it up for each use.
1007      */
getDefault()1008     public static Locale getDefault() {
1009         return defaultLocale;
1010     }
1011 
1012     /**
1013      * Equivalent to {@code getDisplayCountry(Locale.getDefault())}.
1014      */
getDisplayCountry()1015     public final String getDisplayCountry() {
1016         return getDisplayCountry(getDefault());
1017     }
1018 
1019     /**
1020      * Returns the name of this locale's country, localized to {@code locale}.
1021      * Returns the empty string if this locale does not correspond to a specific
1022      * country.
1023      */
getDisplayCountry(Locale locale)1024     public String getDisplayCountry(Locale locale) {
1025         if (countryCode.isEmpty()) {
1026             return "";
1027         }
1028 
1029         final String normalizedRegion = Builder.normalizeAndValidateRegion(
1030                 countryCode, false /* strict */);
1031         if (normalizedRegion.isEmpty()) {
1032             return countryCode;
1033         }
1034 
1035         String result = ICU.getDisplayCountry(this, locale);
1036         if (result == null) { // TODO: do we need to do this, or does ICU do it for us?
1037             result = ICU.getDisplayCountry(this, Locale.getDefault());
1038         }
1039         return result;
1040     }
1041 
1042     /**
1043      * Equivalent to {@code getDisplayLanguage(Locale.getDefault())}.
1044      */
getDisplayLanguage()1045     public final String getDisplayLanguage() {
1046         return getDisplayLanguage(getDefault());
1047     }
1048 
1049     /**
1050      * Returns the name of this locale's language, localized to {@code locale}.
1051      * If the language name is unknown, the language code is returned.
1052      */
getDisplayLanguage(Locale locale)1053     public String getDisplayLanguage(Locale locale) {
1054         if (languageCode.isEmpty()) {
1055             return "";
1056         }
1057 
1058         // Hacks for backward compatibility.
1059         //
1060         // Our language tag will contain "und" if the languageCode is invalid
1061         // or missing. ICU will then return "langue indéterminée" or the equivalent
1062         // display language for the indeterminate language code.
1063         //
1064         // Sigh... ugh... and what not.
1065         final String normalizedLanguage = Builder.normalizeAndValidateLanguage(
1066                 languageCode, false /* strict */);
1067         if (UNDETERMINED_LANGUAGE.equals(normalizedLanguage)) {
1068             return languageCode;
1069         }
1070 
1071         // TODO: We need a new hack or a complete fix for http://b/8049507 --- We would
1072         // cover the frameworks' tracks when they were using "tl" instead of "fil".
1073         String result = ICU.getDisplayLanguage(this, locale);
1074         if (result == null) { // TODO: do we need to do this, or does ICU do it for us?
1075             result = ICU.getDisplayLanguage(this, Locale.getDefault());
1076         }
1077         return result;
1078     }
1079 
1080     /**
1081      * Equivalent to {@code getDisplayName(Locale.getDefault())}.
1082      */
getDisplayName()1083     public final String getDisplayName() {
1084         return getDisplayName(getDefault());
1085     }
1086 
1087     /**
1088      * Returns this locale's language name, country name, and variant, localized
1089      * to {@code locale}. The exact output form depends on whether this locale
1090      * corresponds to a specific language, script, country and variant.
1091      *
1092      * <p>For example:
1093      * <ul>
1094      * <li>{@code new Locale("en").getDisplayName(Locale.US)} -> {@code English}
1095      * <li>{@code new Locale("en", "US").getDisplayName(Locale.US)} -> {@code English (United States)}
1096      * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.US)} -> {@code English (United States,Computer)}
1097      * <li>{@code Locale.fromLanguageTag("zh-Hant-CN").getDisplayName(Locale.US)} -> {@code Chinese (Traditional Han,China)}
1098      * <li>{@code new Locale("en").getDisplayName(Locale.FRANCE)} -> {@code anglais}
1099      * <li>{@code new Locale("en", "US").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis)}
1100      * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis,informatique)}.
1101      * </ul>
1102      */
getDisplayName(Locale locale)1103     public String getDisplayName(Locale locale) {
1104         int count = 0;
1105         StringBuilder buffer = new StringBuilder();
1106         if (!languageCode.isEmpty()) {
1107             String displayLanguage = getDisplayLanguage(locale);
1108             buffer.append(displayLanguage.isEmpty() ? languageCode : displayLanguage);
1109             ++count;
1110         }
1111         if (!scriptCode.isEmpty()) {
1112             if (count == 1) {
1113                 buffer.append(" (");
1114             }
1115             String displayScript = getDisplayScript(locale);
1116             buffer.append(displayScript.isEmpty() ? scriptCode : displayScript);
1117             ++count;
1118         }
1119         if (!countryCode.isEmpty()) {
1120             if (count == 1) {
1121                 buffer.append(" (");
1122             } else if (count == 2) {
1123                 buffer.append(",");
1124             }
1125             String displayCountry = getDisplayCountry(locale);
1126             buffer.append(displayCountry.isEmpty() ? countryCode : displayCountry);
1127             ++count;
1128         }
1129         if (!variantCode.isEmpty()) {
1130             if (count == 1) {
1131                 buffer.append(" (");
1132             } else if (count == 2 || count == 3) {
1133                 buffer.append(",");
1134             }
1135             String displayVariant = getDisplayVariant(locale);
1136             buffer.append(displayVariant.isEmpty() ? variantCode : displayVariant);
1137             ++count;
1138         }
1139         if (count > 1) {
1140             buffer.append(")");
1141         }
1142         return buffer.toString();
1143     }
1144 
1145     /**
1146      * Returns the full variant name in the default {@code Locale} for the variant code of
1147      * this {@code Locale}. If there is no matching variant name, the variant code is
1148      * returned.
1149      *
1150      * @since 1.7
1151      */
getDisplayVariant()1152     public final String getDisplayVariant() {
1153         return getDisplayVariant(getDefault());
1154     }
1155 
1156     /**
1157      * Returns the full variant name in the specified {@code Locale} for the variant code
1158      * of this {@code Locale}. If there is no matching variant name, the variant code is
1159      * returned.
1160      *
1161      * @since 1.7
1162      */
getDisplayVariant(Locale locale)1163     public String getDisplayVariant(Locale locale) {
1164         if (variantCode.isEmpty()) {
1165             return "";
1166         }
1167 
1168         try {
1169             Builder.normalizeAndValidateVariant(variantCode);
1170         } catch (IllformedLocaleException ilfe) {
1171             return variantCode;
1172         }
1173 
1174         String result = ICU.getDisplayVariant(this, locale);
1175         if (result == null) { // TODO: do we need to do this, or does ICU do it for us?
1176             result = ICU.getDisplayVariant(this, Locale.getDefault());
1177         }
1178 
1179         // The "old style" locale constructors allow us to pass in variants that aren't
1180         // valid BCP-47 variant subtags. When that happens, toLanguageTag will not emit
1181         // them. Note that we know variantCode.length() > 0 due to the isEmpty check at
1182         // the beginning of this function.
1183         if (result.isEmpty()) {
1184             return variantCode;
1185         }
1186         return result;
1187     }
1188 
1189     /**
1190      * Returns the three-letter ISO 3166 country code which corresponds to the country
1191      * code for this {@code Locale}.
1192      * @throws MissingResourceException if there's no 3-letter country code for this locale.
1193      */
getISO3Country()1194     public String getISO3Country() {
1195         // The results of getISO3Country do not depend on the languageCode,
1196         // so we pass an arbitrarily selected language code here. This guards
1197         // against errors caused by malformed or invalid language codes.
1198         String code = ICU.getISO3Country("en-" + countryCode);
1199         if (!countryCode.isEmpty() && code.isEmpty()) {
1200             throw new MissingResourceException("No 3-letter country code for locale: " + this, "FormatData_" + this, "ShortCountry");
1201         }
1202         return code;
1203     }
1204 
1205     /**
1206      * Returns the three-letter ISO 639-2/T language code which corresponds to the language
1207      * code for this {@code Locale}.
1208      * @throws MissingResourceException if there's no 3-letter language code for this locale.
1209      */
getISO3Language()1210     public String getISO3Language() {
1211         // For backward compatibility, we must return "" for an empty language
1212         // code and not "und" which is the accurate ISO-639-3 code for an
1213         // undetermined language.
1214         if (languageCode.isEmpty()) {
1215             return "";
1216         }
1217 
1218         // The results of getISO3Language do not depend on the country code
1219         // or any of the other locale fields, so we pass just the language here.
1220         String code = ICU.getISO3Language(languageCode);
1221         if (!languageCode.isEmpty() && code.isEmpty()) {
1222             throw new MissingResourceException("No 3-letter language code for locale: " + this, "FormatData_" + this, "ShortLanguage");
1223         }
1224         return code;
1225     }
1226 
1227     /**
1228      * Returns an array of strings containing all the two-letter ISO 3166 country codes that can be
1229      * used as the country code when constructing a {@code Locale}.
1230      */
getISOCountries()1231     public static String[] getISOCountries() {
1232         return ICU.getISOCountries();
1233     }
1234 
1235     /**
1236      * Returns an array of strings containing all the two-letter ISO 639-1 language codes that can be
1237      * used as the language code when constructing a {@code Locale}.
1238      */
getISOLanguages()1239     public static String[] getISOLanguages() {
1240         return ICU.getISOLanguages();
1241     }
1242 
1243     /**
1244      * Returns the language code for this {@code Locale} or the empty string if no language
1245      * was set.
1246      */
getLanguage()1247     public String getLanguage() {
1248         return languageCode;
1249     }
1250 
1251     /**
1252      * Returns the variant code for this {@code Locale} or an empty {@code String} if no variant
1253      * was set.
1254      */
getVariant()1255     public String getVariant() {
1256         return variantCode;
1257     }
1258 
1259     /**
1260      * Returns the script code for this {@code Locale} or an empty {@code String} if no script
1261      * was set.
1262      *
1263      * If set, the script code will be a title cased string of length 4, as per the ISO 15924
1264      * specification.
1265      *
1266      * @since 1.7
1267      */
getScript()1268     public String getScript() {
1269         return scriptCode;
1270     }
1271 
1272     /**
1273      * Equivalent to {@code getDisplayScript(Locale.getDefault()))}
1274      *
1275      * @since 1.7
1276      */
getDisplayScript()1277     public String getDisplayScript() {
1278         return getDisplayScript(getDefault());
1279     }
1280 
1281     /**
1282      * Returns the name of this locale's script code, localized to {@link Locale}. If the
1283      * script code is unknown, the return value of this method is the same as that of
1284      * {@link #getScript()}.
1285      *
1286      * @since 1.7
1287      */
getDisplayScript(Locale locale)1288     public String getDisplayScript(Locale locale) {
1289         if (scriptCode.isEmpty()) {
1290             return "";
1291         }
1292 
1293         String result = ICU.getDisplayScript(this, locale);
1294         if (result == null) { // TODO: do we need to do this, or does ICU do it for us?
1295             result = ICU.getDisplayScript(this, Locale.getDefault());
1296         }
1297 
1298         return result;
1299 
1300     }
1301 
1302     /**
1303      * Returns a well formed BCP-47 language tag that identifies this locale.
1304      *
1305      * Note that this locale itself might consist of ill formed fields, since the
1306      * public {@code Locale} constructors do not perform validity checks to maintain
1307      * backwards compatibility. When this is the case, this method will either replace
1308      * ill formed fields with standard BCP-47 subtags (For eg. "und" (undetermined)
1309      * for invalid languages) or omit them altogether.
1310      *
1311      * Additionally, ill formed variants will result in the remainder of the tag
1312      * (both variants and extensions) being moved to the private use extension,
1313      * where they will appear after a subtag whose value is {@code "lvariant"}.
1314      *
1315      * It's also important to note that the BCP-47 tag is well formed in the sense
1316      * that it is unambiguously parseable into its specified components. We do not
1317      * require that any of the components are registered with the applicable registries.
1318      * For example, we do not require scripts to be a registered ISO 15924 scripts or
1319      * languages to appear in the ISO-639-2 code list.
1320      *
1321      * @since 1.7
1322      */
toLanguageTag()1323     public String toLanguageTag() {
1324         if (cachedLanguageTag == null) {
1325             cachedLanguageTag = makeLanguageTag();
1326         }
1327 
1328         return cachedLanguageTag;
1329     }
1330 
1331     /**
1332      * Constructs a valid BCP-47 language tag from locale fields. Additional validation
1333      * is required when this Locale was not constructed using a Builder and variants
1334      * set this way are treated specially.
1335      *
1336      * In both cases, we convert empty language tags to "und", omit invalid country tags
1337      * and perform a special case conversion of "no-NO-NY" to "nn-NO".
1338      */
makeLanguageTag()1339     private String makeLanguageTag() {
1340         // We only need to revalidate the language, country and variant because
1341         // the rest of the fields can only be set via the builder which validates
1342         // them anyway.
1343         String language = "";
1344         String region = "";
1345         String variant = "";
1346         String illFormedVariantSubtags = "";
1347 
1348         if (hasValidatedFields) {
1349             language = languageCode;
1350             region = countryCode;
1351             // Note that we are required to normalize hyphens to underscores
1352             // in the builder, but we must use hyphens in the BCP-47 language tag.
1353             variant = variantCode.replace('_', '-');
1354         } else {
1355             language = Builder.normalizeAndValidateLanguage(languageCode, false /* strict */);
1356             region = Builder.normalizeAndValidateRegion(countryCode, false /* strict */);
1357 
1358             try {
1359                 variant = Builder.normalizeAndValidateVariant(variantCode);
1360             } catch (IllformedLocaleException ilfe) {
1361                 // If our variant is ill formed, we must attempt to split it into
1362                 // its constituent subtags and preserve the well formed bits and
1363                 // move the rest to the private use extension (if they're well
1364                 // formed extension subtags).
1365                 String split[] = splitIllformedVariant(variantCode);
1366 
1367                 variant = split[0];
1368                 illFormedVariantSubtags = split[1];
1369             }
1370         }
1371 
1372         if (language.isEmpty()) {
1373             language = UNDETERMINED_LANGUAGE;
1374         }
1375 
1376         if ("no".equals(language) && "NO".equals(region) && "NY".equals(variant)) {
1377             language = "nn";
1378             region = "NO";
1379             variant = "";
1380         }
1381 
1382         final StringBuilder sb = new StringBuilder(16);
1383         sb.append(language);
1384 
1385         if (!scriptCode.isEmpty()) {
1386             sb.append('-');
1387             sb.append(scriptCode);
1388         }
1389 
1390         if (!region.isEmpty()) {
1391             sb.append('-');
1392             sb.append(region);
1393         }
1394 
1395         if (!variant.isEmpty()) {
1396             sb.append('-');
1397             sb.append(variant);
1398         }
1399 
1400         // Extensions (optional, omitted if empty). Note that we don't
1401         // emit the private use extension here, but add it in the end.
1402         for (Map.Entry<Character, String> extension : extensions.entrySet()) {
1403             if (!extension.getKey().equals('x')) {
1404                 sb.append('-').append(extension.getKey());
1405                 sb.append('-').append(extension.getValue());
1406             }
1407         }
1408 
1409         // The private use extension comes right at the very end.
1410         final String privateUse = extensions.get('x');
1411         if (privateUse != null) {
1412             sb.append("-x-");
1413             sb.append(privateUse);
1414         }
1415 
1416         // If we have any ill-formed variant subtags, we append them to the
1417         // private use extension (or add a private use extension if one doesn't
1418         // exist).
1419         if (!illFormedVariantSubtags.isEmpty()) {
1420             if (privateUse == null) {
1421                 sb.append("-x-lvariant-");
1422             } else {
1423                 sb.append('-');
1424             }
1425             sb.append(illFormedVariantSubtags);
1426         }
1427 
1428         return sb.toString();
1429     }
1430 
1431     /**
1432      * Splits ill formed variants into a set of valid variant subtags (which
1433      * can be used directly in language tag construction) and a set of invalid
1434      * variant subtags (which can be appended to the private use extension),
1435      * provided that each subtag is a valid private use extension subtag.
1436      *
1437      * This method returns a two element String array. The first element is a string
1438      * containing the concatenation of valid variant subtags which can be appended
1439      * to a BCP-47 tag directly and the second containing the concatenation of
1440      * invalid variant subtags which can be appended to the private use extension
1441      * directly.
1442      *
1443      * This method assumes that {@code variant} contains at least one ill formed
1444      * variant subtag.
1445      */
splitIllformedVariant(String variant)1446     private static String[] splitIllformedVariant(String variant) {
1447         final String normalizedVariant = variant.replace('_', '-');
1448         final String[] subTags = normalizedVariant.split("-");
1449 
1450         final String[] split = new String[] { "", "" };
1451 
1452         // First go through the list of variant subtags and check if they're
1453         // valid private use extension subtags. If they're not, we will omit
1454         // the first such subtag and all subtags after.
1455         //
1456         // NOTE: |firstInvalidSubtag| is the index of the first variant
1457         // subtag we decide to omit altogether, whereas |firstIllformedSubtag| is the
1458         // index of the first subtag we decide to append to the private use extension.
1459         //
1460         // In other words:
1461         // [0, firstIllformedSubtag) => expressed as variant subtags.
1462         // [firstIllformedSubtag, firstInvalidSubtag) => expressed as private use
1463         // extension subtags.
1464         // [firstInvalidSubtag, subTags.length) => omitted.
1465         int firstInvalidSubtag = subTags.length;
1466         for (int i = 0; i < subTags.length; ++i) {
1467             if (!isValidBcp47Alphanum(subTags[i], 1, 8)) {
1468                 firstInvalidSubtag = i;
1469                 break;
1470             }
1471         }
1472 
1473         if (firstInvalidSubtag == 0) {
1474             return split;
1475         }
1476 
1477         // We now consider each subtag that could potentially be appended to
1478         // the private use extension and check if it's valid.
1479         int firstIllformedSubtag = firstInvalidSubtag;
1480         for (int i = 0; i < firstInvalidSubtag; ++i) {
1481             final String subTag = subTags[i];
1482             // The BCP-47 spec states that :
1483             // - Subtags can be between [5, 8] alphanumeric chars in length.
1484             // - Subtags that start with a number are allowed to be 4 chars in length.
1485             if (subTag.length() >= 5 && subTag.length() <= 8) {
1486                 if (!isAsciiAlphaNum(subTag)) {
1487                     firstIllformedSubtag = i;
1488                 }
1489             } else if (subTag.length() == 4) {
1490                 final char firstChar = subTag.charAt(0);
1491                 if (!(firstChar >= '0' && firstChar <= '9') || !isAsciiAlphaNum(subTag)) {
1492                     firstIllformedSubtag = i;
1493                 }
1494             } else {
1495                 firstIllformedSubtag = i;
1496             }
1497         }
1498 
1499         split[0] = concatenateRange(subTags, 0, firstIllformedSubtag);
1500         split[1] = concatenateRange(subTags, firstIllformedSubtag, firstInvalidSubtag);
1501 
1502         return split;
1503     }
1504 
1505     /**
1506      * Builds a string by concatenating array elements within the range [start, end).
1507      * The supplied range is assumed to be valid and no checks are performed.
1508      */
concatenateRange(String[] array, int start, int end)1509     private static String concatenateRange(String[] array, int start, int end) {
1510         StringBuilder builder = new StringBuilder(32);
1511         for (int i = start; i < end; ++i) {
1512             if (i != start) {
1513                 builder.append('-');
1514             }
1515             builder.append(array[i]);
1516         }
1517 
1518         return builder.toString();
1519     }
1520 
1521     /**
1522      * Returns the set of BCP-47 extensions this locale contains.
1523      *
1524      * See <a href="https://tools.ietf.org/html/bcp47#section-2.1">
1525      *     the IETF BCP-47 specification</a> (Section 2.2.6) for details.
1526      *
1527      * @since 1.7
1528      */
getExtensionKeys()1529     public Set<Character> getExtensionKeys() {
1530         return extensions.keySet();
1531     }
1532 
1533     /**
1534      * Returns the BCP-47 extension whose key is {@code extensionKey}, or {@code null}
1535      * if this locale does not contain the extension.
1536      *
1537      * Individual Keywords and attributes for the unicode
1538      * locale extension can be fetched using {@link #getUnicodeLocaleAttributes()},
1539      * {@link #getUnicodeLocaleKeys()}  and {@link #getUnicodeLocaleType}.
1540      *
1541      * @since 1.7
1542      */
getExtension(char extensionKey)1543     public String getExtension(char extensionKey) {
1544         return extensions.get(extensionKey);
1545     }
1546 
1547     /**
1548      * Returns the {@code type} for the specified unicode locale extension {@code key}.
1549      *
1550      * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword}
1551      * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a>
1552      *
1553      * @since 1.7
1554      */
getUnicodeLocaleType(String keyWord)1555     public String getUnicodeLocaleType(String keyWord) {
1556         return unicodeKeywords.get(keyWord);
1557     }
1558 
1559     /**
1560      * Returns the set of unicode locale extension attributes this locale contains.
1561      *
1562      * For more information about attributes, see {@link Builder#addUnicodeLocaleAttribute}
1563      * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a>
1564      *
1565      * @since 1.7
1566      */
getUnicodeLocaleAttributes()1567     public Set<String> getUnicodeLocaleAttributes() {
1568         return unicodeAttributes;
1569     }
1570 
1571     /**
1572      * Returns the set of unicode locale extension keywords this locale contains.
1573      *
1574      * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword}
1575      * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a>
1576      *
1577      * @since 1.7
1578      */
getUnicodeLocaleKeys()1579     public Set<String> getUnicodeLocaleKeys() {
1580         return unicodeKeywords.keySet();
1581     }
1582 
1583     @Override
hashCode()1584     public synchronized int hashCode() {
1585         return countryCode.hashCode()
1586                 + languageCode.hashCode() + variantCode.hashCode()
1587                 + scriptCode.hashCode() + extensions.hashCode();
1588     }
1589 
1590     /**
1591      * Overrides the default locale. This does not affect system configuration,
1592      * and attempts to override the system-provided default locale may
1593      * themselves be overridden by actual changes to the system configuration.
1594      * Code that calls this method is usually incorrect, and should be fixed by
1595      * passing the appropriate locale to each locale-sensitive method that's
1596      * called.
1597      */
setDefault(Locale locale)1598     public synchronized static void setDefault(Locale locale) {
1599         if (locale == null) {
1600             throw new NullPointerException("locale == null");
1601         }
1602         String languageTag = locale.toLanguageTag();
1603         defaultLocale = locale;
1604         ICU.setDefaultLocale(languageTag);
1605     }
1606 
1607     /**
1608      * Returns the string representation of this {@code Locale}. It consists of the
1609      * language code, country code and variant separated by underscores.
1610      * If the language is missing the string begins
1611      * with an underscore. If the country is missing there are 2 underscores
1612      * between the language and the variant. The variant cannot stand alone
1613      * without a language and/or country code: in this case this method would
1614      * return the empty string.
1615      *
1616      * <p>Examples: "en", "en_US", "_US", "en__POSIX", "en_US_POSIX"
1617      */
1618     @Override
toString()1619     public final String toString() {
1620         String result = cachedToStringResult;
1621         if (result == null) {
1622             result = cachedToStringResult = toNewString(languageCode, countryCode, variantCode,
1623                                                         scriptCode, extensions);
1624         }
1625         return result;
1626     }
1627 
toNewString(String languageCode, String countryCode, String variantCode, String scriptCode, Map<Character, String> extensions)1628     private static String toNewString(String languageCode, String countryCode,
1629             String variantCode, String scriptCode, Map<Character, String> extensions) {
1630         // The string form of a locale that only has a variant is the empty string.
1631         if (languageCode.length() == 0 && countryCode.length() == 0) {
1632             return "";
1633         }
1634 
1635         // Otherwise, the output format is "ll_cc_variant", where language and country are always
1636         // two letters, but the variant is an arbitrary length. A size of 11 characters has room
1637         // for "en_US_POSIX", the largest "common" value. (In practice, the string form is almost
1638         // always 5 characters: "ll_cc".)
1639         StringBuilder result = new StringBuilder(11);
1640         result.append(languageCode);
1641 
1642         final boolean hasScriptOrExtensions = !scriptCode.isEmpty() || !extensions.isEmpty();
1643 
1644         if (!countryCode.isEmpty() || !variantCode.isEmpty() || hasScriptOrExtensions) {
1645             result.append('_');
1646         }
1647         result.append(countryCode);
1648         if (!variantCode.isEmpty() || hasScriptOrExtensions) {
1649             result.append('_');
1650         }
1651         result.append(variantCode);
1652 
1653         if (hasScriptOrExtensions) {
1654             if (!variantCode.isEmpty()) {
1655                 result.append('_');
1656             }
1657 
1658             // Note that this is notably different from the BCP-47 spec (for
1659             // backwards compatibility). We are forced to append a "#" before the script tag.
1660             // and also put the script code right at the end.
1661             result.append("#");
1662             if (!scriptCode.isEmpty() ) {
1663                 result.append(scriptCode);
1664             }
1665 
1666             // Note the use of "-" instead of "_" before the extensions.
1667             if (!extensions.isEmpty()) {
1668                 if (!scriptCode.isEmpty()) {
1669                     result.append('-');
1670                 }
1671                 result.append(serializeExtensions(extensions));
1672             }
1673         }
1674 
1675         return result.toString();
1676     }
1677 
    /**
     * Names and types of the fields that make up the serialized form of a
     * {@code Locale}. {@code writeObject} and {@code readObject} address the
     * stream fields by these names.
     */
    private static final ObjectStreamField[] serialPersistentFields = {
        new ObjectStreamField("country", String.class),
        // writeObject always stores -1 here and readObject never reads it back.
        new ObjectStreamField("hashcode", int.class),
        new ObjectStreamField("language", String.class),
        new ObjectStreamField("variant", String.class),
        new ObjectStreamField("script", String.class),
        // Only written when the locale has extensions, in the format produced
        // by serializeExtensions.
        new ObjectStreamField("extensions", String.class),
    };
1686 
writeObject(ObjectOutputStream stream)1687     private void writeObject(ObjectOutputStream stream) throws IOException {
1688         ObjectOutputStream.PutField fields = stream.putFields();
1689         fields.put("country", countryCode);
1690         fields.put("hashcode", -1);
1691         fields.put("language", languageCode);
1692         fields.put("variant", variantCode);
1693         fields.put("script", scriptCode);
1694 
1695         if (!extensions.isEmpty()) {
1696             fields.put("extensions", serializeExtensions(extensions));
1697         }
1698 
1699         stream.writeFields();
1700     }
1701 
readObject(ObjectInputStream stream)1702     private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
1703         ObjectInputStream.GetField fields = stream.readFields();
1704         countryCode = (String) fields.get("country", "");
1705         languageCode = (String) fields.get("language", "");
1706         variantCode = (String) fields.get("variant", "");
1707         scriptCode = (String) fields.get("script", "");
1708 
1709         this.unicodeKeywords = Collections.EMPTY_MAP;
1710         this.unicodeAttributes = Collections.EMPTY_SET;
1711         this.extensions = Collections.EMPTY_MAP;
1712 
1713         String extensions = (String) fields.get("extensions", null);
1714         if (extensions != null) {
1715             readExtensions(extensions);
1716         }
1717     }
1718 
readExtensions(String extensions)1719     private void readExtensions(String extensions) {
1720         Map<Character, String> extensionsMap = new TreeMap<Character, String>();
1721         parseSerializedExtensions(extensions, extensionsMap);
1722         this.extensions = Collections.unmodifiableMap(extensionsMap);
1723 
1724         if (extensionsMap.containsKey(UNICODE_LOCALE_EXTENSION)) {
1725             String unicodeExtension = extensionsMap.get(UNICODE_LOCALE_EXTENSION);
1726             String[] subTags = unicodeExtension.split("-");
1727 
1728             Map<String, String> unicodeKeywords = new TreeMap<String, String>();
1729             Set<String> unicodeAttributes = new TreeSet<String>();
1730             parseUnicodeExtension(subTags, unicodeKeywords, unicodeAttributes);
1731 
1732             this.unicodeKeywords = Collections.unmodifiableMap(unicodeKeywords);
1733             this.unicodeAttributes = Collections.unmodifiableSet(unicodeAttributes);
1734         }
1735     }
1736 
1737     /**
1738      * The serialized form for extensions is straightforward. It's simply
1739      * of the form key1-value1-key2-value2 where each value might in turn contain
1740      * multiple subtags separated by hyphens. Each key is guaranteed to be a single
1741      * character in length.
1742      *
1743      * This method assumes that {@code extensionsMap} is non-empty.
1744      *
1745      * Visible for testing.
1746      *
1747      * @hide
1748      */
serializeExtensions(Map<Character, String> extensionsMap)1749     public static String serializeExtensions(Map<Character, String> extensionsMap) {
1750         Iterator<Map.Entry<Character, String>> entryIterator = extensionsMap.entrySet().iterator();
1751         StringBuilder sb = new StringBuilder(64);
1752 
1753         while (true) {
1754             final Map.Entry<Character, String> entry = entryIterator.next();
1755             sb.append(entry.getKey());
1756             sb.append('-');
1757             sb.append(entry.getValue());
1758 
1759             if (entryIterator.hasNext()) {
1760                 sb.append('-');
1761             } else {
1762                 break;
1763             }
1764         }
1765 
1766         return sb.toString();
1767     }
1768 
    /**
     * Parses a string of the form key1-value1-key2-value2 (as produced by
     * {@code serializeExtensions}) and stores each key/value pair in
     * {@code outputMap}. Every subtag that is exactly one character long is
     * treated as an extension key; the value of a key is the text between it
     * and the next key (or the end of the string).
     *
     * NOTE(review): because every one-character subtag is taken to be a key, a
     * value that itself contains a one-character subtag would be split at that
     * subtag, and inputs with more keys than {@code subTags.length / 2} would
     * overflow {@code typeStartIndices}. Presumably callers only pass strings
     * that do not contain such values - confirm.
     *
     * Visible for testing.
     *
     * @param extString the serialized extensions
     * @param outputMap receives one entry per extension key found
     *
     * @hide
     */
    public static void parseSerializedExtensions(String extString, Map<Character, String> outputMap) {
        // This probably isn't the most efficient approach, but it's the
        // most straightforward to code.
        //
        // Start by splitting the string on "-". We will then keep track of
        // where each of the extension keys (single characters) appear in the
        // original string and then use those indices to construct substrings
        // representing the values.
        final String[] subTags = extString.split("-");
        // Sized for the case where every key is followed by at least one
        // value subtag.
        final int[] typeStartIndices = new int[subTags.length / 2];

        // |length| is the offset in extString just past the current subtag and
        // its trailing "-"; |count| is the number of keys seen so far.
        int length = 0;
        int count = 0;
        for (String subTag : subTags) {
            if (subTag.length() > 0) {
                // Account for the length of the "-" at the end of each subtag.
                length += (subTag.length() + 1);
            }

            if (subTag.length() == 1) {
                // A key: record the offset at which its value starts.
                typeStartIndices[count++] = length;
            }
        }

        for (int i = 0; i < count; ++i) {
            final int valueStart = typeStartIndices[i];
            // Since the start Index points to the beginning of the next type
            // ....prev-k-next.....
            //            |_ here
            // (idx - 2) is the index of the next key
            // (idx - 3) is the (non inclusive) end of the previous type.
            final int valueEnd = (i == (count - 1)) ?
                    extString.length() : (typeStartIndices[i + 1] - 3);

            // The key itself sits two characters before the start of its value.
            outputMap.put(extString.charAt(typeStartIndices[i] - 2),
                    extString.substring(valueStart, valueEnd));
        }
    }
1812 
1813 
1814     /**
1815      * A UN M.49 is a 3 digit numeric code.
1816      */
isUnM49AreaCode(String code)1817     private static boolean isUnM49AreaCode(String code) {
1818         if (code.length() != 3) {
1819             return false;
1820         }
1821 
1822         for (int i = 0; i < 3; ++i) {
1823             final char character = code.charAt(i);
1824             if (!(character >= '0' && character <= '9')) {
1825                 return false;
1826             }
1827         }
1828 
1829         return true;
1830     }
1831 
1832     /*
1833      * Checks whether a given string is an ASCII alphanumeric string.
1834      */
isAsciiAlphaNum(String string)1835     private static boolean isAsciiAlphaNum(String string) {
1836         for (int i = 0; i < string.length(); i++) {
1837             final char character = string.charAt(i);
1838             if (!(character >= 'a' && character <= 'z' ||
1839                     character >= 'A' && character <= 'Z' ||
1840                     character >= '0' && character <= '9')) {
1841                 return false;
1842             }
1843         }
1844 
1845         return true;
1846     }
1847 
isValidBcp47Alpha(String string, int lowerBound, int upperBound)1848     private static boolean isValidBcp47Alpha(String string, int lowerBound, int upperBound) {
1849         final int length = string.length();
1850         if (length < lowerBound || length > upperBound) {
1851             return false;
1852         }
1853 
1854         for (int i = 0; i < length; ++i) {
1855             final char character = string.charAt(i);
1856             if (!(character >= 'a' && character <= 'z' ||
1857                     character >= 'A' && character <= 'Z')) {
1858                 return false;
1859             }
1860         }
1861 
1862         return true;
1863     }
1864 
isValidBcp47Alphanum(String attributeOrType, int lowerBound, int upperBound)1865     private static boolean isValidBcp47Alphanum(String attributeOrType,
1866             int lowerBound, int upperBound) {
1867         if (attributeOrType.length() < lowerBound || attributeOrType.length() > upperBound) {
1868             return false;
1869         }
1870 
1871         return isAsciiAlphaNum(attributeOrType);
1872     }
1873 
titleCaseAsciiWord(String word)1874     private static String titleCaseAsciiWord(String word) {
1875         try {
1876             byte[] chars = word.toLowerCase(Locale.ROOT).getBytes(StandardCharsets.US_ASCII);
1877             chars[0] = (byte) ((int) chars[0] + 'A' - 'a');
1878             return new String(chars, StandardCharsets.US_ASCII);
1879         } catch (UnsupportedOperationException uoe) {
1880             throw new AssertionError(uoe);
1881         }
1882     }
1883 
1884     /**
1885      * A type list must contain one or more alphanumeric subtags whose lengths
1886      * are between 3 and 8.
1887      */
isValidTypeList(String lowerCaseTypeList)1888     private static boolean isValidTypeList(String lowerCaseTypeList) {
1889         final String[] splitList = lowerCaseTypeList.split("-");
1890         for (String type : splitList) {
1891             if (!isValidBcp47Alphanum(type, 3, 8)) {
1892                 return false;
1893             }
1894         }
1895 
1896         return true;
1897     }
1898 
addUnicodeExtensionToExtensionsMap( Set<String> attributes, Map<String, String> keywords, Map<Character, String> extensions)1899     private static void addUnicodeExtensionToExtensionsMap(
1900             Set<String> attributes, Map<String, String> keywords,
1901             Map<Character, String> extensions) {
1902         if (attributes.isEmpty() && keywords.isEmpty()) {
1903             return;
1904         }
1905 
1906         // Assume that the common case is a low number of keywords & attributes
1907         // (usually one or two).
1908         final StringBuilder sb = new StringBuilder(32);
1909 
1910         // All attributes must appear before keywords, in lexical order.
1911         if (!attributes.isEmpty()) {
1912             Iterator<String> attributesIterator = attributes.iterator();
1913             while (true) {
1914                 sb.append(attributesIterator.next());
1915                 if (attributesIterator.hasNext()) {
1916                     sb.append('-');
1917                 } else {
1918                     break;
1919                 }
1920             }
1921         }
1922 
1923         if (!keywords.isEmpty()) {
1924             if (!attributes.isEmpty()) {
1925                 sb.append('-');
1926             }
1927 
1928             Iterator<Map.Entry<String, String>> keywordsIterator = keywords.entrySet().iterator();
1929             while (true) {
1930                 final Map.Entry<String, String> keyWord = keywordsIterator.next();
1931                 sb.append(keyWord.getKey());
1932                 if (!keyWord.getValue().isEmpty()) {
1933                     sb.append('-');
1934                     sb.append(keyWord.getValue());
1935                 }
1936                 if (keywordsIterator.hasNext()) {
1937                     sb.append('-');
1938                 } else {
1939                     break;
1940                 }
1941             }
1942         }
1943 
1944         extensions.put(UNICODE_LOCALE_EXTENSION, sb.toString());
1945     }
1946 
1947     /**
1948      * This extension is described by http://www.unicode.org/reports/tr35/#RFC5234
1949      * unicode_locale_extensions = sep "u" (1*(sep keyword) / 1*(sep attribute) *(sep keyword)).
1950      *
1951      * It must contain at least one keyword or attribute and attributes (if any)
1952      * must appear before keywords. Attributes can't appear after keywords because
1953      * they will be indistinguishable from a subtag of the keyword type.
1954      *
1955      * Visible for testing.
1956      *
1957      * @hide
1958      */
parseUnicodeExtension(String[] subtags, Map<String, String> keywords, Set<String> attributes)1959     public static void parseUnicodeExtension(String[] subtags,
1960             Map<String, String> keywords, Set<String> attributes)  {
1961         String lastKeyword = null;
1962         List<String> subtagsForKeyword = new ArrayList<String>();
1963         for (String subtag : subtags) {
1964             if (subtag.length() == 2) {
1965                 if (subtagsForKeyword.size() > 0) {
1966                     keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword));
1967                     subtagsForKeyword.clear();
1968                 }
1969 
1970                 lastKeyword = subtag;
1971             } else if (subtag.length() > 2) {
1972                 if (lastKeyword == null) {
1973                     attributes.add(subtag);
1974                 } else {
1975                     subtagsForKeyword.add(subtag);
1976                 }
1977             }
1978         }
1979 
1980         if (subtagsForKeyword.size() > 0) {
1981             keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword));
1982         } else if (lastKeyword != null) {
1983             keywords.put(lastKeyword, "");
1984         }
1985     }
1986 
1987     /**
1988      * Joins a list of subtags into a BCP-47 tag using the standard separator
1989      * ("-").
1990      */
joinBcp47Subtags(List<String> strings)1991     private static String joinBcp47Subtags(List<String> strings) {
1992         final int size = strings.size();
1993 
1994         StringBuilder sb = new StringBuilder(strings.get(0).length());
1995         for (int i = 0; i < size; ++i) {
1996             sb.append(strings.get(i));
1997             if (i != size - 1) {
1998                 sb.append('-');
1999             }
2000         }
2001 
2002         return sb.toString();
2003     }
2004 
2005     /**
2006      * @hide for internal use only.
2007      */
adjustLanguageCode(String languageCode)2008     public static String adjustLanguageCode(String languageCode) {
2009         String adjusted = languageCode.toLowerCase(Locale.US);
2010         // Map new language codes to the obsolete language
2011         // codes so the correct resource bundles will be used.
2012         if (languageCode.equals("he")) {
2013             adjusted = "iw";
2014         } else if (languageCode.equals("id")) {
2015             adjusted = "in";
2016         } else if (languageCode.equals("yi")) {
2017             adjusted = "ji";
2018         }
2019 
2020         return adjusted;
2021     }
2022 
    /**
     * Map of grandfathered language tags to their modern replacements. The map
     * is ordered with {@link String#CASE_INSENSITIVE_ORDER}, so lookups ignore
     * the case of the tag.
     */
    private static final TreeMap<String, String> GRANDFATHERED_LOCALES;

    // Populates GRANDFATHERED_LOCALES with the irregular and regular
    // grandfathered tags listed in BCP-47.
    static {
        GRANDFATHERED_LOCALES = new TreeMap<String, String>(String.CASE_INSENSITIVE_ORDER);

        // From http://tools.ietf.org/html/bcp47
        //
        // grandfathered = irregular           ; non-redundant tags registered
        //               / regular             ; during the RFC 3066 era
        //  irregular =
        GRANDFATHERED_LOCALES.put("en-GB-oed", "en-GB-x-oed");
        GRANDFATHERED_LOCALES.put("i-ami", "ami");
        GRANDFATHERED_LOCALES.put("i-bnn", "bnn");
        GRANDFATHERED_LOCALES.put("i-default", "en-x-i-default");
        GRANDFATHERED_LOCALES.put("i-enochian", "und-x-i-enochian");
        GRANDFATHERED_LOCALES.put("i-hak", "hak");
        GRANDFATHERED_LOCALES.put("i-klingon", "tlh");
        GRANDFATHERED_LOCALES.put("i-lux", "lb");
        GRANDFATHERED_LOCALES.put("i-mingo", "see-x-i-mingo");
        GRANDFATHERED_LOCALES.put("i-navajo", "nv");
        GRANDFATHERED_LOCALES.put("i-pwn", "pwn");
        GRANDFATHERED_LOCALES.put("i-tao", "tao");
        GRANDFATHERED_LOCALES.put("i-tay", "tay");
        GRANDFATHERED_LOCALES.put("i-tsu", "tsu");
        GRANDFATHERED_LOCALES.put("sgn-BE-FR", "sfb");
        GRANDFATHERED_LOCALES.put("sgn-BE-NL", "vgt");
        GRANDFATHERED_LOCALES.put("sgn-CH-DE", "sgg");

        // regular =
        GRANDFATHERED_LOCALES.put("art-lojban", "jbo");
        GRANDFATHERED_LOCALES.put("cel-gaulish", "xtg-x-cel-gaulish");
        GRANDFATHERED_LOCALES.put("no-bok", "nb");
        GRANDFATHERED_LOCALES.put("no-nyn", "nn");
        GRANDFATHERED_LOCALES.put("zh-guoyu", "cmn");
        GRANDFATHERED_LOCALES.put("zh-hakka", "hak");
        GRANDFATHERED_LOCALES.put("zh-min", "nan-x-zh-min");
        GRANDFATHERED_LOCALES.put("zh-min-nan", "nan");
        GRANDFATHERED_LOCALES.put("zh-xiang", "hsn");
    }
2065 
convertGrandfatheredTag(String original)2066     private static String convertGrandfatheredTag(String original) {
2067         final String converted = GRANDFATHERED_LOCALES.get(original);
2068         return converted != null ? converted : original;
2069     }
2070 
2071     /**
2072      * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)}
2073      * and appends valid variant subtags upto the first invalid subtag  (if any) to
2074      * {@code normalizedVariants}.
2075      */
extractVariantSubtags(String[] subtags, int startIndex, int endIndex, List<String> normalizedVariants)2076     private static void extractVariantSubtags(String[] subtags, int startIndex, int endIndex,
2077             List<String> normalizedVariants) {
2078         for (int i = startIndex; i < endIndex; i++) {
2079             final String subtag = subtags[i];
2080 
2081             if (Builder.isValidVariantSubtag(subtag)) {
2082                 normalizedVariants.add(subtag);
2083             } else {
2084                 break;
2085             }
2086         }
2087     }
2088 
    /**
     * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)}
     * and inserts valid extensions into {@code extensions}. The scan is aborted
     * when an invalid extension is encountered. Returns the index of the first
     * unparsable element of {@code subtags}, or {@code endIndex} if the whole
     * range was parsed.
     *
     * <p>An extension consists of a one character key followed by one or more
     * value subtags. Values are stored lower-cased and hyphen separated; keys
     * are stored as they appear in {@code subtags}.
     *
     * @param subtags the subtags of the language tag being parsed
     * @param startIndex index of the first subtag to scan (inclusive)
     * @param endIndex index at which to stop scanning (exclusive)
     * @param extensions receives the parsed extensions, keyed by extension key
     * @return the index of the first element that could not be parsed
     */
    private static int extractExtensions(String[] subtags, int startIndex, int endIndex,
            Map<Character, String> extensions) {
        // Index of the "x" key, or -1 if no private use extension was seen yet.
        int privateUseExtensionIndex = -1;
        // Index of the key of the extension currently being parsed, or -1.
        int extensionKeyIndex = -1;

        int i = startIndex;
        for (; i < endIndex; i++) {
            final String subtag = subtags[i];

            // True while the subtags of the private use extension are being read.
            final boolean parsingPrivateUse = (privateUseExtensionIndex != -1) &&
                    (extensionKeyIndex == privateUseExtensionIndex);

            // Note that private use extensions allow subtags of length 1.
            // Private use extensions *must* come last, so there's no ambiguity
            // in that case.
            if (subtag.length() == 1 && !parsingPrivateUse) {
                // Emit the last extension we encountered if any. First check
                // whether we encountered two keys in a row (which is an error).
                // Also checks if we already have an extension with the same key,
                // which is again an error.
                if (extensionKeyIndex != -1) {
                    if ((i - 1) == extensionKeyIndex) {
                        return extensionKeyIndex;
                    }

                    final String key = subtags[extensionKeyIndex];
                    if (extensions.containsKey(key.charAt(0))) {
                        return extensionKeyIndex;
                    }

                    final String value = concatenateRange(subtags, extensionKeyIndex + 1, i);
                    extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT));
                }

                // Mark the start of the next extension. Also keep track of whether this
                // is a private use extension, and stop parsing (reporting the index of
                // the private use key) if it doesn't come last.
                extensionKeyIndex = i;
                if ("x".equals(subtag)) {
                    privateUseExtensionIndex = i;
                } else if (privateUseExtensionIndex != -1) {
                    // The private use extension must come last.
                    return privateUseExtensionIndex;
                }
            } else if (extensionKeyIndex != -1) {
                // We must have encountered a valid key in order to start parsing
                // its subtags. Private use subtags may be 1 to 8 characters long,
                // all other extension subtags 2 to 8.
                if (!isValidBcp47Alphanum(subtag, parsingPrivateUse ? 1 : 2, 8)) {
                    return i;
                }
            } else {
                // Encountered a value without a preceding key.
                return i;
            }
        }

        // Emit the extension that was still being parsed when the range ended,
        // applying the same checks as above: a key without any value and a
        // duplicate key are both errors.
        if (extensionKeyIndex != -1) {
            if ((i - 1) == extensionKeyIndex) {
                return extensionKeyIndex;
            }

            final String key = subtags[extensionKeyIndex];
            if (extensions.containsKey(key.charAt(0))) {
                return extensionKeyIndex;
            }

            final String value = concatenateRange(subtags, extensionKeyIndex + 1, i);
            extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT));
        }

        return i;
    }
2166 
forLanguageTag( String tag, boolean strict)2167     private static Locale forLanguageTag(/* @Nonnull */ String tag, boolean strict) {
2168         final String converted = convertGrandfatheredTag(tag);
2169         final String[] subtags = converted.split("-");
2170 
2171         int lastSubtag = subtags.length;
2172         for (int i = 0; i < subtags.length; ++i) {
2173             final String subtag = subtags[i];
2174             if (subtag.isEmpty() || subtag.length() > 8) {
2175                 if (strict) {
2176                     throw new IllformedLocaleException("Invalid subtag at index: " + i
2177                             + " in tag: " + tag);
2178                 } else {
2179                     lastSubtag = (i - 1);
2180                 }
2181 
2182                 break;
2183             }
2184         }
2185 
2186         final String languageCode = Builder.normalizeAndValidateLanguage(subtags[0], strict);
2187         String scriptCode = "";
2188         int nextSubtag = 1;
2189         if (lastSubtag > nextSubtag) {
2190             scriptCode = Builder.normalizeAndValidateScript(subtags[nextSubtag], false /* strict */);
2191             if (!scriptCode.isEmpty()) {
2192                 nextSubtag++;
2193             }
2194         }
2195 
2196         String regionCode = "";
2197         if (lastSubtag > nextSubtag) {
2198             regionCode = Builder.normalizeAndValidateRegion(subtags[nextSubtag], false /* strict */);
2199             if (!regionCode.isEmpty()) {
2200                 nextSubtag++;
2201             }
2202         }
2203 
2204         List<String> variants = null;
2205         if (lastSubtag > nextSubtag) {
2206             variants = new ArrayList<String>();
2207             extractVariantSubtags(subtags, nextSubtag, lastSubtag, variants);
2208             nextSubtag += variants.size();
2209         }
2210 
2211         Map<Character, String> extensions = Collections.EMPTY_MAP;
2212         if (lastSubtag > nextSubtag) {
2213             extensions = new TreeMap<Character, String>();
2214             nextSubtag = extractExtensions(subtags, nextSubtag, lastSubtag, extensions);
2215         }
2216 
2217         if (nextSubtag != lastSubtag) {
2218             if (strict) {
2219                 throw new IllformedLocaleException("Unparseable subtag: " + subtags[nextSubtag]
2220                         + " from language tag: " + tag);
2221             }
2222         }
2223 
2224         Set<String> unicodeKeywords = Collections.EMPTY_SET;
2225         Map<String, String> unicodeAttributes = Collections.EMPTY_MAP;
2226         if (extensions.containsKey(UNICODE_LOCALE_EXTENSION)) {
2227             unicodeKeywords = new TreeSet<String>();
2228             unicodeAttributes = new TreeMap<String, String>();
2229             parseUnicodeExtension(extensions.get(UNICODE_LOCALE_EXTENSION).split("-"),
2230                     unicodeAttributes, unicodeKeywords);
2231         }
2232 
2233         String variantCode = "";
2234         if (variants != null && !variants.isEmpty()) {
2235             StringBuilder variantsBuilder = new StringBuilder(variants.size() * 8);
2236             for (int i = 0; i < variants.size(); ++i) {
2237                 if (i != 0) {
2238                     variantsBuilder.append('_');
2239                 }
2240                 variantsBuilder.append(variants.get(i));
2241             }
2242             variantCode = variantsBuilder.toString();
2243         }
2244 
2245         return new Locale(languageCode, regionCode, variantCode, scriptCode,
2246                 unicodeKeywords, unicodeAttributes, extensions, true /* has validated fields */);
2247     }
2248 }
2249