1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package java.util; 19 20 import java.io.IOException; 21 import java.io.ObjectInputStream; 22 import java.io.ObjectOutputStream; 23 import java.io.ObjectStreamField; 24 import java.io.Serializable; 25 import java.nio.charset.StandardCharsets; 26 import libcore.icu.ICU; 27 28 /** 29 * {@code Locale} represents a language/country/variant combination. Locales are used to 30 * alter the presentation of information such as numbers or dates to suit the conventions 31 * in the region they describe. 32 * 33 * <p>The language codes are two-letter lowercase ISO language codes (such as "en") as defined by 34 * <a href="http://en.wikipedia.org/wiki/ISO_639-1">ISO 639-1</a>. 35 * The country codes are two-letter uppercase ISO country codes (such as "US") as defined by 36 * <a href="http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2">ISO 3166-1</a>. 37 * The variant codes are unspecified. 38 * 39 * <p>Note that Java uses several deprecated two-letter codes. The Hebrew ("he") language 40 * code is rewritten as "iw", Indonesian ("id") as "in", and Yiddish ("yi") as "ji". 
This 41 * rewriting happens even if you construct your own {@code Locale} object, not just for 42 * instances returned by the various lookup methods. 43 * 44 * <a name="available_locales"></a><h3>Available locales</h3> 45 * <p>This class' constructors do no error checking. You can create a {@code Locale} for languages 46 * and countries that don't exist, and you can create instances for combinations that don't 47 * exist (such as "de_US" for "German as spoken in the US"). 48 * 49 * <p>Note that locale data is not necessarily available for any of the locales pre-defined as 50 * constants in this class except for en_US, which is the only locale Java guarantees is always 51 * available. 52 * 53 * <p>It is also a mistake to assume that all devices have the same locales available. 54 * A device sold in the US will almost certainly support en_US and es_US, but not necessarily 55 * any locales with the same language but different countries (such as en_GB or es_ES), 56 * nor any locales for other languages (such as de_DE). The opposite may well be true for a device 57 * sold in Europe. 58 * 59 * <p>You can use {@link Locale#getDefault} to get an appropriate locale for the <i>user</i> of the 60 * device you're running on, or {@link Locale#getAvailableLocales} to get a list of all the locales 61 * available on the device you're running on. 62 * 63 * <a name="locale_data"></a><h3>Locale data</h3> 64 * <p>Note that locale data comes solely from ICU. User-supplied locale service providers (using 65 * the {@code java.text.spi} or {@code java.util.spi} mechanisms) are not supported. 
66 * 67 * <p>Here are the versions of ICU (and the corresponding CLDR and Unicode versions) used in 68 * various Android releases: 69 * <table BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""> 70 * <tr><td>Android 1.5 (Cupcake)/Android 1.6 (Donut)/Android 2.0 (Eclair)</td> 71 * <td>ICU 3.8</td> 72 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-5">CLDR 1.5</a></td> 73 * <td><a href="http://www.unicode.org/versions/Unicode5.0.0/">Unicode 5.0</a></td></tr> 74 * <tr><td>Android 2.2 (Froyo)</td> 75 * <td>ICU 4.2</td> 76 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-7">CLDR 1.7</a></td> 77 * <td><a href="http://www.unicode.org/versions/Unicode5.1.0/">Unicode 5.1</a></td></tr> 78 * <tr><td>Android 2.3 (Gingerbread)/Android 3.0 (Honeycomb)</td> 79 * <td>ICU 4.4</td> 80 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-8">CLDR 1.8</a></td> 81 * <td><a href="http://www.unicode.org/versions/Unicode5.2.0/">Unicode 5.2</a></td></tr> 82 * <tr><td>Android 4.0 (Ice Cream Sandwich)</td> 83 * <td><a href="http://site.icu-project.org/download/46">ICU 4.6</a></td> 84 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-9">CLDR 1.9</a></td> 85 * <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr> 86 * <tr><td>Android 4.1 (Jelly Bean)</td> 87 * <td><a href="http://site.icu-project.org/download/48">ICU 4.8</a></td> 88 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-2-0">CLDR 2.0</a></td> 89 * <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr> 90 * <tr><td>Android 4.3 (Jelly Bean MR2)</td> 91 * <td><a href="http://site.icu-project.org/download/50">ICU 50</a></td> 92 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-22-1">CLDR 22.1</a></td> 93 * <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr> 94 * <tr><td>Android 4.4 (KitKat)</td> 95 * <td><a href="http://site.icu-project.org/download/51">ICU 
51</a></td> 96 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-23">CLDR 23</a></td> 97 * <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr> 98 * <tr><td>Android 5.0 (Lollipop)</td> 99 * <td><a href="http://site.icu-project.org/download/53">ICU 53</a></td> 100 * <td><a href="http://cldr.unicode.org/index/downloads/cldr-25">CLDR 25</a></td> 101 * <td><a href="http://www.unicode.org/versions/Unicode6.3.0/">Unicode 6.3</a></td></tr> 102 * </table> 103 * 104 * <a name="default_locale"></a><h3>Be wary of the default locale</h3> 105 * <p>Note that there are many convenience methods that automatically use the default locale, but 106 * using them may lead to subtle bugs. 107 * 108 * <p>The default locale is appropriate for tasks that involve presenting data to the user. In 109 * this case, you want to use the user's date/time formats, number 110 * formats, rules for conversion to lowercase, and so on. In this case, it's safe to use the 111 * convenience methods. 112 * 113 * <p>The default locale is <i>not</i> appropriate for machine-readable output. The best choice 114 * there is usually {@code Locale.US} – this locale is guaranteed to be available on all 115 * devices, and the fact that it has no surprising special cases and is frequently used (especially 116 * for computer-computer communication) means that it tends to be the most efficient choice too. 117 * 118 * <p>A common mistake is to implicitly use the default locale when producing output meant to be 119 * machine-readable. This tends to work on the developer's test devices (especially because so many 120 * developers use en_US), but fails when run on a device whose user is in a more complex locale. 121 * 122 * <p>For example, if you're formatting integers some locales will use non-ASCII decimal 123 * digits. As another example, if you're formatting floating-point numbers some locales will use 124 * {@code ','} as the decimal point and {@code '.'} for digit grouping. 
That's correct for 125 * human-readable output, but likely to cause problems if presented to another 126 * computer ({@link Double#parseDouble} can't parse such a number, for example). 127 * You should also be wary of the {@link String#toLowerCase} and 128 * {@link String#toUpperCase} overloads that don't take a {@code Locale}: in Turkey, for example, 129 * the characters {@code 'i'} and {@code 'I'} won't be converted to {@code 'I'} and {@code 'i'}. 130 * This is the correct behavior for Turkish text (such as user input), but inappropriate for, say, 131 * HTTP headers. 132 */ 133 public final class Locale implements Cloneable, Serializable { 134 135 private static final long serialVersionUID = 9149081749638150636L; 136 137 /** 138 * Locale constant for en_CA. 139 */ 140 public static final Locale CANADA = new Locale(true, "en", "CA"); 141 142 /** 143 * Locale constant for fr_CA. 144 */ 145 public static final Locale CANADA_FRENCH = new Locale(true, "fr", "CA"); 146 147 /** 148 * Locale constant for zh_CN. 149 */ 150 public static final Locale CHINA = new Locale(true, "zh", "CN"); 151 152 /** 153 * Locale constant for zh. 154 */ 155 public static final Locale CHINESE = new Locale(true, "zh", ""); 156 157 /** 158 * Locale constant for en. 159 */ 160 public static final Locale ENGLISH = new Locale(true, "en", ""); 161 162 /** 163 * Locale constant for fr_FR. 164 */ 165 public static final Locale FRANCE = new Locale(true, "fr", "FR"); 166 167 /** 168 * Locale constant for fr. 169 */ 170 public static final Locale FRENCH = new Locale(true, "fr", ""); 171 172 /** 173 * Locale constant for de. 174 */ 175 public static final Locale GERMAN = new Locale(true, "de", ""); 176 177 /** 178 * Locale constant for de_DE. 179 */ 180 public static final Locale GERMANY = new Locale(true, "de", "DE"); 181 182 /** 183 * Locale constant for it. 184 */ 185 public static final Locale ITALIAN = new Locale(true, "it", ""); 186 187 /** 188 * Locale constant for it_IT. 
189 */ 190 public static final Locale ITALY = new Locale(true, "it", "IT"); 191 192 /** 193 * Locale constant for ja_JP. 194 */ 195 public static final Locale JAPAN = new Locale(true, "ja", "JP"); 196 197 /** 198 * Locale constant for ja. 199 */ 200 public static final Locale JAPANESE = new Locale(true, "ja", ""); 201 202 /** 203 * Locale constant for ko_KR. 204 */ 205 public static final Locale KOREA = new Locale(true, "ko", "KR"); 206 207 /** 208 * Locale constant for ko. 209 */ 210 public static final Locale KOREAN = new Locale(true, "ko", ""); 211 212 /** 213 * Locale constant for zh_CN. 214 */ 215 public static final Locale PRC = new Locale(true, "zh", "CN"); 216 217 /** 218 * Locale constant for the root locale. The root locale has an empty language, 219 * country, and variant. 220 * 221 * @since 1.6 222 */ 223 public static final Locale ROOT = new Locale(true, "", ""); 224 225 /** 226 * Locale constant for zh_CN. 227 */ 228 public static final Locale SIMPLIFIED_CHINESE = new Locale(true, "zh", "CN"); 229 230 /** 231 * Locale constant for zh_TW. 232 */ 233 public static final Locale TAIWAN = new Locale(true, "zh", "TW"); 234 235 /** 236 * Locale constant for zh_TW. 237 */ 238 public static final Locale TRADITIONAL_CHINESE = new Locale(true, "zh", "TW"); 239 240 /** 241 * Locale constant for en_GB. 242 */ 243 public static final Locale UK = new Locale(true, "en", "GB"); 244 245 /** 246 * Locale constant for en_US. 247 */ 248 public static final Locale US = new Locale(true, "en", "US"); 249 250 /** 251 * BCP-47 extension identifier (or "singleton") for the private 252 * use extension. 253 * 254 * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}. 255 * 256 * @since 1.7 257 */ 258 public static final char PRIVATE_USE_EXTENSION = 'x'; 259 260 /** 261 * BCP-47 extension identifier (or "singleton") for the unicode locale extension. 262 * 263 * 264 * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}. 
265 * 266 * @since 1.7 267 */ 268 public static final char UNICODE_LOCALE_EXTENSION = 'u'; 269 270 /** 271 * ISO 639-3 generic code for undetermined languages. 272 */ 273 private static final String UNDETERMINED_LANGUAGE = "und"; 274 275 /** 276 * The current default locale. It is temporarily assigned to US because we 277 * need a default locale to lookup the real default locale. 278 */ 279 private static Locale defaultLocale = US; 280 281 static { 282 String language = System.getProperty("user.language", "en"); 283 String region = System.getProperty("user.region", "US"); 284 String variant = System.getProperty("user.variant", ""); 285 defaultLocale = new Locale(language, region, variant); 286 } 287 288 /** 289 * A class that helps construct {@link Locale} instances. 290 * 291 * Unlike the public {@code Locale} constructors, the methods of this class 292 * perform much stricter checks on their input. 293 * 294 * Validity checks on the {@code language}, {@code country}, {@code variant} 295 * and {@code extension} values are carried out as per the 296 * <a href="https://tools.ietf.org/html/bcp47">BCP-47</a> specification. 297 * 298 * In addition, we treat the <a href="http://www.unicode.org/reports/tr35/"> 299 * Unicode locale extension</a> specially and provide methods to manipulate 300 * the structured state (keywords and attributes) specified therein. 
301 * 302 * @since 1.7 303 */ 304 public static final class Builder { 305 private String language; 306 private String region; 307 private String variant; 308 private String script; 309 310 private final Set<String> attributes; 311 private final Map<String, String> keywords; 312 private final Map<Character, String> extensions; 313 Builder()314 public Builder() { 315 language = region = variant = script = ""; 316 317 // NOTE: We use sorted maps in the builder & the locale class itself 318 // because serialized forms of the unicode locale extension (and 319 // of the extension map itself) are specified to be in alphabetic 320 // order of keys. 321 attributes = new TreeSet<String>(); 322 keywords = new TreeMap<String, String>(); 323 extensions = new TreeMap<Character, String>(); 324 } 325 326 /** 327 * Sets the locale language. If {@code language} is {@code null} or empty, the 328 * previous value is cleared. 329 * 330 * As per BCP-47, the language must be between 2 and 3 ASCII characters 331 * in length and must only contain characters in the range {@code [a-zA-Z]}. 332 * 333 * This value is usually an <a href="http://www.loc.gov/standards/iso639-2/"> 334 * ISO-639-2</a> alpha-2 or alpha-3 code, though no explicit checks are 335 * carried out that it's a valid code in that namespace. 336 * 337 * Values are normalized to lower case. 338 * 339 * Note that we don't support BCP-47 "extlang" languages because they were 340 * only ever used to substitute for a lack of 3 letter language codes. 341 * 342 * @throws IllformedLocaleException if the language was invalid. 
343 */ setLanguage(String language)344 public Builder setLanguage(String language) { 345 this.language = normalizeAndValidateLanguage(language, true /* strict */); 346 return this; 347 } 348 normalizeAndValidateLanguage(String language, boolean strict)349 private static String normalizeAndValidateLanguage(String language, boolean strict) { 350 if (language == null || language.isEmpty()) { 351 return ""; 352 } 353 354 final String lowercaseLanguage = language.toLowerCase(Locale.ROOT); 355 if (!isValidBcp47Alpha(lowercaseLanguage, 2, 3)) { 356 if (strict) { 357 throw new IllformedLocaleException("Invalid language: " + language); 358 } else { 359 return UNDETERMINED_LANGUAGE; 360 } 361 } 362 363 return lowercaseLanguage; 364 } 365 366 /** 367 * Set the state of this builder to the parsed contents of the BCP-47 language 368 * tag {@code languageTag}. 369 * 370 * This method is equivalent to a call to {@link #clear} if {@code languageTag} 371 * is {@code null} or empty. 372 * 373 * <b>NOTE:</b> In contrast to {@link Locale#forLanguageTag(String)}, which 374 * simply ignores malformed input, this method will throw an exception if 375 * its input is malformed. 376 * 377 * @throws IllformedLocaleException if {@code languageTag} is not a well formed 378 * BCP-47 tag. 379 */ setLanguageTag(String languageTag)380 public Builder setLanguageTag(String languageTag) { 381 if (languageTag == null || languageTag.isEmpty()) { 382 clear(); 383 return this; 384 } 385 386 final Locale fromIcu = forLanguageTag(languageTag, true /* strict */); 387 // When we ask ICU for strict parsing, it might return a null locale 388 // if the language tag is malformed. 389 if (fromIcu == null) { 390 throw new IllformedLocaleException("Invalid languageTag: " + languageTag); 391 } 392 393 setLocale(fromIcu); 394 return this; 395 } 396 397 /** 398 * Sets the locale region. If {@code region} is {@code null} or empty, the 399 * previous value is cleared. 
400 * 401 * As per BCP-47, the region must either be a 2 character ISO-3166-1 code 402 * (each character in the range [a-zA-Z]) OR a 3 digit UN M.49 code. 403 * 404 * Values are normalized to upper case. 405 * 406 * @throws IllformedLocaleException if {@code} region is invalid. 407 */ setRegion(String region)408 public Builder setRegion(String region) { 409 this.region = normalizeAndValidateRegion(region, true /* strict */); 410 return this; 411 } 412 normalizeAndValidateRegion(String region, boolean strict)413 private static String normalizeAndValidateRegion(String region, boolean strict) { 414 if (region == null || region.isEmpty()) { 415 return ""; 416 } 417 418 final String uppercaseRegion = region.toUpperCase(Locale.ROOT); 419 if (!isValidBcp47Alpha(uppercaseRegion, 2, 2) && 420 !isUnM49AreaCode(uppercaseRegion)) { 421 if (strict) { 422 throw new IllformedLocaleException("Invalid region: " + region); 423 } else { 424 return ""; 425 } 426 } 427 428 return uppercaseRegion; 429 } 430 431 /** 432 * Sets the locale variant. If {@code variant} is {@code null} or empty, 433 * the previous value is cleared. 434 * 435 * The input string my consist of one or more variants separated by 436 * valid separators ('-' or '_'). 437 * 438 * As per BCP-47, each variant must be between 5 and 8 alphanumeric characters 439 * in length (each character in the range {@code [a-zA-Z0-9]}) but 440 * can be exactly 4 characters in length if the first character is a digit. 441 * 442 * Note that this is a much stricter interpretation of {@code variant} 443 * than the public {@code Locale} constructors. The latter allowed free form 444 * variants. 445 * 446 * Variants are case sensitive and all separators are normalized to {@code '_'}. 447 * 448 * @throws IllformedLocaleException if {@code} variant is invalid. 
449 */ setVariant(String variant)450 public Builder setVariant(String variant) { 451 this.variant = normalizeAndValidateVariant(variant); 452 return this; 453 } 454 normalizeAndValidateVariant(String variant)455 private static String normalizeAndValidateVariant(String variant) { 456 if (variant == null || variant.isEmpty()) { 457 return ""; 458 } 459 460 // Note that unlike extensions, we canonicalize to lower case alphabets 461 // and underscores instead of hyphens. 462 final String normalizedVariant = variant.replace('-', '_'); 463 String[] subTags = normalizedVariant.split("_"); 464 465 for (String subTag : subTags) { 466 if (!isValidVariantSubtag(subTag)) { 467 throw new IllformedLocaleException("Invalid variant: " + variant); 468 } 469 } 470 471 return normalizedVariant; 472 } 473 isValidVariantSubtag(String subTag)474 private static boolean isValidVariantSubtag(String subTag) { 475 // The BCP-47 spec states that : 476 // - Subtags can be between [5, 8] alphanumeric chars in length. 477 // - Subtags that start with a number are allowed to be 4 chars in length. 478 if (subTag.length() >= 5 && subTag.length() <= 8) { 479 if (isAsciiAlphaNum(subTag)) { 480 return true; 481 } 482 } else if (subTag.length() == 4) { 483 final char firstChar = subTag.charAt(0); 484 if ((firstChar >= '0' && firstChar <= '9') && isAsciiAlphaNum(subTag)) { 485 return true; 486 } 487 } 488 489 return false; 490 } 491 492 /** 493 * Sets the locale script. If {@code script} is {@code null} or empty, 494 * the previous value is cleared. 495 * 496 * As per BCP-47, the script must be 4 characters in length, and 497 * each character in the range {@code [a-zA-Z]}. 498 * 499 * A script usually represents a valid ISO 15924 script code, though no 500 * other registry or validity checks are performed. 501 * 502 * Scripts are normalized to title cased values. 503 * 504 * @throws IllformedLocaleException if {@code script} is invalid. 
505 */ setScript(String script)506 public Builder setScript(String script) { 507 this.script = normalizeAndValidateScript(script, true /* strict */); 508 return this; 509 } 510 normalizeAndValidateScript(String script, boolean strict)511 private static String normalizeAndValidateScript(String script, boolean strict) { 512 if (script == null || script.isEmpty()) { 513 return ""; 514 } 515 516 if (!isValidBcp47Alpha(script, 4, 4)) { 517 if (strict) { 518 throw new IllformedLocaleException("Invalid script: " + script); 519 } else { 520 return ""; 521 } 522 } 523 524 return titleCaseAsciiWord(script); 525 } 526 527 /** 528 * Sets the state of the builder to the {@link Locale} represented by 529 * {@code locale}. 530 * 531 * Note that the locale's language, region and variant are validated as per 532 * the rules specified in {@link #setLanguage}, {@link #setRegion} and 533 * {@link #setVariant}. 534 * 535 * All existing builder state is discarded. 536 * 537 * @throws IllformedLocaleException if {@code locale} is invalid. 538 * @throws NullPointerException if {@code locale} is null. 539 */ setLocale(Locale locale)540 public Builder setLocale(Locale locale) { 541 if (locale == null) { 542 throw new NullPointerException("locale == null"); 543 } 544 545 // Make copies of the existing values so that we don't partially 546 // update the state if we encounter an error. 547 final String backupLanguage = language; 548 final String backupRegion = region; 549 final String backupVariant = variant; 550 551 try { 552 setLanguage(locale.getLanguage()); 553 setRegion(locale.getCountry()); 554 setVariant(locale.getVariant()); 555 } catch (IllformedLocaleException ifle) { 556 language = backupLanguage; 557 region = backupRegion; 558 variant = backupVariant; 559 560 throw ifle; 561 } 562 563 // The following values can be set only via the builder class, so 564 // there's no need to normalize them or check their validity. 
565 566 this.script = locale.getScript(); 567 568 extensions.clear(); 569 extensions.putAll(locale.extensions); 570 571 keywords.clear(); 572 keywords.putAll(locale.unicodeKeywords); 573 574 attributes.clear(); 575 attributes.addAll(locale.unicodeAttributes); 576 577 return this; 578 } 579 580 /** 581 * Adds the specified attribute to the list of attributes in the unicode 582 * locale extension. 583 * 584 * Attributes must be between 3 and 8 characters in length, and each character 585 * must be in the range {@code [a-zA-Z0-9]}. 586 * 587 * Attributes are normalized to lower case values. All added attributes and 588 * keywords are combined to form a complete unicode locale extension on 589 * {@link Locale} objects built by this builder, and accessible via 590 * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION} 591 * key. 592 * 593 * @throws IllformedLocaleException if {@code attribute} is invalid. 594 * @throws NullPointerException if {@code attribute} is null. 595 */ addUnicodeLocaleAttribute(String attribute)596 public Builder addUnicodeLocaleAttribute(String attribute) { 597 if (attribute == null) { 598 throw new NullPointerException("attribute == null"); 599 } 600 601 final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT); 602 if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) { 603 throw new IllformedLocaleException("Invalid locale attribute: " + attribute); 604 } 605 606 attributes.add(lowercaseAttribute); 607 608 return this; 609 } 610 611 /** 612 * Removes an attribute from the list of attributes in the unicode locale 613 * extension. 614 * 615 * {@code attribute} must be valid as per the rules specified in 616 * {@link #addUnicodeLocaleAttribute}. 617 * 618 * This method has no effect if {@code attribute} hasn't already been 619 * added. 620 * 621 * @throws IllformedLocaleException if {@code attribute} is invalid. 622 * @throws NullPointerException if {@code attribute} is null. 
623 */ removeUnicodeLocaleAttribute(String attribute)624 public Builder removeUnicodeLocaleAttribute(String attribute) { 625 if (attribute == null) { 626 throw new NullPointerException("attribute == null"); 627 } 628 629 // Weirdly, remove is specified to check whether the attribute 630 // is valid, so we have to perform the full alphanumeric check here. 631 final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT); 632 if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) { 633 throw new IllformedLocaleException("Invalid locale attribute: " + attribute); 634 } 635 636 attributes.remove(attribute); 637 return this; 638 } 639 640 /** 641 * Sets the extension identified by {@code key} to {@code value}. 642 * 643 * {@code key} must be in the range {@code [a-zA-Z0-9]}. 644 * 645 * If {@code value} is {@code null} or empty, the extension is removed. 646 * 647 * In the general case, {@code value} must be a series of subtags separated 648 * by ({@code "-"} or {@code "_"}). Each subtag must be between 649 * 2 and 8 characters in length, and each character in the subtag must be in 650 * the range {@code [a-zA-Z0-9]}. 651 * 652 * <p> 653 * There are two special cases : 654 * <li> 655 * <ul> 656 * The unicode locale extension 657 * ({@code key == 'u'}, {@link Locale#UNICODE_LOCALE_EXTENSION}) : Setting 658 * the unicode locale extension results in all existing keyword and attribute 659 * state being replaced by the parsed result of {@code value}. 
For example, 660 * {@code builder.setExtension('u', "baaaz-baaar-fo-baar-ba-baaz")} 661 * is equivalent to: 662 * <pre> 663 * builder.addUnicodeLocaleAttribute("baaaz"); 664 * builder.addUnicodeLocaleAttribute("baaar"); 665 * builder.setUnicodeLocaleKeyword("fo", "baar"); 666 * builder.setUnicodeLocaleKeyword("ba", "baaa"); 667 * </pre> 668 * </ul> 669 * <ul> 670 * The private use extension 671 * ({@code key == 'x'}, {@link Locale#PRIVATE_USE_EXTENSION}) : Each subtag in a 672 * private use extension can be between 1 and 8 characters in length (in contrast 673 * to a minimum length of 2 for all other extensions). 674 * </ul> 675 * </li> 676 * 677 * @throws IllformedLocaleException if {@code value} is invalid. 678 */ setExtension(char key, String value)679 public Builder setExtension(char key, String value) { 680 if (value == null || value.isEmpty()) { 681 extensions.remove(key); 682 return this; 683 } 684 685 final String normalizedValue = value.toLowerCase(Locale.ROOT).replace('_', '-'); 686 final String[] subtags = normalizedValue.split("-"); 687 688 // Lengths for subtags in the private use extension should be [1, 8] chars. 689 // For all other extensions, they should be [2, 8] chars. 690 // 691 // http://www.rfc-editor.org/rfc/bcp/bcp47.txt 692 final int minimumLength = (key == PRIVATE_USE_EXTENSION) ? 1 : 2; 693 for (String subtag : subtags) { 694 if (!isValidBcp47Alphanum(subtag, minimumLength, 8)) { 695 throw new IllformedLocaleException( 696 "Invalid private use extension : " + value); 697 } 698 } 699 700 // We need to take special action in the case of unicode extensions, 701 // since we claim to understand their keywords and attributes. 702 if (key == UNICODE_LOCALE_EXTENSION) { 703 // First clear existing attributes and keywords. 
704 extensions.clear(); 705 attributes.clear(); 706 707 parseUnicodeExtension(subtags, keywords, attributes); 708 } else { 709 extensions.put(key, normalizedValue); 710 } 711 712 return this; 713 } 714 715 /** 716 * Clears all extensions from this builder. Note that this also implicitly 717 * clears all state related to the unicode locale extension; all attributes 718 * and keywords set by {@link #addUnicodeLocaleAttribute} and 719 * {@link #setUnicodeLocaleKeyword} are cleared. 720 */ clearExtensions()721 public Builder clearExtensions() { 722 extensions.clear(); 723 attributes.clear(); 724 keywords.clear(); 725 return this; 726 } 727 728 /** 729 * Adds a key / type pair to the list of unicode locale extension keys. 730 * 731 * {@code key} must be 2 characters in length, and each character must be 732 * in the range {@code [a-zA-Z0-9]}. 733 * 734 * {#code type} can either be empty, or a series of one or more subtags 735 * separated by a separator ({@code "-"} or {@code "_"}). Each subtag must 736 * be between 3 and 8 characters in length and each character in the subtag 737 * must be in the range {@code [a-zA-Z0-9]}. 738 * 739 * Note that the type is normalized to lower case, and all separators 740 * are normalized to {@code "-"}. All added attributes and 741 * keywords are combined to form a complete unicode locale extension on 742 * {@link Locale} objects built by this builder, and accessible via 743 * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION} 744 * key. 745 * 746 * @throws IllformedLocaleException if {@code key} or {@code value} are 747 * invalid. 
748 */ setUnicodeLocaleKeyword(String key, String type)749 public Builder setUnicodeLocaleKeyword(String key, String type) { 750 if (key == null) { 751 throw new NullPointerException("key == null"); 752 } 753 754 if (type == null && keywords != null) { 755 keywords.remove(key); 756 return this; 757 } 758 759 final String lowerCaseKey = key.toLowerCase(Locale.ROOT); 760 // The key must be exactly two alphanumeric characters. 761 if (lowerCaseKey.length() != 2 || !isAsciiAlphaNum(lowerCaseKey)) { 762 throw new IllformedLocaleException("Invalid unicode locale keyword: " + key); 763 } 764 765 // The type can be one or more alphanumeric strings of length [3, 8] characters, 766 // separated by a separator char, which is one of "_" or "-". Though the spec 767 // doesn't require it, we normalize all "_" to "-" to make the rest of our 768 // processing easier. 769 final String lowerCaseType = type.toLowerCase(Locale.ROOT).replace("_", "-"); 770 if (!isValidTypeList(lowerCaseType)) { 771 throw new IllformedLocaleException("Invalid unicode locale type: " + type); 772 } 773 774 // Everything checks out fine, add the <key, type> mapping to the list. 775 keywords.put(lowerCaseKey, lowerCaseType); 776 777 return this; 778 } 779 780 /** 781 * Clears all existing state from this builder. 782 */ clear()783 public Builder clear() { 784 clearExtensions(); 785 language = region = variant = script = ""; 786 787 return this; 788 } 789 790 /** 791 * Constructs a locale from the existing state of the builder. Note that this 792 * method is guaranteed to succeed since field validity checks are performed 793 * at the point of setting them. 794 */ build()795 public Locale build() { 796 // NOTE: We need to make a copy of attributes, keywords and extensions 797 // because the RI allows this builder to reused. 
798 return new Locale(language, region, variant, script, 799 attributes, keywords, extensions, 800 true /* has validated fields */); 801 } 802 } 803 804 /** 805 * Returns a locale for a given BCP-47 language tag. This method is more 806 * lenient than {@link Builder#setLanguageTag}. For a given language tag, parsing 807 * will proceed up to the first malformed subtag. All subsequent tags are discarded. 808 * Note that language tags use {@code -} rather than {@code _}, for example {@code en-US}. 809 * 810 * @throws NullPointerException if {@code languageTag} is {@code null}. 811 * 812 * @since 1.7 813 */ forLanguageTag(String languageTag)814 public static Locale forLanguageTag(String languageTag) { 815 if (languageTag == null) { 816 throw new NullPointerException("languageTag == null"); 817 } 818 819 return forLanguageTag(languageTag, false /* strict */); 820 } 821 822 private transient String countryCode; 823 private transient String languageCode; 824 private transient String variantCode; 825 private transient String scriptCode; 826 827 /* Sorted, Unmodifiable */ 828 private transient Set<String> unicodeAttributes; 829 /* Sorted, Unmodifiable */ 830 private transient Map<String, String> unicodeKeywords; 831 /* Sorted, Unmodifiable */ 832 private transient Map<Character, String> extensions; 833 834 /** 835 * Whether this instance was constructed from a builder. We can make 836 * stronger assumptions about the validity of Locale fields if this was 837 * constructed by a builder. 838 */ 839 private transient final boolean hasValidatedFields; 840 841 private transient String cachedToStringResult; 842 private transient String cachedLanguageTag; 843 private transient String cachedIcuLocaleId; 844 845 /** 846 * There's a circular dependency between toLowerCase/toUpperCase and 847 * Locale.US. Work around this by avoiding these methods when constructing 848 * the built-in locales. 
849 */ Locale(boolean hasValidatedFields, String lowerCaseLanguageCode, String upperCaseCountryCode)850 private Locale(boolean hasValidatedFields, String lowerCaseLanguageCode, 851 String upperCaseCountryCode) { 852 this.languageCode = lowerCaseLanguageCode; 853 this.countryCode = upperCaseCountryCode; 854 this.variantCode = ""; 855 this.scriptCode = ""; 856 857 this.unicodeAttributes = Collections.EMPTY_SET; 858 this.unicodeKeywords = Collections.EMPTY_MAP; 859 this.extensions = Collections.EMPTY_MAP; 860 861 this.hasValidatedFields = hasValidatedFields; 862 } 863 864 /** 865 * Constructs a new {@code Locale} using the specified language. 866 */ Locale(String language)867 public Locale(String language) { 868 this(language, "", "", "", Collections.EMPTY_SET, Collections.EMPTY_MAP, 869 Collections.EMPTY_MAP, false /* has validated fields */); 870 } 871 872 /** 873 * Constructs a new {@code Locale} using the specified language and country codes. 874 */ Locale(String language, String country)875 public Locale(String language, String country) { 876 this(language, country, "", "", Collections.EMPTY_SET, Collections.EMPTY_MAP, 877 Collections.EMPTY_MAP, false /* has validated fields */); 878 } 879 880 /** 881 * Required by libcore.icu.ICU. 
882 * 883 * @hide 884 */ Locale(String language, String country, String variant, String scriptCode, Set<String> unicodeAttributes, Map<String, String> unicodeKeywords, Map<Character, String> extensions, boolean hasValidatedFields)885 public Locale(String language, String country, String variant, String scriptCode, 886 /* nonnull */ Set<String> unicodeAttributes, 887 /* nonnull */ Map<String, String> unicodeKeywords, 888 /* nonnull */ Map<Character, String> extensions, 889 boolean hasValidatedFields) { 890 if (language == null || country == null || variant == null) { 891 throw new NullPointerException("language=" + language + 892 ",country=" + country + 893 ",variant=" + variant); 894 } 895 896 if (hasValidatedFields) { 897 this.languageCode = adjustLanguageCode(language); 898 this.countryCode = country; 899 this.variantCode = variant; 900 } else { 901 if (language.isEmpty() && country.isEmpty()) { 902 languageCode = ""; 903 countryCode = ""; 904 variantCode = variant; 905 } else { 906 languageCode = adjustLanguageCode(language); 907 countryCode = country.toUpperCase(Locale.US); 908 variantCode = variant; 909 } 910 } 911 912 this.scriptCode = scriptCode; 913 914 if (hasValidatedFields) { 915 Set<String> attribsCopy = new TreeSet<String>(unicodeAttributes); 916 Map<String, String> keywordsCopy = new TreeMap<String, String>(unicodeKeywords); 917 Map<Character, String> extensionsCopy = new TreeMap<Character, String>(extensions); 918 919 // We need to transform the list of attributes & keywords set on the 920 // builder to a unicode locale extension. i.e, if we have any keywords 921 // or attributes set, Locale#getExtension('u') should return a well 922 // formed extension. 
923 addUnicodeExtensionToExtensionsMap(attribsCopy, keywordsCopy, extensionsCopy); 924 925 this.unicodeAttributes = Collections.unmodifiableSet(attribsCopy); 926 this.unicodeKeywords = Collections.unmodifiableMap(keywordsCopy); 927 this.extensions = Collections.unmodifiableMap(extensionsCopy); 928 } else { 929 this.unicodeAttributes = unicodeAttributes; 930 this.unicodeKeywords = unicodeKeywords; 931 this.extensions = extensions; 932 } 933 934 this.hasValidatedFields = hasValidatedFields; 935 } 936 937 /** 938 * Constructs a new {@code Locale} using the specified language, country, 939 * and variant codes. 940 */ Locale(String language, String country, String variant)941 public Locale(String language, String country, String variant) { 942 this(language, country, variant, "", Collections.EMPTY_SET, 943 Collections.EMPTY_MAP, Collections.EMPTY_MAP, 944 false /* has validated fields */); 945 } 946 clone()947 @Override public Object clone() { 948 try { 949 return super.clone(); 950 } catch (CloneNotSupportedException e) { 951 throw new AssertionError(e); 952 } 953 } 954 955 /** 956 * Returns true if {@code object} is a locale with the same language, 957 * country and variant. 958 */ equals(Object object)959 @Override public boolean equals(Object object) { 960 if (object == this) { 961 return true; 962 } 963 if (object instanceof Locale) { 964 Locale o = (Locale) object; 965 return languageCode.equals(o.languageCode) 966 && countryCode.equals(o.countryCode) 967 && variantCode.equals(o.variantCode) 968 && scriptCode.equals(o.scriptCode) 969 && extensions.equals(o.extensions); 970 971 } 972 return false; 973 } 974 975 /** 976 * Returns the system's installed locales. This array always includes {@code 977 * Locale.US}, and usually several others. Most locale-sensitive classes 978 * offer their own {@code getAvailableLocales} method, which should be 979 * preferred over this general purpose method. 
980 * 981 * @see java.text.BreakIterator#getAvailableLocales() 982 * @see java.text.Collator#getAvailableLocales() 983 * @see java.text.DateFormat#getAvailableLocales() 984 * @see java.text.DateFormatSymbols#getAvailableLocales() 985 * @see java.text.DecimalFormatSymbols#getAvailableLocales() 986 * @see java.text.NumberFormat#getAvailableLocales() 987 * @see java.util.Calendar#getAvailableLocales() 988 */ getAvailableLocales()989 public static Locale[] getAvailableLocales() { 990 return ICU.getAvailableLocales(); 991 } 992 993 /** 994 * Returns the country code for this locale, or {@code ""} if this locale 995 * doesn't correspond to a specific country. 996 */ getCountry()997 public String getCountry() { 998 return countryCode; 999 } 1000 1001 /** 1002 * Returns the user's preferred locale. This may have been overridden for 1003 * this process with {@link #setDefault}. 1004 * 1005 * <p>Since the user's locale changes dynamically, avoid caching this value. 1006 * Instead, use this method to look it up for each use. 1007 */ getDefault()1008 public static Locale getDefault() { 1009 return defaultLocale; 1010 } 1011 1012 /** 1013 * Equivalent to {@code getDisplayCountry(Locale.getDefault())}. 1014 */ getDisplayCountry()1015 public final String getDisplayCountry() { 1016 return getDisplayCountry(getDefault()); 1017 } 1018 1019 /** 1020 * Returns the name of this locale's country, localized to {@code locale}. 1021 * Returns the empty string if this locale does not correspond to a specific 1022 * country. 
1023 */ getDisplayCountry(Locale locale)1024 public String getDisplayCountry(Locale locale) { 1025 if (countryCode.isEmpty()) { 1026 return ""; 1027 } 1028 1029 final String normalizedRegion = Builder.normalizeAndValidateRegion( 1030 countryCode, false /* strict */); 1031 if (normalizedRegion.isEmpty()) { 1032 return countryCode; 1033 } 1034 1035 String result = ICU.getDisplayCountry(this, locale); 1036 if (result == null) { // TODO: do we need to do this, or does ICU do it for us? 1037 result = ICU.getDisplayCountry(this, Locale.getDefault()); 1038 } 1039 return result; 1040 } 1041 1042 /** 1043 * Equivalent to {@code getDisplayLanguage(Locale.getDefault())}. 1044 */ getDisplayLanguage()1045 public final String getDisplayLanguage() { 1046 return getDisplayLanguage(getDefault()); 1047 } 1048 1049 /** 1050 * Returns the name of this locale's language, localized to {@code locale}. 1051 * If the language name is unknown, the language code is returned. 1052 */ getDisplayLanguage(Locale locale)1053 public String getDisplayLanguage(Locale locale) { 1054 if (languageCode.isEmpty()) { 1055 return ""; 1056 } 1057 1058 // Hacks for backward compatibility. 1059 // 1060 // Our language tag will contain "und" if the languageCode is invalid 1061 // or missing. ICU will then return "langue indéterminée" or the equivalent 1062 // display language for the indeterminate language code. 1063 // 1064 // Sigh... ugh... and what not. 1065 final String normalizedLanguage = Builder.normalizeAndValidateLanguage( 1066 languageCode, false /* strict */); 1067 if (UNDETERMINED_LANGUAGE.equals(normalizedLanguage)) { 1068 return languageCode; 1069 } 1070 1071 // TODO: We need a new hack or a complete fix for http://b/8049507 --- We would 1072 // cover the frameworks' tracks when they were using "tl" instead of "fil". 1073 String result = ICU.getDisplayLanguage(this, locale); 1074 if (result == null) { // TODO: do we need to do this, or does ICU do it for us? 
1075 result = ICU.getDisplayLanguage(this, Locale.getDefault()); 1076 } 1077 return result; 1078 } 1079 1080 /** 1081 * Equivalent to {@code getDisplayName(Locale.getDefault())}. 1082 */ getDisplayName()1083 public final String getDisplayName() { 1084 return getDisplayName(getDefault()); 1085 } 1086 1087 /** 1088 * Returns this locale's language name, country name, and variant, localized 1089 * to {@code locale}. The exact output form depends on whether this locale 1090 * corresponds to a specific language, script, country and variant. 1091 * 1092 * <p>For example: 1093 * <ul> 1094 * <li>{@code new Locale("en").getDisplayName(Locale.US)} -> {@code English} 1095 * <li>{@code new Locale("en", "US").getDisplayName(Locale.US)} -> {@code English (United States)} 1096 * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.US)} -> {@code English (United States,Computer)} 1097 * <li>{@code Locale.fromLanguageTag("zh-Hant-CN").getDisplayName(Locale.US)} -> {@code Chinese (Traditional Han,China)} 1098 * <li>{@code new Locale("en").getDisplayName(Locale.FRANCE)} -> {@code anglais} 1099 * <li>{@code new Locale("en", "US").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis)} 1100 * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis,informatique)}. 1101 * </ul> 1102 */ getDisplayName(Locale locale)1103 public String getDisplayName(Locale locale) { 1104 int count = 0; 1105 StringBuilder buffer = new StringBuilder(); 1106 if (!languageCode.isEmpty()) { 1107 String displayLanguage = getDisplayLanguage(locale); 1108 buffer.append(displayLanguage.isEmpty() ? languageCode : displayLanguage); 1109 ++count; 1110 } 1111 if (!scriptCode.isEmpty()) { 1112 if (count == 1) { 1113 buffer.append(" ("); 1114 } 1115 String displayScript = getDisplayScript(locale); 1116 buffer.append(displayScript.isEmpty() ? 
scriptCode : displayScript); 1117 ++count; 1118 } 1119 if (!countryCode.isEmpty()) { 1120 if (count == 1) { 1121 buffer.append(" ("); 1122 } else if (count == 2) { 1123 buffer.append(","); 1124 } 1125 String displayCountry = getDisplayCountry(locale); 1126 buffer.append(displayCountry.isEmpty() ? countryCode : displayCountry); 1127 ++count; 1128 } 1129 if (!variantCode.isEmpty()) { 1130 if (count == 1) { 1131 buffer.append(" ("); 1132 } else if (count == 2 || count == 3) { 1133 buffer.append(","); 1134 } 1135 String displayVariant = getDisplayVariant(locale); 1136 buffer.append(displayVariant.isEmpty() ? variantCode : displayVariant); 1137 ++count; 1138 } 1139 if (count > 1) { 1140 buffer.append(")"); 1141 } 1142 return buffer.toString(); 1143 } 1144 1145 /** 1146 * Returns the full variant name in the default {@code Locale} for the variant code of 1147 * this {@code Locale}. If there is no matching variant name, the variant code is 1148 * returned. 1149 * 1150 * @since 1.7 1151 */ getDisplayVariant()1152 public final String getDisplayVariant() { 1153 return getDisplayVariant(getDefault()); 1154 } 1155 1156 /** 1157 * Returns the full variant name in the specified {@code Locale} for the variant code 1158 * of this {@code Locale}. If there is no matching variant name, the variant code is 1159 * returned. 1160 * 1161 * @since 1.7 1162 */ getDisplayVariant(Locale locale)1163 public String getDisplayVariant(Locale locale) { 1164 if (variantCode.isEmpty()) { 1165 return ""; 1166 } 1167 1168 try { 1169 Builder.normalizeAndValidateVariant(variantCode); 1170 } catch (IllformedLocaleException ilfe) { 1171 return variantCode; 1172 } 1173 1174 String result = ICU.getDisplayVariant(this, locale); 1175 if (result == null) { // TODO: do we need to do this, or does ICU do it for us? 1176 result = ICU.getDisplayVariant(this, Locale.getDefault()); 1177 } 1178 1179 // The "old style" locale constructors allow us to pass in variants that aren't 1180 // valid BCP-47 variant subtags. 
When that happens, toLanguageTag will not emit 1181 // them. Note that we know variantCode.length() > 0 due to the isEmpty check at 1182 // the beginning of this function. 1183 if (result.isEmpty()) { 1184 return variantCode; 1185 } 1186 return result; 1187 } 1188 1189 /** 1190 * Returns the three-letter ISO 3166 country code which corresponds to the country 1191 * code for this {@code Locale}. 1192 * @throws MissingResourceException if there's no 3-letter country code for this locale. 1193 */ getISO3Country()1194 public String getISO3Country() { 1195 // The results of getISO3Country do not depend on the languageCode, 1196 // so we pass an arbitrarily selected language code here. This guards 1197 // against errors caused by malformed or invalid language codes. 1198 String code = ICU.getISO3Country("en-" + countryCode); 1199 if (!countryCode.isEmpty() && code.isEmpty()) { 1200 throw new MissingResourceException("No 3-letter country code for locale: " + this, "FormatData_" + this, "ShortCountry"); 1201 } 1202 return code; 1203 } 1204 1205 /** 1206 * Returns the three-letter ISO 639-2/T language code which corresponds to the language 1207 * code for this {@code Locale}. 1208 * @throws MissingResourceException if there's no 3-letter language code for this locale. 1209 */ getISO3Language()1210 public String getISO3Language() { 1211 // For backward compatibility, we must return "" for an empty language 1212 // code and not "und" which is the accurate ISO-639-3 code for an 1213 // undetermined language. 1214 if (languageCode.isEmpty()) { 1215 return ""; 1216 } 1217 1218 // The results of getISO3Language do not depend on the country code 1219 // or any of the other locale fields, so we pass just the language here. 
1220 String code = ICU.getISO3Language(languageCode); 1221 if (!languageCode.isEmpty() && code.isEmpty()) { 1222 throw new MissingResourceException("No 3-letter language code for locale: " + this, "FormatData_" + this, "ShortLanguage"); 1223 } 1224 return code; 1225 } 1226 1227 /** 1228 * Returns an array of strings containing all the two-letter ISO 3166 country codes that can be 1229 * used as the country code when constructing a {@code Locale}. 1230 */ getISOCountries()1231 public static String[] getISOCountries() { 1232 return ICU.getISOCountries(); 1233 } 1234 1235 /** 1236 * Returns an array of strings containing all the two-letter ISO 639-1 language codes that can be 1237 * used as the language code when constructing a {@code Locale}. 1238 */ getISOLanguages()1239 public static String[] getISOLanguages() { 1240 return ICU.getISOLanguages(); 1241 } 1242 1243 /** 1244 * Returns the language code for this {@code Locale} or the empty string if no language 1245 * was set. 1246 */ getLanguage()1247 public String getLanguage() { 1248 return languageCode; 1249 } 1250 1251 /** 1252 * Returns the variant code for this {@code Locale} or an empty {@code String} if no variant 1253 * was set. 1254 */ getVariant()1255 public String getVariant() { 1256 return variantCode; 1257 } 1258 1259 /** 1260 * Returns the script code for this {@code Locale} or an empty {@code String} if no script 1261 * was set. 1262 * 1263 * If set, the script code will be a title cased string of length 4, as per the ISO 15924 1264 * specification. 1265 * 1266 * @since 1.7 1267 */ getScript()1268 public String getScript() { 1269 return scriptCode; 1270 } 1271 1272 /** 1273 * Equivalent to {@code getDisplayScript(Locale.getDefault()))} 1274 * 1275 * @since 1.7 1276 */ getDisplayScript()1277 public String getDisplayScript() { 1278 return getDisplayScript(getDefault()); 1279 } 1280 1281 /** 1282 * Returns the name of this locale's script code, localized to {@link Locale}. 
If the 1283 * script code is unknown, the return value of this method is the same as that of 1284 * {@link #getScript()}. 1285 * 1286 * @since 1.7 1287 */ getDisplayScript(Locale locale)1288 public String getDisplayScript(Locale locale) { 1289 if (scriptCode.isEmpty()) { 1290 return ""; 1291 } 1292 1293 String result = ICU.getDisplayScript(this, locale); 1294 if (result == null) { // TODO: do we need to do this, or does ICU do it for us? 1295 result = ICU.getDisplayScript(this, Locale.getDefault()); 1296 } 1297 1298 return result; 1299 1300 } 1301 1302 /** 1303 * Returns a well formed BCP-47 language tag that identifies this locale. 1304 * 1305 * Note that this locale itself might consist of ill formed fields, since the 1306 * public {@code Locale} constructors do not perform validity checks to maintain 1307 * backwards compatibility. When this is the case, this method will either replace 1308 * ill formed fields with standard BCP-47 subtags (For eg. "und" (undetermined) 1309 * for invalid languages) or omit them altogether. 1310 * 1311 * Additionally, ill formed variants will result in the remainder of the tag 1312 * (both variants and extensions) being moved to the private use extension, 1313 * where they will appear after a subtag whose value is {@code "lvariant"}. 1314 * 1315 * It's also important to note that the BCP-47 tag is well formed in the sense 1316 * that it is unambiguously parseable into its specified components. We do not 1317 * require that any of the components are registered with the applicable registries. 1318 * For example, we do not require scripts to be a registered ISO 15924 scripts or 1319 * languages to appear in the ISO-639-2 code list. 1320 * 1321 * @since 1.7 1322 */ toLanguageTag()1323 public String toLanguageTag() { 1324 if (cachedLanguageTag == null) { 1325 cachedLanguageTag = makeLanguageTag(); 1326 } 1327 1328 return cachedLanguageTag; 1329 } 1330 1331 /** 1332 * Constructs a valid BCP-47 language tag from locale fields. 
Additional validation 1333 * is required when this Locale was not constructed using a Builder and variants 1334 * set this way are treated specially. 1335 * 1336 * In both cases, we convert empty language tags to "und", omit invalid country tags 1337 * and perform a special case conversion of "no-NO-NY" to "nn-NO". 1338 */ makeLanguageTag()1339 private String makeLanguageTag() { 1340 // We only need to revalidate the language, country and variant because 1341 // the rest of the fields can only be set via the builder which validates 1342 // them anyway. 1343 String language = ""; 1344 String region = ""; 1345 String variant = ""; 1346 String illFormedVariantSubtags = ""; 1347 1348 if (hasValidatedFields) { 1349 language = languageCode; 1350 region = countryCode; 1351 // Note that we are required to normalize hyphens to underscores 1352 // in the builder, but we must use hyphens in the BCP-47 language tag. 1353 variant = variantCode.replace('_', '-'); 1354 } else { 1355 language = Builder.normalizeAndValidateLanguage(languageCode, false /* strict */); 1356 region = Builder.normalizeAndValidateRegion(countryCode, false /* strict */); 1357 1358 try { 1359 variant = Builder.normalizeAndValidateVariant(variantCode); 1360 } catch (IllformedLocaleException ilfe) { 1361 // If our variant is ill formed, we must attempt to split it into 1362 // its constituent subtags and preserve the well formed bits and 1363 // move the rest to the private use extension (if they're well 1364 // formed extension subtags). 
1365 String split[] = splitIllformedVariant(variantCode); 1366 1367 variant = split[0]; 1368 illFormedVariantSubtags = split[1]; 1369 } 1370 } 1371 1372 if (language.isEmpty()) { 1373 language = UNDETERMINED_LANGUAGE; 1374 } 1375 1376 if ("no".equals(language) && "NO".equals(region) && "NY".equals(variant)) { 1377 language = "nn"; 1378 region = "NO"; 1379 variant = ""; 1380 } 1381 1382 final StringBuilder sb = new StringBuilder(16); 1383 sb.append(language); 1384 1385 if (!scriptCode.isEmpty()) { 1386 sb.append('-'); 1387 sb.append(scriptCode); 1388 } 1389 1390 if (!region.isEmpty()) { 1391 sb.append('-'); 1392 sb.append(region); 1393 } 1394 1395 if (!variant.isEmpty()) { 1396 sb.append('-'); 1397 sb.append(variant); 1398 } 1399 1400 // Extensions (optional, omitted if empty). Note that we don't 1401 // emit the private use extension here, but add it in the end. 1402 for (Map.Entry<Character, String> extension : extensions.entrySet()) { 1403 if (!extension.getKey().equals('x')) { 1404 sb.append('-').append(extension.getKey()); 1405 sb.append('-').append(extension.getValue()); 1406 } 1407 } 1408 1409 // The private use extension comes right at the very end. 1410 final String privateUse = extensions.get('x'); 1411 if (privateUse != null) { 1412 sb.append("-x-"); 1413 sb.append(privateUse); 1414 } 1415 1416 // If we have any ill-formed variant subtags, we append them to the 1417 // private use extension (or add a private use extension if one doesn't 1418 // exist). 
1419 if (!illFormedVariantSubtags.isEmpty()) { 1420 if (privateUse == null) { 1421 sb.append("-x-lvariant-"); 1422 } else { 1423 sb.append('-'); 1424 } 1425 sb.append(illFormedVariantSubtags); 1426 } 1427 1428 return sb.toString(); 1429 } 1430 1431 /** 1432 * Splits ill formed variants into a set of valid variant subtags (which 1433 * can be used directly in language tag construction) and a set of invalid 1434 * variant subtags (which can be appended to the private use extension), 1435 * provided that each subtag is a valid private use extension subtag. 1436 * 1437 * This method returns a two element String array. The first element is a string 1438 * containing the concatenation of valid variant subtags which can be appended 1439 * to a BCP-47 tag directly and the second containing the concatenation of 1440 * invalid variant subtags which can be appended to the private use extension 1441 * directly. 1442 * 1443 * This method assumes that {@code variant} contains at least one ill formed 1444 * variant subtag. 1445 */ splitIllformedVariant(String variant)1446 private static String[] splitIllformedVariant(String variant) { 1447 final String normalizedVariant = variant.replace('_', '-'); 1448 final String[] subTags = normalizedVariant.split("-"); 1449 1450 final String[] split = new String[] { "", "" }; 1451 1452 // First go through the list of variant subtags and check if they're 1453 // valid private use extension subtags. If they're not, we will omit 1454 // the first such subtag and all subtags after. 1455 // 1456 // NOTE: |firstInvalidSubtag| is the index of the first variant 1457 // subtag we decide to omit altogether, whereas |firstIllformedSubtag| is the 1458 // index of the first subtag we decide to append to the private use extension. 1459 // 1460 // In other words: 1461 // [0, firstIllformedSubtag) => expressed as variant subtags. 1462 // [firstIllformedSubtag, firstInvalidSubtag) => expressed as private use 1463 // extension subtags. 
1464 // [firstInvalidSubtag, subTags.length) => omitted. 1465 int firstInvalidSubtag = subTags.length; 1466 for (int i = 0; i < subTags.length; ++i) { 1467 if (!isValidBcp47Alphanum(subTags[i], 1, 8)) { 1468 firstInvalidSubtag = i; 1469 break; 1470 } 1471 } 1472 1473 if (firstInvalidSubtag == 0) { 1474 return split; 1475 } 1476 1477 // We now consider each subtag that could potentially be appended to 1478 // the private use extension and check if it's valid. 1479 int firstIllformedSubtag = firstInvalidSubtag; 1480 for (int i = 0; i < firstInvalidSubtag; ++i) { 1481 final String subTag = subTags[i]; 1482 // The BCP-47 spec states that : 1483 // - Subtags can be between [5, 8] alphanumeric chars in length. 1484 // - Subtags that start with a number are allowed to be 4 chars in length. 1485 if (subTag.length() >= 5 && subTag.length() <= 8) { 1486 if (!isAsciiAlphaNum(subTag)) { 1487 firstIllformedSubtag = i; 1488 } 1489 } else if (subTag.length() == 4) { 1490 final char firstChar = subTag.charAt(0); 1491 if (!(firstChar >= '0' && firstChar <= '9') || !isAsciiAlphaNum(subTag)) { 1492 firstIllformedSubtag = i; 1493 } 1494 } else { 1495 firstIllformedSubtag = i; 1496 } 1497 } 1498 1499 split[0] = concatenateRange(subTags, 0, firstIllformedSubtag); 1500 split[1] = concatenateRange(subTags, firstIllformedSubtag, firstInvalidSubtag); 1501 1502 return split; 1503 } 1504 1505 /** 1506 * Builds a string by concatenating array elements within the range [start, end). 1507 * The supplied range is assumed to be valid and no checks are performed. 
1508 */ concatenateRange(String[] array, int start, int end)1509 private static String concatenateRange(String[] array, int start, int end) { 1510 StringBuilder builder = new StringBuilder(32); 1511 for (int i = start; i < end; ++i) { 1512 if (i != start) { 1513 builder.append('-'); 1514 } 1515 builder.append(array[i]); 1516 } 1517 1518 return builder.toString(); 1519 } 1520 1521 /** 1522 * Returns the set of BCP-47 extensions this locale contains. 1523 * 1524 * See <a href="https://tools.ietf.org/html/bcp47#section-2.1"> 1525 * the IETF BCP-47 specification</a> (Section 2.2.6) for details. 1526 * 1527 * @since 1.7 1528 */ getExtensionKeys()1529 public Set<Character> getExtensionKeys() { 1530 return extensions.keySet(); 1531 } 1532 1533 /** 1534 * Returns the BCP-47 extension whose key is {@code extensionKey}, or {@code null} 1535 * if this locale does not contain the extension. 1536 * 1537 * Individual Keywords and attributes for the unicode 1538 * locale extension can be fetched using {@link #getUnicodeLocaleAttributes()}, 1539 * {@link #getUnicodeLocaleKeys()} and {@link #getUnicodeLocaleType}. 1540 * 1541 * @since 1.7 1542 */ getExtension(char extensionKey)1543 public String getExtension(char extensionKey) { 1544 return extensions.get(extensionKey); 1545 } 1546 1547 /** 1548 * Returns the {@code type} for the specified unicode locale extension {@code key}. 1549 * 1550 * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword} 1551 * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a> 1552 * 1553 * @since 1.7 1554 */ getUnicodeLocaleType(String keyWord)1555 public String getUnicodeLocaleType(String keyWord) { 1556 return unicodeKeywords.get(keyWord); 1557 } 1558 1559 /** 1560 * Returns the set of unicode locale extension attributes this locale contains. 
1561 * 1562 * For more information about attributes, see {@link Builder#addUnicodeLocaleAttribute} 1563 * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a> 1564 * 1565 * @since 1.7 1566 */ getUnicodeLocaleAttributes()1567 public Set<String> getUnicodeLocaleAttributes() { 1568 return unicodeAttributes; 1569 } 1570 1571 /** 1572 * Returns the set of unicode locale extension keywords this locale contains. 1573 * 1574 * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword} 1575 * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a> 1576 * 1577 * @since 1.7 1578 */ getUnicodeLocaleKeys()1579 public Set<String> getUnicodeLocaleKeys() { 1580 return unicodeKeywords.keySet(); 1581 } 1582 1583 @Override hashCode()1584 public synchronized int hashCode() { 1585 return countryCode.hashCode() 1586 + languageCode.hashCode() + variantCode.hashCode() 1587 + scriptCode.hashCode() + extensions.hashCode(); 1588 } 1589 1590 /** 1591 * Overrides the default locale. This does not affect system configuration, 1592 * and attempts to override the system-provided default locale may 1593 * themselves be overridden by actual changes to the system configuration. 1594 * Code that calls this method is usually incorrect, and should be fixed by 1595 * passing the appropriate locale to each locale-sensitive method that's 1596 * called. 1597 */ setDefault(Locale locale)1598 public synchronized static void setDefault(Locale locale) { 1599 if (locale == null) { 1600 throw new NullPointerException("locale == null"); 1601 } 1602 String languageTag = locale.toLanguageTag(); 1603 defaultLocale = locale; 1604 ICU.setDefaultLocale(languageTag); 1605 } 1606 1607 /** 1608 * Returns the string representation of this {@code Locale}. It consists of the 1609 * language code, country code and variant separated by underscores. 
1610 * If the language is missing the string begins 1611 * with an underscore. If the country is missing there are 2 underscores 1612 * between the language and the variant. The variant cannot stand alone 1613 * without a language and/or country code: in this case this method would 1614 * return the empty string. 1615 * 1616 * <p>Examples: "en", "en_US", "_US", "en__POSIX", "en_US_POSIX" 1617 */ 1618 @Override toString()1619 public final String toString() { 1620 String result = cachedToStringResult; 1621 if (result == null) { 1622 result = cachedToStringResult = toNewString(languageCode, countryCode, variantCode, 1623 scriptCode, extensions); 1624 } 1625 return result; 1626 } 1627 toNewString(String languageCode, String countryCode, String variantCode, String scriptCode, Map<Character, String> extensions)1628 private static String toNewString(String languageCode, String countryCode, 1629 String variantCode, String scriptCode, Map<Character, String> extensions) { 1630 // The string form of a locale that only has a variant is the empty string. 1631 if (languageCode.length() == 0 && countryCode.length() == 0) { 1632 return ""; 1633 } 1634 1635 // Otherwise, the output format is "ll_cc_variant", where language and country are always 1636 // two letters, but the variant is an arbitrary length. A size of 11 characters has room 1637 // for "en_US_POSIX", the largest "common" value. (In practice, the string form is almost 1638 // always 5 characters: "ll_cc".) 
1639 StringBuilder result = new StringBuilder(11); 1640 result.append(languageCode); 1641 1642 final boolean hasScriptOrExtensions = !scriptCode.isEmpty() || !extensions.isEmpty(); 1643 1644 if (!countryCode.isEmpty() || !variantCode.isEmpty() || hasScriptOrExtensions) { 1645 result.append('_'); 1646 } 1647 result.append(countryCode); 1648 if (!variantCode.isEmpty() || hasScriptOrExtensions) { 1649 result.append('_'); 1650 } 1651 result.append(variantCode); 1652 1653 if (hasScriptOrExtensions) { 1654 if (!variantCode.isEmpty()) { 1655 result.append('_'); 1656 } 1657 1658 // Note that this is notably different from the BCP-47 spec (for 1659 // backwards compatibility). We are forced to append a "#" before the script tag. 1660 // and also put the script code right at the end. 1661 result.append("#"); 1662 if (!scriptCode.isEmpty() ) { 1663 result.append(scriptCode); 1664 } 1665 1666 // Note the use of "-" instead of "_" before the extensions. 1667 if (!extensions.isEmpty()) { 1668 if (!scriptCode.isEmpty()) { 1669 result.append('-'); 1670 } 1671 result.append(serializeExtensions(extensions)); 1672 } 1673 } 1674 1675 return result.toString(); 1676 } 1677 1678 private static final ObjectStreamField[] serialPersistentFields = { 1679 new ObjectStreamField("country", String.class), 1680 new ObjectStreamField("hashcode", int.class), 1681 new ObjectStreamField("language", String.class), 1682 new ObjectStreamField("variant", String.class), 1683 new ObjectStreamField("script", String.class), 1684 new ObjectStreamField("extensions", String.class), 1685 }; 1686 writeObject(ObjectOutputStream stream)1687 private void writeObject(ObjectOutputStream stream) throws IOException { 1688 ObjectOutputStream.PutField fields = stream.putFields(); 1689 fields.put("country", countryCode); 1690 fields.put("hashcode", -1); 1691 fields.put("language", languageCode); 1692 fields.put("variant", variantCode); 1693 fields.put("script", scriptCode); 1694 1695 if (!extensions.isEmpty()) { 1696 
fields.put("extensions", serializeExtensions(extensions)); 1697 } 1698 1699 stream.writeFields(); 1700 } 1701 readObject(ObjectInputStream stream)1702 private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { 1703 ObjectInputStream.GetField fields = stream.readFields(); 1704 countryCode = (String) fields.get("country", ""); 1705 languageCode = (String) fields.get("language", ""); 1706 variantCode = (String) fields.get("variant", ""); 1707 scriptCode = (String) fields.get("script", ""); 1708 1709 this.unicodeKeywords = Collections.EMPTY_MAP; 1710 this.unicodeAttributes = Collections.EMPTY_SET; 1711 this.extensions = Collections.EMPTY_MAP; 1712 1713 String extensions = (String) fields.get("extensions", null); 1714 if (extensions != null) { 1715 readExtensions(extensions); 1716 } 1717 } 1718 readExtensions(String extensions)1719 private void readExtensions(String extensions) { 1720 Map<Character, String> extensionsMap = new TreeMap<Character, String>(); 1721 parseSerializedExtensions(extensions, extensionsMap); 1722 this.extensions = Collections.unmodifiableMap(extensionsMap); 1723 1724 if (extensionsMap.containsKey(UNICODE_LOCALE_EXTENSION)) { 1725 String unicodeExtension = extensionsMap.get(UNICODE_LOCALE_EXTENSION); 1726 String[] subTags = unicodeExtension.split("-"); 1727 1728 Map<String, String> unicodeKeywords = new TreeMap<String, String>(); 1729 Set<String> unicodeAttributes = new TreeSet<String>(); 1730 parseUnicodeExtension(subTags, unicodeKeywords, unicodeAttributes); 1731 1732 this.unicodeKeywords = Collections.unmodifiableMap(unicodeKeywords); 1733 this.unicodeAttributes = Collections.unmodifiableSet(unicodeAttributes); 1734 } 1735 } 1736 1737 /** 1738 * The serialized form for extensions is straightforward. It's simply 1739 * of the form key1-value1-key2-value2 where each value might in turn contain 1740 * multiple subtags separated by hyphens. Each key is guaranteed to be a single 1741 * character in length. 
1742 * 1743 * This method assumes that {@code extensionsMap} is non-empty. 1744 * 1745 * Visible for testing. 1746 * 1747 * @hide 1748 */ serializeExtensions(Map<Character, String> extensionsMap)1749 public static String serializeExtensions(Map<Character, String> extensionsMap) { 1750 Iterator<Map.Entry<Character, String>> entryIterator = extensionsMap.entrySet().iterator(); 1751 StringBuilder sb = new StringBuilder(64); 1752 1753 while (true) { 1754 final Map.Entry<Character, String> entry = entryIterator.next(); 1755 sb.append(entry.getKey()); 1756 sb.append('-'); 1757 sb.append(entry.getValue()); 1758 1759 if (entryIterator.hasNext()) { 1760 sb.append('-'); 1761 } else { 1762 break; 1763 } 1764 } 1765 1766 return sb.toString(); 1767 } 1768 1769 /** 1770 * Visible for testing. 1771 * 1772 * @hide 1773 */ parseSerializedExtensions(String extString, Map<Character, String> outputMap)1774 public static void parseSerializedExtensions(String extString, Map<Character, String> outputMap) { 1775 // This probably isn't the most efficient approach, but it's the 1776 // most straightforward to code. 1777 // 1778 // Start by splitting the string on "-". We will then keep track of 1779 // where each of the extension keys (single characters) appear in the 1780 // original string and then use those indices to construct substrings 1781 // representing the values. 1782 final String[] subTags = extString.split("-"); 1783 final int[] typeStartIndices = new int[subTags.length / 2]; 1784 1785 int length = 0; 1786 int count = 0; 1787 for (String subTag : subTags) { 1788 if (subTag.length() > 0) { 1789 // Account for the length of the "-" at the end of each subtag. 
1790 length += (subTag.length() + 1); 1791 } 1792 1793 if (subTag.length() == 1) { 1794 typeStartIndices[count++] = length; 1795 } 1796 } 1797 1798 for (int i = 0; i < count; ++i) { 1799 final int valueStart = typeStartIndices[i]; 1800 // Since the start Index points to the beginning of the next type 1801 // ....prev-k-next..... 1802 // |_ here 1803 // (idx - 2) is the index of the next key 1804 // (idx - 3) is the (non inclusive) end of the previous type. 1805 final int valueEnd = (i == (count - 1)) ? 1806 extString.length() : (typeStartIndices[i + 1] - 3); 1807 1808 outputMap.put(extString.charAt(typeStartIndices[i] - 2), 1809 extString.substring(valueStart, valueEnd)); 1810 } 1811 } 1812 1813 1814 /** 1815 * A UN M.49 is a 3 digit numeric code. 1816 */ isUnM49AreaCode(String code)1817 private static boolean isUnM49AreaCode(String code) { 1818 if (code.length() != 3) { 1819 return false; 1820 } 1821 1822 for (int i = 0; i < 3; ++i) { 1823 final char character = code.charAt(i); 1824 if (!(character >= '0' && character <= '9')) { 1825 return false; 1826 } 1827 } 1828 1829 return true; 1830 } 1831 1832 /* 1833 * Checks whether a given string is an ASCII alphanumeric string. 
1834 */ isAsciiAlphaNum(String string)1835 private static boolean isAsciiAlphaNum(String string) { 1836 for (int i = 0; i < string.length(); i++) { 1837 final char character = string.charAt(i); 1838 if (!(character >= 'a' && character <= 'z' || 1839 character >= 'A' && character <= 'Z' || 1840 character >= '0' && character <= '9')) { 1841 return false; 1842 } 1843 } 1844 1845 return true; 1846 } 1847 isValidBcp47Alpha(String string, int lowerBound, int upperBound)1848 private static boolean isValidBcp47Alpha(String string, int lowerBound, int upperBound) { 1849 final int length = string.length(); 1850 if (length < lowerBound || length > upperBound) { 1851 return false; 1852 } 1853 1854 for (int i = 0; i < length; ++i) { 1855 final char character = string.charAt(i); 1856 if (!(character >= 'a' && character <= 'z' || 1857 character >= 'A' && character <= 'Z')) { 1858 return false; 1859 } 1860 } 1861 1862 return true; 1863 } 1864 isValidBcp47Alphanum(String attributeOrType, int lowerBound, int upperBound)1865 private static boolean isValidBcp47Alphanum(String attributeOrType, 1866 int lowerBound, int upperBound) { 1867 if (attributeOrType.length() < lowerBound || attributeOrType.length() > upperBound) { 1868 return false; 1869 } 1870 1871 return isAsciiAlphaNum(attributeOrType); 1872 } 1873 titleCaseAsciiWord(String word)1874 private static String titleCaseAsciiWord(String word) { 1875 try { 1876 byte[] chars = word.toLowerCase(Locale.ROOT).getBytes(StandardCharsets.US_ASCII); 1877 chars[0] = (byte) ((int) chars[0] + 'A' - 'a'); 1878 return new String(chars, StandardCharsets.US_ASCII); 1879 } catch (UnsupportedOperationException uoe) { 1880 throw new AssertionError(uoe); 1881 } 1882 } 1883 1884 /** 1885 * A type list must contain one or more alphanumeric subtags whose lengths 1886 * are between 3 and 8. 
1887 */ isValidTypeList(String lowerCaseTypeList)1888 private static boolean isValidTypeList(String lowerCaseTypeList) { 1889 final String[] splitList = lowerCaseTypeList.split("-"); 1890 for (String type : splitList) { 1891 if (!isValidBcp47Alphanum(type, 3, 8)) { 1892 return false; 1893 } 1894 } 1895 1896 return true; 1897 } 1898 addUnicodeExtensionToExtensionsMap( Set<String> attributes, Map<String, String> keywords, Map<Character, String> extensions)1899 private static void addUnicodeExtensionToExtensionsMap( 1900 Set<String> attributes, Map<String, String> keywords, 1901 Map<Character, String> extensions) { 1902 if (attributes.isEmpty() && keywords.isEmpty()) { 1903 return; 1904 } 1905 1906 // Assume that the common case is a low number of keywords & attributes 1907 // (usually one or two). 1908 final StringBuilder sb = new StringBuilder(32); 1909 1910 // All attributes must appear before keywords, in lexical order. 1911 if (!attributes.isEmpty()) { 1912 Iterator<String> attributesIterator = attributes.iterator(); 1913 while (true) { 1914 sb.append(attributesIterator.next()); 1915 if (attributesIterator.hasNext()) { 1916 sb.append('-'); 1917 } else { 1918 break; 1919 } 1920 } 1921 } 1922 1923 if (!keywords.isEmpty()) { 1924 if (!attributes.isEmpty()) { 1925 sb.append('-'); 1926 } 1927 1928 Iterator<Map.Entry<String, String>> keywordsIterator = keywords.entrySet().iterator(); 1929 while (true) { 1930 final Map.Entry<String, String> keyWord = keywordsIterator.next(); 1931 sb.append(keyWord.getKey()); 1932 if (!keyWord.getValue().isEmpty()) { 1933 sb.append('-'); 1934 sb.append(keyWord.getValue()); 1935 } 1936 if (keywordsIterator.hasNext()) { 1937 sb.append('-'); 1938 } else { 1939 break; 1940 } 1941 } 1942 } 1943 1944 extensions.put(UNICODE_LOCALE_EXTENSION, sb.toString()); 1945 } 1946 1947 /** 1948 * This extension is described by http://www.unicode.org/reports/tr35/#RFC5234 1949 * unicode_locale_extensions = sep "u" (1*(sep keyword) / 1*(sep attribute) *(sep 
keyword)). 1950 * 1951 * It must contain at least one keyword or attribute and attributes (if any) 1952 * must appear before keywords. Attributes can't appear after keywords because 1953 * they will be indistinguishable from a subtag of the keyword type. 1954 * 1955 * Visible for testing. 1956 * 1957 * @hide 1958 */ parseUnicodeExtension(String[] subtags, Map<String, String> keywords, Set<String> attributes)1959 public static void parseUnicodeExtension(String[] subtags, 1960 Map<String, String> keywords, Set<String> attributes) { 1961 String lastKeyword = null; 1962 List<String> subtagsForKeyword = new ArrayList<String>(); 1963 for (String subtag : subtags) { 1964 if (subtag.length() == 2) { 1965 if (subtagsForKeyword.size() > 0) { 1966 keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword)); 1967 subtagsForKeyword.clear(); 1968 } 1969 1970 lastKeyword = subtag; 1971 } else if (subtag.length() > 2) { 1972 if (lastKeyword == null) { 1973 attributes.add(subtag); 1974 } else { 1975 subtagsForKeyword.add(subtag); 1976 } 1977 } 1978 } 1979 1980 if (subtagsForKeyword.size() > 0) { 1981 keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword)); 1982 } else if (lastKeyword != null) { 1983 keywords.put(lastKeyword, ""); 1984 } 1985 } 1986 1987 /** 1988 * Joins a list of subtags into a BCP-47 tag using the standard separator 1989 * ("-"). 1990 */ joinBcp47Subtags(List<String> strings)1991 private static String joinBcp47Subtags(List<String> strings) { 1992 final int size = strings.size(); 1993 1994 StringBuilder sb = new StringBuilder(strings.get(0).length()); 1995 for (int i = 0; i < size; ++i) { 1996 sb.append(strings.get(i)); 1997 if (i != size - 1) { 1998 sb.append('-'); 1999 } 2000 } 2001 2002 return sb.toString(); 2003 } 2004 2005 /** 2006 * @hide for internal use only. 
2007 */ adjustLanguageCode(String languageCode)2008 public static String adjustLanguageCode(String languageCode) { 2009 String adjusted = languageCode.toLowerCase(Locale.US); 2010 // Map new language codes to the obsolete language 2011 // codes so the correct resource bundles will be used. 2012 if (languageCode.equals("he")) { 2013 adjusted = "iw"; 2014 } else if (languageCode.equals("id")) { 2015 adjusted = "in"; 2016 } else if (languageCode.equals("yi")) { 2017 adjusted = "ji"; 2018 } 2019 2020 return adjusted; 2021 } 2022 2023 /** 2024 * Map of grandfathered language tags to their modern replacements. 2025 */ 2026 private static final TreeMap<String, String> GRANDFATHERED_LOCALES; 2027 2028 static { 2029 GRANDFATHERED_LOCALES = new TreeMap<String, String>(String.CASE_INSENSITIVE_ORDER); 2030 2031 // From http://tools.ietf.org/html/bcp47 2032 // 2033 // grandfathered = irregular ; non-redundant tags registered 2034 // / regular ; during the RFC 3066 era 2035 // irregular = 2036 GRANDFATHERED_LOCALES.put("en-GB-oed", "en-GB-x-oed"); 2037 GRANDFATHERED_LOCALES.put("i-ami", "ami"); 2038 GRANDFATHERED_LOCALES.put("i-bnn", "bnn"); 2039 GRANDFATHERED_LOCALES.put("i-default", "en-x-i-default"); 2040 GRANDFATHERED_LOCALES.put("i-enochian", "und-x-i-enochian"); 2041 GRANDFATHERED_LOCALES.put("i-hak", "hak"); 2042 GRANDFATHERED_LOCALES.put("i-klingon", "tlh"); 2043 GRANDFATHERED_LOCALES.put("i-lux", "lb"); 2044 GRANDFATHERED_LOCALES.put("i-mingo", "see-x-i-mingo"); 2045 GRANDFATHERED_LOCALES.put("i-navajo", "nv"); 2046 GRANDFATHERED_LOCALES.put("i-pwn", "pwn"); 2047 GRANDFATHERED_LOCALES.put("i-tao", "tao"); 2048 GRANDFATHERED_LOCALES.put("i-tay", "tay"); 2049 GRANDFATHERED_LOCALES.put("i-tsu", "tsu"); 2050 GRANDFATHERED_LOCALES.put("sgn-BE-FR", "sfb"); 2051 GRANDFATHERED_LOCALES.put("sgn-BE-NL", "vgt"); 2052 GRANDFATHERED_LOCALES.put("sgn-CH-DE", "sgg"); 2053 2054 // regular = 2055 GRANDFATHERED_LOCALES.put("art-lojban", "jbo"); 2056 
GRANDFATHERED_LOCALES.put("cel-gaulish", "xtg-x-cel-gaulish"); 2057 GRANDFATHERED_LOCALES.put("no-bok", "nb"); 2058 GRANDFATHERED_LOCALES.put("no-nyn", "nn"); 2059 GRANDFATHERED_LOCALES.put("zh-guoyu", "cmn"); 2060 GRANDFATHERED_LOCALES.put("zh-hakka", "hak"); 2061 GRANDFATHERED_LOCALES.put("zh-min", "nan-x-zh-min"); 2062 GRANDFATHERED_LOCALES.put("zh-min-nan", "nan"); 2063 GRANDFATHERED_LOCALES.put("zh-xiang", "hsn"); 2064 } 2065 convertGrandfatheredTag(String original)2066 private static String convertGrandfatheredTag(String original) { 2067 final String converted = GRANDFATHERED_LOCALES.get(original); 2068 return converted != null ? converted : original; 2069 } 2070 2071 /** 2072 * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)} 2073 * and appends valid variant subtags upto the first invalid subtag (if any) to 2074 * {@code normalizedVariants}. 2075 */ extractVariantSubtags(String[] subtags, int startIndex, int endIndex, List<String> normalizedVariants)2076 private static void extractVariantSubtags(String[] subtags, int startIndex, int endIndex, 2077 List<String> normalizedVariants) { 2078 for (int i = startIndex; i < endIndex; i++) { 2079 final String subtag = subtags[i]; 2080 2081 if (Builder.isValidVariantSubtag(subtag)) { 2082 normalizedVariants.add(subtag); 2083 } else { 2084 break; 2085 } 2086 } 2087 } 2088 2089 /** 2090 * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)} 2091 * and inserts valid extensions into {@code extensions}. The scan is aborted 2092 * when an invalid extension is encountered. Returns the index of the first 2093 * unparsable element of {@code subtags}. 
2094 */ extractExtensions(String[] subtags, int startIndex, int endIndex, Map<Character, String> extensions)2095 private static int extractExtensions(String[] subtags, int startIndex, int endIndex, 2096 Map<Character, String> extensions) { 2097 int privateUseExtensionIndex = -1; 2098 int extensionKeyIndex = -1; 2099 2100 int i = startIndex; 2101 for (; i < endIndex; i++) { 2102 final String subtag = subtags[i]; 2103 2104 final boolean parsingPrivateUse = (privateUseExtensionIndex != -1) && 2105 (extensionKeyIndex == privateUseExtensionIndex); 2106 2107 // Note that private use extensions allow subtags of length 1. 2108 // Private use extensions *must* come last, so there's no ambiguity 2109 // in that case. 2110 if (subtag.length() == 1 && !parsingPrivateUse) { 2111 // Emit the last extension we encountered if any. First check 2112 // whether we encountered two keys in a row (which is an error). 2113 // Also checks if we already have an extension with the same key, 2114 // which is again an error. 2115 if (extensionKeyIndex != -1) { 2116 if ((i - 1) == extensionKeyIndex) { 2117 return extensionKeyIndex; 2118 } 2119 2120 final String key = subtags[extensionKeyIndex]; 2121 if (extensions.containsKey(key.charAt(0))) { 2122 return extensionKeyIndex; 2123 } 2124 2125 final String value = concatenateRange(subtags, extensionKeyIndex + 1, i); 2126 extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT)); 2127 } 2128 2129 // Mark the start of the next extension. Also keep track of whether this 2130 // is a private use extension, and throw an error if it doesn't come last. 2131 extensionKeyIndex = i; 2132 if ("x".equals(subtag)) { 2133 privateUseExtensionIndex = i; 2134 } else if (privateUseExtensionIndex != -1) { 2135 // The private use extension must come last. 2136 return privateUseExtensionIndex; 2137 } 2138 } else if (extensionKeyIndex != -1) { 2139 // We must have encountered a valid key in order to start parsing 2140 // its subtags. 
2141 if (!isValidBcp47Alphanum(subtag, parsingPrivateUse ? 1 : 2, 8)) { 2142 return i; 2143 } 2144 } else { 2145 // Encountered a value without a preceding key. 2146 return i; 2147 } 2148 } 2149 2150 if (extensionKeyIndex != -1) { 2151 if ((i - 1) == extensionKeyIndex) { 2152 return extensionKeyIndex; 2153 } 2154 2155 final String key = subtags[extensionKeyIndex]; 2156 if (extensions.containsKey(key.charAt(0))) { 2157 return extensionKeyIndex; 2158 } 2159 2160 final String value = concatenateRange(subtags, extensionKeyIndex + 1, i); 2161 extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT)); 2162 } 2163 2164 return i; 2165 } 2166 forLanguageTag( String tag, boolean strict)2167 private static Locale forLanguageTag(/* @Nonnull */ String tag, boolean strict) { 2168 final String converted = convertGrandfatheredTag(tag); 2169 final String[] subtags = converted.split("-"); 2170 2171 int lastSubtag = subtags.length; 2172 for (int i = 0; i < subtags.length; ++i) { 2173 final String subtag = subtags[i]; 2174 if (subtag.isEmpty() || subtag.length() > 8) { 2175 if (strict) { 2176 throw new IllformedLocaleException("Invalid subtag at index: " + i 2177 + " in tag: " + tag); 2178 } else { 2179 lastSubtag = (i - 1); 2180 } 2181 2182 break; 2183 } 2184 } 2185 2186 final String languageCode = Builder.normalizeAndValidateLanguage(subtags[0], strict); 2187 String scriptCode = ""; 2188 int nextSubtag = 1; 2189 if (lastSubtag > nextSubtag) { 2190 scriptCode = Builder.normalizeAndValidateScript(subtags[nextSubtag], false /* strict */); 2191 if (!scriptCode.isEmpty()) { 2192 nextSubtag++; 2193 } 2194 } 2195 2196 String regionCode = ""; 2197 if (lastSubtag > nextSubtag) { 2198 regionCode = Builder.normalizeAndValidateRegion(subtags[nextSubtag], false /* strict */); 2199 if (!regionCode.isEmpty()) { 2200 nextSubtag++; 2201 } 2202 } 2203 2204 List<String> variants = null; 2205 if (lastSubtag > nextSubtag) { 2206 variants = new ArrayList<String>(); 2207 extractVariantSubtags(subtags, 
nextSubtag, lastSubtag, variants); 2208 nextSubtag += variants.size(); 2209 } 2210 2211 Map<Character, String> extensions = Collections.EMPTY_MAP; 2212 if (lastSubtag > nextSubtag) { 2213 extensions = new TreeMap<Character, String>(); 2214 nextSubtag = extractExtensions(subtags, nextSubtag, lastSubtag, extensions); 2215 } 2216 2217 if (nextSubtag != lastSubtag) { 2218 if (strict) { 2219 throw new IllformedLocaleException("Unparseable subtag: " + subtags[nextSubtag] 2220 + " from language tag: " + tag); 2221 } 2222 } 2223 2224 Set<String> unicodeKeywords = Collections.EMPTY_SET; 2225 Map<String, String> unicodeAttributes = Collections.EMPTY_MAP; 2226 if (extensions.containsKey(UNICODE_LOCALE_EXTENSION)) { 2227 unicodeKeywords = new TreeSet<String>(); 2228 unicodeAttributes = new TreeMap<String, String>(); 2229 parseUnicodeExtension(extensions.get(UNICODE_LOCALE_EXTENSION).split("-"), 2230 unicodeAttributes, unicodeKeywords); 2231 } 2232 2233 String variantCode = ""; 2234 if (variants != null && !variants.isEmpty()) { 2235 StringBuilder variantsBuilder = new StringBuilder(variants.size() * 8); 2236 for (int i = 0; i < variants.size(); ++i) { 2237 if (i != 0) { 2238 variantsBuilder.append('_'); 2239 } 2240 variantsBuilder.append(variants.get(i)); 2241 } 2242 variantCode = variantsBuilder.toString(); 2243 } 2244 2245 return new Locale(languageCode, regionCode, variantCode, scriptCode, 2246 unicodeKeywords, unicodeAttributes, extensions, true /* has validated fields */); 2247 } 2248 } 2249