1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package libcore.icu; 18 19 import dalvik.annotation.compat.UnsupportedAppUsage; 20 import java.util.Collections; 21 import java.util.HashMap; 22 import java.util.HashSet; 23 import java.util.LinkedHashSet; 24 import java.util.Locale; 25 import java.util.Map; 26 import java.util.Map.Entry; 27 import java.util.Set; 28 import libcore.util.BasicLruCache; 29 30 /** 31 * Makes ICU data accessible to Java. 32 * @hide 33 */ 34 @libcore.api.CorePlatformApi 35 public final class ICU { 36 37 @UnsupportedAppUsage 38 private static final BasicLruCache<String, String> CACHED_PATTERNS = 39 new BasicLruCache<String, String>(8); 40 41 private static Locale[] availableLocalesCache; 42 43 private static String[] isoCountries; 44 45 private static String[] isoLanguages; 46 ICU()47 private ICU() { 48 } 49 50 /** 51 * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache. 52 */ getISOLanguages()53 public static String[] getISOLanguages() { 54 if (isoLanguages == null) { 55 isoLanguages = getISOLanguagesNative(); 56 } 57 return isoLanguages.clone(); 58 } 59 60 /** 61 * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache. 62 */ getISOCountries()63 public static String[] getISOCountries() { 64 if (isoCountries == null) { 65 isoCountries = getISOCountriesNative(); 66 } 67 return isoCountries.clone(); 68 } 69 70 private static final int IDX_LANGUAGE = 0; 71 private static final int IDX_SCRIPT = 1; 72 private static final int IDX_REGION = 2; 73 private static final int IDX_VARIANT = 3; 74 75 /* 76 * Parse the {Language, Script, Region, Variant*} section of the ICU locale 77 * ID. This is the bit that appears before the keyword separate "@". The general 78 * structure is a series of ASCII alphanumeric strings (subtags) 79 * separated by underscores. 80 * 81 * Each subtag is interpreted according to its position in the list of subtags 82 * AND its length (groan...). The various cases are explained in comments 83 * below. 84 */ parseLangScriptRegionAndVariants(String string, String[] outputArray)85 private static void parseLangScriptRegionAndVariants(String string, 86 String[] outputArray) { 87 final int first = string.indexOf('_'); 88 final int second = string.indexOf('_', first + 1); 89 final int third = string.indexOf('_', second + 1); 90 91 if (first == -1) { 92 outputArray[IDX_LANGUAGE] = string; 93 } else if (second == -1) { 94 // Language and country ("ja_JP") OR 95 // Language and script ("en_Latn") OR 96 // Language and variant ("en_POSIX"). 97 98 outputArray[IDX_LANGUAGE] = string.substring(0, first); 99 final String secondString = string.substring(first + 1); 100 101 if (secondString.length() == 4) { 102 // 4 Letter ISO script code. 103 outputArray[IDX_SCRIPT] = secondString; 104 } else if (secondString.length() == 2 || secondString.length() == 3) { 105 // 2 or 3 Letter region code. 106 outputArray[IDX_REGION] = secondString; 107 } else { 108 // If we're here, the length of the second half is either 1 or greater 109 // than 5. Assume that ICU won't hand us malformed tags, and therefore 110 // assume the rest of the string is a series of variant tags. 111 outputArray[IDX_VARIANT] = secondString; 112 } 113 } else if (third == -1) { 114 // Language and country and variant ("ja_JP_TRADITIONAL") OR 115 // Language and script and variant ("en_Latn_POSIX") OR 116 // Language and script and region ("en_Latn_US"). OR 117 // Language and variant with multiple subtags ("en_POSIX_XISOP") 118 119 outputArray[IDX_LANGUAGE] = string.substring(0, first); 120 final String secondString = string.substring(first + 1, second); 121 final String thirdString = string.substring(second + 1); 122 123 if (secondString.length() == 4) { 124 // The second subtag is a script. 125 outputArray[IDX_SCRIPT] = secondString; 126 127 // The third subtag can be either a region or a variant, depending 128 // on its length. 129 if (thirdString.length() == 2 || thirdString.length() == 3 || 130 thirdString.isEmpty()) { 131 outputArray[IDX_REGION] = thirdString; 132 } else { 133 outputArray[IDX_VARIANT] = thirdString; 134 } 135 } else if (secondString.isEmpty() || 136 secondString.length() == 2 || secondString.length() == 3) { 137 // The second string is a region, and the third a variant. 138 outputArray[IDX_REGION] = secondString; 139 outputArray[IDX_VARIANT] = thirdString; 140 } else { 141 // Variant with multiple subtags. 142 outputArray[IDX_VARIANT] = string.substring(first + 1); 143 } 144 } else { 145 // Language, script, region and variant with 1 or more subtags 146 // ("en_Latn_US_POSIX") OR 147 // Language, region and variant with 2 or more subtags 148 // (en_US_POSIX_VARIANT). 149 outputArray[IDX_LANGUAGE] = string.substring(0, first); 150 final String secondString = string.substring(first + 1, second); 151 if (secondString.length() == 4) { 152 outputArray[IDX_SCRIPT] = secondString; 153 outputArray[IDX_REGION] = string.substring(second + 1, third); 154 outputArray[IDX_VARIANT] = string.substring(third + 1); 155 } else { 156 outputArray[IDX_REGION] = secondString; 157 outputArray[IDX_VARIANT] = string.substring(second + 1); 158 } 159 } 160 } 161 162 /** 163 * Returns the appropriate {@code Locale} given a {@code String} of the form returned 164 * by {@code toString}. This is very lenient, and doesn't care what's between the underscores: 165 * this method can parse strings that {@code Locale.toString} won't produce. 166 * Used to remove duplication. 167 */ localeFromIcuLocaleId(String localeId)168 public static Locale localeFromIcuLocaleId(String localeId) { 169 // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h). 170 final int extensionsIndex = localeId.indexOf('@'); 171 172 Map<Character, String> extensionsMap = Collections.EMPTY_MAP; 173 Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP; 174 Set<String> unicodeAttributeSet = Collections.EMPTY_SET; 175 176 if (extensionsIndex != -1) { 177 extensionsMap = new HashMap<Character, String>(); 178 unicodeKeywordsMap = new HashMap<String, String>(); 179 unicodeAttributeSet = new HashSet<String>(); 180 181 // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string 182 // containing all "keywords" it could parse. An ICU keyword is a key-value pair 183 // separated by an "=" (ULOC_KEYWORD_ASSIGN). 184 // 185 // Each keyword item can be one of three things : 186 // - A unicode extension attribute list: In this case the item key is "attribute" 187 // and the value is a hyphen separated list of unicode attributes. 188 // - A unicode extension keyword: In this case, the item key will be larger than 189 // 1 char in length, and the value will be the unicode extension value. 190 // - A BCP-47 extension subtag: In this case, the item key will be exactly one 191 // char in length, and the value will be a sequence of unparsed subtags that 192 // represent the extension. 193 // 194 // Note that this implies that unicode extension keywords are "promoted" to 195 // to the same namespace as the top level extension subtags and their values. 196 // There can't be any collisions in practice because the BCP-47 spec imposes 197 // restrictions on their lengths. 198 final String extensionsString = localeId.substring(extensionsIndex + 1); 199 final String[] extensions = extensionsString.split(";"); 200 for (String extension : extensions) { 201 // This is the special key for the unicode attributes 202 if (extension.startsWith("attribute=")) { 203 String unicodeAttributeValues = extension.substring("attribute=".length()); 204 for (String unicodeAttribute : unicodeAttributeValues.split("-")) { 205 unicodeAttributeSet.add(unicodeAttribute); 206 } 207 } else { 208 final int separatorIndex = extension.indexOf('='); 209 210 if (separatorIndex == 1) { 211 // This is a BCP-47 extension subtag. 212 final String value = extension.substring(2); 213 final char extensionId = extension.charAt(0); 214 215 extensionsMap.put(extensionId, value); 216 } else { 217 // This is a unicode extension keyword. 218 unicodeKeywordsMap.put(extension.substring(0, separatorIndex), 219 extension.substring(separatorIndex + 1)); 220 } 221 } 222 } 223 } 224 225 final String[] outputArray = new String[] { "", "", "", "" }; 226 if (extensionsIndex == -1) { 227 parseLangScriptRegionAndVariants(localeId, outputArray); 228 } else { 229 parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex), 230 outputArray); 231 } 232 Locale.Builder builder = new Locale.Builder(); 233 builder.setLanguage(outputArray[IDX_LANGUAGE]); 234 builder.setRegion(outputArray[IDX_REGION]); 235 builder.setVariant(outputArray[IDX_VARIANT]); 236 builder.setScript(outputArray[IDX_SCRIPT]); 237 for (String attribute : unicodeAttributeSet) { 238 builder.addUnicodeLocaleAttribute(attribute); 239 } 240 for (Entry<String, String> keyword : unicodeKeywordsMap.entrySet()) { 241 builder.setUnicodeLocaleKeyword(keyword.getKey(), keyword.getValue()); 242 } 243 244 for (Entry<Character, String> extension : extensionsMap.entrySet()) { 245 builder.setExtension(extension.getKey(), extension.getValue()); 246 } 247 248 return builder.build(); 249 } 250 localesFromStrings(String[] localeNames)251 public static Locale[] localesFromStrings(String[] localeNames) { 252 // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera. 253 // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about 254 // both so that we never need to convert back when talking to it. 255 LinkedHashSet<Locale> set = new LinkedHashSet<Locale>(); 256 for (String localeName : localeNames) { 257 set.add(localeFromIcuLocaleId(localeName)); 258 } 259 return set.toArray(new Locale[set.size()]); 260 } 261 getAvailableLocales()262 public static Locale[] getAvailableLocales() { 263 if (availableLocalesCache == null) { 264 availableLocalesCache = localesFromStrings(getAvailableLocalesNative()); 265 } 266 return availableLocalesCache.clone(); 267 } 268 getAvailableBreakIteratorLocales()269 public static Locale[] getAvailableBreakIteratorLocales() { 270 return localesFromStrings(getAvailableBreakIteratorLocalesNative()); 271 } 272 getAvailableCalendarLocales()273 public static Locale[] getAvailableCalendarLocales() { 274 return localesFromStrings(getAvailableCalendarLocalesNative()); 275 } 276 getAvailableCollatorLocales()277 public static Locale[] getAvailableCollatorLocales() { 278 return localesFromStrings(getAvailableCollatorLocalesNative()); 279 } 280 getAvailableDateFormatLocales()281 public static Locale[] getAvailableDateFormatLocales() { 282 return localesFromStrings(getAvailableDateFormatLocalesNative()); 283 } 284 getAvailableDateFormatSymbolsLocales()285 public static Locale[] getAvailableDateFormatSymbolsLocales() { 286 return getAvailableDateFormatLocales(); 287 } 288 getAvailableDecimalFormatSymbolsLocales()289 public static Locale[] getAvailableDecimalFormatSymbolsLocales() { 290 return getAvailableNumberFormatLocales(); 291 } 292 getAvailableNumberFormatLocales()293 public static Locale[] getAvailableNumberFormatLocales() { 294 return localesFromStrings(getAvailableNumberFormatLocalesNative()); 295 } 296 297 @UnsupportedAppUsage 298 @libcore.api.CorePlatformApi getBestDateTimePattern(String skeleton, Locale locale)299 public static String getBestDateTimePattern(String skeleton, Locale locale) { 300 String languageTag = locale.toLanguageTag(); 301 String key = skeleton + "\t" + languageTag; 302 synchronized (CACHED_PATTERNS) { 303 String pattern = CACHED_PATTERNS.get(key); 304 if (pattern == null) { 305 pattern = getBestDateTimePatternNative(skeleton, languageTag); 306 CACHED_PATTERNS.put(key, pattern); 307 } 308 return pattern; 309 } 310 } 311 312 @UnsupportedAppUsage getBestDateTimePatternNative(String skeleton, String languageTag)313 private static native String getBestDateTimePatternNative(String skeleton, String languageTag); 314 315 @UnsupportedAppUsage 316 @libcore.api.CorePlatformApi getDateFormatOrder(String pattern)317 public static char[] getDateFormatOrder(String pattern) { 318 char[] result = new char[3]; 319 int resultIndex = 0; 320 boolean sawDay = false; 321 boolean sawMonth = false; 322 boolean sawYear = false; 323 324 for (int i = 0; i < pattern.length(); ++i) { 325 char ch = pattern.charAt(i); 326 if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') { 327 if (ch == 'd' && !sawDay) { 328 result[resultIndex++] = 'd'; 329 sawDay = true; 330 } else if ((ch == 'L' || ch == 'M') && !sawMonth) { 331 result[resultIndex++] = 'M'; 332 sawMonth = true; 333 } else if ((ch == 'y') && !sawYear) { 334 result[resultIndex++] = 'y'; 335 sawYear = true; 336 } 337 } else if (ch == 'G') { 338 // Ignore the era specifier, if present. 339 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 340 throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern); 341 } else if (ch == '\'') { 342 if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') { 343 ++i; 344 } else { 345 i = pattern.indexOf('\'', i + 1); 346 if (i == -1) { 347 throw new IllegalArgumentException("Bad quoting in " + pattern); 348 } 349 ++i; 350 } 351 } else { 352 // Ignore spaces and punctuation. 353 } 354 } 355 return result; 356 } 357 358 /** 359 * Returns the version of the CLDR data in use, such as "22.1.1". 360 */ getCldrVersion()361 public static native String getCldrVersion(); 362 363 /** 364 * Returns the icu4c version in use, such as "50.1.1". 365 */ getIcuVersion()366 public static native String getIcuVersion(); 367 368 /** 369 * Returns the Unicode version our ICU supports, such as "6.2". 370 */ getUnicodeVersion()371 public static native String getUnicodeVersion(); 372 373 // --- Case mapping. 374 toLowerCase(String s, Locale locale)375 public static String toLowerCase(String s, Locale locale) { 376 return toLowerCase(s, locale.toLanguageTag()); 377 } 378 toLowerCase(String s, String languageTag)379 private static native String toLowerCase(String s, String languageTag); 380 toUpperCase(String s, Locale locale)381 public static String toUpperCase(String s, Locale locale) { 382 return toUpperCase(s, locale.toLanguageTag()); 383 } 384 toUpperCase(String s, String languageTag)385 private static native String toUpperCase(String s, String languageTag); 386 387 // --- Errors. 388 389 // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU. 390 public static final int U_ZERO_ERROR = 0; 391 public static final int U_INVALID_CHAR_FOUND = 10; 392 public static final int U_TRUNCATED_CHAR_FOUND = 11; 393 public static final int U_ILLEGAL_CHAR_FOUND = 12; 394 public static final int U_BUFFER_OVERFLOW_ERROR = 15; 395 U_FAILURE(int error)396 public static boolean U_FAILURE(int error) { 397 return error > U_ZERO_ERROR; 398 } 399 400 // --- Native methods accessing ICU's database. 401 getAvailableBreakIteratorLocalesNative()402 private static native String[] getAvailableBreakIteratorLocalesNative(); getAvailableCalendarLocalesNative()403 private static native String[] getAvailableCalendarLocalesNative(); getAvailableCollatorLocalesNative()404 private static native String[] getAvailableCollatorLocalesNative(); getAvailableDateFormatLocalesNative()405 private static native String[] getAvailableDateFormatLocalesNative(); getAvailableLocalesNative()406 private static native String[] getAvailableLocalesNative(); getAvailableNumberFormatLocalesNative()407 private static native String[] getAvailableNumberFormatLocalesNative(); 408 getAvailableCurrencyCodes()409 public static native String[] getAvailableCurrencyCodes(); getCurrencyCode(String countryCode)410 public static native String getCurrencyCode(String countryCode); 411 getCurrencyDisplayName(Locale locale, String currencyCode)412 public static String getCurrencyDisplayName(Locale locale, String currencyCode) { 413 return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode); 414 } 415 getCurrencyDisplayName(String languageTag, String currencyCode)416 private static native String getCurrencyDisplayName(String languageTag, String currencyCode); 417 getCurrencyFractionDigits(String currencyCode)418 public static native int getCurrencyFractionDigits(String currencyCode); getCurrencyNumericCode(String currencyCode)419 public static native int getCurrencyNumericCode(String currencyCode); 420 getCurrencySymbol(Locale locale, String currencyCode)421 public static String getCurrencySymbol(Locale locale, String currencyCode) { 422 return getCurrencySymbol(locale.toLanguageTag(), currencyCode); 423 } 424 getCurrencySymbol(String languageTag, String currencyCode)425 private static native String getCurrencySymbol(String languageTag, String currencyCode); 426 getDisplayCountry(Locale targetLocale, Locale locale)427 public static String getDisplayCountry(Locale targetLocale, Locale locale) { 428 return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 429 } 430 getDisplayCountryNative(String targetLanguageTag, String languageTag)431 private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag); 432 getDisplayLanguage(Locale targetLocale, Locale locale)433 public static String getDisplayLanguage(Locale targetLocale, Locale locale) { 434 return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 435 } 436 getDisplayLanguageNative(String targetLanguageTag, String languageTag)437 private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag); 438 getDisplayVariant(Locale targetLocale, Locale locale)439 public static String getDisplayVariant(Locale targetLocale, Locale locale) { 440 return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 441 } 442 getDisplayVariantNative(String targetLanguageTag, String languageTag)443 private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag); 444 getDisplayScript(Locale targetLocale, Locale locale)445 public static String getDisplayScript(Locale targetLocale, Locale locale) { 446 return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 447 } 448 getDisplayScriptNative(String targetLanguageTag, String languageTag)449 private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag); 450 getISO3Country(String languageTag)451 public static native String getISO3Country(String languageTag); 452 getISO3Language(String languageTag)453 public static native String getISO3Language(String languageTag); 454 455 @UnsupportedAppUsage 456 @libcore.api.CorePlatformApi addLikelySubtags(Locale locale)457 public static Locale addLikelySubtags(Locale locale) { 458 return Locale.forLanguageTag(addLikelySubtags(locale.toLanguageTag()).replace('_', '-')); 459 } 460 461 /** 462 * @deprecated use {@link #addLikelySubtags(java.util.Locale)} instead. 463 */ 464 @UnsupportedAppUsage 465 @Deprecated addLikelySubtags(String locale)466 public static native String addLikelySubtags(String locale); 467 468 /** 469 * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept 470 * around only for the support library. 471 */ 472 @UnsupportedAppUsage 473 @Deprecated getScript(String locale)474 public static native String getScript(String locale); 475 getISOLanguagesNative()476 private static native String[] getISOLanguagesNative(); getISOCountriesNative()477 private static native String[] getISOCountriesNative(); 478 initLocaleDataNative(String languageTag, LocaleData result)479 static native boolean initLocaleDataNative(String languageTag, LocaleData result); 480 481 /** 482 * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US 483 */ setDefaultLocale(String languageTag)484 public static native void setDefaultLocale(String languageTag); 485 486 /** 487 * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US. 488 */ getDefaultLocale()489 public static native String getDefaultLocale(); 490 491 /** Returns the TZData version as reported by ICU4C. */ 492 @libcore.api.CorePlatformApi getTZDataVersion()493 public static native String getTZDataVersion(); 494 } 495