1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package libcore.icu; 18 19 import java.util.Collections; 20 import java.util.HashMap; 21 import java.util.HashSet; 22 import java.util.LinkedHashSet; 23 import java.util.Locale; 24 import java.util.Map; 25 import java.util.Set; 26 import libcore.util.BasicLruCache; 27 28 /** 29 * Makes ICU data accessible to Java. 30 */ 31 public final class ICU { 32 private static final BasicLruCache<String, String> CACHED_PATTERNS = 33 new BasicLruCache<String, String>(8); 34 35 private static Locale[] availableLocalesCache; 36 37 private static String[] isoCountries; 38 39 private static String[] isoLanguages; 40 41 /** 42 * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache. 43 */ getISOLanguages()44 public static String[] getISOLanguages() { 45 if (isoLanguages == null) { 46 isoLanguages = getISOLanguagesNative(); 47 } 48 return isoLanguages.clone(); 49 } 50 51 /** 52 * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache. 53 */ getISOCountries()54 public static String[] getISOCountries() { 55 if (isoCountries == null) { 56 isoCountries = getISOCountriesNative(); 57 } 58 return isoCountries.clone(); 59 } 60 61 private static final int IDX_LANGUAGE = 0; 62 private static final int IDX_SCRIPT = 1; 63 private static final int IDX_REGION = 2; 64 private static final int IDX_VARIANT = 3; 65 66 /* 67 * Parse the {Language, Script, Region, Variant*} section of the ICU locale 68 * ID. This is the bit that appears before the keyword separate "@". The general 69 * structure is a series of ASCII alphanumeric strings (subtags) 70 * separated by underscores. 71 * 72 * Each subtag is interpreted according to its position in the list of subtags 73 * AND its length (groan...). The various cases are explained in comments 74 * below. 75 */ parseLangScriptRegionAndVariants(String string, String[] outputArray)76 private static void parseLangScriptRegionAndVariants(String string, 77 String[] outputArray) { 78 final int first = string.indexOf('_'); 79 final int second = string.indexOf('_', first + 1); 80 final int third = string.indexOf('_', second + 1); 81 82 if (first == -1) { 83 outputArray[IDX_LANGUAGE] = string; 84 } else if (second == -1) { 85 // Language and country ("ja_JP") OR 86 // Language and script ("en_Latn") OR 87 // Language and variant ("en_POSIX"). 88 89 outputArray[IDX_LANGUAGE] = string.substring(0, first); 90 final String secondString = string.substring(first + 1); 91 92 if (secondString.length() == 4) { 93 // 4 Letter ISO script code. 94 outputArray[IDX_SCRIPT] = secondString; 95 } else if (secondString.length() == 2 || secondString.length() == 3) { 96 // 2 or 3 Letter region code. 97 outputArray[IDX_REGION] = secondString; 98 } else { 99 // If we're here, the length of the second half is either 1 or greater 100 // than 5. Assume that ICU won't hand us malformed tags, and therefore 101 // assume the rest of the string is a series of variant tags. 102 outputArray[IDX_VARIANT] = secondString; 103 } 104 } else if (third == -1) { 105 // Language and country and variant ("ja_JP_TRADITIONAL") OR 106 // Language and script and variant ("en_Latn_POSIX") OR 107 // Language and script and region ("en_Latn_US"). OR 108 // Language and variant with multiple subtags ("en_POSIX_XISOP") 109 110 outputArray[IDX_LANGUAGE] = string.substring(0, first); 111 final String secondString = string.substring(first + 1, second); 112 final String thirdString = string.substring(second + 1); 113 114 if (secondString.length() == 4) { 115 // The second subtag is a script. 116 outputArray[IDX_SCRIPT] = secondString; 117 118 // The third subtag can be either a region or a variant, depending 119 // on its length. 120 if (thirdString.length() == 2 || thirdString.length() == 3 || 121 thirdString.isEmpty()) { 122 outputArray[IDX_REGION] = thirdString; 123 } else { 124 outputArray[IDX_VARIANT] = thirdString; 125 } 126 } else if (secondString.isEmpty() || 127 secondString.length() == 2 || secondString.length() == 3) { 128 // The second string is a region, and the third a variant. 129 outputArray[IDX_REGION] = secondString; 130 outputArray[IDX_VARIANT] = thirdString; 131 } else { 132 // Variant with multiple subtags. 133 outputArray[IDX_VARIANT] = string.substring(first + 1); 134 } 135 } else { 136 // Language, script, region and variant with 1 or more subtags 137 // ("en_Latn_US_POSIX") OR 138 // Language, region and variant with 2 or more subtags 139 // (en_US_POSIX_VARIANT). 140 outputArray[IDX_LANGUAGE] = string.substring(0, first); 141 final String secondString = string.substring(first + 1, second); 142 if (secondString.length() == 4) { 143 outputArray[IDX_SCRIPT] = secondString; 144 outputArray[IDX_REGION] = string.substring(second + 1, third); 145 outputArray[IDX_VARIANT] = string.substring(third + 1); 146 } else { 147 outputArray[IDX_REGION] = secondString; 148 outputArray[IDX_VARIANT] = string.substring(second + 1); 149 } 150 } 151 } 152 153 /** 154 * Returns the appropriate {@code Locale} given a {@code String} of the form returned 155 * by {@code toString}. This is very lenient, and doesn't care what's between the underscores: 156 * this method can parse strings that {@code Locale.toString} won't produce. 157 * Used to remove duplication. 158 */ localeFromIcuLocaleId(String localeId)159 public static Locale localeFromIcuLocaleId(String localeId) { 160 // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h). 161 final int extensionsIndex = localeId.indexOf('@'); 162 163 Map<Character, String> extensionsMap = Collections.EMPTY_MAP; 164 Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP; 165 Set<String> unicodeAttributeSet = Collections.EMPTY_SET; 166 167 if (extensionsIndex != -1) { 168 extensionsMap = new HashMap<Character, String>(); 169 unicodeKeywordsMap = new HashMap<String, String>(); 170 unicodeAttributeSet = new HashSet<String>(); 171 172 // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string 173 // containing all "keywords" it could parse. An ICU keyword is a key-value pair 174 // separated by an "=" (ULOC_KEYWORD_ASSIGN). 175 // 176 // Each keyword item can be one of three things : 177 // - A unicode extension attribute list: In this case the item key is "attribute" 178 // and the value is a hyphen separated list of unicode attributes. 179 // - A unicode extension keyword: In this case, the item key will be larger than 180 // 1 char in length, and the value will be the unicode extension value. 181 // - A BCP-47 extension subtag: In this case, the item key will be exactly one 182 // char in length, and the value will be a sequence of unparsed subtags that 183 // represent the extension. 184 // 185 // Note that this implies that unicode extension keywords are "promoted" to 186 // to the same namespace as the top level extension subtags and their values. 187 // There can't be any collisions in practice because the BCP-47 spec imposes 188 // restrictions on their lengths. 189 final String extensionsString = localeId.substring(extensionsIndex + 1); 190 final String[] extensions = extensionsString.split(";"); 191 for (String extension : extensions) { 192 // This is the special key for the unicode attributes 193 if (extension.startsWith("attribute=")) { 194 String unicodeAttributeValues = extension.substring("attribute=".length()); 195 for (String unicodeAttribute : unicodeAttributeValues.split("-")) { 196 unicodeAttributeSet.add(unicodeAttribute); 197 } 198 } else { 199 final int separatorIndex = extension.indexOf('='); 200 201 if (separatorIndex == 1) { 202 // This is a BCP-47 extension subtag. 203 final String value = extension.substring(2); 204 final char extensionId = extension.charAt(0); 205 206 extensionsMap.put(extensionId, value); 207 } else { 208 // This is a unicode extension keyword. 209 unicodeKeywordsMap.put(extension.substring(0, separatorIndex), 210 extension.substring(separatorIndex + 1)); 211 } 212 } 213 } 214 } 215 216 final String[] outputArray = new String[] { "", "", "", "" }; 217 if (extensionsIndex == -1) { 218 parseLangScriptRegionAndVariants(localeId, outputArray); 219 } else { 220 parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex), 221 outputArray); 222 } 223 224 return new Locale(outputArray[IDX_LANGUAGE], outputArray[IDX_REGION], 225 outputArray[IDX_VARIANT], outputArray[IDX_SCRIPT], 226 unicodeAttributeSet, unicodeKeywordsMap, extensionsMap, 227 true /* has validated fields */); 228 } 229 localesFromStrings(String[] localeNames)230 public static Locale[] localesFromStrings(String[] localeNames) { 231 // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera. 232 // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about 233 // both so that we never need to convert back when talking to it. 234 LinkedHashSet<Locale> set = new LinkedHashSet<Locale>(); 235 for (String localeName : localeNames) { 236 set.add(localeFromIcuLocaleId(localeName)); 237 } 238 return set.toArray(new Locale[set.size()]); 239 } 240 getAvailableLocales()241 public static Locale[] getAvailableLocales() { 242 if (availableLocalesCache == null) { 243 availableLocalesCache = localesFromStrings(getAvailableLocalesNative()); 244 } 245 return availableLocalesCache.clone(); 246 } 247 getAvailableBreakIteratorLocales()248 public static Locale[] getAvailableBreakIteratorLocales() { 249 return localesFromStrings(getAvailableBreakIteratorLocalesNative()); 250 } 251 getAvailableCalendarLocales()252 public static Locale[] getAvailableCalendarLocales() { 253 return localesFromStrings(getAvailableCalendarLocalesNative()); 254 } 255 getAvailableCollatorLocales()256 public static Locale[] getAvailableCollatorLocales() { 257 return localesFromStrings(getAvailableCollatorLocalesNative()); 258 } 259 getAvailableDateFormatLocales()260 public static Locale[] getAvailableDateFormatLocales() { 261 return localesFromStrings(getAvailableDateFormatLocalesNative()); 262 } 263 getAvailableDateFormatSymbolsLocales()264 public static Locale[] getAvailableDateFormatSymbolsLocales() { 265 return getAvailableDateFormatLocales(); 266 } 267 getAvailableDecimalFormatSymbolsLocales()268 public static Locale[] getAvailableDecimalFormatSymbolsLocales() { 269 return getAvailableNumberFormatLocales(); 270 } 271 getAvailableNumberFormatLocales()272 public static Locale[] getAvailableNumberFormatLocales() { 273 return localesFromStrings(getAvailableNumberFormatLocalesNative()); 274 } 275 getBestDateTimePattern(String skeleton, Locale locale)276 public static String getBestDateTimePattern(String skeleton, Locale locale) { 277 String languageTag = locale.toLanguageTag(); 278 String key = skeleton + "\t" + languageTag; 279 synchronized (CACHED_PATTERNS) { 280 String pattern = CACHED_PATTERNS.get(key); 281 if (pattern == null) { 282 pattern = getBestDateTimePatternNative(skeleton, languageTag); 283 CACHED_PATTERNS.put(key, pattern); 284 } 285 return pattern; 286 } 287 } 288 getBestDateTimePatternNative(String skeleton, String languageTag)289 private static native String getBestDateTimePatternNative(String skeleton, String languageTag); 290 getDateFormatOrder(String pattern)291 public static char[] getDateFormatOrder(String pattern) { 292 char[] result = new char[3]; 293 int resultIndex = 0; 294 boolean sawDay = false; 295 boolean sawMonth = false; 296 boolean sawYear = false; 297 298 for (int i = 0; i < pattern.length(); ++i) { 299 char ch = pattern.charAt(i); 300 if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') { 301 if (ch == 'd' && !sawDay) { 302 result[resultIndex++] = 'd'; 303 sawDay = true; 304 } else if ((ch == 'L' || ch == 'M') && !sawMonth) { 305 result[resultIndex++] = 'M'; 306 sawMonth = true; 307 } else if ((ch == 'y') && !sawYear) { 308 result[resultIndex++] = 'y'; 309 sawYear = true; 310 } 311 } else if (ch == 'G') { 312 // Ignore the era specifier, if present. 313 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 314 throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern); 315 } else if (ch == '\'') { 316 if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') { 317 ++i; 318 } else { 319 i = pattern.indexOf('\'', i + 1); 320 if (i == -1) { 321 throw new IllegalArgumentException("Bad quoting in " + pattern); 322 } 323 ++i; 324 } 325 } else { 326 // Ignore spaces and punctuation. 327 } 328 } 329 return result; 330 } 331 332 /** 333 * Returns the version of the CLDR data in use, such as "22.1.1". 334 */ getCldrVersion()335 public static native String getCldrVersion(); 336 337 /** 338 * Returns the icu4c version in use, such as "50.1.1". 339 */ getIcuVersion()340 public static native String getIcuVersion(); 341 342 /** 343 * Returns the Unicode version our ICU supports, such as "6.2". 344 */ getUnicodeVersion()345 public static native String getUnicodeVersion(); 346 347 // --- Case mapping. 348 toLowerCase(String s, Locale locale)349 public static String toLowerCase(String s, Locale locale) { 350 return toLowerCase(s, locale.toLanguageTag()); 351 } 352 toLowerCase(String s, String languageTag)353 private static native String toLowerCase(String s, String languageTag); 354 toUpperCase(String s, Locale locale)355 public static String toUpperCase(String s, Locale locale) { 356 return toUpperCase(s, locale.toLanguageTag()); 357 } 358 toUpperCase(String s, String languageTag)359 private static native String toUpperCase(String s, String languageTag); 360 361 // --- Errors. 362 363 // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU. 364 public static final int U_ZERO_ERROR = 0; 365 public static final int U_INVALID_CHAR_FOUND = 10; 366 public static final int U_TRUNCATED_CHAR_FOUND = 11; 367 public static final int U_ILLEGAL_CHAR_FOUND = 12; 368 public static final int U_BUFFER_OVERFLOW_ERROR = 15; 369 U_FAILURE(int error)370 public static boolean U_FAILURE(int error) { 371 return error > U_ZERO_ERROR; 372 } 373 374 // --- Native methods accessing ICU's database. 375 getAvailableBreakIteratorLocalesNative()376 private static native String[] getAvailableBreakIteratorLocalesNative(); getAvailableCalendarLocalesNative()377 private static native String[] getAvailableCalendarLocalesNative(); getAvailableCollatorLocalesNative()378 private static native String[] getAvailableCollatorLocalesNative(); getAvailableDateFormatLocalesNative()379 private static native String[] getAvailableDateFormatLocalesNative(); getAvailableLocalesNative()380 private static native String[] getAvailableLocalesNative(); getAvailableNumberFormatLocalesNative()381 private static native String[] getAvailableNumberFormatLocalesNative(); 382 getAvailableCurrencyCodes()383 public static native String[] getAvailableCurrencyCodes(); getCurrencyCode(String countryCode)384 public static native String getCurrencyCode(String countryCode); 385 getCurrencyDisplayName(Locale locale, String currencyCode)386 public static String getCurrencyDisplayName(Locale locale, String currencyCode) { 387 return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode); 388 } 389 getCurrencyDisplayName(String languageTag, String currencyCode)390 private static native String getCurrencyDisplayName(String languageTag, String currencyCode); 391 getCurrencyFractionDigits(String currencyCode)392 public static native int getCurrencyFractionDigits(String currencyCode); getCurrencyNumericCode(String currencyCode)393 public static native int getCurrencyNumericCode(String currencyCode); 394 getCurrencySymbol(Locale locale, String currencyCode)395 public static String getCurrencySymbol(Locale locale, String currencyCode) { 396 return getCurrencySymbol(locale.toLanguageTag(), currencyCode); 397 } 398 getCurrencySymbol(String languageTag, String currencyCode)399 private static native String getCurrencySymbol(String languageTag, String currencyCode); 400 getDisplayCountry(Locale targetLocale, Locale locale)401 public static String getDisplayCountry(Locale targetLocale, Locale locale) { 402 return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 403 } 404 getDisplayCountryNative(String targetLanguageTag, String languageTag)405 private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag); 406 getDisplayLanguage(Locale targetLocale, Locale locale)407 public static String getDisplayLanguage(Locale targetLocale, Locale locale) { 408 return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 409 } 410 getDisplayLanguageNative(String targetLanguageTag, String languageTag)411 private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag); 412 getDisplayVariant(Locale targetLocale, Locale locale)413 public static String getDisplayVariant(Locale targetLocale, Locale locale) { 414 return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 415 } 416 getDisplayVariantNative(String targetLanguageTag, String languageTag)417 private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag); 418 getDisplayScript(Locale targetLocale, Locale locale)419 public static String getDisplayScript(Locale targetLocale, Locale locale) { 420 return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag()); 421 } 422 getDisplayScriptNative(String targetLanguageTag, String languageTag)423 private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag); 424 getISO3Country(String languageTag)425 public static native String getISO3Country(String languageTag); 426 getISO3Language(String languageTag)427 public static native String getISO3Language(String languageTag); 428 addLikelySubtags(Locale locale)429 public static Locale addLikelySubtags(Locale locale) { 430 return Locale.forLanguageTag(addLikelySubtags(locale.toLanguageTag()).replace('_', '-')); 431 } 432 433 /** 434 * @deprecated use {@link #addLikelySubtags(java.util.Locale)} instead. 435 */ 436 @Deprecated addLikelySubtags(String locale)437 public static native String addLikelySubtags(String locale); 438 439 /** 440 * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept 441 * around only for the support library. 442 */ 443 @Deprecated getScript(String locale)444 public static native String getScript(String locale); 445 getISOLanguagesNative()446 private static native String[] getISOLanguagesNative(); getISOCountriesNative()447 private static native String[] getISOCountriesNative(); 448 initLocaleDataNative(String languageTag, LocaleData result)449 static native boolean initLocaleDataNative(String languageTag, LocaleData result); 450 451 /** 452 * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US 453 */ setDefaultLocale(String languageTag)454 public static native void setDefaultLocale(String languageTag); 455 456 /** 457 * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US. 458 */ getDefaultLocale()459 public static native String getDefaultLocale(); 460 } 461