1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package libcore.icu;
18 
19 import dalvik.annotation.compat.UnsupportedAppUsage;
20 import java.util.Collections;
21 import java.util.HashMap;
22 import java.util.HashSet;
23 import java.util.LinkedHashSet;
24 import java.util.Locale;
25 import java.util.Map;
26 import java.util.Map.Entry;
27 import java.util.Set;
28 import libcore.util.BasicLruCache;
29 
30 /**
31  * Makes ICU data accessible to Java.
32  * @hide
33  */
34 @libcore.api.CorePlatformApi
35 public final class ICU {
36 
37   @UnsupportedAppUsage
38   private static final BasicLruCache<String, String> CACHED_PATTERNS =
39       new BasicLruCache<String, String>(8);
40 
41   private static Locale[] availableLocalesCache;
42 
43   private static String[] isoCountries;
44 
45   private static String[] isoLanguages;
46 
ICU()47   private ICU() {
48   }
49 
50   /**
51    * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
52    */
getISOLanguages()53   public static String[] getISOLanguages() {
54     if (isoLanguages == null) {
55       isoLanguages = getISOLanguagesNative();
56     }
57     return isoLanguages.clone();
58   }
59 
60   /**
61    * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
62    */
getISOCountries()63   public static String[] getISOCountries() {
64     if (isoCountries == null) {
65       isoCountries = getISOCountriesNative();
66     }
67     return isoCountries.clone();
68   }
69 
70   private static final int IDX_LANGUAGE = 0;
71   private static final int IDX_SCRIPT = 1;
72   private static final int IDX_REGION = 2;
73   private static final int IDX_VARIANT = 3;
74 
75   /*
76    * Parse the {Language, Script, Region, Variant*} section of the ICU locale
77    * ID. This is the bit that appears before the keyword separate "@". The general
78    * structure is a series of ASCII alphanumeric strings (subtags)
79    * separated by underscores.
80    *
81    * Each subtag is interpreted according to its position in the list of subtags
82    * AND its length (groan...). The various cases are explained in comments
83    * below.
84    */
parseLangScriptRegionAndVariants(String string, String[] outputArray)85   private static void parseLangScriptRegionAndVariants(String string,
86           String[] outputArray) {
87     final int first = string.indexOf('_');
88     final int second = string.indexOf('_', first + 1);
89     final int third = string.indexOf('_', second + 1);
90 
91     if (first == -1) {
92       outputArray[IDX_LANGUAGE] = string;
93     } else if (second == -1) {
94       // Language and country ("ja_JP") OR
95       // Language and script ("en_Latn") OR
96       // Language and variant ("en_POSIX").
97 
98       outputArray[IDX_LANGUAGE] = string.substring(0, first);
99       final String secondString = string.substring(first + 1);
100 
101       if (secondString.length() == 4) {
102           // 4 Letter ISO script code.
103           outputArray[IDX_SCRIPT] = secondString;
104       } else if (secondString.length() == 2 || secondString.length() == 3) {
105           // 2 or 3 Letter region code.
106           outputArray[IDX_REGION] = secondString;
107       } else {
108           // If we're here, the length of the second half is either 1 or greater
109           // than 5. Assume that ICU won't hand us malformed tags, and therefore
110           // assume the rest of the string is a series of variant tags.
111           outputArray[IDX_VARIANT] = secondString;
112       }
113     } else if (third == -1) {
114       // Language and country and variant ("ja_JP_TRADITIONAL") OR
115       // Language and script and variant ("en_Latn_POSIX") OR
116       // Language and script and region ("en_Latn_US"). OR
117       // Language and variant with multiple subtags ("en_POSIX_XISOP")
118 
119       outputArray[IDX_LANGUAGE] = string.substring(0, first);
120       final String secondString = string.substring(first + 1, second);
121       final String thirdString = string.substring(second + 1);
122 
123       if (secondString.length() == 4) {
124           // The second subtag is a script.
125           outputArray[IDX_SCRIPT] = secondString;
126 
127           // The third subtag can be either a region or a variant, depending
128           // on its length.
129           if (thirdString.length() == 2 || thirdString.length() == 3 ||
130                   thirdString.isEmpty()) {
131               outputArray[IDX_REGION] = thirdString;
132           } else {
133               outputArray[IDX_VARIANT] = thirdString;
134           }
135       } else if (secondString.isEmpty() ||
136               secondString.length() == 2 || secondString.length() == 3) {
137           // The second string is a region, and the third a variant.
138           outputArray[IDX_REGION] = secondString;
139           outputArray[IDX_VARIANT] = thirdString;
140       } else {
141           // Variant with multiple subtags.
142           outputArray[IDX_VARIANT] = string.substring(first + 1);
143       }
144     } else {
145       // Language, script, region and variant with 1 or more subtags
146       // ("en_Latn_US_POSIX") OR
147       // Language, region and variant with 2 or more subtags
148       // (en_US_POSIX_VARIANT).
149       outputArray[IDX_LANGUAGE] = string.substring(0, first);
150       final String secondString = string.substring(first + 1, second);
151       if (secondString.length() == 4) {
152           outputArray[IDX_SCRIPT] = secondString;
153           outputArray[IDX_REGION] = string.substring(second + 1, third);
154           outputArray[IDX_VARIANT] = string.substring(third + 1);
155       } else {
156           outputArray[IDX_REGION] = secondString;
157           outputArray[IDX_VARIANT] = string.substring(second + 1);
158       }
159     }
160   }
161 
162   /**
163    * Returns the appropriate {@code Locale} given a {@code String} of the form returned
164    * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
165    * this method can parse strings that {@code Locale.toString} won't produce.
166    * Used to remove duplication.
167    */
localeFromIcuLocaleId(String localeId)168   public static Locale localeFromIcuLocaleId(String localeId) {
169     // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
170     final int extensionsIndex = localeId.indexOf('@');
171 
172     Map<Character, String> extensionsMap = Collections.EMPTY_MAP;
173     Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP;
174     Set<String> unicodeAttributeSet = Collections.EMPTY_SET;
175 
176     if (extensionsIndex != -1) {
177       extensionsMap = new HashMap<Character, String>();
178       unicodeKeywordsMap = new HashMap<String, String>();
179       unicodeAttributeSet = new HashSet<String>();
180 
181       // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
182       // containing all "keywords" it could parse. An ICU keyword is a key-value pair
183       // separated by an "=" (ULOC_KEYWORD_ASSIGN).
184       //
185       // Each keyword item can be one of three things :
186       // - A unicode extension attribute list: In this case the item key is "attribute"
187       //   and the value is a hyphen separated list of unicode attributes.
188       // - A unicode extension keyword: In this case, the item key will be larger than
189       //   1 char in length, and the value will be the unicode extension value.
190       // - A BCP-47 extension subtag: In this case, the item key will be exactly one
191       //   char in length, and the value will be a sequence of unparsed subtags that
192       //   represent the extension.
193       //
194       // Note that this implies that unicode extension keywords are "promoted" to
195       // to the same namespace as the top level extension subtags and their values.
196       // There can't be any collisions in practice because the BCP-47 spec imposes
197       // restrictions on their lengths.
198       final String extensionsString = localeId.substring(extensionsIndex + 1);
199       final String[] extensions = extensionsString.split(";");
200       for (String extension : extensions) {
201         // This is the special key for the unicode attributes
202         if (extension.startsWith("attribute=")) {
203           String unicodeAttributeValues = extension.substring("attribute=".length());
204           for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
205             unicodeAttributeSet.add(unicodeAttribute);
206           }
207         } else {
208           final int separatorIndex = extension.indexOf('=');
209 
210           if (separatorIndex == 1) {
211             // This is a BCP-47 extension subtag.
212             final String value = extension.substring(2);
213             final char extensionId = extension.charAt(0);
214 
215             extensionsMap.put(extensionId, value);
216           } else {
217             // This is a unicode extension keyword.
218             unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
219             extension.substring(separatorIndex + 1));
220           }
221         }
222       }
223     }
224 
225     final String[] outputArray = new String[] { "", "", "", "" };
226     if (extensionsIndex == -1) {
227       parseLangScriptRegionAndVariants(localeId, outputArray);
228     } else {
229       parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
230           outputArray);
231     }
232     Locale.Builder builder = new Locale.Builder();
233     builder.setLanguage(outputArray[IDX_LANGUAGE]);
234     builder.setRegion(outputArray[IDX_REGION]);
235     builder.setVariant(outputArray[IDX_VARIANT]);
236     builder.setScript(outputArray[IDX_SCRIPT]);
237     for (String attribute : unicodeAttributeSet) {
238       builder.addUnicodeLocaleAttribute(attribute);
239     }
240     for (Entry<String, String> keyword : unicodeKeywordsMap.entrySet()) {
241       builder.setUnicodeLocaleKeyword(keyword.getKey(), keyword.getValue());
242     }
243 
244     for (Entry<Character, String> extension : extensionsMap.entrySet()) {
245       builder.setExtension(extension.getKey(), extension.getValue());
246     }
247 
248     return builder.build();
249   }
250 
localesFromStrings(String[] localeNames)251   public static Locale[] localesFromStrings(String[] localeNames) {
252     // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
253     // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
254     // both so that we never need to convert back when talking to it.
255     LinkedHashSet<Locale> set = new LinkedHashSet<Locale>();
256     for (String localeName : localeNames) {
257       set.add(localeFromIcuLocaleId(localeName));
258     }
259     return set.toArray(new Locale[set.size()]);
260   }
261 
getAvailableLocales()262   public static Locale[] getAvailableLocales() {
263     if (availableLocalesCache == null) {
264       availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
265     }
266     return availableLocalesCache.clone();
267   }
268 
getAvailableBreakIteratorLocales()269   public static Locale[] getAvailableBreakIteratorLocales() {
270     return localesFromStrings(getAvailableBreakIteratorLocalesNative());
271   }
272 
getAvailableCalendarLocales()273   public static Locale[] getAvailableCalendarLocales() {
274     return localesFromStrings(getAvailableCalendarLocalesNative());
275   }
276 
getAvailableCollatorLocales()277   public static Locale[] getAvailableCollatorLocales() {
278     return localesFromStrings(getAvailableCollatorLocalesNative());
279   }
280 
getAvailableDateFormatLocales()281   public static Locale[] getAvailableDateFormatLocales() {
282     return localesFromStrings(getAvailableDateFormatLocalesNative());
283   }
284 
getAvailableDateFormatSymbolsLocales()285   public static Locale[] getAvailableDateFormatSymbolsLocales() {
286     return getAvailableDateFormatLocales();
287   }
288 
getAvailableDecimalFormatSymbolsLocales()289   public static Locale[] getAvailableDecimalFormatSymbolsLocales() {
290     return getAvailableNumberFormatLocales();
291   }
292 
getAvailableNumberFormatLocales()293   public static Locale[] getAvailableNumberFormatLocales() {
294     return localesFromStrings(getAvailableNumberFormatLocalesNative());
295   }
296 
297   @UnsupportedAppUsage
298   @libcore.api.CorePlatformApi
getBestDateTimePattern(String skeleton, Locale locale)299   public static String getBestDateTimePattern(String skeleton, Locale locale) {
300     String languageTag = locale.toLanguageTag();
301     String key = skeleton + "\t" + languageTag;
302     synchronized (CACHED_PATTERNS) {
303       String pattern = CACHED_PATTERNS.get(key);
304       if (pattern == null) {
305         pattern = getBestDateTimePatternNative(skeleton, languageTag);
306         CACHED_PATTERNS.put(key, pattern);
307       }
308       return pattern;
309     }
310   }
311 
312   @UnsupportedAppUsage
getBestDateTimePatternNative(String skeleton, String languageTag)313   private static native String getBestDateTimePatternNative(String skeleton, String languageTag);
314 
315   @UnsupportedAppUsage
316   @libcore.api.CorePlatformApi
getDateFormatOrder(String pattern)317   public static char[] getDateFormatOrder(String pattern) {
318     char[] result = new char[3];
319     int resultIndex = 0;
320     boolean sawDay = false;
321     boolean sawMonth = false;
322     boolean sawYear = false;
323 
324     for (int i = 0; i < pattern.length(); ++i) {
325       char ch = pattern.charAt(i);
326       if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
327         if (ch == 'd' && !sawDay) {
328           result[resultIndex++] = 'd';
329           sawDay = true;
330         } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
331           result[resultIndex++] = 'M';
332           sawMonth = true;
333         } else if ((ch == 'y') && !sawYear) {
334           result[resultIndex++] = 'y';
335           sawYear = true;
336         }
337       } else if (ch == 'G') {
338         // Ignore the era specifier, if present.
339       } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
340         throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
341       } else if (ch == '\'') {
342         if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
343           ++i;
344         } else {
345           i = pattern.indexOf('\'', i + 1);
346           if (i == -1) {
347             throw new IllegalArgumentException("Bad quoting in " + pattern);
348           }
349           ++i;
350         }
351       } else {
352         // Ignore spaces and punctuation.
353       }
354     }
355     return result;
356   }
357 
358   /**
359    * Returns the version of the CLDR data in use, such as "22.1.1".
360    */
getCldrVersion()361   public static native String getCldrVersion();
362 
363   /**
364    * Returns the icu4c version in use, such as "50.1.1".
365    */
getIcuVersion()366   public static native String getIcuVersion();
367 
368   /**
369    * Returns the Unicode version our ICU supports, such as "6.2".
370    */
getUnicodeVersion()371   public static native String getUnicodeVersion();
372 
373   // --- Case mapping.
374 
toLowerCase(String s, Locale locale)375   public static String toLowerCase(String s, Locale locale) {
376     return toLowerCase(s, locale.toLanguageTag());
377   }
378 
toLowerCase(String s, String languageTag)379   private static native String toLowerCase(String s, String languageTag);
380 
toUpperCase(String s, Locale locale)381   public static String toUpperCase(String s, Locale locale) {
382     return toUpperCase(s, locale.toLanguageTag());
383   }
384 
toUpperCase(String s, String languageTag)385   private static native String toUpperCase(String s, String languageTag);
386 
387   // --- Errors.
388 
389   // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU.
390   public static final int U_ZERO_ERROR = 0;
391   public static final int U_INVALID_CHAR_FOUND = 10;
392   public static final int U_TRUNCATED_CHAR_FOUND = 11;
393   public static final int U_ILLEGAL_CHAR_FOUND = 12;
394   public static final int U_BUFFER_OVERFLOW_ERROR = 15;
395 
U_FAILURE(int error)396   public static boolean U_FAILURE(int error) {
397     return error > U_ZERO_ERROR;
398   }
399 
400   // --- Native methods accessing ICU's database.
401 
getAvailableBreakIteratorLocalesNative()402   private static native String[] getAvailableBreakIteratorLocalesNative();
getAvailableCalendarLocalesNative()403   private static native String[] getAvailableCalendarLocalesNative();
getAvailableCollatorLocalesNative()404   private static native String[] getAvailableCollatorLocalesNative();
getAvailableDateFormatLocalesNative()405   private static native String[] getAvailableDateFormatLocalesNative();
getAvailableLocalesNative()406   private static native String[] getAvailableLocalesNative();
getAvailableNumberFormatLocalesNative()407   private static native String[] getAvailableNumberFormatLocalesNative();
408 
getAvailableCurrencyCodes()409   public static native String[] getAvailableCurrencyCodes();
getCurrencyCode(String countryCode)410   public static native String getCurrencyCode(String countryCode);
411 
getCurrencyDisplayName(Locale locale, String currencyCode)412   public static String getCurrencyDisplayName(Locale locale, String currencyCode) {
413     return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode);
414   }
415 
getCurrencyDisplayName(String languageTag, String currencyCode)416   private static native String getCurrencyDisplayName(String languageTag, String currencyCode);
417 
getCurrencyFractionDigits(String currencyCode)418   public static native int getCurrencyFractionDigits(String currencyCode);
getCurrencyNumericCode(String currencyCode)419   public static native int getCurrencyNumericCode(String currencyCode);
420 
getCurrencySymbol(Locale locale, String currencyCode)421   public static String getCurrencySymbol(Locale locale, String currencyCode) {
422     return getCurrencySymbol(locale.toLanguageTag(), currencyCode);
423   }
424 
getCurrencySymbol(String languageTag, String currencyCode)425   private static native String getCurrencySymbol(String languageTag, String currencyCode);
426 
getDisplayCountry(Locale targetLocale, Locale locale)427   public static String getDisplayCountry(Locale targetLocale, Locale locale) {
428     return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
429   }
430 
getDisplayCountryNative(String targetLanguageTag, String languageTag)431   private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag);
432 
getDisplayLanguage(Locale targetLocale, Locale locale)433   public static String getDisplayLanguage(Locale targetLocale, Locale locale) {
434     return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
435   }
436 
getDisplayLanguageNative(String targetLanguageTag, String languageTag)437   private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag);
438 
getDisplayVariant(Locale targetLocale, Locale locale)439   public static String getDisplayVariant(Locale targetLocale, Locale locale) {
440     return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
441   }
442 
getDisplayVariantNative(String targetLanguageTag, String languageTag)443   private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag);
444 
getDisplayScript(Locale targetLocale, Locale locale)445   public static String getDisplayScript(Locale targetLocale, Locale locale) {
446     return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
447   }
448 
getDisplayScriptNative(String targetLanguageTag, String languageTag)449   private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag);
450 
getISO3Country(String languageTag)451   public static native String getISO3Country(String languageTag);
452 
getISO3Language(String languageTag)453   public static native String getISO3Language(String languageTag);
454 
455   @UnsupportedAppUsage
456   @libcore.api.CorePlatformApi
addLikelySubtags(Locale locale)457   public static Locale addLikelySubtags(Locale locale) {
458       return Locale.forLanguageTag(addLikelySubtags(locale.toLanguageTag()).replace('_', '-'));
459   }
460 
461   /**
462    * @deprecated use {@link #addLikelySubtags(java.util.Locale)} instead.
463    */
464   @UnsupportedAppUsage
465   @Deprecated
addLikelySubtags(String locale)466   public static native String addLikelySubtags(String locale);
467 
468   /**
469    * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept
470    *     around only for the support library.
471    */
472   @UnsupportedAppUsage
473   @Deprecated
getScript(String locale)474   public static native String getScript(String locale);
475 
getISOLanguagesNative()476   private static native String[] getISOLanguagesNative();
getISOCountriesNative()477   private static native String[] getISOCountriesNative();
478 
initLocaleDataNative(String languageTag, LocaleData result)479   static native boolean initLocaleDataNative(String languageTag, LocaleData result);
480 
481   /**
482    * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US
483    */
setDefaultLocale(String languageTag)484   public static native void setDefaultLocale(String languageTag);
485 
486   /**
487    * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US.
488    */
getDefaultLocale()489   public static native String getDefaultLocale();
490 
491   /** Returns the TZData version as reported by ICU4C. */
492   @libcore.api.CorePlatformApi
getTZDataVersion()493   public static native String getTZDataVersion();
494 }
495