1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package libcore.icu;
18 
19 import java.util.Collections;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.LinkedHashSet;
23 import java.util.Locale;
24 import java.util.Map;
25 import java.util.Map.Entry;
26 import java.util.Set;
27 import libcore.util.BasicLruCache;
28 
29 /**
30  * Makes ICU data accessible to Java.
31  */
32 public final class ICU {
33   private static final BasicLruCache<String, String> CACHED_PATTERNS =
34       new BasicLruCache<String, String>(8);
35 
36   private static Locale[] availableLocalesCache;
37 
38   private static String[] isoCountries;
39 
40   private static String[] isoLanguages;
41 
42   /**
43    * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
44    */
getISOLanguages()45   public static String[] getISOLanguages() {
46     if (isoLanguages == null) {
47       isoLanguages = getISOLanguagesNative();
48     }
49     return isoLanguages.clone();
50   }
51 
52   /**
53    * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
54    */
getISOCountries()55   public static String[] getISOCountries() {
56     if (isoCountries == null) {
57       isoCountries = getISOCountriesNative();
58     }
59     return isoCountries.clone();
60   }
61 
62   private static final int IDX_LANGUAGE = 0;
63   private static final int IDX_SCRIPT = 1;
64   private static final int IDX_REGION = 2;
65   private static final int IDX_VARIANT = 3;
66 
67   /*
68    * Parse the {Language, Script, Region, Variant*} section of the ICU locale
69    * ID. This is the bit that appears before the keyword separate "@". The general
70    * structure is a series of ASCII alphanumeric strings (subtags)
71    * separated by underscores.
72    *
73    * Each subtag is interpreted according to its position in the list of subtags
74    * AND its length (groan...). The various cases are explained in comments
75    * below.
76    */
parseLangScriptRegionAndVariants(String string, String[] outputArray)77   private static void parseLangScriptRegionAndVariants(String string,
78           String[] outputArray) {
79     final int first = string.indexOf('_');
80     final int second = string.indexOf('_', first + 1);
81     final int third = string.indexOf('_', second + 1);
82 
83     if (first == -1) {
84       outputArray[IDX_LANGUAGE] = string;
85     } else if (second == -1) {
86       // Language and country ("ja_JP") OR
87       // Language and script ("en_Latn") OR
88       // Language and variant ("en_POSIX").
89 
90       outputArray[IDX_LANGUAGE] = string.substring(0, first);
91       final String secondString = string.substring(first + 1);
92 
93       if (secondString.length() == 4) {
94           // 4 Letter ISO script code.
95           outputArray[IDX_SCRIPT] = secondString;
96       } else if (secondString.length() == 2 || secondString.length() == 3) {
97           // 2 or 3 Letter region code.
98           outputArray[IDX_REGION] = secondString;
99       } else {
100           // If we're here, the length of the second half is either 1 or greater
101           // than 5. Assume that ICU won't hand us malformed tags, and therefore
102           // assume the rest of the string is a series of variant tags.
103           outputArray[IDX_VARIANT] = secondString;
104       }
105     } else if (third == -1) {
106       // Language and country and variant ("ja_JP_TRADITIONAL") OR
107       // Language and script and variant ("en_Latn_POSIX") OR
108       // Language and script and region ("en_Latn_US"). OR
109       // Language and variant with multiple subtags ("en_POSIX_XISOP")
110 
111       outputArray[IDX_LANGUAGE] = string.substring(0, first);
112       final String secondString = string.substring(first + 1, second);
113       final String thirdString = string.substring(second + 1);
114 
115       if (secondString.length() == 4) {
116           // The second subtag is a script.
117           outputArray[IDX_SCRIPT] = secondString;
118 
119           // The third subtag can be either a region or a variant, depending
120           // on its length.
121           if (thirdString.length() == 2 || thirdString.length() == 3 ||
122                   thirdString.isEmpty()) {
123               outputArray[IDX_REGION] = thirdString;
124           } else {
125               outputArray[IDX_VARIANT] = thirdString;
126           }
127       } else if (secondString.isEmpty() ||
128               secondString.length() == 2 || secondString.length() == 3) {
129           // The second string is a region, and the third a variant.
130           outputArray[IDX_REGION] = secondString;
131           outputArray[IDX_VARIANT] = thirdString;
132       } else {
133           // Variant with multiple subtags.
134           outputArray[IDX_VARIANT] = string.substring(first + 1);
135       }
136     } else {
137       // Language, script, region and variant with 1 or more subtags
138       // ("en_Latn_US_POSIX") OR
139       // Language, region and variant with 2 or more subtags
140       // (en_US_POSIX_VARIANT).
141       outputArray[IDX_LANGUAGE] = string.substring(0, first);
142       final String secondString = string.substring(first + 1, second);
143       if (secondString.length() == 4) {
144           outputArray[IDX_SCRIPT] = secondString;
145           outputArray[IDX_REGION] = string.substring(second + 1, third);
146           outputArray[IDX_VARIANT] = string.substring(third + 1);
147       } else {
148           outputArray[IDX_REGION] = secondString;
149           outputArray[IDX_VARIANT] = string.substring(second + 1);
150       }
151     }
152   }
153 
154   /**
155    * Returns the appropriate {@code Locale} given a {@code String} of the form returned
156    * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
157    * this method can parse strings that {@code Locale.toString} won't produce.
158    * Used to remove duplication.
159    */
localeFromIcuLocaleId(String localeId)160   public static Locale localeFromIcuLocaleId(String localeId) {
161     // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
162     final int extensionsIndex = localeId.indexOf('@');
163 
164     Map<Character, String> extensionsMap = Collections.EMPTY_MAP;
165     Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP;
166     Set<String> unicodeAttributeSet = Collections.EMPTY_SET;
167 
168     if (extensionsIndex != -1) {
169       extensionsMap = new HashMap<Character, String>();
170       unicodeKeywordsMap = new HashMap<String, String>();
171       unicodeAttributeSet = new HashSet<String>();
172 
173       // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
174       // containing all "keywords" it could parse. An ICU keyword is a key-value pair
175       // separated by an "=" (ULOC_KEYWORD_ASSIGN).
176       //
177       // Each keyword item can be one of three things :
178       // - A unicode extension attribute list: In this case the item key is "attribute"
179       //   and the value is a hyphen separated list of unicode attributes.
180       // - A unicode extension keyword: In this case, the item key will be larger than
181       //   1 char in length, and the value will be the unicode extension value.
182       // - A BCP-47 extension subtag: In this case, the item key will be exactly one
183       //   char in length, and the value will be a sequence of unparsed subtags that
184       //   represent the extension.
185       //
186       // Note that this implies that unicode extension keywords are "promoted" to
187       // to the same namespace as the top level extension subtags and their values.
188       // There can't be any collisions in practice because the BCP-47 spec imposes
189       // restrictions on their lengths.
190       final String extensionsString = localeId.substring(extensionsIndex + 1);
191       final String[] extensions = extensionsString.split(";");
192       for (String extension : extensions) {
193         // This is the special key for the unicode attributes
194         if (extension.startsWith("attribute=")) {
195           String unicodeAttributeValues = extension.substring("attribute=".length());
196           for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
197             unicodeAttributeSet.add(unicodeAttribute);
198           }
199         } else {
200           final int separatorIndex = extension.indexOf('=');
201 
202           if (separatorIndex == 1) {
203             // This is a BCP-47 extension subtag.
204             final String value = extension.substring(2);
205             final char extensionId = extension.charAt(0);
206 
207             extensionsMap.put(extensionId, value);
208           } else {
209             // This is a unicode extension keyword.
210             unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
211             extension.substring(separatorIndex + 1));
212           }
213         }
214       }
215     }
216 
217     final String[] outputArray = new String[] { "", "", "", "" };
218     if (extensionsIndex == -1) {
219       parseLangScriptRegionAndVariants(localeId, outputArray);
220     } else {
221       parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
222           outputArray);
223     }
224     Locale.Builder builder = new Locale.Builder();
225     builder.setLanguage(outputArray[IDX_LANGUAGE]);
226     builder.setRegion(outputArray[IDX_REGION]);
227     builder.setVariant(outputArray[IDX_VARIANT]);
228     builder.setScript(outputArray[IDX_SCRIPT]);
229     for (String attribute : unicodeAttributeSet) {
230       builder.addUnicodeLocaleAttribute(attribute);
231     }
232     for (Entry<String, String> keyword : unicodeKeywordsMap.entrySet()) {
233       builder.setUnicodeLocaleKeyword(keyword.getKey(), keyword.getValue());
234     }
235 
236     for (Entry<Character, String> extension : extensionsMap.entrySet()) {
237       builder.setExtension(extension.getKey(), extension.getValue());
238     }
239 
240     return builder.build();
241   }
242 
localesFromStrings(String[] localeNames)243   public static Locale[] localesFromStrings(String[] localeNames) {
244     // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
245     // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
246     // both so that we never need to convert back when talking to it.
247     LinkedHashSet<Locale> set = new LinkedHashSet<Locale>();
248     for (String localeName : localeNames) {
249       set.add(localeFromIcuLocaleId(localeName));
250     }
251     return set.toArray(new Locale[set.size()]);
252   }
253 
getAvailableLocales()254   public static Locale[] getAvailableLocales() {
255     if (availableLocalesCache == null) {
256       availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
257     }
258     return availableLocalesCache.clone();
259   }
260 
getAvailableBreakIteratorLocales()261   public static Locale[] getAvailableBreakIteratorLocales() {
262     return localesFromStrings(getAvailableBreakIteratorLocalesNative());
263   }
264 
getAvailableCalendarLocales()265   public static Locale[] getAvailableCalendarLocales() {
266     return localesFromStrings(getAvailableCalendarLocalesNative());
267   }
268 
getAvailableCollatorLocales()269   public static Locale[] getAvailableCollatorLocales() {
270     return localesFromStrings(getAvailableCollatorLocalesNative());
271   }
272 
getAvailableDateFormatLocales()273   public static Locale[] getAvailableDateFormatLocales() {
274     return localesFromStrings(getAvailableDateFormatLocalesNative());
275   }
276 
getAvailableDateFormatSymbolsLocales()277   public static Locale[] getAvailableDateFormatSymbolsLocales() {
278     return getAvailableDateFormatLocales();
279   }
280 
getAvailableDecimalFormatSymbolsLocales()281   public static Locale[] getAvailableDecimalFormatSymbolsLocales() {
282     return getAvailableNumberFormatLocales();
283   }
284 
getAvailableNumberFormatLocales()285   public static Locale[] getAvailableNumberFormatLocales() {
286     return localesFromStrings(getAvailableNumberFormatLocalesNative());
287   }
288 
getBestDateTimePattern(String skeleton, Locale locale)289   public static String getBestDateTimePattern(String skeleton, Locale locale) {
290     String languageTag = locale.toLanguageTag();
291     String key = skeleton + "\t" + languageTag;
292     synchronized (CACHED_PATTERNS) {
293       String pattern = CACHED_PATTERNS.get(key);
294       if (pattern == null) {
295         pattern = getBestDateTimePatternNative(skeleton, languageTag);
296         CACHED_PATTERNS.put(key, pattern);
297       }
298       return pattern;
299     }
300   }
301 
getBestDateTimePatternNative(String skeleton, String languageTag)302   private static native String getBestDateTimePatternNative(String skeleton, String languageTag);
303 
getDateFormatOrder(String pattern)304   public static char[] getDateFormatOrder(String pattern) {
305     char[] result = new char[3];
306     int resultIndex = 0;
307     boolean sawDay = false;
308     boolean sawMonth = false;
309     boolean sawYear = false;
310 
311     for (int i = 0; i < pattern.length(); ++i) {
312       char ch = pattern.charAt(i);
313       if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
314         if (ch == 'd' && !sawDay) {
315           result[resultIndex++] = 'd';
316           sawDay = true;
317         } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
318           result[resultIndex++] = 'M';
319           sawMonth = true;
320         } else if ((ch == 'y') && !sawYear) {
321           result[resultIndex++] = 'y';
322           sawYear = true;
323         }
324       } else if (ch == 'G') {
325         // Ignore the era specifier, if present.
326       } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
327         throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
328       } else if (ch == '\'') {
329         if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
330           ++i;
331         } else {
332           i = pattern.indexOf('\'', i + 1);
333           if (i == -1) {
334             throw new IllegalArgumentException("Bad quoting in " + pattern);
335           }
336           ++i;
337         }
338       } else {
339         // Ignore spaces and punctuation.
340       }
341     }
342     return result;
343   }
344 
345   /**
346    * Returns the version of the CLDR data in use, such as "22.1.1".
347    */
getCldrVersion()348   public static native String getCldrVersion();
349 
350   /**
351    * Returns the icu4c version in use, such as "50.1.1".
352    */
getIcuVersion()353   public static native String getIcuVersion();
354 
355   /**
356    * Returns the Unicode version our ICU supports, such as "6.2".
357    */
getUnicodeVersion()358   public static native String getUnicodeVersion();
359 
360   // --- Case mapping.
361 
toLowerCase(String s, Locale locale)362   public static String toLowerCase(String s, Locale locale) {
363     return toLowerCase(s, locale.toLanguageTag());
364   }
365 
toLowerCase(String s, String languageTag)366   private static native String toLowerCase(String s, String languageTag);
367 
toUpperCase(String s, Locale locale)368   public static String toUpperCase(String s, Locale locale) {
369     return toUpperCase(s, locale.toLanguageTag());
370   }
371 
toUpperCase(String s, String languageTag)372   private static native String toUpperCase(String s, String languageTag);
373 
374   // --- Errors.
375 
376   // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU.
377   public static final int U_ZERO_ERROR = 0;
378   public static final int U_INVALID_CHAR_FOUND = 10;
379   public static final int U_TRUNCATED_CHAR_FOUND = 11;
380   public static final int U_ILLEGAL_CHAR_FOUND = 12;
381   public static final int U_BUFFER_OVERFLOW_ERROR = 15;
382 
U_FAILURE(int error)383   public static boolean U_FAILURE(int error) {
384     return error > U_ZERO_ERROR;
385   }
386 
387   // --- Native methods accessing ICU's database.
388 
getAvailableBreakIteratorLocalesNative()389   private static native String[] getAvailableBreakIteratorLocalesNative();
getAvailableCalendarLocalesNative()390   private static native String[] getAvailableCalendarLocalesNative();
getAvailableCollatorLocalesNative()391   private static native String[] getAvailableCollatorLocalesNative();
getAvailableDateFormatLocalesNative()392   private static native String[] getAvailableDateFormatLocalesNative();
getAvailableLocalesNative()393   private static native String[] getAvailableLocalesNative();
getAvailableNumberFormatLocalesNative()394   private static native String[] getAvailableNumberFormatLocalesNative();
395 
getAvailableCurrencyCodes()396   public static native String[] getAvailableCurrencyCodes();
getCurrencyCode(String countryCode)397   public static native String getCurrencyCode(String countryCode);
398 
getCurrencyDisplayName(Locale locale, String currencyCode)399   public static String getCurrencyDisplayName(Locale locale, String currencyCode) {
400     return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode);
401   }
402 
getCurrencyDisplayName(String languageTag, String currencyCode)403   private static native String getCurrencyDisplayName(String languageTag, String currencyCode);
404 
getCurrencyFractionDigits(String currencyCode)405   public static native int getCurrencyFractionDigits(String currencyCode);
getCurrencyNumericCode(String currencyCode)406   public static native int getCurrencyNumericCode(String currencyCode);
407 
getCurrencySymbol(Locale locale, String currencyCode)408   public static String getCurrencySymbol(Locale locale, String currencyCode) {
409     return getCurrencySymbol(locale.toLanguageTag(), currencyCode);
410   }
411 
getCurrencySymbol(String languageTag, String currencyCode)412   private static native String getCurrencySymbol(String languageTag, String currencyCode);
413 
getDisplayCountry(Locale targetLocale, Locale locale)414   public static String getDisplayCountry(Locale targetLocale, Locale locale) {
415     return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
416   }
417 
getDisplayCountryNative(String targetLanguageTag, String languageTag)418   private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag);
419 
getDisplayLanguage(Locale targetLocale, Locale locale)420   public static String getDisplayLanguage(Locale targetLocale, Locale locale) {
421     return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
422   }
423 
getDisplayLanguageNative(String targetLanguageTag, String languageTag)424   private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag);
425 
getDisplayVariant(Locale targetLocale, Locale locale)426   public static String getDisplayVariant(Locale targetLocale, Locale locale) {
427     return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
428   }
429 
getDisplayVariantNative(String targetLanguageTag, String languageTag)430   private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag);
431 
getDisplayScript(Locale targetLocale, Locale locale)432   public static String getDisplayScript(Locale targetLocale, Locale locale) {
433     return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
434   }
435 
getDisplayScriptNative(String targetLanguageTag, String languageTag)436   private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag);
437 
getISO3Country(String languageTag)438   public static native String getISO3Country(String languageTag);
439 
getISO3Language(String languageTag)440   public static native String getISO3Language(String languageTag);
441 
addLikelySubtags(Locale locale)442   public static Locale addLikelySubtags(Locale locale) {
443       return Locale.forLanguageTag(addLikelySubtags(locale.toLanguageTag()).replace('_', '-'));
444   }
445 
446   /**
447    * @deprecated use {@link #addLikelySubtags(java.util.Locale)} instead.
448    */
449   @Deprecated
addLikelySubtags(String locale)450   public static native String addLikelySubtags(String locale);
451 
452   /**
453    * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept
454    *     around only for the support library.
455    */
456   @Deprecated
getScript(String locale)457   public static native String getScript(String locale);
458 
getISOLanguagesNative()459   private static native String[] getISOLanguagesNative();
getISOCountriesNative()460   private static native String[] getISOCountriesNative();
461 
initLocaleDataNative(String languageTag, LocaleData result)462   static native boolean initLocaleDataNative(String languageTag, LocaleData result);
463 
464   /**
465    * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US
466    */
setDefaultLocale(String languageTag)467   public static native void setDefaultLocale(String languageTag);
468 
469   /**
470    * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US.
471    */
getDefaultLocale()472   public static native String getDefaultLocale();
473 
474   /** Returns the TZData version as reported by ICU4C. */
getTZDataVersion()475   public static native String getTZDataVersion();
476 }
477