1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package libcore.icu;
18 
19 import java.util.Collections;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.LinkedHashSet;
23 import java.util.Locale;
24 import java.util.Map;
25 import java.util.Set;
26 import libcore.util.BasicLruCache;
27 
28 /**
29  * Makes ICU data accessible to Java.
30  */
31 public final class ICU {
32   private static final BasicLruCache<String, String> CACHED_PATTERNS =
33       new BasicLruCache<String, String>(8);
34 
35   private static Locale[] availableLocalesCache;
36 
37   private static String[] isoCountries;
38 
39   private static String[] isoLanguages;
40 
41   /**
42    * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
43    */
getISOLanguages()44   public static String[] getISOLanguages() {
45     if (isoLanguages == null) {
46       isoLanguages = getISOLanguagesNative();
47     }
48     return isoLanguages.clone();
49   }
50 
51   /**
52    * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
53    */
getISOCountries()54   public static String[] getISOCountries() {
55     if (isoCountries == null) {
56       isoCountries = getISOCountriesNative();
57     }
58     return isoCountries.clone();
59   }
60 
61   private static final int IDX_LANGUAGE = 0;
62   private static final int IDX_SCRIPT = 1;
63   private static final int IDX_REGION = 2;
64   private static final int IDX_VARIANT = 3;
65 
66   /*
67    * Parse the {Language, Script, Region, Variant*} section of the ICU locale
68    * ID. This is the bit that appears before the keyword separate "@". The general
69    * structure is a series of ASCII alphanumeric strings (subtags)
70    * separated by underscores.
71    *
72    * Each subtag is interpreted according to its position in the list of subtags
73    * AND its length (groan...). The various cases are explained in comments
74    * below.
75    */
parseLangScriptRegionAndVariants(String string, String[] outputArray)76   private static void parseLangScriptRegionAndVariants(String string,
77           String[] outputArray) {
78     final int first = string.indexOf('_');
79     final int second = string.indexOf('_', first + 1);
80     final int third = string.indexOf('_', second + 1);
81 
82     if (first == -1) {
83       outputArray[IDX_LANGUAGE] = string;
84     } else if (second == -1) {
85       // Language and country ("ja_JP") OR
86       // Language and script ("en_Latn") OR
87       // Language and variant ("en_POSIX").
88 
89       outputArray[IDX_LANGUAGE] = string.substring(0, first);
90       final String secondString = string.substring(first + 1);
91 
92       if (secondString.length() == 4) {
93           // 4 Letter ISO script code.
94           outputArray[IDX_SCRIPT] = secondString;
95       } else if (secondString.length() == 2 || secondString.length() == 3) {
96           // 2 or 3 Letter region code.
97           outputArray[IDX_REGION] = secondString;
98       } else {
99           // If we're here, the length of the second half is either 1 or greater
100           // than 5. Assume that ICU won't hand us malformed tags, and therefore
101           // assume the rest of the string is a series of variant tags.
102           outputArray[IDX_VARIANT] = secondString;
103       }
104     } else if (third == -1) {
105       // Language and country and variant ("ja_JP_TRADITIONAL") OR
106       // Language and script and variant ("en_Latn_POSIX") OR
107       // Language and script and region ("en_Latn_US"). OR
108       // Language and variant with multiple subtags ("en_POSIX_XISOP")
109 
110       outputArray[IDX_LANGUAGE] = string.substring(0, first);
111       final String secondString = string.substring(first + 1, second);
112       final String thirdString = string.substring(second + 1);
113 
114       if (secondString.length() == 4) {
115           // The second subtag is a script.
116           outputArray[IDX_SCRIPT] = secondString;
117 
118           // The third subtag can be either a region or a variant, depending
119           // on its length.
120           if (thirdString.length() == 2 || thirdString.length() == 3 ||
121                   thirdString.isEmpty()) {
122               outputArray[IDX_REGION] = thirdString;
123           } else {
124               outputArray[IDX_VARIANT] = thirdString;
125           }
126       } else if (secondString.isEmpty() ||
127               secondString.length() == 2 || secondString.length() == 3) {
128           // The second string is a region, and the third a variant.
129           outputArray[IDX_REGION] = secondString;
130           outputArray[IDX_VARIANT] = thirdString;
131       } else {
132           // Variant with multiple subtags.
133           outputArray[IDX_VARIANT] = string.substring(first + 1);
134       }
135     } else {
136       // Language, script, region and variant with 1 or more subtags
137       // ("en_Latn_US_POSIX") OR
138       // Language, region and variant with 2 or more subtags
139       // (en_US_POSIX_VARIANT).
140       outputArray[IDX_LANGUAGE] = string.substring(0, first);
141       final String secondString = string.substring(first + 1, second);
142       if (secondString.length() == 4) {
143           outputArray[IDX_SCRIPT] = secondString;
144           outputArray[IDX_REGION] = string.substring(second + 1, third);
145           outputArray[IDX_VARIANT] = string.substring(third + 1);
146       } else {
147           outputArray[IDX_REGION] = secondString;
148           outputArray[IDX_VARIANT] = string.substring(second + 1);
149       }
150     }
151   }
152 
153   /**
154    * Returns the appropriate {@code Locale} given a {@code String} of the form returned
155    * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
156    * this method can parse strings that {@code Locale.toString} won't produce.
157    * Used to remove duplication.
158    */
localeFromIcuLocaleId(String localeId)159   public static Locale localeFromIcuLocaleId(String localeId) {
160     // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
161     final int extensionsIndex = localeId.indexOf('@');
162 
163     Map<Character, String> extensionsMap = Collections.EMPTY_MAP;
164     Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP;
165     Set<String> unicodeAttributeSet = Collections.EMPTY_SET;
166 
167     if (extensionsIndex != -1) {
168       extensionsMap = new HashMap<Character, String>();
169       unicodeKeywordsMap = new HashMap<String, String>();
170       unicodeAttributeSet = new HashSet<String>();
171 
172       // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
173       // containing all "keywords" it could parse. An ICU keyword is a key-value pair
174       // separated by an "=" (ULOC_KEYWORD_ASSIGN).
175       //
176       // Each keyword item can be one of three things :
177       // - A unicode extension attribute list: In this case the item key is "attribute"
178       //   and the value is a hyphen separated list of unicode attributes.
179       // - A unicode extension keyword: In this case, the item key will be larger than
180       //   1 char in length, and the value will be the unicode extension value.
181       // - A BCP-47 extension subtag: In this case, the item key will be exactly one
182       //   char in length, and the value will be a sequence of unparsed subtags that
183       //   represent the extension.
184       //
185       // Note that this implies that unicode extension keywords are "promoted" to
186       // to the same namespace as the top level extension subtags and their values.
187       // There can't be any collisions in practice because the BCP-47 spec imposes
188       // restrictions on their lengths.
189       final String extensionsString = localeId.substring(extensionsIndex + 1);
190       final String[] extensions = extensionsString.split(";");
191       for (String extension : extensions) {
192         // This is the special key for the unicode attributes
193         if (extension.startsWith("attribute=")) {
194           String unicodeAttributeValues = extension.substring("attribute=".length());
195           for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
196             unicodeAttributeSet.add(unicodeAttribute);
197           }
198         } else {
199           final int separatorIndex = extension.indexOf('=');
200 
201           if (separatorIndex == 1) {
202             // This is a BCP-47 extension subtag.
203             final String value = extension.substring(2);
204             final char extensionId = extension.charAt(0);
205 
206             extensionsMap.put(extensionId, value);
207           } else {
208             // This is a unicode extension keyword.
209             unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
210             extension.substring(separatorIndex + 1));
211           }
212         }
213       }
214     }
215 
216     final String[] outputArray = new String[] { "", "", "", "" };
217     if (extensionsIndex == -1) {
218       parseLangScriptRegionAndVariants(localeId, outputArray);
219     } else {
220       parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
221           outputArray);
222     }
223 
224     return new Locale(outputArray[IDX_LANGUAGE], outputArray[IDX_REGION],
225         outputArray[IDX_VARIANT], outputArray[IDX_SCRIPT],
226         unicodeAttributeSet, unicodeKeywordsMap, extensionsMap,
227         true /* has validated fields */);
228   }
229 
localesFromStrings(String[] localeNames)230   public static Locale[] localesFromStrings(String[] localeNames) {
231     // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
232     // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
233     // both so that we never need to convert back when talking to it.
234     LinkedHashSet<Locale> set = new LinkedHashSet<Locale>();
235     for (String localeName : localeNames) {
236       set.add(localeFromIcuLocaleId(localeName));
237     }
238     return set.toArray(new Locale[set.size()]);
239   }
240 
getAvailableLocales()241   public static Locale[] getAvailableLocales() {
242     if (availableLocalesCache == null) {
243       availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
244     }
245     return availableLocalesCache.clone();
246   }
247 
getAvailableBreakIteratorLocales()248   public static Locale[] getAvailableBreakIteratorLocales() {
249     return localesFromStrings(getAvailableBreakIteratorLocalesNative());
250   }
251 
getAvailableCalendarLocales()252   public static Locale[] getAvailableCalendarLocales() {
253     return localesFromStrings(getAvailableCalendarLocalesNative());
254   }
255 
getAvailableCollatorLocales()256   public static Locale[] getAvailableCollatorLocales() {
257     return localesFromStrings(getAvailableCollatorLocalesNative());
258   }
259 
getAvailableDateFormatLocales()260   public static Locale[] getAvailableDateFormatLocales() {
261     return localesFromStrings(getAvailableDateFormatLocalesNative());
262   }
263 
getAvailableDateFormatSymbolsLocales()264   public static Locale[] getAvailableDateFormatSymbolsLocales() {
265     return getAvailableDateFormatLocales();
266   }
267 
getAvailableDecimalFormatSymbolsLocales()268   public static Locale[] getAvailableDecimalFormatSymbolsLocales() {
269     return getAvailableNumberFormatLocales();
270   }
271 
getAvailableNumberFormatLocales()272   public static Locale[] getAvailableNumberFormatLocales() {
273     return localesFromStrings(getAvailableNumberFormatLocalesNative());
274   }
275 
getBestDateTimePattern(String skeleton, Locale locale)276   public static String getBestDateTimePattern(String skeleton, Locale locale) {
277     String languageTag = locale.toLanguageTag();
278     String key = skeleton + "\t" + languageTag;
279     synchronized (CACHED_PATTERNS) {
280       String pattern = CACHED_PATTERNS.get(key);
281       if (pattern == null) {
282         pattern = getBestDateTimePatternNative(skeleton, languageTag);
283         CACHED_PATTERNS.put(key, pattern);
284       }
285       return pattern;
286     }
287   }
288 
getBestDateTimePatternNative(String skeleton, String languageTag)289   private static native String getBestDateTimePatternNative(String skeleton, String languageTag);
290 
getDateFormatOrder(String pattern)291   public static char[] getDateFormatOrder(String pattern) {
292     char[] result = new char[3];
293     int resultIndex = 0;
294     boolean sawDay = false;
295     boolean sawMonth = false;
296     boolean sawYear = false;
297 
298     for (int i = 0; i < pattern.length(); ++i) {
299       char ch = pattern.charAt(i);
300       if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
301         if (ch == 'd' && !sawDay) {
302           result[resultIndex++] = 'd';
303           sawDay = true;
304         } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
305           result[resultIndex++] = 'M';
306           sawMonth = true;
307         } else if ((ch == 'y') && !sawYear) {
308           result[resultIndex++] = 'y';
309           sawYear = true;
310         }
311       } else if (ch == 'G') {
312         // Ignore the era specifier, if present.
313       } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
314         throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
315       } else if (ch == '\'') {
316         if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
317           ++i;
318         } else {
319           i = pattern.indexOf('\'', i + 1);
320           if (i == -1) {
321             throw new IllegalArgumentException("Bad quoting in " + pattern);
322           }
323           ++i;
324         }
325       } else {
326         // Ignore spaces and punctuation.
327       }
328     }
329     return result;
330   }
331 
332   /**
333    * Returns the version of the CLDR data in use, such as "22.1.1".
334    */
getCldrVersion()335   public static native String getCldrVersion();
336 
337   /**
338    * Returns the icu4c version in use, such as "50.1.1".
339    */
getIcuVersion()340   public static native String getIcuVersion();
341 
342   /**
343    * Returns the Unicode version our ICU supports, such as "6.2".
344    */
getUnicodeVersion()345   public static native String getUnicodeVersion();
346 
347   // --- Case mapping.
348 
toLowerCase(String s, Locale locale)349   public static String toLowerCase(String s, Locale locale) {
350     return toLowerCase(s, locale.toLanguageTag());
351   }
352 
toLowerCase(String s, String languageTag)353   private static native String toLowerCase(String s, String languageTag);
354 
toUpperCase(String s, Locale locale)355   public static String toUpperCase(String s, Locale locale) {
356     return toUpperCase(s, locale.toLanguageTag());
357   }
358 
toUpperCase(String s, String languageTag)359   private static native String toUpperCase(String s, String languageTag);
360 
361   // --- Errors.
362 
363   // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU.
364   public static final int U_ZERO_ERROR = 0;
365   public static final int U_INVALID_CHAR_FOUND = 10;
366   public static final int U_TRUNCATED_CHAR_FOUND = 11;
367   public static final int U_ILLEGAL_CHAR_FOUND = 12;
368   public static final int U_BUFFER_OVERFLOW_ERROR = 15;
369 
U_FAILURE(int error)370   public static boolean U_FAILURE(int error) {
371     return error > U_ZERO_ERROR;
372   }
373 
374   // --- Native methods accessing ICU's database.
375 
getAvailableBreakIteratorLocalesNative()376   private static native String[] getAvailableBreakIteratorLocalesNative();
getAvailableCalendarLocalesNative()377   private static native String[] getAvailableCalendarLocalesNative();
getAvailableCollatorLocalesNative()378   private static native String[] getAvailableCollatorLocalesNative();
getAvailableDateFormatLocalesNative()379   private static native String[] getAvailableDateFormatLocalesNative();
getAvailableLocalesNative()380   private static native String[] getAvailableLocalesNative();
getAvailableNumberFormatLocalesNative()381   private static native String[] getAvailableNumberFormatLocalesNative();
382 
getAvailableCurrencyCodes()383   public static native String[] getAvailableCurrencyCodes();
getCurrencyCode(String countryCode)384   public static native String getCurrencyCode(String countryCode);
385 
getCurrencyDisplayName(Locale locale, String currencyCode)386   public static String getCurrencyDisplayName(Locale locale, String currencyCode) {
387     return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode);
388   }
389 
getCurrencyDisplayName(String languageTag, String currencyCode)390   private static native String getCurrencyDisplayName(String languageTag, String currencyCode);
391 
getCurrencyFractionDigits(String currencyCode)392   public static native int getCurrencyFractionDigits(String currencyCode);
getCurrencyNumericCode(String currencyCode)393   public static native int getCurrencyNumericCode(String currencyCode);
394 
getCurrencySymbol(Locale locale, String currencyCode)395   public static String getCurrencySymbol(Locale locale, String currencyCode) {
396     return getCurrencySymbol(locale.toLanguageTag(), currencyCode);
397   }
398 
getCurrencySymbol(String languageTag, String currencyCode)399   private static native String getCurrencySymbol(String languageTag, String currencyCode);
400 
getDisplayCountry(Locale targetLocale, Locale locale)401   public static String getDisplayCountry(Locale targetLocale, Locale locale) {
402     return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
403   }
404 
getDisplayCountryNative(String targetLanguageTag, String languageTag)405   private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag);
406 
getDisplayLanguage(Locale targetLocale, Locale locale)407   public static String getDisplayLanguage(Locale targetLocale, Locale locale) {
408     return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
409   }
410 
getDisplayLanguageNative(String targetLanguageTag, String languageTag)411   private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag);
412 
getDisplayVariant(Locale targetLocale, Locale locale)413   public static String getDisplayVariant(Locale targetLocale, Locale locale) {
414     return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
415   }
416 
getDisplayVariantNative(String targetLanguageTag, String languageTag)417   private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag);
418 
getDisplayScript(Locale targetLocale, Locale locale)419   public static String getDisplayScript(Locale targetLocale, Locale locale) {
420     return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
421   }
422 
getDisplayScriptNative(String targetLanguageTag, String languageTag)423   private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag);
424 
getISO3Country(String languageTag)425   public static native String getISO3Country(String languageTag);
426 
getISO3Language(String languageTag)427   public static native String getISO3Language(String languageTag);
428 
addLikelySubtags(Locale locale)429   public static Locale addLikelySubtags(Locale locale) {
430       return Locale.forLanguageTag(addLikelySubtags(locale.toLanguageTag()).replace('_', '-'));
431   }
432 
433   /**
434    * @deprecated use {@link #addLikelySubtags(java.util.Locale)} instead.
435    */
436   @Deprecated
addLikelySubtags(String locale)437   public static native String addLikelySubtags(String locale);
438 
439   /**
440    * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept
441    *     around only for the support library.
442    */
443   @Deprecated
getScript(String locale)444   public static native String getScript(String locale);
445 
getISOLanguagesNative()446   private static native String[] getISOLanguagesNative();
getISOCountriesNative()447   private static native String[] getISOCountriesNative();
448 
initLocaleDataNative(String languageTag, LocaleData result)449   static native boolean initLocaleDataNative(String languageTag, LocaleData result);
450 
451   /**
452    * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US
453    */
setDefaultLocale(String languageTag)454   public static native void setDefaultLocale(String languageTag);
455 
456   /**
457    * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US.
458    */
getDefaultLocale()459   public static native String getDefaultLocale();
460 }
461