1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1996-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * CollationLoader.java, ported from ucol_res.cpp
12 *
13 * created by: Markus W. Scherer
14 */
15 
16 package com.ibm.icu.impl.coll;
17 
18 import java.io.IOException;
19 import java.nio.ByteBuffer;
20 import java.util.MissingResourceException;
21 
22 import com.ibm.icu.impl.ICUData;
23 import com.ibm.icu.impl.ICUResourceBundle;
24 import com.ibm.icu.util.ICUUncheckedIOException;
25 import com.ibm.icu.util.Output;
26 import com.ibm.icu.util.ULocale;
27 import com.ibm.icu.util.UResourceBundle;
28 
29 /**
30  * Convenience string denoting the Collation data tree
31  */
32 public final class CollationLoader {
33 
34     // not implemented, all methods are static
CollationLoader()35     private CollationLoader() {
36     }
37 
38     private static volatile String rootRules = null;
39 
loadRootRules()40     private static void loadRootRules() {
41         if (rootRules != null) {
42             return;
43         }
44         synchronized(CollationLoader.class) {
45             if (rootRules == null) {
46                 UResourceBundle rootBundle = UResourceBundle.getBundleInstance(
47                         ICUData.ICU_COLLATION_BASE_NAME, ULocale.ROOT);
48                 rootRules = rootBundle.getString("UCARules");
49             }
50         }
51     }
52 
53     // C++: static void appendRootRules(UnicodeString &s)
getRootRules()54     public static String getRootRules() {
55         loadRootRules();
56         return rootRules;
57     }
58 
59     /**
60      * Simpler/faster methods for ASCII than ones based on Unicode data.
61      * TODO: There should be code like this somewhere already??
62      */
63     private static final class ASCII {
toLowerCase(String s)64         static String toLowerCase(String s) {
65             for (int i = 0; i < s.length(); ++i) {
66                 char c = s.charAt(i);
67                 if ('A' <= c && c <= 'Z') {
68                     StringBuilder sb = new StringBuilder(s.length());
69                     sb.append(s, 0, i).append((char)(c + 0x20));
70                     while (++i < s.length()) {
71                         c = s.charAt(i);
72                         if ('A' <= c && c <= 'Z') { c = (char)(c + 0x20); }
73                         sb.append(c);
74                     }
75                     return sb.toString();
76                 }
77             }
78             return s;
79         }
80     }
81 
loadRules(ULocale locale, String collationType)82     static String loadRules(ULocale locale, String collationType) {
83         UResourceBundle bundle = UResourceBundle.getBundleInstance(
84                 ICUData.ICU_COLLATION_BASE_NAME, locale);
85         UResourceBundle data = ((ICUResourceBundle)bundle).getWithFallback(
86                 "collations/" + ASCII.toLowerCase(collationType));
87         String rules = data.getString("Sequence");
88         return rules;
89     }
90 
findWithFallback(UResourceBundle table, String entryName)91     private static final UResourceBundle findWithFallback(UResourceBundle table, String entryName) {
92         return ((ICUResourceBundle)table).findWithFallback(entryName);
93     }
94 
loadTailoring(ULocale locale, Output<ULocale> outValidLocale)95     public static CollationTailoring loadTailoring(ULocale locale, Output<ULocale> outValidLocale) {
96 
97         // Java porting note: ICU4J getWithFallback/getStringWithFallback currently does not
98         // work well when alias table is involved in a resource path, unless full path is specified.
99         // For now, collation resources does not contain such data, so the code below should work fine.
100 
101         CollationTailoring root = CollationRoot.getRoot();
102         String localeName = locale.getName();
103         if (localeName.length() == 0 || localeName.equals("root")) {
104             outValidLocale.value = ULocale.ROOT;
105             return root;
106         }
107 
108         UResourceBundle bundle = null;
109         try {
110             bundle = ICUResourceBundle.getBundleInstance(
111                     ICUData.ICU_COLLATION_BASE_NAME, locale,
112                     ICUResourceBundle.OpenType.LOCALE_ROOT);
113         } catch (MissingResourceException e) {
114             outValidLocale.value = ULocale.ROOT;
115             return root;
116         }
117 
118         ULocale validLocale = bundle.getULocale();
119         // Normalize the root locale. See
120         // http://bugs.icu-project.org/trac/ticket/10715
121         String validLocaleName = validLocale.getName();
122         if (validLocaleName.length() == 0 || validLocaleName.equals("root")) {
123             validLocale = ULocale.ROOT;
124         }
125         outValidLocale.value = validLocale;
126 
127         // There are zero or more tailorings in the collations table.
128         UResourceBundle collations;
129         try {
130             collations = bundle.get("collations");
131             if (collations == null) {
132                 return root;
133             }
134         } catch(MissingResourceException ignored) {
135             return root;
136         }
137 
138         // Fetch the collation type from the locale ID and the default type from the data.
139         String type = locale.getKeywordValue("collation");
140         String defaultType = "standard";
141 
142         String defT = ((ICUResourceBundle)collations).findStringWithFallback("default");
143         if (defT != null) {
144             defaultType = defT;
145         }
146 
147         if (type == null || type.equals("default")) {
148             type = defaultType;
149         } else {
150             type = ASCII.toLowerCase(type);
151         }
152 
153         // Load the collations/type tailoring, with type fallback.
154 
155         // Java porting note: typeFallback is used for setting U_USING_DEFAULT_WARNING in
156         // ICU4C, but not used by ICU4J
157 
158         // boolean typeFallback = false;
159         UResourceBundle data = findWithFallback(collations, type);
160         if (data == null &&
161                 type.length() > 6 && type.startsWith("search")) {
162             // fall back from something like "searchjl" to "search"
163             // typeFallback = true;
164             type = "search";
165             data = findWithFallback(collations, type);
166         }
167 
168         if (data == null && !type.equals(defaultType)) {
169             // fall back to the default type
170             // typeFallback = true;
171             type = defaultType;
172             data = findWithFallback(collations, type);
173         }
174 
175         if (data == null && !type.equals("standard")) {
176             // fall back to the "standard" type
177             // typeFallback = true;
178             type = "standard";
179             data = findWithFallback(collations, type);
180         }
181 
182         if (data == null) {
183             return root;
184         }
185 
186         // Is this the same as the root collator? If so, then use that instead.
187         ULocale actualLocale = data.getULocale();
188         // http://bugs.icu-project.org/trac/ticket/10715 ICUResourceBundle(root).getULocale() != ULocale.ROOT
189         // Therefore not just if (actualLocale.equals(ULocale.ROOT) && type.equals("standard")) {
190         String actualLocaleName = actualLocale.getName();
191         if (actualLocaleName.length() == 0 || actualLocaleName.equals("root")) {
192             actualLocale = ULocale.ROOT;
193             if (type.equals("standard")) {
194                 return root;
195             }
196         }
197 
198         CollationTailoring t = new CollationTailoring(root.settings);
199         t.actualLocale = actualLocale;
200 
201         // deserialize
202         UResourceBundle binary = data.get("%%CollationBin");
203         ByteBuffer inBytes = binary.getBinary();
204         try {
205             CollationDataReader.read(root, inBytes, t);
206         } catch (IOException e) {
207             throw new ICUUncheckedIOException("Failed to load collation tailoring data for locale:"
208                     + actualLocale + " type:" + type, e);
209         }
210 
211         // Try to fetch the optional rules string.
212         try {
213             t.setRulesResource(data.get("Sequence"));
214         } catch(MissingResourceException ignored) {
215         }
216 
217         // Set the collation types on the informational locales,
218         // except when they match the default types (for brevity and backwards compatibility).
219         // For the valid locale, suppress the default type.
220         if (!type.equals(defaultType)) {
221             outValidLocale.value = validLocale.setKeywordValue("collation", type);
222         }
223 
224         // For the actual locale, suppress the default type *according to the actual locale*.
225         // For example, zh has default=pinyin and contains all of the Chinese tailorings.
226         // zh_Hant has default=stroke but has no other data.
227         // For the valid locale "zh_Hant" we need to suppress stroke.
228         // For the actual locale "zh" we need to suppress pinyin instead.
229         if (!actualLocale.equals(validLocale)) {
230             // Opening a bundle for the actual locale should always succeed.
231             UResourceBundle actualBundle = UResourceBundle.getBundleInstance(
232                     ICUData.ICU_COLLATION_BASE_NAME, actualLocale);
233             defT = ((ICUResourceBundle)actualBundle).findStringWithFallback("collations/default");
234             if (defT != null) {
235                 defaultType = defT;
236             }
237         }
238 
239         if (!type.equals(defaultType)) {
240             t.actualLocale = t.actualLocale.setKeywordValue("collation", type);
241         }
242 
243         // if (typeFallback) {
244         //     ICU4C implementation sets U_USING_DEFAULT_WARNING here
245         // }
246 
247         return t;
248     }
249 }
250