1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 1996-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * 11 * CollationLoader.java, ported from ucol_res.cpp 12 * 13 * created by: Markus W. Scherer 14 */ 15 16 package com.ibm.icu.impl.coll; 17 18 import java.io.IOException; 19 import java.nio.ByteBuffer; 20 import java.util.MissingResourceException; 21 22 import com.ibm.icu.impl.ICUData; 23 import com.ibm.icu.impl.ICUResourceBundle; 24 import com.ibm.icu.util.ICUUncheckedIOException; 25 import com.ibm.icu.util.Output; 26 import com.ibm.icu.util.ULocale; 27 import com.ibm.icu.util.UResourceBundle; 28 29 /** 30 * Convenience string denoting the Collation data tree 31 */ 32 public final class CollationLoader { 33 34 // not implemented, all methods are static CollationLoader()35 private CollationLoader() { 36 } 37 38 private static volatile String rootRules = null; 39 loadRootRules()40 private static void loadRootRules() { 41 if (rootRules != null) { 42 return; 43 } 44 synchronized(CollationLoader.class) { 45 if (rootRules == null) { 46 UResourceBundle rootBundle = UResourceBundle.getBundleInstance( 47 ICUData.ICU_COLLATION_BASE_NAME, ULocale.ROOT); 48 rootRules = rootBundle.getString("UCARules"); 49 } 50 } 51 } 52 53 // C++: static void appendRootRules(UnicodeString &s) getRootRules()54 public static String getRootRules() { 55 loadRootRules(); 56 return rootRules; 57 } 58 59 /** 60 * Simpler/faster methods for ASCII than ones based on Unicode data. 61 * TODO: There should be code like this somewhere already?? 62 */ 63 private static final class ASCII { toLowerCase(String s)64 static String toLowerCase(String s) { 65 for (int i = 0; i < s.length(); ++i) { 66 char c = s.charAt(i); 67 if ('A' <= c && c <= 'Z') { 68 StringBuilder sb = new StringBuilder(s.length()); 69 sb.append(s, 0, i).append((char)(c + 0x20)); 70 while (++i < s.length()) { 71 c = s.charAt(i); 72 if ('A' <= c && c <= 'Z') { c = (char)(c + 0x20); } 73 sb.append(c); 74 } 75 return sb.toString(); 76 } 77 } 78 return s; 79 } 80 } 81 loadRules(ULocale locale, String collationType)82 static String loadRules(ULocale locale, String collationType) { 83 UResourceBundle bundle = UResourceBundle.getBundleInstance( 84 ICUData.ICU_COLLATION_BASE_NAME, locale); 85 UResourceBundle data = ((ICUResourceBundle)bundle).getWithFallback( 86 "collations/" + ASCII.toLowerCase(collationType)); 87 String rules = data.getString("Sequence"); 88 return rules; 89 } 90 findWithFallback(UResourceBundle table, String entryName)91 private static final UResourceBundle findWithFallback(UResourceBundle table, String entryName) { 92 return ((ICUResourceBundle)table).findWithFallback(entryName); 93 } 94 loadTailoring(ULocale locale, Output<ULocale> outValidLocale)95 public static CollationTailoring loadTailoring(ULocale locale, Output<ULocale> outValidLocale) { 96 97 // Java porting note: ICU4J getWithFallback/getStringWithFallback currently does not 98 // work well when alias table is involved in a resource path, unless full path is specified. 99 // For now, collation resources does not contain such data, so the code below should work fine. 100 101 CollationTailoring root = CollationRoot.getRoot(); 102 String localeName = locale.getName(); 103 if (localeName.length() == 0 || localeName.equals("root")) { 104 outValidLocale.value = ULocale.ROOT; 105 return root; 106 } 107 108 UResourceBundle bundle = null; 109 try { 110 bundle = ICUResourceBundle.getBundleInstance( 111 ICUData.ICU_COLLATION_BASE_NAME, locale, 112 ICUResourceBundle.OpenType.LOCALE_ROOT); 113 } catch (MissingResourceException e) { 114 outValidLocale.value = ULocale.ROOT; 115 return root; 116 } 117 118 ULocale validLocale = bundle.getULocale(); 119 // Normalize the root locale. See 120 // http://bugs.icu-project.org/trac/ticket/10715 121 String validLocaleName = validLocale.getName(); 122 if (validLocaleName.length() == 0 || validLocaleName.equals("root")) { 123 validLocale = ULocale.ROOT; 124 } 125 outValidLocale.value = validLocale; 126 127 // There are zero or more tailorings in the collations table. 128 UResourceBundle collations; 129 try { 130 collations = bundle.get("collations"); 131 if (collations == null) { 132 return root; 133 } 134 } catch(MissingResourceException ignored) { 135 return root; 136 } 137 138 // Fetch the collation type from the locale ID and the default type from the data. 139 String type = locale.getKeywordValue("collation"); 140 String defaultType = "standard"; 141 142 String defT = ((ICUResourceBundle)collations).findStringWithFallback("default"); 143 if (defT != null) { 144 defaultType = defT; 145 } 146 147 if (type == null || type.equals("default")) { 148 type = defaultType; 149 } else { 150 type = ASCII.toLowerCase(type); 151 } 152 153 // Load the collations/type tailoring, with type fallback. 154 155 // Java porting note: typeFallback is used for setting U_USING_DEFAULT_WARNING in 156 // ICU4C, but not used by ICU4J 157 158 // boolean typeFallback = false; 159 UResourceBundle data = findWithFallback(collations, type); 160 if (data == null && 161 type.length() > 6 && type.startsWith("search")) { 162 // fall back from something like "searchjl" to "search" 163 // typeFallback = true; 164 type = "search"; 165 data = findWithFallback(collations, type); 166 } 167 168 if (data == null && !type.equals(defaultType)) { 169 // fall back to the default type 170 // typeFallback = true; 171 type = defaultType; 172 data = findWithFallback(collations, type); 173 } 174 175 if (data == null && !type.equals("standard")) { 176 // fall back to the "standard" type 177 // typeFallback = true; 178 type = "standard"; 179 data = findWithFallback(collations, type); 180 } 181 182 if (data == null) { 183 return root; 184 } 185 186 // Is this the same as the root collator? If so, then use that instead. 187 ULocale actualLocale = data.getULocale(); 188 // http://bugs.icu-project.org/trac/ticket/10715 ICUResourceBundle(root).getULocale() != ULocale.ROOT 189 // Therefore not just if (actualLocale.equals(ULocale.ROOT) && type.equals("standard")) { 190 String actualLocaleName = actualLocale.getName(); 191 if (actualLocaleName.length() == 0 || actualLocaleName.equals("root")) { 192 actualLocale = ULocale.ROOT; 193 if (type.equals("standard")) { 194 return root; 195 } 196 } 197 198 CollationTailoring t = new CollationTailoring(root.settings); 199 t.actualLocale = actualLocale; 200 201 // deserialize 202 UResourceBundle binary = data.get("%%CollationBin"); 203 ByteBuffer inBytes = binary.getBinary(); 204 try { 205 CollationDataReader.read(root, inBytes, t); 206 } catch (IOException e) { 207 throw new ICUUncheckedIOException("Failed to load collation tailoring data for locale:" 208 + actualLocale + " type:" + type, e); 209 } 210 211 // Try to fetch the optional rules string. 212 try { 213 t.setRulesResource(data.get("Sequence")); 214 } catch(MissingResourceException ignored) { 215 } 216 217 // Set the collation types on the informational locales, 218 // except when they match the default types (for brevity and backwards compatibility). 219 // For the valid locale, suppress the default type. 220 if (!type.equals(defaultType)) { 221 outValidLocale.value = validLocale.setKeywordValue("collation", type); 222 } 223 224 // For the actual locale, suppress the default type *according to the actual locale*. 225 // For example, zh has default=pinyin and contains all of the Chinese tailorings. 226 // zh_Hant has default=stroke but has no other data. 227 // For the valid locale "zh_Hant" we need to suppress stroke. 228 // For the actual locale "zh" we need to suppress pinyin instead. 229 if (!actualLocale.equals(validLocale)) { 230 // Opening a bundle for the actual locale should always succeed. 231 UResourceBundle actualBundle = UResourceBundle.getBundleInstance( 232 ICUData.ICU_COLLATION_BASE_NAME, actualLocale); 233 defT = ((ICUResourceBundle)actualBundle).findStringWithFallback("collations/default"); 234 if (defT != null) { 235 defaultType = defT; 236 } 237 } 238 239 if (!type.equals(defaultType)) { 240 t.actualLocale = t.actualLocale.setKeywordValue("collation", type); 241 } 242 243 // if (typeFallback) { 244 // ICU4C implementation sets U_USING_DEFAULT_WARNING here 245 // } 246 247 return t; 248 } 249 } 250