1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.text; 18 19 import android.annotation.Nullable; 20 import android.util.Log; 21 22 import com.android.internal.annotations.GuardedBy; 23 24 import java.io.File; 25 import java.io.IOException; 26 import java.io.RandomAccessFile; 27 import java.nio.ByteBuffer; 28 import java.nio.MappedByteBuffer; 29 import java.nio.channels.FileChannel; 30 import java.util.HashMap; 31 import java.util.Locale; 32 33 /** 34 * Hyphenator is a wrapper class for a native implementation of automatic hyphenation, 35 * in essence finding valid hyphenation opportunities in a word. 36 * 37 * @hide 38 */ 39 public class Hyphenator { 40 // This class has deliberately simple lifetime management (no finalizer) because in 41 // the common case a process will use a very small number of locales. 42 43 private static String TAG = "Hyphenator"; 44 45 // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but 46 // that appears too small. 47 private static final int INDIC_MIN_PREFIX = 2; 48 private static final int INDIC_MIN_SUFFIX = 2; 49 50 private final static Object sLock = new Object(); 51 52 @GuardedBy("sLock") 53 final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>(); 54 55 // Reasonable enough values for cases where we have no hyphenation patterns but may be able to 56 // do some automatic hyphenation based on characters. These values would be used very rarely. 57 private static final int DEFAULT_MIN_PREFIX = 2; 58 private static final int DEFAULT_MIN_SUFFIX = 2; 59 final static Hyphenator sEmptyHyphenator = 60 new Hyphenator(StaticLayout.nLoadHyphenator( 61 null, 0, DEFAULT_MIN_PREFIX, DEFAULT_MIN_SUFFIX), 62 null); 63 64 final private long mNativePtr; 65 66 // We retain a reference to the buffer to keep the memory mapping valid 67 @SuppressWarnings("unused") 68 final private ByteBuffer mBuffer; 69 Hyphenator(long nativePtr, ByteBuffer b)70 private Hyphenator(long nativePtr, ByteBuffer b) { 71 mNativePtr = nativePtr; 72 mBuffer = b; 73 } 74 getNativePtr()75 public long getNativePtr() { 76 return mNativePtr; 77 } 78 get(@ullable Locale locale)79 public static Hyphenator get(@Nullable Locale locale) { 80 synchronized (sLock) { 81 Hyphenator result = sMap.get(locale); 82 if (result != null) { 83 return result; 84 } 85 86 // If there's a variant, fall back to language+variant only, if available 87 final String variant = locale.getVariant(); 88 if (!variant.isEmpty()) { 89 final Locale languageAndVariantOnlyLocale = 90 new Locale(locale.getLanguage(), "", variant); 91 result = sMap.get(languageAndVariantOnlyLocale); 92 if (result != null) { 93 sMap.put(locale, result); 94 return result; 95 } 96 } 97 98 // Fall back to language-only, if available 99 final Locale languageOnlyLocale = new Locale(locale.getLanguage()); 100 result = sMap.get(languageOnlyLocale); 101 if (result != null) { 102 sMap.put(locale, result); 103 return result; 104 } 105 106 // Fall back to script-only, if available 107 final String script = locale.getScript(); 108 if (!script.equals("")) { 109 final Locale scriptOnlyLocale = new Locale.Builder() 110 .setLanguage("und") 111 .setScript(script) 112 .build(); 113 result = sMap.get(scriptOnlyLocale); 114 if (result != null) { 115 sMap.put(locale, result); 116 return result; 117 } 118 } 119 120 sMap.put(locale, sEmptyHyphenator); // To remember we found nothing. 121 } 122 return sEmptyHyphenator; 123 } 124 125 private static class HyphenationData { 126 final String mLanguageTag; 127 final int mMinPrefix, mMinSuffix; HyphenationData(String languageTag, int minPrefix, int minSuffix)128 HyphenationData(String languageTag, int minPrefix, int minSuffix) { 129 this.mLanguageTag = languageTag; 130 this.mMinPrefix = minPrefix; 131 this.mMinSuffix = minSuffix; 132 } 133 } 134 loadHyphenator(HyphenationData data)135 private static Hyphenator loadHyphenator(HyphenationData data) { 136 String patternFilename = "hyph-" + data.mLanguageTag.toLowerCase(Locale.US) + ".hyb"; 137 File patternFile = new File(getSystemHyphenatorLocation(), patternFilename); 138 if (!patternFile.canRead()) { 139 Log.e(TAG, "hyphenation patterns for " + patternFile + " not found or unreadable"); 140 return null; 141 } 142 try { 143 RandomAccessFile f = new RandomAccessFile(patternFile, "r"); 144 try { 145 FileChannel fc = f.getChannel(); 146 MappedByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size()); 147 long nativePtr = StaticLayout.nLoadHyphenator( 148 buf, 0, data.mMinPrefix, data.mMinSuffix); 149 return new Hyphenator(nativePtr, buf); 150 } finally { 151 f.close(); 152 } 153 } catch (IOException e) { 154 Log.e(TAG, "error loading hyphenation " + patternFile, e); 155 return null; 156 } 157 } 158 getSystemHyphenatorLocation()159 private static File getSystemHyphenatorLocation() { 160 return new File("/system/usr/hyphen-data"); 161 } 162 163 // This array holds pairs of language tags that are used to prefill the map from locale to 164 // hyphenation data: The hyphenation data for the first field will be prefilled from the 165 // hyphenation data for the second field. 166 // 167 // The aliases that are computable by the get() method above are not included. 168 private static final String[][] LOCALE_FALLBACK_DATA = { 169 // English locales that fall back to en-US. The data is 170 // from CLDR. It's all English locales, minus the locales whose 171 // parent is en-001 (from supplementalData.xml, under <parentLocales>). 172 // TODO: Figure out how to get this from ICU. 173 {"en-AS", "en-US"}, // English (American Samoa) 174 {"en-GU", "en-US"}, // English (Guam) 175 {"en-MH", "en-US"}, // English (Marshall Islands) 176 {"en-MP", "en-US"}, // English (Northern Mariana Islands) 177 {"en-PR", "en-US"}, // English (Puerto Rico) 178 {"en-UM", "en-US"}, // English (United States Minor Outlying Islands) 179 {"en-VI", "en-US"}, // English (Virgin Islands) 180 181 // All English locales other than those falling back to en-US are mapped to en-GB. 182 {"en", "en-GB"}, 183 184 // For German, we're assuming the 1996 (and later) orthography by default. 185 {"de", "de-1996"}, 186 // Liechtenstein uses the Swiss hyphenation rules for the 1901 orthography. 187 {"de-LI-1901", "de-CH-1901"}, 188 189 // Norwegian is very probably Norwegian Bokmål. 190 {"no", "nb"}, 191 192 // Use mn-Cyrl. According to CLDR's likelySubtags.xml, mn is most likely to be mn-Cyrl. 193 {"mn", "mn-Cyrl"}, // Mongolian 194 195 // Fall back to Ethiopic script for languages likely to be written in Ethiopic. 196 // Data is from CLDR's likelySubtags.xml. 197 // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags(). 198 {"am", "und-Ethi"}, // Amharic 199 {"byn", "und-Ethi"}, // Blin 200 {"gez", "und-Ethi"}, // Geʻez 201 {"ti", "und-Ethi"}, // Tigrinya 202 {"wal", "und-Ethi"}, // Wolaytta 203 }; 204 205 private static final HyphenationData[] AVAILABLE_LANGUAGES = { 206 new HyphenationData("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Assamese 207 new HyphenationData("bg", 2, 2), // Bulgarian 208 new HyphenationData("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Bengali 209 new HyphenationData("cu", 1, 2), // Church Slavonic 210 new HyphenationData("cy", 2, 3), // Welsh 211 new HyphenationData("da", 2, 2), // Danish 212 new HyphenationData("de-1901", 2, 2), // German 1901 orthography 213 new HyphenationData("de-1996", 2, 2), // German 1996 orthography 214 new HyphenationData("de-CH-1901", 2, 2), // Swiss High German 1901 orthography 215 new HyphenationData("en-GB", 2, 3), // British English 216 new HyphenationData("en-US", 2, 3), // American English 217 new HyphenationData("es", 2, 2), // Spanish 218 new HyphenationData("et", 2, 3), // Estonian 219 new HyphenationData("eu", 2, 2), // Basque 220 new HyphenationData("fr", 2, 3), // French 221 new HyphenationData("ga", 2, 3), // Irish 222 new HyphenationData("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Gujarati 223 new HyphenationData("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Hindi 224 new HyphenationData("hr", 2, 2), // Croatian 225 new HyphenationData("hu", 2, 2), // Hungarian 226 // texhyphen sources say Armenian may be (1, 2), but that it needs confirmation. 227 // Going with a more conservative value of (2, 2) for now. 228 new HyphenationData("hy", 2, 2), // Armenian 229 new HyphenationData("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Kannada 230 new HyphenationData("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Malayalam 231 new HyphenationData("mn-Cyrl", 2, 2), // Mongolian in Cyrillic script 232 new HyphenationData("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Marathi 233 new HyphenationData("nb", 2, 2), // Norwegian Bokmål 234 new HyphenationData("nn", 2, 2), // Norwegian Nynorsk 235 new HyphenationData("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Oriya 236 new HyphenationData("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Punjabi 237 new HyphenationData("pt", 2, 3), // Portuguese 238 new HyphenationData("sl", 2, 2), // Slovenian 239 new HyphenationData("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Tamil 240 new HyphenationData("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Telugu 241 new HyphenationData("tk", 2, 2), // Turkmen 242 new HyphenationData("und-Ethi", 1, 1), // Any language in Ethiopic script 243 }; 244 245 /** 246 * Load hyphenation patterns at initialization time. We want to have patterns 247 * for all locales loaded and ready to use so we don't have to do any file IO 248 * on the UI thread when drawing text in different locales. 249 * 250 * @hide 251 */ init()252 public static void init() { 253 sMap.put(null, null); 254 255 for (int i = 0; i < AVAILABLE_LANGUAGES.length; i++) { 256 HyphenationData data = AVAILABLE_LANGUAGES[i]; 257 Hyphenator h = loadHyphenator(data); 258 if (h != null) { 259 sMap.put(Locale.forLanguageTag(data.mLanguageTag), h); 260 } 261 } 262 263 for (int i = 0; i < LOCALE_FALLBACK_DATA.length; i++) { 264 String language = LOCALE_FALLBACK_DATA[i][0]; 265 String fallback = LOCALE_FALLBACK_DATA[i][1]; 266 sMap.put(Locale.forLanguageTag(language), sMap.get(Locale.forLanguageTag(fallback))); 267 } 268 } 269 } 270