/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */

package com.android.providers.contacts;

import android.icu.text.AlphabeticIndex;
import android.icu.text.AlphabeticIndex.ImmutableIndex;
import android.icu.text.Transliterator;
import android.provider.ContactsContract.FullNameStyle;
import android.provider.ContactsContract.PhoneticNameStyle;
import android.os.LocaleList;
import android.text.TextUtils;
import android.util.ArraySet;
import android.util.Log;

import com.android.providers.contacts.HanziToPinyin.Token;
import com.google.common.annotations.VisibleForTesting;

import java.lang.Character.UnicodeBlock;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;


/**
 * This utility class provides specialized handling for locale specific
 * information: labels, name lookup keys.
 *
 * <p>A single shared instance is kept in a static field; it is created lazily
 * by {@link #getInstance()} and replaced by {@link #setLocales(LocaleSet)}
 * when the locale set changes. Both of those static methods are synchronized
 * on the class.
 */
public class ContactLocaleUtils {
    public static final String TAG = "ContactLocale";

    public static final Locale LOCALE_ARABIC = new Locale("ar");
    public static final Locale LOCALE_GREEK = new Locale("el");
    public static final Locale LOCALE_HEBREW = new Locale("he");
    // Serbian and Ukrainian labels are complementary supersets of Russian
    public static final Locale LOCALE_SERBIAN = new Locale("sr");
    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
    public static final Locale LOCALE_THAI = new Locale("th");

    // -- Note for adding locales to sDefaultLabelLocales --
    //
    // AlphabeticIndex.getBucketLabel() uses a binary search across
    // the entire label set so care should be taken about growing this
    // set too large. The following set determines for which locales
    // we will show labels other than your primary locale. General rules
    // of thumb for adding a locale: should be a supported locale; and
    // should not be included if from a name it is not deterministic
    // which way to label it (so eg Chinese cannot be added because
    // the labeling of a Chinese character varies between Simplified,
    // Traditional, and Japanese locales). Use English only for all
    // Latin based alphabets. Ukrainian and Serbian are chosen for
    // Cyrillic because their alphabets are complementary supersets
    // of Russian.
    private static final Locale[] sDefaultLabelLocales = new Locale[]{
            Locale.ENGLISH,
            Locale.JAPANESE,
            Locale.KOREAN,
            LOCALE_THAI,
            LOCALE_ARABIC,
            LOCALE_HEBREW,
            LOCALE_GREEK,
            LOCALE_UKRAINIAN,
            LOCALE_SERBIAN,
    };

    /**
     * This class is the default implementation and should be the base class
     * for other locales.
     *
     * sortKey: same as name
     * nameLookupKeys: none
     * labels: uses ICU AlphabeticIndex for labels and extends by labeling
     *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
     */
    private static class ContactLocaleUtilsBase {
        private static final String EMPTY_STRING = "";
        private static final String NUMBER_STRING = "#";

        protected final ImmutableIndex mAlphabeticIndex;
        // Number of buckets reported by mAlphabeticIndex (excludes the '#' bucket).
        private final int mAlphabeticIndexBucketCount;
        // Position of the '#' bucket in the index space exposed by this class.
        private final int mNumberBucketIndex;
        // True when names are transliterated with HanziToPinyin before bucketing.
        private final boolean mUsePinyinTransliterator;

        /**
         * Builds the label index for the given locale set: labels of the
         * primary locale first, then every locale in the locale list, then
         * {@link #sDefaultLabelLocales}. Each locale is added at most once.
         *
         * @param locales the locale set to build labels for; must not be null
         */
        public ContactLocaleUtilsBase(LocaleSet locales) {
            mUsePinyinTransliterator = locales.shouldPreferSimplifiedChinese();

            final ArraySet<Locale> addedLocales = new ArraySet<>();

            // First, add from the primary locale (which may not be the first locale in the locale
            // list).
            AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
                    .setMaxLabelCount(300);
            addedLocales.add(locales.getPrimaryLocale());

            // Next, add all locales from the locale list.
            final LocaleList localeList = locales.getAllLocales();
            for (int i = 0; i < localeList.size(); i++) {
                addLabels(ai, localeList.get(i), addedLocales);
            }
            // Then add the default locales.
            for (int i = 0; i < sDefaultLabelLocales.length; i++) {
                addLabels(ai, sDefaultLabelLocales[i], addedLocales);
            }
            mAlphabeticIndex = ai.buildImmutableIndex();
            mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
            // The '#' bucket takes the slot of the last AlphabeticIndex bucket,
            // which getBucketIndex() then shifts up by one.
            mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
        }

        /**
         * Adds the labels of {@code locale} to {@code ai} unless that locale
         * has already been added, and records it in {@code addedLocales}.
         */
        private static void addLabels(
                AlphabeticIndex ai, Locale locale, ArraySet<Locale> addedLocales) {
            // add() returns false when the locale was already present.
            if (addedLocales.add(locale)) {
                ai.addLabels(locale);
            }
        }

        /**
         * Returns the sort key for {@code name}; this implementation returns
         * the name unchanged.
         */
        public String getSortKey(String name) {
            return name;
        }

        /**
         * Returns the index of the '#' (phone number) bucket.
         */
        public int getNumberBucketIndex() {
            return mNumberBucketIndex;
        }

        /**
         * Returns the bucket index for the specified string. AlphabeticIndex
         * sorts strings into buckets numbered in order from 0 to N, where the
         * exact value of N depends on how many representative index labels are
         * used in a particular locale. This routine adds one additional bucket
         * for phone numbers. It attempts to detect phone numbers and shifts
         * the bucket indexes returned by AlphabeticIndex in order to make room
         * for the new # bucket, so the returned range becomes 0 to N+1.
         *
         * @param name the string to classify; must not be null
         * @return the bucket index, or -1 if AlphabeticIndex reports a
         *         negative bucket
         */
        public int getBucketIndex(String name) {
            boolean prefixIsNumeric = false;
            final int length = name.length();
            int offset = 0;
            while (offset < length) {
                int codePoint = Character.codePointAt(name, offset);
                // Ignore standard phone number separators and identify any
                // string that otherwise starts with a number.
                if (Character.isDigit(codePoint)) {
                    prefixIsNumeric = true;
                    break;
                } else if (!Character.isSpaceChar(codePoint) &&
                        codePoint != '+' && codePoint != '(' &&
                        codePoint != ')' && codePoint != '.' &&
                        codePoint != '-' && codePoint != '#') {
                    break;
                }
                offset += Character.charCount(codePoint);
            }
            if (prefixIsNumeric) {
                return mNumberBucketIndex;
            }

            /*
             * ICU 55 AlphabeticIndex doesn't support Simplified Chinese
             * as a secondary locale so it is necessary to use the
             * Pinyin transliterator. We also use this for a Simplified
             * Chinese primary locale because it gives more accurate letter
             * buckets. b/19835686
             */
            if (mUsePinyinTransliterator) {
                name = HanziToPinyin.getInstance().transliterate(name);
            }
            final int bucket = mAlphabeticIndex.getBucketIndex(name);
            if (bucket < 0) {
                return -1;
            }
            // Buckets at or after the '#' slot move up by one to make room for it.
            if (bucket >= mNumberBucketIndex) {
                return bucket + 1;
            }
            return bucket;
        }

        /**
         * Returns the number of buckets in use (one more than AlphabeticIndex
         * uses, because this class adds a bucket for phone numbers).
         */
        public int getBucketCount() {
            return mAlphabeticIndexBucketCount + 1;
        }

        /**
         * Returns the label for the specified bucket index if a valid index,
         * otherwise returns an empty string. '#' is returned for the phone
         * number bucket; for all others, the AlphabeticIndex label is returned.
         */
        public String getBucketLabel(int bucketIndex) {
            if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
                return EMPTY_STRING;
            } else if (bucketIndex == mNumberBucketIndex) {
                return NUMBER_STRING;
            } else if (bucketIndex > mNumberBucketIndex) {
                // Undo the shift applied by getBucketIndex().
                --bucketIndex;
            }
            return mAlphabeticIndex.getBucket(bucketIndex).getLabel();
        }

        /**
         * Returns additional name lookup keys for {@code name}. This
         * implementation has none and returns {@code null}.
         */
        @SuppressWarnings("unused")
        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
            return null;
        }

        /**
         * Returns the labels of all buckets, in bucket-index order, as
         * produced by {@link #getBucketLabel(int)}.
         */
        public ArrayList<String> getLabels() {
            final int bucketCount = getBucketCount();
            final ArrayList<String> labels = new ArrayList<>(bucketCount);
            for (int i = 0; i < bucketCount; ++i) {
                labels.add(getBucketLabel(i));
            }
            return labels;
        }
    }

    /**
     * Japanese specific locale overrides.
     *
     * sortKey: unchanged (same as name)
     * nameLookupKeys: unchanged (none)
     * labels: extends default labels by labeling unlabeled CJ characters
     *     with the Japanese character 他 ("misc"). Japanese labels are:
     *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
     */
    private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
        // \u4ed6 is Japanese character 他 ("misc")
        private static final String JAPANESE_MISC_LABEL = "\u4ed6";
        private final int mMiscBucketIndex;

        public JapaneseContactUtils(LocaleSet locales) {
            super(locales);
            // Determine which bucket AlphabeticIndex is lumping unclassified
            // Japanese characters into by looking up the bucket index for
            // a representative Kanji/CJK unified ideograph (\u65e5 is the
            // character '日').
            mMiscBucketIndex = super.getBucketIndex("\u65e5");
        }

        // Set of UnicodeBlocks for unified CJK (Chinese) characters and
        // Japanese characters. This includes all code blocks that might
        // contain a character used in Japanese (which is why unified CJK
        // blocks are included but Korean Hangul and jamo are not).
        private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
        static {
            Set<UnicodeBlock> set = new HashSet<>();
            set.add(UnicodeBlock.HIRAGANA);
            set.add(UnicodeBlock.KATAKANA);
            set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
            set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
            set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
            set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
            set.add(UnicodeBlock.CJK_COMPATIBILITY);
            set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
            CJ_BLOCKS = Collections.unmodifiableSet(set);
        }

        /**
         * Helper routine to identify unlabeled Chinese or Japanese characters
         * to put in a 'misc' bucket.
         *
         * @return true if the specified Unicode code point is Chinese or
         *         Japanese
         */
        private static boolean isChineseOrJapanese(int codePoint) {
            return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
        }

        /**
         * Returns true if the first code point of {@code name} is Chinese or
         * Japanese. An empty name has no first code point and yields false,
         * which avoids the StringIndexOutOfBoundsException that an unguarded
         * {@code Character.codePointAt(name, 0)} would throw.
         */
        private static boolean startsWithChineseOrJapanese(String name) {
            return !name.isEmpty() && isChineseOrJapanese(Character.codePointAt(name, 0));
        }

        /**
         * Returns the bucket index for the specified string. Adds an
         * additional 'misc' bucket for Kanji characters to the base class set.
         */
        @Override
        public int getBucketIndex(String name) {
            final int bucketIndex = super.getBucketIndex(name);
            // Only names that start with a CJ character stay in the misc
            // bucket; everything else that landed there, and every later
            // bucket, moves up by one.
            if ((bucketIndex == mMiscBucketIndex && !startsWithChineseOrJapanese(name))
                    || bucketIndex > mMiscBucketIndex) {
                return bucketIndex + 1;
            }
            return bucketIndex;
        }

        /**
         * Returns the number of buckets in use (one more than the base class
         * uses, because this class adds a bucket for Kanji).
         */
        @Override
        public int getBucketCount() {
            return super.getBucketCount() + 1;
        }

        /**
         * Returns the label for the specified bucket index if a valid index,
         * otherwise returns an empty string. '他' is returned for unclassified
         * Kanji; for all others, the label determined by the base class is
         * returned.
         */
        @Override
        public String getBucketLabel(int bucketIndex) {
            if (bucketIndex == mMiscBucketIndex) {
                return JAPANESE_MISC_LABEL;
            } else if (bucketIndex > mMiscBucketIndex) {
                // Undo the shift applied by getBucketIndex().
                --bucketIndex;
            }
            return super.getBucketLabel(bucketIndex);
        }

        /**
         * Returns Romaji lookup keys for names tagged as Japanese, otherwise
         * {@code null}.
         */
        @Override
        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
            // Hiragana and Katakana will be positively identified as Japanese.
            // nameStyle is compared against FullNameStyle here, matching every
            // other comparison of this parameter in the file.
            if (nameStyle == FullNameStyle.JAPANESE) {
                return getRomajiNameLookupKeys(name);
            }
            return null;
        }

        // Both fields are guarded by the JapaneseContactUtils.class monitor.
        private static boolean sInitializedTransliterator;
        private static Transliterator sJapaneseTransliterator;

        /**
         * Returns the shared Hiragana/Katakana-to-Latin transliterator,
         * creating it on first use. Creation is attempted only once; if it
         * fails with IllegalArgumentException a warning is logged and
         * {@code null} is returned on this and every later call.
         */
        private static Transliterator getJapaneseTransliterator() {
            synchronized (JapaneseContactUtils.class) {
                if (!sInitializedTransliterator) {
                    sInitializedTransliterator = true;
                    Transliterator t = null;
                    try {
                        t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
                                + " Latin-Ascii");
                    } catch (IllegalArgumentException e) {
                        Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
                                + " is missing");
                    }
                    sJapaneseTransliterator = t;
                }
                return sJapaneseTransliterator;
            }
        }

        /**
         * Returns an iterator over the single Romaji transliteration of
         * {@code name}, or {@code null} when the transliterator is
         * unavailable, the result is empty, or the result equals the input.
         */
        public static Iterator<String> getRomajiNameLookupKeys(String name) {
            final Transliterator t = getJapaneseTransliterator();
            if (t == null) {
                return null;
            }
            final String romajiName = t.transliterate(name);
            if (TextUtils.isEmpty(romajiName) ||
                    TextUtils.equals(name, romajiName)) {
                return null;
            }
            final HashSet<String> keys = new HashSet<>();
            keys.add(romajiName);
            return keys.iterator();
        }
    }

    /**
     * Simplified Chinese specific locale overrides. Uses ICU Transliterator
     * for generating pinyin transliteration.
     *
     * sortKey: unchanged (same as name)
     * nameLookupKeys: adds additional name lookup keys
     *     - Chinese character's pinyin and pinyin's initial character.
     *     - Latin word and initial character.
     * labels: unchanged
     *     Simplified Chinese labels are the same as English: [A-Z], #, " "
     */
    private static class SimplifiedChineseContactUtils
            extends ContactLocaleUtilsBase {
        public SimplifiedChineseContactUtils(LocaleSet locales) {
            super(locales);
        }

        /**
         * Returns pinyin lookup keys unless the name is tagged as Japanese or
         * Korean, in which case {@code null} is returned.
         */
        @Override
        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
            if (nameStyle != FullNameStyle.JAPANESE &&
                    nameStyle != FullNameStyle.KOREAN) {
                return getPinyinNameLookupKeys(name);
            }
            return null;
        }

        /**
         * Builds lookup keys from the HanziToPinyin tokens of {@code name}.
         * Tokens are walked from last to first; after each non-UNKNOWN token
         * the accumulated original text, full pinyin and initials are each
         * added as keys, so every suffix of the token sequence contributes
         * keys.
         */
        public static Iterator<String> getPinyinNameLookupKeys(String name) {
            // TODO : Reduce the object allocation.
            HashSet<String> keys = new HashSet<>();
            ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
            final int tokenCount = tokens.size();
            final StringBuilder keyPinyin = new StringBuilder();
            final StringBuilder keyInitial = new StringBuilder();
            // There is no space among the Chinese Characters, the variant name
            // lookup key wouldn't work for Chinese. The keyOriginal is used to
            // build the lookup keys for itself.
            final StringBuilder keyOriginal = new StringBuilder();
            for (int i = tokenCount - 1; i >= 0; i--) {
                final Token token = tokens.get(i);
                if (Token.UNKNOWN == token.type) {
                    continue;
                }
                if (Token.PINYIN == token.type) {
                    keyPinyin.insert(0, token.target);
                    keyInitial.insert(0, token.target.charAt(0));
                } else if (Token.LATIN == token.type) {
                    // Avoid adding space at the end of String.
                    if (keyPinyin.length() > 0) {
                        keyPinyin.insert(0, ' ');
                    }
                    if (keyOriginal.length() > 0) {
                        keyOriginal.insert(0, ' ');
                    }
                    keyPinyin.insert(0, token.source);
                    keyInitial.insert(0, token.source.charAt(0));
                }
                keyOriginal.insert(0, token.source);
                keys.add(keyOriginal.toString());
                keys.add(keyPinyin.toString());
                keys.add(keyInitial.toString());
            }
            return keys.iterator();
        }
    }

    private static ContactLocaleUtils sSingleton;

    private final LocaleSet mLocales;
    private final ContactLocaleUtilsBase mUtils;

    /**
     * Selects the locale-specific implementation for {@code locales} and logs
     * the resulting labels.
     *
     * @param locales the locale set to use; {@code null} means
     *        {@link LocaleSet#newDefault()}
     */
    private ContactLocaleUtils(LocaleSet locales) {
        if (locales == null) {
            mLocales = LocaleSet.newDefault();
        } else {
            mLocales = locales;
        }
        if (mLocales.shouldPreferJapanese()) {
            mUtils = new JapaneseContactUtils(mLocales);
        } else if (mLocales.shouldPreferSimplifiedChinese()) {
            mUtils = new SimplifiedChineseContactUtils(mLocales);
        } else {
            mUtils = new ContactLocaleUtilsBase(mLocales);
        }
        Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
                + getLabels().toString());
    }

    /**
     * Returns true if this instance was built for a locale set equal to
     * {@code locales}.
     */
    public boolean isLocale(LocaleSet locales) {
        return mLocales.equals(locales);
    }

    /**
     * Returns the shared instance, creating it for the default locale set if
     * none exists yet.
     */
    public static synchronized ContactLocaleUtils getInstance() {
        if (sSingleton == null) {
            sSingleton = new ContactLocaleUtils(LocaleSet.newDefault());
        }
        return sSingleton;
    }

    /**
     * Test hook: replaces the shared instance with one built for the given
     * locales (via {@link LocaleSet#newForTest}).
     */
    @VisibleForTesting
    public static synchronized void setLocaleForTest(Locale... locales) {
        setLocales(LocaleSet.newForTest(locales));
    }

    /**
     * Replaces the shared instance if none exists or the existing one was
     * built for a different locale set.
     */
    public static synchronized void setLocales(LocaleSet locales) {
        if (sSingleton == null || !sSingleton.isLocale(locales)) {
            sSingleton = new ContactLocaleUtils(locales);
        }
    }

    /**
     * Returns the sort key for {@code name}. {@code nameStyle} is accepted
     * but not used.
     */
    public String getSortKey(String name, int nameStyle) {
        return mUtils.getSortKey(name);
    }

    /** Returns the bucket index for {@code name} in the active locale. */
    public int getBucketIndex(String name) {
        return mUtils.getBucketIndex(name);
    }

    /** Returns the index of the '#' (phone number) bucket. */
    public int getNumberBucketIndex() {
        return mUtils.getNumberBucketIndex();
    }

    /** Returns the total number of buckets in the active locale. */
    public int getBucketCount() {
        return mUtils.getBucketCount();
    }

    /**
     * Returns the label for {@code bucketIndex}, or an empty string when the
     * index is out of range.
     */
    public String getBucketLabel(int bucketIndex) {
        return mUtils.getBucketLabel(bucketIndex);
    }

    /** Returns the label of the bucket that {@code name} falls into. */
    public String getLabel(String name) {
        return getBucketLabel(getBucketIndex(name));
    }

    /** Returns the labels of all buckets in bucket-index order. */
    public ArrayList<String> getLabels() {
        return mUtils.getLabels();
    }

    /**
     * Determine which utility should be used for generating NameLookupKey.
     * (ie, whether we generate Romaji or Pinyin lookup keys or not)
     *
     * Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
     * and tagged as CJK. For Hiragana/Katakana names, generate Romaji
     * lookup keys when not in a Chinese or Korean locale.
     *
     * Otherwise, use the default behavior of that locale:
     * a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
     * b. For Simplified Chinese locale, generate Pinyin lookup keys.
     *
     * @return an iterator over the extra lookup keys, or {@code null} when
     *         there are none
     */
    public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
        if (!mLocales.isPrimaryLocaleCJK()) {
            if (mLocales.shouldPreferSimplifiedChinese()) {
                if (nameStyle == FullNameStyle.CHINESE ||
                        nameStyle == FullNameStyle.CJK) {
                    return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
                }
            } else {
                if (nameStyle == FullNameStyle.JAPANESE) {
                    return JapaneseContactUtils.getRomajiNameLookupKeys(name);
                }
            }
        }
        return mUtils.getNameLookupKeys(name, nameStyle);
    }

}