1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16 
17 package com.android.providers.contacts;
18 
19 import android.icu.text.AlphabeticIndex;
20 import android.icu.text.AlphabeticIndex.ImmutableIndex;
21 import android.icu.text.Transliterator;
22 import android.provider.ContactsContract.FullNameStyle;
23 import android.provider.ContactsContract.PhoneticNameStyle;
24 import android.os.LocaleList;
25 import android.text.TextUtils;
26 import android.util.ArraySet;
27 import android.util.Log;
28 
29 import com.android.providers.contacts.HanziToPinyin.Token;
30 import com.google.common.annotations.VisibleForTesting;
31 
32 import java.lang.Character.UnicodeBlock;
33 import java.util.ArrayList;
34 import java.util.Collections;
35 import java.util.HashSet;
36 import java.util.Iterator;
37 import java.util.Locale;
38 import java.util.Set;
39 
40 
41 /**
42  * This utility class provides specialized handling for locale specific
43  * information: labels, name lookup keys.
44  */
45 public class ContactLocaleUtils {
46     public static final String TAG = "ContactLocale";
47 
48     public static final Locale LOCALE_ARABIC = new Locale("ar");
49     public static final Locale LOCALE_GREEK = new Locale("el");
50     public static final Locale LOCALE_HEBREW = new Locale("he");
51     // Serbian and Ukrainian labels are complementary supersets of Russian
52     public static final Locale LOCALE_SERBIAN = new Locale("sr");
53     public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
54     public static final Locale LOCALE_THAI = new Locale("th");
55 
56     // -- Note for adding locales to sDefaultLabelLocales --
57     //
58     // AlphabeticIndex.getBucketLabel() uses a binary search across
59     // the entire label set so care should be taken about growing this
60     // set too large. The following set determines for which locales
61     // we will show labels other than your primary locale. General rules
62     // of thumb for adding a locale: should be a supported locale; and
63     // should not be included if from a name it is not deterministic
64     // which way to label it (so eg Chinese cannot be added because
65     // the labeling of a Chinese character varies between Simplified,
66     // Traditional, and Japanese locales). Use English only for all
67     // Latin based alphabets. Ukrainian and Serbian are chosen for
68     // Cyrillic because their alphabets are complementary supersets
69     // of Russian.
70     private static final Locale[] sDefaultLabelLocales = new Locale[]{
71             Locale.ENGLISH,
72             Locale.JAPANESE,
73             Locale.KOREAN,
74             LOCALE_THAI,
75             LOCALE_ARABIC,
76             LOCALE_HEBREW,
77             LOCALE_GREEK,
78             LOCALE_UKRAINIAN,
79             LOCALE_SERBIAN,
80     };
81 
82     /**
83      * This class is the default implementation and should be the base class
84      * for other locales.
85      *
86      * sortKey: same as name
87      * nameLookupKeys: none
88      * labels: uses ICU AlphabeticIndex for labels and extends by labeling
89      *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
90      */
91     private static class ContactLocaleUtilsBase {
92         private static final String EMPTY_STRING = "";
93         private static final String NUMBER_STRING = "#";
94 
95         protected final ImmutableIndex mAlphabeticIndex;
96         private final int mAlphabeticIndexBucketCount;
97         private final int mNumberBucketIndex;
98         private final boolean mUsePinyinTransliterator;
99 
ContactLocaleUtilsBase(LocaleSet locales)100         public ContactLocaleUtilsBase(LocaleSet locales) {
101             mUsePinyinTransliterator = locales.shouldPreferSimplifiedChinese();
102 
103             final ArraySet<Locale> addedLocales = new ArraySet<>();
104 
105             // First, add from the primary locale (which may not be the first locale in the locale
106             // list).
107             AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
108                     .setMaxLabelCount(300);
109             addedLocales.add(locales.getPrimaryLocale());
110 
111             // Next, add all locale form the locale list.
112             final LocaleList localeList = locales.getAllLocales();
113             for (int i = 0; i < localeList.size(); i++) {
114                 addLabels(ai, localeList.get(i), addedLocales);
115             }
116             // Then add the default locales.
117             for (int i = 0; i < sDefaultLabelLocales.length; i++) {
118                 addLabels(ai, sDefaultLabelLocales[i], addedLocales);
119             }
120             mAlphabeticIndex = ai.buildImmutableIndex();
121             mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
122             mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
123         }
124 
addLabels( AlphabeticIndex ai, Locale locale, ArraySet<Locale> addedLocales)125         private static void addLabels(
126                 AlphabeticIndex ai, Locale locale, ArraySet<Locale> addedLocales) {
127             if (addedLocales.contains(locale)) {
128                 return;
129             }
130             ai.addLabels(locale);
131             addedLocales.add(locale);
132         }
133 
getSortKey(String name)134         public String getSortKey(String name) {
135             return name;
136         }
137 
getNumberBucketIndex()138         public int getNumberBucketIndex() {
139             return mNumberBucketIndex;
140         }
141 
142         /**
143          * Returns the bucket index for the specified string. AlphabeticIndex
144          * sorts strings into buckets numbered in order from 0 to N, where the
145          * exact value of N depends on how many representative index labels are
146          * used in a particular locale. This routine adds one additional bucket
147          * for phone numbers. It attempts to detect phone numbers and shifts
148          * the bucket indexes returned by AlphabeticIndex in order to make room
149          * for the new # bucket, so the returned range becomes 0 to N+1.
150          */
getBucketIndex(String name)151         public int getBucketIndex(String name) {
152             boolean prefixIsNumeric = false;
153             final int length = name.length();
154             int offset = 0;
155             while (offset < length) {
156                 int codePoint = Character.codePointAt(name, offset);
157                 // Ignore standard phone number separators and identify any
158                 // string that otherwise starts with a number.
159                 if (Character.isDigit(codePoint)) {
160                     prefixIsNumeric = true;
161                     break;
162                 } else if (!Character.isSpaceChar(codePoint) &&
163                            codePoint != '+' && codePoint != '(' &&
164                            codePoint != ')' && codePoint != '.' &&
165                            codePoint != '-' && codePoint != '#') {
166                     break;
167                 }
168                 offset += Character.charCount(codePoint);
169             }
170             if (prefixIsNumeric) {
171                 return mNumberBucketIndex;
172             }
173 
174             /**
175              * ICU 55 AlphabeticIndex doesn't support Simplified Chinese
176              * as a secondary locale so it is necessary to use the
177              * Pinyin transliterator. We also use this for a Simplified
178              * Chinese primary locale because it gives more accurate letter
179              * buckets. b/19835686
180              */
181             if (mUsePinyinTransliterator) {
182                 name = HanziToPinyin.getInstance().transliterate(name);
183             }
184             final int bucket = mAlphabeticIndex.getBucketIndex(name);
185             if (bucket < 0) {
186                 return -1;
187             }
188             if (bucket >= mNumberBucketIndex) {
189                 return bucket + 1;
190             }
191             return bucket;
192         }
193 
194         /**
195          * Returns the number of buckets in use (one more than AlphabeticIndex
196          * uses, because this class adds a bucket for phone numbers).
197          */
getBucketCount()198         public int getBucketCount() {
199             return mAlphabeticIndexBucketCount + 1;
200         }
201 
202         /**
203          * Returns the label for the specified bucket index if a valid index,
204          * otherwise returns an empty string. '#' is returned for the phone
205          * number bucket; for all others, the AlphabeticIndex label is returned.
206          */
getBucketLabel(int bucketIndex)207         public String getBucketLabel(int bucketIndex) {
208             if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
209                 return EMPTY_STRING;
210             } else if (bucketIndex == mNumberBucketIndex) {
211                 return NUMBER_STRING;
212             } else if (bucketIndex > mNumberBucketIndex) {
213                 --bucketIndex;
214             }
215             return mAlphabeticIndex.getBucket(bucketIndex).getLabel();
216         }
217 
218         @SuppressWarnings("unused")
getNameLookupKeys(String name, int nameStyle)219         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
220             return null;
221         }
222 
getLabels()223         public ArrayList<String> getLabels() {
224             final int bucketCount = getBucketCount();
225             final ArrayList<String> labels = new ArrayList<String>(bucketCount);
226             for(int i = 0; i < bucketCount; ++i) {
227                 labels.add(getBucketLabel(i));
228             }
229             return labels;
230         }
231     }
232 
233     /**
234      * Japanese specific locale overrides.
235      *
236      * sortKey: unchanged (same as name)
237      * nameLookupKeys: unchanged (none)
238      * labels: extends default labels by labeling unlabeled CJ characters
239      *     with the Japanese character 他 ("misc"). Japanese labels are:
240      *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
241      */
242     private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
243         // \u4ed6 is Japanese character 他 ("misc")
244         private static final String JAPANESE_MISC_LABEL = "\u4ed6";
245         private final int mMiscBucketIndex;
246 
JapaneseContactUtils(LocaleSet locales)247         public JapaneseContactUtils(LocaleSet locales) {
248             super(locales);
249             // Determine which bucket AlphabeticIndex is lumping unclassified
250             // Japanese characters into by looking up the bucket index for
251             // a representative Kanji/CJK unified ideograph (\u65e5 is the
252             // character '日').
253             mMiscBucketIndex = super.getBucketIndex("\u65e5");
254         }
255 
256         // Set of UnicodeBlocks for unified CJK (Chinese) characters and
257         // Japanese characters. This includes all code blocks that might
258         // contain a character used in Japanese (which is why unified CJK
259         // blocks are included but Korean Hangul and jamo are not).
260         private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
261         static {
262             Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
263             set.add(UnicodeBlock.HIRAGANA);
264             set.add(UnicodeBlock.KATAKANA);
265             set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
266             set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
267             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
268             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
269             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
270             set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
271             set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
272             set.add(UnicodeBlock.CJK_COMPATIBILITY);
273             set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
274             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
275             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
276             CJ_BLOCKS = Collections.unmodifiableSet(set);
277         }
278 
279         /**
280          * Helper routine to identify unlabeled Chinese or Japanese characters
281          * to put in a 'misc' bucket.
282          *
283          * @return true if the specified Unicode code point is Chinese or
284          *              Japanese
285          */
isChineseOrJapanese(int codePoint)286         private static boolean isChineseOrJapanese(int codePoint) {
287             return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
288         }
289 
290         /**
291          * Returns the bucket index for the specified string. Adds an
292          * additional 'misc' bucket for Kanji characters to the base class set.
293          */
294         @Override
getBucketIndex(String name)295         public int getBucketIndex(String name) {
296             final int bucketIndex = super.getBucketIndex(name);
297             if ((bucketIndex == mMiscBucketIndex &&
298                  !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
299                 bucketIndex > mMiscBucketIndex) {
300                 return bucketIndex + 1;
301             }
302             return bucketIndex;
303         }
304 
305         /**
306          * Returns the number of buckets in use (one more than the base class
307          * uses, because this class adds a bucket for Kanji).
308          */
309         @Override
getBucketCount()310         public int getBucketCount() {
311             return super.getBucketCount() + 1;
312         }
313 
314         /**
315          * Returns the label for the specified bucket index if a valid index,
316          * otherwise returns an empty string. '他' is returned for unclassified
317          * Kanji; for all others, the label determined by the base class is
318          * returned.
319          */
320         @Override
getBucketLabel(int bucketIndex)321         public String getBucketLabel(int bucketIndex) {
322             if (bucketIndex == mMiscBucketIndex) {
323                 return JAPANESE_MISC_LABEL;
324             } else if (bucketIndex > mMiscBucketIndex) {
325                 --bucketIndex;
326             }
327             return super.getBucketLabel(bucketIndex);
328         }
329 
330         @Override
getNameLookupKeys(String name, int nameStyle)331         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
332             // Hiragana and Katakana will be positively identified as Japanese.
333             if (nameStyle == PhoneticNameStyle.JAPANESE) {
334                 return getRomajiNameLookupKeys(name);
335             }
336             return null;
337         }
338 
339         private static boolean mInitializedTransliterator;
340         private static Transliterator mJapaneseTransliterator;
341 
getJapaneseTransliterator()342         private static Transliterator getJapaneseTransliterator() {
343             synchronized(JapaneseContactUtils.class) {
344                 if (!mInitializedTransliterator) {
345                     mInitializedTransliterator = true;
346                     Transliterator t = null;
347                     try {
348                         t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
349                                 + " Latin-Ascii");
350                     } catch (IllegalArgumentException e) {
351                         Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
352                                 + " is missing");
353                     }
354                     mJapaneseTransliterator = t;
355                 }
356                 return mJapaneseTransliterator;
357             }
358         }
359 
getRomajiNameLookupKeys(String name)360         public static Iterator<String> getRomajiNameLookupKeys(String name) {
361             final Transliterator t = getJapaneseTransliterator();
362             if (t == null) {
363                 return null;
364             }
365             final String romajiName = t.transliterate(name);
366             if (TextUtils.isEmpty(romajiName) ||
367                     TextUtils.equals(name, romajiName)) {
368                 return null;
369             }
370             final HashSet<String> keys = new HashSet<String>();
371             keys.add(romajiName);
372             return keys.iterator();
373         }
374     }
375 
376     /**
377      * Simplified Chinese specific locale overrides. Uses ICU Transliterator
378      * for generating pinyin transliteration.
379      *
380      * sortKey: unchanged (same as name)
381      * nameLookupKeys: adds additional name lookup keys
382      *     - Chinese character's pinyin and pinyin's initial character.
383      *     - Latin word and initial character.
384      * labels: unchanged
385      *     Simplified Chinese labels are the same as English: [A-Z], #, " "
386      */
387     private static class SimplifiedChineseContactUtils
388         extends ContactLocaleUtilsBase {
SimplifiedChineseContactUtils(LocaleSet locales)389         public SimplifiedChineseContactUtils(LocaleSet locales) {
390             super(locales);
391         }
392 
393         @Override
getNameLookupKeys(String name, int nameStyle)394         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
395             if (nameStyle != FullNameStyle.JAPANESE &&
396                     nameStyle != FullNameStyle.KOREAN) {
397                 return getPinyinNameLookupKeys(name);
398             }
399             return null;
400         }
401 
getPinyinNameLookupKeys(String name)402         public static Iterator<String> getPinyinNameLookupKeys(String name) {
403             // TODO : Reduce the object allocation.
404             HashSet<String> keys = new HashSet<String>();
405             ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
406             final int tokenCount = tokens.size();
407             final StringBuilder keyPinyin = new StringBuilder();
408             final StringBuilder keyInitial = new StringBuilder();
409             // There is no space among the Chinese Characters, the variant name
410             // lookup key wouldn't work for Chinese. The keyOriginal is used to
411             // build the lookup keys for itself.
412             final StringBuilder keyOriginal = new StringBuilder();
413             for (int i = tokenCount - 1; i >= 0; i--) {
414                 final Token token = tokens.get(i);
415                 if (Token.UNKNOWN == token.type) {
416                     continue;
417                 }
418                 if (Token.PINYIN == token.type) {
419                     keyPinyin.insert(0, token.target);
420                     keyInitial.insert(0, token.target.charAt(0));
421                 } else if (Token.LATIN == token.type) {
422                     // Avoid adding space at the end of String.
423                     if (keyPinyin.length() > 0) {
424                         keyPinyin.insert(0, ' ');
425                     }
426                     if (keyOriginal.length() > 0) {
427                         keyOriginal.insert(0, ' ');
428                     }
429                     keyPinyin.insert(0, token.source);
430                     keyInitial.insert(0, token.source.charAt(0));
431                 }
432                 keyOriginal.insert(0, token.source);
433                 keys.add(keyOriginal.toString());
434                 keys.add(keyPinyin.toString());
435                 keys.add(keyInitial.toString());
436             }
437             return keys.iterator();
438         }
439     }
440 
441     private static ContactLocaleUtils sSingleton;
442 
443     private final LocaleSet mLocales;
444     private final ContactLocaleUtilsBase mUtils;
445 
ContactLocaleUtils(LocaleSet locales)446     private ContactLocaleUtils(LocaleSet locales) {
447         if (locales == null) {
448             mLocales = LocaleSet.newDefault();
449         } else {
450             mLocales = locales;
451         }
452         if (mLocales.shouldPreferJapanese()) {
453             mUtils = new JapaneseContactUtils(mLocales);
454         } else if (mLocales.shouldPreferSimplifiedChinese()) {
455             mUtils = new SimplifiedChineseContactUtils(mLocales);
456         } else {
457             mUtils = new ContactLocaleUtilsBase(mLocales);
458         }
459         Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
460                 + getLabels().toString());
461     }
462 
isLocale(LocaleSet locales)463     public boolean isLocale(LocaleSet locales) {
464         return mLocales.equals(locales);
465     }
466 
getInstance()467     public static synchronized ContactLocaleUtils getInstance() {
468         if (sSingleton == null) {
469             sSingleton = new ContactLocaleUtils(LocaleSet.newDefault());
470         }
471         return sSingleton;
472     }
473 
474     @VisibleForTesting
setLocaleForTest(Locale... locales)475     public static synchronized void setLocaleForTest(Locale... locales) {
476         setLocales(LocaleSet.newForTest(locales));
477     }
478 
setLocales(LocaleSet locales)479     public static synchronized void setLocales(LocaleSet locales) {
480         if (sSingleton == null || !sSingleton.isLocale(locales)) {
481             sSingleton = new ContactLocaleUtils(locales);
482         }
483     }
484 
getSortKey(String name, int nameStyle)485     public String getSortKey(String name, int nameStyle) {
486         return mUtils.getSortKey(name);
487     }
488 
getBucketIndex(String name)489     public int getBucketIndex(String name) {
490         return mUtils.getBucketIndex(name);
491     }
492 
getNumberBucketIndex()493     public int getNumberBucketIndex() {
494         return mUtils.getNumberBucketIndex();
495     }
496 
getBucketCount()497     public int getBucketCount() {
498         return mUtils.getBucketCount();
499     }
500 
getBucketLabel(int bucketIndex)501     public String getBucketLabel(int bucketIndex) {
502         return mUtils.getBucketLabel(bucketIndex);
503     }
504 
getLabel(String name)505     public String getLabel(String name) {
506         return getBucketLabel(getBucketIndex(name));
507     }
508 
getLabels()509     public ArrayList<String> getLabels() {
510         return mUtils.getLabels();
511     }
512 
513     /**
514      *  Determine which utility should be used for generating NameLookupKey.
515      *  (ie, whether we generate Romaji or Pinyin lookup keys or not)
516      *
517      *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
518      *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
519      *  lookup keys when not in a Chinese or Korean locale.
520      *
521      *  Otherwise, use the default behavior of that locale:
522      *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
523      *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
524      */
getNameLookupKeys(String name, int nameStyle)525     public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
526         if (!mLocales.isPrimaryLocaleCJK()) {
527             if (mLocales.shouldPreferSimplifiedChinese()) {
528                 if (nameStyle == FullNameStyle.CHINESE ||
529                         nameStyle == FullNameStyle.CJK) {
530                     return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
531                 }
532             } else {
533                 if (nameStyle == FullNameStyle.JAPANESE) {
534                     return JapaneseContactUtils.getRomajiNameLookupKeys(name);
535                 }
536             }
537         }
538         return mUtils.getNameLookupKeys(name, nameStyle);
539     }
540 
541 }
542