1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16 
17 package com.android.providers.contacts;
18 
19 import android.provider.ContactsContract.FullNameStyle;
20 import android.provider.ContactsContract.PhoneticNameStyle;
21 import android.text.TextUtils;
22 import android.util.Log;
23 
24 import com.android.providers.contacts.HanziToPinyin.Token;
25 import com.google.common.annotations.VisibleForTesting;
26 
27 import java.lang.Character.UnicodeBlock;
28 import java.util.Arrays;
29 import java.util.ArrayList;
30 import java.util.Collections;
31 import java.util.HashMap;
32 import java.util.HashSet;
33 import java.util.Iterator;
34 import java.util.List;
35 import java.util.Locale;
36 import java.util.Map;
37 import java.util.Set;
38 
39 import libcore.icu.AlphabeticIndex;
40 import libcore.icu.AlphabeticIndex.ImmutableIndex;
41 import libcore.icu.Transliterator;
42 
43 /**
44  * This utility class provides specialized handling for locale specific
45  * information: labels, name lookup keys.
46  */
47 public class ContactLocaleUtils {
48     public static final String TAG = "ContactLocale";
49 
50     public static final Locale LOCALE_ARABIC = new Locale("ar");
51     public static final Locale LOCALE_GREEK = new Locale("el");
52     public static final Locale LOCALE_HEBREW = new Locale("he");
53     // Serbian and Ukrainian labels are complementary supersets of Russian
54     public static final Locale LOCALE_SERBIAN = new Locale("sr");
55     public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
56     public static final Locale LOCALE_THAI = new Locale("th");
57 
58     /**
59      * This class is the default implementation and should be the base class
60      * for other locales.
61      *
62      * sortKey: same as name
63      * nameLookupKeys: none
64      * labels: uses ICU AlphabeticIndex for labels and extends by labeling
65      *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
66      */
67     private static class ContactLocaleUtilsBase {
68         private static final String EMPTY_STRING = "";
69         private static final String NUMBER_STRING = "#";
70 
71         protected final ImmutableIndex mAlphabeticIndex;
72         private final int mAlphabeticIndexBucketCount;
73         private final int mNumberBucketIndex;
74         private final boolean mEnableSecondaryLocalePinyin;
75 
ContactLocaleUtilsBase(LocaleSet locales)76         public ContactLocaleUtilsBase(LocaleSet locales) {
77             // AlphabeticIndex.getBucketLabel() uses a binary search across
78             // the entire label set so care should be taken about growing this
79             // set too large. The following set determines for which locales
80             // we will show labels other than your primary locale. General rules
81             // of thumb for adding a locale: should be a supported locale; and
82             // should not be included if from a name it is not deterministic
83             // which way to label it (so eg Chinese cannot be added because
84             // the labeling of a Chinese character varies between Simplified,
85             // Traditional, and Japanese locales). Use English only for all
86             // Latin based alphabets. Ukrainian and Serbian are chosen for
87             // Cyrillic because their alphabets are complementary supersets
88             // of Russian.
89             final Locale secondaryLocale = locales.getSecondaryLocale();
90             mEnableSecondaryLocalePinyin = locales.isSecondaryLocaleSimplifiedChinese();
91             AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
92                 .setMaxLabelCount(300);
93             if (secondaryLocale != null) {
94                 ai.addLabels(secondaryLocale);
95             }
96             mAlphabeticIndex = ai.addLabels(Locale.ENGLISH)
97                 .addLabels(Locale.JAPANESE)
98                 .addLabels(Locale.KOREAN)
99                 .addLabels(LOCALE_THAI)
100                 .addLabels(LOCALE_ARABIC)
101                 .addLabels(LOCALE_HEBREW)
102                 .addLabels(LOCALE_GREEK)
103                 .addLabels(LOCALE_UKRAINIAN)
104                 .addLabels(LOCALE_SERBIAN)
105                 .getImmutableIndex();
106             mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
107             mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
108         }
109 
getSortKey(String name)110         public String getSortKey(String name) {
111             return name;
112         }
113 
114         /**
115          * Returns the bucket index for the specified string. AlphabeticIndex
116          * sorts strings into buckets numbered in order from 0 to N, where the
117          * exact value of N depends on how many representative index labels are
118          * used in a particular locale. This routine adds one additional bucket
119          * for phone numbers. It attempts to detect phone numbers and shifts
120          * the bucket indexes returned by AlphabeticIndex in order to make room
121          * for the new # bucket, so the returned range becomes 0 to N+1.
122          */
getBucketIndex(String name)123         public int getBucketIndex(String name) {
124             boolean prefixIsNumeric = false;
125             final int length = name.length();
126             int offset = 0;
127             while (offset < length) {
128                 int codePoint = Character.codePointAt(name, offset);
129                 // Ignore standard phone number separators and identify any
130                 // string that otherwise starts with a number.
131                 if (Character.isDigit(codePoint)) {
132                     prefixIsNumeric = true;
133                     break;
134                 } else if (!Character.isSpaceChar(codePoint) &&
135                            codePoint != '+' && codePoint != '(' &&
136                            codePoint != ')' && codePoint != '.' &&
137                            codePoint != '-' && codePoint != '#') {
138                     break;
139                 }
140                 offset += Character.charCount(codePoint);
141             }
142             if (prefixIsNumeric) {
143                 return mNumberBucketIndex;
144             }
145 
146             /**
147              * TODO: ICU 52 AlphabeticIndex doesn't support Simplified Chinese
148              * as a secondary locale. Remove the following if that is added.
149              */
150             if (mEnableSecondaryLocalePinyin) {
151                 name = HanziToPinyin.getInstance().transliterate(name);
152             }
153             final int bucket = mAlphabeticIndex.getBucketIndex(name);
154             if (bucket < 0) {
155                 return -1;
156             }
157             if (bucket >= mNumberBucketIndex) {
158                 return bucket + 1;
159             }
160             return bucket;
161         }
162 
163         /**
164          * Returns the number of buckets in use (one more than AlphabeticIndex
165          * uses, because this class adds a bucket for phone numbers).
166          */
getBucketCount()167         public int getBucketCount() {
168             return mAlphabeticIndexBucketCount + 1;
169         }
170 
171         /**
172          * Returns the label for the specified bucket index if a valid index,
173          * otherwise returns an empty string. '#' is returned for the phone
174          * number bucket; for all others, the AlphabeticIndex label is returned.
175          */
getBucketLabel(int bucketIndex)176         public String getBucketLabel(int bucketIndex) {
177             if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
178                 return EMPTY_STRING;
179             } else if (bucketIndex == mNumberBucketIndex) {
180                 return NUMBER_STRING;
181             } else if (bucketIndex > mNumberBucketIndex) {
182                 --bucketIndex;
183             }
184             return mAlphabeticIndex.getBucketLabel(bucketIndex);
185         }
186 
187         @SuppressWarnings("unused")
getNameLookupKeys(String name, int nameStyle)188         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
189             return null;
190         }
191 
getLabels()192         public ArrayList<String> getLabels() {
193             final int bucketCount = getBucketCount();
194             final ArrayList<String> labels = new ArrayList<String>(bucketCount);
195             for(int i = 0; i < bucketCount; ++i) {
196                 labels.add(getBucketLabel(i));
197             }
198             return labels;
199         }
200     }
201 
202     /**
203      * Japanese specific locale overrides.
204      *
205      * sortKey: unchanged (same as name)
206      * nameLookupKeys: unchanged (none)
207      * labels: extends default labels by labeling unlabeled CJ characters
208      *     with the Japanese character 他 ("misc"). Japanese labels are:
209      *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
210      */
211     private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
212         // \u4ed6 is Japanese character 他 ("misc")
213         private static final String JAPANESE_MISC_LABEL = "\u4ed6";
214         private final int mMiscBucketIndex;
215 
JapaneseContactUtils(LocaleSet locales)216         public JapaneseContactUtils(LocaleSet locales) {
217             super(locales);
218             // Determine which bucket AlphabeticIndex is lumping unclassified
219             // Japanese characters into by looking up the bucket index for
220             // a representative Kanji/CJK unified ideograph (\u65e5 is the
221             // character '日').
222             mMiscBucketIndex = super.getBucketIndex("\u65e5");
223         }
224 
225         // Set of UnicodeBlocks for unified CJK (Chinese) characters and
226         // Japanese characters. This includes all code blocks that might
227         // contain a character used in Japanese (which is why unified CJK
228         // blocks are included but Korean Hangul and jamo are not).
229         private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
230         static {
231             Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
232             set.add(UnicodeBlock.HIRAGANA);
233             set.add(UnicodeBlock.KATAKANA);
234             set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
235             set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
236             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
237             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
238             set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
239             set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
240             set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
241             set.add(UnicodeBlock.CJK_COMPATIBILITY);
242             set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
243             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
244             set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
245             CJ_BLOCKS = Collections.unmodifiableSet(set);
246         }
247 
248         /**
249          * Helper routine to identify unlabeled Chinese or Japanese characters
250          * to put in a 'misc' bucket.
251          *
252          * @return true if the specified Unicode code point is Chinese or
253          *              Japanese
254          */
isChineseOrJapanese(int codePoint)255         private static boolean isChineseOrJapanese(int codePoint) {
256             return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
257         }
258 
259         /**
260          * Returns the bucket index for the specified string. Adds an
261          * additional 'misc' bucket for Kanji characters to the base class set.
262          */
263         @Override
getBucketIndex(String name)264         public int getBucketIndex(String name) {
265             final int bucketIndex = super.getBucketIndex(name);
266             if ((bucketIndex == mMiscBucketIndex &&
267                  !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
268                 bucketIndex > mMiscBucketIndex) {
269                 return bucketIndex + 1;
270             }
271             return bucketIndex;
272         }
273 
274         /**
275          * Returns the number of buckets in use (one more than the base class
276          * uses, because this class adds a bucket for Kanji).
277          */
278         @Override
getBucketCount()279         public int getBucketCount() {
280             return super.getBucketCount() + 1;
281         }
282 
283         /**
284          * Returns the label for the specified bucket index if a valid index,
285          * otherwise returns an empty string. '他' is returned for unclassified
286          * Kanji; for all others, the label determined by the base class is
287          * returned.
288          */
289         @Override
getBucketLabel(int bucketIndex)290         public String getBucketLabel(int bucketIndex) {
291             if (bucketIndex == mMiscBucketIndex) {
292                 return JAPANESE_MISC_LABEL;
293             } else if (bucketIndex > mMiscBucketIndex) {
294                 --bucketIndex;
295             }
296             return super.getBucketLabel(bucketIndex);
297         }
298 
299         @Override
getNameLookupKeys(String name, int nameStyle)300         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
301             // Hiragana and Katakana will be positively identified as Japanese.
302             if (nameStyle == PhoneticNameStyle.JAPANESE) {
303                 return getRomajiNameLookupKeys(name);
304             }
305             return null;
306         }
307 
308         private static boolean mInitializedTransliterator;
309         private static Transliterator mJapaneseTransliterator;
310 
getJapaneseTransliterator()311         private static Transliterator getJapaneseTransliterator() {
312             synchronized(JapaneseContactUtils.class) {
313                 if (!mInitializedTransliterator) {
314                     mInitializedTransliterator = true;
315                     Transliterator t = null;
316                     try {
317                         t = new Transliterator("Hiragana-Latin; Katakana-Latin;"
318                                 + " Latin-Ascii");
319                     } catch (RuntimeException e) {
320                         Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
321                                 + " is missing");
322                     }
323                     mJapaneseTransliterator = t;
324                 }
325                 return mJapaneseTransliterator;
326             }
327         }
328 
getRomajiNameLookupKeys(String name)329         public static Iterator<String> getRomajiNameLookupKeys(String name) {
330             final Transliterator t = getJapaneseTransliterator();
331             if (t == null) {
332                 return null;
333             }
334             final String romajiName = t.transliterate(name);
335             if (TextUtils.isEmpty(romajiName) ||
336                     TextUtils.equals(name, romajiName)) {
337                 return null;
338             }
339             final HashSet<String> keys = new HashSet<String>();
340             keys.add(romajiName);
341             return keys.iterator();
342         }
343     }
344 
345     /**
346      * Simplified Chinese specific locale overrides. Uses ICU Transliterator
347      * for generating pinyin transliteration.
348      *
349      * sortKey: unchanged (same as name)
350      * nameLookupKeys: adds additional name lookup keys
351      *     - Chinese character's pinyin and pinyin's initial character.
352      *     - Latin word and initial character.
353      * labels: unchanged
354      *     Simplified Chinese labels are the same as English: [A-Z], #, " "
355      */
356     private static class SimplifiedChineseContactUtils
357         extends ContactLocaleUtilsBase {
SimplifiedChineseContactUtils(LocaleSet locales)358         public SimplifiedChineseContactUtils(LocaleSet locales) {
359             super(locales);
360         }
361 
362         @Override
getNameLookupKeys(String name, int nameStyle)363         public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
364             if (nameStyle != FullNameStyle.JAPANESE &&
365                     nameStyle != FullNameStyle.KOREAN) {
366                 return getPinyinNameLookupKeys(name);
367             }
368             return null;
369         }
370 
getPinyinNameLookupKeys(String name)371         public static Iterator<String> getPinyinNameLookupKeys(String name) {
372             // TODO : Reduce the object allocation.
373             HashSet<String> keys = new HashSet<String>();
374             ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
375             final int tokenCount = tokens.size();
376             final StringBuilder keyPinyin = new StringBuilder();
377             final StringBuilder keyInitial = new StringBuilder();
378             // There is no space among the Chinese Characters, the variant name
379             // lookup key wouldn't work for Chinese. The keyOriginal is used to
380             // build the lookup keys for itself.
381             final StringBuilder keyOriginal = new StringBuilder();
382             for (int i = tokenCount - 1; i >= 0; i--) {
383                 final Token token = tokens.get(i);
384                 if (Token.UNKNOWN == token.type) {
385                     continue;
386                 }
387                 if (Token.PINYIN == token.type) {
388                     keyPinyin.insert(0, token.target);
389                     keyInitial.insert(0, token.target.charAt(0));
390                 } else if (Token.LATIN == token.type) {
391                     // Avoid adding space at the end of String.
392                     if (keyPinyin.length() > 0) {
393                         keyPinyin.insert(0, ' ');
394                     }
395                     if (keyOriginal.length() > 0) {
396                         keyOriginal.insert(0, ' ');
397                     }
398                     keyPinyin.insert(0, token.source);
399                     keyInitial.insert(0, token.source.charAt(0));
400                 }
401                 keyOriginal.insert(0, token.source);
402                 keys.add(keyOriginal.toString());
403                 keys.add(keyPinyin.toString());
404                 keys.add(keyInitial.toString());
405             }
406             return keys.iterator();
407         }
408     }
409 
410     private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
411 
412     private static ContactLocaleUtils sSingleton;
413 
414     private final LocaleSet mLocales;
415     private final ContactLocaleUtilsBase mUtils;
416 
ContactLocaleUtils(LocaleSet locales)417     private ContactLocaleUtils(LocaleSet locales) {
418         if (locales == null) {
419             mLocales = LocaleSet.getDefault();
420         } else {
421             mLocales = locales;
422         }
423         if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) {
424             mUtils = new JapaneseContactUtils(mLocales);
425         } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) {
426             mUtils = new SimplifiedChineseContactUtils(mLocales);
427         } else {
428             mUtils = new ContactLocaleUtilsBase(mLocales);
429         }
430         Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
431                 + getLabels().toString());
432     }
433 
    /**
     * Tells whether this instance was built for the given locale set.
     *
     * @return the result of comparing this instance's locale set with
     *     {@code locales} via {@code equals}
     */
    public boolean isLocale(LocaleSet locales) {
        return mLocales.equals(locales);
    }
437 
getInstance()438     public static synchronized ContactLocaleUtils getInstance() {
439         if (sSingleton == null) {
440             sSingleton = new ContactLocaleUtils(LocaleSet.getDefault());
441         }
442         return sSingleton;
443     }
444 
    /**
     * Convenience form of {@link #setLocales(LocaleSet)} that wraps a single
     * locale in a new LocaleSet.
     */
    @VisibleForTesting
    public static synchronized void setLocale(Locale locale) {
        setLocales(new LocaleSet(locale));
    }
449 
setLocales(LocaleSet locales)450     public static synchronized void setLocales(LocaleSet locales) {
451         if (sSingleton == null || !sSingleton.isLocale(locales)) {
452             sSingleton = new ContactLocaleUtils(locales);
453         }
454     }
455 
    /**
     * Returns the sort key for a name, as produced by the locale-specific
     * utility. The {@code nameStyle} argument is not used.
     */
    public String getSortKey(String name, int nameStyle) {
        return mUtils.getSortKey(name);
    }
459 
    /**
     * Returns the index of the bucket the name falls into, as determined by
     * the locale-specific utility.
     */
    public int getBucketIndex(String name) {
        return mUtils.getBucketIndex(name);
    }
463 
    /**
     * Returns the number of buckets reported by the locale-specific utility.
     */
    public int getBucketCount() {
        return mUtils.getBucketCount();
    }
467 
    /**
     * Returns the label for a bucket index, as determined by the
     * locale-specific utility.
     */
    public String getBucketLabel(int bucketIndex) {
        return mUtils.getBucketLabel(bucketIndex);
    }
471 
    /**
     * Returns the label of the bucket the name falls into; shorthand for
     * looking up the bucket index and then that bucket's label.
     */
    public String getLabel(String name) {
        return getBucketLabel(getBucketIndex(name));
    }
475 
    /**
     * Returns the list of labels produced by the locale-specific utility.
     */
    public ArrayList<String> getLabels() {
        return mUtils.getLabels();
    }
479 
480     /**
481      *  Determine which utility should be used for generating NameLookupKey.
482      *  (ie, whether we generate Romaji or Pinyin lookup keys or not)
483      *
484      *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
485      *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
486      *  lookup keys when not in a Chinese or Korean locale.
487      *
488      *  Otherwise, use the default behavior of that locale:
489      *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
490      *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
491      */
getNameLookupKeys(String name, int nameStyle)492     public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
493         if (!mLocales.isPrimaryLocaleCJK()) {
494             if (mLocales.isSecondaryLocaleSimplifiedChinese()) {
495                 if (nameStyle == FullNameStyle.CHINESE ||
496                         nameStyle == FullNameStyle.CJK) {
497                     return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
498                 }
499             } else {
500                 if (nameStyle == FullNameStyle.JAPANESE) {
501                     return JapaneseContactUtils.getRomajiNameLookupKeys(name);
502                 }
503             }
504         }
505         return mUtils.getNameLookupKeys(name, nameStyle);
506     }
507 
508 }
509