1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16 package com.android.providers.contacts;
17 
18 import com.android.providers.contacts.util.Hex;
19 import com.google.common.annotations.VisibleForTesting;
20 
21 import java.text.CollationKey;
22 import java.text.Collator;
23 import java.text.RuleBasedCollator;
24 import java.util.Locale;
25 
26 /**
27  * Converts a name to a normalized form by removing all non-letter characters and normalizing
28  * UNICODE according to http://unicode.org/unicode/reports/tr15
29  */
30 public class NameNormalizer {
31 
32     private static final Object sCollatorLock = new Object();
33 
34     private static Locale sCollatorLocale;
35 
36     private static RuleBasedCollator sCachedCompressingCollator;
37     private static RuleBasedCollator sCachedComplexityCollator;
38 
39     /**
40      * Ensure that the cached collators are for the current locale.
41      */
ensureCollators()42     private static void ensureCollators() {
43         final Locale locale = Locale.getDefault();
44         if (locale.equals(sCollatorLocale)) {
45             return;
46         }
47         sCollatorLocale = locale;
48 
49         sCachedCompressingCollator = (RuleBasedCollator) Collator.getInstance(locale);
50         sCachedCompressingCollator.setStrength(Collator.PRIMARY);
51         sCachedCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
52 
53         sCachedComplexityCollator = (RuleBasedCollator) Collator.getInstance(locale);
54         sCachedComplexityCollator.setStrength(Collator.SECONDARY);
55     }
56 
57     @VisibleForTesting
getCompressingCollator()58     static RuleBasedCollator getCompressingCollator() {
59         synchronized (sCollatorLock) {
60             ensureCollators();
61             return sCachedCompressingCollator;
62         }
63     }
64 
65     @VisibleForTesting
getComplexityCollator()66     static RuleBasedCollator getComplexityCollator() {
67         synchronized (sCollatorLock) {
68             ensureCollators();
69             return sCachedComplexityCollator;
70         }
71     }
72 
73     /**
74      * Converts the supplied name to a string that can be used to perform approximate matching
75      * of names.  It ignores non-letter, non-digit characters, and removes accents.
76      */
normalize(String name)77     public static String normalize(String name) {
78         CollationKey key = getCompressingCollator().getCollationKey(lettersAndDigitsOnly(name));
79         return Hex.encodeHex(key.toByteArray(), true);
80     }
81 
82     /**
83      * Compares "complexity" of two names, which is determined by the presence
84      * of mixed case characters, accents and, if all else is equal, length.
85      */
compareComplexity(String name1, String name2)86     public static int compareComplexity(String name1, String name2) {
87         String clean1 = lettersAndDigitsOnly(name1);
88         String clean2 = lettersAndDigitsOnly(name2);
89         int diff = getComplexityCollator().compare(clean1, clean2);
90         if (diff != 0) {
91             return diff;
92         }
93         // compareTo sorts uppercase first. We know that there are no non-case
94         // differences from the above test, so we can negate here to get the
95         // lowercase-first comparison we really want...
96         diff = -clean1.compareTo(clean2);
97         if (diff != 0) {
98             return diff;
99         }
100         return name1.length() - name2.length();
101     }
102 
103     /**
104      * Returns a string containing just the letters and digits from the original string.
105      * Returns empty string if the original string is null.
106      */
lettersAndDigitsOnly(String name)107     private static String lettersAndDigitsOnly(String name) {
108         if (name == null) {
109             return "";
110         }
111         char[] letters = name.toCharArray();
112         int length = 0;
113         for (int i = 0; i < letters.length; i++) {
114             final char c = letters[i];
115             if (Character.isLetterOrDigit(c)) {
116                 letters[length++] = c;
117             }
118         }
119 
120         if (length != letters.length) {
121             return new String(letters, 0, length);
122         }
123 
124         return name;
125     }
126 }
127