1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16 package com.android.providers.contacts;
17 
18 import android.content.ContentValues;
19 import android.database.Cursor;
20 import android.database.sqlite.SQLiteDatabase;
21 import android.os.SystemClock;
22 import android.provider.ContactsContract.CommonDataKinds.Email;
23 import android.provider.ContactsContract.CommonDataKinds.Nickname;
24 import android.provider.ContactsContract.CommonDataKinds.Organization;
25 import android.provider.ContactsContract.CommonDataKinds.StructuredPostal;
26 import android.provider.ContactsContract.Data;
27 import android.provider.ContactsContract.ProviderStatus;
28 import android.provider.ContactsContract.RawContacts;
29 import android.text.TextUtils;
30 import android.util.Log;
31 
32 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
33 import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns;
34 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
35 import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns;
36 import com.android.providers.contacts.ContactsDatabaseHelper.Tables;
37 import com.google.android.collect.Lists;
38 import com.google.common.annotations.VisibleForTesting;
39 
40 import java.util.ArrayList;
41 import java.util.HashSet;
42 import java.util.List;
43 import java.util.Set;
44 import java.util.regex.Pattern;
45 
46 /**
47  * Maintains a search index for comprehensive contact search.
48  */
49 public class SearchIndexManager {
50     private static final String TAG = "ContactsFTS";
51 
52     private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE);
53 
54     public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index";
55     private static final int SEARCH_INDEX_VERSION = 1;
56 
57     private static final class ContactIndexQuery {
58         public static final String[] COLUMNS = {
59                 Data.CONTACT_ID,
60                 MimetypesColumns.MIMETYPE,
61                 Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5,
62                 Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11,
63                 Data.DATA12, Data.DATA13, Data.DATA14
64         };
65 
66         public static final int MIMETYPE = 1;
67     }
68 
69     public static class IndexBuilder {
70         public static final int SEPARATOR_SPACE = 0;
71         public static final int SEPARATOR_PARENTHESES = 1;
72         public static final int SEPARATOR_SLASH = 2;
73         public static final int SEPARATOR_COMMA = 3;
74 
75         private StringBuilder mSbContent = new StringBuilder();
76         private StringBuilder mSbName = new StringBuilder();
77         private StringBuilder mSbTokens = new StringBuilder();
78         private StringBuilder mSbElementContent = new StringBuilder();
79         private HashSet<String> mUniqueElements = new HashSet<String>();
80         private Cursor mCursor;
81 
setCursor(Cursor cursor)82         void setCursor(Cursor cursor) {
83             this.mCursor = cursor;
84         }
85 
reset()86         void reset() {
87             mSbContent.setLength(0);
88             mSbTokens.setLength(0);
89             mSbName.setLength(0);
90             mSbElementContent.setLength(0);
91             mUniqueElements.clear();
92         }
93 
getContent()94         public String getContent() {
95             return mSbContent.length() == 0 ? null : mSbContent.toString();
96         }
97 
getName()98         public String getName() {
99             return mSbName.length() == 0 ? null : mSbName.toString();
100         }
101 
getTokens()102         public String getTokens() {
103             return mSbTokens.length() == 0 ? null : mSbTokens.toString();
104         }
105 
getString(String columnName)106         public String getString(String columnName) {
107             return mCursor.getString(mCursor.getColumnIndex(columnName));
108         }
109 
getInt(String columnName)110         public int getInt(String columnName) {
111             return mCursor.getInt(mCursor.getColumnIndex(columnName));
112         }
113 
114         @Override
toString()115         public String toString() {
116             return "Content: " + mSbContent + "\n Name: " + mSbName + "\n Tokens: " + mSbTokens;
117         }
118 
commit()119         public void commit() {
120             if (mSbElementContent.length() != 0) {
121                 String content = mSbElementContent.toString().replace('\n', ' ');
122                 if (!mUniqueElements.contains(content)) {
123                     if (mSbContent.length() != 0) {
124                         mSbContent.append('\n');
125                     }
126                     mSbContent.append(content);
127                     mUniqueElements.add(content);
128                 }
129                 mSbElementContent.setLength(0);
130             }
131         }
132 
appendContentFromColumn(String columnName)133         public void appendContentFromColumn(String columnName) {
134             appendContentFromColumn(columnName, SEPARATOR_SPACE);
135         }
136 
appendContentFromColumn(String columnName, int format)137         public void appendContentFromColumn(String columnName, int format) {
138             appendContent(getString(columnName), format);
139         }
140 
appendContent(String value)141         public void appendContent(String value) {
142             appendContent(value, SEPARATOR_SPACE);
143         }
144 
appendContent(String value, int format)145         private void appendContent(String value, int format) {
146             if (TextUtils.isEmpty(value)) {
147                 return;
148             }
149 
150             switch (format) {
151                 case SEPARATOR_SPACE:
152                     if (mSbElementContent.length() > 0) {
153                         mSbElementContent.append(' ');
154                     }
155                     mSbElementContent.append(value);
156                     break;
157 
158                 case SEPARATOR_SLASH:
159                     mSbElementContent.append('/').append(value);
160                     break;
161 
162                 case SEPARATOR_PARENTHESES:
163                     if (mSbElementContent.length() > 0) {
164                         mSbElementContent.append(' ');
165                     }
166                     mSbElementContent.append('(').append(value).append(')');
167                     break;
168 
169                 case SEPARATOR_COMMA:
170                     if (mSbElementContent.length() > 0) {
171                         mSbElementContent.append(", ");
172                     }
173                     mSbElementContent.append(value);
174                     break;
175             }
176         }
177 
appendToken(String token)178         public void appendToken(String token) {
179             if (TextUtils.isEmpty(token)) {
180                 return;
181             }
182 
183             if (mSbTokens.length() != 0) {
184                 mSbTokens.append(' ');
185             }
186             mSbTokens.append(token);
187         }
188 
appendNameFromColumn(String columnName)189         public void appendNameFromColumn(String columnName) {
190             appendName(getString(columnName));
191         }
192 
appendName(String name)193         public void appendName(String name) {
194             if (TextUtils.isEmpty(name)) {
195                 return;
196             }
197             // First, put the original name.
198             appendNameInternal(name);
199 
200             // Then, if the name contains more than one FTS token, put each token into the index
201             // too.
202             //
203             // This is to make names with special characters searchable, such as "double-barrelled"
204             // "L'Image".
205             //
206             // Here's how it works:
207             // Because we "normalize" names when putting into the index, if we only put
208             // "double-barrelled", the index will only contain "doublebarrelled".
209             // Now, if the user searches for "double-barrelled", the searcher tokenizes it into
210             // two tokens, "double" and "barrelled".  The first one matches "doublebarrelled"
211             // but the second one doesn't (because we only do the prefix match), so
212             // "doublebarrelled" doesn't match.
213             // So, here, we put each token in a name into the index too.  In the case above,
214             // we put also "double" and "barrelled".
215             // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled"
216             // will all match "double-barrelled".
217             final List<String> nameParts = splitIntoFtsTokens(name);
218             if (nameParts.size() > 1) {
219                 for (String namePart : nameParts) {
220                     if (!TextUtils.isEmpty(namePart)) {
221                         appendNameInternal(namePart);
222                     }
223                 }
224             }
225         }
226 
227         /**
228          * Normalize a name and add to {@link #mSbName}
229          */
appendNameInternal(String name)230         private void appendNameInternal(String name) {
231             if (mSbName.length() != 0) {
232                 mSbName.append(' ');
233             }
234             mSbName.append(NameNormalizer.normalize(name));
235         }
236     }
237 
238     private final ContactsProvider2 mContactsProvider;
239     private final ContactsDatabaseHelper mDbHelper;
240     private StringBuilder mSb = new StringBuilder();
241     private IndexBuilder mIndexBuilder = new IndexBuilder();
242     private ContentValues mValues = new ContentValues();
243     private String[] mSelectionArgs1 = new String[1];
244 
SearchIndexManager(ContactsProvider2 contactsProvider)245     public SearchIndexManager(ContactsProvider2 contactsProvider) {
246         this.mContactsProvider = contactsProvider;
247         mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper();
248     }
249 
updateIndex(boolean force)250     public void updateIndex(boolean force) {
251         if (force) {
252             setSearchIndexVersion(0);
253         } else {
254             if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) {
255                 return;
256             }
257         }
258         SQLiteDatabase db = mDbHelper.getWritableDatabase();
259         db.beginTransaction();
260         try {
261             // We do a version check again, because the version might have been modified after
262             // the first check.  We need to do the check again in a transaction to make sure.
263             if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) {
264                 rebuildIndex(db);
265                 setSearchIndexVersion(SEARCH_INDEX_VERSION);
266                 db.setTransactionSuccessful();
267             }
268         } finally {
269             db.endTransaction();
270         }
271     }
272 
rebuildIndex(SQLiteDatabase db)273     private void rebuildIndex(SQLiteDatabase db) {
274         mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_UPGRADING);
275         final long start = SystemClock.elapsedRealtime();
276         int count = 0;
277         try {
278             mDbHelper.createSearchIndexTable(db, true);
279             count = buildAndInsertIndex(db, null);
280         } finally {
281             mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_NORMAL);
282 
283             final long end = SystemClock.elapsedRealtime();
284             Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, "
285                     + count + " contacts");
286         }
287     }
288 
updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds)289     public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) {
290         if (VERBOSE_LOGGING) {
291             Log.v(TAG, "Updating search index for " + contactIds.size() +
292                     " contacts / " + rawContactIds.size() + " raw contacts");
293         }
294         StringBuilder sb = new StringBuilder();
295         sb.append("(");
296         if (!contactIds.isEmpty()) {
297             // Select all raw contacts that belong to all contacts in contactIds
298             sb.append(RawContacts.CONTACT_ID + " IN (");
299             sb.append(TextUtils.join(",", contactIds));
300             sb.append(')');
301         }
302         if (!rawContactIds.isEmpty()) {
303             if (!contactIds.isEmpty()) {
304                 sb.append(" OR ");
305             }
306             // Select all raw contacts that belong to the same contact as all raw contacts
307             // in rawContactIds. For every raw contact in rawContactIds that we are updating
308             // the index for, we need to rebuild the search index for all raw contacts belonging
309             // to the same contact, because we can only update the search index on a per-contact
310             // basis.
311             sb.append(RawContacts.CONTACT_ID + " IN " +
312                     "(SELECT " + RawContacts.CONTACT_ID + " FROM " + Tables.RAW_CONTACTS +
313                     " WHERE " + RawContactsColumns.CONCRETE_ID + " IN (");
314             sb.append(TextUtils.join(",", rawContactIds));
315             sb.append("))");
316         }
317 
318         sb.append(")");
319 
320         // The selection to select raw_contacts.
321         final String rawContactsSelection = sb.toString();
322 
323         // Remove affected search_index rows.
324         final SQLiteDatabase db = mDbHelper.getWritableDatabase();
325         final int deleted = db.delete(Tables.SEARCH_INDEX,
326                 SearchIndexColumns.CONTACT_ID + " IN (SELECT " +
327                     RawContacts.CONTACT_ID +
328                     " FROM " + Tables.RAW_CONTACTS +
329                     " WHERE " + rawContactsSelection +
330                     ")"
331                 , null);
332 
333         // Then rebuild index for them.
334         final int count = buildAndInsertIndex(db, rawContactsSelection);
335 
336         if (VERBOSE_LOGGING) {
337             Log.v(TAG, "Updated search index for " + count + " contacts");
338         }
339     }
340 
buildAndInsertIndex(SQLiteDatabase db, String selection)341     private int buildAndInsertIndex(SQLiteDatabase db, String selection) {
342         mSb.setLength(0);
343         mSb.append(Data.CONTACT_ID + ", ");
344         mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "=");
345         mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE));
346         mSb.append(" THEN -4 ");
347         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
348         mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE));
349         mSb.append(" THEN -3 ");
350         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
351         mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE));
352         mSb.append(" THEN -2");
353         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
354         mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE));
355         mSb.append(" THEN -1");
356         mSb.append(" ELSE " + DataColumns.MIMETYPE_ID);
357         mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID);
358 
359         int count = 0;
360         Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS,
361                 selection, null, null, null, mSb.toString());
362         mIndexBuilder.setCursor(cursor);
363         mIndexBuilder.reset();
364         try {
365             long currentContactId = -1;
366             while (cursor.moveToNext()) {
367                 long contactId = cursor.getLong(0);
368                 if (contactId != currentContactId) {
369                     if (currentContactId != -1) {
370                         insertIndexRow(db, currentContactId, mIndexBuilder);
371                         count++;
372                     }
373                     currentContactId = contactId;
374                     mIndexBuilder.reset();
375                 }
376                 String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE);
377                 DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype);
378                 if (dataRowHandler.hasSearchableData()) {
379                     dataRowHandler.appendSearchableData(mIndexBuilder);
380                     mIndexBuilder.commit();
381                 }
382             }
383             if (currentContactId != -1) {
384                 insertIndexRow(db, currentContactId, mIndexBuilder);
385                 count++;
386             }
387         } finally {
388             cursor.close();
389         }
390         return count;
391     }
392 
insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder)393     private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) {
394         mValues.clear();
395         mValues.put(SearchIndexColumns.CONTENT, builder.getContent());
396         mValues.put(SearchIndexColumns.NAME, builder.getName());
397         mValues.put(SearchIndexColumns.TOKENS, builder.getTokens());
398         mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
399         db.insert(Tables.SEARCH_INDEX, null, mValues);
400     }
getSearchIndexVersion()401     private int getSearchIndexVersion() {
402         return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0"));
403     }
404 
setSearchIndexVersion(int version)405     private void setSearchIndexVersion(int version) {
406         mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version));
407     }
408 
409     /**
410      * Token separator that matches SQLite's "simple" tokenizer.
411      * - Unicode codepoints >= 128: Everything
412      * - Unicode codepoints < 128: Alphanumeric and "_"
413      * - Everything else is a separator of tokens
414      */
415     private static final Pattern FTS_TOKEN_SEPARATOR_RE =
416             Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]");
417 
418     /**
419      * Tokenize a string in the way as that of SQLite's "simple" tokenizer.
420      */
421     @VisibleForTesting
splitIntoFtsTokens(String s)422     static List<String> splitIntoFtsTokens(String s) {
423         final ArrayList<String> ret = Lists.newArrayList();
424         for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) {
425             if (!TextUtils.isEmpty(token)) {
426                 ret.add(token);
427             }
428         }
429         return ret;
430     }
431 
432     /**
433      * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same
434      * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then
435      * returned as a String.
436      * @see FtsQueryBuilder#UNSCOPED_NORMALIZING
437      * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING
438      */
getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder)439     public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) {
440         final StringBuilder result = new StringBuilder();
441         for (String token : splitIntoFtsTokens(query)) {
442             ftsQueryBuilder.addToken(result, token);
443         }
444         return result.toString();
445     }
446 
447     public static abstract class FtsQueryBuilder {
addToken(StringBuilder builder, String token)448         public abstract void addToken(StringBuilder builder, String token);
449 
450         /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */
451         public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder();
452 
453         /**
454          * Scopes each token to a column and normalizes the name.
455          * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*"
456          */
457         public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING =
458                 new ScopedNameNormalizingBuilder();
459 
460         /**
461          * Scopes each token to a the content column and also for name with normalization.
462          * Also adds a user-defined expression to each token. This allows common criteria to be
463          * concatenated to each token.
464          * Example (commonCriteria=" OR tokens:123*"):
465          * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*"
466          */
getDigitsQueryBuilder(final String commonCriteria)467         public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) {
468             return new FtsQueryBuilder() {
469                 @Override
470                 public void addToken(StringBuilder builder, String token) {
471                     if (builder.length() != 0) builder.append(' ');
472 
473                     builder.append("content:");
474                     builder.append(token);
475                     builder.append("* ");
476 
477                     final String normalizedToken = NameNormalizer.normalize(token);
478                     if (!TextUtils.isEmpty(normalizedToken)) {
479                         builder.append(" OR name:");
480                         builder.append(normalizedToken);
481                         builder.append('*');
482                     }
483 
484                     builder.append(commonCriteria);
485                 }
486             };
487         }
488     }
489 
490     private static class UnscopedNormalizingBuilder extends FtsQueryBuilder {
491         @Override
492         public void addToken(StringBuilder builder, String token) {
493             if (builder.length() != 0) builder.append(' ');
494 
495             // the token could be empty (if the search query was "_"). we should still emit it
496             // here, as we otherwise risk to end up with an empty MATCH-expression MATCH ""
497             builder.append(NameNormalizer.normalize(token));
498             builder.append('*');
499         }
500     }
501 
502     private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder {
503         @Override
504         public void addToken(StringBuilder builder, String token) {
505             if (builder.length() != 0) builder.append(' ');
506 
507             builder.append("content:");
508             builder.append(token);
509             builder.append('*');
510 
511             final String normalizedToken = NameNormalizer.normalize(token);
512             if (!TextUtils.isEmpty(normalizedToken)) {
513                 builder.append(" OR name:");
514                 builder.append(normalizedToken);
515                 builder.append('*');
516             }
517 
518             builder.append(" OR tokens:");
519             builder.append(token);
520             builder.append("*");
521         }
522     }
523 }
524