1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License
15  */
16 package com.android.providers.contacts;
17 
18 import android.content.ContentValues;
19 import android.database.Cursor;
20 import android.database.sqlite.SQLiteDatabase;
21 import android.os.SystemClock;
22 import android.provider.ContactsContract.CommonDataKinds.Email;
23 import android.provider.ContactsContract.CommonDataKinds.Nickname;
24 import android.provider.ContactsContract.CommonDataKinds.Organization;
25 import android.provider.ContactsContract.CommonDataKinds.StructuredPostal;
26 import android.provider.ContactsContract.Data;
27 import android.provider.ContactsContract.RawContacts;
28 import android.text.TextUtils;
29 import android.util.ArraySet;
30 import android.util.Log;
31 
32 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
33 import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns;
34 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
35 import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns;
36 import com.android.providers.contacts.ContactsDatabaseHelper.Tables;
37 import com.android.providers.contacts.util.CappedStringBuilder;
38 
39 import com.google.android.collect.Lists;
40 import com.google.common.annotations.VisibleForTesting;
41 
42 import java.util.ArrayList;
43 import java.util.List;
44 import java.util.Set;
45 import java.util.regex.Pattern;
46 
47 /**
48  * Maintains a search index for comprehensive contact search.
49  */
50 public class SearchIndexManager {
51     private static final String TAG = "ContactsFTS";
52 
53     private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE);
54 
55     private static final int MAX_STRING_BUILDER_SIZE = 1024 * 10;
56 
57     public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index";
58     private static final String ROW_ID_KEY = "rowid";
59     private static final int SEARCH_INDEX_VERSION = 2;
60 
61     private static final class ContactIndexQuery {
62         public static final String[] COLUMNS = {
63                 Data.CONTACT_ID,
64                 MimetypesColumns.MIMETYPE,
65                 Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5,
66                 Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11,
67                 Data.DATA12, Data.DATA13, Data.DATA14
68         };
69 
70         public static final int MIMETYPE = 1;
71     }
72 
73     public static class IndexBuilder {
74         public static final int SEPARATOR_SPACE = 0;
75         public static final int SEPARATOR_PARENTHESES = 1;
76         public static final int SEPARATOR_SLASH = 2;
77         public static final int SEPARATOR_COMMA = 3;
78 
79         private CappedStringBuilder mSbContent = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE);
80         private CappedStringBuilder mSbName = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE);
81         private CappedStringBuilder mSbTokens = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE);
82         private CappedStringBuilder mSbElementContent = new CappedStringBuilder(
83                 MAX_STRING_BUILDER_SIZE);
84         private ArraySet<String> mUniqueElements = new ArraySet<>();
85         private Cursor mCursor;
86 
setCursor(Cursor cursor)87         void setCursor(Cursor cursor) {
88             this.mCursor = cursor;
89         }
90 
reset()91         void reset() {
92             mSbContent.clear();
93             mSbTokens.clear();
94             mSbName.clear();
95             mSbElementContent.clear();
96             mUniqueElements.clear();
97         }
98 
getContent()99         public String getContent() {
100             return mSbContent.length() == 0 ? null : mSbContent.toString();
101         }
102 
getName()103         public String getName() {
104             return mSbName.length() == 0 ? null : mSbName.toString();
105         }
106 
getTokens()107         public String getTokens() {
108             return mSbTokens.length() == 0 ? null : mSbTokens.toString();
109         }
110 
getString(String columnName)111         public String getString(String columnName) {
112             return mCursor.getString(mCursor.getColumnIndex(columnName));
113         }
114 
getInt(String columnName)115         public int getInt(String columnName) {
116             return mCursor.getInt(mCursor.getColumnIndex(columnName));
117         }
118 
119         @Override
toString()120         public String toString() {
121             return "Content: " + mSbContent + "\n Name: " + mSbName + "\n Tokens: " + mSbTokens;
122         }
123 
commit()124         public void commit() {
125             if (mSbElementContent.length() != 0) {
126                 String content = mSbElementContent.toString().replace('\n', ' ');
127                 if (!mUniqueElements.contains(content)) {
128                     if (mSbContent.length() != 0) {
129                         mSbContent.append('\n');
130                     }
131                     mSbContent.append(content);
132                     mUniqueElements.add(content);
133                 }
134                 mSbElementContent.clear();
135             }
136         }
137 
appendContentFromColumn(String columnName)138         public void appendContentFromColumn(String columnName) {
139             appendContentFromColumn(columnName, SEPARATOR_SPACE);
140         }
141 
appendContentFromColumn(String columnName, int format)142         public void appendContentFromColumn(String columnName, int format) {
143             appendContent(getString(columnName), format);
144         }
145 
appendContent(String value)146         public void appendContent(String value) {
147             appendContent(value, SEPARATOR_SPACE);
148         }
149 
appendContent(String value, int format)150         private void appendContent(String value, int format) {
151             if (TextUtils.isEmpty(value)) {
152                 return;
153             }
154 
155             switch (format) {
156                 case SEPARATOR_SPACE:
157                     if (mSbElementContent.length() > 0) {
158                         mSbElementContent.append(' ');
159                     }
160                     mSbElementContent.append(value);
161                     break;
162 
163                 case SEPARATOR_SLASH:
164                     mSbElementContent.append('/').append(value);
165                     break;
166 
167                 case SEPARATOR_PARENTHESES:
168                     if (mSbElementContent.length() > 0) {
169                         mSbElementContent.append(' ');
170                     }
171                     mSbElementContent.append('(').append(value).append(')');
172                     break;
173 
174                 case SEPARATOR_COMMA:
175                     if (mSbElementContent.length() > 0) {
176                         mSbElementContent.append(", ");
177                     }
178                     mSbElementContent.append(value);
179                     break;
180             }
181         }
182 
appendToken(String token)183         public void appendToken(String token) {
184             if (TextUtils.isEmpty(token)) {
185                 return;
186             }
187 
188             if (mSbTokens.length() != 0) {
189                 mSbTokens.append(' ');
190             }
191             mSbTokens.append(token);
192         }
193 
appendNameFromColumn(String columnName)194         public void appendNameFromColumn(String columnName) {
195             appendName(getString(columnName));
196         }
197 
appendName(String name)198         public void appendName(String name) {
199             if (TextUtils.isEmpty(name)) {
200                 return;
201             }
202             // First, put the original name.
203             appendNameInternal(name);
204 
205             // Then, if the name contains more than one FTS token, put each token into the index
206             // too.
207             //
208             // This is to make names with special characters searchable, such as "double-barrelled"
209             // "L'Image".
210             //
211             // Here's how it works:
212             // Because we "normalize" names when putting into the index, if we only put
213             // "double-barrelled", the index will only contain "doublebarrelled".
214             // Now, if the user searches for "double-barrelled", the searcher tokenizes it into
215             // two tokens, "double" and "barrelled".  The first one matches "doublebarrelled"
216             // but the second one doesn't (because we only do the prefix match), so
217             // "doublebarrelled" doesn't match.
218             // So, here, we put each token in a name into the index too.  In the case above,
219             // we put also "double" and "barrelled".
220             // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled"
221             // will all match "double-barrelled".
222             final List<String> nameParts = splitIntoFtsTokens(name);
223             if (nameParts.size() > 1) {
224                 for (String namePart : nameParts) {
225                     if (!TextUtils.isEmpty(namePart)) {
226                         appendNameInternal(namePart);
227                     }
228                 }
229             }
230         }
231 
232         /**
233          * Normalize a name and add to {@link #mSbName}
234          */
appendNameInternal(String name)235         private void appendNameInternal(String name) {
236             if (mSbName.length() != 0) {
237                 mSbName.append(' ');
238             }
239             mSbName.append(NameNormalizer.normalize(name));
240         }
241     }
242 
243     private final ContactsProvider2 mContactsProvider;
244     private final ContactsDatabaseHelper mDbHelper;
245     private StringBuilder mSb = new StringBuilder();
246     private IndexBuilder mIndexBuilder = new IndexBuilder();
247     private ContentValues mValues = new ContentValues();
248     private String[] mSelectionArgs1 = new String[1];
249 
SearchIndexManager(ContactsProvider2 contactsProvider)250     public SearchIndexManager(ContactsProvider2 contactsProvider) {
251         this.mContactsProvider = contactsProvider;
252         mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper();
253     }
254 
updateIndex(boolean force)255     public void updateIndex(boolean force) {
256         if (force) {
257             setSearchIndexVersion(0);
258         } else {
259             if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) {
260                 return;
261             }
262         }
263         SQLiteDatabase db = mDbHelper.getWritableDatabase();
264         db.beginTransaction();
265         try {
266             // We do a version check again, because the version might have been modified after
267             // the first check.  We need to do the check again in a transaction to make sure.
268             if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) {
269                 rebuildIndex(db);
270                 setSearchIndexVersion(SEARCH_INDEX_VERSION);
271                 db.setTransactionSuccessful();
272             }
273         } finally {
274             db.endTransaction();
275         }
276     }
277 
rebuildIndex(SQLiteDatabase db)278     private void rebuildIndex(SQLiteDatabase db) {
279         mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_UPGRADING);
280         final long start = SystemClock.elapsedRealtime();
281         int count = 0;
282         try {
283             mDbHelper.createSearchIndexTable(db, true);
284             count = buildAndInsertIndex(db, null);
285         } finally {
286             mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_NORMAL);
287 
288             final long end = SystemClock.elapsedRealtime();
289             Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, "
290                     + count + " contacts");
291         }
292     }
293 
updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds)294     public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) {
295         if (VERBOSE_LOGGING) {
296             Log.v(TAG, "Updating search index for " + contactIds.size() +
297                     " contacts / " + rawContactIds.size() + " raw contacts");
298         }
299         StringBuilder sb = new StringBuilder();
300         sb.append("(");
301         if (!contactIds.isEmpty()) {
302             // Select all raw contacts that belong to all contacts in contactIds
303             sb.append(RawContacts.CONTACT_ID + " IN (");
304             sb.append(TextUtils.join(",", contactIds));
305             sb.append(')');
306         }
307         if (!rawContactIds.isEmpty()) {
308             if (!contactIds.isEmpty()) {
309                 sb.append(" OR ");
310             }
311             // Select all raw contacts that belong to the same contact as all raw contacts
312             // in rawContactIds. For every raw contact in rawContactIds that we are updating
313             // the index for, we need to rebuild the search index for all raw contacts belonging
314             // to the same contact, because we can only update the search index on a per-contact
315             // basis.
316             sb.append(RawContacts.CONTACT_ID + " IN " +
317                     "(SELECT " + RawContacts.CONTACT_ID + " FROM " + Tables.RAW_CONTACTS +
318                     " WHERE " + RawContactsColumns.CONCRETE_ID + " IN (");
319             sb.append(TextUtils.join(",", rawContactIds));
320             sb.append("))");
321         }
322 
323         sb.append(")");
324 
325         // The selection to select raw_contacts.
326         final String rawContactsSelection = sb.toString();
327 
328         // Remove affected search_index rows.
329         final SQLiteDatabase db = mDbHelper.getWritableDatabase();
330         final int deleted = db.delete(Tables.SEARCH_INDEX,
331                 ROW_ID_KEY + " IN (SELECT " +
332                     RawContacts.CONTACT_ID +
333                     " FROM " + Tables.RAW_CONTACTS +
334                     " WHERE " + rawContactsSelection +
335                     ")"
336                 , null);
337 
338         // Then rebuild index for them.
339         final int count = buildAndInsertIndex(db, rawContactsSelection);
340 
341         if (VERBOSE_LOGGING) {
342             Log.v(TAG, "Updated search index for " + count + " contacts");
343         }
344     }
345 
buildAndInsertIndex(SQLiteDatabase db, String selection)346     private int buildAndInsertIndex(SQLiteDatabase db, String selection) {
347         mSb.setLength(0);
348         mSb.append(Data.CONTACT_ID + ", ");
349         mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "=");
350         mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE));
351         mSb.append(" THEN -4 ");
352         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
353         mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE));
354         mSb.append(" THEN -3 ");
355         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
356         mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE));
357         mSb.append(" THEN -2");
358         mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
359         mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE));
360         mSb.append(" THEN -1");
361         mSb.append(" ELSE " + DataColumns.MIMETYPE_ID);
362         mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID);
363 
364         int count = 0;
365         Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS,
366                 selection, null, null, null, mSb.toString());
367         mIndexBuilder.setCursor(cursor);
368         mIndexBuilder.reset();
369         try {
370             long currentContactId = -1;
371             while (cursor.moveToNext()) {
372                 long contactId = cursor.getLong(0);
373                 if (contactId != currentContactId) {
374                     if (currentContactId != -1) {
375                         insertIndexRow(db, currentContactId, mIndexBuilder);
376                         count++;
377                     }
378                     currentContactId = contactId;
379                     mIndexBuilder.reset();
380                 }
381                 String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE);
382                 DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype);
383                 if (dataRowHandler.hasSearchableData()) {
384                     dataRowHandler.appendSearchableData(mIndexBuilder);
385                     mIndexBuilder.commit();
386                 }
387             }
388             if (currentContactId != -1) {
389                 insertIndexRow(db, currentContactId, mIndexBuilder);
390                 count++;
391             }
392         } finally {
393             cursor.close();
394         }
395         return count;
396     }
397 
insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder)398     private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) {
399         mValues.clear();
400         mValues.put(SearchIndexColumns.CONTENT, builder.getContent());
401         mValues.put(SearchIndexColumns.NAME, builder.getName());
402         mValues.put(SearchIndexColumns.TOKENS, builder.getTokens());
403         mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
404         mValues.put(ROW_ID_KEY, contactId);
405         db.insert(Tables.SEARCH_INDEX, null, mValues);
406     }
getSearchIndexVersion()407     private int getSearchIndexVersion() {
408         return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0"));
409     }
410 
setSearchIndexVersion(int version)411     private void setSearchIndexVersion(int version) {
412         mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version));
413     }
414 
415     /**
416      * Token separator that matches SQLite's "simple" tokenizer.
417      * - Unicode codepoints >= 128: Everything
418      * - Unicode codepoints < 128: Alphanumeric and "_"
419      * - Everything else is a separator of tokens
420      */
421     private static final Pattern FTS_TOKEN_SEPARATOR_RE =
422             Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]");
423 
424     /**
425      * Tokenize a string in the way as that of SQLite's "simple" tokenizer.
426      */
427     @VisibleForTesting
splitIntoFtsTokens(String s)428     static List<String> splitIntoFtsTokens(String s) {
429         final ArrayList<String> ret = Lists.newArrayList();
430         for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) {
431             if (!TextUtils.isEmpty(token)) {
432                 ret.add(token);
433             }
434         }
435         return ret;
436     }
437 
438     /**
439      * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same
440      * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then
441      * returned as a String.
442      * @see FtsQueryBuilder#UNSCOPED_NORMALIZING
443      * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING
444      */
getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder)445     public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) {
446         final StringBuilder result = new StringBuilder();
447         for (String token : splitIntoFtsTokens(query)) {
448             ftsQueryBuilder.addToken(result, token);
449         }
450         return result.toString();
451     }
452 
453     public static abstract class FtsQueryBuilder {
addToken(StringBuilder builder, String token)454         public abstract void addToken(StringBuilder builder, String token);
455 
456         /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */
457         public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder();
458 
459         /**
460          * Scopes each token to a column and normalizes the name.
461          * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*"
462          */
463         public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING =
464                 new ScopedNameNormalizingBuilder();
465 
466         /**
467          * Scopes each token to a the content column and also for name with normalization.
468          * Also adds a user-defined expression to each token. This allows common criteria to be
469          * concatenated to each token.
470          * Example (commonCriteria=" OR tokens:123*"):
471          * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*"
472          */
getDigitsQueryBuilder(final String commonCriteria)473         public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) {
474             return new FtsQueryBuilder() {
475                 @Override
476                 public void addToken(StringBuilder builder, String token) {
477                     if (builder.length() != 0) builder.append(' ');
478 
479                     builder.append("content:");
480                     builder.append(token);
481                     builder.append("* ");
482 
483                     final String normalizedToken = NameNormalizer.normalize(token);
484                     if (!TextUtils.isEmpty(normalizedToken)) {
485                         builder.append(" OR name:");
486                         builder.append(normalizedToken);
487                         builder.append('*');
488                     }
489 
490                     builder.append(commonCriteria);
491                 }
492             };
493         }
494     }
495 
496     private static class UnscopedNormalizingBuilder extends FtsQueryBuilder {
497         @Override
498         public void addToken(StringBuilder builder, String token) {
499             if (builder.length() != 0) builder.append(' ');
500 
501             // the token could be empty (if the search query was "_"). we should still emit it
502             // here, as we otherwise risk to end up with an empty MATCH-expression MATCH ""
503             builder.append(NameNormalizer.normalize(token));
504             builder.append('*');
505         }
506     }
507 
508     private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder {
509         @Override
510         public void addToken(StringBuilder builder, String token) {
511             if (builder.length() != 0) builder.append(' ');
512 
513             builder.append("content:");
514             builder.append(token);
515             builder.append('*');
516 
517             final String normalizedToken = NameNormalizer.normalize(token);
518             if (!TextUtils.isEmpty(normalizedToken)) {
519                 builder.append(" OR name:");
520                 builder.append(normalizedToken);
521                 builder.append('*');
522             }
523 
524             builder.append(" OR tokens:");
525             builder.append(token);
526             builder.append("*");
527         }
528     }
529 }
530