1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 package com.android.providers.contacts; 17 18 import android.content.ContentValues; 19 import android.database.Cursor; 20 import android.database.sqlite.SQLiteDatabase; 21 import android.os.SystemClock; 22 import android.provider.ContactsContract.CommonDataKinds.Email; 23 import android.provider.ContactsContract.CommonDataKinds.Nickname; 24 import android.provider.ContactsContract.CommonDataKinds.Organization; 25 import android.provider.ContactsContract.CommonDataKinds.StructuredPostal; 26 import android.provider.ContactsContract.Data; 27 import android.provider.ContactsContract.ProviderStatus; 28 import android.provider.ContactsContract.RawContacts; 29 import android.text.TextUtils; 30 import android.util.Log; 31 32 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns; 33 import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns; 34 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns; 35 import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns; 36 import com.android.providers.contacts.ContactsDatabaseHelper.Tables; 37 import com.google.android.collect.Lists; 38 import com.google.common.annotations.VisibleForTesting; 39 40 import java.util.ArrayList; 41 import java.util.HashSet; 42 import java.util.List; 43 import java.util.Set; 44 import java.util.regex.Pattern; 45 46 /** 47 * Maintains a search index for comprehensive contact search. 48 */ 49 public class SearchIndexManager { 50 private static final String TAG = "ContactsFTS"; 51 52 private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE); 53 54 public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index"; 55 private static final int SEARCH_INDEX_VERSION = 1; 56 57 private static final class ContactIndexQuery { 58 public static final String[] COLUMNS = { 59 Data.CONTACT_ID, 60 MimetypesColumns.MIMETYPE, 61 Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5, 62 Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11, 63 Data.DATA12, Data.DATA13, Data.DATA14 64 }; 65 66 public static final int MIMETYPE = 1; 67 } 68 69 public static class IndexBuilder { 70 public static final int SEPARATOR_SPACE = 0; 71 public static final int SEPARATOR_PARENTHESES = 1; 72 public static final int SEPARATOR_SLASH = 2; 73 public static final int SEPARATOR_COMMA = 3; 74 75 private StringBuilder mSbContent = new StringBuilder(); 76 private StringBuilder mSbName = new StringBuilder(); 77 private StringBuilder mSbTokens = new StringBuilder(); 78 private StringBuilder mSbElementContent = new StringBuilder(); 79 private HashSet<String> mUniqueElements = new HashSet<String>(); 80 private Cursor mCursor; 81 setCursor(Cursor cursor)82 void setCursor(Cursor cursor) { 83 this.mCursor = cursor; 84 } 85 reset()86 void reset() { 87 mSbContent.setLength(0); 88 mSbTokens.setLength(0); 89 mSbName.setLength(0); 90 mSbElementContent.setLength(0); 91 mUniqueElements.clear(); 92 } 93 getContent()94 public String getContent() { 95 return mSbContent.length() == 0 ? null : mSbContent.toString(); 96 } 97 getName()98 public String getName() { 99 return mSbName.length() == 0 ? null : mSbName.toString(); 100 } 101 getTokens()102 public String getTokens() { 103 return mSbTokens.length() == 0 ? null : mSbTokens.toString(); 104 } 105 getString(String columnName)106 public String getString(String columnName) { 107 return mCursor.getString(mCursor.getColumnIndex(columnName)); 108 } 109 getInt(String columnName)110 public int getInt(String columnName) { 111 return mCursor.getInt(mCursor.getColumnIndex(columnName)); 112 } 113 114 @Override toString()115 public String toString() { 116 return "Content: " + mSbContent + "\n Name: " + mSbName + "\n Tokens: " + mSbTokens; 117 } 118 commit()119 public void commit() { 120 if (mSbElementContent.length() != 0) { 121 String content = mSbElementContent.toString().replace('\n', ' '); 122 if (!mUniqueElements.contains(content)) { 123 if (mSbContent.length() != 0) { 124 mSbContent.append('\n'); 125 } 126 mSbContent.append(content); 127 mUniqueElements.add(content); 128 } 129 mSbElementContent.setLength(0); 130 } 131 } 132 appendContentFromColumn(String columnName)133 public void appendContentFromColumn(String columnName) { 134 appendContentFromColumn(columnName, SEPARATOR_SPACE); 135 } 136 appendContentFromColumn(String columnName, int format)137 public void appendContentFromColumn(String columnName, int format) { 138 appendContent(getString(columnName), format); 139 } 140 appendContent(String value)141 public void appendContent(String value) { 142 appendContent(value, SEPARATOR_SPACE); 143 } 144 appendContent(String value, int format)145 private void appendContent(String value, int format) { 146 if (TextUtils.isEmpty(value)) { 147 return; 148 } 149 150 switch (format) { 151 case SEPARATOR_SPACE: 152 if (mSbElementContent.length() > 0) { 153 mSbElementContent.append(' '); 154 } 155 mSbElementContent.append(value); 156 break; 157 158 case SEPARATOR_SLASH: 159 mSbElementContent.append('/').append(value); 160 break; 161 162 case SEPARATOR_PARENTHESES: 163 if (mSbElementContent.length() > 0) { 164 mSbElementContent.append(' '); 165 } 166 mSbElementContent.append('(').append(value).append(')'); 167 break; 168 169 case SEPARATOR_COMMA: 170 if (mSbElementContent.length() > 0) { 171 mSbElementContent.append(", "); 172 } 173 mSbElementContent.append(value); 174 break; 175 } 176 } 177 appendToken(String token)178 public void appendToken(String token) { 179 if (TextUtils.isEmpty(token)) { 180 return; 181 } 182 183 if (mSbTokens.length() != 0) { 184 mSbTokens.append(' '); 185 } 186 mSbTokens.append(token); 187 } 188 appendNameFromColumn(String columnName)189 public void appendNameFromColumn(String columnName) { 190 appendName(getString(columnName)); 191 } 192 appendName(String name)193 public void appendName(String name) { 194 if (TextUtils.isEmpty(name)) { 195 return; 196 } 197 // First, put the original name. 198 appendNameInternal(name); 199 200 // Then, if the name contains more than one FTS token, put each token into the index 201 // too. 202 // 203 // This is to make names with special characters searchable, such as "double-barrelled" 204 // "L'Image". 205 // 206 // Here's how it works: 207 // Because we "normalize" names when putting into the index, if we only put 208 // "double-barrelled", the index will only contain "doublebarrelled". 209 // Now, if the user searches for "double-barrelled", the searcher tokenizes it into 210 // two tokens, "double" and "barrelled". The first one matches "doublebarrelled" 211 // but the second one doesn't (because we only do the prefix match), so 212 // "doublebarrelled" doesn't match. 213 // So, here, we put each token in a name into the index too. In the case above, 214 // we put also "double" and "barrelled". 215 // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled" 216 // will all match "double-barrelled". 217 final List<String> nameParts = splitIntoFtsTokens(name); 218 if (nameParts.size() > 1) { 219 for (String namePart : nameParts) { 220 if (!TextUtils.isEmpty(namePart)) { 221 appendNameInternal(namePart); 222 } 223 } 224 } 225 } 226 227 /** 228 * Normalize a name and add to {@link #mSbName} 229 */ appendNameInternal(String name)230 private void appendNameInternal(String name) { 231 if (mSbName.length() != 0) { 232 mSbName.append(' '); 233 } 234 mSbName.append(NameNormalizer.normalize(name)); 235 } 236 } 237 238 private final ContactsProvider2 mContactsProvider; 239 private final ContactsDatabaseHelper mDbHelper; 240 private StringBuilder mSb = new StringBuilder(); 241 private IndexBuilder mIndexBuilder = new IndexBuilder(); 242 private ContentValues mValues = new ContentValues(); 243 private String[] mSelectionArgs1 = new String[1]; 244 SearchIndexManager(ContactsProvider2 contactsProvider)245 public SearchIndexManager(ContactsProvider2 contactsProvider) { 246 this.mContactsProvider = contactsProvider; 247 mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper(); 248 } 249 updateIndex(boolean force)250 public void updateIndex(boolean force) { 251 if (force) { 252 setSearchIndexVersion(0); 253 } else { 254 if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) { 255 return; 256 } 257 } 258 SQLiteDatabase db = mDbHelper.getWritableDatabase(); 259 db.beginTransaction(); 260 try { 261 // We do a version check again, because the version might have been modified after 262 // the first check. We need to do the check again in a transaction to make sure. 263 if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) { 264 rebuildIndex(db); 265 setSearchIndexVersion(SEARCH_INDEX_VERSION); 266 db.setTransactionSuccessful(); 267 } 268 } finally { 269 db.endTransaction(); 270 } 271 } 272 rebuildIndex(SQLiteDatabase db)273 private void rebuildIndex(SQLiteDatabase db) { 274 mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_UPGRADING); 275 final long start = SystemClock.elapsedRealtime(); 276 int count = 0; 277 try { 278 mDbHelper.createSearchIndexTable(db, true); 279 count = buildAndInsertIndex(db, null); 280 } finally { 281 mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_NORMAL); 282 283 final long end = SystemClock.elapsedRealtime(); 284 Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, " 285 + count + " contacts"); 286 } 287 } 288 updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds)289 public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) { 290 if (VERBOSE_LOGGING) { 291 Log.v(TAG, "Updating search index for " + contactIds.size() + 292 " contacts / " + rawContactIds.size() + " raw contacts"); 293 } 294 StringBuilder sb = new StringBuilder(); 295 sb.append("("); 296 if (!contactIds.isEmpty()) { 297 // Select all raw contacts that belong to all contacts in contactIds 298 sb.append(RawContacts.CONTACT_ID + " IN ("); 299 sb.append(TextUtils.join(",", contactIds)); 300 sb.append(')'); 301 } 302 if (!rawContactIds.isEmpty()) { 303 if (!contactIds.isEmpty()) { 304 sb.append(" OR "); 305 } 306 // Select all raw contacts that belong to the same contact as all raw contacts 307 // in rawContactIds. For every raw contact in rawContactIds that we are updating 308 // the index for, we need to rebuild the search index for all raw contacts belonging 309 // to the same contact, because we can only update the search index on a per-contact 310 // basis. 311 sb.append(RawContacts.CONTACT_ID + " IN " + 312 "(SELECT " + RawContacts.CONTACT_ID + " FROM " + Tables.RAW_CONTACTS + 313 " WHERE " + RawContactsColumns.CONCRETE_ID + " IN ("); 314 sb.append(TextUtils.join(",", rawContactIds)); 315 sb.append("))"); 316 } 317 318 sb.append(")"); 319 320 // The selection to select raw_contacts. 321 final String rawContactsSelection = sb.toString(); 322 323 // Remove affected search_index rows. 324 final SQLiteDatabase db = mDbHelper.getWritableDatabase(); 325 final int deleted = db.delete(Tables.SEARCH_INDEX, 326 SearchIndexColumns.CONTACT_ID + " IN (SELECT " + 327 RawContacts.CONTACT_ID + 328 " FROM " + Tables.RAW_CONTACTS + 329 " WHERE " + rawContactsSelection + 330 ")" 331 , null); 332 333 // Then rebuild index for them. 334 final int count = buildAndInsertIndex(db, rawContactsSelection); 335 336 if (VERBOSE_LOGGING) { 337 Log.v(TAG, "Updated search index for " + count + " contacts"); 338 } 339 } 340 buildAndInsertIndex(SQLiteDatabase db, String selection)341 private int buildAndInsertIndex(SQLiteDatabase db, String selection) { 342 mSb.setLength(0); 343 mSb.append(Data.CONTACT_ID + ", "); 344 mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "="); 345 mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE)); 346 mSb.append(" THEN -4 "); 347 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 348 mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE)); 349 mSb.append(" THEN -3 "); 350 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 351 mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE)); 352 mSb.append(" THEN -2"); 353 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 354 mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE)); 355 mSb.append(" THEN -1"); 356 mSb.append(" ELSE " + DataColumns.MIMETYPE_ID); 357 mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID); 358 359 int count = 0; 360 Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS, 361 selection, null, null, null, mSb.toString()); 362 mIndexBuilder.setCursor(cursor); 363 mIndexBuilder.reset(); 364 try { 365 long currentContactId = -1; 366 while (cursor.moveToNext()) { 367 long contactId = cursor.getLong(0); 368 if (contactId != currentContactId) { 369 if (currentContactId != -1) { 370 insertIndexRow(db, currentContactId, mIndexBuilder); 371 count++; 372 } 373 currentContactId = contactId; 374 mIndexBuilder.reset(); 375 } 376 String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE); 377 DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype); 378 if (dataRowHandler.hasSearchableData()) { 379 dataRowHandler.appendSearchableData(mIndexBuilder); 380 mIndexBuilder.commit(); 381 } 382 } 383 if (currentContactId != -1) { 384 insertIndexRow(db, currentContactId, mIndexBuilder); 385 count++; 386 } 387 } finally { 388 cursor.close(); 389 } 390 return count; 391 } 392 insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder)393 private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) { 394 mValues.clear(); 395 mValues.put(SearchIndexColumns.CONTENT, builder.getContent()); 396 mValues.put(SearchIndexColumns.NAME, builder.getName()); 397 mValues.put(SearchIndexColumns.TOKENS, builder.getTokens()); 398 mValues.put(SearchIndexColumns.CONTACT_ID, contactId); 399 db.insert(Tables.SEARCH_INDEX, null, mValues); 400 } getSearchIndexVersion()401 private int getSearchIndexVersion() { 402 return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0")); 403 } 404 setSearchIndexVersion(int version)405 private void setSearchIndexVersion(int version) { 406 mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version)); 407 } 408 409 /** 410 * Token separator that matches SQLite's "simple" tokenizer. 411 * - Unicode codepoints >= 128: Everything 412 * - Unicode codepoints < 128: Alphanumeric and "_" 413 * - Everything else is a separator of tokens 414 */ 415 private static final Pattern FTS_TOKEN_SEPARATOR_RE = 416 Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]"); 417 418 /** 419 * Tokenize a string in the way as that of SQLite's "simple" tokenizer. 420 */ 421 @VisibleForTesting splitIntoFtsTokens(String s)422 static List<String> splitIntoFtsTokens(String s) { 423 final ArrayList<String> ret = Lists.newArrayList(); 424 for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) { 425 if (!TextUtils.isEmpty(token)) { 426 ret.add(token); 427 } 428 } 429 return ret; 430 } 431 432 /** 433 * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same 434 * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then 435 * returned as a String. 436 * @see FtsQueryBuilder#UNSCOPED_NORMALIZING 437 * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING 438 */ getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder)439 public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) { 440 final StringBuilder result = new StringBuilder(); 441 for (String token : splitIntoFtsTokens(query)) { 442 ftsQueryBuilder.addToken(result, token); 443 } 444 return result.toString(); 445 } 446 447 public static abstract class FtsQueryBuilder { addToken(StringBuilder builder, String token)448 public abstract void addToken(StringBuilder builder, String token); 449 450 /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */ 451 public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder(); 452 453 /** 454 * Scopes each token to a column and normalizes the name. 455 * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*" 456 */ 457 public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING = 458 new ScopedNameNormalizingBuilder(); 459 460 /** 461 * Scopes each token to a the content column and also for name with normalization. 462 * Also adds a user-defined expression to each token. This allows common criteria to be 463 * concatenated to each token. 464 * Example (commonCriteria=" OR tokens:123*"): 465 * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*" 466 */ getDigitsQueryBuilder(final String commonCriteria)467 public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) { 468 return new FtsQueryBuilder() { 469 @Override 470 public void addToken(StringBuilder builder, String token) { 471 if (builder.length() != 0) builder.append(' '); 472 473 builder.append("content:"); 474 builder.append(token); 475 builder.append("* "); 476 477 final String normalizedToken = NameNormalizer.normalize(token); 478 if (!TextUtils.isEmpty(normalizedToken)) { 479 builder.append(" OR name:"); 480 builder.append(normalizedToken); 481 builder.append('*'); 482 } 483 484 builder.append(commonCriteria); 485 } 486 }; 487 } 488 } 489 490 private static class UnscopedNormalizingBuilder extends FtsQueryBuilder { 491 @Override 492 public void addToken(StringBuilder builder, String token) { 493 if (builder.length() != 0) builder.append(' '); 494 495 // the token could be empty (if the search query was "_"). we should still emit it 496 // here, as we otherwise risk to end up with an empty MATCH-expression MATCH "" 497 builder.append(NameNormalizer.normalize(token)); 498 builder.append('*'); 499 } 500 } 501 502 private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder { 503 @Override 504 public void addToken(StringBuilder builder, String token) { 505 if (builder.length() != 0) builder.append(' '); 506 507 builder.append("content:"); 508 builder.append(token); 509 builder.append('*'); 510 511 final String normalizedToken = NameNormalizer.normalize(token); 512 if (!TextUtils.isEmpty(normalizedToken)) { 513 builder.append(" OR name:"); 514 builder.append(normalizedToken); 515 builder.append('*'); 516 } 517 518 builder.append(" OR tokens:"); 519 builder.append(token); 520 builder.append("*"); 521 } 522 } 523 } 524