1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.contacts.common.util; 18 19 import android.support.annotation.VisibleForTesting; 20 21 /** Methods related to search. */ 22 public class SearchUtil { 23 24 /** 25 * Given a string with lines delimited with '\n', finds the matching line to the given substring. 26 * 27 * @param contents The string to search. 28 * @param substring The substring to search for. 29 * @return A MatchedLine object containing the matching line and the startIndex of the substring 30 * match within that line. 31 */ findMatchingLine(String contents, String substring)32 public static MatchedLine findMatchingLine(String contents, String substring) { 33 final MatchedLine matched = new MatchedLine(); 34 35 // Snippet may contain multiple lines separated by "\n". 36 // Locate the lines of the content that contain the substring. 37 final int index = SearchUtil.contains(contents, substring); 38 if (index != -1) { 39 // Match found. Find the corresponding line. 40 int start = index - 1; 41 while (start > -1) { 42 if (contents.charAt(start) == '\n') { 43 break; 44 } 45 start--; 46 } 47 int end = index + 1; 48 while (end < contents.length()) { 49 if (contents.charAt(end) == '\n') { 50 break; 51 } 52 end++; 53 } 54 matched.line = contents.substring(start + 1, end); 55 matched.startIndex = index - (start + 1); 56 } 57 return matched; 58 } 59 60 /** 61 * Similar to String.contains() with two main differences: 62 * 63 * <p>1) Only searches token prefixes. A token is defined as any combination of letters or 64 * numbers. 65 * 66 * <p>2) Returns the starting index where the substring is found. 67 * 68 * @param value The string to search. 69 * @param substring The substring to look for. 70 * @return The starting index where the substring is found. {@literal -1} if substring is not 71 * found in value. 72 */ 73 @VisibleForTesting contains(String value, String substring)74 static int contains(String value, String substring) { 75 if (value.length() < substring.length()) { 76 return -1; 77 } 78 79 // i18n support 80 // Generate the code points for the substring once. 81 // There will be a maximum of substring.length code points. But may be fewer. 82 // Since the array length is not an accurate size, we need to keep a separate variable. 83 final int[] substringCodePoints = new int[substring.length()]; 84 int substringLength = 0; // may not equal substring.length()!! 85 for (int i = 0; i < substring.length(); ) { 86 final int codePoint = Character.codePointAt(substring, i); 87 substringCodePoints[substringLength] = codePoint; 88 substringLength++; 89 i += Character.charCount(codePoint); 90 } 91 92 for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) { 93 int numMatch = 0; 94 for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) { 95 int valueCp = Character.toLowerCase(value.codePointAt(j)); 96 int substringCp = substringCodePoints[numMatch]; 97 if (valueCp != substringCp) { 98 break; 99 } 100 j += Character.charCount(valueCp); 101 } 102 if (numMatch == substringLength) { 103 return i; 104 } 105 } 106 return -1; 107 } 108 109 /** 110 * Find the start of the next token. A token is composed of letters and numbers. Any other 111 * character are considered delimiters. 112 * 113 * @param line The string to search for the next token. 114 * @param startIndex The index to start searching. 0 based indexing. 115 * @return The index for the start of the next token. line.length() if next token not found. 116 */ 117 @VisibleForTesting findNextTokenStart(String line, int startIndex)118 static int findNextTokenStart(String line, int startIndex) { 119 int index = startIndex; 120 121 // If already in token, eat remainder of token. 122 while (index <= line.length()) { 123 if (index == line.length()) { 124 // No more tokens. 125 return index; 126 } 127 final int codePoint = line.codePointAt(index); 128 if (!Character.isLetterOrDigit(codePoint)) { 129 break; 130 } 131 index += Character.charCount(codePoint); 132 } 133 134 // Out of token, eat all consecutive delimiters. 135 while (index <= line.length()) { 136 if (index == line.length()) { 137 return index; 138 } 139 final int codePoint = line.codePointAt(index); 140 if (Character.isLetterOrDigit(codePoint)) { 141 break; 142 } 143 index += Character.charCount(codePoint); 144 } 145 146 return index; 147 } 148 149 /** 150 * Anything other than letter and numbers are considered delimiters. Remove start and end 151 * delimiters since they are not relevant to search. 152 * 153 * @param query The query string to clean. 154 * @return The cleaned query. Empty string if all characters are cleaned out. 155 */ cleanStartAndEndOfSearchQuery(String query)156 public static String cleanStartAndEndOfSearchQuery(String query) { 157 int start = 0; 158 while (start < query.length()) { 159 int codePoint = query.codePointAt(start); 160 if (Character.isLetterOrDigit(codePoint)) { 161 break; 162 } 163 start += Character.charCount(codePoint); 164 } 165 166 if (start == query.length()) { 167 // All characters are delimiters. 168 return ""; 169 } 170 171 int end = query.length() - 1; 172 while (end > -1) { 173 if (Character.isLowSurrogate(query.charAt(end))) { 174 // Assume valid i18n string. There should be a matching high surrogate before it. 175 end--; 176 } 177 int codePoint = query.codePointAt(end); 178 if (Character.isLetterOrDigit(codePoint)) { 179 break; 180 } 181 end--; 182 } 183 184 // end is a letter or digit. 185 return query.substring(start, end + 1); 186 } 187 188 public static class MatchedLine { 189 190 public int startIndex = -1; 191 public String line; 192 193 @Override toString()194 public String toString() { 195 return "MatchedLine{" + "line='" + line + '\'' + ", startIndex=" + startIndex + '}'; 196 } 197 } 198 } 199