1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.contacts.common.util; 18 19 import com.google.common.annotations.VisibleForTesting; 20 21 /** 22 * Methods related to search. 23 */ 24 public class SearchUtil { 25 26 public static class MatchedLine { 27 28 public int startIndex = -1; 29 public String line; 30 31 @Override toString()32 public String toString() { 33 return "MatchedLine{" + 34 "line='" + line + '\'' + 35 ", startIndex=" + startIndex + 36 '}'; 37 } 38 } 39 40 /** 41 * Given a string with lines delimited with '\n', finds the matching line to the given 42 * substring. 43 * 44 * @param contents The string to search. 45 * @param substring The substring to search for. 46 * @return A MatchedLine object containing the matching line and the startIndex of the substring 47 * match within that line. 48 */ findMatchingLine(String contents, String substring)49 public static MatchedLine findMatchingLine(String contents, String substring) { 50 final MatchedLine matched = new MatchedLine(); 51 52 // Snippet may contain multiple lines separated by "\n". 53 // Locate the lines of the content that contain the substring. 54 final int index = SearchUtil.contains(contents, substring); 55 if (index != -1) { 56 // Match found. Find the corresponding line. 57 int start = index - 1; 58 while (start > -1) { 59 if (contents.charAt(start) == '\n') { 60 break; 61 } 62 start--; 63 } 64 int end = index + 1; 65 while (end < contents.length()) { 66 if (contents.charAt(end) == '\n') { 67 break; 68 } 69 end++; 70 } 71 matched.line = contents.substring(start + 1, end); 72 matched.startIndex = index - (start + 1); 73 } 74 return matched; 75 } 76 77 /** 78 * Similar to String.contains() with two main differences: 79 * <p> 80 * 1) Only searches token prefixes. A token is defined as any combination of letters or 81 * numbers. 82 * <p> 83 * 2) Returns the starting index where the substring is found. 84 * 85 * @param value The string to search. 86 * @param substring The substring to look for. 87 * @return The starting index where the substring is found. {@literal -1} if substring is not 88 * found in value. 89 */ 90 @VisibleForTesting contains(String value, String substring)91 static int contains(String value, String substring) { 92 if (value.length() < substring.length()) { 93 return -1; 94 } 95 96 // i18n support 97 // Generate the code points for the substring once. 98 // There will be a maximum of substring.length code points. But may be fewer. 99 // Since the array length is not an accurate size, we need to keep a separate variable. 100 final int[] substringCodePoints = new int[substring.length()]; 101 int substringLength = 0; // may not equal substring.length()!! 102 for (int i = 0; i < substring.length(); ) { 103 final int codePoint = Character.codePointAt(substring, i); 104 substringCodePoints[substringLength] = codePoint; 105 substringLength++; 106 i += Character.charCount(codePoint); 107 } 108 109 for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) { 110 int numMatch = 0; 111 for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) { 112 int valueCp = Character.toLowerCase(value.codePointAt(j)); 113 int substringCp = substringCodePoints[numMatch]; 114 if (valueCp != substringCp) { 115 break; 116 } 117 j += Character.charCount(valueCp); 118 } 119 if (numMatch == substringLength) { 120 return i; 121 } 122 } 123 return -1; 124 } 125 126 /** 127 * Find the start of the next token. A token is composed of letters and numbers. Any other 128 * character are considered delimiters. 129 * 130 * @param line The string to search for the next token. 131 * @param startIndex The index to start searching. 0 based indexing. 132 * @return The index for the start of the next token. line.length() if next token not found. 133 */ 134 @VisibleForTesting findNextTokenStart(String line, int startIndex)135 static int findNextTokenStart(String line, int startIndex) { 136 int index = startIndex; 137 138 // If already in token, eat remainder of token. 139 while (index <= line.length()) { 140 if (index == line.length()) { 141 // No more tokens. 142 return index; 143 } 144 final int codePoint = line.codePointAt(index); 145 if (!Character.isLetterOrDigit(codePoint)) { 146 break; 147 } 148 index += Character.charCount(codePoint); 149 } 150 151 // Out of token, eat all consecutive delimiters. 152 while (index <= line.length()) { 153 if (index == line.length()) { 154 return index; 155 } 156 final int codePoint = line.codePointAt(index); 157 if (Character.isLetterOrDigit(codePoint)) { 158 break; 159 } 160 index += Character.charCount(codePoint); 161 } 162 163 return index; 164 } 165 166 /** 167 * Anything other than letter and numbers are considered delimiters. Remove start and end 168 * delimiters since they are not relevant to search. 169 * 170 * @param query The query string to clean. 171 * @return The cleaned query. Empty string if all characters are cleaned out. 172 */ cleanStartAndEndOfSearchQuery(String query)173 public static String cleanStartAndEndOfSearchQuery(String query) { 174 int start = 0; 175 while (start < query.length()) { 176 int codePoint = query.codePointAt(start); 177 if (Character.isLetterOrDigit(codePoint)) { 178 break; 179 } 180 start += Character.charCount(codePoint); 181 } 182 183 if (start == query.length()) { 184 // All characters are delimiters. 185 return ""; 186 } 187 188 int end = query.length() - 1; 189 while (end > -1) { 190 if (Character.isLowSurrogate(query.charAt(end))) { 191 // Assume valid i18n string. There should be a matching high surrogate before it. 192 end--; 193 } 194 int codePoint = query.codePointAt(end); 195 if (Character.isLetterOrDigit(codePoint)) { 196 break; 197 } 198 end--; 199 } 200 201 // end is a letter or digit. 202 return query.substring(start, end + 1); 203 } 204 } 205