1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.contacts.common.util;
18 
19 import android.support.annotation.VisibleForTesting;
20 
21 /** Methods related to search. */
22 public class SearchUtil {
23 
24   /**
25    * Given a string with lines delimited with '\n', finds the matching line to the given substring.
26    *
27    * @param contents The string to search.
28    * @param substring The substring to search for.
29    * @return A MatchedLine object containing the matching line and the startIndex of the substring
30    *     match within that line.
31    */
findMatchingLine(String contents, String substring)32   public static MatchedLine findMatchingLine(String contents, String substring) {
33     final MatchedLine matched = new MatchedLine();
34 
35     // Snippet may contain multiple lines separated by "\n".
36     // Locate the lines of the content that contain the substring.
37     final int index = SearchUtil.contains(contents, substring);
38     if (index != -1) {
39       // Match found.  Find the corresponding line.
40       int start = index - 1;
41       while (start > -1) {
42         if (contents.charAt(start) == '\n') {
43           break;
44         }
45         start--;
46       }
47       int end = index + 1;
48       while (end < contents.length()) {
49         if (contents.charAt(end) == '\n') {
50           break;
51         }
52         end++;
53       }
54       matched.line = contents.substring(start + 1, end);
55       matched.startIndex = index - (start + 1);
56     }
57     return matched;
58   }
59 
60   /**
61    * Similar to String.contains() with two main differences:
62    *
63    * <p>1) Only searches token prefixes. A token is defined as any combination of letters or
64    * numbers.
65    *
66    * <p>2) Returns the starting index where the substring is found.
67    *
68    * @param value The string to search.
69    * @param substring The substring to look for.
70    * @return The starting index where the substring is found. {@literal -1} if substring is not
71    *     found in value.
72    */
73   @VisibleForTesting
contains(String value, String substring)74   static int contains(String value, String substring) {
75     if (value.length() < substring.length()) {
76       return -1;
77     }
78 
79     // i18n support
80     // Generate the code points for the substring once.
81     // There will be a maximum of substring.length code points.  But may be fewer.
82     // Since the array length is not an accurate size, we need to keep a separate variable.
83     final int[] substringCodePoints = new int[substring.length()];
84     int substringLength = 0; // may not equal substring.length()!!
85     for (int i = 0; i < substring.length(); ) {
86       final int codePoint = Character.codePointAt(substring, i);
87       substringCodePoints[substringLength] = codePoint;
88       substringLength++;
89       i += Character.charCount(codePoint);
90     }
91 
92     for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
93       int numMatch = 0;
94       for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) {
95         int valueCp = Character.toLowerCase(value.codePointAt(j));
96         int substringCp = substringCodePoints[numMatch];
97         if (valueCp != substringCp) {
98           break;
99         }
100         j += Character.charCount(valueCp);
101       }
102       if (numMatch == substringLength) {
103         return i;
104       }
105     }
106     return -1;
107   }
108 
109   /**
110    * Find the start of the next token. A token is composed of letters and numbers. Any other
111    * character are considered delimiters.
112    *
113    * @param line The string to search for the next token.
114    * @param startIndex The index to start searching. 0 based indexing.
115    * @return The index for the start of the next token. line.length() if next token not found.
116    */
117   @VisibleForTesting
findNextTokenStart(String line, int startIndex)118   static int findNextTokenStart(String line, int startIndex) {
119     int index = startIndex;
120 
121     // If already in token, eat remainder of token.
122     while (index <= line.length()) {
123       if (index == line.length()) {
124         // No more tokens.
125         return index;
126       }
127       final int codePoint = line.codePointAt(index);
128       if (!Character.isLetterOrDigit(codePoint)) {
129         break;
130       }
131       index += Character.charCount(codePoint);
132     }
133 
134     // Out of token, eat all consecutive delimiters.
135     while (index <= line.length()) {
136       if (index == line.length()) {
137         return index;
138       }
139       final int codePoint = line.codePointAt(index);
140       if (Character.isLetterOrDigit(codePoint)) {
141         break;
142       }
143       index += Character.charCount(codePoint);
144     }
145 
146     return index;
147   }
148 
149   /**
150    * Anything other than letter and numbers are considered delimiters. Remove start and end
151    * delimiters since they are not relevant to search.
152    *
153    * @param query The query string to clean.
154    * @return The cleaned query. Empty string if all characters are cleaned out.
155    */
cleanStartAndEndOfSearchQuery(String query)156   public static String cleanStartAndEndOfSearchQuery(String query) {
157     int start = 0;
158     while (start < query.length()) {
159       int codePoint = query.codePointAt(start);
160       if (Character.isLetterOrDigit(codePoint)) {
161         break;
162       }
163       start += Character.charCount(codePoint);
164     }
165 
166     if (start == query.length()) {
167       // All characters are delimiters.
168       return "";
169     }
170 
171     int end = query.length() - 1;
172     while (end > -1) {
173       if (Character.isLowSurrogate(query.charAt(end))) {
174         // Assume valid i18n string.  There should be a matching high surrogate before it.
175         end--;
176       }
177       int codePoint = query.codePointAt(end);
178       if (Character.isLetterOrDigit(codePoint)) {
179         break;
180       }
181       end--;
182     }
183 
184     // end is a letter or digit.
185     return query.substring(start, end + 1);
186   }
187 
188   public static class MatchedLine {
189 
190     public int startIndex = -1;
191     public String line;
192 
193     @Override
toString()194     public String toString() {
195       return "MatchedLine{" + "line='" + line + '\'' + ", startIndex=" + startIndex + '}';
196     }
197   }
198 }
199