1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.contacts.common.util;
18 
19 import com.google.common.annotations.VisibleForTesting;
20 
21 /**
22  * Methods related to search.
23  */
24 public class SearchUtil {
25 
26     public static class MatchedLine {
27 
28         public int startIndex = -1;
29         public String line;
30 
31         @Override
toString()32         public String toString() {
33             return "MatchedLine{" +
34                     "line='" + line + '\'' +
35                     ", startIndex=" + startIndex +
36                     '}';
37         }
38     }
39 
40     /**
41      * Given a string with lines delimited with '\n', finds the matching line to the given
42      * substring.
43      *
44      * @param contents The string to search.
45      * @param substring The substring to search for.
46      * @return A MatchedLine object containing the matching line and the startIndex of the substring
47      * match within that line.
48      */
findMatchingLine(String contents, String substring)49     public static MatchedLine findMatchingLine(String contents, String substring) {
50         final MatchedLine matched = new MatchedLine();
51 
52         // Snippet may contain multiple lines separated by "\n".
53         // Locate the lines of the content that contain the substring.
54         final int index = SearchUtil.contains(contents, substring);
55         if (index != -1) {
56             // Match found.  Find the corresponding line.
57             int start = index - 1;
58             while (start > -1) {
59                 if (contents.charAt(start) == '\n') {
60                     break;
61                 }
62                 start--;
63             }
64             int end = index + 1;
65             while (end < contents.length()) {
66                 if (contents.charAt(end) == '\n') {
67                     break;
68                 }
69                 end++;
70             }
71             matched.line = contents.substring(start + 1, end);
72             matched.startIndex = index - (start + 1);
73         }
74         return matched;
75     }
76 
77     /**
78      * Similar to String.contains() with two main differences:
79      * <p>
80      * 1) Only searches token prefixes.  A token is defined as any combination of letters or
81      * numbers.
82      * <p>
83      * 2) Returns the starting index where the substring is found.
84      *
85      * @param value The string to search.
86      * @param substring The substring to look for.
87      * @return The starting index where the substring is found. {@literal -1} if substring is not
88      *         found in value.
89      */
90     @VisibleForTesting
contains(String value, String substring)91     static int contains(String value, String substring) {
92         if (value.length() < substring.length()) {
93             return -1;
94         }
95 
96         // i18n support
97         // Generate the code points for the substring once.
98         // There will be a maximum of substring.length code points.  But may be fewer.
99         // Since the array length is not an accurate size, we need to keep a separate variable.
100         final int[] substringCodePoints = new int[substring.length()];
101         int substringLength = 0;  // may not equal substring.length()!!
102         for (int i = 0; i < substring.length(); ) {
103             final int codePoint = Character.codePointAt(substring, i);
104             substringCodePoints[substringLength] = codePoint;
105             substringLength++;
106             i += Character.charCount(codePoint);
107         }
108 
109         for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
110             int numMatch = 0;
111             for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) {
112                 int valueCp = Character.toLowerCase(value.codePointAt(j));
113                 int substringCp = substringCodePoints[numMatch];
114                 if (valueCp != substringCp) {
115                     break;
116                 }
117                 j += Character.charCount(valueCp);
118             }
119             if (numMatch == substringLength) {
120                 return i;
121             }
122         }
123         return -1;
124     }
125 
126     /**
127      * Find the start of the next token.  A token is composed of letters and numbers. Any other
128      * character are considered delimiters.
129      *
130      * @param line The string to search for the next token.
131      * @param startIndex The index to start searching.  0 based indexing.
132      * @return The index for the start of the next token.  line.length() if next token not found.
133      */
134     @VisibleForTesting
findNextTokenStart(String line, int startIndex)135     static int findNextTokenStart(String line, int startIndex) {
136         int index = startIndex;
137 
138         // If already in token, eat remainder of token.
139         while (index <= line.length()) {
140             if (index == line.length()) {
141                 // No more tokens.
142                 return index;
143             }
144             final int codePoint = line.codePointAt(index);
145             if (!Character.isLetterOrDigit(codePoint)) {
146                 break;
147             }
148             index += Character.charCount(codePoint);
149         }
150 
151         // Out of token, eat all consecutive delimiters.
152         while (index <= line.length()) {
153             if (index == line.length()) {
154                 return index;
155             }
156             final int codePoint = line.codePointAt(index);
157             if (Character.isLetterOrDigit(codePoint)) {
158                 break;
159             }
160             index += Character.charCount(codePoint);
161         }
162 
163         return index;
164     }
165 
166     /**
167      * Anything other than letter and numbers are considered delimiters.  Remove start and end
168      * delimiters since they are not relevant to search.
169      *
170      * @param query The query string to clean.
171      * @return The cleaned query. Empty string if all characters are cleaned out.
172      */
cleanStartAndEndOfSearchQuery(String query)173     public static String cleanStartAndEndOfSearchQuery(String query) {
174         int start = 0;
175         while (start < query.length()) {
176             int codePoint = query.codePointAt(start);
177             if (Character.isLetterOrDigit(codePoint)) {
178                 break;
179             }
180             start += Character.charCount(codePoint);
181         }
182 
183         if (start == query.length()) {
184             // All characters are delimiters.
185             return "";
186         }
187 
188         int end = query.length() - 1;
189         while (end > -1) {
190             if (Character.isLowSurrogate(query.charAt(end))) {
191                 // Assume valid i18n string.  There should be a matching high surrogate before it.
192                 end--;
193             }
194             int codePoint = query.codePointAt(end);
195             if (Character.isLetterOrDigit(codePoint)) {
196                 break;
197             }
198             end--;
199         }
200 
201         // end is a letter or digit.
202         return query.substring(start, end + 1);
203     }
204 }
205