1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.launcher3.search;
18 
19 import android.text.TextUtils;
20 
21 import androidx.annotation.Nullable;
22 
23 import com.android.launcher3.util.IntArray;
24 
25 import java.text.Collator;
26 import java.util.stream.IntStream;
27 
28 /**
29  * Utilities for matching query string to target string.
30  */
31 public class StringMatcherUtility {
32 
33     private static final Character SPACE = ' ';
34 
35     /**
36      * Returns {@code true} if {@code query} is a prefix of a substring in {@code target}. How to
37      * break target to valid substring is defined in the given {@code matcher}.
38      */
matches(String query, String target, StringMatcher matcher)39     public static boolean matches(String query, String target, StringMatcher matcher) {
40         int queryLength = query.length();
41 
42         int targetLength = target.length();
43 
44         if (targetLength < queryLength || queryLength <= 0) {
45             return false;
46         }
47 
48         if (requestSimpleFuzzySearch(query)) {
49             return target.toLowerCase().contains(query);
50         }
51 
52         int lastType;
53         int thisType = Character.UNASSIGNED;
54         int nextType = Character.getType(target.codePointAt(0));
55 
56         int end = targetLength - queryLength;
57         for (int i = 0; i <= end; i++) {
58             lastType = thisType;
59             thisType = nextType;
60             nextType = i < (targetLength - 1)
61                     ? Character.getType(target.codePointAt(i + 1)) : Character.UNASSIGNED;
62             if (matcher.isBreak(thisType, lastType, nextType)
63                     && matcher.matches(query, target.substring(i, i + queryLength))) {
64                 return true;
65             }
66         }
67         return false;
68     }
69 
70     /**
71      * Returns a list of breakpoints wherever the string contains a break. For example:
72      * "t-mobile" would have breakpoints at [0, 1]
73      * "Agar.io" would have breakpoints at [3, 4]
74      * "LEGO®Builder" would have a breakpoint at [4]
75      */
76     public static IntArray getListOfBreakpoints(CharSequence input, StringMatcher matcher) {
77         int inputLength = input.length();
78         if ((inputLength <= 2) || TextUtils.indexOf(input, SPACE) != -1) {
79             // when there is a space in the string, return a list where the elements are the
80             // position of the spaces - 1. This is to make the logic consistent where breakpoints
81             // are placed
82             return IntArray.wrap(IntStream.range(0, inputLength)
83                     .filter(i -> input.charAt(i) == SPACE)
84                     .map(i -> i - 1)
85                     .toArray());
86         }
87         IntArray listOfBreakPoints = new IntArray();
88         int prevType;
89         int thisType = Character.getType(Character.codePointAt(input, 0));
90         int nextType = Character.getType(Character.codePointAt(input, 1));
91         for (int i = 1; i < inputLength; i++) {
92             prevType = thisType;
93             thisType = nextType;
94             nextType = i < (inputLength - 1)
95                     ? Character.getType(Character.codePointAt(input, i + 1))
96                     : Character.UNASSIGNED;
97             if (matcher.isBreak(thisType, prevType, nextType)) {
98                 // breakpoint is at previous
99                 listOfBreakPoints.add(i-1);
100             }
101         }
102         return listOfBreakPoints;
103     }
104 
105     /**
106      * Performs locale sensitive string comparison using {@link Collator}.
107      */
108     public static class StringMatcher {
109 
110         private static final char MAX_UNICODE = '\uFFFF';
111 
112         private final Collator mCollator;
113 
114         StringMatcher() {
115             // On android N and above, Collator uses ICU implementation which has a much better
116             // support for non-latin locales.
117             mCollator = Collator.getInstance();
118             mCollator.setStrength(Collator.PRIMARY);
119             mCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
120         }
121 
122         /**
123          * Returns true if {@param query} is a prefix of {@param target}
124          */
125         public boolean matches(@Nullable String query, @Nullable String target) {
126             // `mCollator.compare` requires non-null inputs, so return false earlier (not a match)
127             if (query == null || target == null) {
128                 return false;
129             }
130             switch (mCollator.compare(query, target)) {
131                 case 0:
132                     return true;
133                 case -1:
134                     // The target string can contain a modifier which would make it larger than
135                     // the query string (even though the length is same). If the query becomes
136                     // larger after appending a unicode character, it was originally a prefix of
137                     // the target string and hence should match.
138                     return mCollator.compare(query + MAX_UNICODE, target) > -1;
139                 default:
140                     return false;
141             }
142         }
143 
getInstance()144         public static StringMatcher getInstance() {
145             return new StringMatcher();
146         }
147 
148         /**
149          * Returns true if the current point should be a break point.
150          *
151          * Following cases are considered as break points:
152          *     1) Any non space character after a space character
153          *     2) Any digit after a non-digit character
154          *     3) Any capital character after a digit or small character
155          *     4) Any capital character before a small character
156          *
157          * E.g., "YouTube" matches the input "you" and "tube", but not "out".
158          */
isBreak(int thisType, int prevType, int nextType)159         protected boolean isBreak(int thisType, int prevType, int nextType) {
160             switch (prevType) {
161                 case Character.UNASSIGNED:
162                 case Character.SPACE_SEPARATOR:
163                 case Character.LINE_SEPARATOR:
164                 case Character.PARAGRAPH_SEPARATOR:
165                     return true;
166             }
167             switch (thisType) {
168                 case Character.UPPERCASE_LETTER:
169                     // takes care of the case where there are consistent uppercase letters as well
170                     // as a special symbol following the capitalize letters for example: LEGO®
171                     if (nextType != Character.UPPERCASE_LETTER && nextType != Character.OTHER_SYMBOL
172                             && nextType != Character.DECIMAL_DIGIT_NUMBER
173                             && nextType != Character.UNASSIGNED) {
174                         return true;
175                     }
176                     // Follow through
177                 case Character.TITLECASE_LETTER:
178                     // Break point if previous was not a upper case
179                     return prevType != Character.UPPERCASE_LETTER;
180                 case Character.LOWERCASE_LETTER:
181                     // Break point if previous was not a letter.
182                     return prevType > Character.OTHER_LETTER || prevType <= Character.UNASSIGNED;
183                 case Character.DECIMAL_DIGIT_NUMBER:
184                 case Character.LETTER_NUMBER:
185                 case Character.OTHER_NUMBER:
186                     // Break point if previous was not a number
187                     return !(prevType == Character.DECIMAL_DIGIT_NUMBER
188                             || prevType == Character.LETTER_NUMBER
189                             || prevType == Character.OTHER_NUMBER);
190                 case Character.MATH_SYMBOL:
191                 case Character.CURRENCY_SYMBOL:
192                 case Character.OTHER_PUNCTUATION:
193                 case Character.DASH_PUNCTUATION:
194                     // Always a break point for a symbol
195                     return true;
196                 default:
197                     return  false;
198             }
199         }
200     }
201 
202     /**
203      * Subclass of {@code StringMatcher} using simple space break for prefix matching.
204      * E.g., "YouTube" matches the input "you". "Play Store" matches the input "play".
205      */
206     public static class StringMatcherSpace extends StringMatcher {
207 
getInstance()208         public static StringMatcherSpace getInstance() {
209             return new StringMatcherSpace();
210         }
211 
212         /**
213          * The first character or any character after a space is considered as a break point.
214          * Returns true if the current point should be a break point.
215          */
216         @Override
isBreak(int thisType, int prevType, int nextType)217         protected boolean isBreak(int thisType, int prevType, int nextType) {
218             return prevType == Character.UNASSIGNED || prevType == Character.SPACE_SEPARATOR;
219         }
220     }
221 
222     /**
223      * Matching optimization to search in Chinese.
224      */
requestSimpleFuzzySearch(String s)225     private static boolean requestSimpleFuzzySearch(String s) {
226         for (int i = 0; i < s.length(); ) {
227             int codepoint = s.codePointAt(i);
228             i += Character.charCount(codepoint);
229             switch (Character.UnicodeScript.of(codepoint)) {
230                 case HAN:
231                     //Character.UnicodeScript.HAN: use String.contains to match
232                     return true;
233             }
234         }
235         return false;
236     }
237 }
238