1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.launcher3.search; 18 19 import android.text.TextUtils; 20 21 import androidx.annotation.Nullable; 22 23 import com.android.launcher3.util.IntArray; 24 25 import java.text.Collator; 26 import java.util.stream.IntStream; 27 28 /** 29 * Utilities for matching query string to target string. 30 */ 31 public class StringMatcherUtility { 32 33 private static final Character SPACE = ' '; 34 35 /** 36 * Returns {@code true} if {@code query} is a prefix of a substring in {@code target}. How to 37 * break target to valid substring is defined in the given {@code matcher}. 38 */ matches(String query, String target, StringMatcher matcher)39 public static boolean matches(String query, String target, StringMatcher matcher) { 40 int queryLength = query.length(); 41 42 int targetLength = target.length(); 43 44 if (targetLength < queryLength || queryLength <= 0) { 45 return false; 46 } 47 48 if (requestSimpleFuzzySearch(query)) { 49 return target.toLowerCase().contains(query); 50 } 51 52 int lastType; 53 int thisType = Character.UNASSIGNED; 54 int nextType = Character.getType(target.codePointAt(0)); 55 56 int end = targetLength - queryLength; 57 for (int i = 0; i <= end; i++) { 58 lastType = thisType; 59 thisType = nextType; 60 nextType = i < (targetLength - 1) 61 ? Character.getType(target.codePointAt(i + 1)) : Character.UNASSIGNED; 62 if (matcher.isBreak(thisType, lastType, nextType) 63 && matcher.matches(query, target.substring(i, i + queryLength))) { 64 return true; 65 } 66 } 67 return false; 68 } 69 70 /** 71 * Returns a list of breakpoints wherever the string contains a break. For example: 72 * "t-mobile" would have breakpoints at [0, 1] 73 * "Agar.io" would have breakpoints at [3, 4] 74 * "LEGO®Builder" would have a breakpoint at [4] 75 */ 76 public static IntArray getListOfBreakpoints(CharSequence input, StringMatcher matcher) { 77 int inputLength = input.length(); 78 if ((inputLength <= 2) || TextUtils.indexOf(input, SPACE) != -1) { 79 // when there is a space in the string, return a list where the elements are the 80 // position of the spaces - 1. This is to make the logic consistent where breakpoints 81 // are placed 82 return IntArray.wrap(IntStream.range(0, inputLength) 83 .filter(i -> input.charAt(i) == SPACE) 84 .map(i -> i - 1) 85 .toArray()); 86 } 87 IntArray listOfBreakPoints = new IntArray(); 88 int prevType; 89 int thisType = Character.getType(Character.codePointAt(input, 0)); 90 int nextType = Character.getType(Character.codePointAt(input, 1)); 91 for (int i = 1; i < inputLength; i++) { 92 prevType = thisType; 93 thisType = nextType; 94 nextType = i < (inputLength - 1) 95 ? Character.getType(Character.codePointAt(input, i + 1)) 96 : Character.UNASSIGNED; 97 if (matcher.isBreak(thisType, prevType, nextType)) { 98 // breakpoint is at previous 99 listOfBreakPoints.add(i-1); 100 } 101 } 102 return listOfBreakPoints; 103 } 104 105 /** 106 * Performs locale sensitive string comparison using {@link Collator}. 107 */ 108 public static class StringMatcher { 109 110 private static final char MAX_UNICODE = '\uFFFF'; 111 112 private final Collator mCollator; 113 114 StringMatcher() { 115 // On android N and above, Collator uses ICU implementation which has a much better 116 // support for non-latin locales. 117 mCollator = Collator.getInstance(); 118 mCollator.setStrength(Collator.PRIMARY); 119 mCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 120 } 121 122 /** 123 * Returns true if {@param query} is a prefix of {@param target} 124 */ 125 public boolean matches(@Nullable String query, @Nullable String target) { 126 // `mCollator.compare` requires non-null inputs, so return false earlier (not a match) 127 if (query == null || target == null) { 128 return false; 129 } 130 switch (mCollator.compare(query, target)) { 131 case 0: 132 return true; 133 case -1: 134 // The target string can contain a modifier which would make it larger than 135 // the query string (even though the length is same). If the query becomes 136 // larger after appending a unicode character, it was originally a prefix of 137 // the target string and hence should match. 138 return mCollator.compare(query + MAX_UNICODE, target) > -1; 139 default: 140 return false; 141 } 142 } 143 getInstance()144 public static StringMatcher getInstance() { 145 return new StringMatcher(); 146 } 147 148 /** 149 * Returns true if the current point should be a break point. 150 * 151 * Following cases are considered as break points: 152 * 1) Any non space character after a space character 153 * 2) Any digit after a non-digit character 154 * 3) Any capital character after a digit or small character 155 * 4) Any capital character before a small character 156 * 157 * E.g., "YouTube" matches the input "you" and "tube", but not "out". 158 */ isBreak(int thisType, int prevType, int nextType)159 protected boolean isBreak(int thisType, int prevType, int nextType) { 160 switch (prevType) { 161 case Character.UNASSIGNED: 162 case Character.SPACE_SEPARATOR: 163 case Character.LINE_SEPARATOR: 164 case Character.PARAGRAPH_SEPARATOR: 165 return true; 166 } 167 switch (thisType) { 168 case Character.UPPERCASE_LETTER: 169 // takes care of the case where there are consistent uppercase letters as well 170 // as a special symbol following the capitalize letters for example: LEGO® 171 if (nextType != Character.UPPERCASE_LETTER && nextType != Character.OTHER_SYMBOL 172 && nextType != Character.DECIMAL_DIGIT_NUMBER 173 && nextType != Character.UNASSIGNED) { 174 return true; 175 } 176 // Follow through 177 case Character.TITLECASE_LETTER: 178 // Break point if previous was not a upper case 179 return prevType != Character.UPPERCASE_LETTER; 180 case Character.LOWERCASE_LETTER: 181 // Break point if previous was not a letter. 182 return prevType > Character.OTHER_LETTER || prevType <= Character.UNASSIGNED; 183 case Character.DECIMAL_DIGIT_NUMBER: 184 case Character.LETTER_NUMBER: 185 case Character.OTHER_NUMBER: 186 // Break point if previous was not a number 187 return !(prevType == Character.DECIMAL_DIGIT_NUMBER 188 || prevType == Character.LETTER_NUMBER 189 || prevType == Character.OTHER_NUMBER); 190 case Character.MATH_SYMBOL: 191 case Character.CURRENCY_SYMBOL: 192 case Character.OTHER_PUNCTUATION: 193 case Character.DASH_PUNCTUATION: 194 // Always a break point for a symbol 195 return true; 196 default: 197 return false; 198 } 199 } 200 } 201 202 /** 203 * Subclass of {@code StringMatcher} using simple space break for prefix matching. 204 * E.g., "YouTube" matches the input "you". "Play Store" matches the input "play". 205 */ 206 public static class StringMatcherSpace extends StringMatcher { 207 getInstance()208 public static StringMatcherSpace getInstance() { 209 return new StringMatcherSpace(); 210 } 211 212 /** 213 * The first character or any character after a space is considered as a break point. 214 * Returns true if the current point should be a break point. 215 */ 216 @Override isBreak(int thisType, int prevType, int nextType)217 protected boolean isBreak(int thisType, int prevType, int nextType) { 218 return prevType == Character.UNASSIGNED || prevType == Character.SPACE_SEPARATOR; 219 } 220 } 221 222 /** 223 * Matching optimization to search in Chinese. 224 */ requestSimpleFuzzySearch(String s)225 private static boolean requestSimpleFuzzySearch(String s) { 226 for (int i = 0; i < s.length(); ) { 227 int codepoint = s.codePointAt(i); 228 i += Character.charCount(codepoint); 229 switch (Character.UnicodeScript.of(codepoint)) { 230 case HAN: 231 //Character.UnicodeScript.HAN: use String.contains to match 232 return true; 233 } 234 } 235 return false; 236 } 237 } 238