/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.ext.services.notification;

import static android.app.Notification.CATEGORY_EMAIL;
import static android.app.Notification.CATEGORY_MESSAGE;
import static android.app.Notification.CATEGORY_SOCIAL;
import static android.app.Notification.EXTRA_BIG_TEXT;
import static android.app.Notification.EXTRA_MESSAGES;
import static android.app.Notification.EXTRA_SUB_TEXT;
import static android.app.Notification.EXTRA_SUMMARY_TEXT;
import static android.app.Notification.EXTRA_TEXT;
import static android.app.Notification.EXTRA_TEXT_LINES;
import static android.app.Notification.EXTRA_TITLE;
import static android.app.Notification.EXTRA_TITLE_BIG;
import static android.view.textclassifier.TextClassifier.TYPE_ADDRESS;
import static android.view.textclassifier.TextClassifier.TYPE_FLIGHT_NUMBER;
import static android.view.textclassifier.TextClassifier.TYPE_PHONE;

import static java.lang.String.format;

import android.app.Notification;
import android.app.Notification.MessagingStyle;
import android.app.Notification.MessagingStyle.Message;
import android.icu.util.ULocale;
import android.os.Bundle;
import android.os.Parcelable;
import android.service.notification.Flags;
import android.util.ArrayMap;
import android.view.textclassifier.TextClassifier;
import android.view.textclassifier.TextLanguage;
import android.view.textclassifier.TextLinks;

import com.android.modules.utils.build.SdkLevel;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Class with helper methods related to detecting OTP codes in notifications.
 *
 * <p>All regular expression {@link Matcher}s are held in {@link ThreadLocal}s, so each thread
 * resets and reuses its own matcher instance.
 */
public class NotificationOtpDetectionHelper {

    // Use an ArrayList because a List.of list will throw NPE when calling "contains(null)"
    private static final List<String> SENSITIVE_NOTIFICATION_CATEGORIES = new ArrayList<>(
            List.of(CATEGORY_MESSAGE, CATEGORY_EMAIL, CATEGORY_SOCIAL));

    // Minimum confidence score for a TextClassifier result (language or entity) to be trusted
    private static final float TC_THRESHOLD = 0.6f;

    // Maps a language tag to the language-specific context-word matcher for that language
    private static final ArrayMap<String, ThreadLocal<Matcher>> EXTRA_LANG_OTP_REGEX =
            new ArrayMap<>();

    // Upper bound on the number of chars of notification text that are examined
    private static final int MAX_SENSITIVE_TEXT_LEN = 600;

    // A regex matching a line start, space, open paren, arrow, colon (not preceded by a digit),
    // open square bracket, equals sign, double or single quote, or ideographic char. It will
    // not consume the start char (meaning START won't be included in the matched string)
    private static final String START = "(^|(?<=[>\\s(\"'=\\[\\p{IsIdeographic}]|[^0-9]:))";

    // One single OTP char. A number or alphabetical char (that isn't also ideographic), followed by
    // an optional dash
    private static final String OTP_CHAR = "([0-9\\p{IsAlphabetic}&&[^\\p{IsIdeographic}]]-?)";

    // Performs a lookahead to find a digit after 0 to 7 OTP_CHARs. This ensures that our potential
    // OTP code contains at least one number
    private static final String FIND_DIGIT = format("(?=%s{0,7}\\d)", OTP_CHAR);

    // Matches between 5 and 8 OTP_CHARs. Here, we are assuming an OTP code is 5-8 characters long
    private static final String OTP_CHARS = format("(%s{5,8})", OTP_CHAR);

    // A lookahead for a non-word char (\W), the end of the input, or an ideographic char.
    // It will not consume the end char
    private static final String END = "(?=\\W|$|\\p{IsIdeographic})";

    // A regex matching four digit numerical codes
    private static final String FOUR_DIGITS = "(\\d{4})";

    private static final String FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM =
            format("(%s%s)", FIND_DIGIT, OTP_CHARS);

    // A regex matching two pairs of 3 digits (ex "123 456")
    private static final String SIX_DIGITS_WITH_SPACE = "(\\d{3}\\s\\d{3})";

    /**
     * Combining the regular expressions above, we get an OTP regex:
     * 1. search for START, THEN
     * 2. match ONE of
     *   a. alphanumeric sequence, at least one number, length 5-8, with optional dashes
     *   b. 4 numbers in a row
     *   c. pair of 3 digit codes separated by a space
     * THEN
     * 3. search for END Ex:
     * "6454", " 345 678.", "[YDT-456]"
     */
    private static final String ALL_OTP =
            format("%s(%s|%s|%s)%s",
                    START, FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM, FOUR_DIGITS,
                    SIX_DIGITS_WITH_SPACE, END);

    private static final ThreadLocal<Matcher> OTP_REGEX = ThreadLocal.withInitial(() ->
            Pattern.compile(ALL_OTP).matcher(""));

    /**
     * A Date regular expression. Looks for dates with the month, day, and year separated by dashes.
     * Handles one and two digit months and days, and four or two-digit years. It makes the
     * following assumptions:
     * Dates and months will never be higher than 39
     * If a four digit year is used, the leading digit will be 1 or 2
     * This regex is used to eliminate the most common false positive of the OTP regex, and is run
     * on all messages, even before looking at language-specific regexs.
     */
    private static final ThreadLocal<Matcher> DATE_WITH_DASHES_REGEX = ThreadLocal.withInitial(() ->
            Pattern.compile(format("%s([0-3]?\\d-[0-3]?\\d-([12]\\d)?\\d\\d)%s", START, END))
                    .matcher(""));

    // A regex matching the common years of 19xx and 20xx. Used for false positive reduction
    private static final String COMMON_YEARS = format("%s((19|20)\\d\\d)%s", START, END);

    // A regex matching three lower case letters. Used for false positive reduction, as no known
    // OTPs have 3 lowercase letters in sequence.
    private static final String THREE_LOWERCASE = "(\\p{Ll}{3})";

    // A combination of common false positives. Run in cases where we don't have a language specific
    // regular expression.
    private static final ThreadLocal<Matcher> FALSE_POSITIVE_REGEX = ThreadLocal.withInitial(() ->
            Pattern.compile(format("%s|%s", COMMON_YEARS, THREE_LOWERCASE)).matcher(""));

    /**
     * A list of regular expressions representing words found in an OTP context (non case sensitive)
     * Note: TAN is short for Transaction Authentication Number
     */
    private static final String[] ENGLISH_CONTEXT_WORDS = new String[] {
            "pin", "pass[-\\s]?(code|word)", "TAN", "otp", "2fa", "(two|2)[-\\s]?factor",
            "log[-\\s]?in", "auth(enticat(e|ion))?", "code", "secret", "verif(y|ication)",
            "confirm(ation)?"
    };

    /**
     * Creates a regular expression to match any of a series of individual words, case insensitive.
     *
     * @param words The alternatives to match. Each entry is inserted into the pattern verbatim,
     *              so it may itself be a regular expression.
     * @return A matcher (initially reset to the empty string) for
     *         {@code (?i)\b(word1|word2|...)\b}
     */
    private static Matcher createDictionaryRegex(String[] words) {
        String alternatives = String.join("|", words);
        return Pattern.compile("(?i)\\b(" + alternatives + ")\\b").matcher("");
    }

    static {
        EXTRA_LANG_OTP_REGEX.put(ULocale.ENGLISH.toLanguageTag(), ThreadLocal.withInitial(() ->
                createDictionaryRegex(ENGLISH_CONTEXT_WORDS)));
    }

    /**
     * Checks if the sensitive parts of a notification might contain an OTP, based on several
     * regular expressions, and potentially using a textClassifier to eliminate false positives
     *
     * @param notification The notification whose content should be checked
     * @param checkForFalsePositives If true, will ensure the content does not match the date regex.
     *                               If a TextClassifier is provided, it will then try to find a
     *                               language specific regex. If it is successful, it will use that
     *                               regex to check for false positives. If it is not, it will use
     *                               the TextClassifier (if provided), plus the year and three
     *                               lowercase regexes to remove possible false positives.
     * @param tc If non null, the provided TextClassifier will be used to find the language of the
     *           text, and look for a language-specific regex for it. If checkForFalsePositives is
     *           true will also use the classifier to find flight codes and addresses.
     * @return False if the notification is null, the SDK level is below V, or the OTP regex does
     *         not match. Otherwise true if checkForFalsePositives is false, or if the match
     *         survives the false positive checks described above.
     */
    public static boolean containsOtp(Notification notification,
            boolean checkForFalsePositives, TextClassifier tc) {
        if (notification == null || !SdkLevel.isAtLeastV()) {
            return false;
        }

        String sensitiveText = getTextForDetection(notification);
        Matcher otpMatcher = OTP_REGEX.get();
        otpMatcher.reset(sensitiveText);
        boolean otpMatch = otpMatcher.find();
        if (!checkForFalsePositives || !otpMatch) {
            return otpMatch;
        }

        // Dates are the most common false positive, and are checked for every language
        if (allOtpMatchesAreFalsePositives(sensitiveText, DATE_WITH_DASHES_REGEX.get())) {
            return false;
        }

        if (tc != null) {
            Matcher languageSpecificMatcher = getLanguageSpecificRegex(sensitiveText, tc);
            if (languageSpecificMatcher != null) {
                languageSpecificMatcher.reset(sensitiveText);
                // Only use the language-specific regex for false positives
                return languageSpecificMatcher.find();
            }
            // Else, use TC to check for false positives
            if (hasFalsePositivesTcCheck(sensitiveText, tc)) {
                return false;
            }
        }

        return !allOtpMatchesAreFalsePositives(sensitiveText, FALSE_POSITIVE_REGEX.get());
    }

    /**
     * Checks whether every OTP candidate in a text is also matched by a false positive regex.
     *
     * @param text The full text to check
     * @param falsePositiveRegex A matcher for content that should not be treated as an OTP. It is
     *                           reset by this method.
     * @return true if falsePositiveRegex matches somewhere in the text, and every match found by
     *         OTP_REGEX is also matched by falsePositiveRegex. False if falsePositiveRegex does
     *         not match the text at all, or at least one OTP match is not a false positive.
     */
    private static boolean allOtpMatchesAreFalsePositives(String text,
            Matcher falsePositiveRegex) {
        // Cheap pre-check: if the false positive regex matches nowhere, nothing can be excluded
        falsePositiveRegex.reset(text);
        if (!falsePositiveRegex.find()) {
            return false;
        }
        Matcher otpMatcher = OTP_REGEX.get();
        otpMatcher.reset(text);
        while (otpMatcher.find()) {
            falsePositiveRegex.reset(otpMatcher.group());
            if (!falsePositiveRegex.find()) {
                // A possible otp was not matched by the false positive regex
                return false;
            }
        }
        // All otp matches were matched by the false positive regex
        return true;
    }

    /**
     * Finds a language-specific context regex for the detected language of a text.
     *
     * @param text The text whose language should be detected
     * @param tc The TextClassifier used to detect the language
     * @return The matcher registered for the first detected locale that has a confidence score of
     *         at least TC_THRESHOLD and an entry in EXTRA_LANG_OTP_REGEX, or null if there is none
     */
    private static Matcher getLanguageSpecificRegex(String text, TextClassifier tc) {
        TextLanguage.Request langRequest = new TextLanguage.Request.Builder(text).build();
        TextLanguage lang = tc.detectLanguage(langRequest);
        for (int i = 0; i < lang.getLocaleHypothesisCount(); i++) {
            ULocale locale = lang.getLocale(i);
            if (lang.getConfidenceScore(locale) < TC_THRESHOLD) {
                continue;
            }
            ThreadLocal<Matcher> languageRegex =
                    EXTRA_LANG_OTP_REGEX.get(locale.toLanguageTag());
            if (languageRegex != null) {
                return languageRegex.get();
            }
        }
        return null;
    }

    /**
     * Uses a TextClassifier to look for flight numbers and addresses in a text.
     *
     * @param text The text to classify
     * @param tc The TextClassifier used to generate links
     * @return true if any generated link has a flight number or address confidence score above
     *         TC_THRESHOLD, false otherwise
     */
    private static boolean hasFalsePositivesTcCheck(String text, TextClassifier tc) {
        // Use TC to eliminate false positives from a regex match, namely: flight codes, and
        // addresses
        List<String> included = new ArrayList<>(List.of(TYPE_FLIGHT_NUMBER, TYPE_ADDRESS));
        List<String> excluded = new ArrayList<>(List.of(TYPE_PHONE));
        TextClassifier.EntityConfig config =
                new TextClassifier.EntityConfig.Builder().setIncludedTypes(
                        included).setExcludedTypes(excluded).build();
        TextLinks.Request request =
                new TextLinks.Request.Builder(text).setEntityConfig(config).build();
        TextLinks links = tc.generateLinks(request);
        for (TextLinks.TextLink link : links.getLinks()) {
            if (link.getConfidenceScore(TYPE_FLIGHT_NUMBER) > TC_THRESHOLD
                    || link.getConfidenceScore(TYPE_ADDRESS) > TC_THRESHOLD) {
                return true;
            }
        }
        return false;
    }

    /**
     * Gets the sections of text in a notification that should be checked for sensitive content.
     * This includes the text, title, subtext, messages, and extra text lines, plus the big text,
     * big title and summary text when the big text style flag is enabled. Fields are joined with
     * single spaces, and messages are added newest first. Null fields, text lines and message
     * texts contribute only their separator.
     *
     * @param notification The notification whose content should be filtered
     * @return The extracted text fields, truncated to MAX_SENSITIVE_TEXT_LEN chars. Empty if the
     *         notification or its extras are null, the SDK level is below V, or the redaction flag
     *         is disabled.
     */
    public static String getTextForDetection(Notification notification) {
        if (notification == null || notification.extras == null || !SdkLevel.isAtLeastV()
                || !Flags.redactSensitiveNotificationsFromUntrustedListeners()) {
            return "";
        }
        Bundle extras = notification.extras;
        StringBuilder builder = new StringBuilder();
        appendField(builder, extras.getCharSequence(EXTRA_TITLE));
        appendField(builder, extras.getCharSequence(EXTRA_TEXT));
        appendField(builder, extras.getCharSequence(EXTRA_SUB_TEXT));
        if (Flags.redactSensitiveNotificationsBigTextStyle()) {
            appendField(builder, extras.getCharSequence(EXTRA_BIG_TEXT));
            appendField(builder, extras.getCharSequence(EXTRA_TITLE_BIG));
            appendField(builder, extras.getCharSequence(EXTRA_SUMMARY_TEXT));
        }
        CharSequence[] textLines = extras.getCharSequenceArray(EXTRA_TEXT_LINES);
        if (textLines != null) {
            for (CharSequence line : textLines) {
                // Anything past the limit is cut off below, so there is no need to append it
                if (builder.length() >= MAX_SENSITIVE_TEXT_LEN) {
                    break;
                }
                appendField(builder, line);
            }
        }
        List<Message> messages = Message.getMessagesFromBundleArray(
                extras.getParcelableArray(EXTRA_MESSAGES, Parcelable.class));
        // Sort the newest messages (largest timestamp) first
        messages.sort((MessagingStyle.Message lhs, MessagingStyle.Message rhs) ->
                Long.compare(rhs.getTimestamp(), lhs.getTimestamp()));
        for (MessagingStyle.Message message : messages) {
            if (builder.length() >= MAX_SENSITIVE_TEXT_LEN) {
                break;
            }
            appendField(builder, message.getText());
        }
        return builder.length() <= MAX_SENSITIVE_TEXT_LEN ? builder.toString()
                : builder.substring(0, MAX_SENSITIVE_TEXT_LEN);
    }

    /**
     * Appends a text field followed by a single space separator. A null field appends only the
     * separator, rather than the literal string "null" that StringBuilder would otherwise add.
     */
    private static void appendField(StringBuilder builder, CharSequence field) {
        if (field != null) {
            builder.append(field);
        }
        builder.append(" ");
    }

    /**
     * Determines if a notification should be checked for an OTP, based on category, style, and
     * possible otp content (as determined by a regular expression). The notification's public
     * version is checked the same way.
     *
     * @param notification The notification whose content should be checked
     * @return true, if further checks for OTP codes should be performed, false otherwise
     */
    public static boolean shouldCheckForOtp(Notification notification) {
        if (notification == null || !SdkLevel.isAtLeastV()
                || !Flags.redactSensitiveNotificationsFromUntrustedListeners()) {
            return false;
        }
        return SENSITIVE_NOTIFICATION_CATEGORIES.contains(notification.category)
                || isStyle(notification, Notification.MessagingStyle.class)
                || isStyle(notification, Notification.InboxStyle.class)
                || containsOtp(notification, false, null)
                || shouldCheckForOtp(notification.publicVersion);
    }

    /**
     * Checks whether the template recorded in a notification's extras is the given style class.
     *
     * @param notification The notification to inspect
     * @param styleClass The style class to compare against
     * @return true if the extras are non-null and EXTRA_TEMPLATE equals the style's class name
     */
    private static boolean isStyle(Notification notification,
            Class<? extends Notification.Style> styleClass) {
        if (notification.extras == null) {
            return false;
        }
        String templateClass = notification.extras.getString(Notification.EXTRA_TEMPLATE);
        return Objects.equals(templateClass, styleClass.getName());
    }

    // Static utility class; not instantiable
    private NotificationOtpDetectionHelper() { }
}