1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.ext.services.notification;
18 
19 import static android.app.Notification.CATEGORY_EMAIL;
20 import static android.app.Notification.CATEGORY_MESSAGE;
21 import static android.app.Notification.CATEGORY_SOCIAL;
22 import static android.app.Notification.EXTRA_BIG_TEXT;
23 import static android.app.Notification.EXTRA_MESSAGES;
24 import static android.app.Notification.EXTRA_SUB_TEXT;
25 import static android.app.Notification.EXTRA_SUMMARY_TEXT;
26 import static android.app.Notification.EXTRA_TEXT;
27 import static android.app.Notification.EXTRA_TEXT_LINES;
28 import static android.app.Notification.EXTRA_TITLE;
29 import static android.app.Notification.EXTRA_TITLE_BIG;
30 import static android.view.textclassifier.TextClassifier.TYPE_ADDRESS;
31 import static android.view.textclassifier.TextClassifier.TYPE_FLIGHT_NUMBER;
32 import static android.view.textclassifier.TextClassifier.TYPE_PHONE;
33 
34 import static java.lang.String.format;
35 
36 import android.app.Notification;
37 import android.app.Notification.MessagingStyle;
38 import android.app.Notification.MessagingStyle.Message;
39 import android.icu.util.ULocale;
40 import android.os.Bundle;
41 import android.os.Parcelable;
42 import android.service.notification.Flags;
43 import android.util.ArrayMap;
44 import android.view.textclassifier.TextClassifier;
45 import android.view.textclassifier.TextLanguage;
46 import android.view.textclassifier.TextLinks;
47 
48 import com.android.modules.utils.build.SdkLevel;
49 
50 import java.util.ArrayList;
51 import java.util.List;
52 import java.util.Objects;
53 import java.util.regex.Matcher;
54 import java.util.regex.Pattern;
55 
56 /**
57  * Class with helper methods related to detecting OTP codes in notifications
58  */
59 public class NotificationOtpDetectionHelper {
60 
61     // Use an ArrayList because a List.of list will throw NPE when calling "contains(null)"
62     private static final List<String> SENSITIVE_NOTIFICATION_CATEGORIES = new ArrayList<>(
63             List.of(CATEGORY_MESSAGE, CATEGORY_EMAIL, CATEGORY_SOCIAL));
64 
65     private static final float TC_THRESHOLD = 0.6f;
66 
67     private static final ArrayMap<String, ThreadLocal<Matcher>> EXTRA_LANG_OTP_REGEX =
68             new ArrayMap<>();
69 
70     private static final int MAX_SENSITIVE_TEXT_LEN = 600;
71 
72     // A regex matching a line start, space, open paren, arrow, colon (not proceeded by a digit),
73     // open square bracket, equals sign, double or single quote, or ideographic char. It will
74     // not consume the start char (meaning START won't be included in the matched string)
75     private static final String START = "(^|(?<=[>\\s(\"'=\\[\\p{IsIdeographic}]|[^0-9]:))";
76 
77 
78     // One single OTP char. A number or alphabetical char (that isn't also ideographic), followed by
79     // an optional dash
80     private static final String OTP_CHAR = "([0-9\\p{IsAlphabetic}&&[^\\p{IsIdeographic}]]-?)";
81 
82     // Performs a lookahead to find a digit after 0 to 7 OTP_CHARs. This ensures that our potential
83     // OTP code contains at least one number
84     private static final String FIND_DIGIT = format("(?=%s{0,7}\\d)", OTP_CHAR);
85 
86     // Matches between 5 and 8 OTP_CHARs. Here, we are assuming an OTP code is 5-8 characters long
87     private static final String OTP_CHARS = format("(%s{5,8})", OTP_CHAR);
88 
89     // A regex matching a line end, non-word char (except dash or underscore), or ideographic char.
90     // It will not consume the end char
91     private static final String END = "(?=\\W|$|\\p{IsIdeographic})";
92 
93     // A regex matching four digit numerical codes
94     private static final String FOUR_DIGITS = "(\\d{4})";
95 
96     private static final String FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM =
97             format("(%s%s)", FIND_DIGIT, OTP_CHARS);
98 
99     // A regex matching two pairs of 3 digits (ex "123 456")
100     private static final String SIX_DIGITS_WITH_SPACE = "(\\d{3}\\s\\d{3})";
101 
102     /**
103      * Combining the regular expressions above, we get an OTP regex:
104      * 1. search for START, THEN
105      * 2. match ONE of
106      *   a. alphanumeric sequence, at least one number, length 5-8, with optional dashes
107      *   b. 4 numbers in a row
108      *   c. pair of 3 digit codes separated by a space
109      * THEN
110      * 3. search for END Ex:
111      * "6454", " 345 678.", "[YDT-456]"
112      */
113     private static final String ALL_OTP =
114             format("%s(%s|%s|%s)%s",
115                     START, FIVE_TO_EIGHT_ALPHANUM_AT_LEAST_ONE_NUM, FOUR_DIGITS,
116                     SIX_DIGITS_WITH_SPACE, END);
117 
118 
119 
120     private static final ThreadLocal<Matcher> OTP_REGEX = ThreadLocal.withInitial(() ->
121             Pattern.compile(ALL_OTP).matcher(""));
122     /**
123      * A Date regular expression. Looks for dates with the month, day, and year separated by dashes.
124      * Handles one and two digit months and days, and four or two-digit years. It makes the
125      * following assumptions:
126      * Dates and months will never be higher than 39
127      * If a four digit year is used, the leading digit will be 1 or 2
128      * This regex is used to eliminate the most common false positive of the OTP regex, and is run
129      * on all messages, even before looking at language-specific regexs.
130      */
131     private static final ThreadLocal<Matcher> DATE_WITH_DASHES_REGEX = ThreadLocal.withInitial(() ->
132             Pattern.compile(format("%s([0-3]?\\d-[0-3]?\\d-([12]\\d)?\\d\\d)%s", START, END))
133                     .matcher(""));
134 
135     // A regex matching the common years of 19xx and 20xx. Used for false positive reduction
136     private static final String COMMON_YEARS = format("%s((19|20)\\d\\d)%s", START, END);
137 
138     // A regex matching three lower case letters. Used for false positive reduction, as no known
139     // OTPs have 3 lowercase letters in sequence.
140     private static final String THREE_LOWERCASE = "(\\p{Ll}{3})";
141 
142     // A combination of common false positives. Run in cases where we don't have a language specific
143     // regular expression.
144     private static final ThreadLocal<Matcher> FALSE_POSITIVE_REGEX = ThreadLocal.withInitial(() ->
145             Pattern.compile(format("%s|%s", COMMON_YEARS, THREE_LOWERCASE)).matcher(""));
146 
147     /**
148      * A list of regular expressions representing words found in an OTP context (non case sensitive)
149      * Note: TAN is short for Transaction Authentication Number
150      */
151     private static final String[] ENGLISH_CONTEXT_WORDS = new String[] {
152             "pin", "pass[-\\s]?(code|word)", "TAN", "otp", "2fa", "(two|2)[-\\s]?factor",
153             "log[-\\s]?in", "auth(enticat(e|ion))?", "code", "secret", "verif(y|ication)",
154             "confirm(ation)?"
155     };
156 
157     /**
158      * Creates a regular expression to match any of a series of individual words, case insensitive.
159      */
createDictionaryRegex(String[] words)160     private static Matcher createDictionaryRegex(String[] words) {
161         StringBuilder regex = new StringBuilder("(?i)\\b(");
162         for (int i = 0; i < words.length; i++) {
163             regex.append(words[i]);
164             if (i != words.length - 1) {
165                 regex.append("|");
166             }
167         }
168         regex.append(")\\b");
169         return Pattern.compile(regex.toString()).matcher("");
170     }
171 
172     static {
ULocale.ENGLISH.toLanguageTag()173         EXTRA_LANG_OTP_REGEX.put(ULocale.ENGLISH.toLanguageTag(), ThreadLocal.withInitial(() ->
174                 createDictionaryRegex(ENGLISH_CONTEXT_WORDS)));
175     }
176 
177     /**
178      * Checks if the sensitive parts of a notification might contain an OTP, based on several
179      * regular expressions, and potentially using a textClassifier to eliminate false positives
180      *
181      * @param notification The notification whose content should be checked
182      * @param checkForFalsePositives If true, will ensure the content does not match the date regex.
183      *                               If a TextClassifier is provided, it will then try to find a
184      *                               language specific regex. If it is successful, it will use that
185      *                               regex to check for false positives. If it is not, it will use
186      *                               the TextClassifier (if provided), plus the year and three
187      *                               lowercase regexes to remove possible false positives.
188      * @param tc If non null, the provided TextClassifier will be used to find the language of the
189      *           text, and look for a language-specific regex for it. If checkForFalsePositives is
190      *           true will also use the classifier to find flight codes and addresses.
191      * @return True if the regex matches and ensureNotDate is false, or the date regex failed to
192      *     match, false otherwise.
193      */
containsOtp(Notification notification, boolean checkForFalsePositives, TextClassifier tc)194     public static boolean containsOtp(Notification notification,
195             boolean checkForFalsePositives, TextClassifier tc) {
196         if (notification == null || !SdkLevel.isAtLeastV()) {
197             return false;
198         }
199 
200         String sensitiveText = getTextForDetection(notification);
201         Matcher otpMatcher = OTP_REGEX.get();
202         otpMatcher.reset(sensitiveText);
203         boolean otpMatch = otpMatcher.find();
204         if (!checkForFalsePositives || !otpMatch) {
205             return otpMatch;
206         }
207 
208         if (allOtpMatchesAreFalsePositives(sensitiveText, DATE_WITH_DASHES_REGEX.get())) {
209             return false;
210         }
211 
212         if (tc != null) {
213             Matcher languageSpecificMatcher = getLanguageSpecificRegex(sensitiveText, tc);
214             if (languageSpecificMatcher != null) {
215                 languageSpecificMatcher.reset(sensitiveText);
216                 // Only use the language-specific regex for false positives
217                 return languageSpecificMatcher.find();
218             }
219             // Else, use TC to check for false positives
220             if (hasFalsePositivesTcCheck(sensitiveText, tc)) {
221                 return false;
222             }
223         }
224 
225         return !allOtpMatchesAreFalsePositives(sensitiveText, FALSE_POSITIVE_REGEX.get());
226     }
227 
228     /**
229      * Checks that a given text has at least one match for one regex, that doesn't match another
230      * @param text The full text to check
231      * @param falsePositiveRegex A regex that should not match the OTP regex (for at least one match
232      *                           found by the OTP regex
233      * @return true, if all matches found by OTP_REGEX are also found by "shouldNotMatch"
234      */
allOtpMatchesAreFalsePositives(String text, Matcher falsePositiveRegex)235     private static boolean allOtpMatchesAreFalsePositives(String text,
236             Matcher falsePositiveRegex) {
237         falsePositiveRegex = falsePositiveRegex.reset(text);
238         if (!falsePositiveRegex.find()) {
239             return false;
240         }
241         Matcher otpMatcher = OTP_REGEX.get();
242         otpMatcher.reset(text);
243         while (otpMatcher.find()) {
244             falsePositiveRegex.reset(otpMatcher.group());
245             if (!falsePositiveRegex.find()) {
246                 // A possible otp was not matched by the false positive regex
247                 return false;
248             }
249         }
250         // All otp matches were matched by the false positive regex
251         return true;
252     }
253 
getLanguageSpecificRegex(String text, TextClassifier tc)254     private static Matcher getLanguageSpecificRegex(String text, TextClassifier tc) {
255         TextLanguage.Request langRequest = new TextLanguage.Request.Builder(text).build();
256         TextLanguage lang = tc.detectLanguage(langRequest);
257         for (int i = 0; i < lang.getLocaleHypothesisCount(); i++) {
258             ULocale locale = lang.getLocale(i);
259             if (lang.getConfidenceScore(locale) >= TC_THRESHOLD
260                     && EXTRA_LANG_OTP_REGEX.containsKey(locale.toLanguageTag())) {
261                 return EXTRA_LANG_OTP_REGEX.get(locale.toLanguageTag()).get();
262             }
263         }
264         return null;
265     }
266 
hasFalsePositivesTcCheck(String text, TextClassifier tc)267     private static boolean hasFalsePositivesTcCheck(String text, TextClassifier tc) {
268         // Use TC to eliminate false positives from a regex match, namely: flight codes, and
269         // addresses
270         List<String> included = new ArrayList<>(List.of(TYPE_FLIGHT_NUMBER, TYPE_ADDRESS));
271         List<String> excluded = new ArrayList<>(List.of(TYPE_PHONE));
272         TextClassifier.EntityConfig config =
273                 new TextClassifier.EntityConfig.Builder().setIncludedTypes(
274                         included).setExcludedTypes(excluded).build();
275         TextLinks.Request request =
276                 new TextLinks.Request.Builder(text).setEntityConfig(config).build();
277         TextLinks links = tc.generateLinks(request);
278         for (TextLinks.TextLink link : links.getLinks()) {
279             if (link.getConfidenceScore(TYPE_FLIGHT_NUMBER) > TC_THRESHOLD
280                     || link.getConfidenceScore(TYPE_ADDRESS) > TC_THRESHOLD) {
281                 return true;
282             }
283         }
284         return false;
285     }
286 
287     /**
288      * Gets the sections of text in a notification that should be checked for sensitive content.
289      * This includes the text, title, subtext, messages, and extra text lines.
290      * @param notification The notification whose content should be filtered
291      * @return The extracted text fields
292      */
getTextForDetection(Notification notification)293     public static String getTextForDetection(Notification notification) {
294         if (notification.extras == null || !SdkLevel.isAtLeastV()
295                 || !Flags.redactSensitiveNotificationsFromUntrustedListeners()) {
296             return "";
297         }
298         Bundle extras = notification.extras;
299         CharSequence title = extras.getCharSequence(EXTRA_TITLE);
300         CharSequence text = extras.getCharSequence(EXTRA_TEXT);
301         CharSequence subText = extras.getCharSequence(EXTRA_SUB_TEXT);
302         StringBuilder builder = new StringBuilder()
303                 .append(title != null ? title : "").append(" ")
304                 .append(text != null ? text : "").append(" ")
305                 .append(subText != null ? subText : "").append(" ");
306         if (Flags.redactSensitiveNotificationsBigTextStyle()) {
307             CharSequence bigText = extras.getCharSequence(EXTRA_BIG_TEXT);
308             CharSequence bigTitleText = extras.getCharSequence(EXTRA_TITLE_BIG);
309             CharSequence summaryText = extras.getCharSequence(EXTRA_SUMMARY_TEXT);
310             builder.append(bigText != null ? bigText : "").append(" ")
311                     .append(bigTitleText != null ? bigTitleText : "").append(" ")
312                     .append(summaryText != null ? summaryText : "").append(" ");
313         }
314         CharSequence[] textLines = extras.getCharSequenceArray(EXTRA_TEXT_LINES);
315         if (textLines != null) {
316             for (CharSequence line : textLines) {
317                 builder.append(line).append(" ");
318             }
319         }
320         List<Message> messages = Message.getMessagesFromBundleArray(
321                 extras.getParcelableArray(EXTRA_MESSAGES, Parcelable.class));
322         // Sort the newest messages (largest timestamp) first
323         messages.sort((MessagingStyle.Message lhs, MessagingStyle.Message rhs) ->
324                 Long.compare(rhs.getTimestamp(), lhs.getTimestamp()));
325         for (MessagingStyle.Message message : messages) {
326             builder.append(message.getText()).append(" ");
327         }
328         return builder.length() <= MAX_SENSITIVE_TEXT_LEN ? builder.toString()
329                 : builder.substring(0, MAX_SENSITIVE_TEXT_LEN);
330     }
331 
332     /**
333      * Determines if a notification should be checked for an OTP, based on category, style, and
334      * possible otp content (as determined by a regular expression).
335      * @param notification The notification whose content should be checked
336      * @return true, if further checks for OTP codes should be performed, false otherwise
337      */
shouldCheckForOtp(Notification notification)338     public static boolean shouldCheckForOtp(Notification notification) {
339         if (notification == null || !SdkLevel.isAtLeastV()
340                 || !Flags.redactSensitiveNotificationsFromUntrustedListeners()) {
341             return false;
342         }
343         return SENSITIVE_NOTIFICATION_CATEGORIES.contains(notification.category)
344                 || isStyle(notification, Notification.MessagingStyle.class)
345                 || isStyle(notification, Notification.InboxStyle.class)
346                 || containsOtp(notification, false, null)
347                 || shouldCheckForOtp(notification.publicVersion);
348     }
349 
isStyle(Notification notification, Class<? extends Notification.Style> styleClass)350     private static boolean isStyle(Notification notification,
351             Class<? extends Notification.Style> styleClass) {
352         if (notification.extras == null) {
353             return false;
354         }
355         String templateClass = notification.extras.getString(Notification.EXTRA_TEMPLATE);
356         return Objects.equals(templateClass, styleClass.getName());
357     }
358 
NotificationOtpDetectionHelper()359     private NotificationOtpDetectionHelper() { }
360 }
361