1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.os;
18 
19 import android.util.proto.ProtoOutputStream;
20 
21 import java.util.Arrays;
22 
23 /**
24  * A simple pattern matcher, which is safe to use on untrusted data: it does
25  * not provide full reg-exp support, only simple globbing that can not be
26  * used maliciously.
27  */
28 public class PatternMatcher implements Parcelable {
29     /**
30      * Pattern type: the given pattern must exactly match the string it is
31      * tested against.
32      */
33     public static final int PATTERN_LITERAL = 0;
34 
35     /**
36      * Pattern type: the given pattern must match the
37      * beginning of the string it is tested against.
38      */
39     public static final int PATTERN_PREFIX = 1;
40 
41     /**
42      * Pattern type: the given pattern is interpreted with a
43      * simple glob syntax for matching against the string it is tested against.
44      * In this syntax, you can use the '*' character to match against zero or
45      * more occurrences of the character immediately before.  If the
46      * character before it is '.' it will match any character.  The character
47      * '\' can be used as an escape.  This essentially provides only the '*'
48      * wildcard part of a normal regexp.
49      */
50     public static final int PATTERN_SIMPLE_GLOB = 2;
51 
52     /**
53      * Pattern type: the given pattern is interpreted with a regular
54      * expression-like syntax for matching against the string it is tested
55      * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
56      * with full support for character ranges and the not ({@code ^}) modifier.
57      * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
58      * for one-or-more and full range ({@code {...}}) support. This is a simple
59      * evaluation implementation in which matching is done against the pattern in
60      * real time with no backtracking support.
61      */
62     public static final int PATTERN_ADVANCED_GLOB = 3;
63 
64     // token types for advanced matching
65     private static final int TOKEN_TYPE_LITERAL = 0;
66     private static final int TOKEN_TYPE_ANY = 1;
67     private static final int TOKEN_TYPE_SET = 2;
68     private static final int TOKEN_TYPE_INVERSE_SET = 3;
69 
70     // Return for no match
71     private static final int NO_MATCH = -1;
72 
73     private static final String TAG = "PatternMatcher";
74 
75     // Parsed placeholders for advanced patterns
76     private static final int PARSED_TOKEN_CHAR_SET_START = -1;
77     private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
78     private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
79     private static final int PARSED_TOKEN_CHAR_ANY = -4;
80     private static final int PARSED_MODIFIER_RANGE_START = -5;
81     private static final int PARSED_MODIFIER_RANGE_STOP = -6;
82     private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
83     private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
84 
85     private final String mPattern;
86     private final int mType;
87     private final int[] mParsedPattern;
88 
89 
90     private static final int MAX_PATTERN_STORAGE = 2048;
91     // workspace to use for building a parsed advanced pattern;
92     private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
93 
PatternMatcher(String pattern, int type)94     public PatternMatcher(String pattern, int type) {
95         mPattern = pattern;
96         mType = type;
97         if (mType == PATTERN_ADVANCED_GLOB) {
98             mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
99         } else {
100             mParsedPattern = null;
101         }
102     }
103 
getPath()104     public final String getPath() {
105         return mPattern;
106     }
107 
getType()108     public final int getType() {
109         return mType;
110     }
111 
match(String str)112     public boolean match(String str) {
113         return matchPattern(str, mPattern, mParsedPattern, mType);
114     }
115 
toString()116     public String toString() {
117         String type = "? ";
118         switch (mType) {
119             case PATTERN_LITERAL:
120                 type = "LITERAL: ";
121                 break;
122             case PATTERN_PREFIX:
123                 type = "PREFIX: ";
124                 break;
125             case PATTERN_SIMPLE_GLOB:
126                 type = "GLOB: ";
127                 break;
128             case PATTERN_ADVANCED_GLOB:
129                 type = "ADVANCED: ";
130                 break;
131         }
132         return "PatternMatcher{" + type + mPattern + "}";
133     }
134 
135     /** @hide */
writeToProto(ProtoOutputStream proto, long fieldId)136     public void writeToProto(ProtoOutputStream proto, long fieldId) {
137         long token = proto.start(fieldId);
138         proto.write(PatternMatcherProto.PATTERN, mPattern);
139         proto.write(PatternMatcherProto.TYPE, mType);
140         // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to
141         // match the current data structure.
142         proto.end(token);
143     }
144 
describeContents()145     public int describeContents() {
146         return 0;
147     }
148 
writeToParcel(Parcel dest, int flags)149     public void writeToParcel(Parcel dest, int flags) {
150         dest.writeString(mPattern);
151         dest.writeInt(mType);
152         dest.writeIntArray(mParsedPattern);
153     }
154 
PatternMatcher(Parcel src)155     public PatternMatcher(Parcel src) {
156         mPattern = src.readString();
157         mType = src.readInt();
158         mParsedPattern = src.createIntArray();
159     }
160 
161     public static final Parcelable.Creator<PatternMatcher> CREATOR
162             = new Parcelable.Creator<PatternMatcher>() {
163         public PatternMatcher createFromParcel(Parcel source) {
164             return new PatternMatcher(source);
165         }
166 
167         public PatternMatcher[] newArray(int size) {
168             return new PatternMatcher[size];
169         }
170     };
171 
matchPattern(String match, String pattern, int[] parsedPattern, int type)172     static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
173         if (match == null) return false;
174         if (type == PATTERN_LITERAL) {
175             return pattern.equals(match);
176         } if (type == PATTERN_PREFIX) {
177             return match.startsWith(pattern);
178         } else if (type == PATTERN_SIMPLE_GLOB) {
179             return matchGlobPattern(pattern, match);
180         } else if (type == PATTERN_ADVANCED_GLOB) {
181             return matchAdvancedPattern(parsedPattern, match);
182         }
183         return false;
184     }
185 
matchGlobPattern(String pattern, String match)186     static boolean matchGlobPattern(String pattern, String match) {
187         final int NP = pattern.length();
188         if (NP <= 0) {
189             return match.length() <= 0;
190         }
191         final int NM = match.length();
192         int ip = 0, im = 0;
193         char nextChar = pattern.charAt(0);
194         while ((ip<NP) && (im<NM)) {
195             char c = nextChar;
196             ip++;
197             nextChar = ip < NP ? pattern.charAt(ip) : 0;
198             final boolean escaped = (c == '\\');
199             if (escaped) {
200                 c = nextChar;
201                 ip++;
202                 nextChar = ip < NP ? pattern.charAt(ip) : 0;
203             }
204             if (nextChar == '*') {
205                 if (!escaped && c == '.') {
206                     if (ip >= (NP-1)) {
207                         // at the end with a pattern match, so
208                         // all is good without checking!
209                         return true;
210                     }
211                     ip++;
212                     nextChar = pattern.charAt(ip);
213                     // Consume everything until the next character in the
214                     // pattern is found.
215                     if (nextChar == '\\') {
216                         ip++;
217                         nextChar = ip < NP ? pattern.charAt(ip) : 0;
218                     }
219                     do {
220                         if (match.charAt(im) == nextChar) {
221                             break;
222                         }
223                         im++;
224                     } while (im < NM);
225                     if (im == NM) {
226                         // Whoops, the next character in the pattern didn't
227                         // exist in the match.
228                         return false;
229                     }
230                     ip++;
231                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
232                     im++;
233                 } else {
234                     // Consume only characters matching the one before '*'.
235                     do {
236                         if (match.charAt(im) != c) {
237                             break;
238                         }
239                         im++;
240                     } while (im < NM);
241                     ip++;
242                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
243                 }
244             } else {
245                 if (c != '.' && match.charAt(im) != c) return false;
246                 im++;
247             }
248         }
249 
250         if (ip >= NP && im >= NM) {
251             // Reached the end of both strings, all is good!
252             return true;
253         }
254 
255         // One last check: we may have finished the match string, but still
256         // have a '.*' at the end of the pattern, which should still count
257         // as a match.
258         if (ip == NP-2 && pattern.charAt(ip) == '.'
259             && pattern.charAt(ip+1) == '*') {
260             return true;
261         }
262 
263         return false;
264     }
265 
266     /**
267      * Parses the advanced pattern and returns an integer array representation of it. The integer
268      * array treats each field as a character if positive and a unique token placeholder if
269      * negative. This method will throw on any pattern structure violations.
270      */
271     synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
272         int ip = 0;
273         final int LP = pattern.length();
274 
275         int it = 0;
276 
277         boolean inSet = false;
278         boolean inRange = false;
279         boolean inCharClass = false;
280 
281         boolean addToParsedPattern;
282 
283         while (ip < LP) {
284             if (it > MAX_PATTERN_STORAGE - 3) {
285                 throw new IllegalArgumentException("Pattern is too large!");
286             }
287 
288             char c = pattern.charAt(ip);
289             addToParsedPattern = false;
290 
291             switch (c) {
292                 case '[':
293                     if (inSet) {
294                         addToParsedPattern = true; // treat as literal or char class in set
295                     } else {
296                         if (pattern.charAt(ip + 1) == '^') {
297                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
298                             ip++; // skip over the '^'
299                         } else {
300                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
301                         }
302                         ip++; // move to the next pattern char
303                         inSet = true;
304                         continue;
305                     }
306                     break;
307                 case ']':
308                     if (!inSet) {
309                         addToParsedPattern = true; // treat as literal outside of set
310                     } else {
311                         int parsedToken = sParsedPatternScratch[it - 1];
312                         if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
313                             parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
314                             throw new IllegalArgumentException(
315                                     "You must define characters in a set.");
316                         }
317                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
318                         inSet = false;
319                         inCharClass = false;
320                     }
321                     break;
322                 case '{':
323                     if (!inSet) {
324                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
325                             throw new IllegalArgumentException("Modifier must follow a token.");
326                         }
327                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
328                         ip++;
329                         inRange = true;
330                     }
331                     break;
332                 case '}':
333                     if (inRange) { // only terminate the range if we're currently in one
334                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
335                         inRange = false;
336                     }
337                     break;
338                 case '*':
339                     if (!inSet) {
340                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
341                             throw new IllegalArgumentException("Modifier must follow a token.");
342                         }
343                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
344                     }
345                     break;
346                 case '+':
347                     if (!inSet) {
348                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
349                             throw new IllegalArgumentException("Modifier must follow a token.");
350                         }
351                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
352                     }
353                     break;
354                 case '.':
355                     if (!inSet) {
356                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
357                     }
358                     break;
359                 case '\\': // escape
360                     if (ip + 1 >= LP) {
361                         throw new IllegalArgumentException("Escape found at end of pattern!");
362                     }
363                     c = pattern.charAt(++ip);
364                     addToParsedPattern = true;
365                     break;
366                 default:
367                     addToParsedPattern = true;
368                     break;
369             }
370             if (inSet) {
371                 if (inCharClass) {
372                     sParsedPatternScratch[it++] = c;
373                     inCharClass = false;
374                 } else {
375                     // look forward for character class
376                     if (ip + 2 < LP
377                             && pattern.charAt(ip + 1) == '-'
378                             && pattern.charAt(ip + 2) != ']') {
379                         inCharClass = true;
380                         sParsedPatternScratch[it++] = c; // set first token as lower end of range
381                         ip++; // advance past dash
382                     } else { // literal
383                         sParsedPatternScratch[it++] = c; // set first token as literal
384                         sParsedPatternScratch[it++] = c; // set second set as literal
385                     }
386                 }
387             } else if (inRange) {
388                 int endOfSet = pattern.indexOf('}', ip);
389                 if (endOfSet < 0) {
390                     throw new IllegalArgumentException("Range not ended with '}'");
391                 }
392                 String rangeString = pattern.substring(ip, endOfSet);
393                 int commaIndex = rangeString.indexOf(',');
394                 try {
395                     final int rangeMin;
396                     final int rangeMax;
397                     if (commaIndex < 0) {
398                         int parsedRange = Integer.parseInt(rangeString);
399                         rangeMin = rangeMax = parsedRange;
400                     } else {
401                         rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
402                         if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
403                             rangeMax = Integer.MAX_VALUE;
404                         } else {
405                             rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
406                         }
407                     }
408                     if (rangeMin > rangeMax) {
409                         throw new IllegalArgumentException(
410                             "Range quantifier minimum is greater than maximum");
411                     }
412                     sParsedPatternScratch[it++] = rangeMin;
413                     sParsedPatternScratch[it++] = rangeMax;
414                 } catch (NumberFormatException e) {
415                     throw new IllegalArgumentException("Range number format incorrect", e);
416                 }
417                 ip = endOfSet;
418                 continue; // don't increment ip
419             } else if (addToParsedPattern) {
420                 sParsedPatternScratch[it++] = c;
421             }
422             ip++;
423         }
424         if (inSet) {
425             throw new IllegalArgumentException("Set was not terminated!");
426         }
427         return Arrays.copyOf(sParsedPatternScratch, it);
428     }
429 
430     private static boolean isParsedModifier(int parsedChar) {
431         return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
432                 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
433                 parsedChar == PARSED_MODIFIER_RANGE_STOP ||
434                 parsedChar == PARSED_MODIFIER_RANGE_START;
435     }
436 
437     static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
438 
439         // create indexes
440         int ip = 0, im = 0;
441 
442         // one-time length check
443         final int LP = parsedPattern.length, LM = match.length();
444 
445         // The current character being analyzed in the pattern
446         int patternChar;
447 
448         int tokenType;
449 
450         int charSetStart = 0, charSetEnd = 0;
451 
452         while (ip < LP) { // we still have content in the pattern
453 
454             patternChar = parsedPattern[ip];
455             // get the match type of the next verb
456 
457             switch (patternChar) {
458                 case PARSED_TOKEN_CHAR_ANY:
459                     tokenType = TOKEN_TYPE_ANY;
460                     ip++;
461                     break;
462                 case PARSED_TOKEN_CHAR_SET_START:
463                 case PARSED_TOKEN_CHAR_SET_INVERSE_START:
464                     tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
465                             ? TOKEN_TYPE_SET
466                             : TOKEN_TYPE_INVERSE_SET;
467                     charSetStart = ip + 1; // start from the char after the set start
468                     while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
469                     charSetEnd = ip - 1; // we're on the set stop, end is the previous
470                     ip++; // move the pointer to the next pattern entry
471                     break;
472                 default:
473                     charSetStart = ip;
474                     tokenType = TOKEN_TYPE_LITERAL;
475                     ip++;
476                     break;
477             }
478 
479             final int minRepetition;
480             final int maxRepetition;
481 
482             // look for a match length modifier
483             if (ip >= LP) {
484                 minRepetition = maxRepetition = 1;
485             } else {
486                 patternChar = parsedPattern[ip];
487                 switch (patternChar) {
488                     case PARSED_MODIFIER_ZERO_OR_MORE:
489                         minRepetition = 0;
490                         maxRepetition = Integer.MAX_VALUE;
491                         ip++;
492                         break;
493                     case PARSED_MODIFIER_ONE_OR_MORE:
494                         minRepetition = 1;
495                         maxRepetition = Integer.MAX_VALUE;
496                         ip++;
497                         break;
498                     case PARSED_MODIFIER_RANGE_START:
499                         minRepetition = parsedPattern[++ip];
500                         maxRepetition = parsedPattern[++ip];
501                         ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
502                         break;
503                     default:
504                         minRepetition = maxRepetition = 1; // implied literal
505                         break;
506                 }
507             }
508             if (minRepetition > maxRepetition) {
509                 return false;
510             }
511 
512             // attempt to match as many characters as possible
513             int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
514                     parsedPattern, charSetStart, charSetEnd);
515 
516             // if we found a conflict, return false immediately
517             if (matched == NO_MATCH) {
518                 return false;
519             }
520 
521             // move the match pointer the number of characters matched
522             im += matched;
523         }
524         return ip >= LP && im >= LM; // have parsed entire string and regex
525     }
526 
527     private static int matchChars(String match, int im, final int lm, int tokenType,
528             int minRepetition, int maxRepetition, int[] parsedPattern,
529             int tokenStart, int tokenEnd) {
530         int matched = 0;
531 
532         while(matched < maxRepetition
533                 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
534                     tokenEnd)) {
535             matched++;
536         }
537 
538         return matched < minRepetition ? NO_MATCH : matched;
539     }
540 
541     private static boolean matchChar(String match, int im, final int lm, int tokenType,
542             int[] parsedPattern, int tokenStart, int tokenEnd) {
543         if (im >= lm) { // we've overrun the string, no match
544             return false;
545         }
546         switch (tokenType) {
547             case TOKEN_TYPE_ANY:
548                 return true;
549             case TOKEN_TYPE_SET:
550                 for (int i = tokenStart; i < tokenEnd; i += 2) {
551                     char matchChar = match.charAt(im);
552                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
553                         return true;
554                     }
555                 }
556                 return false;
557             case TOKEN_TYPE_INVERSE_SET:
558                 for (int i = tokenStart; i < tokenEnd; i += 2) {
559                     char matchChar = match.charAt(im);
560                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
561                         return false;
562                     }
563                 }
564                 return true;
565             case TOKEN_TYPE_LITERAL:
566                 return match.charAt(im) == parsedPattern[tokenStart];
567             default:
568                 return false;
569         }
570     }
571 }