1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.os;
18 
19 import android.util.Log;
20 
21 import java.util.Arrays;
22 
23 /**
24  * A simple pattern matcher, which is safe to use on untrusted data: it does
25  * not provide full reg-exp support, only simple globbing that can not be
26  * used maliciously.
27  */
28 public class PatternMatcher implements Parcelable {
29     /**
30      * Pattern type: the given pattern must exactly match the string it is
31      * tested against.
32      */
33     public static final int PATTERN_LITERAL = 0;
34 
35     /**
36      * Pattern type: the given pattern must match the
37      * beginning of the string it is tested against.
38      */
39     public static final int PATTERN_PREFIX = 1;
40 
41     /**
42      * Pattern type: the given pattern is interpreted with a
43      * simple glob syntax for matching against the string it is tested against.
44      * In this syntax, you can use the '*' character to match against zero or
45      * more occurrences of the character immediately before.  If the
46      * character before it is '.' it will match any character.  The character
47      * '\' can be used as an escape.  This essentially provides only the '*'
48      * wildcard part of a normal regexp.
49      */
50     public static final int PATTERN_SIMPLE_GLOB = 2;
51 
52     /**
53      * Pattern type: the given pattern is interpreted with a regular
54      * expression-like syntax for matching against the string it is tested
55      * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
56      * with full support for character ranges and the not ({@code ^}) modifier.
57      * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
58      * for one-or-more and full range ({@code {...}}) support. This is a simple
59      * evaluation implementation in which matching is done against the pattern in
60      * real time with no backtracking support.
61      */
62     public static final int PATTERN_ADVANCED_GLOB = 3;
63 
64     // token types for advanced matching
65     private static final int TOKEN_TYPE_LITERAL = 0;
66     private static final int TOKEN_TYPE_ANY = 1;
67     private static final int TOKEN_TYPE_SET = 2;
68     private static final int TOKEN_TYPE_INVERSE_SET = 3;
69 
70     // Return for no match
71     private static final int NO_MATCH = -1;
72 
73     private static final String TAG = "PatternMatcher";
74 
75     // Parsed placeholders for advanced patterns
76     private static final int PARSED_TOKEN_CHAR_SET_START = -1;
77     private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
78     private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
79     private static final int PARSED_TOKEN_CHAR_ANY = -4;
80     private static final int PARSED_MODIFIER_RANGE_START = -5;
81     private static final int PARSED_MODIFIER_RANGE_STOP = -6;
82     private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
83     private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
84 
85     private final String mPattern;
86     private final int mType;
87     private final int[] mParsedPattern;
88 
89 
90     private static final int MAX_PATTERN_STORAGE = 2048;
91     // workspace to use for building a parsed advanced pattern;
92     private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
93 
PatternMatcher(String pattern, int type)94     public PatternMatcher(String pattern, int type) {
95         mPattern = pattern;
96         mType = type;
97         if (mType == PATTERN_ADVANCED_GLOB) {
98             mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
99         } else {
100             mParsedPattern = null;
101         }
102     }
103 
getPath()104     public final String getPath() {
105         return mPattern;
106     }
107 
getType()108     public final int getType() {
109         return mType;
110     }
111 
match(String str)112     public boolean match(String str) {
113         return matchPattern(str, mPattern, mParsedPattern, mType);
114     }
115 
toString()116     public String toString() {
117         String type = "? ";
118         switch (mType) {
119             case PATTERN_LITERAL:
120                 type = "LITERAL: ";
121                 break;
122             case PATTERN_PREFIX:
123                 type = "PREFIX: ";
124                 break;
125             case PATTERN_SIMPLE_GLOB:
126                 type = "GLOB: ";
127                 break;
128             case PATTERN_ADVANCED_GLOB:
129                 type = "ADVANCED: ";
130                 break;
131         }
132         return "PatternMatcher{" + type + mPattern + "}";
133     }
134 
describeContents()135     public int describeContents() {
136         return 0;
137     }
138 
writeToParcel(Parcel dest, int flags)139     public void writeToParcel(Parcel dest, int flags) {
140         dest.writeString(mPattern);
141         dest.writeInt(mType);
142         dest.writeIntArray(mParsedPattern);
143     }
144 
PatternMatcher(Parcel src)145     public PatternMatcher(Parcel src) {
146         mPattern = src.readString();
147         mType = src.readInt();
148         mParsedPattern = src.createIntArray();
149     }
150 
151     public static final Parcelable.Creator<PatternMatcher> CREATOR
152             = new Parcelable.Creator<PatternMatcher>() {
153         public PatternMatcher createFromParcel(Parcel source) {
154             return new PatternMatcher(source);
155         }
156 
157         public PatternMatcher[] newArray(int size) {
158             return new PatternMatcher[size];
159         }
160     };
161 
matchPattern(String match, String pattern, int[] parsedPattern, int type)162     static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
163         if (match == null) return false;
164         if (type == PATTERN_LITERAL) {
165             return pattern.equals(match);
166         } if (type == PATTERN_PREFIX) {
167             return match.startsWith(pattern);
168         } else if (type == PATTERN_SIMPLE_GLOB) {
169             return matchGlobPattern(pattern, match);
170         } else if (type == PATTERN_ADVANCED_GLOB) {
171             return matchAdvancedPattern(parsedPattern, match);
172         }
173         return false;
174     }
175 
matchGlobPattern(String pattern, String match)176     static boolean matchGlobPattern(String pattern, String match) {
177         final int NP = pattern.length();
178         if (NP <= 0) {
179             return match.length() <= 0;
180         }
181         final int NM = match.length();
182         int ip = 0, im = 0;
183         char nextChar = pattern.charAt(0);
184         while ((ip<NP) && (im<NM)) {
185             char c = nextChar;
186             ip++;
187             nextChar = ip < NP ? pattern.charAt(ip) : 0;
188             final boolean escaped = (c == '\\');
189             if (escaped) {
190                 c = nextChar;
191                 ip++;
192                 nextChar = ip < NP ? pattern.charAt(ip) : 0;
193             }
194             if (nextChar == '*') {
195                 if (!escaped && c == '.') {
196                     if (ip >= (NP-1)) {
197                         // at the end with a pattern match, so
198                         // all is good without checking!
199                         return true;
200                     }
201                     ip++;
202                     nextChar = pattern.charAt(ip);
203                     // Consume everything until the next character in the
204                     // pattern is found.
205                     if (nextChar == '\\') {
206                         ip++;
207                         nextChar = ip < NP ? pattern.charAt(ip) : 0;
208                     }
209                     do {
210                         if (match.charAt(im) == nextChar) {
211                             break;
212                         }
213                         im++;
214                     } while (im < NM);
215                     if (im == NM) {
216                         // Whoops, the next character in the pattern didn't
217                         // exist in the match.
218                         return false;
219                     }
220                     ip++;
221                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
222                     im++;
223                 } else {
224                     // Consume only characters matching the one before '*'.
225                     do {
226                         if (match.charAt(im) != c) {
227                             break;
228                         }
229                         im++;
230                     } while (im < NM);
231                     ip++;
232                     nextChar = ip < NP ? pattern.charAt(ip) : 0;
233                 }
234             } else {
235                 if (c != '.' && match.charAt(im) != c) return false;
236                 im++;
237             }
238         }
239 
240         if (ip >= NP && im >= NM) {
241             // Reached the end of both strings, all is good!
242             return true;
243         }
244 
245         // One last check: we may have finished the match string, but still
246         // have a '.*' at the end of the pattern, which should still count
247         // as a match.
248         if (ip == NP-2 && pattern.charAt(ip) == '.'
249             && pattern.charAt(ip+1) == '*') {
250             return true;
251         }
252 
253         return false;
254     }
255 
256     /**
257      * Parses the advanced pattern and returns an integer array representation of it. The integer
258      * array treats each field as a character if positive and a unique token placeholder if
259      * negative. This method will throw on any pattern structure violations.
260      */
261     synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
262         int ip = 0;
263         final int LP = pattern.length();
264 
265         int it = 0;
266 
267         boolean inSet = false;
268         boolean inRange = false;
269         boolean inCharClass = false;
270 
271         boolean addToParsedPattern;
272 
273         while (ip < LP) {
274             if (it > MAX_PATTERN_STORAGE - 3) {
275                 throw new IllegalArgumentException("Pattern is too large!");
276             }
277 
278             char c = pattern.charAt(ip);
279             addToParsedPattern = false;
280 
281             switch (c) {
282                 case '[':
283                     if (inSet) {
284                         addToParsedPattern = true; // treat as literal or char class in set
285                     } else {
286                         if (pattern.charAt(ip + 1) == '^') {
287                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
288                             ip++; // skip over the '^'
289                         } else {
290                             sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
291                         }
292                         ip++; // move to the next pattern char
293                         inSet = true;
294                         continue;
295                     }
296                     break;
297                 case ']':
298                     if (!inSet) {
299                         addToParsedPattern = true; // treat as literal outside of set
300                     } else {
301                         int parsedToken = sParsedPatternScratch[it - 1];
302                         if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
303                             parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
304                             throw new IllegalArgumentException(
305                                     "You must define characters in a set.");
306                         }
307                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
308                         inSet = false;
309                         inCharClass = false;
310                     }
311                     break;
312                 case '{':
313                     if (!inSet) {
314                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
315                             throw new IllegalArgumentException("Modifier must follow a token.");
316                         }
317                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
318                         ip++;
319                         inRange = true;
320                     }
321                     break;
322                 case '}':
323                     if (inRange) { // only terminate the range if we're currently in one
324                         sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
325                         inRange = false;
326                     }
327                     break;
328                 case '*':
329                     if (!inSet) {
330                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
331                             throw new IllegalArgumentException("Modifier must follow a token.");
332                         }
333                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
334                     }
335                     break;
336                 case '+':
337                     if (!inSet) {
338                         if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
339                             throw new IllegalArgumentException("Modifier must follow a token.");
340                         }
341                         sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
342                     }
343                     break;
344                 case '.':
345                     if (!inSet) {
346                         sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
347                     }
348                     break;
349                 case '\\': // escape
350                     if (ip + 1 >= LP) {
351                         throw new IllegalArgumentException("Escape found at end of pattern!");
352                     }
353                     c = pattern.charAt(++ip);
354                     addToParsedPattern = true;
355                     break;
356                 default:
357                     addToParsedPattern = true;
358                     break;
359             }
360             if (inSet) {
361                 if (inCharClass) {
362                     sParsedPatternScratch[it++] = c;
363                     inCharClass = false;
364                 } else {
365                     // look forward for character class
366                     if (ip + 2 < LP
367                             && pattern.charAt(ip + 1) == '-'
368                             && pattern.charAt(ip + 2) != ']') {
369                         inCharClass = true;
370                         sParsedPatternScratch[it++] = c; // set first token as lower end of range
371                         ip++; // advance past dash
372                     } else { // literal
373                         sParsedPatternScratch[it++] = c; // set first token as literal
374                         sParsedPatternScratch[it++] = c; // set second set as literal
375                     }
376                 }
377             } else if (inRange) {
378                 int endOfSet = pattern.indexOf('}', ip);
379                 if (endOfSet < 0) {
380                     throw new IllegalArgumentException("Range not ended with '}'");
381                 }
382                 String rangeString = pattern.substring(ip, endOfSet);
383                 int commaIndex = rangeString.indexOf(',');
384                 try {
385                     final int rangeMin;
386                     final int rangeMax;
387                     if (commaIndex < 0) {
388                         int parsedRange = Integer.parseInt(rangeString);
389                         rangeMin = rangeMax = parsedRange;
390                     } else {
391                         rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
392                         if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
393                             rangeMax = Integer.MAX_VALUE;
394                         } else {
395                             rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
396                         }
397                     }
398                     if (rangeMin > rangeMax) {
399                         throw new IllegalArgumentException(
400                             "Range quantifier minimum is greater than maximum");
401                     }
402                     sParsedPatternScratch[it++] = rangeMin;
403                     sParsedPatternScratch[it++] = rangeMax;
404                 } catch (NumberFormatException e) {
405                     throw new IllegalArgumentException("Range number format incorrect", e);
406                 }
407                 ip = endOfSet;
408                 continue; // don't increment ip
409             } else if (addToParsedPattern) {
410                 sParsedPatternScratch[it++] = c;
411             }
412             ip++;
413         }
414         if (inSet) {
415             throw new IllegalArgumentException("Set was not terminated!");
416         }
417         return Arrays.copyOf(sParsedPatternScratch, it);
418     }
419 
420     private static boolean isParsedModifier(int parsedChar) {
421         return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
422                 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
423                 parsedChar == PARSED_MODIFIER_RANGE_STOP ||
424                 parsedChar == PARSED_MODIFIER_RANGE_START;
425     }
426 
427     static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
428 
429         // create indexes
430         int ip = 0, im = 0;
431 
432         // one-time length check
433         final int LP = parsedPattern.length, LM = match.length();
434 
435         // The current character being analyzed in the pattern
436         int patternChar;
437 
438         int tokenType;
439 
440         int charSetStart = 0, charSetEnd = 0;
441 
442         while (ip < LP) { // we still have content in the pattern
443 
444             patternChar = parsedPattern[ip];
445             // get the match type of the next verb
446 
447             switch (patternChar) {
448                 case PARSED_TOKEN_CHAR_ANY:
449                     tokenType = TOKEN_TYPE_ANY;
450                     ip++;
451                     break;
452                 case PARSED_TOKEN_CHAR_SET_START:
453                 case PARSED_TOKEN_CHAR_SET_INVERSE_START:
454                     tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
455                             ? TOKEN_TYPE_SET
456                             : TOKEN_TYPE_INVERSE_SET;
457                     charSetStart = ip + 1; // start from the char after the set start
458                     while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
459                     charSetEnd = ip - 1; // we're on the set stop, end is the previous
460                     ip++; // move the pointer to the next pattern entry
461                     break;
462                 default:
463                     charSetStart = ip;
464                     tokenType = TOKEN_TYPE_LITERAL;
465                     ip++;
466                     break;
467             }
468 
469             final int minRepetition;
470             final int maxRepetition;
471 
472             // look for a match length modifier
473             if (ip >= LP) {
474                 minRepetition = maxRepetition = 1;
475             } else {
476                 patternChar = parsedPattern[ip];
477                 switch (patternChar) {
478                     case PARSED_MODIFIER_ZERO_OR_MORE:
479                         minRepetition = 0;
480                         maxRepetition = Integer.MAX_VALUE;
481                         ip++;
482                         break;
483                     case PARSED_MODIFIER_ONE_OR_MORE:
484                         minRepetition = 1;
485                         maxRepetition = Integer.MAX_VALUE;
486                         ip++;
487                         break;
488                     case PARSED_MODIFIER_RANGE_START:
489                         minRepetition = parsedPattern[++ip];
490                         maxRepetition = parsedPattern[++ip];
491                         ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
492                         break;
493                     default:
494                         minRepetition = maxRepetition = 1; // implied literal
495                         break;
496                 }
497             }
498             if (minRepetition > maxRepetition) {
499                 return false;
500             }
501 
502             // attempt to match as many characters as possible
503             int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
504                     parsedPattern, charSetStart, charSetEnd);
505 
506             // if we found a conflict, return false immediately
507             if (matched == NO_MATCH) {
508                 return false;
509             }
510 
511             // move the match pointer the number of characters matched
512             im += matched;
513         }
514         return ip >= LP && im >= LM; // have parsed entire string and regex
515     }
516 
517     private static int matchChars(String match, int im, final int lm, int tokenType,
518             int minRepetition, int maxRepetition, int[] parsedPattern,
519             int tokenStart, int tokenEnd) {
520         int matched = 0;
521 
522         while(matched < maxRepetition
523                 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
524                     tokenEnd)) {
525             matched++;
526         }
527 
528         return matched < minRepetition ? NO_MATCH : matched;
529     }
530 
531     private static boolean matchChar(String match, int im, final int lm, int tokenType,
532             int[] parsedPattern, int tokenStart, int tokenEnd) {
533         if (im >= lm) { // we've overrun the string, no match
534             return false;
535         }
536         switch (tokenType) {
537             case TOKEN_TYPE_ANY:
538                 return true;
539             case TOKEN_TYPE_SET:
540                 for (int i = tokenStart; i < tokenEnd; i += 2) {
541                     char matchChar = match.charAt(im);
542                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
543                         return true;
544                     }
545                 }
546                 return false;
547             case TOKEN_TYPE_INVERSE_SET:
548                 for (int i = tokenStart; i < tokenEnd; i += 2) {
549                     char matchChar = match.charAt(im);
550                     if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
551                         return false;
552                     }
553                 }
554                 return true;
555             case TOKEN_TYPE_LITERAL:
556                 return match.charAt(im) == parsedPattern[tokenStart];
557             default:
558                 return false;
559         }
560     }
561 }