1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.os; 18 19 import android.util.proto.ProtoOutputStream; 20 21 import java.util.Arrays; 22 23 /** 24 * A simple pattern matcher, which is safe to use on untrusted data: it does 25 * not provide full reg-exp support, only simple globbing that can not be 26 * used maliciously. 27 */ 28 public class PatternMatcher implements Parcelable { 29 /** 30 * Pattern type: the given pattern must exactly match the string it is 31 * tested against. 32 */ 33 public static final int PATTERN_LITERAL = 0; 34 35 /** 36 * Pattern type: the given pattern must match the 37 * beginning of the string it is tested against. 38 */ 39 public static final int PATTERN_PREFIX = 1; 40 41 /** 42 * Pattern type: the given pattern is interpreted with a 43 * simple glob syntax for matching against the string it is tested against. 44 * In this syntax, you can use the '*' character to match against zero or 45 * more occurrences of the character immediately before. If the 46 * character before it is '.' it will match any character. The character 47 * '\' can be used as an escape. This essentially provides only the '*' 48 * wildcard part of a normal regexp. 49 */ 50 public static final int PATTERN_SIMPLE_GLOB = 2; 51 52 /** 53 * Pattern type: the given pattern is interpreted with a regular 54 * expression-like syntax for matching against the string it is tested 55 * against. Supported tokens include dot ({@code .}) and sets ({@code [...]}) 56 * with full support for character ranges and the not ({@code ^}) modifier. 57 * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +}) 58 * for one-or-more and full range ({@code {...}}) support. This is a simple 59 * evaluation implementation in which matching is done against the pattern in 60 * real time with no backtracking support. 61 */ 62 public static final int PATTERN_ADVANCED_GLOB = 3; 63 64 // token types for advanced matching 65 private static final int TOKEN_TYPE_LITERAL = 0; 66 private static final int TOKEN_TYPE_ANY = 1; 67 private static final int TOKEN_TYPE_SET = 2; 68 private static final int TOKEN_TYPE_INVERSE_SET = 3; 69 70 // Return for no match 71 private static final int NO_MATCH = -1; 72 73 private static final String TAG = "PatternMatcher"; 74 75 // Parsed placeholders for advanced patterns 76 private static final int PARSED_TOKEN_CHAR_SET_START = -1; 77 private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2; 78 private static final int PARSED_TOKEN_CHAR_SET_STOP = -3; 79 private static final int PARSED_TOKEN_CHAR_ANY = -4; 80 private static final int PARSED_MODIFIER_RANGE_START = -5; 81 private static final int PARSED_MODIFIER_RANGE_STOP = -6; 82 private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7; 83 private static final int PARSED_MODIFIER_ONE_OR_MORE = -8; 84 85 private final String mPattern; 86 private final int mType; 87 private final int[] mParsedPattern; 88 89 90 private static final int MAX_PATTERN_STORAGE = 2048; 91 // workspace to use for building a parsed advanced pattern; 92 private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE]; 93 PatternMatcher(String pattern, int type)94 public PatternMatcher(String pattern, int type) { 95 mPattern = pattern; 96 mType = type; 97 if (mType == PATTERN_ADVANCED_GLOB) { 98 mParsedPattern = parseAndVerifyAdvancedPattern(pattern); 99 } else { 100 mParsedPattern = null; 101 } 102 } 103 getPath()104 public final String getPath() { 105 return mPattern; 106 } 107 getType()108 public final int getType() { 109 return mType; 110 } 111 match(String str)112 public boolean match(String str) { 113 return matchPattern(str, mPattern, mParsedPattern, mType); 114 } 115 toString()116 public String toString() { 117 String type = "? "; 118 switch (mType) { 119 case PATTERN_LITERAL: 120 type = "LITERAL: "; 121 break; 122 case PATTERN_PREFIX: 123 type = "PREFIX: "; 124 break; 125 case PATTERN_SIMPLE_GLOB: 126 type = "GLOB: "; 127 break; 128 case PATTERN_ADVANCED_GLOB: 129 type = "ADVANCED: "; 130 break; 131 } 132 return "PatternMatcher{" + type + mPattern + "}"; 133 } 134 135 /** @hide */ writeToProto(ProtoOutputStream proto, long fieldId)136 public void writeToProto(ProtoOutputStream proto, long fieldId) { 137 long token = proto.start(fieldId); 138 proto.write(PatternMatcherProto.PATTERN, mPattern); 139 proto.write(PatternMatcherProto.TYPE, mType); 140 // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to 141 // match the current data structure. 142 proto.end(token); 143 } 144 describeContents()145 public int describeContents() { 146 return 0; 147 } 148 writeToParcel(Parcel dest, int flags)149 public void writeToParcel(Parcel dest, int flags) { 150 dest.writeString(mPattern); 151 dest.writeInt(mType); 152 dest.writeIntArray(mParsedPattern); 153 } 154 PatternMatcher(Parcel src)155 public PatternMatcher(Parcel src) { 156 mPattern = src.readString(); 157 mType = src.readInt(); 158 mParsedPattern = src.createIntArray(); 159 } 160 161 public static final Parcelable.Creator<PatternMatcher> CREATOR 162 = new Parcelable.Creator<PatternMatcher>() { 163 public PatternMatcher createFromParcel(Parcel source) { 164 return new PatternMatcher(source); 165 } 166 167 public PatternMatcher[] newArray(int size) { 168 return new PatternMatcher[size]; 169 } 170 }; 171 matchPattern(String match, String pattern, int[] parsedPattern, int type)172 static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) { 173 if (match == null) return false; 174 if (type == PATTERN_LITERAL) { 175 return pattern.equals(match); 176 } if (type == PATTERN_PREFIX) { 177 return match.startsWith(pattern); 178 } else if (type == PATTERN_SIMPLE_GLOB) { 179 return matchGlobPattern(pattern, match); 180 } else if (type == PATTERN_ADVANCED_GLOB) { 181 return matchAdvancedPattern(parsedPattern, match); 182 } 183 return false; 184 } 185 matchGlobPattern(String pattern, String match)186 static boolean matchGlobPattern(String pattern, String match) { 187 final int NP = pattern.length(); 188 if (NP <= 0) { 189 return match.length() <= 0; 190 } 191 final int NM = match.length(); 192 int ip = 0, im = 0; 193 char nextChar = pattern.charAt(0); 194 while ((ip<NP) && (im<NM)) { 195 char c = nextChar; 196 ip++; 197 nextChar = ip < NP ? pattern.charAt(ip) : 0; 198 final boolean escaped = (c == '\\'); 199 if (escaped) { 200 c = nextChar; 201 ip++; 202 nextChar = ip < NP ? pattern.charAt(ip) : 0; 203 } 204 if (nextChar == '*') { 205 if (!escaped && c == '.') { 206 if (ip >= (NP-1)) { 207 // at the end with a pattern match, so 208 // all is good without checking! 209 return true; 210 } 211 ip++; 212 nextChar = pattern.charAt(ip); 213 // Consume everything until the next character in the 214 // pattern is found. 215 if (nextChar == '\\') { 216 ip++; 217 nextChar = ip < NP ? pattern.charAt(ip) : 0; 218 } 219 do { 220 if (match.charAt(im) == nextChar) { 221 break; 222 } 223 im++; 224 } while (im < NM); 225 if (im == NM) { 226 // Whoops, the next character in the pattern didn't 227 // exist in the match. 228 return false; 229 } 230 ip++; 231 nextChar = ip < NP ? pattern.charAt(ip) : 0; 232 im++; 233 } else { 234 // Consume only characters matching the one before '*'. 235 do { 236 if (match.charAt(im) != c) { 237 break; 238 } 239 im++; 240 } while (im < NM); 241 ip++; 242 nextChar = ip < NP ? pattern.charAt(ip) : 0; 243 } 244 } else { 245 if (c != '.' && match.charAt(im) != c) return false; 246 im++; 247 } 248 } 249 250 if (ip >= NP && im >= NM) { 251 // Reached the end of both strings, all is good! 252 return true; 253 } 254 255 // One last check: we may have finished the match string, but still 256 // have a '.*' at the end of the pattern, which should still count 257 // as a match. 258 if (ip == NP-2 && pattern.charAt(ip) == '.' 259 && pattern.charAt(ip+1) == '*') { 260 return true; 261 } 262 263 return false; 264 } 265 266 /** 267 * Parses the advanced pattern and returns an integer array representation of it. The integer 268 * array treats each field as a character if positive and a unique token placeholder if 269 * negative. This method will throw on any pattern structure violations. 270 */ 271 synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) { 272 int ip = 0; 273 final int LP = pattern.length(); 274 275 int it = 0; 276 277 boolean inSet = false; 278 boolean inRange = false; 279 boolean inCharClass = false; 280 281 boolean addToParsedPattern; 282 283 while (ip < LP) { 284 if (it > MAX_PATTERN_STORAGE - 3) { 285 throw new IllegalArgumentException("Pattern is too large!"); 286 } 287 288 char c = pattern.charAt(ip); 289 addToParsedPattern = false; 290 291 switch (c) { 292 case '[': 293 if (inSet) { 294 addToParsedPattern = true; // treat as literal or char class in set 295 } else { 296 if (pattern.charAt(ip + 1) == '^') { 297 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START; 298 ip++; // skip over the '^' 299 } else { 300 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START; 301 } 302 ip++; // move to the next pattern char 303 inSet = true; 304 continue; 305 } 306 break; 307 case ']': 308 if (!inSet) { 309 addToParsedPattern = true; // treat as literal outside of set 310 } else { 311 int parsedToken = sParsedPatternScratch[it - 1]; 312 if (parsedToken == PARSED_TOKEN_CHAR_SET_START || 313 parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) { 314 throw new IllegalArgumentException( 315 "You must define characters in a set."); 316 } 317 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP; 318 inSet = false; 319 inCharClass = false; 320 } 321 break; 322 case '{': 323 if (!inSet) { 324 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 325 throw new IllegalArgumentException("Modifier must follow a token."); 326 } 327 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START; 328 ip++; 329 inRange = true; 330 } 331 break; 332 case '}': 333 if (inRange) { // only terminate the range if we're currently in one 334 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP; 335 inRange = false; 336 } 337 break; 338 case '*': 339 if (!inSet) { 340 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 341 throw new IllegalArgumentException("Modifier must follow a token."); 342 } 343 sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE; 344 } 345 break; 346 case '+': 347 if (!inSet) { 348 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { 349 throw new IllegalArgumentException("Modifier must follow a token."); 350 } 351 sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE; 352 } 353 break; 354 case '.': 355 if (!inSet) { 356 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY; 357 } 358 break; 359 case '\\': // escape 360 if (ip + 1 >= LP) { 361 throw new IllegalArgumentException("Escape found at end of pattern!"); 362 } 363 c = pattern.charAt(++ip); 364 addToParsedPattern = true; 365 break; 366 default: 367 addToParsedPattern = true; 368 break; 369 } 370 if (inSet) { 371 if (inCharClass) { 372 sParsedPatternScratch[it++] = c; 373 inCharClass = false; 374 } else { 375 // look forward for character class 376 if (ip + 2 < LP 377 && pattern.charAt(ip + 1) == '-' 378 && pattern.charAt(ip + 2) != ']') { 379 inCharClass = true; 380 sParsedPatternScratch[it++] = c; // set first token as lower end of range 381 ip++; // advance past dash 382 } else { // literal 383 sParsedPatternScratch[it++] = c; // set first token as literal 384 sParsedPatternScratch[it++] = c; // set second set as literal 385 } 386 } 387 } else if (inRange) { 388 int endOfSet = pattern.indexOf('}', ip); 389 if (endOfSet < 0) { 390 throw new IllegalArgumentException("Range not ended with '}'"); 391 } 392 String rangeString = pattern.substring(ip, endOfSet); 393 int commaIndex = rangeString.indexOf(','); 394 try { 395 final int rangeMin; 396 final int rangeMax; 397 if (commaIndex < 0) { 398 int parsedRange = Integer.parseInt(rangeString); 399 rangeMin = rangeMax = parsedRange; 400 } else { 401 rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex)); 402 if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more) 403 rangeMax = Integer.MAX_VALUE; 404 } else { 405 rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1)); 406 } 407 } 408 if (rangeMin > rangeMax) { 409 throw new IllegalArgumentException( 410 "Range quantifier minimum is greater than maximum"); 411 } 412 sParsedPatternScratch[it++] = rangeMin; 413 sParsedPatternScratch[it++] = rangeMax; 414 } catch (NumberFormatException e) { 415 throw new IllegalArgumentException("Range number format incorrect", e); 416 } 417 ip = endOfSet; 418 continue; // don't increment ip 419 } else if (addToParsedPattern) { 420 sParsedPatternScratch[it++] = c; 421 } 422 ip++; 423 } 424 if (inSet) { 425 throw new IllegalArgumentException("Set was not terminated!"); 426 } 427 return Arrays.copyOf(sParsedPatternScratch, it); 428 } 429 430 private static boolean isParsedModifier(int parsedChar) { 431 return parsedChar == PARSED_MODIFIER_ONE_OR_MORE || 432 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE || 433 parsedChar == PARSED_MODIFIER_RANGE_STOP || 434 parsedChar == PARSED_MODIFIER_RANGE_START; 435 } 436 437 static boolean matchAdvancedPattern(int[] parsedPattern, String match) { 438 439 // create indexes 440 int ip = 0, im = 0; 441 442 // one-time length check 443 final int LP = parsedPattern.length, LM = match.length(); 444 445 // The current character being analyzed in the pattern 446 int patternChar; 447 448 int tokenType; 449 450 int charSetStart = 0, charSetEnd = 0; 451 452 while (ip < LP) { // we still have content in the pattern 453 454 patternChar = parsedPattern[ip]; 455 // get the match type of the next verb 456 457 switch (patternChar) { 458 case PARSED_TOKEN_CHAR_ANY: 459 tokenType = TOKEN_TYPE_ANY; 460 ip++; 461 break; 462 case PARSED_TOKEN_CHAR_SET_START: 463 case PARSED_TOKEN_CHAR_SET_INVERSE_START: 464 tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START 465 ? TOKEN_TYPE_SET 466 : TOKEN_TYPE_INVERSE_SET; 467 charSetStart = ip + 1; // start from the char after the set start 468 while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP); 469 charSetEnd = ip - 1; // we're on the set stop, end is the previous 470 ip++; // move the pointer to the next pattern entry 471 break; 472 default: 473 charSetStart = ip; 474 tokenType = TOKEN_TYPE_LITERAL; 475 ip++; 476 break; 477 } 478 479 final int minRepetition; 480 final int maxRepetition; 481 482 // look for a match length modifier 483 if (ip >= LP) { 484 minRepetition = maxRepetition = 1; 485 } else { 486 patternChar = parsedPattern[ip]; 487 switch (patternChar) { 488 case PARSED_MODIFIER_ZERO_OR_MORE: 489 minRepetition = 0; 490 maxRepetition = Integer.MAX_VALUE; 491 ip++; 492 break; 493 case PARSED_MODIFIER_ONE_OR_MORE: 494 minRepetition = 1; 495 maxRepetition = Integer.MAX_VALUE; 496 ip++; 497 break; 498 case PARSED_MODIFIER_RANGE_START: 499 minRepetition = parsedPattern[++ip]; 500 maxRepetition = parsedPattern[++ip]; 501 ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token 502 break; 503 default: 504 minRepetition = maxRepetition = 1; // implied literal 505 break; 506 } 507 } 508 if (minRepetition > maxRepetition) { 509 return false; 510 } 511 512 // attempt to match as many characters as possible 513 int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition, 514 parsedPattern, charSetStart, charSetEnd); 515 516 // if we found a conflict, return false immediately 517 if (matched == NO_MATCH) { 518 return false; 519 } 520 521 // move the match pointer the number of characters matched 522 im += matched; 523 } 524 return ip >= LP && im >= LM; // have parsed entire string and regex 525 } 526 527 private static int matchChars(String match, int im, final int lm, int tokenType, 528 int minRepetition, int maxRepetition, int[] parsedPattern, 529 int tokenStart, int tokenEnd) { 530 int matched = 0; 531 532 while(matched < maxRepetition 533 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart, 534 tokenEnd)) { 535 matched++; 536 } 537 538 return matched < minRepetition ? NO_MATCH : matched; 539 } 540 541 private static boolean matchChar(String match, int im, final int lm, int tokenType, 542 int[] parsedPattern, int tokenStart, int tokenEnd) { 543 if (im >= lm) { // we've overrun the string, no match 544 return false; 545 } 546 switch (tokenType) { 547 case TOKEN_TYPE_ANY: 548 return true; 549 case TOKEN_TYPE_SET: 550 for (int i = tokenStart; i < tokenEnd; i += 2) { 551 char matchChar = match.charAt(im); 552 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 553 return true; 554 } 555 } 556 return false; 557 case TOKEN_TYPE_INVERSE_SET: 558 for (int i = tokenStart; i < tokenEnd; i += 2) { 559 char matchChar = match.charAt(im); 560 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { 561 return false; 562 } 563 } 564 return true; 565 case TOKEN_TYPE_LITERAL: 566 return match.charAt(im) == parsedPattern[tokenStart]; 567 default: 568 return false; 569 } 570 } 571 }