/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.net;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 *
 * Sanitizes the Query portion of a URL. Simple example:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User"
 * </code>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized; the registered
 * // space-legal sanitizer keeps the ' '. In the first example the ' ' is
 * // converted to an '_' because the default unregistered parameter
 * // sanitizer does not allow any special characters, and ' ' is a special
 * // character.)
/**
 * {@code UrlQuerySanitizer} splits the query portion of a URL into
 * parameter-value pairs, unescapes them, and passes every value through the
 * {@link ValueSanitizer} that is in effect for its parameter.
 * <p>
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods.
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Implement UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 *
 */
public class UrlQuerySanitizer {

    /**
     * A simple tuple that holds parameter-value pairs.
     * <p>
     * Note: this is an inner (non-static) class, so every instance is tied to
     * the sanitizer that created it.
     */
    public class ParameterValuePair {
        /**
         * Construct a parameter-value tuple.
         * @param parameter an unencoded parameter
         * @param value an unencoded value
         */
        public ParameterValuePair(String parameter, String value) {
            mParameter = parameter;
            mValue = value;
        }

        /**
         * The unencoded parameter
         */
        public String mParameter;

        /**
         * The unencoded value
         */
        public String mValue;
    }

    /** Registered sanitizers, keyed by unencoded parameter name. */
    private final HashMap<String, ValueSanitizer> mSanitizers =
            new HashMap<String, ValueSanitizer>();

    /** Preferred sanitized value for each parameter of the current query. */
    private final HashMap<String, String> mEntries =
            new HashMap<String, String>();

    /** Every sanitized pair of the current query, in query order. */
    private final ArrayList<ParameterValuePair> mEntriesList =
            new ArrayList<ParameterValuePair>();

    private boolean mAllowUnregisteredParamaters;
    private boolean mPreferFirstRepeatedParameter;
    private ValueSanitizer mUnregisteredParameterValueSanitizer =
            getAllIllegal();

    /**
     * A functor used to sanitize a single query value.
     */
    public interface ValueSanitizer {
        /**
         * Sanitize an unencoded value.
         * @param value the unencoded value to sanitize
         * @return the sanitized unencoded value
         */
        public String sanitize(String value);
    }

    /**
     * Sanitize values based on which characters they contain. Illegal
     * characters are replaced with either space or '_', depending upon
     * whether space is a legal character or not.
     */
    public static class IllegalCharacterValueSanitizer implements
            ValueSanitizer {
        /** Bit-set of XXX_OK flags; fixed at construction time. */
        private final int mFlags;

        /**
         * Allow space (' ') characters.
         */
        public final static int SPACE_OK = 1 << 0;
        /**
         * Allow whitespace characters other than space. The
         * other whitespace characters are
         * '\t' '\f' '\n' '\r' and U+000B (vertical tab)
         */
        public final static int OTHER_WHITESPACE_OK = 1 << 1;
        /**
         * Allow characters with character codes 128 and above.
         */
        public final static int NON_7_BIT_ASCII_OK = 1 << 2;
        /**
         * Allow double quote characters. ('"')
         */
        public final static int DQUOTE_OK = 1 << 3;
        /**
         * Allow single quote characters. ('\'')
         */
        public final static int SQUOTE_OK = 1 << 4;
        /**
         * Allow less-than characters. ('&lt;')
         */
        public final static int LT_OK = 1 << 5;
        /**
         * Allow greater-than characters. ('&gt;')
         */
        public final static int GT_OK = 1 << 6;
        /**
         * Allow ampersand characters ('&amp;')
         */
        public final static int AMP_OK = 1 << 7;
        /**
         * Allow percent-sign characters ('%')
         */
        public final static int PCT_OK = 1 << 8;
        /**
         * Allow nul characters ('\0')
         */
        public final static int NUL_OK = 1 << 9;
        /**
         * Allow text to start with a script URL
         * such as "javascript:" or "vbscript:"
         */
        public final static int SCRIPT_URL_OK = 1 << 10;

        /**
         * Mask with all fields set to OK
         */
        public final static int ALL_OK = 0x7ff;

        /**
         * Mask with both regular space and other whitespace OK
         */
        public final static int ALL_WHITESPACE_OK =
                SPACE_OK | OTHER_WHITESPACE_OK;


        // Common flag combinations:

        /**
         * <ul>
         * <li>Deny all special characters.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_ILLEGAL =
                0;
        /**
         * <ul>
         * <li>Allow all special characters except Nul. ('\0').
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_LEGAL =
                ALL_OK & ~NUL_OK;
        /**
         * <ul>
         * <li>Allow all special characters except for:
         * <ul>
         *  <li>whitespace characters
         *  <li>Nul ('\0')
         * </ul>
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_WHITESPACE_LEGAL =
                ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_LEGAL =
                NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Allow spaces.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_AND_SPACE_LEGAL =
                URL_LEGAL | SPACE_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_LEGAL =
                AMP_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_AND_SPACE_LEGAL =
                AMP_OK | SPACE_OK;
        /**
         * <ul>
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int SPACE_LEGAL =
                SPACE_OK;
        /**
         * <ul>
         * <li>Allow all but.
         * <ul>
         *  <li>Nul ('\0')
         *  <li>Angle brackets ('&lt;', '&gt;')
         * </ul>
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
                ALL_OK & ~(NUL_OK | LT_OK | GT_OK);

        // Script URL definitions (compared against the lower-cased value).

        private final static String JAVASCRIPT_PREFIX = "javascript:";

        private final static String VBSCRIPT_PREFIX = "vbscript:";

        private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
                JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());

        /**
         * Construct a sanitizer. The parameters set the behavior of the
         * sanitizer.
         * @param flags some combination of the XXX_OK flags.
         */
        public IllegalCharacterValueSanitizer(
                int flags) {
            mFlags = flags;
        }

        /**
         * Sanitize a value.
         * <ol>
         * <li>If neither spaces nor other white space is OK, then
         * white space will be trimmed from the beginning and end of
         * the URL. (Just the actual white space characters are trimmed, not
         * other control codes.)
         * <li>If script URLs are not OK and the (possibly trimmed) value
         * starts with a script URL prefix, the empty string is returned.
         * <li> Illegal characters will be replaced with
         * either ' ' or '_', depending on whether a space is itself a
         * legal character.
         * </ol>
         * @param value the unencoded value to sanitize, may be null
         * @return the sanitized value, or null if value is null
         */
        @Override
        public String sanitize(String value) {
            if (value == null) {
                return null;
            }

            // If whitespace isn't OK, get rid of whitespace at beginning
            // and end of value.
            String candidate = value;
            if ((mFlags & ALL_WHITESPACE_OK) == 0) {
                candidate = trimWhitespace(candidate);
            }

            // The script check must look at the trimmed text: otherwise
            // " javascript:..." would pass the check and then lose its
            // leading blank, yielding exactly the script URL we reject.
            if ((mFlags & SCRIPT_URL_OK) == 0
                    && startsWithScriptPrefix(candidate)) {
                return "";
            }

            final char replacement = ((mFlags & SPACE_OK) != 0) ? ' ' : '_';
            final int length = candidate.length();
            StringBuilder sanitized = new StringBuilder(length);
            for (int i = 0; i < length; i++) {
                char c = candidate.charAt(i);
                sanitized.append(characterIsLegal(c) ? c : replacement);
            }
            return sanitized.toString();
        }

        /**
         * Check whether a value begins with "javascript:" or "vbscript:",
         * ignoring case.
         * @param value the value to test
         * @return true if value starts with a script URL prefix
         */
        private static boolean startsWithScriptPrefix(String value) {
            if (value.length() < MIN_SCRIPT_PREFIX_LENGTH) {
                return false;
            }
            String asLower = value.toLowerCase(Locale.ROOT);
            return asLower.startsWith(JAVASCRIPT_PREFIX)
                    || asLower.startsWith(VBSCRIPT_PREFIX);
        }

        /**
         * Trim whitespace from the beginning and end of a string.
         * <p>
         * Note: can't use {@link String#trim} because {@link String#trim} has a
         * different definition of whitespace than we want.
         * @param value the string to trim
         * @return the trimmed string
         */
        private String trimWhitespace(String value) {
            int start = 0;
            int last = value.length() - 1;
            int end = last;
            while (start <= end && isWhitespace(value.charAt(start))) {
                start++;
            }
            while (end >= start && isWhitespace(value.charAt(end))) {
                end--;
            }
            if (start == 0 && end == last) {
                // Nothing to trim; hand back the same instance.
                return value;
            }
            return value.substring(start, end + 1);
        }

        /**
         * Check if c is whitespace.
         * @param c character to test
         * @return true if c is a whitespace character
         */
        private boolean isWhitespace(char c) {
            switch (c) {
                case ' ':
                case '\t':
                case '\f':
                case '\n':
                case '\r':
                case 11: /* VT */
                    return true;
                default:
                    return false;
            }
        }

        /**
         * Check whether an individual character is legal. Uses the
         * flag bit-set passed into the constructor.
         * @param c the character to test
         * @return true if c is a legal character
         */
        private boolean characterIsLegal(char c) {
            switch (c) {
                case ' ' : return (mFlags & SPACE_OK) != 0;
                case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
                    return (mFlags & OTHER_WHITESPACE_OK) != 0;
                case '\"': return (mFlags & DQUOTE_OK) != 0;
                case '\'': return (mFlags & SQUOTE_OK) != 0;
                case '<' : return (mFlags & LT_OK) != 0;
                case '>' : return (mFlags & GT_OK) != 0;
                case '&' : return (mFlags & AMP_OK) != 0;
                case '%' : return (mFlags & PCT_OK) != 0;
                case '\0': return (mFlags & NUL_OK) != 0;
                // Remaining printable ASCII is always legal; the other
                // control codes and DEL (127) never are.
                default  : return (c >= 32 && c < 127) ||
                        ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
            }
        }
    }

    /**
     * Get the current value sanitizer used when processing
     * unregistered parameter values.
     * <p>
     * <b>Note:</b> The default unregistered parameter value sanitizer is
     * one that doesn't allow any special characters, similar to what
     * is returned by calling getAllIllegal.
     *
     * @return the current ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public ValueSanitizer getUnregisteredParameterValueSanitizer() {
        return mUnregisteredParameterValueSanitizer;
    }

    /**
     * Set the value sanitizer used when processing unregistered
     * parameter values.
     * @param sanitizer set the ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public void setUnregisteredParameterValueSanitizer(
            ValueSanitizer sanitizer) {
        mUnregisteredParameterValueSanitizer = sanitizer;
    }


    // Private fields for singleton sanitizers:

    private static final ValueSanitizer sAllIllegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_ILLEGAL);

    private static final ValueSanitizer sAllButNulLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);

    private static final ValueSanitizer sAllButWhitespaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);

    private static final ValueSanitizer sURLLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.URL_LEGAL);

    private static final ValueSanitizer sUrlAndSpaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);

    private static final ValueSanitizer sAmpLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.AMP_LEGAL);

    private static final ValueSanitizer sAmpAndSpaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);

    private static final ValueSanitizer sSpaceLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.SPACE_LEGAL);

    private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
            new IllegalCharacterValueSanitizer(
                    IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);

    /**
     * Return a value sanitizer that does not allow any special characters,
     * and also does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllIllegal() {
        return sAllIllegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters. Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulLegal() {
        return sAllButNulLegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters, space (' '), and other whitespace characters.
     * Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButWhitespaceLegal() {
        return sAllButWhitespaceLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs. Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlLegal() {
        return sURLLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs and allows spaces, which are not technically legal
     * in encoded URLs, but commonly appear anyway.
     * Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlAndSpaceLegal() {
        return sUrlAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&amp;'). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpLegal() {
        return sAmpLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&amp;') and space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpAndSpaceLegal() {
        return sAmpAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getSpaceLegal() {
        return sSpaceLegal;
    }

    /**
     * Return a value sanitizer that allows any special characters
     * except angle brackets ('&lt;' and '&gt;') and Nul ('\0').
     * Allows script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
        return sAllButNulAndAngleBracketsLegal;
    }

    /**
     * Constructs a UrlQuerySanitizer.
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters are not allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     */
    public UrlQuerySanitizer() {
    }

    /**
     * Constructs a UrlQuerySanitizer and parses a URL.
     * This constructor is provided for convenience when the
     * default parsing behavior is acceptable.
     * <p>
     * Because the URL is parsed before the constructor returns, there isn't
     * a chance to configure the sanitizer to change the parsing behavior.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     * @param url the encoded URL to parse.
     */
    public UrlQuerySanitizer(String url) {
        setAllowUnregisteredParamaters(true);
        parseUrl(url);
    }

    /**
     * Parse the query parameters out of an encoded URL.
     * Works by extracting the query portion from the URL and then
     * calling parseQuery(). If there is no query portion it is
     * treated as if the query portion is an empty string.
     * <p>
     * Everything after the first '?' is handed to parseQuery() as-is.
     * @param url the encoded URL to parse.
     */
    public void parseUrl(String url) {
        int queryIndex = url.indexOf('?');
        String query = (queryIndex >= 0) ? url.substring(queryIndex + 1) : "";
        parseQuery(query);
    }

    /**
     * Parse a query. A query string is any number of parameter-value clauses
     * separated by any non-zero number of ampersands. A parameter-value clause
     * is a parameter followed by an equal sign, followed by a value. If the
     * equal sign is missing, the value is assumed to be the empty string.
     * <p>
     * Any entries from a previously parsed query are discarded first.
     * @param query the query to parse.
     */
    public void parseQuery(String query) {
        clear();
        // Split by '&'
        StringTokenizer tokenizer = new StringTokenizer(query, "&");
        while (tokenizer.hasMoreTokens()) {
            String attributeValuePair = tokenizer.nextToken();
            if (attributeValuePair.length() == 0) {
                continue;
            }
            int assignmentIndex = attributeValuePair.indexOf('=');
            if (assignmentIndex < 0) {
                // No assignment found, treat as if empty value
                parseEntry(attributeValuePair, "");
            } else {
                parseEntry(attributeValuePair.substring(0, assignmentIndex),
                        attributeValuePair.substring(assignmentIndex + 1));
            }
        }
    }

    /**
     * Get a set of all of the parameters found in the sanitized query.
     * <p>
     * Note: Do not modify this set. Treat it as a read-only set.
     * @return all the parameters found in the current query.
     */
    public Set<String> getParameterSet() {
        return mEntries.keySet();
    }

    /**
     * An array list of all of the parameter-value pairs in the sanitized
     * query, in the order they appeared in the query. May contain duplicate
     * parameters.
     * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
     * @return the parameter-value pairs of the current query.
     */
    public List<ParameterValuePair> getParameterList() {
        return mEntriesList;
    }

    /**
     * Check if a parameter exists in the current sanitized query.
     * @param parameter the unencoded name of a parameter.
     * @return true if the parameter exists in the current sanitized query.
     */
    public boolean hasParameter(String parameter) {
        return mEntries.containsKey(parameter);
    }

    /**
     * Get the value for a parameter in the current sanitized query.
     * Returns null if the parameter does not
     * exist.
     * @param parameter the unencoded name of a parameter.
     * @return the sanitized unencoded value of the parameter,
     * or null if the parameter does not exist.
     */
    public String getValue(String parameter) {
        return mEntries.get(parameter);
    }

    /**
     * Register a value sanitizer for a particular parameter. Can also be used
     * to replace or remove an already-set value sanitizer.
     * <p>
     * Registering a non-null value sanitizer for a particular parameter
     * makes that parameter a registered parameter.
     * @param parameter an unencoded parameter name
     * @param valueSanitizer the value sanitizer to use for a particular
     * parameter. May be null in order to unregister that parameter.
     * @see #getAllowUnregisteredParamaters()
     */
    public void registerParameter(String parameter,
            ValueSanitizer valueSanitizer) {
        putOrRemoveSanitizer(parameter, valueSanitizer);
    }

    /**
     * Register a value sanitizer for an array of parameters.
     * @param parameters An array of unencoded parameter names.
     * @param valueSanitizer the value sanitizer to use for every listed
     * parameter. May be null in order to unregister those parameters.
     * @see #registerParameter
     */
    public void registerParameters(String[] parameters,
            ValueSanitizer valueSanitizer) {
        for (String parameter : parameters) {
            putOrRemoveSanitizer(parameter, valueSanitizer);
        }
    }

    /**
     * Store a sanitizer for a parameter, or drop the registration entirely
     * when the sanitizer is null so that no null-valued entry is left behind.
     */
    private void putOrRemoveSanitizer(String parameter,
            ValueSanitizer valueSanitizer) {
        if (valueSanitizer == null) {
            mSanitizers.remove(parameter);
            return;
        }
        mSanitizers.put(parameter, valueSanitizer);
    }

    /**
     * Set whether or not unregistered parameters are allowed. If they
     * are not allowed, then they will be dropped when a query is sanitized.
     * <p>
     * Defaults to false.
     * @param allowUnregisteredParamaters true to allow unregistered parameters.
     * @see #getAllowUnregisteredParamaters()
     */
    public void setAllowUnregisteredParamaters(
            boolean allowUnregisteredParamaters) {
        mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    }

    /**
     * Get whether or not unregistered parameters are allowed. If not
     * allowed, they will be dropped when a query is parsed.
     * @return true if unregistered parameters are allowed.
     * @see #setAllowUnregisteredParamaters(boolean)
     */
    public boolean getAllowUnregisteredParamaters() {
        return mAllowUnregisteredParamaters;
    }

    /**
     * Set whether or not the first occurrence of a repeated parameter is
     * preferred. True means the first repeated parameter is preferred.
     * False means that the last repeated parameter is preferred.
     * <p>
     * The preferred parameter is the one that is returned when getValue
     * is called.
     * <p>
     * defaults to false.
     * @param preferFirstRepeatedParameter True if the first repeated
     * parameter is preferred.
     * @see #getPreferFirstRepeatedParameter()
     */
    public void setPreferFirstRepeatedParameter(
            boolean preferFirstRepeatedParameter) {
        mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    }

    /**
     * Get whether or not the first occurrence of a repeated parameter is
     * preferred.
     * @return true if the first occurrence of a repeated parameter is
     * preferred.
     * @see #setPreferFirstRepeatedParameter(boolean)
     */
    public boolean getPreferFirstRepeatedParameter() {
        return mPreferFirstRepeatedParameter;
    }

    /**
     * Parse an escaped parameter-value pair. The default implementation
     * unescapes both the parameter and the value, then looks up the
     * effective value sanitizer for the parameter and uses it to sanitize
     * the value. If all goes well then addSanitizedEntry is called with
     * the unescaped parameter and the sanitized unescaped value.
     * @param parameter an escaped parameter
     * @param value an unsanitized escaped value
     */
    protected void parseEntry(String parameter, String value) {
        String unescapedParameter = unescape(parameter);
        ValueSanitizer valueSanitizer =
                getEffectiveValueSanitizer(unescapedParameter);

        if (valueSanitizer == null) {
            // No sanitizer in effect: the parameter is dropped.
            return;
        }
        String unescapedValue = unescape(value);
        String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
        addSanitizedEntry(unescapedParameter, sanitizedValue);
    }

    /**
     * Record a sanitized parameter-value pair. Override if you want to
     * do additional filtering or validation.
     * @param parameter an unescaped parameter
     * @param value a sanitized unescaped value
     */
    protected void addSanitizedEntry(String parameter, String value) {
        // The list keeps every occurrence; the map keeps only the preferred one.
        mEntriesList.add(
                new ParameterValuePair(parameter, value));
        if (mPreferFirstRepeatedParameter && mEntries.containsKey(parameter)) {
            return;
        }
        mEntries.put(parameter, value);
    }

    /**
     * Get the value sanitizer for a parameter. Returns null if there
     * is no value sanitizer registered for the parameter.
     * @param parameter the unescaped parameter
     * @return the currently registered value sanitizer for this parameter.
     * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
     */
    public ValueSanitizer getValueSanitizer(String parameter) {
        return mSanitizers.get(parameter);
    }

    /**
     * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
     * except if there is no value sanitizer registered for a parameter, and
     * unregistered parameters are allowed, then the default value sanitizer is
     * returned.
     * @param parameter an unescaped parameter
     * @return the effective value sanitizer for a parameter.
     */
    public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
        ValueSanitizer sanitizer = getValueSanitizer(parameter);
        if (sanitizer == null && mAllowUnregisteredParamaters) {
            sanitizer = getUnregisteredParameterValueSanitizer();
        }
        return sanitizer;
    }

    /** Matches the first character that needs unescaping, if any. */
    private static final Pattern PLUS_OR_PERCENT = Pattern.compile("[+%]");

    /**
     * Unescape an escaped string.
     * <ul>
     * <li>'+' characters are replaced by
     * ' ' characters.
     * <li>Valid "%xx" escape sequences are replaced by the
     * corresponding unescaped character. Each sequence is turned into one
     * character on its own; multi-byte sequences are not combined.
     * <li>Invalid escape sequences such as "%1z", are passed through unchanged.
     * </ul>
     * @param string the escaped string
     * @return the unescaped string.
     */
    public String unescape(String string) {
        final Matcher matcher = PLUS_OR_PERCENT.matcher(string);
        if (!matcher.find()) {
            // Nothing to unescape; hand back the same instance.
            return string;
        }
        final int firstEscape = matcher.start();
        final int length = string.length();

        StringBuilder unescaped = new StringBuilder(length);
        unescaped.append(string, 0, firstEscape);
        for (int i = firstEscape; i < length; i++) {
            char c = string.charAt(i);
            if (c == '+') {
                c = ' ';
            } else if (c == '%' && i + 2 < length) {
                char c1 = string.charAt(i + 1);
                char c2 = string.charAt(i + 2);
                if (isHexDigit(c1) && isHexDigit(c2)) {
                    c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
                    i += 2; // skip the two hex digits just consumed
                }
            }
            unescaped.append(c);
        }
        return unescaped.toString();
    }

    /**
     * Test if a character is a hexadecimal digit. Both upper case and lower
     * case hex digits are allowed.
     * @param c the character to test
     * @return true if c is a hex digit.
     */
    protected boolean isHexDigit(char c) {
        return decodeHexDigit(c) >= 0;
    }

    /**
     * Convert a character that represents a hexadecimal digit into an integer.
     * If the character is not a hexadecimal digit, then -1 is returned.
     * Both upper case and lower case hex digits are allowed.
     * @param c the hexadecimal digit.
     * @return the integer value of the hexadecimal digit.
     */
    protected int decodeHexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        return -1;
    }

    /**
     * Clear the existing entries. Called to get ready to parse a new
     * query string.
     */
    protected void clear() {
        mEntries.clear();
        mEntriesList.clear();
    }
}