1 /* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.net; 18 19 import java.util.ArrayList; 20 import java.util.HashMap; 21 import java.util.List; 22 import java.util.Locale; 23 import java.util.Set; 24 import java.util.StringTokenizer; 25 import java.util.regex.Matcher; 26 import java.util.regex.Pattern; 27 28 /** 29 * 30 * Sanitizes the Query portion of a URL. Simple example: 31 * <pre class="prettyprint"> 32 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(); 33 * sanitizer.setAllowUnregisteredParamaters(true); 34 * sanitizer.parseUrl("http://example.com/?name=Joe+User"); 35 * String name = sanitizer.getValue("name")); 36 * // name now contains "Joe_User" 37 * </pre> 38 * 39 * Register ValueSanitizers to customize the way individual 40 * parameters are sanitized: 41 * <pre class="prettyprint"> 42 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(); 43 * sanitizer.registerParamater("name", UrlQuerySanitizer.createSpaceLegal()); 44 * sanitizer.parseUrl("http://example.com/?name=Joe+User"); 45 * String name = sanitizer.getValue("name")); 46 * // name now contains "Joe User". (The string is first decoded, which 47 * // converts the '+' to a ' '. Then the string is sanitized, which 48 * // converts the ' ' to an '_'. (The ' ' is converted because the default 49 * unregistered parameter sanitizer does not allow any special characters, 50 * and ' ' is a special character.) 51 * </pre> 52 * <p> 53 * There are several ways to create ValueSanitizers. In order of increasing 54 * sophistication: 55 * </p> 56 * <ol> 57 * <li>Call one of the UrlQuerySanitizer.createXXX() methods. 58 * <li>Construct your own instance of 59 * UrlQuerySanitizer.IllegalCharacterValueSanitizer. 60 * <li>Subclass UrlQuerySanitizer.ValueSanitizer to define your own value 61 * sanitizer. 62 * </ol> 63 * 64 */ 65 public class UrlQuerySanitizer { 66 67 /** 68 * A simple tuple that holds parameter-value pairs. 69 * 70 */ 71 public class ParameterValuePair { 72 /** 73 * Construct a parameter-value tuple. 74 * @param parameter an unencoded parameter 75 * @param value an unencoded value 76 */ ParameterValuePair(String parameter, String value)77 public ParameterValuePair(String parameter, 78 String value) { 79 mParameter = parameter; 80 mValue = value; 81 } 82 /** 83 * The unencoded parameter 84 */ 85 public String mParameter; 86 /** 87 * The unencoded value 88 */ 89 public String mValue; 90 } 91 92 final private HashMap<String, ValueSanitizer> mSanitizers = 93 new HashMap<String, ValueSanitizer>(); 94 final private HashMap<String, String> mEntries = 95 new HashMap<String, String>(); 96 final private ArrayList<ParameterValuePair> mEntriesList = 97 new ArrayList<ParameterValuePair>(); 98 private boolean mAllowUnregisteredParamaters; 99 private boolean mPreferFirstRepeatedParameter; 100 private ValueSanitizer mUnregisteredParameterValueSanitizer = 101 getAllIllegal(); 102 103 /** 104 * A functor used to sanitize a single query value. 105 * 106 */ 107 public static interface ValueSanitizer { 108 /** 109 * Sanitize an unencoded value. 110 * @param value 111 * @return the sanitized unencoded value 112 */ sanitize(String value)113 public String sanitize(String value); 114 } 115 116 /** 117 * Sanitize values based on which characters they contain. Illegal 118 * characters are replaced with either space or '_', depending upon 119 * whether space is a legal character or not. 120 */ 121 public static class IllegalCharacterValueSanitizer implements 122 ValueSanitizer { 123 private int mFlags; 124 125 /** 126 * Allow space (' ') characters. 127 */ 128 public final static int SPACE_OK = 1 << 0; 129 /** 130 * Allow whitespace characters other than space. The 131 * other whitespace characters are 132 * '\t' '\f' '\n' '\r' and '\0x000b' (vertical tab) 133 */ 134 public final static int OTHER_WHITESPACE_OK = 1 << 1; 135 /** 136 * Allow characters with character codes 128 to 255. 137 */ 138 public final static int NON_7_BIT_ASCII_OK = 1 << 2; 139 /** 140 * Allow double quote characters. ('"') 141 */ 142 public final static int DQUOTE_OK = 1 << 3; 143 /** 144 * Allow single quote characters. ('\'') 145 */ 146 public final static int SQUOTE_OK = 1 << 4; 147 /** 148 * Allow less-than characters. ('<') 149 */ 150 public final static int LT_OK = 1 << 5; 151 /** 152 * Allow greater-than characters. ('>') 153 */ 154 public final static int GT_OK = 1 << 6; 155 /** 156 * Allow ampersand characters ('&') 157 */ 158 public final static int AMP_OK = 1 << 7; 159 /** 160 * Allow percent-sign characters ('%') 161 */ 162 public final static int PCT_OK = 1 << 8; 163 /** 164 * Allow nul characters ('\0') 165 */ 166 public final static int NUL_OK = 1 << 9; 167 /** 168 * Allow text to start with a script URL 169 * such as "javascript:" or "vbscript:" 170 */ 171 public final static int SCRIPT_URL_OK = 1 << 10; 172 173 /** 174 * Mask with all fields set to OK 175 */ 176 public final static int ALL_OK = 0x7ff; 177 178 /** 179 * Mask with both regular space and other whitespace OK 180 */ 181 public final static int ALL_WHITESPACE_OK = 182 SPACE_OK | OTHER_WHITESPACE_OK; 183 184 185 // Common flag combinations: 186 187 /** 188 * <ul> 189 * <li>Deny all special characters. 190 * <li>Deny script URLs. 191 * </ul> 192 */ 193 public final static int ALL_ILLEGAL = 194 0; 195 /** 196 * <ul> 197 * <li>Allow all special characters except Nul. ('\0'). 198 * <li>Allow script URLs. 199 * </ul> 200 */ 201 public final static int ALL_BUT_NUL_LEGAL = 202 ALL_OK & ~NUL_OK; 203 /** 204 * <ul> 205 * <li>Allow all special characters except for: 206 * <ul> 207 * <li>whitespace characters 208 * <li>Nul ('\0') 209 * </ul> 210 * <li>Allow script URLs. 211 * </ul> 212 */ 213 public final static int ALL_BUT_WHITESPACE_LEGAL = 214 ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK); 215 /** 216 * <ul> 217 * <li>Allow characters used by encoded URLs. 218 * <li>Deny script URLs. 219 * </ul> 220 */ 221 public final static int URL_LEGAL = 222 NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK; 223 /** 224 * <ul> 225 * <li>Allow characters used by encoded URLs. 226 * <li>Allow spaces. 227 * <li>Deny script URLs. 228 * </ul> 229 */ 230 public final static int URL_AND_SPACE_LEGAL = 231 URL_LEGAL | SPACE_OK; 232 /** 233 * <ul> 234 * <li>Allow ampersand. 235 * <li>Deny script URLs. 236 * </ul> 237 */ 238 public final static int AMP_LEGAL = 239 AMP_OK; 240 /** 241 * <ul> 242 * <li>Allow ampersand. 243 * <li>Allow space. 244 * <li>Deny script URLs. 245 * </ul> 246 */ 247 public final static int AMP_AND_SPACE_LEGAL = 248 AMP_OK | SPACE_OK; 249 /** 250 * <ul> 251 * <li>Allow space. 252 * <li>Deny script URLs. 253 * </ul> 254 */ 255 public final static int SPACE_LEGAL = 256 SPACE_OK; 257 /** 258 * <ul> 259 * <li>Allow all but. 260 * <ul> 261 * <li>Nul ('\0') 262 * <li>Angle brackets ('<', '>') 263 * </ul> 264 * <li>Deny script URLs. 265 * </ul> 266 */ 267 public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL = 268 ALL_OK & ~(NUL_OK | LT_OK | GT_OK); 269 270 /** 271 * Script URL definitions 272 */ 273 274 private final static String JAVASCRIPT_PREFIX = "javascript:"; 275 276 private final static String VBSCRIPT_PREFIX = "vbscript:"; 277 278 private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min( 279 JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length()); 280 281 /** 282 * Construct a sanitizer. The parameters set the behavior of the 283 * sanitizer. 284 * @param flags some combination of the XXX_OK flags. 285 */ IllegalCharacterValueSanitizer( int flags)286 public IllegalCharacterValueSanitizer( 287 int flags) { 288 mFlags = flags; 289 } 290 /** 291 * Sanitize a value. 292 * <ol> 293 * <li>If script URLs are not OK, they will be removed. 294 * <li>If neither spaces nor other white space is OK, then 295 * white space will be trimmed from the beginning and end of 296 * the URL. (Just the actual white space characters are trimmed, not 297 * other control codes.) 298 * <li> Illegal characters will be replaced with 299 * either ' ' or '_', depending on whether a space is itself a 300 * legal character. 301 * </ol> 302 * @param value 303 * @return the sanitized value 304 */ sanitize(String value)305 public String sanitize(String value) { 306 if (value == null) { 307 return null; 308 } 309 int length = value.length(); 310 if ((mFlags & SCRIPT_URL_OK) == 0) { 311 if (length >= MIN_SCRIPT_PREFIX_LENGTH) { 312 String asLower = value.toLowerCase(Locale.ROOT); 313 if (asLower.startsWith(JAVASCRIPT_PREFIX) || 314 asLower.startsWith(VBSCRIPT_PREFIX)) { 315 return ""; 316 } 317 } 318 } 319 320 // If whitespace isn't OK, get rid of whitespace at beginning 321 // and end of value. 322 if ( (mFlags & ALL_WHITESPACE_OK) == 0) { 323 value = trimWhitespace(value); 324 // The length could have changed, so we need to correct 325 // the length variable. 326 length = value.length(); 327 } 328 329 StringBuilder stringBuilder = new StringBuilder(length); 330 for(int i = 0; i < length; i++) { 331 char c = value.charAt(i); 332 if (!characterIsLegal(c)) { 333 if ((mFlags & SPACE_OK) != 0) { 334 c = ' '; 335 } 336 else { 337 c = '_'; 338 } 339 } 340 stringBuilder.append(c); 341 } 342 return stringBuilder.toString(); 343 } 344 345 /** 346 * Trim whitespace from the beginning and end of a string. 347 * <p> 348 * Note: can't use {@link String#trim} because {@link String#trim} has a 349 * different definition of whitespace than we want. 350 * @param value the string to trim 351 * @return the trimmed string 352 */ trimWhitespace(String value)353 private String trimWhitespace(String value) { 354 int start = 0; 355 int last = value.length() - 1; 356 int end = last; 357 while (start <= end && isWhitespace(value.charAt(start))) { 358 start++; 359 } 360 while (end >= start && isWhitespace(value.charAt(end))) { 361 end--; 362 } 363 if (start == 0 && end == last) { 364 return value; 365 } 366 return value.substring(start, end + 1); 367 } 368 369 /** 370 * Check if c is whitespace. 371 * @param c character to test 372 * @return true if c is a whitespace character 373 */ isWhitespace(char c)374 private boolean isWhitespace(char c) { 375 switch(c) { 376 case ' ': 377 case '\t': 378 case '\f': 379 case '\n': 380 case '\r': 381 case 11: /* VT */ 382 return true; 383 default: 384 return false; 385 } 386 } 387 388 /** 389 * Check whether an individual character is legal. Uses the 390 * flag bit-set passed into the constructor. 391 * @param c 392 * @return true if c is a legal character 393 */ characterIsLegal(char c)394 private boolean characterIsLegal(char c) { 395 switch(c) { 396 case ' ' : return (mFlags & SPACE_OK) != 0; 397 case '\t': case '\f': case '\n': case '\r': case 11: /* VT */ 398 return (mFlags & OTHER_WHITESPACE_OK) != 0; 399 case '\"': return (mFlags & DQUOTE_OK) != 0; 400 case '\'': return (mFlags & SQUOTE_OK) != 0; 401 case '<' : return (mFlags & LT_OK) != 0; 402 case '>' : return (mFlags & GT_OK) != 0; 403 case '&' : return (mFlags & AMP_OK) != 0; 404 case '%' : return (mFlags & PCT_OK) != 0; 405 case '\0': return (mFlags & NUL_OK) != 0; 406 default : return (c >= 32 && c < 127) || 407 ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0)); 408 } 409 } 410 } 411 412 /** 413 * Get the current value sanitizer used when processing 414 * unregistered parameter values. 415 * <p> 416 * <b>Note:</b> The default unregistered parameter value sanitizer is 417 * one that doesn't allow any special characters, similar to what 418 * is returned by calling createAllIllegal. 419 * 420 * @return the current ValueSanitizer used to sanitize unregistered 421 * parameter values. 422 */ getUnregisteredParameterValueSanitizer()423 public ValueSanitizer getUnregisteredParameterValueSanitizer() { 424 return mUnregisteredParameterValueSanitizer; 425 } 426 427 /** 428 * Set the value sanitizer used when processing unregistered 429 * parameter values. 430 * @param sanitizer set the ValueSanitizer used to sanitize unregistered 431 * parameter values. 432 */ setUnregisteredParameterValueSanitizer( ValueSanitizer sanitizer)433 public void setUnregisteredParameterValueSanitizer( 434 ValueSanitizer sanitizer) { 435 mUnregisteredParameterValueSanitizer = sanitizer; 436 } 437 438 439 // Private fields for singleton sanitizers: 440 441 private static final ValueSanitizer sAllIllegal = 442 new IllegalCharacterValueSanitizer( 443 IllegalCharacterValueSanitizer.ALL_ILLEGAL); 444 445 private static final ValueSanitizer sAllButNulLegal = 446 new IllegalCharacterValueSanitizer( 447 IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL); 448 449 private static final ValueSanitizer sAllButWhitespaceLegal = 450 new IllegalCharacterValueSanitizer( 451 IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL); 452 453 private static final ValueSanitizer sURLLegal = 454 new IllegalCharacterValueSanitizer( 455 IllegalCharacterValueSanitizer.URL_LEGAL); 456 457 private static final ValueSanitizer sUrlAndSpaceLegal = 458 new IllegalCharacterValueSanitizer( 459 IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL); 460 461 private static final ValueSanitizer sAmpLegal = 462 new IllegalCharacterValueSanitizer( 463 IllegalCharacterValueSanitizer.AMP_LEGAL); 464 465 private static final ValueSanitizer sAmpAndSpaceLegal = 466 new IllegalCharacterValueSanitizer( 467 IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL); 468 469 private static final ValueSanitizer sSpaceLegal = 470 new IllegalCharacterValueSanitizer( 471 IllegalCharacterValueSanitizer.SPACE_LEGAL); 472 473 private static final ValueSanitizer sAllButNulAndAngleBracketsLegal = 474 new IllegalCharacterValueSanitizer( 475 IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL); 476 477 /** 478 * Return a value sanitizer that does not allow any special characters, 479 * and also does not allow script URLs. 480 * @return a value sanitizer 481 */ getAllIllegal()482 public static final ValueSanitizer getAllIllegal() { 483 return sAllIllegal; 484 } 485 486 /** 487 * Return a value sanitizer that allows everything except Nul ('\0') 488 * characters. Script URLs are allowed. 489 * @return a value sanitizer 490 */ getAllButNulLegal()491 public static final ValueSanitizer getAllButNulLegal() { 492 return sAllButNulLegal; 493 } 494 /** 495 * Return a value sanitizer that allows everything except Nul ('\0') 496 * characters, space (' '), and other whitespace characters. 497 * Script URLs are allowed. 498 * @return a value sanitizer 499 */ getAllButWhitespaceLegal()500 public static final ValueSanitizer getAllButWhitespaceLegal() { 501 return sAllButWhitespaceLegal; 502 } 503 /** 504 * Return a value sanitizer that allows all the characters used by 505 * encoded URLs. Does not allow script URLs. 506 * @return a value sanitizer 507 */ getUrlLegal()508 public static final ValueSanitizer getUrlLegal() { 509 return sURLLegal; 510 } 511 /** 512 * Return a value sanitizer that allows all the characters used by 513 * encoded URLs and allows spaces, which are not technically legal 514 * in encoded URLs, but commonly appear anyway. 515 * Does not allow script URLs. 516 * @return a value sanitizer 517 */ getUrlAndSpaceLegal()518 public static final ValueSanitizer getUrlAndSpaceLegal() { 519 return sUrlAndSpaceLegal; 520 } 521 /** 522 * Return a value sanitizer that does not allow any special characters 523 * except ampersand ('&'). Does not allow script URLs. 524 * @return a value sanitizer 525 */ getAmpLegal()526 public static final ValueSanitizer getAmpLegal() { 527 return sAmpLegal; 528 } 529 /** 530 * Return a value sanitizer that does not allow any special characters 531 * except ampersand ('&') and space (' '). Does not allow script URLs. 532 * @return a value sanitizer 533 */ getAmpAndSpaceLegal()534 public static final ValueSanitizer getAmpAndSpaceLegal() { 535 return sAmpAndSpaceLegal; 536 } 537 /** 538 * Return a value sanitizer that does not allow any special characters 539 * except space (' '). Does not allow script URLs. 540 * @return a value sanitizer 541 */ getSpaceLegal()542 public static final ValueSanitizer getSpaceLegal() { 543 return sSpaceLegal; 544 } 545 /** 546 * Return a value sanitizer that allows any special characters 547 * except angle brackets ('<' and '>') and Nul ('\0'). 548 * Allows script URLs. 549 * @return a value sanitizer 550 */ getAllButNulAndAngleBracketsLegal()551 public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() { 552 return sAllButNulAndAngleBracketsLegal; 553 } 554 555 /** 556 * Constructs a UrlQuerySanitizer. 557 * <p> 558 * Defaults: 559 * <ul> 560 * <li>unregistered parameters are not allowed. 561 * <li>the last instance of a repeated parameter is preferred. 562 * <li>The default value sanitizer is an AllIllegal value sanitizer. 563 * <ul> 564 */ UrlQuerySanitizer()565 public UrlQuerySanitizer() { 566 } 567 568 /** 569 * Constructs a UrlQuerySanitizer and parses a URL. 570 * This constructor is provided for convenience when the 571 * default parsing behavior is acceptable. 572 * <p> 573 * Because the URL is parsed before the constructor returns, there isn't 574 * a chance to configure the sanitizer to change the parsing behavior. 575 * <p> 576 * <code> 577 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl); 578 * String name = sanitizer.getValue("name"); 579 * </code> 580 * <p> 581 * Defaults: 582 * <ul> 583 * <li>unregistered parameters <em>are</em> allowed. 584 * <li>the last instance of a repeated parameter is preferred. 585 * <li>The default value sanitizer is an AllIllegal value sanitizer. 586 * <ul> 587 */ UrlQuerySanitizer(String url)588 public UrlQuerySanitizer(String url) { 589 setAllowUnregisteredParamaters(true); 590 parseUrl(url); 591 } 592 593 /** 594 * Parse the query parameters out of an encoded URL. 595 * Works by extracting the query portion from the URL and then 596 * calling parseQuery(). If there is no query portion it is 597 * treated as if the query portion is an empty string. 598 * @param url the encoded URL to parse. 599 */ parseUrl(String url)600 public void parseUrl(String url) { 601 int queryIndex = url.indexOf('?'); 602 String query; 603 if (queryIndex >= 0) { 604 query = url.substring(queryIndex + 1); 605 } 606 else { 607 query = ""; 608 } 609 parseQuery(query); 610 } 611 612 /** 613 * Parse a query. A query string is any number of parameter-value clauses 614 * separated by any non-zero number of ampersands. A parameter-value clause 615 * is a parameter followed by an equal sign, followed by a value. If the 616 * equal sign is missing, the value is assumed to be the empty string. 617 * @param query the query to parse. 618 */ parseQuery(String query)619 public void parseQuery(String query) { 620 clear(); 621 // Split by '&' 622 StringTokenizer tokenizer = new StringTokenizer(query, "&"); 623 while(tokenizer.hasMoreElements()) { 624 String attributeValuePair = tokenizer.nextToken(); 625 if (attributeValuePair.length() > 0) { 626 int assignmentIndex = attributeValuePair.indexOf('='); 627 if (assignmentIndex < 0) { 628 // No assignment found, treat as if empty value 629 parseEntry(attributeValuePair, ""); 630 } 631 else { 632 parseEntry(attributeValuePair.substring(0, assignmentIndex), 633 attributeValuePair.substring(assignmentIndex + 1)); 634 } 635 } 636 } 637 } 638 639 /** 640 * Get a set of all of the parameters found in the sanitized query. 641 * <p> 642 * Note: Do not modify this set. Treat it as a read-only set. 643 * @return all the parameters found in the current query. 644 */ getParameterSet()645 public Set<String> getParameterSet() { 646 return mEntries.keySet(); 647 } 648 649 /** 650 * An array list of all of the parameter-value pairs in the sanitized 651 * query, in the order they appeared in the query. May contain duplicate 652 * parameters. 653 * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p> 654 */ getParameterList()655 public List<ParameterValuePair> getParameterList() { 656 return mEntriesList; 657 } 658 659 /** 660 * Check if a parameter exists in the current sanitized query. 661 * @param parameter the unencoded name of a parameter. 662 * @return true if the parameter exists in the current sanitized queary. 663 */ hasParameter(String parameter)664 public boolean hasParameter(String parameter) { 665 return mEntries.containsKey(parameter); 666 } 667 668 /** 669 * Get the value for a parameter in the current sanitized query. 670 * Returns null if the parameter does not 671 * exit. 672 * @param parameter the unencoded name of a parameter. 673 * @return the sanitized unencoded value of the parameter, 674 * or null if the parameter does not exist. 675 */ getValue(String parameter)676 public String getValue(String parameter) { 677 return mEntries.get(parameter); 678 } 679 680 /** 681 * Register a value sanitizer for a particular parameter. Can also be used 682 * to replace or remove an already-set value sanitizer. 683 * <p> 684 * Registering a non-null value sanitizer for a particular parameter 685 * makes that parameter a registered parameter. 686 * @param parameter an unencoded parameter name 687 * @param valueSanitizer the value sanitizer to use for a particular 688 * parameter. May be null in order to unregister that parameter. 689 * @see #getAllowUnregisteredParamaters() 690 */ registerParameter(String parameter, ValueSanitizer valueSanitizer)691 public void registerParameter(String parameter, 692 ValueSanitizer valueSanitizer) { 693 if (valueSanitizer == null) { 694 mSanitizers.remove(parameter); 695 } 696 mSanitizers.put(parameter, valueSanitizer); 697 } 698 699 /** 700 * Register a value sanitizer for an array of parameters. 701 * @param parameters An array of unencoded parameter names. 702 * @param valueSanitizer 703 * @see #registerParameter 704 */ registerParameters(String[] parameters, ValueSanitizer valueSanitizer)705 public void registerParameters(String[] parameters, 706 ValueSanitizer valueSanitizer) { 707 int length = parameters.length; 708 for(int i = 0; i < length; i++) { 709 mSanitizers.put(parameters[i], valueSanitizer); 710 } 711 } 712 713 /** 714 * Set whether or not unregistered parameters are allowed. If they 715 * are not allowed, then they will be dropped when a query is sanitized. 716 * <p> 717 * Defaults to false. 718 * @param allowUnregisteredParamaters true to allow unregistered parameters. 719 * @see #getAllowUnregisteredParamaters() 720 */ setAllowUnregisteredParamaters( boolean allowUnregisteredParamaters)721 public void setAllowUnregisteredParamaters( 722 boolean allowUnregisteredParamaters) { 723 mAllowUnregisteredParamaters = allowUnregisteredParamaters; 724 } 725 726 /** 727 * Get whether or not unregistered parameters are allowed. If not 728 * allowed, they will be dropped when a query is parsed. 729 * @return true if unregistered parameters are allowed. 730 * @see #setAllowUnregisteredParamaters(boolean) 731 */ getAllowUnregisteredParamaters()732 public boolean getAllowUnregisteredParamaters() { 733 return mAllowUnregisteredParamaters; 734 } 735 736 /** 737 * Set whether or not the first occurrence of a repeated parameter is 738 * preferred. True means the first repeated parameter is preferred. 739 * False means that the last repeated parameter is preferred. 740 * <p> 741 * The preferred parameter is the one that is returned when getParameter 742 * is called. 743 * <p> 744 * defaults to false. 745 * @param preferFirstRepeatedParameter True if the first repeated 746 * parameter is preferred. 747 * @see #getPreferFirstRepeatedParameter() 748 */ setPreferFirstRepeatedParameter( boolean preferFirstRepeatedParameter)749 public void setPreferFirstRepeatedParameter( 750 boolean preferFirstRepeatedParameter) { 751 mPreferFirstRepeatedParameter = preferFirstRepeatedParameter; 752 } 753 754 /** 755 * Get whether or not the first occurrence of a repeated parameter is 756 * preferred. 757 * @return true if the first occurrence of a repeated parameter is 758 * preferred. 759 * @see #setPreferFirstRepeatedParameter(boolean) 760 */ getPreferFirstRepeatedParameter()761 public boolean getPreferFirstRepeatedParameter() { 762 return mPreferFirstRepeatedParameter; 763 } 764 765 /** 766 * Parse an escaped parameter-value pair. The default implementation 767 * unescapes both the parameter and the value, then looks up the 768 * effective value sanitizer for the parameter and uses it to sanitize 769 * the value. If all goes well then addSanitizedValue is called with 770 * the unescaped parameter and the sanitized unescaped value. 771 * @param parameter an escaped parameter 772 * @param value an unsanitized escaped value 773 */ parseEntry(String parameter, String value)774 protected void parseEntry(String parameter, String value) { 775 String unescapedParameter = unescape(parameter); 776 ValueSanitizer valueSanitizer = 777 getEffectiveValueSanitizer(unescapedParameter); 778 779 if (valueSanitizer == null) { 780 return; 781 } 782 String unescapedValue = unescape(value); 783 String sanitizedValue = valueSanitizer.sanitize(unescapedValue); 784 addSanitizedEntry(unescapedParameter, sanitizedValue); 785 } 786 787 /** 788 * Record a sanitized parameter-value pair. Override if you want to 789 * do additional filtering or validation. 790 * @param parameter an unescaped parameter 791 * @param value a sanitized unescaped value 792 */ addSanitizedEntry(String parameter, String value)793 protected void addSanitizedEntry(String parameter, String value) { 794 mEntriesList.add( 795 new ParameterValuePair(parameter, value)); 796 if (mPreferFirstRepeatedParameter) { 797 if (mEntries.containsKey(parameter)) { 798 return; 799 } 800 } 801 mEntries.put(parameter, value); 802 } 803 804 /** 805 * Get the value sanitizer for a parameter. Returns null if there 806 * is no value sanitizer registered for the parameter. 807 * @param parameter the unescaped parameter 808 * @return the currently registered value sanitizer for this parameter. 809 * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer) 810 */ getValueSanitizer(String parameter)811 public ValueSanitizer getValueSanitizer(String parameter) { 812 return mSanitizers.get(parameter); 813 } 814 815 /** 816 * Get the effective value sanitizer for a parameter. Like getValueSanitizer, 817 * except if there is no value sanitizer registered for a parameter, and 818 * unregistered parameters are allowed, then the default value sanitizer is 819 * returned. 820 * @param parameter an unescaped parameter 821 * @return the effective value sanitizer for a parameter. 822 */ getEffectiveValueSanitizer(String parameter)823 public ValueSanitizer getEffectiveValueSanitizer(String parameter) { 824 ValueSanitizer sanitizer = getValueSanitizer(parameter); 825 if (sanitizer == null && mAllowUnregisteredParamaters) { 826 sanitizer = getUnregisteredParameterValueSanitizer(); 827 } 828 return sanitizer; 829 } 830 831 /** 832 * Unescape an escaped string. 833 * <ul> 834 * <li>'+' characters are replaced by 835 * ' ' characters. 836 * <li>Valid "%xx" escape sequences are replaced by the 837 * corresponding unescaped character. 838 * <li>Invalid escape sequences such as %1z", are passed through unchanged. 839 * <ol> 840 * @param string the escaped string 841 * @return the unescaped string. 842 */ 843 private static final Pattern plusOrPercent = Pattern.compile("[+%]"); unescape(String string)844 public String unescape(String string) { 845 final Matcher matcher = plusOrPercent.matcher(string); 846 if (!matcher.find()) return string; 847 final int firstEscape = matcher.start(); 848 849 int length = string.length(); 850 851 StringBuilder stringBuilder = new StringBuilder(length); 852 stringBuilder.append(string.substring(0, firstEscape)); 853 for (int i = firstEscape; i < length; i++) { 854 char c = string.charAt(i); 855 if (c == '+') { 856 c = ' '; 857 } else if (c == '%' && i + 2 < length) { 858 char c1 = string.charAt(i + 1); 859 char c2 = string.charAt(i + 2); 860 if (isHexDigit(c1) && isHexDigit(c2)) { 861 c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2)); 862 i += 2; 863 } 864 } 865 stringBuilder.append(c); 866 } 867 return stringBuilder.toString(); 868 } 869 870 /** 871 * Test if a character is a hexidecimal digit. Both upper case and lower 872 * case hex digits are allowed. 873 * @param c the character to test 874 * @return true if c is a hex digit. 875 */ isHexDigit(char c)876 protected boolean isHexDigit(char c) { 877 return decodeHexDigit(c) >= 0; 878 } 879 880 /** 881 * Convert a character that represents a hexidecimal digit into an integer. 882 * If the character is not a hexidecimal digit, then -1 is returned. 883 * Both upper case and lower case hex digits are allowed. 884 * @param c the hexidecimal digit. 885 * @return the integer value of the hexidecimal digit. 886 */ 887 decodeHexDigit(char c)888 protected int decodeHexDigit(char c) { 889 if (c >= '0' && c <= '9') { 890 return c - '0'; 891 } 892 else if (c >= 'A' && c <= 'F') { 893 return c - 'A' + 10; 894 } 895 else if (c >= 'a' && c <= 'f') { 896 return c - 'a' + 10; 897 } 898 else { 899 return -1; 900 } 901 } 902 903 /** 904 * Clear the existing entries. Called to get ready to parse a new 905 * query string. 906 */ clear()907 protected void clear() { 908 mEntries.clear(); 909 mEntriesList.clear(); 910 } 911 } 912 913