1 /*
2  * Copyright (C) 2007 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.net;
18 
19 import java.util.ArrayList;
20 import java.util.HashMap;
21 import java.util.List;
22 import java.util.Locale;
23 import java.util.Set;
24 import java.util.StringTokenizer;
25 import java.util.regex.Matcher;
26 import java.util.regex.Pattern;
27 
28 /**
29  *
30  * Sanitizes the Query portion of a URL. Simple example:
31  * <code>
32  * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
33  * sanitizer.setAllowUnregisteredParamaters(true);
34  * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
36  * // name now contains "Joe_User"
37  * </code>
38  *
39  * Register ValueSanitizers to customize the way individual
40  * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized; the space-legal
 * // sanitizer keeps the ' '. In the first example the ' ' is converted to
 * // an '_' because the default unregistered parameter sanitizer does not
 * // allow any special characters, and ' ' is a special character.)
 * </code>
52  *
53  * There are several ways to create ValueSanitizers. In order of increasing
54  * sophistication:
55  * <ol>
56  * <li>Call one of the UrlQuerySanitizer.createXXX() methods.
57  * <li>Construct your own instance of
58  * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
59  * <li>Subclass UrlQuerySanitizer.ValueSanitizer to define your own value
60  * sanitizer.
61  * </ol>
62  *
63  */
64 public class UrlQuerySanitizer {
65 
66     /**
67      * A simple tuple that holds parameter-value pairs.
68      *
69      */
70     public class ParameterValuePair {
71         /**
72          * Construct a parameter-value tuple.
73          * @param parameter an unencoded parameter
74          * @param value an unencoded value
75          */
ParameterValuePair(String parameter, String value)76         public ParameterValuePair(String parameter,
77                 String value) {
78             mParameter = parameter;
79             mValue = value;
80         }
81         /**
82          * The unencoded parameter
83          */
84         public String mParameter;
85         /**
86          * The unencoded value
87          */
88         public String mValue;
89     }
90 
91     final private HashMap<String, ValueSanitizer> mSanitizers =
92         new HashMap<String, ValueSanitizer>();
93     final private HashMap<String, String> mEntries =
94         new HashMap<String, String>();
95     final private ArrayList<ParameterValuePair> mEntriesList =
96         new ArrayList<ParameterValuePair>();
97     private boolean mAllowUnregisteredParamaters;
98     private boolean mPreferFirstRepeatedParameter;
99     private ValueSanitizer mUnregisteredParameterValueSanitizer =
100         getAllIllegal();
101 
102     /**
103      * A functor used to sanitize a single query value.
104      *
105      */
106     public static interface ValueSanitizer {
107         /**
108          * Sanitize an unencoded value.
109          * @param value
110          * @return the sanitized unencoded value
111          */
sanitize(String value)112         public String sanitize(String value);
113     }
114 
115     /**
116      * Sanitize values based on which characters they contain. Illegal
117      * characters are replaced with either space or '_', depending upon
118      * whether space is a legal character or not.
119      */
120     public static class IllegalCharacterValueSanitizer implements
121         ValueSanitizer {
122         private int mFlags;
123 
124         /**
125          * Allow space (' ') characters.
126          */
127         public final static int SPACE_OK =              1 << 0;
128         /**
129          * Allow whitespace characters other than space. The
130          * other whitespace characters are
131          * '\t' '\f' '\n' '\r' and '\0x000b' (vertical tab)
132          */
133         public final static int OTHER_WHITESPACE_OK =  1 << 1;
134         /**
135          * Allow characters with character codes 128 to 255.
136          */
137         public final static int NON_7_BIT_ASCII_OK =    1 << 2;
138         /**
139          * Allow double quote characters. ('"')
140          */
141         public final static int DQUOTE_OK =             1 << 3;
142         /**
143          * Allow single quote characters. ('\'')
144          */
145         public final static int SQUOTE_OK =             1 << 4;
146         /**
147          * Allow less-than characters. ('<')
148          */
149         public final static int LT_OK =                 1 << 5;
150         /**
151          * Allow greater-than characters. ('>')
152          */
153         public final static int GT_OK =                 1 << 6;
154         /**
155          * Allow ampersand characters ('&')
156          */
157         public final static int AMP_OK =                1 << 7;
158         /**
159          * Allow percent-sign characters ('%')
160          */
161         public final static int PCT_OK =                1 << 8;
162         /**
163          * Allow nul characters ('\0')
164          */
165         public final static int NUL_OK =                1 << 9;
166         /**
167          * Allow text to start with a script URL
168          * such as "javascript:" or "vbscript:"
169          */
170         public final static int SCRIPT_URL_OK =         1 << 10;
171 
172         /**
173          * Mask with all fields set to OK
174          */
175         public final static int ALL_OK =                0x7ff;
176 
177         /**
178          * Mask with both regular space and other whitespace OK
179          */
180         public final static int ALL_WHITESPACE_OK =
181             SPACE_OK | OTHER_WHITESPACE_OK;
182 
183 
184         // Common flag combinations:
185 
186         /**
187          * <ul>
188          * <li>Deny all special characters.
189          * <li>Deny script URLs.
190          * </ul>
191          */
192         public final static int ALL_ILLEGAL =
193             0;
194         /**
195          * <ul>
196          * <li>Allow all special characters except Nul. ('\0').
197          * <li>Allow script URLs.
198          * </ul>
199          */
200         public final static int ALL_BUT_NUL_LEGAL =
201             ALL_OK & ~NUL_OK;
202         /**
203          * <ul>
204          * <li>Allow all special characters except for:
205          * <ul>
206          *  <li>whitespace characters
207          *  <li>Nul ('\0')
208          * </ul>
209          * <li>Allow script URLs.
210          * </ul>
211          */
212         public final static int ALL_BUT_WHITESPACE_LEGAL =
213             ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
214         /**
215          * <ul>
216          * <li>Allow characters used by encoded URLs.
217          * <li>Deny script URLs.
218          * </ul>
219          */
220         public final static int URL_LEGAL =
221             NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
222         /**
223          * <ul>
224          * <li>Allow characters used by encoded URLs.
225          * <li>Allow spaces.
226          * <li>Deny script URLs.
227          * </ul>
228          */
229         public final static int URL_AND_SPACE_LEGAL =
230             URL_LEGAL | SPACE_OK;
231         /**
232          * <ul>
233          * <li>Allow ampersand.
234          * <li>Deny script URLs.
235          * </ul>
236          */
237         public final static int AMP_LEGAL =
238             AMP_OK;
239         /**
240          * <ul>
241          * <li>Allow ampersand.
242          * <li>Allow space.
243          * <li>Deny script URLs.
244          * </ul>
245          */
246         public final static int AMP_AND_SPACE_LEGAL =
247             AMP_OK | SPACE_OK;
248         /**
249          * <ul>
250          * <li>Allow space.
251          * <li>Deny script URLs.
252          * </ul>
253          */
254         public final static int SPACE_LEGAL =
255             SPACE_OK;
256         /**
257          * <ul>
258          * <li>Allow all but.
259          * <ul>
260          *  <li>Nul ('\0')
261          *  <li>Angle brackets ('<', '>')
262          * </ul>
263          * <li>Deny script URLs.
264          * </ul>
265          */
266         public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
267             ALL_OK & ~(NUL_OK | LT_OK | GT_OK);
268 
269         /**
270          *  Script URL definitions
271          */
272 
273         private final static String JAVASCRIPT_PREFIX = "javascript:";
274 
275         private final static String VBSCRIPT_PREFIX = "vbscript:";
276 
277         private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
278                 JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());
279 
280         /**
281          * Construct a sanitizer. The parameters set the behavior of the
282          * sanitizer.
283          * @param flags some combination of the XXX_OK flags.
284          */
IllegalCharacterValueSanitizer( int flags)285         public IllegalCharacterValueSanitizer(
286             int flags) {
287             mFlags = flags;
288         }
289         /**
290          * Sanitize a value.
291          * <ol>
292          * <li>If script URLs are not OK, they will be removed.
293          * <li>If neither spaces nor other white space is OK, then
294          * white space will be trimmed from the beginning and end of
295          * the URL. (Just the actual white space characters are trimmed, not
296          * other control codes.)
297          * <li> Illegal characters will be replaced with
298          * either ' ' or '_', depending on whether a space is itself a
299          * legal character.
300          * </ol>
301          * @param value
302          * @return the sanitized value
303          */
sanitize(String value)304         public String sanitize(String value) {
305             if (value == null) {
306                 return null;
307             }
308             int length = value.length();
309             if ((mFlags & SCRIPT_URL_OK) == 0) {
310                 if (length >= MIN_SCRIPT_PREFIX_LENGTH) {
311                     String asLower = value.toLowerCase(Locale.ROOT);
312                     if (asLower.startsWith(JAVASCRIPT_PREFIX)  ||
313                         asLower.startsWith(VBSCRIPT_PREFIX)) {
314                         return "";
315                     }
316                 }
317             }
318 
319             // If whitespace isn't OK, get rid of whitespace at beginning
320             // and end of value.
321             if ( (mFlags & ALL_WHITESPACE_OK) == 0) {
322                 value = trimWhitespace(value);
323                 // The length could have changed, so we need to correct
324                 // the length variable.
325                 length = value.length();
326             }
327 
328             StringBuilder stringBuilder = new StringBuilder(length);
329             for(int i = 0; i < length; i++) {
330                 char c = value.charAt(i);
331                 if (!characterIsLegal(c)) {
332                     if ((mFlags & SPACE_OK) != 0) {
333                         c = ' ';
334                     }
335                     else {
336                         c = '_';
337                     }
338                 }
339                 stringBuilder.append(c);
340             }
341             return stringBuilder.toString();
342         }
343 
344         /**
345          * Trim whitespace from the beginning and end of a string.
346          * <p>
347          * Note: can't use {@link String#trim} because {@link String#trim} has a
348          * different definition of whitespace than we want.
349          * @param value the string to trim
350          * @return the trimmed string
351          */
trimWhitespace(String value)352         private String trimWhitespace(String value) {
353             int start = 0;
354             int last = value.length() - 1;
355             int end = last;
356             while (start <= end && isWhitespace(value.charAt(start))) {
357                 start++;
358             }
359             while (end >= start && isWhitespace(value.charAt(end))) {
360                 end--;
361             }
362             if (start == 0 && end == last) {
363                 return value;
364             }
365             return value.substring(start, end + 1);
366         }
367 
368         /**
369          * Check if c is whitespace.
370          * @param c character to test
371          * @return true if c is a whitespace character
372          */
isWhitespace(char c)373         private boolean isWhitespace(char c) {
374             switch(c) {
375             case ' ':
376             case '\t':
377             case '\f':
378             case '\n':
379             case '\r':
380             case 11: /* VT */
381                 return true;
382             default:
383                 return false;
384             }
385         }
386 
387         /**
388          * Check whether an individual character is legal. Uses the
389          * flag bit-set passed into the constructor.
390          * @param c
391          * @return true if c is a legal character
392          */
characterIsLegal(char c)393         private boolean characterIsLegal(char c) {
394             switch(c) {
395             case ' ' : return (mFlags & SPACE_OK) != 0;
396             case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
397               return (mFlags & OTHER_WHITESPACE_OK) != 0;
398             case '\"': return (mFlags & DQUOTE_OK) != 0;
399             case '\'': return (mFlags & SQUOTE_OK) != 0;
400             case '<' : return (mFlags & LT_OK) != 0;
401             case '>' : return (mFlags & GT_OK) != 0;
402             case '&' : return (mFlags & AMP_OK) != 0;
403             case '%' : return (mFlags & PCT_OK) != 0;
404             case '\0': return (mFlags & NUL_OK) != 0;
405             default  : return (c >= 32 && c < 127) ||
406                 ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
407             }
408         }
409     }
410 
411     /**
412      * Get the current value sanitizer used when processing
413      * unregistered parameter values.
414      * <p>
415      * <b>Note:</b> The default unregistered parameter value sanitizer is
416      * one that doesn't allow any special characters, similar to what
417      * is returned by calling createAllIllegal.
418      *
419      * @return the current ValueSanitizer used to sanitize unregistered
420      * parameter values.
421      */
getUnregisteredParameterValueSanitizer()422     public ValueSanitizer getUnregisteredParameterValueSanitizer() {
423         return mUnregisteredParameterValueSanitizer;
424     }
425 
426     /**
427      * Set the value sanitizer used when processing unregistered
428      * parameter values.
429      * @param sanitizer set the ValueSanitizer used to sanitize unregistered
430      * parameter values.
431      */
setUnregisteredParameterValueSanitizer( ValueSanitizer sanitizer)432     public void setUnregisteredParameterValueSanitizer(
433             ValueSanitizer sanitizer) {
434         mUnregisteredParameterValueSanitizer = sanitizer;
435     }
436 
437 
438     // Private fields for singleton sanitizers:
439 
440     private static final ValueSanitizer sAllIllegal =
441         new IllegalCharacterValueSanitizer(
442                 IllegalCharacterValueSanitizer.ALL_ILLEGAL);
443 
444     private static final ValueSanitizer sAllButNulLegal =
445         new IllegalCharacterValueSanitizer(
446                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);
447 
448     private static final ValueSanitizer sAllButWhitespaceLegal =
449         new IllegalCharacterValueSanitizer(
450                 IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);
451 
452     private static final ValueSanitizer sURLLegal =
453         new IllegalCharacterValueSanitizer(
454                 IllegalCharacterValueSanitizer.URL_LEGAL);
455 
456     private static final ValueSanitizer sUrlAndSpaceLegal =
457         new IllegalCharacterValueSanitizer(
458                 IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);
459 
460     private static final ValueSanitizer sAmpLegal =
461         new IllegalCharacterValueSanitizer(
462                 IllegalCharacterValueSanitizer.AMP_LEGAL);
463 
464     private static final ValueSanitizer sAmpAndSpaceLegal =
465         new IllegalCharacterValueSanitizer(
466                 IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);
467 
468     private static final ValueSanitizer sSpaceLegal =
469         new IllegalCharacterValueSanitizer(
470                 IllegalCharacterValueSanitizer.SPACE_LEGAL);
471 
472     private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
473         new IllegalCharacterValueSanitizer(
474                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);
475 
476     /**
477      * Return a value sanitizer that does not allow any special characters,
478      * and also does not allow script URLs.
479      * @return a value sanitizer
480      */
getAllIllegal()481     public static final ValueSanitizer getAllIllegal() {
482         return sAllIllegal;
483     }
484 
485     /**
486      * Return a value sanitizer that allows everything except Nul ('\0')
487      * characters. Script URLs are allowed.
488      * @return a value sanitizer
489      */
getAllButNulLegal()490     public static final ValueSanitizer getAllButNulLegal() {
491         return sAllButNulLegal;
492     }
493     /**
494      * Return a value sanitizer that allows everything except Nul ('\0')
495      * characters, space (' '), and other whitespace characters.
496      * Script URLs are allowed.
497      * @return a value sanitizer
498      */
getAllButWhitespaceLegal()499     public static final ValueSanitizer getAllButWhitespaceLegal() {
500         return sAllButWhitespaceLegal;
501     }
502     /**
503      * Return a value sanitizer that allows all the characters used by
504      * encoded URLs. Does not allow script URLs.
505      * @return a value sanitizer
506      */
getUrlLegal()507     public static final ValueSanitizer getUrlLegal() {
508         return sURLLegal;
509     }
510     /**
511      * Return a value sanitizer that allows all the characters used by
512      * encoded URLs and allows spaces, which are not technically legal
513      * in encoded URLs, but commonly appear anyway.
514      * Does not allow script URLs.
515      * @return a value sanitizer
516      */
getUrlAndSpaceLegal()517     public static final ValueSanitizer getUrlAndSpaceLegal() {
518         return sUrlAndSpaceLegal;
519     }
520     /**
521      * Return a value sanitizer that does not allow any special characters
522      * except ampersand ('&'). Does not allow script URLs.
523      * @return a value sanitizer
524      */
getAmpLegal()525     public static final ValueSanitizer getAmpLegal() {
526         return sAmpLegal;
527     }
528     /**
529      * Return a value sanitizer that does not allow any special characters
530      * except ampersand ('&') and space (' '). Does not allow script URLs.
531      * @return a value sanitizer
532      */
getAmpAndSpaceLegal()533     public static final ValueSanitizer getAmpAndSpaceLegal() {
534         return sAmpAndSpaceLegal;
535     }
536     /**
537      * Return a value sanitizer that does not allow any special characters
538      * except space (' '). Does not allow script URLs.
539      * @return a value sanitizer
540      */
getSpaceLegal()541     public static final ValueSanitizer getSpaceLegal() {
542         return sSpaceLegal;
543     }
544     /**
545      * Return a value sanitizer that allows any special characters
546      * except angle brackets ('<' and '>') and Nul ('\0').
547      * Allows script URLs.
548      * @return a value sanitizer
549      */
getAllButNulAndAngleBracketsLegal()550     public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
551         return sAllButNulAndAngleBracketsLegal;
552     }
553 
554     /**
555      * Constructs a UrlQuerySanitizer.
556      * <p>
557      * Defaults:
558      * <ul>
559      * <li>unregistered parameters are not allowed.
560      * <li>the last instance of a repeated parameter is preferred.
561      * <li>The default value sanitizer is an AllIllegal value sanitizer.
562      * <ul>
563      */
UrlQuerySanitizer()564     public UrlQuerySanitizer() {
565     }
566 
567     /**
568      * Constructs a UrlQuerySanitizer and parses a URL.
569      * This constructor is provided for convenience when the
570      * default parsing behavior is acceptable.
571      * <p>
572      * Because the URL is parsed before the constructor returns, there isn't
573      * a chance to configure the sanitizer to change the parsing behavior.
574      * <p>
575      * <code>
576      * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
577      * String name = sanitizer.getValue("name");
578      * </code>
579      * <p>
580      * Defaults:
581      * <ul>
582      * <li>unregistered parameters <em>are</em> allowed.
583      * <li>the last instance of a repeated parameter is preferred.
584      * <li>The default value sanitizer is an AllIllegal value sanitizer.
585      * <ul>
586      */
UrlQuerySanitizer(String url)587     public UrlQuerySanitizer(String url) {
588         setAllowUnregisteredParamaters(true);
589         parseUrl(url);
590     }
591 
592     /**
593      * Parse the query parameters out of an encoded URL.
594      * Works by extracting the query portion from the URL and then
595      * calling parseQuery(). If there is no query portion it is
596      * treated as if the query portion is an empty string.
597      * @param url the encoded URL to parse.
598      */
parseUrl(String url)599     public void parseUrl(String url) {
600         int queryIndex = url.indexOf('?');
601         String query;
602         if (queryIndex >= 0) {
603             query = url.substring(queryIndex + 1);
604         }
605         else {
606             query = "";
607         }
608         parseQuery(query);
609     }
610 
611     /**
612      * Parse a query. A query string is any number of parameter-value clauses
613      * separated by any non-zero number of ampersands. A parameter-value clause
614      * is a parameter followed by an equal sign, followed by a value. If the
615      * equal sign is missing, the value is assumed to be the empty string.
616      * @param query the query to parse.
617      */
parseQuery(String query)618     public void parseQuery(String query) {
619         clear();
620         // Split by '&'
621         StringTokenizer tokenizer = new StringTokenizer(query, "&");
622         while(tokenizer.hasMoreElements()) {
623             String attributeValuePair = tokenizer.nextToken();
624             if (attributeValuePair.length() > 0) {
625                 int assignmentIndex = attributeValuePair.indexOf('=');
626                 if (assignmentIndex < 0) {
627                     // No assignment found, treat as if empty value
628                     parseEntry(attributeValuePair, "");
629                 }
630                 else {
631                     parseEntry(attributeValuePair.substring(0, assignmentIndex),
632                             attributeValuePair.substring(assignmentIndex + 1));
633                 }
634             }
635         }
636     }
637 
638     /**
639      * Get a set of all of the parameters found in the sanitized query.
640      * <p>
641      * Note: Do not modify this set. Treat it as a read-only set.
642      * @return all the parameters found in the current query.
643      */
getParameterSet()644     public Set<String> getParameterSet() {
645         return mEntries.keySet();
646     }
647 
648     /**
649      * An array list of all of the parameter-value pairs in the sanitized
650      * query, in the order they appeared in the query. May contain duplicate
651      * parameters.
652      * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
653      */
getParameterList()654     public List<ParameterValuePair> getParameterList() {
655         return mEntriesList;
656     }
657 
658     /**
659      * Check if a parameter exists in the current sanitized query.
660      * @param parameter the unencoded name of a parameter.
661      * @return true if the parameter exists in the current sanitized queary.
662      */
hasParameter(String parameter)663     public boolean hasParameter(String parameter) {
664         return mEntries.containsKey(parameter);
665     }
666 
667     /**
668      * Get the value for a parameter in the current sanitized query.
669      * Returns null if the parameter does not
670      * exit.
671      * @param parameter the unencoded name of a parameter.
672      * @return the sanitized unencoded value of the parameter,
673      * or null if the parameter does not exist.
674      */
getValue(String parameter)675     public String getValue(String parameter) {
676         return mEntries.get(parameter);
677     }
678 
679     /**
680      * Register a value sanitizer for a particular parameter. Can also be used
681      * to replace or remove an already-set value sanitizer.
682      * <p>
683      * Registering a non-null value sanitizer for a particular parameter
684      * makes that parameter a registered parameter.
685      * @param parameter an unencoded parameter name
686      * @param valueSanitizer the value sanitizer to use for a particular
687      * parameter. May be null in order to unregister that parameter.
688      * @see #getAllowUnregisteredParamaters()
689      */
registerParameter(String parameter, ValueSanitizer valueSanitizer)690     public void registerParameter(String parameter,
691             ValueSanitizer valueSanitizer) {
692         if (valueSanitizer == null) {
693             mSanitizers.remove(parameter);
694         }
695         mSanitizers.put(parameter, valueSanitizer);
696     }
697 
698     /**
699      * Register a value sanitizer for an array of parameters.
700      * @param parameters An array of unencoded parameter names.
701      * @param valueSanitizer
702      * @see #registerParameter
703      */
registerParameters(String[] parameters, ValueSanitizer valueSanitizer)704     public void registerParameters(String[] parameters,
705             ValueSanitizer valueSanitizer) {
706         int length = parameters.length;
707         for(int i = 0; i < length; i++) {
708             mSanitizers.put(parameters[i], valueSanitizer);
709         }
710     }
711 
712     /**
713      * Set whether or not unregistered parameters are allowed. If they
714      * are not allowed, then they will be dropped when a query is sanitized.
715      * <p>
716      * Defaults to false.
717      * @param allowUnregisteredParamaters true to allow unregistered parameters.
718      * @see #getAllowUnregisteredParamaters()
719      */
setAllowUnregisteredParamaters( boolean allowUnregisteredParamaters)720     public void setAllowUnregisteredParamaters(
721             boolean allowUnregisteredParamaters) {
722         mAllowUnregisteredParamaters = allowUnregisteredParamaters;
723     }
724 
725     /**
726      * Get whether or not unregistered parameters are allowed. If not
727      * allowed, they will be dropped when a query is parsed.
728      * @return true if unregistered parameters are allowed.
729      * @see #setAllowUnregisteredParamaters(boolean)
730      */
getAllowUnregisteredParamaters()731     public boolean getAllowUnregisteredParamaters() {
732         return mAllowUnregisteredParamaters;
733     }
734 
735     /**
736      * Set whether or not the first occurrence of a repeated parameter is
737      * preferred. True means the first repeated parameter is preferred.
738      * False means that the last repeated parameter is preferred.
739      * <p>
740      * The preferred parameter is the one that is returned when getParameter
741      * is called.
742      * <p>
743      * defaults to false.
744      * @param preferFirstRepeatedParameter True if the first repeated
745      * parameter is preferred.
746      * @see #getPreferFirstRepeatedParameter()
747      */
setPreferFirstRepeatedParameter( boolean preferFirstRepeatedParameter)748     public void setPreferFirstRepeatedParameter(
749             boolean preferFirstRepeatedParameter) {
750         mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
751     }
752 
753     /**
754      * Get whether or not the first occurrence of a repeated parameter is
755      * preferred.
756      * @return true if the first occurrence of a repeated parameter is
757      * preferred.
758      * @see #setPreferFirstRepeatedParameter(boolean)
759      */
getPreferFirstRepeatedParameter()760     public boolean getPreferFirstRepeatedParameter() {
761         return mPreferFirstRepeatedParameter;
762     }
763 
764     /**
765      * Parse an escaped parameter-value pair. The default implementation
766      * unescapes both the parameter and the value, then looks up the
767      * effective value sanitizer for the parameter and uses it to sanitize
768      * the value. If all goes well then addSanitizedValue is called with
769      * the unescaped parameter and the sanitized unescaped value.
770      * @param parameter an escaped parameter
771      * @param value an unsanitized escaped value
772      */
parseEntry(String parameter, String value)773     protected void parseEntry(String parameter, String value) {
774         String unescapedParameter = unescape(parameter);
775          ValueSanitizer valueSanitizer =
776             getEffectiveValueSanitizer(unescapedParameter);
777 
778         if (valueSanitizer == null) {
779             return;
780         }
781         String unescapedValue = unescape(value);
782         String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
783         addSanitizedEntry(unescapedParameter, sanitizedValue);
784     }
785 
786     /**
787      * Record a sanitized parameter-value pair. Override if you want to
788      * do additional filtering or validation.
789      * @param parameter an unescaped parameter
790      * @param value a sanitized unescaped value
791      */
addSanitizedEntry(String parameter, String value)792     protected void addSanitizedEntry(String parameter, String value) {
793         mEntriesList.add(
794                 new ParameterValuePair(parameter, value));
795         if (mPreferFirstRepeatedParameter) {
796             if (mEntries.containsKey(parameter)) {
797                 return;
798             }
799         }
800         mEntries.put(parameter, value);
801     }
802 
803     /**
804      * Get the value sanitizer for a parameter. Returns null if there
805      * is no value sanitizer registered for the parameter.
806      * @param parameter the unescaped parameter
807      * @return the currently registered value sanitizer for this parameter.
808      * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
809      */
getValueSanitizer(String parameter)810     public ValueSanitizer getValueSanitizer(String parameter) {
811         return mSanitizers.get(parameter);
812     }
813 
814     /**
815      * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
816      * except if there is no value sanitizer registered for a parameter, and
817      * unregistered parameters are allowed, then the default value sanitizer is
818      * returned.
819      * @param parameter an unescaped parameter
820      * @return the effective value sanitizer for a parameter.
821      */
getEffectiveValueSanitizer(String parameter)822     public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
823         ValueSanitizer sanitizer = getValueSanitizer(parameter);
824         if (sanitizer == null && mAllowUnregisteredParamaters) {
825             sanitizer = getUnregisteredParameterValueSanitizer();
826         }
827         return sanitizer;
828     }
829 
830     /**
831      * Unescape an escaped string.
832      * <ul>
833      * <li>'+' characters are replaced by
834      * ' ' characters.
835      * <li>Valid "%xx" escape sequences are replaced by the
836      * corresponding unescaped character.
837      * <li>Invalid escape sequences such as %1z", are passed through unchanged.
838      * <ol>
839      * @param string the escaped string
840      * @return the unescaped string.
841      */
842     private static final Pattern plusOrPercent = Pattern.compile("[+%]");
unescape(String string)843     public String unescape(String string) {
844         final Matcher matcher = plusOrPercent.matcher(string);
845         if (!matcher.find()) return string;
846         final int firstEscape = matcher.start();
847 
848         int length = string.length();
849 
850         StringBuilder stringBuilder = new StringBuilder(length);
851         stringBuilder.append(string.substring(0, firstEscape));
852         for (int i = firstEscape; i < length; i++) {
853             char c = string.charAt(i);
854             if (c == '+') {
855                 c = ' ';
856             } else if (c == '%' && i + 2 < length) {
857                 char c1 = string.charAt(i + 1);
858                 char c2 = string.charAt(i + 2);
859                 if (isHexDigit(c1) && isHexDigit(c2)) {
860                     c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
861                     i += 2;
862                 }
863             }
864             stringBuilder.append(c);
865         }
866         return stringBuilder.toString();
867     }
868 
869     /**
     * Test if a character is a hexadecimal digit. Both upper case and lower
     * case hex digits are allowed.
872      * @param c the character to test
873      * @return true if c is a hex digit.
874      */
isHexDigit(char c)875     protected boolean isHexDigit(char c) {
876         return decodeHexDigit(c) >= 0;
877     }
878 
879     /**
     * Convert a character that represents a hexadecimal digit into an integer.
     * If the character is not a hexadecimal digit, then -1 is returned.
     * Both upper case and lower case hex digits are allowed.
     * @param c the hexadecimal digit.
884      * @return the integer value of the hexidecimal digit.
885      */
886 
decodeHexDigit(char c)887     protected int decodeHexDigit(char c) {
888         if (c >= '0' && c <= '9') {
889             return c - '0';
890         }
891         else if (c >= 'A' && c <= 'F') {
892             return c - 'A' + 10;
893         }
894         else if (c >= 'a' && c <= 'f') {
895             return c - 'a' + 10;
896         }
897         else {
898             return -1;
899         }
900     }
901 
902     /**
903      * Clear the existing entries. Called to get ready to parse a new
904      * query string.
905      */
clear()906     protected void clear() {
907         mEntries.clear();
908         mEntriesList.clear();
909     }
910 }
911 
912