1 /*
2  * Copyright (C) 2007 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.net;
18 
19 import java.util.ArrayList;
20 import java.util.HashMap;
21 import java.util.List;
22 import java.util.Locale;
23 import java.util.Set;
24 import java.util.StringTokenizer;
25 import java.util.regex.Matcher;
26 import java.util.regex.Pattern;
27 
28 /**
29  *
 * Sanitizes the Query portion of a URL. Simple example:
 * <pre class="prettyprint">
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized, which
 * // converts the ' ' to an '_'. The ' ' is converted because the default
 * // unregistered parameter sanitizer does not allow any special characters,
 * // and ' ' is a special character.)
 * </pre>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <pre class="prettyprint">
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The string is first decoded, which
 * // converts the '+' to a ' '. The ' ' is then kept because the sanitizer
 * // registered for "name" allows spaces.)
 * </pre>
52  * <p>
53  * There are several ways to create ValueSanitizers. In order of increasing
54  * sophistication:
55 * </p>
56  * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods.
58  * <li>Construct your own instance of
59  * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
60  * <li>Subclass UrlQuerySanitizer.ValueSanitizer to define your own value
61  * sanitizer.
62  * </ol>
63  *
64  */
65 public class UrlQuerySanitizer {
66 
67     /**
68      * A simple tuple that holds parameter-value pairs.
69      *
70      */
71     public class ParameterValuePair {
72         /**
73          * Construct a parameter-value tuple.
74          * @param parameter an unencoded parameter
75          * @param value an unencoded value
76          */
ParameterValuePair(String parameter, String value)77         public ParameterValuePair(String parameter,
78                 String value) {
79             mParameter = parameter;
80             mValue = value;
81         }
82         /**
83          * The unencoded parameter
84          */
85         public String mParameter;
86         /**
87          * The unencoded value
88          */
89         public String mValue;
90     }
91 
92     final private HashMap<String, ValueSanitizer> mSanitizers =
93         new HashMap<String, ValueSanitizer>();
94     final private HashMap<String, String> mEntries =
95         new HashMap<String, String>();
96     final private ArrayList<ParameterValuePair> mEntriesList =
97         new ArrayList<ParameterValuePair>();
98     private boolean mAllowUnregisteredParamaters;
99     private boolean mPreferFirstRepeatedParameter;
100     private ValueSanitizer mUnregisteredParameterValueSanitizer =
101         getAllIllegal();
102 
103     /**
104      * A functor used to sanitize a single query value.
105      *
106      */
107     public static interface ValueSanitizer {
108         /**
109          * Sanitize an unencoded value.
110          * @param value
111          * @return the sanitized unencoded value
112          */
sanitize(String value)113         public String sanitize(String value);
114     }
115 
116     /**
117      * Sanitize values based on which characters they contain. Illegal
118      * characters are replaced with either space or '_', depending upon
119      * whether space is a legal character or not.
120      */
121     public static class IllegalCharacterValueSanitizer implements
122         ValueSanitizer {
123         private int mFlags;
124 
125         /**
126          * Allow space (' ') characters.
127          */
128         public final static int SPACE_OK =              1 << 0;
129         /**
130          * Allow whitespace characters other than space. The
131          * other whitespace characters are
132          * '\t' '\f' '\n' '\r' and '\0x000b' (vertical tab)
133          */
134         public final static int OTHER_WHITESPACE_OK =  1 << 1;
135         /**
136          * Allow characters with character codes 128 to 255.
137          */
138         public final static int NON_7_BIT_ASCII_OK =    1 << 2;
139         /**
140          * Allow double quote characters. ('"')
141          */
142         public final static int DQUOTE_OK =             1 << 3;
143         /**
144          * Allow single quote characters. ('\'')
145          */
146         public final static int SQUOTE_OK =             1 << 4;
147         /**
148          * Allow less-than characters. ('<')
149          */
150         public final static int LT_OK =                 1 << 5;
151         /**
152          * Allow greater-than characters. ('>')
153          */
154         public final static int GT_OK =                 1 << 6;
155         /**
156          * Allow ampersand characters ('&')
157          */
158         public final static int AMP_OK =                1 << 7;
159         /**
160          * Allow percent-sign characters ('%')
161          */
162         public final static int PCT_OK =                1 << 8;
163         /**
164          * Allow nul characters ('\0')
165          */
166         public final static int NUL_OK =                1 << 9;
167         /**
168          * Allow text to start with a script URL
169          * such as "javascript:" or "vbscript:"
170          */
171         public final static int SCRIPT_URL_OK =         1 << 10;
172 
173         /**
174          * Mask with all fields set to OK
175          */
176         public final static int ALL_OK =                0x7ff;
177 
178         /**
179          * Mask with both regular space and other whitespace OK
180          */
181         public final static int ALL_WHITESPACE_OK =
182             SPACE_OK | OTHER_WHITESPACE_OK;
183 
184 
185         // Common flag combinations:
186 
187         /**
188          * <ul>
189          * <li>Deny all special characters.
190          * <li>Deny script URLs.
191          * </ul>
192          */
193         public final static int ALL_ILLEGAL =
194             0;
195         /**
196          * <ul>
197          * <li>Allow all special characters except Nul. ('\0').
198          * <li>Allow script URLs.
199          * </ul>
200          */
201         public final static int ALL_BUT_NUL_LEGAL =
202             ALL_OK & ~NUL_OK;
203         /**
204          * <ul>
205          * <li>Allow all special characters except for:
206          * <ul>
207          *  <li>whitespace characters
208          *  <li>Nul ('\0')
209          * </ul>
210          * <li>Allow script URLs.
211          * </ul>
212          */
213         public final static int ALL_BUT_WHITESPACE_LEGAL =
214             ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
215         /**
216          * <ul>
217          * <li>Allow characters used by encoded URLs.
218          * <li>Deny script URLs.
219          * </ul>
220          */
221         public final static int URL_LEGAL =
222             NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
223         /**
224          * <ul>
225          * <li>Allow characters used by encoded URLs.
226          * <li>Allow spaces.
227          * <li>Deny script URLs.
228          * </ul>
229          */
230         public final static int URL_AND_SPACE_LEGAL =
231             URL_LEGAL | SPACE_OK;
232         /**
233          * <ul>
234          * <li>Allow ampersand.
235          * <li>Deny script URLs.
236          * </ul>
237          */
238         public final static int AMP_LEGAL =
239             AMP_OK;
240         /**
241          * <ul>
242          * <li>Allow ampersand.
243          * <li>Allow space.
244          * <li>Deny script URLs.
245          * </ul>
246          */
247         public final static int AMP_AND_SPACE_LEGAL =
248             AMP_OK | SPACE_OK;
249         /**
250          * <ul>
251          * <li>Allow space.
252          * <li>Deny script URLs.
253          * </ul>
254          */
255         public final static int SPACE_LEGAL =
256             SPACE_OK;
257         /**
258          * <ul>
259          * <li>Allow all but.
260          * <ul>
261          *  <li>Nul ('\0')
262          *  <li>Angle brackets ('<', '>')
263          * </ul>
264          * <li>Deny script URLs.
265          * </ul>
266          */
267         public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
268             ALL_OK & ~(NUL_OK | LT_OK | GT_OK);
269 
270         /**
271          *  Script URL definitions
272          */
273 
274         private final static String JAVASCRIPT_PREFIX = "javascript:";
275 
276         private final static String VBSCRIPT_PREFIX = "vbscript:";
277 
278         private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
279                 JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());
280 
281         /**
282          * Construct a sanitizer. The parameters set the behavior of the
283          * sanitizer.
284          * @param flags some combination of the XXX_OK flags.
285          */
IllegalCharacterValueSanitizer( int flags)286         public IllegalCharacterValueSanitizer(
287             int flags) {
288             mFlags = flags;
289         }
290         /**
291          * Sanitize a value.
292          * <ol>
293          * <li>If script URLs are not OK, they will be removed.
294          * <li>If neither spaces nor other white space is OK, then
295          * white space will be trimmed from the beginning and end of
296          * the URL. (Just the actual white space characters are trimmed, not
297          * other control codes.)
298          * <li> Illegal characters will be replaced with
299          * either ' ' or '_', depending on whether a space is itself a
300          * legal character.
301          * </ol>
302          * @param value
303          * @return the sanitized value
304          */
sanitize(String value)305         public String sanitize(String value) {
306             if (value == null) {
307                 return null;
308             }
309             int length = value.length();
310             if ((mFlags & SCRIPT_URL_OK) == 0) {
311                 if (length >= MIN_SCRIPT_PREFIX_LENGTH) {
312                     String asLower = value.toLowerCase(Locale.ROOT);
313                     if (asLower.startsWith(JAVASCRIPT_PREFIX)  ||
314                         asLower.startsWith(VBSCRIPT_PREFIX)) {
315                         return "";
316                     }
317                 }
318             }
319 
320             // If whitespace isn't OK, get rid of whitespace at beginning
321             // and end of value.
322             if ( (mFlags & ALL_WHITESPACE_OK) == 0) {
323                 value = trimWhitespace(value);
324                 // The length could have changed, so we need to correct
325                 // the length variable.
326                 length = value.length();
327             }
328 
329             StringBuilder stringBuilder = new StringBuilder(length);
330             for(int i = 0; i < length; i++) {
331                 char c = value.charAt(i);
332                 if (!characterIsLegal(c)) {
333                     if ((mFlags & SPACE_OK) != 0) {
334                         c = ' ';
335                     }
336                     else {
337                         c = '_';
338                     }
339                 }
340                 stringBuilder.append(c);
341             }
342             return stringBuilder.toString();
343         }
344 
345         /**
346          * Trim whitespace from the beginning and end of a string.
347          * <p>
348          * Note: can't use {@link String#trim} because {@link String#trim} has a
349          * different definition of whitespace than we want.
350          * @param value the string to trim
351          * @return the trimmed string
352          */
trimWhitespace(String value)353         private String trimWhitespace(String value) {
354             int start = 0;
355             int last = value.length() - 1;
356             int end = last;
357             while (start <= end && isWhitespace(value.charAt(start))) {
358                 start++;
359             }
360             while (end >= start && isWhitespace(value.charAt(end))) {
361                 end--;
362             }
363             if (start == 0 && end == last) {
364                 return value;
365             }
366             return value.substring(start, end + 1);
367         }
368 
369         /**
370          * Check if c is whitespace.
371          * @param c character to test
372          * @return true if c is a whitespace character
373          */
isWhitespace(char c)374         private boolean isWhitespace(char c) {
375             switch(c) {
376             case ' ':
377             case '\t':
378             case '\f':
379             case '\n':
380             case '\r':
381             case 11: /* VT */
382                 return true;
383             default:
384                 return false;
385             }
386         }
387 
388         /**
389          * Check whether an individual character is legal. Uses the
390          * flag bit-set passed into the constructor.
391          * @param c
392          * @return true if c is a legal character
393          */
characterIsLegal(char c)394         private boolean characterIsLegal(char c) {
395             switch(c) {
396             case ' ' : return (mFlags & SPACE_OK) != 0;
397             case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
398               return (mFlags & OTHER_WHITESPACE_OK) != 0;
399             case '\"': return (mFlags & DQUOTE_OK) != 0;
400             case '\'': return (mFlags & SQUOTE_OK) != 0;
401             case '<' : return (mFlags & LT_OK) != 0;
402             case '>' : return (mFlags & GT_OK) != 0;
403             case '&' : return (mFlags & AMP_OK) != 0;
404             case '%' : return (mFlags & PCT_OK) != 0;
405             case '\0': return (mFlags & NUL_OK) != 0;
406             default  : return (c >= 32 && c < 127) ||
407                 ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
408             }
409         }
410     }
411 
412     /**
413      * Get the current value sanitizer used when processing
414      * unregistered parameter values.
415      * <p>
416      * <b>Note:</b> The default unregistered parameter value sanitizer is
417      * one that doesn't allow any special characters, similar to what
418      * is returned by calling createAllIllegal.
419      *
420      * @return the current ValueSanitizer used to sanitize unregistered
421      * parameter values.
422      */
getUnregisteredParameterValueSanitizer()423     public ValueSanitizer getUnregisteredParameterValueSanitizer() {
424         return mUnregisteredParameterValueSanitizer;
425     }
426 
427     /**
428      * Set the value sanitizer used when processing unregistered
429      * parameter values.
430      * @param sanitizer set the ValueSanitizer used to sanitize unregistered
431      * parameter values.
432      */
setUnregisteredParameterValueSanitizer( ValueSanitizer sanitizer)433     public void setUnregisteredParameterValueSanitizer(
434             ValueSanitizer sanitizer) {
435         mUnregisteredParameterValueSanitizer = sanitizer;
436     }
437 
438 
439     // Private fields for singleton sanitizers:
440 
441     private static final ValueSanitizer sAllIllegal =
442         new IllegalCharacterValueSanitizer(
443                 IllegalCharacterValueSanitizer.ALL_ILLEGAL);
444 
445     private static final ValueSanitizer sAllButNulLegal =
446         new IllegalCharacterValueSanitizer(
447                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);
448 
449     private static final ValueSanitizer sAllButWhitespaceLegal =
450         new IllegalCharacterValueSanitizer(
451                 IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);
452 
453     private static final ValueSanitizer sURLLegal =
454         new IllegalCharacterValueSanitizer(
455                 IllegalCharacterValueSanitizer.URL_LEGAL);
456 
457     private static final ValueSanitizer sUrlAndSpaceLegal =
458         new IllegalCharacterValueSanitizer(
459                 IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);
460 
461     private static final ValueSanitizer sAmpLegal =
462         new IllegalCharacterValueSanitizer(
463                 IllegalCharacterValueSanitizer.AMP_LEGAL);
464 
465     private static final ValueSanitizer sAmpAndSpaceLegal =
466         new IllegalCharacterValueSanitizer(
467                 IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);
468 
469     private static final ValueSanitizer sSpaceLegal =
470         new IllegalCharacterValueSanitizer(
471                 IllegalCharacterValueSanitizer.SPACE_LEGAL);
472 
473     private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
474         new IllegalCharacterValueSanitizer(
475                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);
476 
477     /**
478      * Return a value sanitizer that does not allow any special characters,
479      * and also does not allow script URLs.
480      * @return a value sanitizer
481      */
getAllIllegal()482     public static final ValueSanitizer getAllIllegal() {
483         return sAllIllegal;
484     }
485 
486     /**
487      * Return a value sanitizer that allows everything except Nul ('\0')
488      * characters. Script URLs are allowed.
489      * @return a value sanitizer
490      */
getAllButNulLegal()491     public static final ValueSanitizer getAllButNulLegal() {
492         return sAllButNulLegal;
493     }
494     /**
495      * Return a value sanitizer that allows everything except Nul ('\0')
496      * characters, space (' '), and other whitespace characters.
497      * Script URLs are allowed.
498      * @return a value sanitizer
499      */
getAllButWhitespaceLegal()500     public static final ValueSanitizer getAllButWhitespaceLegal() {
501         return sAllButWhitespaceLegal;
502     }
503     /**
504      * Return a value sanitizer that allows all the characters used by
505      * encoded URLs. Does not allow script URLs.
506      * @return a value sanitizer
507      */
getUrlLegal()508     public static final ValueSanitizer getUrlLegal() {
509         return sURLLegal;
510     }
511     /**
512      * Return a value sanitizer that allows all the characters used by
513      * encoded URLs and allows spaces, which are not technically legal
514      * in encoded URLs, but commonly appear anyway.
515      * Does not allow script URLs.
516      * @return a value sanitizer
517      */
getUrlAndSpaceLegal()518     public static final ValueSanitizer getUrlAndSpaceLegal() {
519         return sUrlAndSpaceLegal;
520     }
521     /**
522      * Return a value sanitizer that does not allow any special characters
523      * except ampersand ('&'). Does not allow script URLs.
524      * @return a value sanitizer
525      */
getAmpLegal()526     public static final ValueSanitizer getAmpLegal() {
527         return sAmpLegal;
528     }
529     /**
530      * Return a value sanitizer that does not allow any special characters
531      * except ampersand ('&') and space (' '). Does not allow script URLs.
532      * @return a value sanitizer
533      */
getAmpAndSpaceLegal()534     public static final ValueSanitizer getAmpAndSpaceLegal() {
535         return sAmpAndSpaceLegal;
536     }
537     /**
538      * Return a value sanitizer that does not allow any special characters
539      * except space (' '). Does not allow script URLs.
540      * @return a value sanitizer
541      */
getSpaceLegal()542     public static final ValueSanitizer getSpaceLegal() {
543         return sSpaceLegal;
544     }
545     /**
546      * Return a value sanitizer that allows any special characters
547      * except angle brackets ('<' and '>') and Nul ('\0').
548      * Allows script URLs.
549      * @return a value sanitizer
550      */
getAllButNulAndAngleBracketsLegal()551     public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
552         return sAllButNulAndAngleBracketsLegal;
553     }
554 
555     /**
556      * Constructs a UrlQuerySanitizer.
557      * <p>
558      * Defaults:
559      * <ul>
560      * <li>unregistered parameters are not allowed.
561      * <li>the last instance of a repeated parameter is preferred.
562      * <li>The default value sanitizer is an AllIllegal value sanitizer.
563      * <ul>
564      */
UrlQuerySanitizer()565     public UrlQuerySanitizer() {
566     }
567 
568     /**
569      * Constructs a UrlQuerySanitizer and parses a URL.
570      * This constructor is provided for convenience when the
571      * default parsing behavior is acceptable.
572      * <p>
573      * Because the URL is parsed before the constructor returns, there isn't
574      * a chance to configure the sanitizer to change the parsing behavior.
575      * <p>
576      * <code>
577      * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
578      * String name = sanitizer.getValue("name");
579      * </code>
580      * <p>
581      * Defaults:
582      * <ul>
583      * <li>unregistered parameters <em>are</em> allowed.
584      * <li>the last instance of a repeated parameter is preferred.
585      * <li>The default value sanitizer is an AllIllegal value sanitizer.
586      * <ul>
587      */
UrlQuerySanitizer(String url)588     public UrlQuerySanitizer(String url) {
589         setAllowUnregisteredParamaters(true);
590         parseUrl(url);
591     }
592 
593     /**
594      * Parse the query parameters out of an encoded URL.
595      * Works by extracting the query portion from the URL and then
596      * calling parseQuery(). If there is no query portion it is
597      * treated as if the query portion is an empty string.
598      * @param url the encoded URL to parse.
599      */
parseUrl(String url)600     public void parseUrl(String url) {
601         int queryIndex = url.indexOf('?');
602         String query;
603         if (queryIndex >= 0) {
604             query = url.substring(queryIndex + 1);
605         }
606         else {
607             query = "";
608         }
609         parseQuery(query);
610     }
611 
612     /**
613      * Parse a query. A query string is any number of parameter-value clauses
614      * separated by any non-zero number of ampersands. A parameter-value clause
615      * is a parameter followed by an equal sign, followed by a value. If the
616      * equal sign is missing, the value is assumed to be the empty string.
617      * @param query the query to parse.
618      */
parseQuery(String query)619     public void parseQuery(String query) {
620         clear();
621         // Split by '&'
622         StringTokenizer tokenizer = new StringTokenizer(query, "&");
623         while(tokenizer.hasMoreElements()) {
624             String attributeValuePair = tokenizer.nextToken();
625             if (attributeValuePair.length() > 0) {
626                 int assignmentIndex = attributeValuePair.indexOf('=');
627                 if (assignmentIndex < 0) {
628                     // No assignment found, treat as if empty value
629                     parseEntry(attributeValuePair, "");
630                 }
631                 else {
632                     parseEntry(attributeValuePair.substring(0, assignmentIndex),
633                             attributeValuePair.substring(assignmentIndex + 1));
634                 }
635             }
636         }
637     }
638 
639     /**
640      * Get a set of all of the parameters found in the sanitized query.
641      * <p>
642      * Note: Do not modify this set. Treat it as a read-only set.
643      * @return all the parameters found in the current query.
644      */
getParameterSet()645     public Set<String> getParameterSet() {
646         return mEntries.keySet();
647     }
648 
649     /**
650      * An array list of all of the parameter-value pairs in the sanitized
651      * query, in the order they appeared in the query. May contain duplicate
652      * parameters.
653      * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
654      */
getParameterList()655     public List<ParameterValuePair> getParameterList() {
656         return mEntriesList;
657     }
658 
659     /**
660      * Check if a parameter exists in the current sanitized query.
661      * @param parameter the unencoded name of a parameter.
662      * @return true if the parameter exists in the current sanitized queary.
663      */
hasParameter(String parameter)664     public boolean hasParameter(String parameter) {
665         return mEntries.containsKey(parameter);
666     }
667 
668     /**
669      * Get the value for a parameter in the current sanitized query.
670      * Returns null if the parameter does not
671      * exit.
672      * @param parameter the unencoded name of a parameter.
673      * @return the sanitized unencoded value of the parameter,
674      * or null if the parameter does not exist.
675      */
getValue(String parameter)676     public String getValue(String parameter) {
677         return mEntries.get(parameter);
678     }
679 
680     /**
681      * Register a value sanitizer for a particular parameter. Can also be used
682      * to replace or remove an already-set value sanitizer.
683      * <p>
684      * Registering a non-null value sanitizer for a particular parameter
685      * makes that parameter a registered parameter.
686      * @param parameter an unencoded parameter name
687      * @param valueSanitizer the value sanitizer to use for a particular
688      * parameter. May be null in order to unregister that parameter.
689      * @see #getAllowUnregisteredParamaters()
690      */
registerParameter(String parameter, ValueSanitizer valueSanitizer)691     public void registerParameter(String parameter,
692             ValueSanitizer valueSanitizer) {
693         if (valueSanitizer == null) {
694             mSanitizers.remove(parameter);
695         }
696         mSanitizers.put(parameter, valueSanitizer);
697     }
698 
699     /**
700      * Register a value sanitizer for an array of parameters.
701      * @param parameters An array of unencoded parameter names.
702      * @param valueSanitizer
703      * @see #registerParameter
704      */
registerParameters(String[] parameters, ValueSanitizer valueSanitizer)705     public void registerParameters(String[] parameters,
706             ValueSanitizer valueSanitizer) {
707         int length = parameters.length;
708         for(int i = 0; i < length; i++) {
709             mSanitizers.put(parameters[i], valueSanitizer);
710         }
711     }
712 
713     /**
714      * Set whether or not unregistered parameters are allowed. If they
715      * are not allowed, then they will be dropped when a query is sanitized.
716      * <p>
717      * Defaults to false.
718      * @param allowUnregisteredParamaters true to allow unregistered parameters.
719      * @see #getAllowUnregisteredParamaters()
720      */
setAllowUnregisteredParamaters( boolean allowUnregisteredParamaters)721     public void setAllowUnregisteredParamaters(
722             boolean allowUnregisteredParamaters) {
723         mAllowUnregisteredParamaters = allowUnregisteredParamaters;
724     }
725 
726     /**
727      * Get whether or not unregistered parameters are allowed. If not
728      * allowed, they will be dropped when a query is parsed.
729      * @return true if unregistered parameters are allowed.
730      * @see #setAllowUnregisteredParamaters(boolean)
731      */
getAllowUnregisteredParamaters()732     public boolean getAllowUnregisteredParamaters() {
733         return mAllowUnregisteredParamaters;
734     }
735 
736     /**
737      * Set whether or not the first occurrence of a repeated parameter is
738      * preferred. True means the first repeated parameter is preferred.
739      * False means that the last repeated parameter is preferred.
740      * <p>
741      * The preferred parameter is the one that is returned when getParameter
742      * is called.
743      * <p>
744      * defaults to false.
745      * @param preferFirstRepeatedParameter True if the first repeated
746      * parameter is preferred.
747      * @see #getPreferFirstRepeatedParameter()
748      */
setPreferFirstRepeatedParameter( boolean preferFirstRepeatedParameter)749     public void setPreferFirstRepeatedParameter(
750             boolean preferFirstRepeatedParameter) {
751         mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
752     }
753 
754     /**
755      * Get whether or not the first occurrence of a repeated parameter is
756      * preferred.
757      * @return true if the first occurrence of a repeated parameter is
758      * preferred.
759      * @see #setPreferFirstRepeatedParameter(boolean)
760      */
getPreferFirstRepeatedParameter()761     public boolean getPreferFirstRepeatedParameter() {
762         return mPreferFirstRepeatedParameter;
763     }
764 
765     /**
766      * Parse an escaped parameter-value pair. The default implementation
767      * unescapes both the parameter and the value, then looks up the
768      * effective value sanitizer for the parameter and uses it to sanitize
769      * the value. If all goes well then addSanitizedValue is called with
770      * the unescaped parameter and the sanitized unescaped value.
771      * @param parameter an escaped parameter
772      * @param value an unsanitized escaped value
773      */
parseEntry(String parameter, String value)774     protected void parseEntry(String parameter, String value) {
775         String unescapedParameter = unescape(parameter);
776          ValueSanitizer valueSanitizer =
777             getEffectiveValueSanitizer(unescapedParameter);
778 
779         if (valueSanitizer == null) {
780             return;
781         }
782         String unescapedValue = unescape(value);
783         String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
784         addSanitizedEntry(unescapedParameter, sanitizedValue);
785     }
786 
787     /**
788      * Record a sanitized parameter-value pair. Override if you want to
789      * do additional filtering or validation.
790      * @param parameter an unescaped parameter
791      * @param value a sanitized unescaped value
792      */
addSanitizedEntry(String parameter, String value)793     protected void addSanitizedEntry(String parameter, String value) {
794         mEntriesList.add(
795                 new ParameterValuePair(parameter, value));
796         if (mPreferFirstRepeatedParameter) {
797             if (mEntries.containsKey(parameter)) {
798                 return;
799             }
800         }
801         mEntries.put(parameter, value);
802     }
803 
804     /**
805      * Get the value sanitizer for a parameter. Returns null if there
806      * is no value sanitizer registered for the parameter.
807      * @param parameter the unescaped parameter
808      * @return the currently registered value sanitizer for this parameter.
809      * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
810      */
getValueSanitizer(String parameter)811     public ValueSanitizer getValueSanitizer(String parameter) {
812         return mSanitizers.get(parameter);
813     }
814 
815     /**
816      * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
817      * except if there is no value sanitizer registered for a parameter, and
818      * unregistered parameters are allowed, then the default value sanitizer is
819      * returned.
820      * @param parameter an unescaped parameter
821      * @return the effective value sanitizer for a parameter.
822      */
getEffectiveValueSanitizer(String parameter)823     public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
824         ValueSanitizer sanitizer = getValueSanitizer(parameter);
825         if (sanitizer == null && mAllowUnregisteredParamaters) {
826             sanitizer = getUnregisteredParameterValueSanitizer();
827         }
828         return sanitizer;
829     }
830 
831     /**
832      * Unescape an escaped string.
833      * <ul>
834      * <li>'+' characters are replaced by
835      * ' ' characters.
836      * <li>Valid "%xx" escape sequences are replaced by the
837      * corresponding unescaped character.
838      * <li>Invalid escape sequences such as %1z", are passed through unchanged.
839      * <ol>
840      * @param string the escaped string
841      * @return the unescaped string.
842      */
843     private static final Pattern plusOrPercent = Pattern.compile("[+%]");
unescape(String string)844     public String unescape(String string) {
845         final Matcher matcher = plusOrPercent.matcher(string);
846         if (!matcher.find()) return string;
847         final int firstEscape = matcher.start();
848 
849         int length = string.length();
850 
851         StringBuilder stringBuilder = new StringBuilder(length);
852         stringBuilder.append(string.substring(0, firstEscape));
853         for (int i = firstEscape; i < length; i++) {
854             char c = string.charAt(i);
855             if (c == '+') {
856                 c = ' ';
857             } else if (c == '%' && i + 2 < length) {
858                 char c1 = string.charAt(i + 1);
859                 char c2 = string.charAt(i + 2);
860                 if (isHexDigit(c1) && isHexDigit(c2)) {
861                     c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
862                     i += 2;
863                 }
864             }
865             stringBuilder.append(c);
866         }
867         return stringBuilder.toString();
868     }
869 
870     /**
871      * Test if a character is a hexidecimal digit. Both upper case and lower
872      * case hex digits are allowed.
873      * @param c the character to test
874      * @return true if c is a hex digit.
875      */
isHexDigit(char c)876     protected boolean isHexDigit(char c) {
877         return decodeHexDigit(c) >= 0;
878     }
879 
880     /**
881      * Convert a character that represents a hexidecimal digit into an integer.
882      * If the character is not a hexidecimal digit, then -1 is returned.
883      * Both upper case and lower case hex digits are allowed.
884      * @param c the hexidecimal digit.
885      * @return the integer value of the hexidecimal digit.
886      */
887 
decodeHexDigit(char c)888     protected int decodeHexDigit(char c) {
889         if (c >= '0' && c <= '9') {
890             return c - '0';
891         }
892         else if (c >= 'A' && c <= 'F') {
893             return c - 'A' + 10;
894         }
895         else if (c >= 'a' && c <= 'f') {
896             return c - 'a' + 10;
897         }
898         else {
899             return -1;
900         }
901     }
902 
903     /**
904      * Clear the existing entries. Called to get ready to parse a new
905      * query string.
906      */
clear()907     protected void clear() {
908         mEntries.clear();
909         mEntriesList.clear();
910     }
911 }
912 
913