1 /*
2  * Copyright (C) 2007 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.net;
18 
19 import java.util.ArrayList;
20 import java.util.HashMap;
21 import java.util.List;
22 import java.util.Locale;
23 import java.util.Set;
24 import java.util.StringTokenizer;
25 
/**
 *
 * Sanitizes the Query portion of a URL. Simple example:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User"
 * </code>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized; the ' ' is
 * // kept because the sanitizer registered for "name" allows spaces.
 * // In the first example the ' ' was converted to an '_' because the default
 * // unregistered parameter sanitizer does not allow any special characters,
 * // and ' ' is a special character.)
 * </code>
 *
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods.
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Implement UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 *
 */
62 public class UrlQuerySanitizer {
63 
64     /**
65      * A simple tuple that holds parameter-value pairs.
66      *
67      */
68     public class ParameterValuePair {
69         /**
70          * Construct a parameter-value tuple.
71          * @param parameter an unencoded parameter
72          * @param value an unencoded value
73          */
ParameterValuePair(String parameter, String value)74         public ParameterValuePair(String parameter,
75                 String value) {
76             mParameter = parameter;
77             mValue = value;
78         }
79         /**
80          * The unencoded parameter
81          */
82         public String mParameter;
83         /**
84          * The unencoded value
85          */
86         public String mValue;
87     }
88 
89     final private HashMap<String, ValueSanitizer> mSanitizers =
90         new HashMap<String, ValueSanitizer>();
91     final private HashMap<String, String> mEntries =
92         new HashMap<String, String>();
93     final private ArrayList<ParameterValuePair> mEntriesList =
94         new ArrayList<ParameterValuePair>();
95     private boolean mAllowUnregisteredParamaters;
96     private boolean mPreferFirstRepeatedParameter;
97     private ValueSanitizer mUnregisteredParameterValueSanitizer =
98         getAllIllegal();
99 
100     /**
101      * A functor used to sanitize a single query value.
102      *
103      */
104     public static interface ValueSanitizer {
105         /**
106          * Sanitize an unencoded value.
107          * @param value
108          * @return the sanitized unencoded value
109          */
sanitize(String value)110         public String sanitize(String value);
111     }
112 
113     /**
114      * Sanitize values based on which characters they contain. Illegal
115      * characters are replaced with either space or '_', depending upon
116      * whether space is a legal character or not.
117      */
118     public static class IllegalCharacterValueSanitizer implements
119         ValueSanitizer {
120         private int mFlags;
121 
122         /**
123          * Allow space (' ') characters.
124          */
125         public final static int SPACE_OK =              1 << 0;
126         /**
127          * Allow whitespace characters other than space. The
128          * other whitespace characters are
129          * '\t' '\f' '\n' '\r' and '\0x000b' (vertical tab)
130          */
131         public final static int OTHER_WHITESPACE_OK =  1 << 1;
132         /**
133          * Allow characters with character codes 128 to 255.
134          */
135         public final static int NON_7_BIT_ASCII_OK =    1 << 2;
136         /**
137          * Allow double quote characters. ('"')
138          */
139         public final static int DQUOTE_OK =             1 << 3;
140         /**
141          * Allow single quote characters. ('\'')
142          */
143         public final static int SQUOTE_OK =             1 << 4;
144         /**
145          * Allow less-than characters. ('<')
146          */
147         public final static int LT_OK =                 1 << 5;
148         /**
149          * Allow greater-than characters. ('>')
150          */
151         public final static int GT_OK =                 1 << 6;
152         /**
153          * Allow ampersand characters ('&')
154          */
155         public final static int AMP_OK =                1 << 7;
156         /**
157          * Allow percent-sign characters ('%')
158          */
159         public final static int PCT_OK =                1 << 8;
160         /**
161          * Allow nul characters ('\0')
162          */
163         public final static int NUL_OK =                1 << 9;
164         /**
165          * Allow text to start with a script URL
166          * such as "javascript:" or "vbscript:"
167          */
168         public final static int SCRIPT_URL_OK =         1 << 10;
169 
170         /**
171          * Mask with all fields set to OK
172          */
173         public final static int ALL_OK =                0x7ff;
174 
175         /**
176          * Mask with both regular space and other whitespace OK
177          */
178         public final static int ALL_WHITESPACE_OK =
179             SPACE_OK | OTHER_WHITESPACE_OK;
180 
181 
182         // Common flag combinations:
183 
184         /**
185          * <ul>
186          * <li>Deny all special characters.
187          * <li>Deny script URLs.
188          * </ul>
189          */
190         public final static int ALL_ILLEGAL =
191             0;
192         /**
193          * <ul>
194          * <li>Allow all special characters except Nul. ('\0').
195          * <li>Allow script URLs.
196          * </ul>
197          */
198         public final static int ALL_BUT_NUL_LEGAL =
199             ALL_OK & ~NUL_OK;
200         /**
201          * <ul>
202          * <li>Allow all special characters except for:
203          * <ul>
204          *  <li>whitespace characters
205          *  <li>Nul ('\0')
206          * </ul>
207          * <li>Allow script URLs.
208          * </ul>
209          */
210         public final static int ALL_BUT_WHITESPACE_LEGAL =
211             ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
212         /**
213          * <ul>
214          * <li>Allow characters used by encoded URLs.
215          * <li>Deny script URLs.
216          * </ul>
217          */
218         public final static int URL_LEGAL =
219             NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
220         /**
221          * <ul>
222          * <li>Allow characters used by encoded URLs.
223          * <li>Allow spaces.
224          * <li>Deny script URLs.
225          * </ul>
226          */
227         public final static int URL_AND_SPACE_LEGAL =
228             URL_LEGAL | SPACE_OK;
229         /**
230          * <ul>
231          * <li>Allow ampersand.
232          * <li>Deny script URLs.
233          * </ul>
234          */
235         public final static int AMP_LEGAL =
236             AMP_OK;
237         /**
238          * <ul>
239          * <li>Allow ampersand.
240          * <li>Allow space.
241          * <li>Deny script URLs.
242          * </ul>
243          */
244         public final static int AMP_AND_SPACE_LEGAL =
245             AMP_OK | SPACE_OK;
246         /**
247          * <ul>
248          * <li>Allow space.
249          * <li>Deny script URLs.
250          * </ul>
251          */
252         public final static int SPACE_LEGAL =
253             SPACE_OK;
254         /**
255          * <ul>
256          * <li>Allow all but.
257          * <ul>
258          *  <li>Nul ('\0')
259          *  <li>Angle brackets ('<', '>')
260          * </ul>
261          * <li>Deny script URLs.
262          * </ul>
263          */
264         public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
265             ALL_OK & ~(NUL_OK | LT_OK | GT_OK);
266 
267         /**
268          *  Script URL definitions
269          */
270 
271         private final static String JAVASCRIPT_PREFIX = "javascript:";
272 
273         private final static String VBSCRIPT_PREFIX = "vbscript:";
274 
275         private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
276                 JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());
277 
278         /**
279          * Construct a sanitizer. The parameters set the behavior of the
280          * sanitizer.
281          * @param flags some combination of the XXX_OK flags.
282          */
IllegalCharacterValueSanitizer( int flags)283         public IllegalCharacterValueSanitizer(
284             int flags) {
285             mFlags = flags;
286         }
287         /**
288          * Sanitize a value.
289          * <ol>
290          * <li>If script URLs are not OK, the will be removed.
291          * <li>If neither spaces nor other white space is OK, then
292          * white space will be trimmed from the beginning and end of
293          * the URL. (Just the actual white space characters are trimmed, not
294          * other control codes.)
295          * <li> Illegal characters will be replaced with
296          * either ' ' or '_', depending on whether a space is itself a
297          * legal character.
298          * </ol>
299          * @param value
300          * @return the sanitized value
301          */
sanitize(String value)302         public String sanitize(String value) {
303             if (value == null) {
304                 return null;
305             }
306             int length = value.length();
307             if ((mFlags & SCRIPT_URL_OK) != 0) {
308                 if (length >= MIN_SCRIPT_PREFIX_LENGTH) {
309                     String asLower = value.toLowerCase(Locale.ROOT);
310                     if (asLower.startsWith(JAVASCRIPT_PREFIX)  ||
311                         asLower.startsWith(VBSCRIPT_PREFIX)) {
312                         return "";
313                     }
314                 }
315             }
316 
317             // If whitespace isn't OK, get rid of whitespace at beginning
318             // and end of value.
319             if ( (mFlags & ALL_WHITESPACE_OK) == 0) {
320                 value = trimWhitespace(value);
321                 // The length could have changed, so we need to correct
322                 // the length variable.
323                 length = value.length();
324             }
325 
326             StringBuilder stringBuilder = new StringBuilder(length);
327             for(int i = 0; i < length; i++) {
328                 char c = value.charAt(i);
329                 if (!characterIsLegal(c)) {
330                     if ((mFlags & SPACE_OK) != 0) {
331                         c = ' ';
332                     }
333                     else {
334                         c = '_';
335                     }
336                 }
337                 stringBuilder.append(c);
338             }
339             return stringBuilder.toString();
340         }
341 
342         /**
343          * Trim whitespace from the beginning and end of a string.
344          * <p>
345          * Note: can't use {@link String#trim} because {@link String#trim} has a
346          * different definition of whitespace than we want.
347          * @param value the string to trim
348          * @return the trimmed string
349          */
trimWhitespace(String value)350         private String trimWhitespace(String value) {
351             int start = 0;
352             int last = value.length() - 1;
353             int end = last;
354             while (start <= end && isWhitespace(value.charAt(start))) {
355                 start++;
356             }
357             while (end >= start && isWhitespace(value.charAt(end))) {
358                 end--;
359             }
360             if (start == 0 && end == last) {
361                 return value;
362             }
363             return value.substring(start, end + 1);
364         }
365 
366         /**
367          * Check if c is whitespace.
368          * @param c character to test
369          * @return true if c is a whitespace character
370          */
isWhitespace(char c)371         private boolean isWhitespace(char c) {
372             switch(c) {
373             case ' ':
374             case '\t':
375             case '\f':
376             case '\n':
377             case '\r':
378             case 11: /* VT */
379                 return true;
380             default:
381                 return false;
382             }
383         }
384 
385         /**
386          * Check whether an individual character is legal. Uses the
387          * flag bit-set passed into the constructor.
388          * @param c
389          * @return true if c is a legal character
390          */
characterIsLegal(char c)391         private boolean characterIsLegal(char c) {
392             switch(c) {
393             case ' ' : return (mFlags & SPACE_OK) != 0;
394             case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
395               return (mFlags & OTHER_WHITESPACE_OK) != 0;
396             case '\"': return (mFlags & DQUOTE_OK) != 0;
397             case '\'': return (mFlags & SQUOTE_OK) != 0;
398             case '<' : return (mFlags & LT_OK) != 0;
399             case '>' : return (mFlags & GT_OK) != 0;
400             case '&' : return (mFlags & AMP_OK) != 0;
401             case '%' : return (mFlags & PCT_OK) != 0;
402             case '\0': return (mFlags & NUL_OK) != 0;
403             default  : return (c >= 32 && c < 127) ||
404                 ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
405             }
406         }
407     }
408 
409     /**
410      * Get the current value sanitizer used when processing
411      * unregistered parameter values.
412      * <p>
413      * <b>Note:</b> The default unregistered parameter value sanitizer is
414      * one that doesn't allow any special characters, similar to what
415      * is returned by calling createAllIllegal.
416      *
417      * @return the current ValueSanitizer used to sanitize unregistered
418      * parameter values.
419      */
getUnregisteredParameterValueSanitizer()420     public ValueSanitizer getUnregisteredParameterValueSanitizer() {
421         return mUnregisteredParameterValueSanitizer;
422     }
423 
424     /**
425      * Set the value sanitizer used when processing unregistered
426      * parameter values.
427      * @param sanitizer set the ValueSanitizer used to sanitize unregistered
428      * parameter values.
429      */
setUnregisteredParameterValueSanitizer( ValueSanitizer sanitizer)430     public void setUnregisteredParameterValueSanitizer(
431             ValueSanitizer sanitizer) {
432         mUnregisteredParameterValueSanitizer = sanitizer;
433     }
434 
435 
436     // Private fields for singleton sanitizers:
437 
438     private static final ValueSanitizer sAllIllegal =
439         new IllegalCharacterValueSanitizer(
440                 IllegalCharacterValueSanitizer.ALL_ILLEGAL);
441 
442     private static final ValueSanitizer sAllButNulLegal =
443         new IllegalCharacterValueSanitizer(
444                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);
445 
446     private static final ValueSanitizer sAllButWhitespaceLegal =
447         new IllegalCharacterValueSanitizer(
448                 IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);
449 
450     private static final ValueSanitizer sURLLegal =
451         new IllegalCharacterValueSanitizer(
452                 IllegalCharacterValueSanitizer.URL_LEGAL);
453 
454     private static final ValueSanitizer sUrlAndSpaceLegal =
455         new IllegalCharacterValueSanitizer(
456                 IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);
457 
458     private static final ValueSanitizer sAmpLegal =
459         new IllegalCharacterValueSanitizer(
460                 IllegalCharacterValueSanitizer.AMP_LEGAL);
461 
462     private static final ValueSanitizer sAmpAndSpaceLegal =
463         new IllegalCharacterValueSanitizer(
464                 IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);
465 
466     private static final ValueSanitizer sSpaceLegal =
467         new IllegalCharacterValueSanitizer(
468                 IllegalCharacterValueSanitizer.SPACE_LEGAL);
469 
470     private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
471         new IllegalCharacterValueSanitizer(
472                 IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);
473 
474     /**
475      * Return a value sanitizer that does not allow any special characters,
476      * and also does not allow script URLs.
477      * @return a value sanitizer
478      */
getAllIllegal()479     public static final ValueSanitizer getAllIllegal() {
480         return sAllIllegal;
481     }
482 
483     /**
484      * Return a value sanitizer that allows everything except Nul ('\0')
485      * characters. Script URLs are allowed.
486      * @return a value sanitizer
487      */
getAllButNulLegal()488     public static final ValueSanitizer getAllButNulLegal() {
489         return sAllButNulLegal;
490     }
491     /**
492      * Return a value sanitizer that allows everything except Nul ('\0')
493      * characters, space (' '), and other whitespace characters.
494      * Script URLs are allowed.
495      * @return a value sanitizer
496      */
getAllButWhitespaceLegal()497     public static final ValueSanitizer getAllButWhitespaceLegal() {
498         return sAllButWhitespaceLegal;
499     }
500     /**
501      * Return a value sanitizer that allows all the characters used by
502      * encoded URLs. Does not allow script URLs.
503      * @return a value sanitizer
504      */
getUrlLegal()505     public static final ValueSanitizer getUrlLegal() {
506         return sURLLegal;
507     }
508     /**
509      * Return a value sanitizer that allows all the characters used by
510      * encoded URLs and allows spaces, which are not technically legal
511      * in encoded URLs, but commonly appear anyway.
512      * Does not allow script URLs.
513      * @return a value sanitizer
514      */
getUrlAndSpaceLegal()515     public static final ValueSanitizer getUrlAndSpaceLegal() {
516         return sUrlAndSpaceLegal;
517     }
518     /**
519      * Return a value sanitizer that does not allow any special characters
520      * except ampersand ('&'). Does not allow script URLs.
521      * @return a value sanitizer
522      */
getAmpLegal()523     public static final ValueSanitizer getAmpLegal() {
524         return sAmpLegal;
525     }
526     /**
527      * Return a value sanitizer that does not allow any special characters
528      * except ampersand ('&') and space (' '). Does not allow script URLs.
529      * @return a value sanitizer
530      */
getAmpAndSpaceLegal()531     public static final ValueSanitizer getAmpAndSpaceLegal() {
532         return sAmpAndSpaceLegal;
533     }
534     /**
535      * Return a value sanitizer that does not allow any special characters
536      * except space (' '). Does not allow script URLs.
537      * @return a value sanitizer
538      */
getSpaceLegal()539     public static final ValueSanitizer getSpaceLegal() {
540         return sSpaceLegal;
541     }
542     /**
543      * Return a value sanitizer that allows any special characters
544      * except angle brackets ('<' and '>') and Nul ('\0').
545      * Allows script URLs.
546      * @return a value sanitizer
547      */
getAllButNulAndAngleBracketsLegal()548     public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
549         return sAllButNulAndAngleBracketsLegal;
550     }
551 
552     /**
553      * Constructs a UrlQuerySanitizer.
554      * <p>
555      * Defaults:
556      * <ul>
557      * <li>unregistered parameters are not allowed.
558      * <li>the last instance of a repeated parameter is preferred.
559      * <li>The default value sanitizer is an AllIllegal value sanitizer.
560      * <ul>
561      */
UrlQuerySanitizer()562     public UrlQuerySanitizer() {
563     }
564 
565     /**
566      * Constructs a UrlQuerySanitizer and parse a URL.
567      * This constructor is provided for convenience when the
568      * default parsing behavior is acceptable.
569      * <p>
570      * Because the URL is parsed before the constructor returns, there isn't
571      * a chance to configure the sanitizer to change the parsing behavior.
572      * <p>
573      * <code>
574      * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
575      * String name = sanitizer.getValue("name");
576      * </code>
577      * <p>
578      * Defaults:
579      * <ul>
580      * <li>unregistered parameters <em>are</em> allowed.
581      * <li>the last instance of a repeated parameter is preferred.
582      * <li>The default value sanitizer is an AllIllegal value sanitizer.
583      * <ul>
584      */
UrlQuerySanitizer(String url)585     public UrlQuerySanitizer(String url) {
586         setAllowUnregisteredParamaters(true);
587         parseUrl(url);
588     }
589 
590     /**
591      * Parse the query parameters out of an encoded URL.
592      * Works by extracting the query portion from the URL and then
593      * calling parseQuery(). If there is no query portion it is
594      * treated as if the query portion is an empty string.
595      * @param url the encoded URL to parse.
596      */
parseUrl(String url)597     public void parseUrl(String url) {
598         int queryIndex = url.indexOf('?');
599         String query;
600         if (queryIndex >= 0) {
601             query = url.substring(queryIndex + 1);
602         }
603         else {
604             query = "";
605         }
606         parseQuery(query);
607     }
608 
609     /**
610      * Parse a query. A query string is any number of parameter-value clauses
611      * separated by any non-zero number of ampersands. A parameter-value clause
612      * is a parameter followed by an equal sign, followed by a value. If the
613      * equal sign is missing, the value is assumed to be the empty string.
614      * @param query the query to parse.
615      */
parseQuery(String query)616     public void parseQuery(String query) {
617         clear();
618         // Split by '&'
619         StringTokenizer tokenizer = new StringTokenizer(query, "&");
620         while(tokenizer.hasMoreElements()) {
621             String attributeValuePair = tokenizer.nextToken();
622             if (attributeValuePair.length() > 0) {
623                 int assignmentIndex = attributeValuePair.indexOf('=');
624                 if (assignmentIndex < 0) {
625                     // No assignment found, treat as if empty value
626                     parseEntry(attributeValuePair, "");
627                 }
628                 else {
629                     parseEntry(attributeValuePair.substring(0, assignmentIndex),
630                             attributeValuePair.substring(assignmentIndex + 1));
631                 }
632             }
633         }
634     }
635 
636     /**
637      * Get a set of all of the parameters found in the sanitized query.
638      * <p>
639      * Note: Do not modify this set. Treat it as a read-only set.
640      * @return all the parameters found in the current query.
641      */
getParameterSet()642     public Set<String> getParameterSet() {
643         return mEntries.keySet();
644     }
645 
646     /**
647      * An array list of all of the parameter value pairs in the sanitized
648      * query, in the order they appeared in the query. May contain duplicate
649      * parameters.
650      * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
651      */
getParameterList()652     public List<ParameterValuePair> getParameterList() {
653         return mEntriesList;
654     }
655 
656     /**
657      * Check if a parameter exists in the current sanitized query.
658      * @param parameter the unencoded name of a parameter.
659      * @return true if the paramater exists in the current sanitized queary.
660      */
hasParameter(String parameter)661     public boolean hasParameter(String parameter) {
662         return mEntries.containsKey(parameter);
663     }
664 
665     /**
666      * Get the value for a parameter in the current sanitized query.
667      * Returns null if the parameter does not
668      * exit.
669      * @param parameter the unencoded name of a parameter.
670      * @return the sanitized unencoded value of the parameter,
671      * or null if the parameter does not exist.
672      */
getValue(String parameter)673     public String getValue(String parameter) {
674         return mEntries.get(parameter);
675     }
676 
677     /**
678      * Register a value sanitizer for a particular parameter. Can also be used
679      * to replace or remove an already-set value sanitizer.
680      * <p>
681      * Registering a non-null value sanitizer for a particular parameter
682      * makes that parameter a registered parameter.
683      * @param parameter an unencoded parameter name
684      * @param valueSanitizer the value sanitizer to use for a particular
685      * parameter. May be null in order to unregister that parameter.
686      * @see #getAllowUnregisteredParamaters()
687      */
registerParameter(String parameter, ValueSanitizer valueSanitizer)688     public void registerParameter(String parameter,
689             ValueSanitizer valueSanitizer) {
690         if (valueSanitizer == null) {
691             mSanitizers.remove(parameter);
692         }
693         mSanitizers.put(parameter, valueSanitizer);
694     }
695 
696     /**
697      * Register a value sanitizer for an array of parameters.
698      * @param parameters An array of unencoded parameter names.
699      * @param valueSanitizer
700      * @see #registerParameter
701      */
registerParameters(String[] parameters, ValueSanitizer valueSanitizer)702     public void registerParameters(String[] parameters,
703             ValueSanitizer valueSanitizer) {
704         int length = parameters.length;
705         for(int i = 0; i < length; i++) {
706             mSanitizers.put(parameters[i], valueSanitizer);
707         }
708     }
709 
710     /**
711      * Set whether or not unregistered parameters are allowed. If they
712      * are not allowed, then they will be dropped when a query is sanitized.
713      * <p>
714      * Defaults to false.
715      * @param allowUnregisteredParamaters true to allow unregistered parameters.
716      * @see #getAllowUnregisteredParamaters()
717      */
setAllowUnregisteredParamaters( boolean allowUnregisteredParamaters)718     public void setAllowUnregisteredParamaters(
719             boolean allowUnregisteredParamaters) {
720         mAllowUnregisteredParamaters = allowUnregisteredParamaters;
721     }
722 
723     /**
724      * Get whether or not unregistered parameters are allowed. If not
725      * allowed, they will be dropped when a query is parsed.
726      * @return true if unregistered parameters are allowed.
727      * @see #setAllowUnregisteredParamaters(boolean)
728      */
getAllowUnregisteredParamaters()729     public boolean getAllowUnregisteredParamaters() {
730         return mAllowUnregisteredParamaters;
731     }
732 
733     /**
734      * Set whether or not the first occurrence of a repeated parameter is
735      * preferred. True means the first repeated parameter is preferred.
736      * False means that the last repeated parameter is preferred.
737      * <p>
738      * The preferred parameter is the one that is returned when getParameter
739      * is called.
740      * <p>
741      * defaults to false.
742      * @param preferFirstRepeatedParameter True if the first repeated
743      * parameter is preferred.
744      * @see #getPreferFirstRepeatedParameter()
745      */
setPreferFirstRepeatedParameter( boolean preferFirstRepeatedParameter)746     public void setPreferFirstRepeatedParameter(
747             boolean preferFirstRepeatedParameter) {
748         mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
749     }
750 
751     /**
752      * Get whether or not the first occurrence of a repeated parameter is
753      * preferred.
754      * @return true if the first occurrence of a repeated parameter is
755      * preferred.
756      * @see #setPreferFirstRepeatedParameter(boolean)
757      */
getPreferFirstRepeatedParameter()758     public boolean getPreferFirstRepeatedParameter() {
759         return mPreferFirstRepeatedParameter;
760     }
761 
762     /**
763      * Parse an escaped parameter-value pair. The default implementation
764      * unescapes both the parameter and the value, then looks up the
765      * effective value sanitizer for the parameter and uses it to sanitize
766      * the value. If all goes well then addSanitizedValue is called with
767      * the unescaped parameter and the sanitized unescaped value.
768      * @param parameter an escaped parameter
769      * @param value an unsanitzied escaped value
770      */
parseEntry(String parameter, String value)771     protected void parseEntry(String parameter, String value) {
772         String unescapedParameter = unescape(parameter);
773          ValueSanitizer valueSanitizer =
774             getEffectiveValueSanitizer(unescapedParameter);
775 
776         if (valueSanitizer == null) {
777             return;
778         }
779         String unescapedValue = unescape(value);
780         String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
781         addSanitizedEntry(unescapedParameter, sanitizedValue);
782     }
783 
784     /**
785      * Record a sanitized parameter-value pair. Override if you want to
786      * do additional filtering or validation.
787      * @param parameter an unescaped parameter
788      * @param value a sanitized unescaped value
789      */
addSanitizedEntry(String parameter, String value)790     protected void addSanitizedEntry(String parameter, String value) {
791         mEntriesList.add(
792                 new ParameterValuePair(parameter, value));
793         if (mPreferFirstRepeatedParameter) {
794             if (mEntries.containsKey(parameter)) {
795                 return;
796             }
797         }
798         mEntries.put(parameter, value);
799     }
800 
801     /**
802      * Get the value sanitizer for a parameter. Returns null if there
803      * is no value sanitizer registered for the parameter.
804      * @param parameter the unescaped parameter
805      * @return the currently registered value sanitizer for this parameter.
806      * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
807      */
getValueSanitizer(String parameter)808     public ValueSanitizer getValueSanitizer(String parameter) {
809         return mSanitizers.get(parameter);
810     }
811 
812     /**
813      * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
814      * except if there is no value sanitizer registered for a parameter, and
815      * unregistered paramaters are allowed, then the default value sanitizer is
816      * returned.
817      * @param parameter an unescaped parameter
818      * @return the effective value sanitizer for a parameter.
819      */
getEffectiveValueSanitizer(String parameter)820     public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
821         ValueSanitizer sanitizer = getValueSanitizer(parameter);
822         if (sanitizer == null && mAllowUnregisteredParamaters) {
823             sanitizer = getUnregisteredParameterValueSanitizer();
824         }
825         return sanitizer;
826     }
827 
828     /**
829      * Unescape an escaped string.
830      * <ul>
831      * <li>'+' characters are replaced by
832      * ' ' characters.
833      * <li>Valid "%xx" escape sequences are replaced by the
834      * corresponding unescaped character.
835      * <li>Invalid escape sequences such as %1z", are passed through unchanged.
836      * <ol>
837      * @param string the escaped string
838      * @return the unescaped string.
839      */
unescape(String string)840     public String unescape(String string) {
841         // Early exit if no escaped characters.
842         int firstEscape = string.indexOf('%');
843         if ( firstEscape < 0) {
844             firstEscape = string.indexOf('+');
845             if (firstEscape < 0) {
846                 return string;
847             }
848         }
849 
850         int length = string.length();
851 
852         StringBuilder stringBuilder = new StringBuilder(length);
853         stringBuilder.append(string.substring(0, firstEscape));
854         for (int i = firstEscape; i < length; i++) {
855             char c = string.charAt(i);
856             if (c == '+') {
857                 c = ' ';
858             }
859             else if ( c == '%' && i + 2 < length) {
860                 char c1 = string.charAt(i + 1);
861                 char c2 = string.charAt(i + 2);
862                 if (isHexDigit(c1) && isHexDigit(c2)) {
863                     c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
864                     i += 2;
865                 }
866             }
867             stringBuilder.append(c);
868         }
869         return stringBuilder.toString();
870     }
871 
872     /**
873      * Test if a character is a hexidecimal digit. Both upper case and lower
874      * case hex digits are allowed.
875      * @param c the character to test
876      * @return true if c is a hex digit.
877      */
isHexDigit(char c)878     protected boolean isHexDigit(char c) {
879         return decodeHexDigit(c) >= 0;
880     }
881 
882     /**
883      * Convert a character that represents a hexidecimal digit into an integer.
884      * If the character is not a hexidecimal digit, then -1 is returned.
885      * Both upper case and lower case hex digits are allowed.
886      * @param c the hexidecimal digit.
887      * @return the integer value of the hexidecimal digit.
888      */
889 
decodeHexDigit(char c)890     protected int decodeHexDigit(char c) {
891         if (c >= '0' && c <= '9') {
892             return c - '0';
893         }
894         else if (c >= 'A' && c <= 'F') {
895             return c - 'A' + 10;
896         }
897         else if (c >= 'a' && c <= 'f') {
898             return c - 'a' + 10;
899         }
900         else {
901             return -1;
902         }
903     }
904 
905     /**
906      * Clear the existing entries. Called to get ready to parse a new
907      * query string.
908      */
clear()909     protected void clear() {
910         mEntries.clear();
911         mEntriesList.clear();
912     }
913 }
914 
915