android/net/UrlQuerySanitizer.java

/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.net;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 *
 * Sanitizes the Query portion of a URL. Simple example:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name"));
 * // name now contains "Joe_User"
 * </code>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParamater("name", UrlQuerySanitizer.createSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name"));
 * // name now contains "Joe User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized, which
 * // converts the ' ' to an '_'. (The ' ' is converted because the default
 * unregistered parameter sanitizer does not allow any special characters,
 * and ' ' is a special character.)
 * </code>
 *
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.createXXX() methods.
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Subclass UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 *
 */
public class UrlQuerySanitizer {

    /**
     * A simple tuple that holds parameter-value pairs.
     *
     */
    public class ParameterValuePair {
        /**
         * Construct a parameter-value tuple.
         * @param parameter an unencoded parameter
         * @param value an unencoded value
         */
        public ParameterValuePair(String parameter,
                String value) {
            mParameter = parameter;
            mValue = value;
        }
        /**
         * The unencoded parameter
         */
        public String mParameter;
        /**
         * The unencoded value
         */
        public String mValue;
    }

    final private HashMap<String, ValueSanitizer> mSanitizers =
        new HashMap<String, ValueSanitizer>();
    final private HashMap<String, String> mEntries =
        new HashMap<String, String>();
    final private ArrayList<ParameterValuePair> mEntriesList =
        new ArrayList<ParameterValuePair>();
    private boolean mAllowUnregisteredParamaters;
    private boolean mPreferFirstRepeatedParameter;
    private ValueSanitizer mUnregisteredParameterValueSanitizer =
        getAllIllegal();

    /**
     * A functor used to sanitize a single query value.
     *
     */
    public static interface ValueSanitizer {
        /**
         * Sanitize an unencoded value.
         * @param value
         * @return the sanitized unencoded value
         */
        public String sanitize(String value);
    }

    /**
     * Sanitize values based on which characters they contain. Illegal
     * characters are replaced with either space or '_', depending upon
     * whether space is a legal character or not.
     */
    public static class IllegalCharacterValueSanitizer implements
        ValueSanitizer {
        private int mFlags;

        /**
         * Allow space (' ') characters.
         */
        public final static int SPACE_OK =              1 << 0;
        /**
         * Allow whitespace characters other than space. The
         * other whitespace characters are
         * '\t' '\f' '\n' '\r' and '\0x000b' (vertical tab)
         */
        public final static int OTHER_WHITESPACE_OK =  1 << 1;
        /**
         * Allow characters with character codes 128 to 255.
         */
        public final static int NON_7_BIT_ASCII_OK =    1 << 2;
        /**
         * Allow double quote characters. ('"')
         */
        public final static int DQUOTE_OK =             1 << 3;
        /**
         * Allow single quote characters. ('\'')
         */
        public final static int SQUOTE_OK =             1 << 4;
        /**
         * Allow less-than characters. ('<')
         */
        public final static int LT_OK =                 1 << 5;
        /**
         * Allow greater-than characters. ('>')
         */
        public final static int GT_OK =                 1 << 6;
        /**
         * Allow ampersand characters ('&')
         */
        public final static int AMP_OK =                1 << 7;
        /**
         * Allow percent-sign characters ('%')
         */
        public final static int PCT_OK =                1 << 8;
        /**
         * Allow nul characters ('\0')
         */
        public final static int NUL_OK =                1 << 9;
        /**
         * Allow text to start with a script URL
         * such as "javascript:" or "vbscript:"
         */
        public final static int SCRIPT_URL_OK =         1 << 10;

        /**
         * Mask with all fields set to OK
         */
        public final static int ALL_OK =                0x7ff;

        /**
         * Mask with both regular space and other whitespace OK
         */
        public final static int ALL_WHITESPACE_OK =
            SPACE_OK | OTHER_WHITESPACE_OK;


        // Common flag combinations:

        /**
         * <ul>
         * <li>Deny all special characters.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_ILLEGAL =
            0;
        /**
         * <ul>
         * <li>Allow all special characters except Nul. ('\0').
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_LEGAL =
            ALL_OK & ~NUL_OK;
        /**
         * <ul>
         * <li>Allow all special characters except for:
         * <ul>
         *  <li>whitespace characters
         *  <li>Nul ('\0')
         * </ul>
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_WHITESPACE_LEGAL =
            ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_LEGAL =
            NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Allow spaces.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_AND_SPACE_LEGAL =
            URL_LEGAL | SPACE_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_LEGAL =
            AMP_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_AND_SPACE_LEGAL =
            AMP_OK | SPACE_OK;
        /**
         * <ul>
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int SPACE_LEGAL =
            SPACE_OK;
        /**
         * <ul>
         * <li>Allow all but.
         * <ul>
         *  <li>Nul ('\0')
         *  <li>Angle brackets ('<', '>')
         * </ul>
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
            ALL_OK & ~(NUL_OK | LT_OK | GT_OK);

        /**
         *  Script URL definitions
         */

        private final static String JAVASCRIPT_PREFIX = "javascript:";

        private final static String VBSCRIPT_PREFIX = "vbscript:";

        private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
                JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());

        /**
         * Construct a sanitizer. The parameters set the behavior of the
         * sanitizer.
         * @param flags some combination of the XXX_OK flags.
         */
        public IllegalCharacterValueSanitizer(
            int flags) {
            mFlags = flags;
        }
        /**
         * Sanitize a value.
         * <ol>
         * <li>If script URLs are not OK, they will be removed.
         * <li>If neither spaces nor other white space is OK, then
         * white space will be trimmed from the beginning and end of
         * the URL. (Just the actual white space characters are trimmed, not
         * other control codes.)
         * <li> Illegal characters will be replaced with
         * either ' ' or '_', depending on whether a space is itself a
         * legal character.
         * </ol>
         * @param value
         * @return the sanitized value
         */
        public String sanitize(String value) {
            if (value == null) {
                return null;
            }
            int length = value.length();
            if ((mFlags & SCRIPT_URL_OK) == 0) {
                if (length >= MIN_SCRIPT_PREFIX_LENGTH) {
                    String asLower = value.toLowerCase(Locale.ROOT);
                    if (asLower.startsWith(JAVASCRIPT_PREFIX)  ||
                        asLower.startsWith(VBSCRIPT_PREFIX)) {
                        return "";
                    }
                }
            }

            // If whitespace isn't OK, get rid of whitespace at beginning
            // and end of value.
            if ( (mFlags & ALL_WHITESPACE_OK) == 0) {
                value = trimWhitespace(value);
                // The length could have changed, so we need to correct
                // the length variable.
                length = value.length();
            }

            StringBuilder stringBuilder = new StringBuilder(length);
            for(int i = 0; i < length; i++) {
                char c = value.charAt(i);
                if (!characterIsLegal(c)) {
                    if ((mFlags & SPACE_OK) != 0) {
                        c = ' ';
                    }
                    else {
                        c = '_';
                    }
                }
                stringBuilder.append(c);
            }
            return stringBuilder.toString();
        }

        /**
         * Trim whitespace from the beginning and end of a string.
         * <p>
         * Note: can't use {@link String#trim} because {@link String#trim} has a
         * different definition of whitespace than we want.
         * @param value the string to trim
         * @return the trimmed string
         */
        private String trimWhitespace(String value) {
            int start = 0;
            int last = value.length() - 1;
            int end = last;
            while (start <= end && isWhitespace(value.charAt(start))) {
                start++;
            }
            while (end >= start && isWhitespace(value.charAt(end))) {
                end--;
            }
            if (start == 0 && end == last) {
                return value;
            }
            return value.substring(start, end + 1);
        }

        /**
         * Check if c is whitespace.
         * @param c character to test
         * @return true if c is a whitespace character
         */
        private boolean isWhitespace(char c) {
            switch(c) {
            case ' ':
            case '\t':
            case '\f':
            case '\n':
            case '\r':
            case 11: /* VT */
                return true;
            default:
                return false;
            }
        }

        /**
         * Check whether an individual character is legal. Uses the
         * flag bit-set passed into the constructor.
         * @param c
         * @return true if c is a legal character
         */
        private boolean characterIsLegal(char c) {
            switch(c) {
            case ' ' : return (mFlags & SPACE_OK) != 0;
            case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
              return (mFlags & OTHER_WHITESPACE_OK) != 0;
            case '\"': return (mFlags & DQUOTE_OK) != 0;
            case '\'': return (mFlags & SQUOTE_OK) != 0;
            case '<' : return (mFlags & LT_OK) != 0;
            case '>' : return (mFlags & GT_OK) != 0;
            case '&' : return (mFlags & AMP_OK) != 0;
            case '%' : return (mFlags & PCT_OK) != 0;
            case '\0': return (mFlags & NUL_OK) != 0;
            default  : return (c >= 32 && c < 127) ||
                ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
            }
        }
    }

    /**
     * Get the current value sanitizer used when processing
     * unregistered parameter values.
     * <p>
     * <b>Note:</b> The default unregistered parameter value sanitizer is
     * one that doesn't allow any special characters, similar to what
     * is returned by calling createAllIllegal.
     *
     * @return the current ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public ValueSanitizer getUnregisteredParameterValueSanitizer() {
        return mUnregisteredParameterValueSanitizer;
    }

    /**
     * Set the value sanitizer used when processing unregistered
     * parameter values.
     * @param sanitizer set the ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public void setUnregisteredParameterValueSanitizer(
            ValueSanitizer sanitizer) {
        mUnregisteredParameterValueSanitizer = sanitizer;
    }


    // Private fields for singleton sanitizers:

    private static final ValueSanitizer sAllIllegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_ILLEGAL);

    private static final ValueSanitizer sAllButNulLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);

    private static final ValueSanitizer sAllButWhitespaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);

    private static final ValueSanitizer sURLLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_LEGAL);

    private static final ValueSanitizer sUrlAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);

    private static final ValueSanitizer sAmpLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_LEGAL);

    private static final ValueSanitizer sAmpAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);

    private static final ValueSanitizer sSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.SPACE_LEGAL);

    private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);

    /**
     * Return a value sanitizer that does not allow any special characters,
     * and also does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllIllegal() {
        return sAllIllegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters. Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulLegal() {
        return sAllButNulLegal;
    }
    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters, space (' '), and other whitespace characters.
     * Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButWhitespaceLegal() {
        return sAllButWhitespaceLegal;
    }
    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs. Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlLegal() {
        return sURLLegal;
    }
    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs and allows spaces, which are not technically legal
     * in encoded URLs, but commonly appear anyway.
     * Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlAndSpaceLegal() {
        return sUrlAndSpaceLegal;
    }
    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&'). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpLegal() {
        return sAmpLegal;
    }
    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&') and space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpAndSpaceLegal() {
        return sAmpAndSpaceLegal;
    }
    /**
     * Return a value sanitizer that does not allow any special characters
     * except space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getSpaceLegal() {
        return sSpaceLegal;
    }
    /**
     * Return a value sanitizer that allows any special characters
     * except angle brackets ('<' and '>') and Nul ('\0').
     * Allows script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
        return sAllButNulAndAngleBracketsLegal;
    }

    /**
     * Constructs a UrlQuerySanitizer.
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters are not allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * <ul>
     */
    public UrlQuerySanitizer() {
    }

    /**
     * Constructs a UrlQuerySanitizer and parses a URL.
     * This constructor is provided for convenience when the
     * default parsing behavior is acceptable.
     * <p>
     * Because the URL is parsed before the constructor returns, there isn't
     * a chance to configure the sanitizer to change the parsing behavior.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * <ul>
     */
    public UrlQuerySanitizer(String url) {
        setAllowUnregisteredParamaters(true);
        parseUrl(url);
    }

    /**
     * Parse the query parameters out of an encoded URL.
     * Works by extracting the query portion from the URL and then
     * calling parseQuery(). If there is no query portion it is
     * treated as if the query portion is an empty string.
     * @param url the encoded URL to parse.
     */
    public void parseUrl(String url) {
        int queryIndex = url.indexOf('?');
        String query;
        if (queryIndex >= 0) {
            query = url.substring(queryIndex + 1);
        }
        else {
            query = "";
        }
        parseQuery(query);
    }

    /**
     * Parse a query. A query string is any number of parameter-value clauses
     * separated by any non-zero number of ampersands. A parameter-value clause
     * is a parameter followed by an equal sign, followed by a value. If the
     * equal sign is missing, the value is assumed to be the empty string.
     * @param query the query to parse.
     */
    public void parseQuery(String query) {
        clear();
        // Split by '&'
        StringTokenizer tokenizer = new StringTokenizer(query, "&");
        while(tokenizer.hasMoreElements()) {
            String attributeValuePair = tokenizer.nextToken();
            if (attributeValuePair.length() > 0) {
                int assignmentIndex = attributeValuePair.indexOf('=');
                if (assignmentIndex < 0) {
                    // No assignment found, treat as if empty value
                    parseEntry(attributeValuePair, "");
                }
                else {
                    parseEntry(attributeValuePair.substring(0, assignmentIndex),
                            attributeValuePair.substring(assignmentIndex + 1));
                }
            }
        }
    }

    /**
     * Get a set of all of the parameters found in the sanitized query.
     * <p>
     * Note: Do not modify this set. Treat it as a read-only set.
     * @return all the parameters found in the current query.
     */
    public Set<String> getParameterSet() {
        return mEntries.keySet();
    }

    /**
     * An array list of all of the parameter-value pairs in the sanitized
     * query, in the order they appeared in the query. May contain duplicate
     * parameters.
     * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
     */
    public List<ParameterValuePair> getParameterList() {
        return mEntriesList;
    }

    /**
     * Check if a parameter exists in the current sanitized query.
     * @param parameter the unencoded name of a parameter.
     * @return true if the parameter exists in the current sanitized queary.
     */
    public boolean hasParameter(String parameter) {
        return mEntries.containsKey(parameter);
    }

    /**
     * Get the value for a parameter in the current sanitized query.
     * Returns null if the parameter does not
     * exit.
     * @param parameter the unencoded name of a parameter.
     * @return the sanitized unencoded value of the parameter,
     * or null if the parameter does not exist.
     */
    public String getValue(String parameter) {
        return mEntries.get(parameter);
    }

    /**
     * Register a value sanitizer for a particular parameter. Can also be used
     * to replace or remove an already-set value sanitizer.
     * <p>
     * Registering a non-null value sanitizer for a particular parameter
     * makes that parameter a registered parameter.
     * @param parameter an unencoded parameter name
     * @param valueSanitizer the value sanitizer to use for a particular
     * parameter. May be null in order to unregister that parameter.
     * @see #getAllowUnregisteredParamaters()
     */
    public void registerParameter(String parameter,
            ValueSanitizer valueSanitizer) {
        if (valueSanitizer == null) {
            mSanitizers.remove(parameter);
        }
        mSanitizers.put(parameter, valueSanitizer);
    }

    /**
     * Register a value sanitizer for an array of parameters.
     * @param parameters An array of unencoded parameter names.
     * @param valueSanitizer
     * @see #registerParameter
     */
    public void registerParameters(String[] parameters,
            ValueSanitizer valueSanitizer) {
        int length = parameters.length;
        for(int i = 0; i < length; i++) {
            mSanitizers.put(parameters[i], valueSanitizer);
        }
    }

    /**
     * Set whether or not unregistered parameters are allowed. If they
     * are not allowed, then they will be dropped when a query is sanitized.
     * <p>
     * Defaults to false.
     * @param allowUnregisteredParamaters true to allow unregistered parameters.
     * @see #getAllowUnregisteredParamaters()
     */
    public void setAllowUnregisteredParamaters(
            boolean allowUnregisteredParamaters) {
        mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    }

    /**
     * Get whether or not unregistered parameters are allowed. If not
     * allowed, they will be dropped when a query is parsed.
     * @return true if unregistered parameters are allowed.
     * @see #setAllowUnregisteredParamaters(boolean)
     */
    public boolean getAllowUnregisteredParamaters() {
        return mAllowUnregisteredParamaters;
    }

    /**
     * Set whether or not the first occurrence of a repeated parameter is
     * preferred. True means the first repeated parameter is preferred.
     * False means that the last repeated parameter is preferred.
     * <p>
     * The preferred parameter is the one that is returned when getParameter
     * is called.
     * <p>
     * defaults to false.
     * @param preferFirstRepeatedParameter True if the first repeated
     * parameter is preferred.
     * @see #getPreferFirstRepeatedParameter()
     */
    public void setPreferFirstRepeatedParameter(
            boolean preferFirstRepeatedParameter) {
        mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    }

    /**
     * Get whether or not the first occurrence of a repeated parameter is
     * preferred.
     * @return true if the first occurrence of a repeated parameter is
     * preferred.
     * @see #setPreferFirstRepeatedParameter(boolean)
     */
    public boolean getPreferFirstRepeatedParameter() {
        return mPreferFirstRepeatedParameter;
    }

    /**
     * Parse an escaped parameter-value pair. The default implementation
     * unescapes both the parameter and the value, then looks up the
     * effective value sanitizer for the parameter and uses it to sanitize
     * the value. If all goes well then addSanitizedValue is called with
     * the unescaped parameter and the sanitized unescaped value.
     * @param parameter an escaped parameter
     * @param value an unsanitized escaped value
     */
    protected void parseEntry(String parameter, String value) {
        String unescapedParameter = unescape(parameter);
         ValueSanitizer valueSanitizer =
            getEffectiveValueSanitizer(unescapedParameter);

        if (valueSanitizer == null) {
            return;
        }
        String unescapedValue = unescape(value);
        String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
        addSanitizedEntry(unescapedParameter, sanitizedValue);
    }

    /**
     * Record a sanitized parameter-value pair. Override if you want to
     * do additional filtering or validation.
     * @param parameter an unescaped parameter
     * @param value a sanitized unescaped value
     */
    protected void addSanitizedEntry(String parameter, String value) {
        mEntriesList.add(
                new ParameterValuePair(parameter, value));
        if (mPreferFirstRepeatedParameter) {
            if (mEntries.containsKey(parameter)) {
                return;
            }
        }
        mEntries.put(parameter, value);
    }

    /**
     * Get the value sanitizer for a parameter. Returns null if there
     * is no value sanitizer registered for the parameter.
     * @param parameter the unescaped parameter
     * @return the currently registered value sanitizer for this parameter.
     * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
     */
    public ValueSanitizer getValueSanitizer(String parameter) {
        return mSanitizers.get(parameter);
    }

    /**
     * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
     * except if there is no value sanitizer registered for a parameter, and
     * unregistered parameters are allowed, then the default value sanitizer is
     * returned.
     * @param parameter an unescaped parameter
     * @return the effective value sanitizer for a parameter.
     */
    public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
        ValueSanitizer sanitizer = getValueSanitizer(parameter);
        if (sanitizer == null && mAllowUnregisteredParamaters) {
            sanitizer = getUnregisteredParameterValueSanitizer();
        }
        return sanitizer;
    }

    /**
     * Unescape an escaped string.
     * <ul>
     * <li>'+' characters are replaced by
     * ' ' characters.
     * <li>Valid "%xx" escape sequences are replaced by the
     * corresponding unescaped character.
     * <li>Invalid escape sequences such as %1z", are passed through unchanged.
     * <ol>
     * @param string the escaped string
     * @return the unescaped string.
     */
    private static final Pattern plusOrPercent = Pattern.compile("[+%]");
    public String unescape(String string) {
        final Matcher matcher = plusOrPercent.matcher(string);
        if (!matcher.find()) return string;
        final int firstEscape = matcher.start();

        int length = string.length();

        StringBuilder stringBuilder = new StringBuilder(length);
        stringBuilder.append(string.substring(0, firstEscape));
        for (int i = firstEscape; i < length; i++) {
            char c = string.charAt(i);
            if (c == '+') {
                c = ' ';
            } else if (c == '%' && i + 2 < length) {
                char c1 = string.charAt(i + 1);
                char c2 = string.charAt(i + 2);
                if (isHexDigit(c1) && isHexDigit(c2)) {
                    c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
                    i += 2;
                }
            }
            stringBuilder.append(c);
        }
        return stringBuilder.toString();
    }

    /**
     * Test if a character is a hexidecimal digit. Both upper case and lower
     * case hex digits are allowed.
     * @param c the character to test
     * @return true if c is a hex digit.
     */
    protected boolean isHexDigit(char c) {
        return decodeHexDigit(c) >= 0;
    }

    /**
     * Convert a character that represents a hexidecimal digit into an integer.
     * If the character is not a hexidecimal digit, then -1 is returned.
     * Both upper case and lower case hex digits are allowed.
     * @param c the hexidecimal digit.
     * @return the integer value of the hexidecimal digit.
     */

    protected int decodeHexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        else if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        else if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        else {
            return -1;
        }
    }

    /**
     * Clear the existing entries. Called to get ready to parse a new
     * query string.
     */
    protected void clear() {
        mEntries.clear();
        mEntriesList.clear();
    }
}