1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.  Oracle designates this
9  * particular file as subject to the "Classpath" exception as provided
10  * by Oracle in the LICENSE file that accompanied this code.
11  *
12  * This code is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15  * version 2 for more details (a copy is included in the LICENSE file that
16  * accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License version
19  * 2 along with this work; if not, write to the Free Software Foundation,
20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23  * or visit www.oracle.com if you need additional information or have any
24  * questions.
25  */
26 
27 package java.net;
28 
29 import java.io.*;
30 import java.nio.charset.Charset;
31 import java.nio.charset.IllegalCharsetNameException;
32 import java.nio.charset.UnsupportedCharsetException;
33 import java.util.Objects;
34 
35 /**
36  * Utility class for HTML form decoding. This class contains static methods
37  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
38  * MIME format.
39  * <p>
40  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
41  * that all characters in the encoded string are one of the following:
42  * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
43  * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
44  * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
45  * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
46  * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
47  * character &quot;{@code %}&quot; is allowed but is interpreted
48  * as the start of a special escaped sequence.
49  * <p>
50  * The following rules are applied in the conversion:
51  *
52  * <ul>
53  * <li>The alphanumeric characters &quot;{@code a}&quot; through
54  *     &quot;{@code z}&quot;, &quot;{@code A}&quot; through
55  *     &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
56  *     through &quot;{@code 9}&quot; remain the same.
57  * <li>The special characters &quot;{@code .}&quot;,
58  *     &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
59  *     &quot;{@code _}&quot; remain the same.
60  * <li>The plus sign &quot;{@code +}&quot; is converted into a
61  *     space character &quot; &nbsp; &quot; .
62  * <li>A sequence of the form "<i>{@code %xy}</i>" will be
63  *     treated as representing a byte where <i>xy</i> is the two-digit
64  *     hexadecimal representation of the 8 bits. Then, all substrings
65  *     that contain one or more of these byte sequences consecutively
66  *     will be replaced by the character(s) whose encoding would result
67  *     in those consecutive bytes.
68  *     The encoding scheme used to decode these characters may be specified,
69  *     or if unspecified, the default encoding of the platform will be used.
70  * </ul>
71  * <p>
72  * There are two possible ways in which this decoder could deal with
73  * illegal strings.  It could either leave illegal characters alone or
74  * it could throw an {@link java.lang.IllegalArgumentException}.
75  * Which approach the decoder takes is left to the
76  * implementation.
77  *
78  * @author  Mark Chamness
79  * @author  Michael McCloskey
80  * @since   1.2
81  */
82 
83 public class URLDecoder {
84 
85     // The platform default encoding
86     static String dfltEncName = URLEncoder.dfltEncName;
87 
88     /**
89      * Decodes a {@code x-www-form-urlencoded} string.
90      * The platform's default encoding is used to determine what characters
91      * are represented by any consecutive sequences of the form
92      * "<i>{@code %xy}</i>".
93      * @param s the {@code String} to decode
94      * @deprecated The resulting string may vary depending on the platform's
95      *          default encoding. Instead, use the decode(String,String) method
96      *          to specify the encoding.
97      * @return the newly decoded {@code String}
98      */
99     @Deprecated
decode(String s)100     public static String decode(String s) {
101 
102         String str = null;
103 
104         try {
105             str = decode(s, dfltEncName);
106         } catch (UnsupportedEncodingException e) {
107             // The system should always have the platform default
108         }
109 
110         return str;
111     }
112 
113     /**
114      * Decodes an {@code application/x-www-form-urlencoded} string using
115      * a specific encoding scheme.
116      *
117      * <p>
118      * This method behaves the same as {@linkplain String decode(String s, Charset charset)}
119      * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
120      * using the given encoding name.
121      *
122      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
123      * when illegal strings are encountered.
124      *
125      * @param s the {@code String} to decode
126      * @param enc   The name of a supported
127      *    <a href="../lang/package-summary.html#charenc">character
128      *    encoding</a>.
129      * @return the newly decoded {@code String}
130      * @throws UnsupportedEncodingException
131      *             If character encoding needs to be consulted, but
132      *             named character encoding is not supported
133      * @see URLEncoder#encode(java.lang.String, java.lang.String)
134      * @since 1.4
135      */
decode(String s, String enc)136     public static String decode(String s, String enc) throws UnsupportedEncodingException {
137         if (enc.isEmpty()) {
138             throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
139         }
140 
141         try {
142             Charset charset = Charset.forName(enc);
143             return decode(s, charset);
144         } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
145             throw new UnsupportedEncodingException(enc);
146         }
147     }
148 
149     /**
150      * Decodes an {@code application/x-www-form-urlencoded} string using
151      * a specific {@linkplain java.nio.charset.Charset Charset}.
152      * The supplied charset is used to determine
153      * what characters are represented by any consecutive sequences of the
154      * form "<i>{@code %xy}</i>".
155      * <p>
156      * <em><strong>Note:</strong> The <a href=
157      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
158      * World Wide Web Consortium Recommendation</a> states that
159      * UTF-8 should be used. Not doing so may introduce
160      * incompatibilities.</em>
161      *
162      * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
163      * when illegal strings are encountered.
164      *
165      * @param s the {@code String} to decode
166      * @param charset the given charset
167      * @return the newly decoded {@code String}
168      * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
169      * @throws IllegalArgumentException if the implementation encounters illegal
170      * characters
171      * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
172      * @since 10
173      */
decode(String s, Charset charset)174     public static String decode(String s, Charset charset) {
175         Objects.requireNonNull(charset, "Charset");
176         boolean needToChange = false;
177         int numChars = s.length();
178         StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
179         int i = 0;
180 
181         char c;
182         byte[] bytes = null;
183         while (i < numChars) {
184             c = s.charAt(i);
185             switch (c) {
186             case '+':
187                 sb.append(' ');
188                 i++;
189                 needToChange = true;
190                 break;
191             case '%':
192                 /*
193                  * Starting with this instance of %, process all
194                  * consecutive substrings of the form %xy. Each
195                  * substring %xy will yield a byte. Convert all
196                  * consecutive  bytes obtained this way to whatever
197                  * character(s) they represent in the provided
198                  * encoding.
199                  */
200 
201                 try {
202 
203                     // (numChars-i)/3 is an upper bound for the number
204                     // of remaining bytes
205                     if (bytes == null)
206                         bytes = new byte[(numChars-i)/3];
207                     int pos = 0;
208 
209                     while ( ((i+2) < numChars) &&
210                             (c=='%')) {
211                         // BEGIN Android-changed: App compat. Forbid non-hex chars after '%'.
212                         if (!isValidHexChar(s.charAt(i+1)) || !isValidHexChar(s.charAt(i+2))) {
213                             throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern : "
214                                     + s.substring(i, i + 3));
215                         }
216                         // END Android-changed: App compat. Forbid non-hex chars after '%'.
217                         int v = Integer.parseInt(s.substring(i+1,i+3),16);
218                         if (v < 0)
219                             // Android-changed: Improve error message by printing the string value.
220                             throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value : "
221                                     + s.substring(i, i + 3));
222                         bytes[pos++] = (byte) v;
223                         i+= 3;
224                         if (i < numChars)
225                             c = s.charAt(i);
226                     }
227 
228                     // A trailing, incomplete byte encoding such as
229                     // "%x" will cause an exception to be thrown
230 
231                     if ((i < numChars) && (c=='%'))
232                         throw new IllegalArgumentException(
233                          "URLDecoder: Incomplete trailing escape (%) pattern");
234 
235                     sb.append(new String(bytes, 0, pos, charset));
236                 } catch (NumberFormatException e) {
237                     throw new IllegalArgumentException(
238                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
239                     + e.getMessage());
240                 }
241                 needToChange = true;
242                 break;
243             default:
244                 sb.append(c);
245                 i++;
246                 break;
247             }
248         }
249 
250         return (needToChange? sb.toString() : s);
251     }
252 
253     // BEGIN Android-added: App compat. Forbid non-hex chars after '%'.
isValidHexChar(char c)254     private static boolean isValidHexChar(char c) {
255         return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
256     }
257     // END Android-added: App compat. Forbid non-hex chars after '%'.
258 }
259