1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. Oracle designates this 9 * particular file as subject to the "Classpath" exception as provided 10 * by Oracle in the LICENSE file that accompanied this code. 11 * 12 * This code is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 * version 2 for more details (a copy is included in the LICENSE file that 16 * accompanied this code). 17 * 18 * You should have received a copy of the GNU General Public License version 19 * 2 along with this work; if not, write to the Free Software Foundation, 20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 23 * or visit www.oracle.com if you need additional information or have any 24 * questions. 25 */ 26 27 package java.net; 28 29 import java.io.*; 30 import java.nio.charset.Charset; 31 import java.nio.charset.IllegalCharsetNameException; 32 import java.nio.charset.UnsupportedCharsetException; 33 import java.util.Objects; 34 35 /** 36 * Utility class for HTML form decoding. This class contains static methods 37 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE> 38 * MIME format. 39 * <p> 40 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed 41 * that all characters in the encoded string are one of the following: 42 * "{@code a}" through "{@code z}", 43 * "{@code A}" through "{@code Z}", 44 * "{@code 0}" through "{@code 9}", and 45 * "{@code -}", "{@code _}", 46 * "{@code .}", and "{@code *}". The 47 * character "{@code %}" is allowed but is interpreted 48 * as the start of a special escaped sequence. 49 * <p> 50 * The following rules are applied in the conversion: 51 * 52 * <ul> 53 * <li>The alphanumeric characters "{@code a}" through 54 * "{@code z}", "{@code A}" through 55 * "{@code Z}" and "{@code 0}" 56 * through "{@code 9}" remain the same. 57 * <li>The special characters "{@code .}", 58 * "{@code -}", "{@code *}", and 59 * "{@code _}" remain the same. 60 * <li>The plus sign "{@code +}" is converted into a 61 * space character " " . 62 * <li>A sequence of the form "<i>{@code %xy}</i>" will be 63 * treated as representing a byte where <i>xy</i> is the two-digit 64 * hexadecimal representation of the 8 bits. Then, all substrings 65 * that contain one or more of these byte sequences consecutively 66 * will be replaced by the character(s) whose encoding would result 67 * in those consecutive bytes. 68 * The encoding scheme used to decode these characters may be specified, 69 * or if unspecified, the default encoding of the platform will be used. 70 * </ul> 71 * <p> 72 * There are two possible ways in which this decoder could deal with 73 * illegal strings. It could either leave illegal characters alone or 74 * it could throw an {@link java.lang.IllegalArgumentException}. 75 * Which approach the decoder takes is left to the 76 * implementation. 77 * 78 * @author Mark Chamness 79 * @author Michael McCloskey 80 * @since 1.2 81 */ 82 83 public class URLDecoder { 84 85 // The platform default encoding 86 static String dfltEncName = URLEncoder.dfltEncName; 87 88 /** 89 * Decodes a {@code x-www-form-urlencoded} string. 90 * The platform's default encoding is used to determine what characters 91 * are represented by any consecutive sequences of the form 92 * "<i>{@code %xy}</i>". 93 * @param s the {@code String} to decode 94 * @deprecated The resulting string may vary depending on the platform's 95 * default encoding. Instead, use the decode(String,String) method 96 * to specify the encoding. 97 * @return the newly decoded {@code String} 98 */ 99 @Deprecated decode(String s)100 public static String decode(String s) { 101 102 String str = null; 103 104 try { 105 str = decode(s, dfltEncName); 106 } catch (UnsupportedEncodingException e) { 107 // The system should always have the platform default 108 } 109 110 return str; 111 } 112 113 /** 114 * Decodes an {@code application/x-www-form-urlencoded} string using 115 * a specific encoding scheme. 116 * 117 * <p> 118 * This method behaves the same as {@linkplain String decode(String s, Charset charset)} 119 * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset} 120 * using the given encoding name. 121 * 122 * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException} 123 * when illegal strings are encountered. 124 * 125 * @param s the {@code String} to decode 126 * @param enc The name of a supported 127 * <a href="../lang/package-summary.html#charenc">character 128 * encoding</a>. 129 * @return the newly decoded {@code String} 130 * @throws UnsupportedEncodingException 131 * If character encoding needs to be consulted, but 132 * named character encoding is not supported 133 * @see URLEncoder#encode(java.lang.String, java.lang.String) 134 * @since 1.4 135 */ decode(String s, String enc)136 public static String decode(String s, String enc) throws UnsupportedEncodingException { 137 if (enc.isEmpty()) { 138 throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter"); 139 } 140 141 try { 142 Charset charset = Charset.forName(enc); 143 return decode(s, charset); 144 } catch (IllegalCharsetNameException | UnsupportedCharsetException e) { 145 throw new UnsupportedEncodingException(enc); 146 } 147 } 148 149 /** 150 * Decodes an {@code application/x-www-form-urlencoded} string using 151 * a specific {@linkplain java.nio.charset.Charset Charset}. 152 * The supplied charset is used to determine 153 * what characters are represented by any consecutive sequences of the 154 * form "<i>{@code %xy}</i>". 155 * <p> 156 * <em><strong>Note:</strong> The <a href= 157 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"> 158 * World Wide Web Consortium Recommendation</a> states that 159 * UTF-8 should be used. Not doing so may introduce 160 * incompatibilities.</em> 161 * 162 * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException} 163 * when illegal strings are encountered. 164 * 165 * @param s the {@code String} to decode 166 * @param charset the given charset 167 * @return the newly decoded {@code String} 168 * @throws NullPointerException if {@code s} or {@code charset} is {@code null} 169 * @throws IllegalArgumentException if the implementation encounters illegal 170 * characters 171 * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset) 172 * @since 10 173 */ decode(String s, Charset charset)174 public static String decode(String s, Charset charset) { 175 Objects.requireNonNull(charset, "Charset"); 176 boolean needToChange = false; 177 int numChars = s.length(); 178 StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars); 179 int i = 0; 180 181 char c; 182 byte[] bytes = null; 183 while (i < numChars) { 184 c = s.charAt(i); 185 switch (c) { 186 case '+': 187 sb.append(' '); 188 i++; 189 needToChange = true; 190 break; 191 case '%': 192 /* 193 * Starting with this instance of %, process all 194 * consecutive substrings of the form %xy. Each 195 * substring %xy will yield a byte. Convert all 196 * consecutive bytes obtained this way to whatever 197 * character(s) they represent in the provided 198 * encoding. 199 */ 200 201 try { 202 203 // (numChars-i)/3 is an upper bound for the number 204 // of remaining bytes 205 if (bytes == null) 206 bytes = new byte[(numChars-i)/3]; 207 int pos = 0; 208 209 while ( ((i+2) < numChars) && 210 (c=='%')) { 211 // BEGIN Android-changed: App compat. Forbid non-hex chars after '%'. 212 if (!isValidHexChar(s.charAt(i+1)) || !isValidHexChar(s.charAt(i+2))) { 213 throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern : " 214 + s.substring(i, i + 3)); 215 } 216 // END Android-changed: App compat. Forbid non-hex chars after '%'. 217 int v = Integer.parseInt(s.substring(i+1,i+3),16); 218 if (v < 0) 219 // Android-changed: Improve error message by printing the string value. 220 throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value : " 221 + s.substring(i, i + 3)); 222 bytes[pos++] = (byte) v; 223 i+= 3; 224 if (i < numChars) 225 c = s.charAt(i); 226 } 227 228 // A trailing, incomplete byte encoding such as 229 // "%x" will cause an exception to be thrown 230 231 if ((i < numChars) && (c=='%')) 232 throw new IllegalArgumentException( 233 "URLDecoder: Incomplete trailing escape (%) pattern"); 234 235 sb.append(new String(bytes, 0, pos, charset)); 236 } catch (NumberFormatException e) { 237 throw new IllegalArgumentException( 238 "URLDecoder: Illegal hex characters in escape (%) pattern - " 239 + e.getMessage()); 240 } 241 needToChange = true; 242 break; 243 default: 244 sb.append(c); 245 i++; 246 break; 247 } 248 } 249 250 return (needToChange? sb.toString() : s); 251 } 252 253 // BEGIN Android-added: App compat. Forbid non-hex chars after '%'. isValidHexChar(char c)254 private static boolean isValidHexChar(char c) { 255 return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); 256 } 257 // END Android-added: App compat. Forbid non-hex chars after '%'. 258 } 259