1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package libcore.net; 19 20 import java.io.ByteArrayOutputStream; 21 import java.net.URISyntaxException; 22 import java.nio.charset.Charset; 23 import java.nio.charset.StandardCharsets; 24 25 /** 26 * Encodes and decodes {@code application/x-www-form-urlencoded} content. 27 * Subclasses define exactly which characters are legal. 28 * 29 * <p>By default, UTF-8 is used to encode escaped characters. A single input 30 * character like "\u0080" may be encoded to multiple octets like %C2%80. 31 */ 32 public abstract class UriCodec { 33 34 /** 35 * Returns true if {@code c} does not need to be escaped. 36 */ isRetained(char c)37 protected abstract boolean isRetained(char c); 38 39 /** 40 * Throws if {@code s} is invalid according to this encoder. 41 */ validate(String uri, int start, int end, String name)42 public final String validate(String uri, int start, int end, String name) 43 throws URISyntaxException { 44 for (int i = start; i < end; ) { 45 char ch = uri.charAt(i); 46 if ((ch >= 'a' && ch <= 'z') 47 || (ch >= 'A' && ch <= 'Z') 48 || (ch >= '0' && ch <= '9') 49 || isRetained(ch)) { 50 i++; 51 } else if (ch == '%') { 52 if (i + 2 >= end) { 53 throw new URISyntaxException(uri, "Incomplete % sequence in " + name, i); 54 } 55 int d1 = hexToInt(uri.charAt(i + 1)); 56 int d2 = hexToInt(uri.charAt(i + 2)); 57 if (d1 == -1 || d2 == -1) { 58 throw new URISyntaxException(uri, "Invalid % sequence: " 59 + uri.substring(i, i + 3) + " in " + name, i); 60 } 61 i += 3; 62 } else { 63 throw new URISyntaxException(uri, "Illegal character in " + name, i); 64 } 65 } 66 return uri.substring(start, end); 67 } 68 69 /** 70 * Throws if {@code s} contains characters that are not letters, digits or 71 * in {@code legal}. 72 */ validateSimple(String s, String legal)73 public static void validateSimple(String s, String legal) 74 throws URISyntaxException { 75 for (int i = 0; i < s.length(); i++) { 76 char ch = s.charAt(i); 77 if (!((ch >= 'a' && ch <= 'z') 78 || (ch >= 'A' && ch <= 'Z') 79 || (ch >= '0' && ch <= '9') 80 || legal.indexOf(ch) > -1)) { 81 throw new URISyntaxException(s, "Illegal character", i); 82 } 83 } 84 } 85 86 /** 87 * Encodes {@code s} and appends the result to {@code builder}. 88 * 89 * @param isPartiallyEncoded true to fix input that has already been 90 * partially or fully encoded. For example, input of "hello%20world" is 91 * unchanged with isPartiallyEncoded=true but would be double-escaped to 92 * "hello%2520world" otherwise. 93 */ appendEncoded(StringBuilder builder, String s, Charset charset, boolean isPartiallyEncoded)94 private void appendEncoded(StringBuilder builder, String s, Charset charset, 95 boolean isPartiallyEncoded) { 96 if (s == null) { 97 throw new NullPointerException("s == null"); 98 } 99 100 int escapeStart = -1; 101 for (int i = 0; i < s.length(); i++) { 102 char c = s.charAt(i); 103 if ((c >= 'a' && c <= 'z') 104 || (c >= 'A' && c <= 'Z') 105 || (c >= '0' && c <= '9') 106 || isRetained(c) 107 || (c == '%' && isPartiallyEncoded)) { 108 if (escapeStart != -1) { 109 appendHex(builder, s.substring(escapeStart, i), charset); 110 escapeStart = -1; 111 } 112 if (c == '%' && isPartiallyEncoded) { 113 // this is an encoded 3-character sequence like "%20" 114 builder.append(s, i, Math.min(i + 3, s.length())); 115 i += 2; 116 } else if (c == ' ') { 117 builder.append('+'); 118 } else { 119 builder.append(c); 120 } 121 } else if (escapeStart == -1) { 122 escapeStart = i; 123 } 124 } 125 if (escapeStart != -1) { 126 appendHex(builder, s.substring(escapeStart, s.length()), charset); 127 } 128 } 129 encode(String s, Charset charset)130 public final String encode(String s, Charset charset) { 131 // Guess a bit larger for encoded form 132 StringBuilder builder = new StringBuilder(s.length() + 16); 133 appendEncoded(builder, s, charset, false); 134 return builder.toString(); 135 } 136 appendEncoded(StringBuilder builder, String s)137 public final void appendEncoded(StringBuilder builder, String s) { 138 appendEncoded(builder, s, StandardCharsets.UTF_8, false); 139 } 140 appendPartiallyEncoded(StringBuilder builder, String s)141 public final void appendPartiallyEncoded(StringBuilder builder, String s) { 142 appendEncoded(builder, s, StandardCharsets.UTF_8, true); 143 } 144 145 /** 146 * @param convertPlus true to convert '+' to ' '. 147 * @param throwOnFailure true to throw an IllegalArgumentException on 148 * invalid escape sequences; false to replace them with the replacement 149 * character (U+fffd). 150 */ decode(String s, boolean convertPlus, Charset charset, boolean throwOnFailure)151 public static String decode(String s, boolean convertPlus, Charset charset, 152 boolean throwOnFailure) { 153 if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) { 154 return s; 155 } 156 157 StringBuilder result = new StringBuilder(s.length()); 158 ByteArrayOutputStream out = new ByteArrayOutputStream(); 159 for (int i = 0; i < s.length();) { 160 char c = s.charAt(i); 161 if (c == '%') { 162 do { 163 int d1, d2; 164 if (i + 2 < s.length() 165 && (d1 = hexToInt(s.charAt(i + 1))) != -1 166 && (d2 = hexToInt(s.charAt(i + 2))) != -1) { 167 out.write((byte) ((d1 << 4) + d2)); 168 } else if (throwOnFailure) { 169 throw new IllegalArgumentException("Invalid % sequence at " + i + ": " + s); 170 } else { 171 byte[] replacement = "\ufffd".getBytes(charset); 172 out.write(replacement, 0, replacement.length); 173 } 174 i += 3; 175 } while (i < s.length() && s.charAt(i) == '%'); 176 result.append(new String(out.toByteArray(), charset)); 177 out.reset(); 178 } else { 179 if (convertPlus && c == '+') { 180 c = ' '; 181 } 182 result.append(c); 183 i++; 184 } 185 } 186 return result.toString(); 187 } 188 189 /** 190 * Like {@link Character#digit}, but without support for non-ASCII 191 * characters. 192 */ hexToInt(char c)193 private static int hexToInt(char c) { 194 if ('0' <= c && c <= '9') { 195 return c - '0'; 196 } else if ('a' <= c && c <= 'f') { 197 return 10 + (c - 'a'); 198 } else if ('A' <= c && c <= 'F') { 199 return 10 + (c - 'A'); 200 } else { 201 return -1; 202 } 203 } 204 decode(String s)205 public static String decode(String s) { 206 return decode(s, false, StandardCharsets.UTF_8, true); 207 } 208 appendHex(StringBuilder builder, String s, Charset charset)209 private static void appendHex(StringBuilder builder, String s, Charset charset) { 210 for (byte b : s.getBytes(charset)) { 211 appendHex(builder, b); 212 } 213 } 214 appendHex(StringBuilder sb, byte b)215 private static void appendHex(StringBuilder sb, byte b) { 216 sb.append('%'); 217 sb.append(Byte.toHexString(b, true)); 218 } 219 } 220