1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */
17 
18 package libcore.net;
19 
20 import java.io.ByteArrayOutputStream;
21 import java.net.URISyntaxException;
22 import java.nio.charset.Charset;
23 import java.nio.charset.StandardCharsets;
24 
25 /**
26  * Encodes and decodes {@code application/x-www-form-urlencoded} content.
27  * Subclasses define exactly which characters are legal.
28  *
29  * <p>By default, UTF-8 is used to encode escaped characters. A single input
30  * character like "\u0080" may be encoded to multiple octets like %C2%80.
31  */
public abstract class UriCodec {

    /** Upper-case hexadecimal digits, indexed by nibble value (0-15). */
    private static final char[] UPPER_HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Returns true if {@code c} does not need to be escaped.
     *
     * <p>ASCII letters and digits are always accepted by this class without
     * consulting this method; it is only called for other characters.
     */
    protected abstract boolean isRetained(char c);

    /**
     * Returns true if {@code c} is one of {@code a-z}, {@code A-Z} or
     * {@code 0-9}.
     */
    private static boolean isAsciiAlphanumeric(char c) {
        return (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9');
    }

    /**
     * Checks that the range {@code [start, end)} of {@code uri} contains only
     * ASCII letters, ASCII digits, characters accepted by
     * {@link #isRetained}, and well-formed {@code %XX} escape sequences.
     *
     * @param uri the string containing the range to check.
     * @param start index of the first character to check (inclusive).
     * @param end index after the last character to check (exclusive).
     * @param name a label for the checked component; only used in exception
     *     messages.
     * @return {@code uri.substring(start, end)} if the range is valid.
     * @throws URISyntaxException if the range contains an illegal character,
     *     a '%' that is not followed by two characters before {@code end}, or
     *     a '%' followed by non-hexadecimal characters. The exception's index
     *     is the offset of the offending character in {@code uri}.
     */
    public final String validate(String uri, int start, int end, String name)
            throws URISyntaxException {
        for (int i = start; i < end; ) {
            char ch = uri.charAt(i);
            if (isAsciiAlphanumeric(ch) || isRetained(ch)) {
                i++;
            } else if (ch == '%') {
                // Both hex digits must lie inside the validated range.
                if (i + 2 >= end) {
                    throw new URISyntaxException(uri, "Incomplete % sequence in " + name, i);
                }
                int d1 = hexToInt(uri.charAt(i + 1));
                int d2 = hexToInt(uri.charAt(i + 2));
                if (d1 == -1 || d2 == -1) {
                    throw new URISyntaxException(uri, "Invalid % sequence: "
                            + uri.substring(i, i + 3) + " in " + name, i);
                }
                i += 3;
            } else {
                throw new URISyntaxException(uri, "Illegal character in " + name, i);
            }
        }
        return uri.substring(start, end);
    }

    /**
     * Throws if {@code s} contains characters that are not ASCII letters,
     * ASCII digits or in {@code legal}.
     *
     * @param s the string to check.
     * @param legal the additional characters that are permitted in {@code s}.
     * @throws URISyntaxException if an illegal character is found; the
     *     exception's index is the offset of the first such character.
     */
    public static void validateSimple(String s, String legal)
            throws URISyntaxException {
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            if (!isAsciiAlphanumeric(ch) && legal.indexOf(ch) == -1) {
                throw new URISyntaxException(s, "Illegal character", i);
            }
        }
    }

    /**
     * Encodes {@code s} and appends the result to {@code builder}.
     *
     * <p>ASCII letters, ASCII digits and characters accepted by
     * {@link #isRetained} are copied through, except that a retained space is
     * written as '+'. Every other run of characters is converted to bytes
     * using {@code charset} and each byte is written as {@code %XX}.
     *
     * @param builder the destination for the encoded text.
     * @param s the text to encode.
     * @param charset the charset used to turn escaped characters into bytes.
     * @param isPartiallyEncoded true to fix input that has already been
     *     partially or fully encoded. For example, input of "hello%20world" is
     *     unchanged with isPartiallyEncoded=true but would be double-escaped to
     *     "hello%2520world" otherwise.
     * @throws NullPointerException if {@code s} is null.
     */
    private void appendEncoded(StringBuilder builder, String s, Charset charset,
            boolean isPartiallyEncoded) {
        if (s == null) {
            throw new NullPointerException("s == null");
        }

        // Start of the pending run of characters that must be escaped, or -1.
        // Runs are escaped as a unit so that multi-char sequences (such as
        // surrogate pairs) are handed to the charset encoder together.
        int escapeStart = -1;
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            boolean existingEscape = c == '%' && isPartiallyEncoded;
            if (isAsciiAlphanumeric(c) || isRetained(c) || existingEscape) {
                if (escapeStart != -1) {
                    appendHex(builder, s.substring(escapeStart, i), charset);
                    escapeStart = -1;
                }
                if (existingEscape) {
                    // this is an encoded 3-character sequence like "%20";
                    // a truncated sequence at the end of s is copied as-is.
                    builder.append(s, i, Math.min(i + 3, s.length()));
                    i += 2;
                } else if (c == ' ') {
                    builder.append('+');
                } else {
                    builder.append(c);
                }
            } else if (escapeStart == -1) {
                escapeStart = i;
            }
        }
        if (escapeStart != -1) {
            appendHex(builder, s.substring(escapeStart, s.length()), charset);
        }
    }

    /**
     * Returns the encoded form of {@code s}, using {@code charset} to convert
     * escaped characters to bytes. Existing '%' characters in {@code s} are
     * not treated as escape sequences.
     *
     * @throws NullPointerException if {@code s} is null.
     */
    public final String encode(String s, Charset charset) {
        if (s == null) {
            throw new NullPointerException("s == null");
        }
        // Guess a bit larger for encoded form
        StringBuilder builder = new StringBuilder(s.length() + 16);
        appendEncoded(builder, s, charset, false);
        return builder.toString();
    }

    /**
     * Encodes {@code s} using UTF-8 and appends the result to
     * {@code builder}. Existing '%' characters in {@code s} are not treated
     * as escape sequences.
     *
     * @throws NullPointerException if {@code s} is null.
     */
    public final void appendEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, false);
    }

    /**
     * Encodes {@code s} using UTF-8 and appends the result to
     * {@code builder}, copying each '%' and the two characters that follow it
     * through unchanged so that already-encoded input is not double-escaped.
     *
     * @throws NullPointerException if {@code s} is null.
     */
    public final void appendPartiallyEncoded(StringBuilder builder, String s) {
        appendEncoded(builder, s, StandardCharsets.UTF_8, true);
    }

    /**
     * Decodes the {@code %XX} escape sequences in {@code s}. Consecutive
     * escape sequences are collected into one byte array and decoded together
     * using {@code charset}.
     *
     * <p>If {@code s} contains no '%' (and, when {@code convertPlus} is true,
     * no '+'), {@code s} itself is returned.
     *
     * @param s the text to decode.
     * @param convertPlus true to convert '+' to ' '.
     * @param charset the charset used to decode the escaped bytes.
     * @param throwOnFailure true to throw an IllegalArgumentException on
     *     invalid escape sequences; false to replace them with the replacement
     *     character (U+fffd). In the latter case the '%' and the two
     *     characters after it (if present) are skipped.
     * @return the decoded text.
     * @throws IllegalArgumentException if {@code throwOnFailure} is true and
     *     a '%' is not followed by two hexadecimal digits.
     */
    public static String decode(String s, boolean convertPlus, Charset charset,
            boolean throwOnFailure) {
        if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) {
            return s;
        }

        StringBuilder result = new StringBuilder(s.length());
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (int i = 0; i < s.length();) {
            char c = s.charAt(i);
            if (c == '%') {
                // Gather the whole run of adjacent escapes before decoding so
                // that multi-byte characters are reassembled correctly.
                do {
                    int d1, d2;
                    if (i + 2 < s.length()
                            && (d1 = hexToInt(s.charAt(i + 1))) != -1
                            && (d2 = hexToInt(s.charAt(i + 2))) != -1) {
                        out.write((byte) ((d1 << 4) + d2));
                    } else if (throwOnFailure) {
                        throw new IllegalArgumentException("Invalid % sequence at " + i + ": " + s);
                    } else {
                        byte[] replacement = "\ufffd".getBytes(charset);
                        out.write(replacement, 0, replacement.length);
                    }
                    i += 3;
                } while (i < s.length() && s.charAt(i) == '%');
                result.append(new String(out.toByteArray(), charset));
                out.reset();
            } else {
                if (convertPlus && c == '+') {
                    c = ' ';
                }
                result.append(c);
                i++;
            }
        }
        return result.toString();
    }

    /**
     * Like {@link Character#digit}, but without support for non-ASCII
     * characters.
     *
     * @return the value (0-15) of the hexadecimal digit {@code c}, or -1 if
     *     {@code c} is not one of {@code 0-9}, {@code a-f} or {@code A-F}.
     */
    private static int hexToInt(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        } else if ('a' <= c && c <= 'f') {
            return 10 + (c - 'a');
        } else if ('A' <= c && c <= 'F') {
            return 10 + (c - 'A');
        } else {
            return -1;
        }
    }

    /**
     * Decodes the {@code %XX} escape sequences in {@code s} as UTF-8. '+' is
     * left unchanged.
     *
     * @throws IllegalArgumentException if a '%' is not followed by two
     *     hexadecimal digits.
     */
    public static String decode(String s) {
        return decode(s, false, StandardCharsets.UTF_8, true);
    }

    /**
     * Converts {@code s} to bytes using {@code charset} and appends each byte
     * to {@code builder} as a {@code %XX} escape.
     */
    private static void appendHex(StringBuilder builder, String s, Charset charset) {
        for (byte b : s.getBytes(charset)) {
            appendHex(builder, b);
        }
    }

    /**
     * Appends {@code b} to {@code sb} as '%' followed by two upper-case
     * hexadecimal digits.
     */
    private static void appendHex(StringBuilder sb, byte b) {
        // Table lookup keeps this independent of any non-public Byte helpers.
        sb.append('%');
        sb.append(UPPER_HEX_DIGITS[(b >> 4) & 0xf]);
        sb.append(UPPER_HEX_DIGITS[b & 0xf]);
    }
}
220