1 /*
2  * Copyright 2001-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.apache.commons.codec.net;
18 
19 import java.io.ByteArrayOutputStream;
20 import java.io.UnsupportedEncodingException;
21 import java.util.BitSet;
22 
23 import org.apache.commons.codec.BinaryDecoder;
24 import org.apache.commons.codec.BinaryEncoder;
25 import org.apache.commons.codec.DecoderException;
26 import org.apache.commons.codec.EncoderException;
27 import org.apache.commons.codec.StringDecoder;
28 import org.apache.commons.codec.StringEncoder;
29 
30 /**
31  * <p>Implements the 'www-form-urlencoded' encoding scheme,
32  * also misleadingly known as URL encoding.</p>
33  *
34  * <p>For more detailed information please refer to
35  * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
36  * Chapter 17.13.4 'Form content types'</a> of the
37  * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a></p>
38  *
39  * <p>
40  * This codec is meant to be a replacement for standard Java classes
41  * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
42  * on older Java platforms, as these classes in Java versions below
43  * 1.4 rely on the platform's default charset encoding.
44  * </p>
45  *
46  * @author Apache Software Foundation
47  * @since 1.2
48  * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $
49  *
50  * @deprecated Please use {@link java.net.URL#openConnection} instead.
51  *     Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a>
52  *     for further details.
53  */
54 @Deprecated
55 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
56 
57     /**
58      * The default charset used for string decoding and encoding.
59      */
60     protected String charset = StringEncodings.UTF8;
61 
62     protected static byte ESCAPE_CHAR = '%';
63     /**
64      * BitSet of www-form-url safe characters.
65      */
66     protected static final BitSet WWW_FORM_URL = new BitSet(256);
67 
68     // Static initializer for www_form_url
69     static {
70         // alpha characters
71         for (int i = 'a'; i <= 'z'; i++) {
72             WWW_FORM_URL.set(i);
73         }
74         for (int i = 'A'; i <= 'Z'; i++) {
75             WWW_FORM_URL.set(i);
76         }
77         // numeric characters
78         for (int i = '0'; i <= '9'; i++) {
79             WWW_FORM_URL.set(i);
80         }
81         // special chars
82         WWW_FORM_URL.set('-');
83         WWW_FORM_URL.set('_');
84         WWW_FORM_URL.set('.');
85         WWW_FORM_URL.set('*');
86         // blank to be replaced with +
87         WWW_FORM_URL.set(' ');
88     }
89 
90 
91     /**
92      * Default constructor.
93      */
URLCodec()94     public URLCodec() {
95         super();
96     }
97 
98     /**
99      * Constructor which allows for the selection of a default charset
100      *
101      * @param charset the default string charset to use.
102      */
URLCodec(String charset)103     public URLCodec(String charset) {
104         super();
105         this.charset = charset;
106     }
107 
108     /**
109      * Encodes an array of bytes into an array of URL safe 7-bit
110      * characters. Unsafe characters are escaped.
111      *
112      * @param urlsafe bitset of characters deemed URL safe
113      * @param bytes array of bytes to convert to URL safe characters
114      * @return array of bytes containing URL safe characters
115      */
encodeUrl(BitSet urlsafe, byte[] bytes)116     public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes)
117     {
118         if (bytes == null) {
119             return null;
120         }
121         if (urlsafe == null) {
122             urlsafe = WWW_FORM_URL;
123         }
124 
125         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
126         for (int i = 0; i < bytes.length; i++) {
127             int b = bytes[i];
128             if (b < 0) {
129                 b = 256 + b;
130             }
131             if (urlsafe.get(b)) {
132                 if (b == ' ') {
133                     b = '+';
134                 }
135                 buffer.write(b);
136             } else {
137                 buffer.write('%');
138                 char hex1 = Character.toUpperCase(
139                   Character.forDigit((b >> 4) & 0xF, 16));
140                 char hex2 = Character.toUpperCase(
141                   Character.forDigit(b & 0xF, 16));
142                 buffer.write(hex1);
143                 buffer.write(hex2);
144             }
145         }
146         return buffer.toByteArray();
147     }
148 
149 
150     /**
151      * Decodes an array of URL safe 7-bit characters into an array of
152      * original bytes. Escaped characters are converted back to their
153      * original representation.
154      *
155      * @param bytes array of URL safe characters
156      * @return array of original bytes
157      * @throws DecoderException Thrown if URL decoding is unsuccessful
158      */
decodeUrl(byte[] bytes)159     public static final byte[] decodeUrl(byte[] bytes)
160          throws DecoderException
161     {
162         if (bytes == null) {
163             return null;
164         }
165         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
166         for (int i = 0; i < bytes.length; i++) {
167             int b = bytes[i];
168             if (b == '+') {
169                 buffer.write(' ');
170             } else if (b == '%') {
171                 try {
172                     int u = Character.digit((char)bytes[++i], 16);
173                     int l = Character.digit((char)bytes[++i], 16);
174                     if (u == -1 || l == -1) {
175                         throw new DecoderException("Invalid URL encoding");
176                     }
177                     buffer.write((char)((u << 4) + l));
178                 } catch(ArrayIndexOutOfBoundsException e) {
179                     throw new DecoderException("Invalid URL encoding");
180                 }
181             } else {
182                 buffer.write(b);
183             }
184         }
185         return buffer.toByteArray();
186     }
187 
188 
189     /**
190      * Encodes an array of bytes into an array of URL safe 7-bit
191      * characters. Unsafe characters are escaped.
192      *
193      * @param bytes array of bytes to convert to URL safe characters
194      * @return array of bytes containing URL safe characters
195      */
encode(byte[] bytes)196     public byte[] encode(byte[] bytes) {
197         return encodeUrl(WWW_FORM_URL, bytes);
198     }
199 
200 
201     /**
202      * Decodes an array of URL safe 7-bit characters into an array of
203      * original bytes. Escaped characters are converted back to their
204      * original representation.
205      *
206      * @param bytes array of URL safe characters
207      * @return array of original bytes
208      * @throws DecoderException Thrown if URL decoding is unsuccessful
209      */
decode(byte[] bytes)210     public byte[] decode(byte[] bytes) throws DecoderException {
211         return decodeUrl(bytes);
212     }
213 
214 
215     /**
216      * Encodes a string into its URL safe form using the specified
217      * string charset. Unsafe characters are escaped.
218      *
219      * @param pString string to convert to a URL safe form
220      * @param charset the charset for pString
221      * @return URL safe string
222      * @throws UnsupportedEncodingException Thrown if charset is not
223      *                                      supported
224      */
encode(String pString, String charset)225     public String encode(String pString, String charset)
226         throws UnsupportedEncodingException
227     {
228         if (pString == null) {
229             return null;
230         }
231         return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
232     }
233 
234 
235     /**
236      * Encodes a string into its URL safe form using the default string
237      * charset. Unsafe characters are escaped.
238      *
239      * @param pString string to convert to a URL safe form
240      * @return URL safe string
241      * @throws EncoderException Thrown if URL encoding is unsuccessful
242      *
243      * @see #getDefaultCharset()
244      */
encode(String pString)245     public String encode(String pString) throws EncoderException {
246         if (pString == null) {
247             return null;
248         }
249         try {
250             return encode(pString, getDefaultCharset());
251         } catch(UnsupportedEncodingException e) {
252             throw new EncoderException(e.getMessage());
253         }
254     }
255 
256 
257     /**
258      * Decodes a URL safe string into its original form using the
259      * specified encoding. Escaped characters are converted back
260      * to their original representation.
261      *
262      * @param pString URL safe string to convert into its original form
263      * @param charset the original string charset
264      * @return original string
265      * @throws DecoderException Thrown if URL decoding is unsuccessful
266      * @throws UnsupportedEncodingException Thrown if charset is not
267      *                                      supported
268      */
decode(String pString, String charset)269     public String decode(String pString, String charset)
270         throws DecoderException, UnsupportedEncodingException
271     {
272         if (pString == null) {
273             return null;
274         }
275         return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
276     }
277 
278 
279     /**
280      * Decodes a URL safe string into its original form using the default
281      * string charset. Escaped characters are converted back to their
282      * original representation.
283      *
284      * @param pString URL safe string to convert into its original form
285      * @return original string
286      * @throws DecoderException Thrown if URL decoding is unsuccessful
287      *
288      * @see #getDefaultCharset()
289      */
decode(String pString)290     public String decode(String pString) throws DecoderException {
291         if (pString == null) {
292             return null;
293         }
294         try {
295             return decode(pString, getDefaultCharset());
296         } catch(UnsupportedEncodingException e) {
297             throw new DecoderException(e.getMessage());
298         }
299     }
300 
301     /**
302      * Encodes an object into its URL safe form. Unsafe characters are
303      * escaped.
304      *
305      * @param pObject string to convert to a URL safe form
306      * @return URL safe object
307      * @throws EncoderException Thrown if URL encoding is not
308      *                          applicable to objects of this type or
309      *                          if encoding is unsuccessful
310      */
encode(Object pObject)311     public Object encode(Object pObject) throws EncoderException {
312         if (pObject == null) {
313             return null;
314         } else if (pObject instanceof byte[]) {
315             return encode((byte[])pObject);
316         } else if (pObject instanceof String) {
317             return encode((String)pObject);
318         } else {
319             throw new EncoderException("Objects of type " +
320                 pObject.getClass().getName() + " cannot be URL encoded");
321 
322         }
323     }
324 
325     /**
326      * Decodes a URL safe object into its original form. Escaped
327      * characters are converted back to their original representation.
328      *
329      * @param pObject URL safe object to convert into its original form
330      * @return original object
331      * @throws DecoderException Thrown if URL decoding is not
332      *                          applicable to objects of this type
333      *                          if decoding is unsuccessful
334      */
decode(Object pObject)335     public Object decode(Object pObject) throws DecoderException {
336         if (pObject == null) {
337             return null;
338         } else if (pObject instanceof byte[]) {
339             return decode((byte[])pObject);
340         } else if (pObject instanceof String) {
341             return decode((String)pObject);
342         } else {
343             throw new DecoderException("Objects of type " +
344                 pObject.getClass().getName() + " cannot be URL decoded");
345 
346         }
347     }
348 
349     /**
350      * The <code>String</code> encoding used for decoding and encoding.
351      *
352      * @return Returns the encoding.
353      *
354      * @deprecated use #getDefaultCharset()
355      */
getEncoding()356     public String getEncoding() {
357         return this.charset;
358     }
359 
360     /**
361      * The default charset used for string decoding and encoding.
362      *
363      * @return the default string charset.
364      */
getDefaultCharset()365     public String getDefaultCharset() {
366         return this.charset;
367     }
368 
369 }
370