1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 package org.apache.commons.codec.binary;
19 
20 import java.io.UnsupportedEncodingException;
21 
22 import org.apache.commons.codec.CharEncoding;
23 
24 /**
25  * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a
26  * href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
27  *
28  * @see CharEncoding
29  * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
30  * @version $Id: StringUtils.java 801391 2009-08-05 19:55:54Z ggregory $
31  * @since 1.4
32  */
33 public class StringUtils {
34 
35     /**
36      * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
37      * byte array.
38      *
39      * @param string
40      *            the String to encode
41      * @return encoded bytes
42      * @throws IllegalStateException
43      *             Thrown when the charset is missing, which should be never according the the Java specification.
44      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
45      * @see #getBytesUnchecked(String, String)
46      */
getBytesIso8859_1(String string)47     public static byte[] getBytesIso8859_1(String string) {
48         return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1);
49     }
50 
51     /**
52      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
53      * array.
54      *
55      * @param string
56      *            the String to encode
57      * @return encoded bytes
58      * @throws IllegalStateException
59      *             Thrown when the charset is missing, which should be never according the the Java specification.
60      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
61      * @see #getBytesUnchecked(String, String)
62      */
getBytesUsAscii(String string)63     public static byte[] getBytesUsAscii(String string) {
64         return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII);
65     }
66 
67     /**
68      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
69      * array.
70      *
71      * @param string
72      *            the String to encode
73      * @return encoded bytes
74      * @throws IllegalStateException
75      *             Thrown when the charset is missing, which should be never according the the Java specification.
76      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
77      * @see #getBytesUnchecked(String, String)
78      */
getBytesUtf16(String string)79     public static byte[] getBytesUtf16(String string) {
80         return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16);
81     }
82 
83     /**
84      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
85      * array.
86      *
87      * @param string
88      *            the String to encode
89      * @return encoded bytes
90      * @throws IllegalStateException
91      *             Thrown when the charset is missing, which should be never according the the Java specification.
92      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
93      * @see #getBytesUnchecked(String, String)
94      */
getBytesUtf16Be(String string)95     public static byte[] getBytesUtf16Be(String string) {
96         return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE);
97     }
98 
99     /**
100      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
101      * array.
102      *
103      * @param string
104      *            the String to encode
105      * @return encoded bytes
106      * @throws IllegalStateException
107      *             Thrown when the charset is missing, which should be never according the the Java specification.
108      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
109      * @see #getBytesUnchecked(String, String)
110      */
getBytesUtf16Le(String string)111     public static byte[] getBytesUtf16Le(String string) {
112         return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE);
113     }
114 
115     /**
116      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
117      * array.
118      *
119      * @param string
120      *            the String to encode
121      * @return encoded bytes
122      * @throws IllegalStateException
123      *             Thrown when the charset is missing, which should be never according the the Java specification.
124      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
125      * @see #getBytesUnchecked(String, String)
126      */
getBytesUtf8(String string)127     public static byte[] getBytesUtf8(String string) {
128         return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8);
129     }
130 
131     /**
132      * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
133      * array.
134      * <p>
135      * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
136      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
137      * </p>
138      *
139      * @param string
140      *            the String to encode
141      * @param charsetName
142      *            The name of a required {@link java.nio.charset.Charset}
143      * @return encoded bytes
144      * @throws IllegalStateException
145      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
146      *             required charset name.
147      * @see CharEncoding
148      * @see String#getBytes(String)
149      */
getBytesUnchecked(String string, String charsetName)150     public static byte[] getBytesUnchecked(String string, String charsetName) {
151         if (string == null) {
152             return null;
153         }
154         try {
155             return string.getBytes(charsetName);
156         } catch (UnsupportedEncodingException e) {
157             throw StringUtils.newIllegalStateException(charsetName, e);
158         }
159     }
160 
newIllegalStateException(String charsetName, UnsupportedEncodingException e)161     private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
162         return new IllegalStateException(charsetName + ": " + e);
163     }
164 
165     /**
166      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
167      * <p>
168      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
169      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
170      * </p>
171      *
172      * @param bytes
173      *            The bytes to be decoded into characters
174      * @param charsetName
175      *            The name of a required {@link java.nio.charset.Charset}
176      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
177      * @throws IllegalStateException
178      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
179      *             required charset name.
180      * @see CharEncoding
181      * @see String#String(byte[], String)
182      */
newString(byte[] bytes, String charsetName)183     public static String newString(byte[] bytes, String charsetName) {
184         if (bytes == null) {
185             return null;
186         }
187         try {
188             return new String(bytes, charsetName);
189         } catch (UnsupportedEncodingException e) {
190             throw StringUtils.newIllegalStateException(charsetName, e);
191         }
192     }
193 
194     /**
195      * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
196      *
197      * @param bytes
198      *            The bytes to be decoded into characters
199      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
200      * @throws IllegalStateException
201      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
202      *             charset is required.
203      */
newStringIso8859_1(byte[] bytes)204     public static String newStringIso8859_1(byte[] bytes) {
205         return StringUtils.newString(bytes, CharEncoding.ISO_8859_1);
206     }
207 
208     /**
209      * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
210      *
211      * @param bytes
212      *            The bytes to be decoded into characters
213      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
214      * @throws IllegalStateException
215      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
216      *             charset is required.
217      */
newStringUsAscii(byte[] bytes)218     public static String newStringUsAscii(byte[] bytes) {
219         return StringUtils.newString(bytes, CharEncoding.US_ASCII);
220     }
221 
222     /**
223      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
224      *
225      * @param bytes
226      *            The bytes to be decoded into characters
227      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
228      * @throws IllegalStateException
229      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
230      *             charset is required.
231      */
newStringUtf16(byte[] bytes)232     public static String newStringUtf16(byte[] bytes) {
233         return StringUtils.newString(bytes, CharEncoding.UTF_16);
234     }
235 
236     /**
237      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
238      *
239      * @param bytes
240      *            The bytes to be decoded into characters
241      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
242      * @throws IllegalStateException
243      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
244      *             charset is required.
245      */
newStringUtf16Be(byte[] bytes)246     public static String newStringUtf16Be(byte[] bytes) {
247         return StringUtils.newString(bytes, CharEncoding.UTF_16BE);
248     }
249 
250     /**
251      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
252      *
253      * @param bytes
254      *            The bytes to be decoded into characters
255      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
256      * @throws IllegalStateException
257      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
258      *             charset is required.
259      */
newStringUtf16Le(byte[] bytes)260     public static String newStringUtf16Le(byte[] bytes) {
261         return StringUtils.newString(bytes, CharEncoding.UTF_16LE);
262     }
263 
264     /**
265      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
266      *
267      * @param bytes
268      *            The bytes to be decoded into characters
269      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
270      * @throws IllegalStateException
271      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
272      *             charset is required.
273      */
newStringUtf8(byte[] bytes)274     public static String newStringUtf8(byte[] bytes) {
275         return StringUtils.newString(bytes, CharEncoding.UTF_8);
276     }
277 
278 }
279