1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.  Oracle designates this
9  * particular file as subject to the "Classpath" exception as provided
10  * by Oracle in the LICENSE file that accompanied this code.
11  *
12  * This code is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15  * version 2 for more details (a copy is included in the LICENSE file that
16  * accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License version
19  * 2 along with this work; if not, write to the Free Software Foundation,
20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23  * or visit www.oracle.com if you need additional information or have any
24  * questions.
25  */
26 
27 package java.lang;
28 
29 import java.io.UnsupportedEncodingException;
30 import java.lang.ref.SoftReference;
31 import java.nio.ByteBuffer;
32 import java.nio.CharBuffer;
33 import java.nio.charset.Charset;
34 import java.nio.charset.CharsetDecoder;
35 import java.nio.charset.CharsetEncoder;
36 import java.nio.charset.CharacterCodingException;
37 import java.nio.charset.CoderResult;
38 import java.nio.charset.CodingErrorAction;
39 import java.nio.charset.IllegalCharsetNameException;
40 import java.nio.charset.UnsupportedCharsetException;
41 import java.util.Arrays;
42 import sun.misc.MessageUtils;
43 import sun.nio.cs.HistoricallyNamedCharset;
44 import sun.nio.cs.ArrayDecoder;
45 import sun.nio.cs.ArrayEncoder;
46 
47 /**
48  * Utility class for string encoding and decoding.
49  */
50 
51 class StringCoding {
52 
StringCoding()53     private StringCoding() { }
54 
55     /** The cached coders for each thread */
56     private final static ThreadLocal<SoftReference<StringDecoder>> decoder =
57         new ThreadLocal<>();
58     private final static ThreadLocal<SoftReference<StringEncoder>> encoder =
59         new ThreadLocal<>();
60 
61     private static boolean warnUnsupportedCharset = true;
62 
deref(ThreadLocal<SoftReference<T>> tl)63     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
64         SoftReference<T> sr = tl.get();
65         if (sr == null)
66             return null;
67         return sr.get();
68     }
69 
set(ThreadLocal<SoftReference<T>> tl, T ob)70     private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
71         tl.set(new SoftReference<T>(ob));
72     }
73 
74     // Trim the given byte array to the given length
75     //
safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted)76     private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
77 
78         // Android-changed: System.getSecurityManager() == null is always true on Android.
79         // Libcore tests expect a defensive copy in pretty much all cases.
80         // if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
81         if (len == ba.length && (isTrusted))
82             return ba;
83         else
84             return Arrays.copyOf(ba, len);
85     }
86 
87     // Trim the given char array to the given length
88     //
safeTrim(char[] ca, int len, Charset cs, boolean isTrusted)89     private static char[] safeTrim(char[] ca, int len,
90                                    Charset cs, boolean isTrusted) {
91         // Android-changed: System.getSecurityManager() == null is always true on Android.
92         // Libcore tests expect a defensive copy in pretty much all cases.
93         // if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
94         if (len == ca.length && (isTrusted))
95             return ca;
96         else
97             return Arrays.copyOf(ca, len);
98     }
99 
scale(int len, float expansionFactor)100     private static int scale(int len, float expansionFactor) {
101         // We need to perform double, not float, arithmetic; otherwise
102         // we lose low order bits when len is larger than 2**24.
103         return (int)(len * (double)expansionFactor);
104     }
105 
lookupCharset(String csn)106     private static Charset lookupCharset(String csn) {
107         if (Charset.isSupported(csn)) {
108             try {
109                 return Charset.forName(csn);
110             } catch (UnsupportedCharsetException x) {
111                 throw new Error(x);
112             }
113         }
114         return null;
115     }
116 
warnUnsupportedCharset(String csn)117     private static void warnUnsupportedCharset(String csn) {
118         if (warnUnsupportedCharset) {
119             // Use sun.misc.MessageUtils rather than the Logging API or
120             // System.err since this method may be called during VM
121             // initialization before either is available.
122             MessageUtils.err("WARNING: Default charset " + csn +
123                              " not supported, using ISO-8859-1 instead");
124             warnUnsupportedCharset = false;
125         }
126     }
127 
128 
129     // -- Decoding --
130     private static class StringDecoder {
131         private final String requestedCharsetName;
132         private final Charset cs;
133         private final CharsetDecoder cd;
134         private final boolean isTrusted;
135 
StringDecoder(Charset cs, String rcn)136         private StringDecoder(Charset cs, String rcn) {
137             this.requestedCharsetName = rcn;
138             this.cs = cs;
139             this.cd = cs.newDecoder()
140                 .onMalformedInput(CodingErrorAction.REPLACE)
141                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
142             this.isTrusted = (cs.getClass().getClassLoader() == null);
143         }
144 
charsetName()145         String charsetName() {
146             if (cs instanceof HistoricallyNamedCharset)
147                 return ((HistoricallyNamedCharset)cs).historicalName();
148             return cs.name();
149         }
150 
requestedCharsetName()151         final String requestedCharsetName() {
152             return requestedCharsetName;
153         }
154 
decode(byte[] ba, int off, int len)155         char[] decode(byte[] ba, int off, int len) {
156             int en = scale(len, cd.maxCharsPerByte());
157             char[] ca = new char[en];
158             if (len == 0)
159                 return ca;
160             if (cd instanceof ArrayDecoder) {
161                 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
162                 return safeTrim(ca, clen, cs, isTrusted);
163             } else {
164                 cd.reset();
165                 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
166                 CharBuffer cb = CharBuffer.wrap(ca);
167                 try {
168                     CoderResult cr = cd.decode(bb, cb, true);
169                     if (!cr.isUnderflow())
170                         cr.throwException();
171                     cr = cd.flush(cb);
172                     if (!cr.isUnderflow())
173                         cr.throwException();
174                 } catch (CharacterCodingException x) {
175                     // Substitution is always enabled,
176                     // so this shouldn't happen
177                     throw new Error(x);
178                 }
179                 return safeTrim(ca, cb.position(), cs, isTrusted);
180             }
181         }
182     }
183 
decode(String charsetName, byte[] ba, int off, int len)184     static char[] decode(String charsetName, byte[] ba, int off, int len)
185         throws UnsupportedEncodingException
186     {
187         StringDecoder sd = deref(decoder);
188         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
189         if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
190                               || csn.equals(sd.charsetName()))) {
191             sd = null;
192             try {
193                 Charset cs = lookupCharset(csn);
194                 if (cs != null)
195                     sd = new StringDecoder(cs, csn);
196             } catch (IllegalCharsetNameException x) {}
197             if (sd == null)
198                 throw new UnsupportedEncodingException(csn);
199             set(decoder, sd);
200         }
201         return sd.decode(ba, off, len);
202     }
203 
decode(Charset cs, byte[] ba, int off, int len)204     static char[] decode(Charset cs, byte[] ba, int off, int len) {
205         // (1)We never cache the "external" cs, the only benefit of creating
206         // an additional StringDe/Encoder object to wrap it is to share the
207         // de/encode() method. These SD/E objects are short-lifed, the young-gen
208         // gc should be able to take care of them well. But the best approash
209         // is still not to generate them if not really necessary.
210         // (2)The defensive copy of the input byte/char[] has a big performance
211         // impact, as well as the outgoing result byte/char[]. Need to do the
212         // optimization check of (sm==null && classLoader0==null) for both.
213         // (3)getClass().getClassLoader0() is expensive
214         // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
215         // is only chcked (and then isTrusted gets set) when (SM==null). It is
216         // possible that the SM==null for now but then SM is NOT null later
217         // when safeTrim() is invoked...the "safe" way to do is to redundant
218         // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
219         // but it then can be argued that the SM is null when the opertaion
220         // is started...
221         CharsetDecoder cd = cs.newDecoder();
222         int en = scale(len, cd.maxCharsPerByte());
223         char[] ca = new char[en];
224         if (len == 0)
225             return ca;
226         boolean isTrusted = false;
227         if (System.getSecurityManager() != null) {
228             if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
229                 ba =  Arrays.copyOfRange(ba, off, off + len);
230                 off = 0;
231             }
232         }
233         cd.onMalformedInput(CodingErrorAction.REPLACE)
234           .onUnmappableCharacter(CodingErrorAction.REPLACE)
235           .reset();
236         if (cd instanceof ArrayDecoder) {
237             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
238             return safeTrim(ca, clen, cs, isTrusted);
239         } else {
240             ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
241             CharBuffer cb = CharBuffer.wrap(ca);
242             try {
243                 CoderResult cr = cd.decode(bb, cb, true);
244                 if (!cr.isUnderflow())
245                     cr.throwException();
246                 cr = cd.flush(cb);
247                 if (!cr.isUnderflow())
248                     cr.throwException();
249             } catch (CharacterCodingException x) {
250                 // Substitution is always enabled,
251                 // so this shouldn't happen
252                 throw new Error(x);
253             }
254             return safeTrim(ca, cb.position(), cs, isTrusted);
255         }
256     }
257 
decode(byte[] ba, int off, int len)258     static char[] decode(byte[] ba, int off, int len) {
259         String csn = Charset.defaultCharset().name();
260         try {
261             // use charset name decode() variant which provides caching.
262             return decode(csn, ba, off, len);
263         } catch (UnsupportedEncodingException x) {
264             warnUnsupportedCharset(csn);
265         }
266         try {
267             return decode("ISO-8859-1", ba, off, len);
268         } catch (UnsupportedEncodingException x) {
269             // If this code is hit during VM initialization, MessageUtils is
270             // the only way we will be able to get any kind of error message.
271             MessageUtils.err("ISO-8859-1 charset not available: "
272                              + x.toString());
273             // If we can not find ISO-8859-1 (a required encoding) then things
274             // are seriously wrong with the installation.
275             System.exit(1);
276             return null;
277         }
278     }
279 
280     // -- Encoding --
281     private static class StringEncoder {
282         private Charset cs;
283         private CharsetEncoder ce;
284         private final String requestedCharsetName;
285         private final boolean isTrusted;
286 
StringEncoder(Charset cs, String rcn)287         private StringEncoder(Charset cs, String rcn) {
288             this.requestedCharsetName = rcn;
289             this.cs = cs;
290             this.ce = cs.newEncoder()
291                 .onMalformedInput(CodingErrorAction.REPLACE)
292                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
293             this.isTrusted = (cs.getClass().getClassLoader() == null);
294         }
295 
charsetName()296         String charsetName() {
297             if (cs instanceof HistoricallyNamedCharset)
298                 return ((HistoricallyNamedCharset)cs).historicalName();
299             return cs.name();
300         }
301 
requestedCharsetName()302         final String requestedCharsetName() {
303             return requestedCharsetName;
304         }
305 
encode(char[] ca, int off, int len)306         byte[] encode(char[] ca, int off, int len) {
307             int en = scale(len, ce.maxBytesPerChar());
308             byte[] ba = new byte[en];
309             if (len == 0)
310                 return ba;
311             if (ce instanceof ArrayEncoder) {
312                 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
313                 return safeTrim(ba, blen, cs, isTrusted);
314             } else {
315                 ce.reset();
316                 ByteBuffer bb = ByteBuffer.wrap(ba);
317                 CharBuffer cb = CharBuffer.wrap(ca, off, len);
318                 try {
319                     // Android-changed:  Pass read-only buffer, so the encoder can't alter it
320                     CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true);
321                     if (!cr.isUnderflow())
322                         cr.throwException();
323                     cr = ce.flush(bb);
324                     if (!cr.isUnderflow())
325                         cr.throwException();
326                 } catch (CharacterCodingException x) {
327                     // Substitution is always enabled,
328                     // so this shouldn't happen
329                     throw new Error(x);
330                 }
331                 return safeTrim(ba, bb.position(), cs, isTrusted);
332             }
333         }
334     }
335 
encode(String charsetName, char[] ca, int off, int len)336     static byte[] encode(String charsetName, char[] ca, int off, int len)
337         throws UnsupportedEncodingException
338     {
339         StringEncoder se = deref(encoder);
340         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
341         if ((se == null) || !(csn.equals(se.requestedCharsetName())
342                               || csn.equals(se.charsetName()))) {
343             se = null;
344             try {
345                 Charset cs = lookupCharset(csn);
346                 if (cs != null)
347                     se = new StringEncoder(cs, csn);
348             } catch (IllegalCharsetNameException x) {}
349             if (se == null)
350                 throw new UnsupportedEncodingException (csn);
351             set(encoder, se);
352         }
353         return se.encode(ca, off, len);
354     }
355 
encode(Charset cs, char[] ca, int off, int len)356     static byte[] encode(Charset cs, char[] ca, int off, int len) {
357         CharsetEncoder ce = cs.newEncoder();
358         int en = scale(len, ce.maxBytesPerChar());
359         byte[] ba = new byte[en];
360         if (len == 0)
361             return ba;
362         boolean isTrusted = false;
363         if (System.getSecurityManager() != null) {
364             if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
365                 ca =  Arrays.copyOfRange(ca, off, off + len);
366                 off = 0;
367             }
368         }
369         ce.onMalformedInput(CodingErrorAction.REPLACE)
370           .onUnmappableCharacter(CodingErrorAction.REPLACE)
371           .reset();
372         if (ce instanceof ArrayEncoder) {
373             int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
374             return safeTrim(ba, blen, cs, isTrusted);
375         } else {
376             ByteBuffer bb = ByteBuffer.wrap(ba);
377             CharBuffer cb = CharBuffer.wrap(ca, off, len);
378             try {
379                 // Android-changed:  Pass read-only buffer, so the encoder can't alter it
380                 CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true);
381                 if (!cr.isUnderflow())
382                     cr.throwException();
383                 cr = ce.flush(bb);
384                 if (!cr.isUnderflow())
385                     cr.throwException();
386             } catch (CharacterCodingException x) {
387                 throw new Error(x);
388             }
389             return safeTrim(ba, bb.position(), cs, isTrusted);
390         }
391     }
392 
encode(char[] ca, int off, int len)393     static byte[] encode(char[] ca, int off, int len) {
394         String csn = Charset.defaultCharset().name();
395         try {
396             // use charset name encode() variant which provides caching.
397             return encode(csn, ca, off, len);
398         } catch (UnsupportedEncodingException x) {
399             warnUnsupportedCharset(csn);
400         }
401         try {
402             return encode("ISO-8859-1", ca, off, len);
403         } catch (UnsupportedEncodingException x) {
404             // If this code is hit during VM initialization, MessageUtils is
405             // the only way we will be able to get any kind of error message.
406             MessageUtils.err("ISO-8859-1 charset not available: "
407                              + x.toString());
408             // If we can not find ISO-8859-1 (a required encoding) then things
409             // are seriously wrong with the installation.
410             System.exit(1);
411             return null;
412         }
413     }
414 }
415