1 /****************************************************************
2  * Licensed to the Apache Software Foundation (ASF) under one   *
3  * or more contributor license agreements.  See the NOTICE file *
4  * distributed with this work for additional information        *
5  * regarding copyright ownership.  The ASF licenses this file   *
6  * to you under the Apache License, Version 2.0 (the            *
7  * "License"); you may not use this file except in compliance   *
8  * with the License.  You may obtain a copy of the License at   *
9  *                                                              *
10  *   http://www.apache.org/licenses/LICENSE-2.0                 *
11  *                                                              *
12  * Unless required by applicable law or agreed to in writing,   *
13  * software distributed under the License is distributed on an  *
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15  * KIND, either express or implied.  See the License for the    *
16  * specific language governing permissions and limitations      *
17  * under the License.                                           *
18  ****************************************************************/
19 
20 package org.apache.james.mime4j.decoder;
21 
22 //BEGIN android-changed: Stubbing out logging
23 import org.apache.james.mime4j.Log;
24 import org.apache.james.mime4j.LogFactory;
25 //END android-changed
26 import org.apache.james.mime4j.util.CharsetUtil;
27 
28 import java.io.ByteArrayInputStream;
29 import java.io.ByteArrayOutputStream;
30 import java.io.IOException;
31 import java.io.UnsupportedEncodingException;
32 
33 /**
34  * Static methods for decoding strings, byte arrays and encoded words.
35  *
36  *
37  * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
38  */
39 public class DecoderUtil {
40     private static Log log = LogFactory.getLog(DecoderUtil.class);
41 
42     /**
43      * Decodes a string containing quoted-printable encoded data.
44      *
45      * @param s the string to decode.
46      * @return the decoded bytes.
47      */
decodeBaseQuotedPrintable(String s)48     public static byte[] decodeBaseQuotedPrintable(String s) {
49         ByteArrayOutputStream baos = new ByteArrayOutputStream();
50 
51         try {
52             byte[] bytes = s.getBytes("US-ASCII");
53 
54             QuotedPrintableInputStream is = new QuotedPrintableInputStream(
55                                                new ByteArrayInputStream(bytes));
56 
57             int b = 0;
58             while ((b = is.read()) != -1) {
59                 baos.write(b);
60             }
61         } catch (IOException e) {
62             /*
63              * This should never happen!
64              */
65             log.error(e);
66         }
67 
68         return baos.toByteArray();
69     }
70 
71     /**
72      * Decodes a string containing base64 encoded data.
73      *
74      * @param s the string to decode.
75      * @return the decoded bytes.
76      */
decodeBase64(String s)77     public static byte[] decodeBase64(String s) {
78         ByteArrayOutputStream baos = new ByteArrayOutputStream();
79 
80         try {
81             byte[] bytes = s.getBytes("US-ASCII");
82 
83             Base64InputStream is = new Base64InputStream(
84                                         new ByteArrayInputStream(bytes));
85 
86             int b = 0;
87             while ((b = is.read()) != -1) {
88                 baos.write(b);
89             }
90         } catch (IOException e) {
91             /*
92              * This should never happen!
93              */
94             log.error(e);
95         }
96 
97         return baos.toByteArray();
98     }
99 
100     /**
101      * Decodes an encoded word encoded with the 'B' encoding (described in
102      * RFC 2047) found in a header field body.
103      *
104      * @param encodedWord the encoded word to decode.
105      * @param charset the Java charset to use.
106      * @return the decoded string.
107      * @throws UnsupportedEncodingException if the given Java charset isn't
108      *         supported.
109      */
decodeB(String encodedWord, String charset)110     public static String decodeB(String encodedWord, String charset)
111             throws UnsupportedEncodingException {
112 
113         return new String(decodeBase64(encodedWord), charset);
114     }
115 
116     /**
117      * Decodes an encoded word encoded with the 'Q' encoding (described in
118      * RFC 2047) found in a header field body.
119      *
120      * @param encodedWord the encoded word to decode.
121      * @param charset the Java charset to use.
122      * @return the decoded string.
123      * @throws UnsupportedEncodingException if the given Java charset isn't
124      *         supported.
125      */
decodeQ(String encodedWord, String charset)126     public static String decodeQ(String encodedWord, String charset)
127             throws UnsupportedEncodingException {
128 
129         /*
130          * Replace _ with =20
131          */
132         StringBuffer sb = new StringBuffer();
133         for (int i = 0; i < encodedWord.length(); i++) {
134             char c = encodedWord.charAt(i);
135             if (c == '_') {
136                 sb.append("=20");
137             } else {
138                 sb.append(c);
139             }
140         }
141 
142         return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
143     }
144 
145     /**
146      * Decodes a string containing encoded words as defined by RFC 2047.
147      * Encoded words in have the form
148      * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
149      * quoted-printable and 'B' or 'b' for Base64.
150      *
151      * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
152      *
153      * @param body the string to decode.
154      * @return the decoded string.
155      */
decodeEncodedWords(String body)156     public static String decodeEncodedWords(String body) {
157 
158         // ANDROID:  Most strings will not include "=?" so a quick test can prevent unneeded
159         // object creation.  This could also be handled via lazy creation of the StringBuilder.
160         if (body.indexOf("=?") == -1) {
161             return body;
162         }
163 
164         int previousEnd = 0;
165         boolean previousWasEncoded = false;
166 
167         StringBuilder sb = new StringBuilder();
168 
169         while (true) {
170             int begin = body.indexOf("=?", previousEnd);
171 
172             // ANDROID:  The mime4j original version has an error here.  It gets confused if
173             // the encoded string begins with an '=' (just after "?Q?").  This patch seeks forward
174             // to find the two '?' in the "header", before looking for the final "?=".
175             if (begin == -1) {
176                 break;
177             }
178             int qm1 = body.indexOf('?', begin + 2);
179             if (qm1 == -1) {
180                 break;
181             }
182             int qm2 = body.indexOf('?', qm1 + 1);
183             if (qm2 == -1) {
184                 break;
185             }
186             int end = body.indexOf("?=", qm2 + 1);
187             if (end == -1) {
188                 break;
189             }
190             end += 2;
191 
192             String sep = body.substring(previousEnd, begin);
193 
194             String decoded = decodeEncodedWord(body, begin, end);
195             if (decoded == null) {
196                 sb.append(sep);
197                 sb.append(body.substring(begin, end));
198             } else {
199                 if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
200                     sb.append(sep);
201                 }
202                 sb.append(decoded);
203             }
204 
205             previousEnd = end;
206             previousWasEncoded = decoded != null;
207         }
208 
209         if (previousEnd == 0)
210             return body;
211 
212         sb.append(body.substring(previousEnd));
213         return sb.toString();
214     }
215 
216     // return null on error. Begin is index of '=?' in body.
decodeEncodedWord(String body, int begin, int end)217     public static String decodeEncodedWord(String body, int begin, int end) {
218         // Skip the '?=' chars in body and scan forward from there for next '?'
219         int qm1 = body.indexOf('?', begin + 2);
220         if (qm1 == -1 || qm1 == end - 2)
221             return null;
222 
223         int qm2 = body.indexOf('?', qm1 + 1);
224         if (qm2 == -1 || qm2 == end - 2)
225             return null;
226 
227         String mimeCharset = body.substring(begin + 2, qm1);
228         String encoding = body.substring(qm1 + 1, qm2);
229         String encodedText = body.substring(qm2 + 1, end - 2);
230 
231         String charset = CharsetUtil.toJavaCharset(mimeCharset);
232         if (charset == null) {
233             if (log.isWarnEnabled()) {
234                 log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
235                         + body.substring(begin, end) + "' doesn't have a "
236                         + "corresponding Java charset");
237             }
238             return null;
239         } else if (!CharsetUtil.isDecodingSupported(charset)) {
240             if (log.isWarnEnabled()) {
241                 log.warn("Current JDK doesn't support decoding of charset '"
242                         + charset + "' (MIME charset '" + mimeCharset
243                         + "' in encoded word '" + body.substring(begin, end)
244                         + "')");
245             }
246             return null;
247         }
248 
249         if (encodedText.length() == 0) {
250             if (log.isWarnEnabled()) {
251                 log.warn("Missing encoded text in encoded word: '"
252                         + body.substring(begin, end) + "'");
253             }
254             return null;
255         }
256 
257         try {
258             if (encoding.equalsIgnoreCase("Q")) {
259                 return DecoderUtil.decodeQ(encodedText, charset);
260             } else if (encoding.equalsIgnoreCase("B")) {
261                 return DecoderUtil.decodeB(encodedText, charset);
262             } else {
263                 if (log.isWarnEnabled()) {
264                     log.warn("Warning: Unknown encoding in encoded word '"
265                             + body.substring(begin, end) + "'");
266                 }
267                 return null;
268             }
269         } catch (UnsupportedEncodingException e) {
270             // should not happen because of isDecodingSupported check above
271             if (log.isWarnEnabled()) {
272                 log.warn("Unsupported encoding in encoded word '"
273                         + body.substring(begin, end) + "'", e);
274             }
275             return null;
276         } catch (RuntimeException e) {
277             if (log.isWarnEnabled()) {
278                 log.warn("Could not decode encoded word '"
279                         + body.substring(begin, end) + "'", e);
280             }
281             return null;
282         }
283     }
284 }
285