1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.emailcommon.internet;
18 
19 import android.text.TextUtils;
20 import android.util.Base64;
21 import android.util.Base64DataException;
22 import android.util.Base64InputStream;
23 import android.util.Log;
24 
25 import com.android.emailcommon.mail.Body;
26 import com.android.emailcommon.mail.BodyPart;
27 import com.android.emailcommon.mail.Message;
28 import com.android.emailcommon.mail.MessagingException;
29 import com.android.emailcommon.mail.Multipart;
30 import com.android.emailcommon.mail.Part;
31 
32 import org.apache.commons.io.IOUtils;
33 import org.apache.james.mime4j.codec.EncoderUtil;
34 import org.apache.james.mime4j.decoder.DecoderUtil;
35 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
36 import org.apache.james.mime4j.util.CharsetUtil;
37 
38 import java.io.ByteArrayOutputStream;
39 import java.io.IOException;
40 import java.io.InputStream;
41 import java.io.OutputStream;
42 import java.util.ArrayList;
43 import java.util.regex.Matcher;
44 import java.util.regex.Pattern;
45 
46 public class MimeUtility {
47     private static final String LOG_TAG = "Email";
48 
49     public static final String MIME_TYPE_RFC822 = "message/rfc822";
50     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
51 
52     /**
53      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
54      * object whenever possible.
55      */
unfold(String s)56     public static String unfold(String s) {
57         if (s == null) {
58             return null;
59         }
60         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
61         if (patternMatcher.find()) {
62             patternMatcher.reset();
63             s = patternMatcher.replaceAll("");
64         }
65         return s;
66     }
67 
decode(String s)68     public static String decode(String s) {
69         if (s == null) {
70             return null;
71         }
72         return DecoderUtil.decodeEncodedWords(s);
73     }
74 
unfoldAndDecode(String s)75     public static String unfoldAndDecode(String s) {
76         return decode(unfold(s));
77     }
78 
79     // TODO implement proper foldAndEncode
80     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
81     // duplication of encoding.
foldAndEncode(String s)82     public static String foldAndEncode(String s) {
83         return s;
84     }
85 
86     /**
87      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
88      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
89      * to other headers.
90      *
91      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
92      *
93      * @param s original string to encode and fold
94      * @param usedCharacters number of characters already used up by header name
95 
96      * @return the String ready to be transmitted
97      */
foldAndEncode2(String s, int usedCharacters)98     public static String foldAndEncode2(String s, int usedCharacters) {
99         // james.mime4j.codec.EncoderUtil.java
100         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
101         // Usage.TEXT_TOKENlooks like the right thing for subjects
102         // use WORD_ENTITY for address/names
103 
104         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
105                 usedCharacters);
106 
107         return fold(encoded, usedCharacters);
108     }
109 
110     /**
111      * INTERIM:  From newer version of org.apache.james (but we don't want to import
112      * the entire MimeUtil class).
113      *
114      * Splits the specified string into a multiple-line representation with
115      * lines no longer than 76 characters (because the line might contain
116      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
117      * 2047</a> section 2). If the string contains non-whitespace sequences
118      * longer than 76 characters a line break is inserted at the whitespace
119      * character following the sequence resulting in a line longer than 76
120      * characters.
121      *
122      * @param s
123      *            string to split.
124      * @param usedCharacters
125      *            number of characters already used up. Usually the number of
126      *            characters for header field name plus colon and one space.
127      * @return a multiple-line representation of the given string.
128      */
fold(String s, int usedCharacters)129     public static String fold(String s, int usedCharacters) {
130         final int maxCharacters = 76;
131 
132         final int length = s.length();
133         if (usedCharacters + length <= maxCharacters)
134             return s;
135 
136         StringBuilder sb = new StringBuilder();
137 
138         int lastLineBreak = -usedCharacters;
139         int wspIdx = indexOfWsp(s, 0);
140         while (true) {
141             if (wspIdx == length) {
142                 sb.append(s.substring(Math.max(0, lastLineBreak)));
143                 return sb.toString();
144             }
145 
146             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
147 
148             if (nextWspIdx - lastLineBreak > maxCharacters) {
149                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
150                 sb.append("\r\n");
151                 lastLineBreak = wspIdx;
152             }
153 
154             wspIdx = nextWspIdx;
155         }
156     }
157 
158     /**
159      * INTERIM:  From newer version of org.apache.james (but we don't want to import
160      * the entire MimeUtil class).
161      *
162      * Search for whitespace.
163      */
indexOfWsp(String s, int fromIndex)164     private static int indexOfWsp(String s, int fromIndex) {
165         final int len = s.length();
166         for (int index = fromIndex; index < len; index++) {
167             char c = s.charAt(index);
168             if (c == ' ' || c == '\t')
169                 return index;
170         }
171         return len;
172     }
173 
174     /**
175      * Returns the named parameter of a header field. If name is null the first
176      * parameter is returned, or if there are no additional parameters in the
177      * field the entire field is returned. Otherwise the named parameter is
178      * searched for in a case insensitive fashion and returned. If the parameter
179      * cannot be found the method returns null.
180      *
181      * TODO: quite inefficient with the inner trimming & splitting.
182      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
183      * TODO: The doc says that for a null name you get the first param, but you get the header.
184      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
185      * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
186      *       ('+' -> ' ' conversion too? check RFC)
187      *
188      * @param header
189      * @param name
190      * @return the entire header (if name=null), the found parameter, or null
191      */
getHeaderParameter(String header, String name)192     public static String getHeaderParameter(String header, String name) {
193         if (header == null) {
194             return null;
195         }
196         String[] parts = unfold(header).split(";");
197         if (name == null) {
198             return parts[0].trim();
199         }
200         String lowerCaseName = name.toLowerCase();
201         for (String part : parts) {
202             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
203                 String[] parameterParts = part.split("=", 2);
204                 if (parameterParts.length < 2) {
205                     return null;
206                 }
207                 String parameter = parameterParts[1].trim();
208                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
209                     return parameter.substring(1, parameter.length() - 1);
210                 } else {
211                     return parameter;
212                 }
213             }
214         }
215         return null;
216     }
217 
218     /**
219      * Reads the Part's body and returns a String based on any charset conversion that needed
220      * to be done.
221      * @param part The part containing a body
222      * @return a String containing the converted text in the body, or null if there was no text
223      * or an error during conversion.
224      */
getTextFromPart(Part part)225     public static String getTextFromPart(Part part) {
226         try {
227             if (part != null && part.getBody() != null) {
228                 InputStream in = part.getBody().getInputStream();
229                 String mimeType = part.getMimeType();
230                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
231                     /*
232                      * Now we read the part into a buffer for further processing. Because
233                      * the stream is now wrapped we'll remove any transfer encoding at this point.
234                      */
235                     ByteArrayOutputStream out = new ByteArrayOutputStream();
236                     IOUtils.copy(in, out);
237                     in.close();
238                     in = null;      // we want all of our memory back, and close might not release
239 
240                     /*
241                      * We've got a text part, so let's see if it needs to be processed further.
242                      */
243                     String charset = getHeaderParameter(part.getContentType(), "charset");
244                     if (charset != null) {
245                         /*
246                          * See if there is conversion from the MIME charset to the Java one.
247                          */
248                         charset = CharsetUtil.toJavaCharset(charset);
249                     }
250                     /*
251                      * No encoding, so use us-ascii, which is the standard.
252                      */
253                     if (charset == null) {
254                         charset = "ASCII";
255                     }
256                     /*
257                      * Convert and return as new String
258                      */
259                     String result = out.toString(charset);
260                     out.close();
261                     return result;
262                 }
263             }
264 
265         }
266         catch (OutOfMemoryError oom) {
267             /*
268              * If we are not able to process the body there's nothing we can do about it. Return
269              * null and let the upper layers handle the missing content.
270              */
271             Log.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
272         }
273         catch (Exception e) {
274             /*
275              * If we are not able to process the body there's nothing we can do about it. Return
276              * null and let the upper layers handle the missing content.
277              */
278             Log.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
279         }
280         return null;
281     }
282 
283     /**
284      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
285      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
286      *
287      * @param mimeType A MIME type to check.
288      * @param matchAgainst A MIME type to check against. May include wildcards.
289      * @return true if the mimeType matches
290      */
mimeTypeMatches(String mimeType, String matchAgainst)291     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
292         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
293                 Pattern.CASE_INSENSITIVE);
294         return p.matcher(mimeType).matches();
295     }
296 
297     /**
298      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
299      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
300      * (e.g. "image/*").
301      *
302      * @param mimeType A MIME type to check.
303      * @param matchAgainst An array of MIME types to check against. May include wildcards.
304      * @return true if the mimeType matches any of the matchAgainst strings
305      */
mimeTypeMatches(String mimeType, String[] matchAgainst)306     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
307         for (String matchType : matchAgainst) {
308             if (mimeTypeMatches(mimeType, matchType)) {
309                 return true;
310             }
311         }
312         return false;
313     }
314 
315     /**
316      * Given an input stream and a transfer encoding, return a wrapped input stream for that
317      * encoding (or the original if none is required)
318      * @param in the input stream
319      * @param contentTransferEncoding the content transfer encoding
320      * @return a properly wrapped stream
321      */
getInputStreamForContentTransferEncoding(InputStream in, String contentTransferEncoding)322     public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
323             String contentTransferEncoding) {
324         if (contentTransferEncoding != null) {
325             contentTransferEncoding =
326                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
327             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
328                 in = new QuotedPrintableInputStream(in);
329             }
330             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
331                 in = new Base64InputStream(in, Base64.DEFAULT);
332             }
333         }
334         return in;
335     }
336 
337     /**
338      * Removes any content transfer encoding from the stream and returns a Body.
339      */
decodeBody(InputStream in, String contentTransferEncoding)340     public static Body decodeBody(InputStream in, String contentTransferEncoding)
341             throws IOException {
342         /*
343          * We'll remove any transfer encoding by wrapping the stream.
344          */
345         in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
346         BinaryTempFileBody tempBody = new BinaryTempFileBody();
347         OutputStream out = tempBody.getOutputStream();
348         try {
349             IOUtils.copy(in, out);
350         } catch (Base64DataException bde) {
351             // TODO Need to fix this somehow
352             //String warning = "\n\n" + Email.getMessageDecodeErrorString();
353             //out.write(warning.getBytes());
354         } finally {
355             out.close();
356         }
357         return tempBody;
358     }
359 
360     /**
361      * Recursively scan a Part (usually a Message) and sort out which of its children will be
362      * "viewable" and which will be attachments.
363      *
364      * @param part The part to be broken down
365      * @param viewables This arraylist will be populated with all parts that appear to be
366      * the "message" (e.g. text/plain & text/html)
367      * @param attachments This arraylist will be populated with all parts that appear to be
368      * attachments (including inlines)
369      * @throws MessagingException
370      */
collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)371     public static void collectParts(Part part, ArrayList<Part> viewables,
372             ArrayList<Part> attachments) throws MessagingException {
373         String disposition = part.getDisposition();
374         String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
375         // If a disposition is not specified, default to "inline"
376         boolean inline =
377                 TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
378         // The lower-case mime type
379         String mimeType = part.getMimeType().toLowerCase();
380 
381         if (part.getBody() instanceof Multipart) {
382             // If the part is Multipart but not alternative it's either mixed or
383             // something we don't know about, which means we treat it as mixed
384             // per the spec. We just process its pieces recursively.
385             MimeMultipart mp = (MimeMultipart)part.getBody();
386             boolean foundHtml = false;
387             if (mp.getSubTypeForTest().equals("alternative")) {
388                 for (int i = 0; i < mp.getCount(); i++) {
389                     if (mp.getBodyPart(i).isMimeType("text/html")) {
390                         foundHtml = true;
391                         break;
392                     }
393                 }
394             }
395             for (int i = 0; i < mp.getCount(); i++) {
396                 // See if we have text and html
397                 BodyPart bp = mp.getBodyPart(i);
398                 // If there's html, don't bother loading text
399                 if (foundHtml && bp.isMimeType("text/plain")) {
400                     continue;
401                 }
402                 collectParts(bp, viewables, attachments);
403             }
404         } else if (part.getBody() instanceof Message) {
405             // If the part is an embedded message we just continue to process
406             // it, pulling any viewables or attachments into the running list.
407             Message message = (Message)part.getBody();
408             collectParts(message, viewables, attachments);
409         } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
410             // We'll treat text and images as viewables
411             viewables.add(part);
412         } else {
413             // Everything else is an attachment.
414             attachments.add(part);
415         }
416     }
417 }
418