1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.voicemail.impl.mail.internet;
17 
18 import android.text.TextUtils;
19 import android.util.Base64;
20 import android.util.Base64DataException;
21 import android.util.Base64InputStream;
22 import com.android.voicemail.impl.VvmLog;
23 import com.android.voicemail.impl.mail.Body;
24 import com.android.voicemail.impl.mail.BodyPart;
25 import com.android.voicemail.impl.mail.Message;
26 import com.android.voicemail.impl.mail.MessagingException;
27 import com.android.voicemail.impl.mail.Multipart;
28 import com.android.voicemail.impl.mail.Part;
29 import java.io.ByteArrayOutputStream;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.io.OutputStream;
33 import java.util.ArrayList;
34 import java.util.regex.Matcher;
35 import java.util.regex.Pattern;
36 import org.apache.commons.io.IOUtils;
37 import org.apache.james.mime4j.codec.DecodeMonitor;
38 import org.apache.james.mime4j.codec.DecoderUtil;
39 import org.apache.james.mime4j.codec.EncoderUtil;
40 import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
41 import org.apache.james.mime4j.util.CharsetUtil;
42 
43 public class MimeUtility {
  /** Tag passed to {@link VvmLog} for the log lines emitted by this class. */
  private static final String LOG_TAG = "Email";

  /** The MIME type string {@code message/rfc822}. */
  public static final String MIME_TYPE_RFC822 = "message/rfc822";
  /** Matches one carriage return or one line feed character; used by {@link #unfold(String)}. */
  private static final Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
48 
49   /**
50    * Replace sequences of CRLF+WSP with WSP. Tries to preserve original string object whenever
51    * possible.
52    */
unfold(String s)53   public static String unfold(String s) {
54     if (s == null) {
55       return null;
56     }
57     Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
58     if (patternMatcher.find()) {
59       patternMatcher.reset();
60       s = patternMatcher.replaceAll("");
61     }
62     return s;
63   }
64 
decode(String s)65   public static String decode(String s) {
66     if (s == null) {
67       return null;
68     }
69     return DecoderUtil.decodeEncodedWords(s, DecodeMonitor.STRICT);
70   }
71 
unfoldAndDecode(String s)72   public static String unfoldAndDecode(String s) {
73     return decode(unfold(s));
74   }
75 
76   // TODO implement proper foldAndEncode
77   // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
78   // duplication of encoding.
foldAndEncode(String s)79   public static String foldAndEncode(String s) {
80     return s;
81   }
82 
83   /**
84    * INTERIM version of foldAndEncode that will be used only by Subject: headers. This is safer than
85    * implementing foldAndEncode() (see above) and risking unknown damage to other headers.
86    *
87    * <p>TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
88    *
89    * @param s original string to encode and fold
90    * @param usedCharacters number of characters already used up by header name
91    * @return the String ready to be transmitted
92    */
foldAndEncode2(String s, int usedCharacters)93   public static String foldAndEncode2(String s, int usedCharacters) {
94     // james.mime4j.codec.EncoderUtil.java
95     // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
96     // Usage.TEXT_TOKENlooks like the right thing for subjects
97     // use WORD_ENTITY for address/names
98 
99     String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, usedCharacters);
100 
101     return fold(encoded, usedCharacters);
102   }
103 
104   /**
105    * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
106    * MimeUtil class).
107    *
108    * <p>Splits the specified string into a multiple-line representation with lines no longer than 76
109    * characters (because the line might contain encoded words; see <a
110    * href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 2047</a> section 2). If the string contains
111    * non-whitespace sequences longer than 76 characters a line break is inserted at the whitespace
112    * character following the sequence resulting in a line longer than 76 characters.
113    *
114    * @param s string to split.
115    * @param usedCharacters number of characters already used up. Usually the number of characters
116    *     for header field name plus colon and one space.
117    * @return a multiple-line representation of the given string.
118    */
fold(String s, int usedCharacters)119   public static String fold(String s, int usedCharacters) {
120     final int maxCharacters = 76;
121 
122     final int length = s.length();
123     if (usedCharacters + length <= maxCharacters) {
124       return s;
125     }
126 
127     StringBuilder sb = new StringBuilder();
128 
129     int lastLineBreak = -usedCharacters;
130     int wspIdx = indexOfWsp(s, 0);
131     while (true) {
132       if (wspIdx == length) {
133         sb.append(s.substring(Math.max(0, lastLineBreak)));
134         return sb.toString();
135       }
136 
137       int nextWspIdx = indexOfWsp(s, wspIdx + 1);
138 
139       if (nextWspIdx - lastLineBreak > maxCharacters) {
140         sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
141         sb.append("\r\n");
142         lastLineBreak = wspIdx;
143       }
144 
145       wspIdx = nextWspIdx;
146     }
147   }
148 
149   /**
150    * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
151    * MimeUtil class).
152    *
153    * <p>Search for whitespace.
154    */
indexOfWsp(String s, int fromIndex)155   private static int indexOfWsp(String s, int fromIndex) {
156     final int len = s.length();
157     for (int index = fromIndex; index < len; index++) {
158       char c = s.charAt(index);
159       if (c == ' ' || c == '\t') {
160         return index;
161       }
162     }
163     return len;
164   }
165 
166   /**
167    * Returns the named parameter of a header field. If name is null the first parameter is returned,
168    * or if there are no additional parameters in the field the entire field is returned. Otherwise
169    * the named parameter is searched for in a case insensitive fashion and returned. If the
170    * parameter cannot be found the method returns null.
171    *
172    * <p>TODO: quite inefficient with the inner trimming & splitting. TODO: Also has a latent bug:
173    * uses "startsWith" to match the name, which can false-positive. TODO: The doc says that for a
174    * null name you get the first param, but you get the header. Should probably just fix the doc,
175    * but if other code assumes that behavior, fix the code. TODO: Need to decode %-escaped strings,
176    * as in: filename="ab%22d". ('+' -> ' ' conversion too? check RFC)
177    *
178    * @param header
179    * @param name
180    * @return the entire header (if name=null), the found parameter, or null
181    */
getHeaderParameter(String header, String name)182   public static String getHeaderParameter(String header, String name) {
183     if (header == null) {
184       return null;
185     }
186     String[] parts = unfold(header).split(";");
187     if (name == null) {
188       return parts[0].trim();
189     }
190     String lowerCaseName = name.toLowerCase();
191     for (String part : parts) {
192       if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
193         String[] parameterParts = part.split("=", 2);
194         if (parameterParts.length < 2) {
195           return null;
196         }
197         String parameter = parameterParts[1].trim();
198         if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
199           return parameter.substring(1, parameter.length() - 1);
200         } else {
201           return parameter;
202         }
203       }
204     }
205     return null;
206   }
207 
208   /**
209    * Reads the Part's body and returns a String based on any charset conversion that needed to be
210    * done.
211    *
212    * @param part The part containing a body
213    * @return a String containing the converted text in the body, or null if there was no text or an
214    *     error during conversion.
215    */
getTextFromPart(Part part)216   public static String getTextFromPart(Part part) {
217     try {
218       if (part != null && part.getBody() != null) {
219         InputStream in = part.getBody().getInputStream();
220         String mimeType = part.getMimeType();
221         if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
222           /*
223            * Now we read the part into a buffer for further processing. Because
224            * the stream is now wrapped we'll remove any transfer encoding at this point.
225            */
226           ByteArrayOutputStream out = new ByteArrayOutputStream();
227           IOUtils.copy(in, out);
228           in.close();
229           in = null; // we want all of our memory back, and close might not release
230 
231           /*
232            * We've got a text part, so let's see if it needs to be processed further.
233            */
234           String charset = getHeaderParameter(part.getContentType(), "charset");
235           if (charset != null) {
236             /*
237              * See if there is conversion from the MIME charset to the Java one.
238              */
239             charset = CharsetUtil.lookup(charset).name();
240           }
241           /*
242            * No encoding, so use us-ascii, which is the standard.
243            */
244           if (charset == null) {
245             charset = "ASCII";
246           }
247           /*
248            * Convert and return as new String
249            */
250           String result = out.toString(charset);
251           out.close();
252           return result;
253         }
254       }
255 
256     } catch (OutOfMemoryError oom) {
257       /*
258        * If we are not able to process the body there's nothing we can do about it. Return
259        * null and let the upper layers handle the missing content.
260        */
261       VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
262     } catch (Exception e) {
263       /*
264        * If we are not able to process the body there's nothing we can do about it. Return
265        * null and let the upper layers handle the missing content.
266        */
267       VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
268     }
269     return null;
270   }
271 
272   /**
273    * Returns true if the given mimeType matches the matchAgainst specification. The comparison
274    * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
275    *
276    * @param mimeType A MIME type to check.
277    * @param matchAgainst A MIME type to check against. May include wildcards.
278    * @return true if the mimeType matches
279    */
mimeTypeMatches(String mimeType, String matchAgainst)280   public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
281     Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"), Pattern.CASE_INSENSITIVE);
282     return p.matcher(mimeType).matches();
283   }
284 
285   /**
286    * Returns true if the given mimeType matches any of the matchAgainst specifications. The
287    * comparison ignores case and the matchAgainst strings may include "*" for a wildcard (e.g.
288    * "image/*").
289    *
290    * @param mimeType A MIME type to check.
291    * @param matchAgainst An array of MIME types to check against. May include wildcards.
292    * @return true if the mimeType matches any of the matchAgainst strings
293    */
mimeTypeMatches(String mimeType, String[] matchAgainst)294   public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
295     for (String matchType : matchAgainst) {
296       if (mimeTypeMatches(mimeType, matchType)) {
297         return true;
298       }
299     }
300     return false;
301   }
302 
303   /**
304    * Given an input stream and a transfer encoding, return a wrapped input stream for that encoding
305    * (or the original if none is required)
306    *
307    * @param in the input stream
308    * @param contentTransferEncoding the content transfer encoding
309    * @return a properly wrapped stream
310    */
getInputStreamForContentTransferEncoding( InputStream in, String contentTransferEncoding)311   public static InputStream getInputStreamForContentTransferEncoding(
312       InputStream in, String contentTransferEncoding) {
313     if (contentTransferEncoding != null) {
314       contentTransferEncoding = MimeUtility.getHeaderParameter(contentTransferEncoding, null);
315       if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
316         in = new QuotedPrintableInputStream(in);
317       } else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
318         in = new Base64InputStream(in, Base64.DEFAULT);
319       }
320     }
321     return in;
322   }
323 
324   /** Removes any content transfer encoding from the stream and returns a Body. */
decodeBody(InputStream in, String contentTransferEncoding)325   public static Body decodeBody(InputStream in, String contentTransferEncoding) throws IOException {
326     /*
327      * We'll remove any transfer encoding by wrapping the stream.
328      */
329     in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
330     BinaryTempFileBody tempBody = new BinaryTempFileBody();
331     OutputStream out = tempBody.getOutputStream();
332     try {
333       IOUtils.copy(in, out);
334     } catch (Base64DataException bde) {
335       // TODO Need to fix this somehow
336       //String warning = "\n\n" + Email.getMessageDecodeErrorString();
337       //out.write(warning.getBytes());
338     } finally {
339       out.close();
340     }
341     return tempBody;
342   }
343 
344   /**
345    * Recursively scan a Part (usually a Message) and sort out which of its children will be
346    * "viewable" and which will be attachments.
347    *
348    * @param part The part to be broken down
349    * @param viewables This arraylist will be populated with all parts that appear to be the
350    *     "message" (e.g. text/plain & text/html)
351    * @param attachments This arraylist will be populated with all parts that appear to be
352    *     attachments (including inlines)
353    * @throws MessagingException
354    */
collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)355   public static void collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)
356       throws MessagingException {
357     String disposition = part.getDisposition();
358     String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
359     // If a disposition is not specified, default to "inline"
360     boolean inline =
361         TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
362     // The lower-case mime type
363     String mimeType = part.getMimeType().toLowerCase();
364 
365     if (part.getBody() instanceof Multipart) {
366       // If the part is Multipart but not alternative it's either mixed or
367       // something we don't know about, which means we treat it as mixed
368       // per the spec. We just process its pieces recursively.
369       MimeMultipart mp = (MimeMultipart) part.getBody();
370       boolean foundHtml = false;
371       if (mp.getSubTypeForTest().equals("alternative")) {
372         for (int i = 0; i < mp.getCount(); i++) {
373           if (mp.getBodyPart(i).isMimeType("text/html")) {
374             foundHtml = true;
375             break;
376           }
377         }
378       }
379       for (int i = 0; i < mp.getCount(); i++) {
380         // See if we have text and html
381         BodyPart bp = mp.getBodyPart(i);
382         // If there's html, don't bother loading text
383         if (foundHtml && bp.isMimeType("text/plain")) {
384           continue;
385         }
386         collectParts(bp, viewables, attachments);
387       }
388     } else if (part.getBody() instanceof Message) {
389       // If the part is an embedded message we just continue to process
390       // it, pulling any viewables or attachments into the running list.
391       Message message = (Message) part.getBody();
392       collectParts(message, viewables, attachments);
393     } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
394       // We'll treat text and images as viewables
395       viewables.add(part);
396     } else {
397       // Everything else is an attachment.
398       attachments.add(part);
399     }
400   }
401 }
402