1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.android.voicemail.impl.mail.internet; 17 18 import android.text.TextUtils; 19 import android.util.Base64; 20 import android.util.Base64DataException; 21 import android.util.Base64InputStream; 22 import com.android.voicemail.impl.VvmLog; 23 import com.android.voicemail.impl.mail.Body; 24 import com.android.voicemail.impl.mail.BodyPart; 25 import com.android.voicemail.impl.mail.Message; 26 import com.android.voicemail.impl.mail.MessagingException; 27 import com.android.voicemail.impl.mail.Multipart; 28 import com.android.voicemail.impl.mail.Part; 29 import java.io.ByteArrayOutputStream; 30 import java.io.IOException; 31 import java.io.InputStream; 32 import java.io.OutputStream; 33 import java.util.ArrayList; 34 import java.util.regex.Matcher; 35 import java.util.regex.Pattern; 36 import org.apache.commons.io.IOUtils; 37 import org.apache.james.mime4j.codec.DecodeMonitor; 38 import org.apache.james.mime4j.codec.DecoderUtil; 39 import org.apache.james.mime4j.codec.EncoderUtil; 40 import org.apache.james.mime4j.codec.QuotedPrintableInputStream; 41 import org.apache.james.mime4j.util.CharsetUtil; 42 43 public class MimeUtility { 44 private static final String LOG_TAG = "Email"; 45 46 public static final String MIME_TYPE_RFC822 = "message/rfc822"; 47 private static final Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n"); 48 49 /** 50 * Replace sequences of CRLF+WSP with WSP. Tries to preserve original string object whenever 51 * possible. 52 */ unfold(String s)53 public static String unfold(String s) { 54 if (s == null) { 55 return null; 56 } 57 Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s); 58 if (patternMatcher.find()) { 59 patternMatcher.reset(); 60 s = patternMatcher.replaceAll(""); 61 } 62 return s; 63 } 64 decode(String s)65 public static String decode(String s) { 66 if (s == null) { 67 return null; 68 } 69 return DecoderUtil.decodeEncodedWords(s, DecodeMonitor.STRICT); 70 } 71 unfoldAndDecode(String s)72 public static String unfoldAndDecode(String s) { 73 return decode(unfold(s)); 74 } 75 76 // TODO implement proper foldAndEncode 77 // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent 78 // duplication of encoding. foldAndEncode(String s)79 public static String foldAndEncode(String s) { 80 return s; 81 } 82 83 /** 84 * INTERIM version of foldAndEncode that will be used only by Subject: headers. This is safer than 85 * implementing foldAndEncode() (see above) and risking unknown damage to other headers. 86 * 87 * <p>TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK. 88 * 89 * @param s original string to encode and fold 90 * @param usedCharacters number of characters already used up by header name 91 * @return the String ready to be transmitted 92 */ foldAndEncode2(String s, int usedCharacters)93 public static String foldAndEncode2(String s, int usedCharacters) { 94 // james.mime4j.codec.EncoderUtil.java 95 // encode: encodeIfNecessary(text, usage, numUsedInHeaderName) 96 // Usage.TEXT_TOKENlooks like the right thing for subjects 97 // use WORD_ENTITY for address/names 98 99 String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, usedCharacters); 100 101 return fold(encoded, usedCharacters); 102 } 103 104 /** 105 * INTERIM: From newer version of org.apache.james (but we don't want to import the entire 106 * MimeUtil class). 107 * 108 * <p>Splits the specified string into a multiple-line representation with lines no longer than 76 109 * characters (because the line might contain encoded words; see <a 110 * href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 2047</a> section 2). If the string contains 111 * non-whitespace sequences longer than 76 characters a line break is inserted at the whitespace 112 * character following the sequence resulting in a line longer than 76 characters. 113 * 114 * @param s string to split. 115 * @param usedCharacters number of characters already used up. Usually the number of characters 116 * for header field name plus colon and one space. 117 * @return a multiple-line representation of the given string. 118 */ fold(String s, int usedCharacters)119 public static String fold(String s, int usedCharacters) { 120 final int maxCharacters = 76; 121 122 final int length = s.length(); 123 if (usedCharacters + length <= maxCharacters) { 124 return s; 125 } 126 127 StringBuilder sb = new StringBuilder(); 128 129 int lastLineBreak = -usedCharacters; 130 int wspIdx = indexOfWsp(s, 0); 131 while (true) { 132 if (wspIdx == length) { 133 sb.append(s.substring(Math.max(0, lastLineBreak))); 134 return sb.toString(); 135 } 136 137 int nextWspIdx = indexOfWsp(s, wspIdx + 1); 138 139 if (nextWspIdx - lastLineBreak > maxCharacters) { 140 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx)); 141 sb.append("\r\n"); 142 lastLineBreak = wspIdx; 143 } 144 145 wspIdx = nextWspIdx; 146 } 147 } 148 149 /** 150 * INTERIM: From newer version of org.apache.james (but we don't want to import the entire 151 * MimeUtil class). 152 * 153 * <p>Search for whitespace. 154 */ indexOfWsp(String s, int fromIndex)155 private static int indexOfWsp(String s, int fromIndex) { 156 final int len = s.length(); 157 for (int index = fromIndex; index < len; index++) { 158 char c = s.charAt(index); 159 if (c == ' ' || c == '\t') { 160 return index; 161 } 162 } 163 return len; 164 } 165 166 /** 167 * Returns the named parameter of a header field. If name is null the first parameter is returned, 168 * or if there are no additional parameters in the field the entire field is returned. Otherwise 169 * the named parameter is searched for in a case insensitive fashion and returned. If the 170 * parameter cannot be found the method returns null. 171 * 172 * <p>TODO: quite inefficient with the inner trimming & splitting. TODO: Also has a latent bug: 173 * uses "startsWith" to match the name, which can false-positive. TODO: The doc says that for a 174 * null name you get the first param, but you get the header. Should probably just fix the doc, 175 * but if other code assumes that behavior, fix the code. TODO: Need to decode %-escaped strings, 176 * as in: filename="ab%22d". ('+' -> ' ' conversion too? check RFC) 177 * 178 * @param header 179 * @param name 180 * @return the entire header (if name=null), the found parameter, or null 181 */ getHeaderParameter(String header, String name)182 public static String getHeaderParameter(String header, String name) { 183 if (header == null) { 184 return null; 185 } 186 String[] parts = unfold(header).split(";"); 187 if (name == null) { 188 return parts[0].trim(); 189 } 190 String lowerCaseName = name.toLowerCase(); 191 for (String part : parts) { 192 if (part.trim().toLowerCase().startsWith(lowerCaseName)) { 193 String[] parameterParts = part.split("=", 2); 194 if (parameterParts.length < 2) { 195 return null; 196 } 197 String parameter = parameterParts[1].trim(); 198 if (parameter.startsWith("\"") && parameter.endsWith("\"")) { 199 return parameter.substring(1, parameter.length() - 1); 200 } else { 201 return parameter; 202 } 203 } 204 } 205 return null; 206 } 207 208 /** 209 * Reads the Part's body and returns a String based on any charset conversion that needed to be 210 * done. 211 * 212 * @param part The part containing a body 213 * @return a String containing the converted text in the body, or null if there was no text or an 214 * error during conversion. 215 */ getTextFromPart(Part part)216 public static String getTextFromPart(Part part) { 217 try { 218 if (part != null && part.getBody() != null) { 219 InputStream in = part.getBody().getInputStream(); 220 String mimeType = part.getMimeType(); 221 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) { 222 /* 223 * Now we read the part into a buffer for further processing. Because 224 * the stream is now wrapped we'll remove any transfer encoding at this point. 225 */ 226 ByteArrayOutputStream out = new ByteArrayOutputStream(); 227 IOUtils.copy(in, out); 228 in.close(); 229 in = null; // we want all of our memory back, and close might not release 230 231 /* 232 * We've got a text part, so let's see if it needs to be processed further. 233 */ 234 String charset = getHeaderParameter(part.getContentType(), "charset"); 235 if (charset != null) { 236 /* 237 * See if there is conversion from the MIME charset to the Java one. 238 */ 239 charset = CharsetUtil.lookup(charset).name(); 240 } 241 /* 242 * No encoding, so use us-ascii, which is the standard. 243 */ 244 if (charset == null) { 245 charset = "ASCII"; 246 } 247 /* 248 * Convert and return as new String 249 */ 250 String result = out.toString(charset); 251 out.close(); 252 return result; 253 } 254 } 255 256 } catch (OutOfMemoryError oom) { 257 /* 258 * If we are not able to process the body there's nothing we can do about it. Return 259 * null and let the upper layers handle the missing content. 260 */ 261 VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString()); 262 } catch (Exception e) { 263 /* 264 * If we are not able to process the body there's nothing we can do about it. Return 265 * null and let the upper layers handle the missing content. 266 */ 267 VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + e.toString()); 268 } 269 return null; 270 } 271 272 /** 273 * Returns true if the given mimeType matches the matchAgainst specification. The comparison 274 * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*"). 275 * 276 * @param mimeType A MIME type to check. 277 * @param matchAgainst A MIME type to check against. May include wildcards. 278 * @return true if the mimeType matches 279 */ mimeTypeMatches(String mimeType, String matchAgainst)280 public static boolean mimeTypeMatches(String mimeType, String matchAgainst) { 281 Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"), Pattern.CASE_INSENSITIVE); 282 return p.matcher(mimeType).matches(); 283 } 284 285 /** 286 * Returns true if the given mimeType matches any of the matchAgainst specifications. The 287 * comparison ignores case and the matchAgainst strings may include "*" for a wildcard (e.g. 288 * "image/*"). 289 * 290 * @param mimeType A MIME type to check. 291 * @param matchAgainst An array of MIME types to check against. May include wildcards. 292 * @return true if the mimeType matches any of the matchAgainst strings 293 */ mimeTypeMatches(String mimeType, String[] matchAgainst)294 public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) { 295 for (String matchType : matchAgainst) { 296 if (mimeTypeMatches(mimeType, matchType)) { 297 return true; 298 } 299 } 300 return false; 301 } 302 303 /** 304 * Given an input stream and a transfer encoding, return a wrapped input stream for that encoding 305 * (or the original if none is required) 306 * 307 * @param in the input stream 308 * @param contentTransferEncoding the content transfer encoding 309 * @return a properly wrapped stream 310 */ getInputStreamForContentTransferEncoding( InputStream in, String contentTransferEncoding)311 public static InputStream getInputStreamForContentTransferEncoding( 312 InputStream in, String contentTransferEncoding) { 313 if (contentTransferEncoding != null) { 314 contentTransferEncoding = MimeUtility.getHeaderParameter(contentTransferEncoding, null); 315 if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) { 316 in = new QuotedPrintableInputStream(in); 317 } else if ("base64".equalsIgnoreCase(contentTransferEncoding)) { 318 in = new Base64InputStream(in, Base64.DEFAULT); 319 } 320 } 321 return in; 322 } 323 324 /** Removes any content transfer encoding from the stream and returns a Body. */ decodeBody(InputStream in, String contentTransferEncoding)325 public static Body decodeBody(InputStream in, String contentTransferEncoding) throws IOException { 326 /* 327 * We'll remove any transfer encoding by wrapping the stream. 328 */ 329 in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding); 330 BinaryTempFileBody tempBody = new BinaryTempFileBody(); 331 OutputStream out = tempBody.getOutputStream(); 332 try { 333 IOUtils.copy(in, out); 334 } catch (Base64DataException bde) { 335 // TODO Need to fix this somehow 336 //String warning = "\n\n" + Email.getMessageDecodeErrorString(); 337 //out.write(warning.getBytes()); 338 } finally { 339 out.close(); 340 } 341 return tempBody; 342 } 343 344 /** 345 * Recursively scan a Part (usually a Message) and sort out which of its children will be 346 * "viewable" and which will be attachments. 347 * 348 * @param part The part to be broken down 349 * @param viewables This arraylist will be populated with all parts that appear to be the 350 * "message" (e.g. text/plain & text/html) 351 * @param attachments This arraylist will be populated with all parts that appear to be 352 * attachments (including inlines) 353 * @throws MessagingException 354 */ collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)355 public static void collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments) 356 throws MessagingException { 357 String disposition = part.getDisposition(); 358 String dispositionType = MimeUtility.getHeaderParameter(disposition, null); 359 // If a disposition is not specified, default to "inline" 360 boolean inline = 361 TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType); 362 // The lower-case mime type 363 String mimeType = part.getMimeType().toLowerCase(); 364 365 if (part.getBody() instanceof Multipart) { 366 // If the part is Multipart but not alternative it's either mixed or 367 // something we don't know about, which means we treat it as mixed 368 // per the spec. We just process its pieces recursively. 369 MimeMultipart mp = (MimeMultipart) part.getBody(); 370 boolean foundHtml = false; 371 if (mp.getSubTypeForTest().equals("alternative")) { 372 for (int i = 0; i < mp.getCount(); i++) { 373 if (mp.getBodyPart(i).isMimeType("text/html")) { 374 foundHtml = true; 375 break; 376 } 377 } 378 } 379 for (int i = 0; i < mp.getCount(); i++) { 380 // See if we have text and html 381 BodyPart bp = mp.getBodyPart(i); 382 // If there's html, don't bother loading text 383 if (foundHtml && bp.isMimeType("text/plain")) { 384 continue; 385 } 386 collectParts(bp, viewables, attachments); 387 } 388 } else if (part.getBody() instanceof Message) { 389 // If the part is an embedded message we just continue to process 390 // it, pulling any viewables or attachments into the running list. 391 Message message = (Message) part.getBody(); 392 collectParts(message, viewables, attachments); 393 } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) { 394 // We'll treat text and images as viewables 395 viewables.add(part); 396 } else { 397 // Everything else is an attachment. 398 attachments.add(part); 399 } 400 } 401 } 402