summaryrefslogtreecommitdiff
path: root/java/com/android/voicemail/impl/mail/internet/MimeUtility.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/com/android/voicemail/impl/mail/internet/MimeUtility.java')
-rw-r--r--java/com/android/voicemail/impl/mail/internet/MimeUtility.java400
1 files changed, 400 insertions, 0 deletions
diff --git a/java/com/android/voicemail/impl/mail/internet/MimeUtility.java b/java/com/android/voicemail/impl/mail/internet/MimeUtility.java
new file mode 100644
index 000000000..99846027b
--- /dev/null
+++ b/java/com/android/voicemail/impl/mail/internet/MimeUtility.java
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.android.voicemail.impl.mail.internet;
+
+import android.text.TextUtils;
+import android.util.Base64;
+import android.util.Base64DataException;
+import android.util.Base64InputStream;
+import com.android.voicemail.impl.VvmLog;
+import com.android.voicemail.impl.mail.Body;
+import com.android.voicemail.impl.mail.BodyPart;
+import com.android.voicemail.impl.mail.Message;
+import com.android.voicemail.impl.mail.MessagingException;
+import com.android.voicemail.impl.mail.Multipart;
+import com.android.voicemail.impl.mail.Part;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.commons.io.IOUtils;
+import org.apache.james.mime4j.codec.EncoderUtil;
+import org.apache.james.mime4j.decoder.DecoderUtil;
+import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
+import org.apache.james.mime4j.util.CharsetUtil;
+
+public class MimeUtility {
+ private static final String LOG_TAG = "Email";
+
+ public static final String MIME_TYPE_RFC822 = "message/rfc822";
+ private static final Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
+
+ /**
+ * Replace sequences of CRLF+WSP with WSP. Tries to preserve original string object whenever
+ * possible.
+ */
+ public static String unfold(String s) {
+ if (s == null) {
+ return null;
+ }
+ Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
+ if (patternMatcher.find()) {
+ patternMatcher.reset();
+ s = patternMatcher.replaceAll("");
+ }
+ return s;
+ }
+
+ public static String decode(String s) {
+ if (s == null) {
+ return null;
+ }
+ return DecoderUtil.decodeEncodedWords(s);
+ }
+
+ public static String unfoldAndDecode(String s) {
+ return decode(unfold(s));
+ }
+
+ // TODO implement proper foldAndEncode
+ // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
+ // duplication of encoding.
+ public static String foldAndEncode(String s) {
+ return s;
+ }
+
+ /**
+ * INTERIM version of foldAndEncode that will be used only by Subject: headers. This is safer than
+ * implementing foldAndEncode() (see above) and risking unknown damage to other headers.
+ *
+ * <p>TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
+ *
+ * @param s original string to encode and fold
+ * @param usedCharacters number of characters already used up by header name
+ * @return the String ready to be transmitted
+ */
+ public static String foldAndEncode2(String s, int usedCharacters) {
+ // james.mime4j.codec.EncoderUtil.java
+ // encode: encodeIfNecessary(text, usage, numUsedInHeaderName)
+ // Usage.TEXT_TOKENlooks like the right thing for subjects
+ // use WORD_ENTITY for address/names
+
+ String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, usedCharacters);
+
+ return fold(encoded, usedCharacters);
+ }
+
+ /**
+ * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
+ * MimeUtil class).
+ *
+ * <p>Splits the specified string into a multiple-line representation with lines no longer than 76
+ * characters (because the line might contain encoded words; see <a
+ * href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 2047</a> section 2). If the string contains
+ * non-whitespace sequences longer than 76 characters a line break is inserted at the whitespace
+ * character following the sequence resulting in a line longer than 76 characters.
+ *
+ * @param s string to split.
+ * @param usedCharacters number of characters already used up. Usually the number of characters
+ * for header field name plus colon and one space.
+ * @return a multiple-line representation of the given string.
+ */
+ public static String fold(String s, int usedCharacters) {
+ final int maxCharacters = 76;
+
+ final int length = s.length();
+ if (usedCharacters + length <= maxCharacters) {
+ return s;
+ }
+
+ StringBuilder sb = new StringBuilder();
+
+ int lastLineBreak = -usedCharacters;
+ int wspIdx = indexOfWsp(s, 0);
+ while (true) {
+ if (wspIdx == length) {
+ sb.append(s.substring(Math.max(0, lastLineBreak)));
+ return sb.toString();
+ }
+
+ int nextWspIdx = indexOfWsp(s, wspIdx + 1);
+
+ if (nextWspIdx - lastLineBreak > maxCharacters) {
+ sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
+ sb.append("\r\n");
+ lastLineBreak = wspIdx;
+ }
+
+ wspIdx = nextWspIdx;
+ }
+ }
+
+ /**
+ * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
+ * MimeUtil class).
+ *
+ * <p>Search for whitespace.
+ */
+ private static int indexOfWsp(String s, int fromIndex) {
+ final int len = s.length();
+ for (int index = fromIndex; index < len; index++) {
+ char c = s.charAt(index);
+ if (c == ' ' || c == '\t') {
+ return index;
+ }
+ }
+ return len;
+ }
+
+ /**
+ * Returns the named parameter of a header field. If name is null the first parameter is returned,
+ * or if there are no additional parameters in the field the entire field is returned. Otherwise
+ * the named parameter is searched for in a case insensitive fashion and returned. If the
+ * parameter cannot be found the method returns null.
+ *
+ * <p>TODO: quite inefficient with the inner trimming & splitting. TODO: Also has a latent bug:
+ * uses "startsWith" to match the name, which can false-positive. TODO: The doc says that for a
+ * null name you get the first param, but you get the header. Should probably just fix the doc,
+ * but if other code assumes that behavior, fix the code. TODO: Need to decode %-escaped strings,
+ * as in: filename="ab%22d". ('+' -> ' ' conversion too? check RFC)
+ *
+ * @param header
+ * @param name
+ * @return the entire header (if name=null), the found parameter, or null
+ */
+ public static String getHeaderParameter(String header, String name) {
+ if (header == null) {
+ return null;
+ }
+ String[] parts = unfold(header).split(";");
+ if (name == null) {
+ return parts[0].trim();
+ }
+ String lowerCaseName = name.toLowerCase();
+ for (String part : parts) {
+ if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
+ String[] parameterParts = part.split("=", 2);
+ if (parameterParts.length < 2) {
+ return null;
+ }
+ String parameter = parameterParts[1].trim();
+ if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
+ return parameter.substring(1, parameter.length() - 1);
+ } else {
+ return parameter;
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Reads the Part's body and returns a String based on any charset conversion that needed to be
+ * done.
+ *
+ * @param part The part containing a body
+ * @return a String containing the converted text in the body, or null if there was no text or an
+ * error during conversion.
+ */
+ public static String getTextFromPart(Part part) {
+ try {
+ if (part != null && part.getBody() != null) {
+ InputStream in = part.getBody().getInputStream();
+ String mimeType = part.getMimeType();
+ if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
+ /*
+ * Now we read the part into a buffer for further processing. Because
+ * the stream is now wrapped we'll remove any transfer encoding at this point.
+ */
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ IOUtils.copy(in, out);
+ in.close();
+ in = null; // we want all of our memory back, and close might not release
+
+ /*
+ * We've got a text part, so let's see if it needs to be processed further.
+ */
+ String charset = getHeaderParameter(part.getContentType(), "charset");
+ if (charset != null) {
+ /*
+ * See if there is conversion from the MIME charset to the Java one.
+ */
+ charset = CharsetUtil.toJavaCharset(charset);
+ }
+ /*
+ * No encoding, so use us-ascii, which is the standard.
+ */
+ if (charset == null) {
+ charset = "ASCII";
+ }
+ /*
+ * Convert and return as new String
+ */
+ String result = out.toString(charset);
+ out.close();
+ return result;
+ }
+ }
+
+ } catch (OutOfMemoryError oom) {
+ /*
+ * If we are not able to process the body there's nothing we can do about it. Return
+ * null and let the upper layers handle the missing content.
+ */
+ VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
+ } catch (Exception e) {
+ /*
+ * If we are not able to process the body there's nothing we can do about it. Return
+ * null and let the upper layers handle the missing content.
+ */
+ VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
+ }
+ return null;
+ }
+
+ /**
+ * Returns true if the given mimeType matches the matchAgainst specification. The comparison
+ * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
+ *
+ * @param mimeType A MIME type to check.
+ * @param matchAgainst A MIME type to check against. May include wildcards.
+ * @return true if the mimeType matches
+ */
+ public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
+ Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"), Pattern.CASE_INSENSITIVE);
+ return p.matcher(mimeType).matches();
+ }
+
+ /**
+ * Returns true if the given mimeType matches any of the matchAgainst specifications. The
+ * comparison ignores case and the matchAgainst strings may include "*" for a wildcard (e.g.
+ * "image/*").
+ *
+ * @param mimeType A MIME type to check.
+ * @param matchAgainst An array of MIME types to check against. May include wildcards.
+ * @return true if the mimeType matches any of the matchAgainst strings
+ */
+ public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
+ for (String matchType : matchAgainst) {
+ if (mimeTypeMatches(mimeType, matchType)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Given an input stream and a transfer encoding, return a wrapped input stream for that encoding
+ * (or the original if none is required)
+ *
+ * @param in the input stream
+ * @param contentTransferEncoding the content transfer encoding
+ * @return a properly wrapped stream
+ */
+ public static InputStream getInputStreamForContentTransferEncoding(
+ InputStream in, String contentTransferEncoding) {
+ if (contentTransferEncoding != null) {
+ contentTransferEncoding = MimeUtility.getHeaderParameter(contentTransferEncoding, null);
+ if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
+ in = new QuotedPrintableInputStream(in);
+ } else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
+ in = new Base64InputStream(in, Base64.DEFAULT);
+ }
+ }
+ return in;
+ }
+
+ /** Removes any content transfer encoding from the stream and returns a Body. */
+ public static Body decodeBody(InputStream in, String contentTransferEncoding) throws IOException {
+ /*
+ * We'll remove any transfer encoding by wrapping the stream.
+ */
+ in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
+ BinaryTempFileBody tempBody = new BinaryTempFileBody();
+ OutputStream out = tempBody.getOutputStream();
+ try {
+ IOUtils.copy(in, out);
+ } catch (Base64DataException bde) {
+ // TODO Need to fix this somehow
+ //String warning = "\n\n" + Email.getMessageDecodeErrorString();
+ //out.write(warning.getBytes());
+ } finally {
+ out.close();
+ }
+ return tempBody;
+ }
+
+ /**
+ * Recursively scan a Part (usually a Message) and sort out which of its children will be
+ * "viewable" and which will be attachments.
+ *
+ * @param part The part to be broken down
+ * @param viewables This arraylist will be populated with all parts that appear to be the
+ * "message" (e.g. text/plain & text/html)
+ * @param attachments This arraylist will be populated with all parts that appear to be
+ * attachments (including inlines)
+ * @throws MessagingException
+ */
+ public static void collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)
+ throws MessagingException {
+ String disposition = part.getDisposition();
+ String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
+ // If a disposition is not specified, default to "inline"
+ boolean inline =
+ TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
+ // The lower-case mime type
+ String mimeType = part.getMimeType().toLowerCase();
+
+ if (part.getBody() instanceof Multipart) {
+ // If the part is Multipart but not alternative it's either mixed or
+ // something we don't know about, which means we treat it as mixed
+ // per the spec. We just process its pieces recursively.
+ MimeMultipart mp = (MimeMultipart) part.getBody();
+ boolean foundHtml = false;
+ if (mp.getSubTypeForTest().equals("alternative")) {
+ for (int i = 0; i < mp.getCount(); i++) {
+ if (mp.getBodyPart(i).isMimeType("text/html")) {
+ foundHtml = true;
+ break;
+ }
+ }
+ }
+ for (int i = 0; i < mp.getCount(); i++) {
+ // See if we have text and html
+ BodyPart bp = mp.getBodyPart(i);
+ // If there's html, don't bother loading text
+ if (foundHtml && bp.isMimeType("text/plain")) {
+ continue;
+ }
+ collectParts(bp, viewables, attachments);
+ }
+ } else if (part.getBody() instanceof Message) {
+ // If the part is an embedded message we just continue to process
+ // it, pulling any viewables or attachments into the running list.
+ Message message = (Message) part.getBody();
+ collectParts(message, viewables, attachments);
+ } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
+ // We'll treat text and images as viewables
+ viewables.add(part);
+ } else {
+ // Everything else is an attachment.
+ attachments.add(part);
+ }
+ }
+}