Search in sources :

Example 6 with ChunkNotFoundException

Use of org.apache.poi.hsmf.exceptions.ChunkNotFoundException in the Apache POI project.

From the class MAPIMessage, method getRecipientNamesList.

/**
 * Collects the display name of every recipient of this message,
 * normally in TO, then CC, then BCC order.
 * Each recipient's chunks are asked for a name in turn; the result
 * array has one entry per recipient, in the same order as the
 * recipient chunks.
 * See also {@link #getDisplayTo()}, {@link #getDisplayCC()}
 * and {@link #getDisplayBCC()}.
 *
 * @return the recipient names, one per recipient; never contains {@code null}
 * @throws ChunkNotFoundException if the message has no recipient chunks at
 *         all, or if any single recipient yields no name
 */
public String[] getRecipientNamesList() throws ChunkNotFoundException {
    final boolean noRecipients = recipientChunks == null || recipientChunks.length == 0;
    if (noRecipients) {
        throw new ChunkNotFoundException("No recipients section present");
    }

    final String[] result = new String[recipientChunks.length];
    int position = 0;
    for (RecipientChunks recipient : recipientChunks) {
        final String displayName = recipient.getRecipientName();
        if (displayName == null) {
            // A missing name is always fatal here; the message reports the 1-based position.
            throw new ChunkNotFoundException("No display name holding chunks found for the " + (position + 1) + "th recipient");
        }
        result[position] = displayName;
        position++;
    }
    return result;
}
Also used : ChunkNotFoundException(org.apache.poi.hsmf.exceptions.ChunkNotFoundException) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks)

Example 7 with ChunkNotFoundException

Use of org.apache.poi.hsmf.exceptions.ChunkNotFoundException in the Apache POI project.

From the class MAPIMessage, method getRecipientEmailAddressList.

/**
 * Collects the email address of every recipient of this message,
 * normally in TO, then CC, then BCC order.
 * Each recipient's chunks are asked for an address in turn; the result
 * array has one entry per recipient, in the same order as the
 * recipient chunks.
 *
 * @return the recipient addresses, one per recipient; an entry is
 *         {@code null} only when that recipient has no address and
 *         {@code returnNullOnMissingChunk} is set
 * @throws ChunkNotFoundException if the message has no recipient chunks at
 *         all, or if a recipient yields no address while
 *         {@code returnNullOnMissingChunk} is not set
 */
public String[] getRecipientEmailAddressList() throws ChunkNotFoundException {
    if (recipientChunks == null || recipientChunks.length == 0) {
        throw new ChunkNotFoundException("No recipients section present");
    }

    final String[] addresses = new String[recipientChunks.length];
    int index = 0;
    for (RecipientChunks recipient : recipientChunks) {
        final String address = recipient.getRecipientEmailAddress();
        // A missing address is only an error when the caller has not opted in to nulls.
        if (address == null && !returnNullOnMissingChunk) {
            throw new ChunkNotFoundException("No email address holding chunks found for the " + (index + 1) + "th recipient");
        }
        // Stores either the real address or the tolerated null.
        addresses[index] = address;
        index++;
    }
    return addresses;
}
Also used : ChunkNotFoundException(org.apache.poi.hsmf.exceptions.ChunkNotFoundException) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks)

Example 8 with ChunkNotFoundException

Use of org.apache.poi.hsmf.exceptions.ChunkNotFoundException in the Apache Tika project.

From the class OutlookExtractor, method guess7BitEncoding.

/**
 * Attempts to work out which encoding the 7-bit (non-unicode) strings
 * of the given message use, and applies it to the message.
 * <p>Many messages store their strings as unicode, which needs no
 *  guessing. Others use a one-byte encoding without recording which
 *  one anywhere helpful in the file.</p>
 * <p>The candidates are tried in this order, stopping at the first one
 *  that {@code tryToSet7BitEncoding} accepts:</p>
 * <ol>
 *  <li>the {@code MESSAGE_CODEPAGE} and then {@code INTERNET_CPID}
 *      codepage properties of the main chunks;</li>
 *  <li>a {@code charset=} parameter on a {@code Content-Type} header;</li>
 *  <li>the charset reported by {@code detector} for the HTML body;</li>
 *  <li>a {@link CharsetDetector} run over the raw text body, accepted
 *      only when its confidence is above 35.</li>
 * </ol>
 * <p>If the message has no main chunks, or no candidate is accepted,
 *  the message is left untouched.</p>
 * <p>Bug #49441 has more on why this is needed.</p>
 * <p>This is taken verbatim from POI (TIKA-1238)
 * as a temporary workaround to prevent unsupported encoding exceptions.</p>
 *
 * @param msg the message whose 7-bit string encoding should be guessed
 */
private void guess7BitEncoding(MAPIMessage msg) {
    final Chunks chunks = msg.getMainChunks();
    if (chunks == null) {
        // Nothing to inspect.
        return;
    }

    // Step 1: an explicit codepage property on the message.
    final Map<MAPIProperty, List<PropertyValue>> properties = chunks.getProperties();
    if (properties != null) {
        final MAPIProperty[] codepageProperties = { MAPIProperty.MESSAGE_CODEPAGE, MAPIProperty.INTERNET_CPID };
        for (MAPIProperty candidate : codepageProperties) {
            final List<PropertyValue> values = properties.get(candidate);
            if (values == null || values.size() < 1) {
                continue;
            }
            final PropertyValue.LongPropertyValue first = (PropertyValue.LongPropertyValue) values.get(0);
            final int codepage = first.getValue();
            String encodingName;
            try {
                encodingName = CodePageUtil.codepageToEncoding(codepage, true);
            } catch (UnsupportedEncodingException ignored) {
                // Unknown codepage: hand null on, exactly as if no name had been resolved.
                encodingName = null;
            }
            if (tryToSet7BitEncoding(msg, encodingName)) {
                return;
            }
        }
    }

    // Step 2: a charset parameter on a Content-Type header.
    try {
        final String[] headerLines = msg.getHeaders();
        if (headerLines != null && headerLines.length > 0) {
            final Pattern charsetPattern = Pattern.compile("Content-Type:.*?charset=[\"']?([^;'\"]+)[\"']?", Pattern.CASE_INSENSITIVE);
            for (String line : headerLines) {
                if (!line.startsWith("Content-Type")) {
                    continue;
                }
                final Matcher matcher = charsetPattern.matcher(line);
                // The whole header line must match; group 1 is the charset name.
                if (matcher.matches() && tryToSet7BitEncoding(msg, matcher.group(1))) {
                    return;
                }
            }
        }
    } catch (ChunkNotFoundException ignored) {
        // No headers available: fall through to the next strategy.
    }

    // Step 3: whatever charset the outer detector reports for the HTML body.
    try {
        final String htmlBody = msg.getHtmlBody();
        if (htmlBody != null && htmlBody.length() > 0) {
            Charset detected;
            try {
                detected = detector.detect(new ByteArrayInputStream(htmlBody.getBytes(UTF_8)), EMPTY_METADATA);
            } catch (IOException ignored) {
                // Detection failed: treat as "nothing detected".
                detected = null;
            }
            if (detected != null && tryToSet7BitEncoding(msg, detected.name())) {
                return;
            }
        }
    } catch (ChunkNotFoundException ignored) {
        // No HTML body available: fall through to the last resort.
    }

    // Step 4 (last resort): statistical detection on the raw text body bytes.
    final StringChunk bodyChunk = chunks.getTextBodyChunk();
    if (bodyChunk == null) {
        return;
    }
    final CharsetDetector statisticalDetector = new CharsetDetector();
    statisticalDetector.setText(bodyChunk.getRawValue());
    final CharsetMatch bestMatch = statisticalDetector.detect();
    if (bestMatch != null && bestMatch.getConfidence() > 35) {
        // Result is irrelevant here: there is no further fallback either way.
        tryToSet7BitEncoding(msg, bestMatch.getName());
    }
}
Also used : ChunkNotFoundException(org.apache.poi.hsmf.exceptions.ChunkNotFoundException) Pattern(java.util.regex.Pattern) Chunks(org.apache.poi.hsmf.datatypes.Chunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) Matcher(java.util.regex.Matcher) CharsetDetector(org.apache.tika.parser.txt.CharsetDetector) PropertyValue(org.apache.poi.hsmf.datatypes.PropertyValue) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Charset(java.nio.charset.Charset) IOException(java.io.IOException) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk) CharsetMatch(org.apache.tika.parser.txt.CharsetMatch) ByteArrayInputStream(java.io.ByteArrayInputStream) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) MAPIProperty(org.apache.poi.hsmf.datatypes.MAPIProperty)

Aggregations

ChunkNotFoundException (org.apache.poi.hsmf.exceptions.ChunkNotFoundException)8 AttachmentChunks (org.apache.poi.hsmf.datatypes.AttachmentChunks)5 RecipientChunks (org.apache.poi.hsmf.datatypes.RecipientChunks)3 StringChunk (org.apache.poi.hsmf.datatypes.StringChunk)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 IOException (java.io.IOException)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2 Matcher (java.util.regex.Matcher)2 Pattern (java.util.regex.Pattern)2 MAPIMessage (org.apache.poi.hsmf.MAPIMessage)2 MAPIProperty (org.apache.poi.hsmf.datatypes.MAPIProperty)2 PropertyValue (org.apache.poi.hsmf.datatypes.PropertyValue)2 File (java.io.File)1 PrintWriter (java.io.PrintWriter)1 Charset (java.nio.charset.Charset)1 ParseException (java.text.ParseException)1 SimpleDateFormat (java.text.SimpleDateFormat)1 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1 LinkedHashMap (java.util.LinkedHashMap)1