Search in sources :

Example 6 with MAPIProperty

use of org.apache.poi.hsmf.datatypes.MAPIProperty in project poi by apache.

the class TestHMEFMessage method testCustomProperty.

/**
 * Checks that the sample message carries the non-standard properties
 *  0xE28 and 0xE29, and that they can be looked up again through
 *  custom {@link MAPIProperty} definitions.
 */
public void testCustomProperty() throws Exception {
    HMEFMessage msg = new HMEFMessage(_samples.openResourceAsStream("quick-winmail.dat"));

    // Scan every message-level attribute for the two non-standard IDs
    boolean sawE28 = false;
    boolean sawE29 = false;
    for (MAPIAttribute attribute : msg.getMessageMAPIAttributes()) {
        int propertyId = attribute.getProperty().id;
        if (propertyId == 0xe28) {
            sawE28 = true;
        }
        if (propertyId == 0xe29) {
            sawE29 = true;
        }
    }
    assertEquals(true, sawE28);
    assertEquals(true, sawE29);

    // The same attributes must be reachable via custom property definitions
    MAPIProperty customE28 = MAPIProperty.createCustom(0xe28, Types.ASCII_STRING, "Custom E28");
    MAPIProperty customE29 = MAPIProperty.createCustom(0xe29, Types.ASCII_STRING, "Custom E29");
    MAPIAttribute attrE28 = msg.getMessageMAPIAttribute(customE28);
    MAPIAttribute attrE29 = msg.getMessageMAPIAttribute(customE29);
    assertNotNull(attrE28);
    assertNotNull(attrE29);

    // 0xE28 is a string attribute; compare its text from offset 10 onwards
    assertEquals(MAPIStringAttribute.class, attrE28.getClass());
    String e28Text = ((MAPIStringAttribute) attrE28).getDataString();
    assertEquals("Zimbra - Mark Rogers", e28Text.substring(10));
}
Also used : MAPIStringAttribute(org.apache.poi.hmef.attribute.MAPIStringAttribute) MAPIAttribute(org.apache.poi.hmef.attribute.MAPIAttribute) MAPIProperty(org.apache.poi.hsmf.datatypes.MAPIProperty)

Example 7 with MAPIProperty

use of org.apache.poi.hsmf.datatypes.MAPIProperty in project poi by apache.

the class MAPIAttribute method create.

/**
 * Parses a MAPI Properties TNEF Attribute, and returns
 *  the list of MAPI Attributes contained within it.
 * <p>The attribute data is read as a little-endian stream: a 32 bit
 *  attribute count, then for each attribute a 16 bit type (possibly
 *  carrying the multi-valued flag), a 16 bit property id, optional
 *  named-property details, and finally one or more values.</p>
 * <p>One {@link MAPIAttribute} is added to the result for every value
 *  read, so a property holding several values yields several entries
 *  that share the same property.</p>
 *
 * @param parent the TNEF attribute holding the MAPI properties; its property
 *  must be {@code ID_MAPIPROPERTIES} or {@code ID_ATTACHMENT}
 * @return the attributes parsed from the parent's data, in stream order
 * @throws IllegalArgumentException if the parent is of any other kind
 * @throws IOException declared for the stream reads performed while parsing
 */
public static List<MAPIAttribute> create(TNEFAttribute parent) throws IOException {
    // The two accepted kinds need no special handling; the empty branches
    //  only exist so that everything else falls through to the exception
    if (parent.getProperty() == TNEFProperty.ID_MAPIPROPERTIES) {
    // Regular MAPI Properties, normally on the message
    } else if (parent.getProperty() == TNEFProperty.ID_ATTACHMENT) {
    // MAPI Properties for an attachment
    } else {
        // Something else, oh dear...
        throw new IllegalArgumentException("Can only create from a MAPIProperty attribute, " + "instead received a " + parent.getProperty() + " one");
    }
    ByteArrayInputStream inp = new ByteArrayInputStream(parent.getData());
    // First up, get the number of attributes
    int count = LittleEndian.readInt(inp);
    List<MAPIAttribute> attrs = new ArrayList<MAPIAttribute>();
    // Now, read each one in turn
    for (int i = 0; i < count; i++) {
        // Each attribute starts with a 16 bit type followed by a 16 bit id
        int typeAndMV = LittleEndian.readUShort(inp);
        int id = LittleEndian.readUShort(inp);
        // Is it either Multi-Valued or Variable-Length?
        boolean isMV = false;
        boolean isVL = false;
        int typeId = typeAndMV;
        // Strip the multi-valued flag so that typeId is the plain type
        if ((typeAndMV & Types.MULTIVALUED_FLAG) != 0) {
            isMV = true;
            typeId -= Types.MULTIVALUED_FLAG;
        }
        // Strings, binary and directory types are treated as variable length
        if (typeId == Types.ASCII_STRING.getId() || typeId == Types.UNICODE_STRING.getId() || typeId == Types.BINARY.getId() || typeId == Types.DIRECTORY.getId()) {
            isVL = true;
        }
        // Turn the type ID into a strongly typed thing, falling back
        //  to a custom type when the ID isn't a known one
        MAPIType type = Types.getById(typeId);
        if (type == null) {
            type = Types.createCustom(typeId);
        }
        // If it's a named property, rather than a standard
        //  MAPI property, grab the details of it
        MAPIProperty prop = MAPIProperty.get(id);
        if (id >= 0x8000 && id <= 0xFFFF) {
            // The 16 byte GUID has to be consumed to advance the stream,
            //  but its value is not used afterwards
            byte[] guid = new byte[16];
            IOUtils.readFully(inp, guid);
            int mptype = LittleEndian.readInt(inp);
            // Get the name of it
            String name;
            if (mptype == 0) {
                // It's based on a normal one, so borrow that one's name
                int mpid = LittleEndian.readInt(inp);
                MAPIProperty base = MAPIProperty.get(mpid);
                name = base.name;
            } else {
                // Custom name was stored, as mplen bytes of little-endian unicode
                int mplen = LittleEndian.readInt(inp);
                byte[] mpdata = new byte[mplen];
                IOUtils.readFully(inp, mpdata);
                // One character fewer than stored is decoded - presumably to
                //  drop a trailing null terminator (TODO confirm)
                name = StringUtil.getFromUnicodeLE(mpdata, 0, (mplen / 2) - 1);
                // NOTE(review): skipToBoundary is defined elsewhere; it presumably
                //  skips padding up to the next alignment boundary - confirm
                skipToBoundary(mplen, inp);
            }
            // Now create
            prop = MAPIProperty.createCustom(id, type, name);
        }
        // Properties that aren't recognised get a placeholder
        //  name built from the hex id
        if (prop == MAPIProperty.UNKNOWN) {
            prop = MAPIProperty.createCustom(id, type, "(unknown " + Integer.toHexString(id) + ")");
        }
        // Now read in the value(s). Only multi-valued and variable
        //  length properties store an explicit value count
        int values = 1;
        if (isMV || isVL) {
            values = LittleEndian.readInt(inp);
        }
        for (int j = 0; j < values; j++) {
            // getLength is defined elsewhere; it is given the stream, so it
            //  may read a length prefix from it for some types (TODO confirm)
            int len = getLength(type, inp);
            byte[] data = new byte[len];
            IOUtils.readFully(inp, data);
            skipToBoundary(len, inp);
            // Create the most specific attribute class for this value:
            //  strings first, then dates, then compressed RTF (matched on
            //  the property id rather than the type), else the generic one
            MAPIAttribute attr;
            if (type == Types.UNICODE_STRING || type == Types.ASCII_STRING) {
                attr = new MAPIStringAttribute(prop, typeId, data);
            } else if (type == Types.APP_TIME || type == Types.TIME) {
                attr = new MAPIDateAttribute(prop, typeId, data);
            } else if (id == MAPIProperty.RTF_COMPRESSED.id) {
                attr = new MAPIRtfAttribute(prop, typeId, data);
            } else {
                attr = new MAPIAttribute(prop, typeId, data);
            }
            attrs.add(attr);
        }
    }
    // All done
    return attrs;
}
Also used : ArrayList(java.util.ArrayList) ByteArrayInputStream(java.io.ByteArrayInputStream) MAPIType(org.apache.poi.hsmf.datatypes.Types.MAPIType) MAPIProperty(org.apache.poi.hsmf.datatypes.MAPIProperty)

Example 8 with MAPIProperty

use of org.apache.poi.hsmf.datatypes.MAPIProperty in project poi by apache.

the class HMEFContentsExtractor method getBodyAttribute.

/**
 * Finds the attribute holding the message body.
 *
 * @return the compressed RTF body attribute when the message has one,
 *  otherwise whatever the lookup of the custom property 0x3fd9
 *  ("Uncompressed Body") yields
 */
protected MAPIAttribute getBodyAttribute() {
    MAPIAttribute compressedBody = message.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
    if (compressedBody == null) {
        // See bug #59786 - a test file is still wanted to confirm whether
        //  this is the right property, and whether it is truly general
        MAPIProperty fallbackProperty = MAPIProperty.createCustom(0x3fd9, Types.ASCII_STRING, "Uncompressed Body");
        // Hand back the result of the fallback lookup as-is
        return message.getMessageMAPIAttribute(fallbackProperty);
    }
    return compressedBody;
}
Also used : MAPIAttribute(org.apache.poi.hmef.attribute.MAPIAttribute) MAPIProperty(org.apache.poi.hsmf.datatypes.MAPIProperty)

Example 9 with MAPIProperty

use of org.apache.poi.hsmf.datatypes.MAPIProperty in project tika by apache.

the class OutlookExtractor method handleFromTo.

/**
 * Copies the sender and recipient details of the current message
 *  into the supplied metadata.
 * <p>The display from/to/cc/bcc strings are set first, then the
 *  "sent by server type" (when present), then the first chunk of each
 *  sender / sent-representing name and email property, and finally
 *  the name, display name and email address of every recipient
 *  returned by {@code buildRecipients()}.</p>
 *
 * @param headers the message headers; not used by this method
 * @param metadata the metadata object to populate
 * @throws ChunkNotFoundException if one of the message accessors
 *  called here reports a missing chunk
 */
private void handleFromTo(Map<String, String[]> headers, Metadata metadata) throws ChunkNotFoundException {
    String from = msg.getDisplayFrom();
    metadata.set(TikaCoreProperties.CREATOR, from);
    metadata.set(Metadata.MESSAGE_FROM, from);
    metadata.set(Metadata.MESSAGE_TO, msg.getDisplayTo());
    metadata.set(Metadata.MESSAGE_CC, msg.getDisplayCC());
    metadata.set(Metadata.MESSAGE_BCC, msg.getDisplayBCC());

    Chunks chunks = msg.getMainChunks();
    StringChunk sentByServerType = chunks.getSentByServerType();
    if (sentByServerType != null) {
        metadata.set(Office.MAPI_SENT_BY_SERVER_TYPE, sentByServerType.getValue());
    }

    // Reuse the chunks fetched above rather than asking the message again
    Map<MAPIProperty, List<Chunk>> mainChunks = chunks.getAll();

    //sometimes in SMTP .msg files there is an email in the sender name field.
    setFirstChunk(mainChunks.get(MAPIProperty.SENDER_NAME), Message.MESSAGE_FROM_NAME, metadata);
    setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_NAME), Office.MAPI_FROM_REPRESENTING_NAME, metadata);
    setFirstChunk(mainChunks.get(MAPIProperty.SENDER_EMAIL_ADDRESS), Message.MESSAGE_FROM_EMAIL, metadata);
    setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_EMAIL_ADDRESS), Office.MAPI_FROM_REPRESENTING_EMAIL, metadata);

    // Each recipient contributes three values to the metadata keys
    //  of its own recipient type
    for (Recipient recipient : buildRecipients()) {
        switch(recipient.recipientType) {
            case TO:
                addEvenIfNull(Message.MESSAGE_TO_NAME, recipient.name, metadata);
                addEvenIfNull(Message.MESSAGE_TO_DISPLAY_NAME, recipient.displayName, metadata);
                addEvenIfNull(Message.MESSAGE_TO_EMAIL, recipient.emailAddress, metadata);
                break;
            case CC:
                addEvenIfNull(Message.MESSAGE_CC_NAME, recipient.name, metadata);
                addEvenIfNull(Message.MESSAGE_CC_DISPLAY_NAME, recipient.displayName, metadata);
                addEvenIfNull(Message.MESSAGE_CC_EMAIL, recipient.emailAddress, metadata);
                break;
            case BCC:
                addEvenIfNull(Message.MESSAGE_BCC_NAME, recipient.name, metadata);
                addEvenIfNull(Message.MESSAGE_BCC_DISPLAY_NAME, recipient.displayName, metadata);
                addEvenIfNull(Message.MESSAGE_BCC_EMAIL, recipient.emailAddress, metadata);
                break;
            default:
                // Unknown or undefined recipient types are skipped
                break;
        }
    }
}
Also used : Chunks(org.apache.poi.hsmf.datatypes.Chunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) ByteChunk(org.apache.poi.hsmf.datatypes.ByteChunk) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk) Chunk(org.apache.poi.hsmf.datatypes.Chunk) MAPIProperty(org.apache.poi.hsmf.datatypes.MAPIProperty) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk)

Example 10 with MAPIProperty

use of org.apache.poi.hsmf.datatypes.MAPIProperty in project tika by apache.

the class OutlookExtractor method guess7BitEncoding.

/**
 * Tries to identify the correct encoding for 7-bit (non-unicode)
 *  strings in the file.
 * <p>Many messages store their strings as unicode, which is
 *  nice and easy. Some use one-byte encodings for their
 *  strings, but don't always store the encoding anywhere
 *  helpful in the file.</p>
 * <p>This method tries, in order: the codepage properties, a charset
 *  on a Content-Type header, encoding detection on the HTML body,
 *  and finally a charset detector run over the raw text body. It
 *  stops at the first candidate that
 *  {@code tryToSet7BitEncoding} accepts.</p>
 * <p>Bug #49441 has more on why this is needed</p>
 * <p>This is taken from POI (TIKA-1238)
 * as a temporary workaround to prevent unsupported encoding exceptions</p>
 *
 * @param msg the message whose 7-bit string encoding should be guessed
 */
private void guess7BitEncoding(MAPIMessage msg) {
    Chunks mainChunks = msg.getMainChunks();
    //sanity check
    if (mainChunks == null) {
        return;
    }
    Map<MAPIProperty, List<PropertyValue>> props = mainChunks.getProperties();
    if (props != null) {
        // First choice is a codepage property
        for (MAPIProperty prop : new MAPIProperty[] { MAPIProperty.MESSAGE_CODEPAGE, MAPIProperty.INTERNET_CPID }) {
            List<PropertyValue> val = props.get(prop);
            if (val == null || val.isEmpty()) {
                continue;
            }
            PropertyValue first = val.get(0);
            if (!(first instanceof PropertyValue.LongPropertyValue)) {
                // A codepage stored with an unexpected type must not abort
                //  this best-effort guess; move on to the next option
                continue;
            }
            int codepage = ((PropertyValue.LongPropertyValue) first).getValue();
            String encoding = null;
            try {
                encoding = CodePageUtil.codepageToEncoding(codepage, true);
            } catch (UnsupportedEncodingException ignored) {
                // No encoding is known for this codepage; encoding stays null
            }
            if (tryToSet7BitEncoding(msg, encoding)) {
                return;
            }
        }
    }
    // Second choice is a charset on a content type header
    try {
        String[] headers = msg.getHeaders();
        if (headers != null && headers.length > 0) {
            // Look for a content type with a charset
            Pattern p = Pattern.compile("Content-Type:.*?charset=[\"']?([^;'\"]+)[\"']?", Pattern.CASE_INSENSITIVE);
            for (String header : headers) {
                if (header.startsWith("Content-Type")) {
                    Matcher m = p.matcher(header);
                    if (m.matches()) {
                        // Found it! Tell all the string chunks
                        String charset = m.group(1);
                        if (tryToSet7BitEncoding(msg, charset)) {
                            return;
                        }
                    }
                }
            }
        }
    } catch (ChunkNotFoundException ignored) {
        // The headers could not be fetched; fall through to the next option
    }
    // Third choice is running the encoding detector over the HTML body
    try {
        String html = msg.getHtmlBody();
        if (html != null && html.length() > 0) {
            Charset charset = null;
            try {
                charset = detector.detect(new ByteArrayInputStream(html.getBytes(UTF_8)), EMPTY_METADATA);
            } catch (IOException ignored) {
                // Detection failed; charset stays null and this option is skipped
            }
            if (charset != null && tryToSet7BitEncoding(msg, charset.name())) {
                return;
            }
        }
    } catch (ChunkNotFoundException ignored) {
        // The HTML body could not be fetched; fall through to the last resort
    }
    //absolute last resort, try charset detector
    StringChunk text = mainChunks.getTextBodyChunk();
    if (text != null) {
        // Named so that it doesn't shadow the 'detector' field used above
        CharsetDetector textDetector = new CharsetDetector();
        textDetector.setText(text.getRawValue());
        CharsetMatch match = textDetector.detect();
        // Only matches with a confidence above 35 are tried
        if (match != null && match.getConfidence() > 35) {
            tryToSet7BitEncoding(msg, match.getName());
        }
    }
}
Also used : ChunkNotFoundException(org.apache.poi.hsmf.exceptions.ChunkNotFoundException) Pattern(java.util.regex.Pattern) Chunks(org.apache.poi.hsmf.datatypes.Chunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) Matcher(java.util.regex.Matcher) CharsetDetector(org.apache.tika.parser.txt.CharsetDetector) PropertyValue(org.apache.poi.hsmf.datatypes.PropertyValue) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Charset(java.nio.charset.Charset) IOException(java.io.IOException) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk) CharsetMatch(org.apache.tika.parser.txt.CharsetMatch) ByteArrayInputStream(java.io.ByteArrayInputStream) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) MAPIProperty(org.apache.poi.hsmf.datatypes.MAPIProperty)

Aggregations

MAPIProperty (org.apache.poi.hsmf.datatypes.MAPIProperty)10 PropertyValue (org.apache.poi.hsmf.datatypes.PropertyValue)5 List (java.util.List)4 ArrayList (java.util.ArrayList)3 Chunks (org.apache.poi.hsmf.datatypes.Chunks)3 LongPropertyValue (org.apache.poi.hsmf.datatypes.PropertyValue.LongPropertyValue)3 TimePropertyValue (org.apache.poi.hsmf.datatypes.PropertyValue.TimePropertyValue)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2 LinkedList (java.util.LinkedList)2 Matcher (java.util.regex.Matcher)2 Pattern (java.util.regex.Pattern)2 MAPIAttribute (org.apache.poi.hmef.attribute.MAPIAttribute)2 AttachmentChunks (org.apache.poi.hsmf.datatypes.AttachmentChunks)2 Chunk (org.apache.poi.hsmf.datatypes.Chunk)2 ChunkBasedPropertyValue (org.apache.poi.hsmf.datatypes.ChunkBasedPropertyValue)2 RecipientChunks (org.apache.poi.hsmf.datatypes.RecipientChunks)2 StringChunk (org.apache.poi.hsmf.datatypes.StringChunk)2 ChunkNotFoundException (org.apache.poi.hsmf.exceptions.ChunkNotFoundException)2 Test (org.junit.Test)2