Search in sources:

Example 1 with Chunks

use of org.apache.poi.hsmf.datatypes.Chunks in project poi by apache.

The class POIFSChunkParser, method parse.

/**
 * Builds the chunk groups for the message stored under the given directory.
 *
 * <p>The first element of the result is always the top-level {@link Chunks}
 * group; it is followed by one group per recognised child directory, in the
 * order the directory is iterated.
 *
 * @param node the directory holding the message
 * @return the main group followed by every recognised attachment, name-id
 *         and recipient group
 * @throws IOException declared by this method; presumably raised while the
 *         entries are read by {@code processChunks} - confirm against it
 */
public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
    Chunks topLevel = new Chunks();
    ArrayList<ChunkGroup> found = new ArrayList<ChunkGroup>();
    found.add(topLevel);

    // Only the direct children of the node are examined by this loop.
    // NOTE(review): whether deeper levels are visited depends on
    // processChunks, which is not visible here.
    for (Entry child : node) {
        if (!(child instanceof DirectoryNode)) {
            continue;
        }
        DirectoryNode subDir = (DirectoryNode) child;
        String dirName = subDir.getName();

        // The checks are deliberately independent: if a name were to match
        // more than one prefix, the last match wins.
        ChunkGroup matched = null;
        if (dirName.startsWith(AttachmentChunks.PREFIX)) {
            matched = new AttachmentChunks(dirName);
        }
        if (dirName.startsWith(NameIdChunks.NAME)) {
            matched = new NameIdChunks();
        }
        if (dirName.startsWith(RecipientChunks.PREFIX)) {
            matched = new RecipientChunks(dirName);
        }

        if (matched == null) {
            // Not a directory we know how to handle; ignore it quietly
            continue;
        }
        processChunks(subDir, matched);
        found.add(matched);
    }

    // The chunks sitting directly in the message directory go to the main group
    processChunks(node, topLevel);

    // Tell every group that all of its chunks have now been supplied
    for (ChunkGroup each : found) {
        each.chunksComplete();
    }

    return found.toArray(new ChunkGroup[found.size()]);
}
Also used : Entry(org.apache.poi.poifs.filesystem.Entry) Chunks(org.apache.poi.hsmf.datatypes.Chunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) ChunkGroup(org.apache.poi.hsmf.datatypes.ChunkGroup) ArrayList(java.util.ArrayList) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks)

Example 2 with Chunks

use of org.apache.poi.hsmf.datatypes.Chunks in project poi by apache.

The class TestPOIFSChunkParser, method testFindsMultipleRecipients.

/**
 * Checks that every recipient directory of a multi-recipient message is
 * turned into a {@link RecipientChunks} group, that the groups come back in
 * file-system order, that {@code RecipientChunksSorter} puts them into
 * recipient-number order, and that {@link MAPIMessage} exposes them in that
 * sorted order.
 */
@Test
public void testFindsMultipleRecipients() throws IOException, ChunkNotFoundException {
    NPOIFSFileSystem multiple = new NPOIFSFileSystem(samples.getFile("example_received_unicode.msg"), true);
    // Close the file system even when an assertion fails part way through
    try {
        // Look up each expected recipient directory; the result is not used.
        // NOTE(review): presumably getEntry fails if the entry is missing - confirm
        for (int i = 0; i <= 5; i++) {
            multiple.getRoot().getEntry("__recip_version1.0_#0000000" + i);
        }

        ChunkGroup[] groups = POIFSChunkParser.parse(multiple.getRoot());
        assertEquals(9, groups.length);

        // Expected group type per slot, in the order parse() returns them
        Class<?>[] expectedTypes = {
            Chunks.class,
            RecipientChunks.class, RecipientChunks.class, RecipientChunks.class, RecipientChunks.class,
            AttachmentChunks.class,
            RecipientChunks.class, RecipientChunks.class,
            NameIdChunks.class
        };
        for (int i = 0; i < expectedTypes.length; i++) {
            assertTrue(expectedTypes[i].isInstance(groups[i]));
        }

        // In FS order initially
        int[] recipientSlots = { 1, 2, 3, 4, 6, 7 };
        RecipientChunks[] chunks = new RecipientChunks[recipientSlots.length];
        for (int i = 0; i < recipientSlots.length; i++) {
            chunks[i] = (RecipientChunks) groups[recipientSlots[i]];
        }
        assertEquals(6, chunks.length);

        int[] fsOrderNumbers = { 0, 2, 4, 5, 3, 1 };
        for (int i = 0; i < fsOrderNumbers.length; i++) {
            assertEquals(fsOrderNumbers[i], chunks[i].recipientNumber);
        }

        // Check: { name, email address } per recipient, still in FS order
        String[][] fsOrder = {
            { "'Ashutosh Dandavate'", "ashutosh.dandavate@alfresco.com" },
            { "'Mike Farman'", "mikef@alfresco.com" },
            { "nick.burch@alfresco.com", "nick.burch@alfresco.com" },
            { "'Roy Wetherall'", "roy.wetherall@alfresco.com" },
            { "nickb@alfresco.com", "nickb@alfresco.com" },
            { "'Paul Holmes-Higgin'", "paul.hh@alfresco.com" }
        };
        for (int i = 0; i < fsOrder.length; i++) {
            assertEquals(fsOrder[i][0], chunks[i].getRecipientName());
            assertEquals(fsOrder[i][1], chunks[i].getRecipientEmailAddress());
        }

        // Now sort, and re-check
        Arrays.sort(chunks, new RecipientChunksSorter());
        String[][] sorted = {
            { "'Ashutosh Dandavate'", "ashutosh.dandavate@alfresco.com" },
            { "'Paul Holmes-Higgin'", "paul.hh@alfresco.com" },
            { "'Mike Farman'", "mikef@alfresco.com" },
            { "nickb@alfresco.com", "nickb@alfresco.com" },
            { "nick.burch@alfresco.com", "nick.burch@alfresco.com" },
            { "'Roy Wetherall'", "roy.wetherall@alfresco.com" }
        };
        for (int i = 0; i < sorted.length; i++) {
            assertEquals(sorted[i][0], chunks[i].getRecipientName());
            assertEquals(sorted[i][1], chunks[i].getRecipientEmailAddress());
        }

        // Finally check on message: it must report the same sorted order
        MAPIMessage msg = new MAPIMessage(multiple);
        assertEquals(6, msg.getRecipientEmailAddressList().length);
        assertEquals(6, msg.getRecipientNamesList().length);
        for (int i = 0; i < sorted.length; i++) {
            assertEquals(sorted[i][0], msg.getRecipientNamesList()[i]);
        }
        for (int i = 0; i < sorted.length; i++) {
            assertEquals(sorted[i][1], msg.getRecipientEmailAddressList()[i]);
        }
        msg.close();
    } finally {
        multiple.close();
    }
}
Also used : MAPIMessage(org.apache.poi.hsmf.MAPIMessage) NPOIFSFileSystem(org.apache.poi.poifs.filesystem.NPOIFSFileSystem) ChunkGroup(org.apache.poi.hsmf.datatypes.ChunkGroup) Chunks(org.apache.poi.hsmf.datatypes.Chunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) RecipientChunksSorter(org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) Test(org.junit.Test)

Example 3 with Chunks

use of org.apache.poi.hsmf.datatypes.Chunks in project poi by apache.

The class TestPOIFSChunkParser, method testFindsAttachments.

/**
 * Checks that attachment directories are turned into
 * {@link AttachmentChunks} groups, both through the low-level
 * {@link POIFSChunkParser} and through {@link MAPIMessage}, and that a
 * message without attachments reports none.
 */
@Test
public void testFindsAttachments() throws IOException, ChunkNotFoundException {
    NPOIFSFileSystem with = new NPOIFSFileSystem(samples.getFile("attachment_test_msg.msg"), true);
    // Both file systems are closed in finally blocks so that a failing
    // assertion does not leave them open
    try {
        NPOIFSFileSystem without = new NPOIFSFileSystem(samples.getFile("quick.msg"), true);
        try {
            AttachmentChunks attachment;

            // Check raw details on the one with.
            // The lookups are made only for their effect; results are unused.
            // NOTE(review): presumably getEntry fails if the entry is missing - confirm
            with.getRoot().getEntry("__attach_version1.0_#00000000");
            with.getRoot().getEntry("__attach_version1.0_#00000001");

            ChunkGroup[] groups = POIFSChunkParser.parse(with.getRoot());
            assertEquals(5, groups.length);
            assertTrue(groups[0] instanceof Chunks);
            assertTrue(groups[1] instanceof RecipientChunks);
            assertTrue(groups[2] instanceof AttachmentChunks);
            assertTrue(groups[3] instanceof AttachmentChunks);
            assertTrue(groups[4] instanceof NameIdChunks);

            attachment = (AttachmentChunks) groups[2];
            assertEquals("TEST-U~1.DOC", attachment.getAttachFileName().toString());
            assertEquals("test-unicode.doc", attachment.getAttachLongFileName().toString());
            assertEquals(24064, attachment.getAttachData().getValue().length);

            attachment = (AttachmentChunks) groups[3];
            assertEquals("pj1.txt", attachment.getAttachFileName().toString());
            assertEquals("pj1.txt", attachment.getAttachLongFileName().toString());
            assertEquals(89, attachment.getAttachData().getValue().length);

            // Check raw details on one without
            assertFalse(without.getRoot().hasEntry("__attach_version1.0_#00000000"));
            assertFalse(without.getRoot().hasEntry("__attach_version1.0_#00000001"));

            // One with, from the top
            MAPIMessage msgWith = new MAPIMessage(with);
            assertEquals(2, msgWith.getAttachmentFiles().length);

            attachment = msgWith.getAttachmentFiles()[0];
            assertEquals("TEST-U~1.DOC", attachment.getAttachFileName().toString());
            assertEquals("test-unicode.doc", attachment.getAttachLongFileName().toString());
            assertEquals(24064, attachment.getAttachData().getValue().length);

            attachment = msgWith.getAttachmentFiles()[1];
            assertEquals("pj1.txt", attachment.getAttachFileName().toString());
            assertEquals("pj1.txt", attachment.getAttachLongFileName().toString());
            assertEquals(89, attachment.getAttachData().getValue().length);

            // Plus check core details are there
            assertEquals("'nicolas1.23456@free.fr'", msgWith.getDisplayTo());
            assertEquals("Nicolas1 23456", msgWith.getDisplayFrom());
            assertEquals("test pièce jointe 1", msgWith.getSubject());

            // One without, from the top
            MAPIMessage msgWithout = new MAPIMessage(without);
            // No attachments
            assertEquals(0, msgWithout.getAttachmentFiles().length);
            // But has core details
            assertEquals("Kevin Roast", msgWithout.getDisplayTo());
            assertEquals("Kevin Roast", msgWithout.getDisplayFrom());
            assertEquals("Test the content transformer", msgWithout.getSubject());

            msgWithout.close();
            msgWith.close();
        } finally {
            without.close();
        }
    } finally {
        with.close();
    }
}
Also used : MAPIMessage(org.apache.poi.hsmf.MAPIMessage) NPOIFSFileSystem(org.apache.poi.poifs.filesystem.NPOIFSFileSystem) ChunkGroup(org.apache.poi.hsmf.datatypes.ChunkGroup) Chunks(org.apache.poi.hsmf.datatypes.Chunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) Test(org.junit.Test)

Example 4 with Chunks

use of org.apache.poi.hsmf.datatypes.Chunks in project poi by apache.

The class POIFSChunkParser, method process.

/**
 * Creates a chunk for the given entry, and gives it to its parent group.
 *
 * <p>An entry whose name equals {@code PropertiesChunk.NAME} becomes a
 * properties chunk. Any other entry is expected to be named
 * {@code <prefix>_<id><type>}, where the id and the type are four
 * hexadecimal digits each; the type selects the kind of chunk created.
 * Entries whose name is too short, has no underscore, belongs to the
 * "Olk10SideProps_" family, or does not hold hexadecimal digits are
 * skipped, as are entries of a type this method has no chunk for.
 *
 * <p>For a {@link DocumentNode} the chunk value is read from the entry
 * before the chunk is recorded; an {@link IOException} during that read is
 * logged and the chunk is not recorded.
 *
 * @param entry the file-system entry to turn into a chunk
 * @param grouping the group the resulting chunk is recorded on
 * @throws IllegalArgumentException if fewer than eight characters follow
 *         the last underscore of a regular chunk name
 */
protected static void process(Entry entry, ChunkGroup grouping) {
    String entryName = entry.getName();
    Chunk chunk = null;

    // Is it a properties chunk? (They have special names)
    if (entryName.equals(PropertiesChunk.NAME)) {
        if (grouping instanceof Chunks) {
            // These should be the properties for the message itself
            chunk = new MessagePropertiesChunk(grouping);
        } else {
            // Will be properties on an attachment or recipient
            chunk = new StoragePropertiesChunk(grouping);
        }
    } else {
        // Check it's a regular chunk
        if (entryName.length() < 9) {
            // Name in the wrong format
            return;
        }
        if (!entryName.contains("_")) {
            // Name in the wrong format
            return;
        }

        // Split it into its parts; the prefix keeps its trailing underscore
        int splitAt = entryName.lastIndexOf('_');
        String namePrefix = entryName.substring(0, splitAt + 1);
        String ids = entryName.substring(splitAt + 1);

        // The prefix always ends with '_', so only that spelling can match
        if (namePrefix.equals("Olk10SideProps_")) {
            // This is some odd Outlook 2002 thing, skip
            return;
        }

        // Both substring calls below need all eight characters. Checking the
        // length up front reports a malformed name as an
        // IllegalArgumentException instead of letting a 7-character suffix
        // escape as a StringIndexOutOfBoundsException.
        if (ids.length() < 8) {
            // Underscores not the right place, something's wrong
            throw new IllegalArgumentException("Invalid chunk name " + entryName);
        }

        // Now try to turn it into id + type
        try {
            int chunkId = Integer.parseInt(ids.substring(0, 4), 16);
            int typeId = Integer.parseInt(ids.substring(4, 8), 16);

            MAPIType type = Types.getById(typeId);
            if (type == null) {
                type = Types.createCustom(typeId);
            }

            // Special cases based on the ID
            if (chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) {
                chunk = new MessageSubmissionChunk(namePrefix, chunkId, type);
            } else if (type == Types.BINARY) {
                chunk = new ByteChunk(namePrefix, chunkId, type);
            } else if (type == Types.DIRECTORY) {
                // A directory-typed name on a non-directory entry yields no chunk
                if (entry instanceof DirectoryNode) {
                    chunk = new DirectoryChunk((DirectoryNode) entry, namePrefix, chunkId, type);
                }
            } else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) {
                chunk = new StringChunk(namePrefix, chunkId, type);
            }
            // Any other type is unsupported here: chunk stays null and is skipped
        } catch (NumberFormatException e) {
            // Name in the wrong format
            return;
        }
    }

    if (chunk == null) {
        return;
    }

    if (entry instanceof DocumentNode) {
        DocumentInputStream inp = null;
        try {
            inp = new DocumentInputStream((DocumentNode) entry);
            chunk.readValue(inp);
            // Only recorded once its value has been read successfully
            grouping.record(chunk);
        } catch (IOException e) {
            logger.log(POILogger.ERROR, "Error reading from part " + entry.getName() + " - " + e);
        } finally {
            if (inp != null) {
                inp.close();
            }
        }
    } else {
        grouping.record(chunk);
    }
}
Also used : StoragePropertiesChunk(org.apache.poi.hsmf.datatypes.StoragePropertiesChunk) Chunks(org.apache.poi.hsmf.datatypes.Chunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) NameIdChunks(org.apache.poi.hsmf.datatypes.NameIdChunks) DocumentNode(org.apache.poi.poifs.filesystem.DocumentNode) ByteChunk(org.apache.poi.hsmf.datatypes.ByteChunk) DirectoryNode(org.apache.poi.poifs.filesystem.DirectoryNode) DirectoryChunk(org.apache.poi.hsmf.datatypes.DirectoryChunk) MessagePropertiesChunk(org.apache.poi.hsmf.datatypes.MessagePropertiesChunk) IOException(java.io.IOException) ByteChunk(org.apache.poi.hsmf.datatypes.ByteChunk) PropertiesChunk(org.apache.poi.hsmf.datatypes.PropertiesChunk) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk) Chunk(org.apache.poi.hsmf.datatypes.Chunk) DirectoryChunk(org.apache.poi.hsmf.datatypes.DirectoryChunk) StoragePropertiesChunk(org.apache.poi.hsmf.datatypes.StoragePropertiesChunk) MessagePropertiesChunk(org.apache.poi.hsmf.datatypes.MessagePropertiesChunk) MessageSubmissionChunk(org.apache.poi.hsmf.datatypes.MessageSubmissionChunk) DocumentInputStream(org.apache.poi.poifs.filesystem.DocumentInputStream) MessageSubmissionChunk(org.apache.poi.hsmf.datatypes.MessageSubmissionChunk) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk) MAPIType(org.apache.poi.hsmf.datatypes.Types.MAPIType)

Example 5 with Chunks

use of org.apache.poi.hsmf.datatypes.Chunks in project tika by apache.

The class OutlookExtractor, method handleFromTo.

/**
 * Copies the sender and recipient details of the current message into the
 * metadata.
 *
 * <p>The display from/to/cc/bcc strings are set first, then the
 * sent-by-server type when the message has one, then the sender name and
 * email chunks, and finally one name / display name / email triple per
 * recipient, under the TO, CC or BCC keys according to the recipient type.
 *
 * @param headers not read by this method
 * @param metadata the metadata object that receives the values
 * @throws ChunkNotFoundException declared by this method; presumably raised
 *         by the message accessors when a chunk is absent - confirm
 */
private void handleFromTo(Map<String, String[]> headers, Metadata metadata) throws ChunkNotFoundException {
    String from = msg.getDisplayFrom();
    metadata.set(TikaCoreProperties.CREATOR, from);
    metadata.set(Metadata.MESSAGE_FROM, from);
    metadata.set(Metadata.MESSAGE_TO, msg.getDisplayTo());
    metadata.set(Metadata.MESSAGE_CC, msg.getDisplayCC());
    metadata.set(Metadata.MESSAGE_BCC, msg.getDisplayBCC());

    Chunks chunks = msg.getMainChunks();
    StringChunk sentByServerType = chunks.getSentByServerType();
    if (sentByServerType != null) {
        metadata.set(Office.MAPI_SENT_BY_SERVER_TYPE, sentByServerType.getValue());
    }

    // Reuse the main chunks fetched above instead of asking the message again
    Map<MAPIProperty, List<Chunk>> mainChunks = chunks.getAll();

    //sometimes in SMTP .msg files there is an email in the sender name field.
    setFirstChunk(mainChunks.get(MAPIProperty.SENDER_NAME), Message.MESSAGE_FROM_NAME, metadata);
    setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_NAME), Office.MAPI_FROM_REPRESENTING_NAME, metadata);
    setFirstChunk(mainChunks.get(MAPIProperty.SENDER_EMAIL_ADDRESS), Message.MESSAGE_FROM_EMAIL, metadata);
    setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_EMAIL_ADDRESS), Office.MAPI_FROM_REPRESENTING_EMAIL, metadata);

    for (Recipient recipient : buildRecipients()) {
        switch(recipient.recipientType) {
            case TO:
                addEvenIfNull(Message.MESSAGE_TO_NAME, recipient.name, metadata);
                addEvenIfNull(Message.MESSAGE_TO_DISPLAY_NAME, recipient.displayName, metadata);
                addEvenIfNull(Message.MESSAGE_TO_EMAIL, recipient.emailAddress, metadata);
                break;
            case CC:
                addEvenIfNull(Message.MESSAGE_CC_NAME, recipient.name, metadata);
                addEvenIfNull(Message.MESSAGE_CC_DISPLAY_NAME, recipient.displayName, metadata);
                addEvenIfNull(Message.MESSAGE_CC_EMAIL, recipient.emailAddress, metadata);
                break;
            case BCC:
                addEvenIfNull(Message.MESSAGE_BCC_NAME, recipient.name, metadata);
                addEvenIfNull(Message.MESSAGE_BCC_DISPLAY_NAME, recipient.displayName, metadata);
                addEvenIfNull(Message.MESSAGE_BCC_EMAIL, recipient.emailAddress, metadata);
                break;
            default:
                // Any other recipient type is left out of the metadata
                //log unknown or undefined?
                break;
        }
    }
}
Also used : Chunks(org.apache.poi.hsmf.datatypes.Chunks) RecipientChunks(org.apache.poi.hsmf.datatypes.RecipientChunks) AttachmentChunks(org.apache.poi.hsmf.datatypes.AttachmentChunks) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) ByteChunk(org.apache.poi.hsmf.datatypes.ByteChunk) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk) Chunk(org.apache.poi.hsmf.datatypes.Chunk) MAPIProperty(org.apache.poi.hsmf.datatypes.MAPIProperty) StringChunk(org.apache.poi.hsmf.datatypes.StringChunk)

Aggregations

Chunks (org.apache.poi.hsmf.datatypes.Chunks)9 AttachmentChunks (org.apache.poi.hsmf.datatypes.AttachmentChunks)8 RecipientChunks (org.apache.poi.hsmf.datatypes.RecipientChunks)8 NameIdChunks (org.apache.poi.hsmf.datatypes.NameIdChunks)6 ChunkGroup (org.apache.poi.hsmf.datatypes.ChunkGroup)5 Test (org.junit.Test)5 MAPIMessage (org.apache.poi.hsmf.MAPIMessage)4 NPOIFSFileSystem (org.apache.poi.poifs.filesystem.NPOIFSFileSystem)4 ArrayList (java.util.ArrayList)3 List (java.util.List)3 MAPIProperty (org.apache.poi.hsmf.datatypes.MAPIProperty)3 StringChunk (org.apache.poi.hsmf.datatypes.StringChunk)3 IOException (java.io.IOException)2 LinkedList (java.util.LinkedList)2 ByteChunk (org.apache.poi.hsmf.datatypes.ByteChunk)2 Chunk (org.apache.poi.hsmf.datatypes.Chunk)2 PropertyValue (org.apache.poi.hsmf.datatypes.PropertyValue)2 DirectoryNode (org.apache.poi.poifs.filesystem.DirectoryNode)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1