use of org.apache.poi.hsmf.MAPIMessage in project poi by apache.
the class ExtractorFactory method getEmbededDocsTextExtractors.
/**
 * Builds one open {@link POITextExtractor} per document embedded in the
 * file that the supplied extractor was created from.
 *
 * <p>Where the embedded documents are looked for depends on the concrete
 * extractor type:</p>
 * <ul>
 *  <li>{@code ExcelExtractor}: root entries whose name starts with "MBD"</li>
 *  <li>{@code WordExtractor}: entries starting with "_" inside the
 *   "ObjectPool" entry of the root; a {@link FileNotFoundException}
 *   raised in that lookup is logged at INFO level and otherwise ignored</li>
 *  <li>{@code OutlookTextExtactor}: the attachments of the MAPI message,
 *   taken either from their raw attachment data or from their
 *   attachment directory</li>
 * </ul>
 * <p>Any other extractor type yields no embedded documents.</p>
 *
 * @param ext the extractor whose backing file should be searched
 * @return one extractor per embedded document found, or an empty array
 *  if there were none
 * @throws IllegalStateException if {@code ext} has no root directory
 * @throws IOException declared by the signature; also used to wrap an
 *  {@link XmlException} or {@link OpenXML4JException} raised while
 *  opening a raw (non directory) attachment
 * @throws OpenXML4JException declared by the signature
 * @throws XmlException declared by the signature
 */
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
    DirectoryEntry root = ext.getRoot();
    if (root == null) {
        throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
    }

    // Embedded documents that are available as directory entries
    ArrayList<Entry> embeddedDirs = new ArrayList<Entry>();
    // Embedded documents that are only available as a stream of bytes
    ArrayList<InputStream> rawStreams = new ArrayList<InputStream>();

    if (ext instanceof ExcelExtractor) {
        // Held as MBD... entries directly under the root
        addEntriesWithPrefix(root.getEntries(), "MBD", embeddedDirs);
    } else if (ext instanceof WordExtractor) {
        // Held as ObjectPool -> _... under the root
        try {
            DirectoryEntry objectPool = (DirectoryEntry) root.getEntry("ObjectPool");
            addEntriesWithPrefix(objectPool.getEntries(), "_", embeddedDirs);
        } catch (FileNotFoundException e) {
            // Deliberately not fatal: only logged
            logger.log(POILogger.INFO, "Ignoring FileNotFoundException while extracting Word document", e.getLocalizedMessage());
        }
        // TODO PowerPointExtractor: tricky, its embedded documents are
        //  not stored directly in the poifs
    } else if (ext instanceof OutlookTextExtactor) {
        // Held in the attachment blocks of the message
        MAPIMessage message = ((OutlookTextExtactor) ext).getMAPIMessage();
        for (AttachmentChunks chunks : message.getAttachmentFiles()) {
            if (chunks.getAttachData() != null) {
                rawStreams.add(new ByteArrayInputStream(chunks.getAttachData().getValue()));
                continue;
            }
            if (chunks.getAttachmentDirectory() != null) {
                embeddedDirs.add(chunks.getAttachmentDirectory().getDirectory());
            }
        }
    }

    // Nothing embedded at all
    if (embeddedDirs.isEmpty() && rawStreams.isEmpty()) {
        return new POITextExtractor[0];
    }

    // Turn everything that was found into extractors
    ArrayList<POITextExtractor> extractors = new ArrayList<POITextExtractor>();
    for (Entry embeddedDir : embeddedDirs) {
        DirectoryNode node = (DirectoryNode) embeddedDir;
        extractors.add(createExtractor(node));
    }
    for (InputStream rawStream : rawStreams) {
        try {
            extractors.add(createExtractor(rawStream));
        } catch (IllegalArgumentException e) {
            // Not an error: the attachment is simply skipped
            logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
        } catch (XmlException e) {
            throw new IOException(e.getMessage(), e);
        } catch (OpenXML4JException e) {
            throw new IOException(e.getMessage(), e);
        }
    }
    return extractors.toArray(new POITextExtractor[extractors.size()]);
}

/**
 * Appends to {@code target} every entry from {@code entries} whose
 * name starts with {@code prefix}, keeping iteration order.
 */
private static void addEntriesWithPrefix(Iterator<Entry> entries, String prefix, ArrayList<Entry> target) {
    while (entries.hasNext()) {
        Entry candidate = entries.next();
        if (candidate.getName().startsWith(prefix)) {
            target.add(candidate);
        }
    }
}
use of org.apache.poi.hsmf.MAPIMessage in project poi by apache.
the class TestPOIFSChunkParser method testFindsMultipleRecipients.
/**
 * Checks that all six recipients of "example_received_unicode.msg" are
 * found: first at the raw chunk level (in file system order, then
 * sorted with {@code RecipientChunksSorter}), and finally through
 * {@link MAPIMessage}.
 *
 * The file system and the message are released in {@code finally}
 * blocks, so that a failing assertion does not leave the sample file
 * open.
 */
@Test
public void testFindsMultipleRecipients() throws IOException, ChunkNotFoundException {
    NPOIFSFileSystem multiple = new NPOIFSFileSystem(samples.getFile("example_received_unicode.msg"), true);
    try {
        // All six recipient entries must be retrievable from the root
        for (int i = 0; i <= 5; i++) {
            multiple.getRoot().getEntry("__recip_version1.0_#0000000" + i);
        }

        ChunkGroup[] groups = POIFSChunkParser.parse(multiple.getRoot());
        assertEquals(9, groups.length);
        assertTrue(groups[0] instanceof Chunks);
        assertTrue(groups[1] instanceof RecipientChunks);
        assertTrue(groups[2] instanceof RecipientChunks);
        assertTrue(groups[3] instanceof RecipientChunks);
        assertTrue(groups[4] instanceof RecipientChunks);
        assertTrue(groups[5] instanceof AttachmentChunks);
        assertTrue(groups[6] instanceof RecipientChunks);
        assertTrue(groups[7] instanceof RecipientChunks);
        assertTrue(groups[8] instanceof NameIdChunks);

        // In FS order initially
        RecipientChunks[] chunks = new RecipientChunks[] {
            (RecipientChunks) groups[1], (RecipientChunks) groups[2],
            (RecipientChunks) groups[3], (RecipientChunks) groups[4],
            (RecipientChunks) groups[6], (RecipientChunks) groups[7]
        };
        assertEquals(6, chunks.length);

        int[] fsOrderNumbers = { 0, 2, 4, 5, 3, 1 };
        String[] fsOrderNames = {
            "'Ashutosh Dandavate'", "'Mike Farman'", "nick.burch@alfresco.com",
            "'Roy Wetherall'", "nickb@alfresco.com", "'Paul Holmes-Higgin'"
        };
        String[] fsOrderEmails = {
            "ashutosh.dandavate@alfresco.com", "mikef@alfresco.com", "nick.burch@alfresco.com",
            "roy.wetherall@alfresco.com", "nickb@alfresco.com", "paul.hh@alfresco.com"
        };
        for (int i = 0; i < chunks.length; i++) {
            assertEquals("FS order number #" + i, fsOrderNumbers[i], chunks[i].recipientNumber);
        }
        for (int i = 0; i < chunks.length; i++) {
            assertEquals("FS order name #" + i, fsOrderNames[i], chunks[i].getRecipientName());
            assertEquals("FS order email #" + i, fsOrderEmails[i], chunks[i].getRecipientEmailAddress());
        }

        // Expected order once sorted; the message level lists below
        //  are expected in this same order
        String[] sortedNames = {
            "'Ashutosh Dandavate'", "'Paul Holmes-Higgin'", "'Mike Farman'",
            "nickb@alfresco.com", "nick.burch@alfresco.com", "'Roy Wetherall'"
        };
        String[] sortedEmails = {
            "ashutosh.dandavate@alfresco.com", "paul.hh@alfresco.com", "mikef@alfresco.com",
            "nickb@alfresco.com", "nick.burch@alfresco.com", "roy.wetherall@alfresco.com"
        };

        // Now sort, and re-check
        Arrays.sort(chunks, new RecipientChunksSorter());
        for (int i = 0; i < chunks.length; i++) {
            assertEquals("Sorted name #" + i, sortedNames[i], chunks[i].getRecipientName());
            assertEquals("Sorted email #" + i, sortedEmails[i], chunks[i].getRecipientEmailAddress());
        }

        // Finally check on message
        MAPIMessage msg = new MAPIMessage(multiple);
        try {
            String[] msgEmails = msg.getRecipientEmailAddressList();
            String[] msgNames = msg.getRecipientNamesList();
            assertEquals(6, msgEmails.length);
            assertEquals(6, msgNames.length);
            for (int i = 0; i < sortedNames.length; i++) {
                assertEquals("Message name #" + i, sortedNames[i], msgNames[i]);
            }
            for (int i = 0; i < sortedEmails.length; i++) {
                assertEquals("Message email #" + i, sortedEmails[i], msgEmails[i]);
            }
        } finally {
            msg.close();
        }
    } finally {
        multiple.close();
    }
}
use of org.apache.poi.hsmf.MAPIMessage in project poi by apache.
the class TestPOIFSChunkParser method testOlk10SideProps.
/**
 * Bugzilla #51873 - Outlook 2002 files created by dragging and
 * dropping files to the disk include non-standard named streams
 * such as "Olk10SideProps_0001".
 *
 * Opens "51873.msg" and checks that the core details (display-to and
 * subject) can still be read. The file system and the message are
 * released in {@code finally} blocks, so that a failing assertion
 * does not leave the sample file open.
 */
@Test
public void testOlk10SideProps() throws IOException, ChunkNotFoundException {
    NPOIFSFileSystem poifs = new NPOIFSFileSystem(samples.getFile("51873.msg"), true);
    try {
        MAPIMessage msg = new MAPIMessage(poifs);
        try {
            // Check core details came through
            assertEquals("bubba@bubbasmith.com", msg.getDisplayTo());
            assertEquals("Test with Olk10SideProps_ Chunk", msg.getSubject());
        } finally {
            msg.close();
        }
    } finally {
        poifs.close();
    }
}
use of org.apache.poi.hsmf.MAPIMessage in project poi by apache.
the class TestPOIFSChunkParser method testFindsAttachments.
/**
 * Checks attachment discovery on a message with two attachments
 * ("attachment_test_msg.msg") and on one without any ("quick.msg"),
 * both at the raw chunk level and through {@link MAPIMessage}.
 *
 * Every file system and message is released in a {@code finally}
 * block, in the same order the happy path always used (messages
 * first, then file systems), so that a failing assertion does not
 * leave the sample files open.
 */
@Test
public void testFindsAttachments() throws IOException, ChunkNotFoundException {
    NPOIFSFileSystem with = new NPOIFSFileSystem(samples.getFile("attachment_test_msg.msg"), true);
    try {
        NPOIFSFileSystem without = new NPOIFSFileSystem(samples.getFile("quick.msg"), true);
        try {
            AttachmentChunks attachment;

            // Check raw details on the one with
            with.getRoot().getEntry("__attach_version1.0_#00000000");
            with.getRoot().getEntry("__attach_version1.0_#00000001");
            // NOTE(review): the result of this first parse is discarded and
            //  the same root is parsed again on the next line - presumably
            //  to show that parsing can be repeated; confirm before removing
            POIFSChunkParser.parse(with.getRoot());
            ChunkGroup[] groups = POIFSChunkParser.parse(with.getRoot());
            assertEquals(5, groups.length);
            assertTrue(groups[0] instanceof Chunks);
            assertTrue(groups[1] instanceof RecipientChunks);
            assertTrue(groups[2] instanceof AttachmentChunks);
            assertTrue(groups[3] instanceof AttachmentChunks);
            assertTrue(groups[4] instanceof NameIdChunks);

            attachment = (AttachmentChunks) groups[2];
            assertEquals("TEST-U~1.DOC", attachment.getAttachFileName().toString());
            assertEquals("test-unicode.doc", attachment.getAttachLongFileName().toString());
            assertEquals(24064, attachment.getAttachData().getValue().length);

            attachment = (AttachmentChunks) groups[3];
            assertEquals("pj1.txt", attachment.getAttachFileName().toString());
            assertEquals("pj1.txt", attachment.getAttachLongFileName().toString());
            assertEquals(89, attachment.getAttachData().getValue().length);

            // Check raw details on one without
            assertFalse(without.getRoot().hasEntry("__attach_version1.0_#00000000"));
            assertFalse(without.getRoot().hasEntry("__attach_version1.0_#00000001"));

            // One with, from the top
            MAPIMessage msgWith = new MAPIMessage(with);
            try {
                assertEquals(2, msgWith.getAttachmentFiles().length);

                attachment = msgWith.getAttachmentFiles()[0];
                assertEquals("TEST-U~1.DOC", attachment.getAttachFileName().toString());
                assertEquals("test-unicode.doc", attachment.getAttachLongFileName().toString());
                assertEquals(24064, attachment.getAttachData().getValue().length);

                attachment = msgWith.getAttachmentFiles()[1];
                assertEquals("pj1.txt", attachment.getAttachFileName().toString());
                assertEquals("pj1.txt", attachment.getAttachLongFileName().toString());
                assertEquals(89, attachment.getAttachData().getValue().length);

                // Plus check core details are there
                assertEquals("'nicolas1.23456@free.fr'", msgWith.getDisplayTo());
                assertEquals("Nicolas1 23456", msgWith.getDisplayFrom());
                assertEquals("test pièce jointe 1", msgWith.getSubject());

                // One without, from the top
                MAPIMessage msgWithout = new MAPIMessage(without);
                try {
                    // No attachments
                    assertEquals(0, msgWithout.getAttachmentFiles().length);

                    // But has core details
                    assertEquals("Kevin Roast", msgWithout.getDisplayTo());
                    assertEquals("Kevin Roast", msgWithout.getDisplayFrom());
                    assertEquals("Test the content transformer", msgWithout.getSubject());
                } finally {
                    msgWithout.close();
                }
            } finally {
                msgWith.close();
            }
        } finally {
            without.close();
        }
    } finally {
        with.close();
    }
}
use of org.apache.poi.hsmf.MAPIMessage in project poi by apache.
the class TestOutlookTextExtractor method testEncodings.
/**
 * Extracts the text of "chinese-traditional.msg" and checks that both
 * the English and the Chinese parts are present in it.
 *
 * The extractor and the file system are released in {@code finally}
 * blocks, so that a failing assertion does not leave the sample file
 * open.
 */
public void testEncodings() throws Exception {
    NPOIFSFileSystem poifs = new NPOIFSFileSystem(samples.getFile("chinese-traditional.msg"), true);
    try {
        MAPIMessage msg = new MAPIMessage(poifs);
        OutlookTextExtactor ext = new OutlookTextExtactor(msg);
        try {
            String text = ext.getText();

            // Check the english bits
            assertContains(text, "From: Tests Chang@FT");
            assertContains(text, "tests.chang@fengttt.com");

            // And check some chinese bits
            assertContains(text, "(張毓倫)");
            assertContains(text, "( MSG 格式測試 )");
        } finally {
            ext.close();
        }
    } finally {
        poifs.close();
    }
}
Aggregations