use of org.apache.poi.hsmf.datatypes.Chunks in project poi by apache.
the class POIFSChunkParser method parse.
public static ChunkGroup[] parse(DirectoryNode node) throws IOException {
Chunks mainChunks = new Chunks();
ArrayList<ChunkGroup> groups = new ArrayList<ChunkGroup>();
groups.add(mainChunks);
// there doesn't seem to be any use of that in Outlook
for (Entry entry : node) {
if (entry instanceof DirectoryNode) {
DirectoryNode dir = (DirectoryNode) entry;
ChunkGroup group = null;
// Do we know what to do with it?
if (dir.getName().startsWith(AttachmentChunks.PREFIX)) {
group = new AttachmentChunks(dir.getName());
}
if (dir.getName().startsWith(NameIdChunks.NAME)) {
group = new NameIdChunks();
}
if (dir.getName().startsWith(RecipientChunks.PREFIX)) {
group = new RecipientChunks(dir.getName());
}
if (group != null) {
processChunks(dir, group);
groups.add(group);
} else {
// Unknown directory, skip silently
}
}
}
// Now do the top level chunks
processChunks(node, mainChunks);
// match up variable-length properties and their chunks
for (ChunkGroup group : groups) {
group.chunksComplete();
}
// Finish
return groups.toArray(new ChunkGroup[groups.size()]);
}
use of org.apache.poi.hsmf.datatypes.Chunks in project poi by apache.
the class TestPOIFSChunkParser method testFindsMultipleRecipients.
@Test
public void testFindsMultipleRecipients() throws IOException, ChunkNotFoundException {
NPOIFSFileSystem multiple = new NPOIFSFileSystem(samples.getFile("example_received_unicode.msg"), true);
multiple.getRoot().getEntry("__recip_version1.0_#00000000");
multiple.getRoot().getEntry("__recip_version1.0_#00000001");
multiple.getRoot().getEntry("__recip_version1.0_#00000002");
multiple.getRoot().getEntry("__recip_version1.0_#00000003");
multiple.getRoot().getEntry("__recip_version1.0_#00000004");
multiple.getRoot().getEntry("__recip_version1.0_#00000005");
ChunkGroup[] groups = POIFSChunkParser.parse(multiple.getRoot());
assertEquals(9, groups.length);
assertTrue(groups[0] instanceof Chunks);
assertTrue(groups[1] instanceof RecipientChunks);
assertTrue(groups[2] instanceof RecipientChunks);
assertTrue(groups[3] instanceof RecipientChunks);
assertTrue(groups[4] instanceof RecipientChunks);
assertTrue(groups[5] instanceof AttachmentChunks);
assertTrue(groups[6] instanceof RecipientChunks);
assertTrue(groups[7] instanceof RecipientChunks);
assertTrue(groups[8] instanceof NameIdChunks);
// In FS order initially
RecipientChunks[] chunks = new RecipientChunks[] { (RecipientChunks) groups[1], (RecipientChunks) groups[2], (RecipientChunks) groups[3], (RecipientChunks) groups[4], (RecipientChunks) groups[6], (RecipientChunks) groups[7] };
assertEquals(6, chunks.length);
assertEquals(0, chunks[0].recipientNumber);
assertEquals(2, chunks[1].recipientNumber);
assertEquals(4, chunks[2].recipientNumber);
assertEquals(5, chunks[3].recipientNumber);
assertEquals(3, chunks[4].recipientNumber);
assertEquals(1, chunks[5].recipientNumber);
// Check
assertEquals("'Ashutosh Dandavate'", chunks[0].getRecipientName());
assertEquals("ashutosh.dandavate@alfresco.com", chunks[0].getRecipientEmailAddress());
assertEquals("'Mike Farman'", chunks[1].getRecipientName());
assertEquals("mikef@alfresco.com", chunks[1].getRecipientEmailAddress());
assertEquals("nick.burch@alfresco.com", chunks[2].getRecipientName());
assertEquals("nick.burch@alfresco.com", chunks[2].getRecipientEmailAddress());
assertEquals("'Roy Wetherall'", chunks[3].getRecipientName());
assertEquals("roy.wetherall@alfresco.com", chunks[3].getRecipientEmailAddress());
assertEquals("nickb@alfresco.com", chunks[4].getRecipientName());
assertEquals("nickb@alfresco.com", chunks[4].getRecipientEmailAddress());
assertEquals("'Paul Holmes-Higgin'", chunks[5].getRecipientName());
assertEquals("paul.hh@alfresco.com", chunks[5].getRecipientEmailAddress());
// Now sort, and re-check
Arrays.sort(chunks, new RecipientChunksSorter());
assertEquals("'Ashutosh Dandavate'", chunks[0].getRecipientName());
assertEquals("ashutosh.dandavate@alfresco.com", chunks[0].getRecipientEmailAddress());
assertEquals("'Paul Holmes-Higgin'", chunks[1].getRecipientName());
assertEquals("paul.hh@alfresco.com", chunks[1].getRecipientEmailAddress());
assertEquals("'Mike Farman'", chunks[2].getRecipientName());
assertEquals("mikef@alfresco.com", chunks[2].getRecipientEmailAddress());
assertEquals("nickb@alfresco.com", chunks[3].getRecipientName());
assertEquals("nickb@alfresco.com", chunks[3].getRecipientEmailAddress());
assertEquals("nick.burch@alfresco.com", chunks[4].getRecipientName());
assertEquals("nick.burch@alfresco.com", chunks[4].getRecipientEmailAddress());
assertEquals("'Roy Wetherall'", chunks[5].getRecipientName());
assertEquals("roy.wetherall@alfresco.com", chunks[5].getRecipientEmailAddress());
// Finally check on message
MAPIMessage msg = new MAPIMessage(multiple);
assertEquals(6, msg.getRecipientEmailAddressList().length);
assertEquals(6, msg.getRecipientNamesList().length);
assertEquals("'Ashutosh Dandavate'", msg.getRecipientNamesList()[0]);
assertEquals("'Paul Holmes-Higgin'", msg.getRecipientNamesList()[1]);
assertEquals("'Mike Farman'", msg.getRecipientNamesList()[2]);
assertEquals("nickb@alfresco.com", msg.getRecipientNamesList()[3]);
assertEquals("nick.burch@alfresco.com", msg.getRecipientNamesList()[4]);
assertEquals("'Roy Wetherall'", msg.getRecipientNamesList()[5]);
assertEquals("ashutosh.dandavate@alfresco.com", msg.getRecipientEmailAddressList()[0]);
assertEquals("paul.hh@alfresco.com", msg.getRecipientEmailAddressList()[1]);
assertEquals("mikef@alfresco.com", msg.getRecipientEmailAddressList()[2]);
assertEquals("nickb@alfresco.com", msg.getRecipientEmailAddressList()[3]);
assertEquals("nick.burch@alfresco.com", msg.getRecipientEmailAddressList()[4]);
assertEquals("roy.wetherall@alfresco.com", msg.getRecipientEmailAddressList()[5]);
msg.close();
multiple.close();
}
use of org.apache.poi.hsmf.datatypes.Chunks in project poi by apache.
the class TestPOIFSChunkParser method testFindsAttachments.
@Test
public void testFindsAttachments() throws IOException, ChunkNotFoundException {
NPOIFSFileSystem with = new NPOIFSFileSystem(samples.getFile("attachment_test_msg.msg"), true);
NPOIFSFileSystem without = new NPOIFSFileSystem(samples.getFile("quick.msg"), true);
AttachmentChunks attachment;
// Check raw details on the one with
with.getRoot().getEntry("__attach_version1.0_#00000000");
with.getRoot().getEntry("__attach_version1.0_#00000001");
POIFSChunkParser.parse(with.getRoot());
ChunkGroup[] groups = POIFSChunkParser.parse(with.getRoot());
assertEquals(5, groups.length);
assertTrue(groups[0] instanceof Chunks);
assertTrue(groups[1] instanceof RecipientChunks);
assertTrue(groups[2] instanceof AttachmentChunks);
assertTrue(groups[3] instanceof AttachmentChunks);
assertTrue(groups[4] instanceof NameIdChunks);
attachment = (AttachmentChunks) groups[2];
assertEquals("TEST-U~1.DOC", attachment.getAttachFileName().toString());
assertEquals("test-unicode.doc", attachment.getAttachLongFileName().toString());
assertEquals(24064, attachment.getAttachData().getValue().length);
attachment = (AttachmentChunks) groups[3];
assertEquals("pj1.txt", attachment.getAttachFileName().toString());
assertEquals("pj1.txt", attachment.getAttachLongFileName().toString());
assertEquals(89, attachment.getAttachData().getValue().length);
// Check raw details on one without
assertFalse(without.getRoot().hasEntry("__attach_version1.0_#00000000"));
assertFalse(without.getRoot().hasEntry("__attach_version1.0_#00000001"));
// One with, from the top
MAPIMessage msgWith = new MAPIMessage(with);
assertEquals(2, msgWith.getAttachmentFiles().length);
attachment = msgWith.getAttachmentFiles()[0];
assertEquals("TEST-U~1.DOC", attachment.getAttachFileName().toString());
assertEquals("test-unicode.doc", attachment.getAttachLongFileName().toString());
assertEquals(24064, attachment.getAttachData().getValue().length);
attachment = msgWith.getAttachmentFiles()[1];
assertEquals("pj1.txt", attachment.getAttachFileName().toString());
assertEquals("pj1.txt", attachment.getAttachLongFileName().toString());
assertEquals(89, attachment.getAttachData().getValue().length);
// Plus check core details are there
assertEquals("'nicolas1.23456@free.fr'", msgWith.getDisplayTo());
assertEquals("Nicolas1 23456", msgWith.getDisplayFrom());
assertEquals("test pièce jointe 1", msgWith.getSubject());
// One without, from the top
MAPIMessage msgWithout = new MAPIMessage(without);
// No attachments
assertEquals(0, msgWithout.getAttachmentFiles().length);
// But has core details
assertEquals("Kevin Roast", msgWithout.getDisplayTo());
assertEquals("Kevin Roast", msgWithout.getDisplayFrom());
assertEquals("Test the content transformer", msgWithout.getSubject());
msgWithout.close();
msgWith.close();
without.close();
with.close();
}
use of org.apache.poi.hsmf.datatypes.Chunks in project poi by apache.
the class POIFSChunkParser method process.
/**
* Creates a chunk, and gives it to its parent group
*/
protected static void process(Entry entry, ChunkGroup grouping) {
String entryName = entry.getName();
Chunk chunk = null;
// Is it a properties chunk? (They have special names)
if (entryName.equals(PropertiesChunk.NAME)) {
if (grouping instanceof Chunks) {
// These should be the properties for the message itself
chunk = new MessagePropertiesChunk(grouping);
} else {
// Will be properties on an attachment or recipient
chunk = new StoragePropertiesChunk(grouping);
}
} else {
// Check it's a regular chunk
if (entryName.length() < 9) {
// Name in the wrong format
return;
}
if (!entryName.contains("_")) {
// Name in the wrong format
return;
}
// Split it into its parts
int splitAt = entryName.lastIndexOf('_');
String namePrefix = entryName.substring(0, splitAt + 1);
String ids = entryName.substring(splitAt + 1);
// the form __<name>_<id><type>
if (namePrefix.equals("Olk10SideProps") || namePrefix.equals("Olk10SideProps_")) {
// This is some odd Outlook 2002 thing, skip
return;
} else if (splitAt <= entryName.length() - 8) {
// In the right form for a normal chunk
// We'll process this further in a little bit
} else {
// Underscores not the right place, something's wrong
throw new IllegalArgumentException("Invalid chunk name " + entryName);
}
// Now try to turn it into id + type
try {
int chunkId = Integer.parseInt(ids.substring(0, 4), 16);
int typeId = Integer.parseInt(ids.substring(4, 8), 16);
MAPIType type = Types.getById(typeId);
if (type == null) {
type = Types.createCustom(typeId);
}
// Special cases based on the ID
if (chunkId == MAPIProperty.MESSAGE_SUBMISSION_ID.id) {
chunk = new MessageSubmissionChunk(namePrefix, chunkId, type);
} else {
// So, do the usual thing which is by type
if (type == Types.BINARY) {
chunk = new ByteChunk(namePrefix, chunkId, type);
} else if (type == Types.DIRECTORY) {
if (entry instanceof DirectoryNode) {
chunk = new DirectoryChunk((DirectoryNode) entry, namePrefix, chunkId, type);
}
} else if (type == Types.ASCII_STRING || type == Types.UNICODE_STRING) {
chunk = new StringChunk(namePrefix, chunkId, type);
} else {
// Type of an unsupported type! Skipping...
}
}
} catch (NumberFormatException e) {
// Name in the wrong format
return;
}
}
if (chunk != null) {
if (entry instanceof DocumentNode) {
DocumentInputStream inp = null;
try {
inp = new DocumentInputStream((DocumentNode) entry);
chunk.readValue(inp);
grouping.record(chunk);
} catch (IOException e) {
logger.log(POILogger.ERROR, "Error reading from part " + entry.getName() + " - " + e);
} finally {
if (inp != null)
inp.close();
}
} else {
grouping.record(chunk);
}
}
}
use of org.apache.poi.hsmf.datatypes.Chunks in project tika by apache.
the class OutlookExtractor method handleFromTo.
private void handleFromTo(Map<String, String[]> headers, Metadata metadata) throws ChunkNotFoundException {
String from = msg.getDisplayFrom();
metadata.set(TikaCoreProperties.CREATOR, from);
metadata.set(Metadata.MESSAGE_FROM, from);
metadata.set(Metadata.MESSAGE_TO, msg.getDisplayTo());
metadata.set(Metadata.MESSAGE_CC, msg.getDisplayCC());
metadata.set(Metadata.MESSAGE_BCC, msg.getDisplayBCC());
Chunks chunks = msg.getMainChunks();
StringChunk sentByServerType = chunks.getSentByServerType();
if (sentByServerType != null) {
metadata.set(Office.MAPI_SENT_BY_SERVER_TYPE, sentByServerType.getValue());
}
Map<MAPIProperty, List<Chunk>> mainChunks = msg.getMainChunks().getAll();
List<Chunk> senderAddresType = mainChunks.get(MAPIProperty.SENDER_ADDRTYPE);
String senderAddressTypeString = "";
if (senderAddresType != null && senderAddresType.size() > 0) {
senderAddressTypeString = senderAddresType.get(0).toString();
}
//sometimes in SMTP .msg files there is an email in the sender name field.
setFirstChunk(mainChunks.get(MAPIProperty.SENDER_NAME), Message.MESSAGE_FROM_NAME, metadata);
setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_NAME), Office.MAPI_FROM_REPRESENTING_NAME, metadata);
setFirstChunk(mainChunks.get(MAPIProperty.SENDER_EMAIL_ADDRESS), Message.MESSAGE_FROM_EMAIL, metadata);
setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_EMAIL_ADDRESS), Office.MAPI_FROM_REPRESENTING_EMAIL, metadata);
for (Recipient recipient : buildRecipients()) {
switch(recipient.recipientType) {
case TO:
addEvenIfNull(Message.MESSAGE_TO_NAME, recipient.name, metadata);
addEvenIfNull(Message.MESSAGE_TO_DISPLAY_NAME, recipient.displayName, metadata);
addEvenIfNull(Message.MESSAGE_TO_EMAIL, recipient.emailAddress, metadata);
break;
case CC:
addEvenIfNull(Message.MESSAGE_CC_NAME, recipient.name, metadata);
addEvenIfNull(Message.MESSAGE_CC_DISPLAY_NAME, recipient.displayName, metadata);
addEvenIfNull(Message.MESSAGE_CC_EMAIL, recipient.emailAddress, metadata);
break;
case BCC:
addEvenIfNull(Message.MESSAGE_BCC_NAME, recipient.name, metadata);
addEvenIfNull(Message.MESSAGE_BCC_DISPLAY_NAME, recipient.displayName, metadata);
addEvenIfNull(Message.MESSAGE_BCC_EMAIL, recipient.emailAddress, metadata);
break;
default:
//log unknown or undefined?
break;
}
}
}
Aggregations