Use of org.apache.poi.hsmf.MAPIMessage in project poi by apache.
Class TestOutlookTextExtractor, method testWithAttachedMessage.
/**
 * Checks that extracting text from a message which carries another message
 * as an attachment yields the outer message's text only.
 * <p>
 * The extractor and the file system are released in {@code finally} blocks so
 * that a failing assertion (or a failing constructor) does not leave the
 * sample file open.
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
// NOTE(review): unlike the sibling tests, no @Test annotation is visible on
// this method - confirm the test runner still picks it up.
public void testWithAttachedMessage() throws Exception {
    NPOIFSFileSystem poifs = new NPOIFSFileSystem(samples.getFile("58214_with_attachment.msg"), true);
    try {
        MAPIMessage msg = new MAPIMessage(poifs);
        OutlookTextExtactor ext = new OutlookTextExtactor(msg);
        try {
            String text = ext.getText();

            // Check we got bits from the main message
            assertContains(text, "Master mail");
            assertContains(text, "ante in lacinia euismod");

            // But not the attached message
            assertNotContained(text, "Test mail attachment");
            assertNotContained(text, "Lorem ipsum dolor sit");
        } finally {
            ext.close();
        }
    } finally {
        poifs.close();
    }
}
Use of org.apache.poi.hsmf.MAPIMessage in project poi by apache.
Class TestOutlookTextExtractor, method testQuick.
/**
 * Extracts the text of {@code quick.msg} and checks the header lines
 * (From, To, Subject, Date), the absence of CC/BCC/Attachment lines, and the
 * presence of the body text.
 * <p>
 * The extractor and the file system are released in {@code finally} blocks so
 * that a failing assertion does not leave the sample file open.
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
@Test
public void testQuick() throws Exception {
    NPOIFSFileSystem poifs = new NPOIFSFileSystem(samples.getFile("quick.msg"), true);
    try {
        MAPIMessage msg = new MAPIMessage(poifs);
        OutlookTextExtactor ext = new OutlookTextExtactor(msg);
        try {
            String text = ext.getText();

            assertContains(text, "From: Kevin Roast\n");
            assertContains(text, "To: Kevin Roast <kevin.roast@alfresco.org>\n");
            assertNotContained(text, "CC:");
            assertNotContained(text, "BCC:");
            assertNotContained(text, "Attachment:");
            assertContains(text, "Subject: Test the content transformer\n");

            // Build the expected Date line with the user time zone reported by
            // LocaleUtil, so the comparison does not depend on the machine's zone.
            Calendar cal = LocaleUtil.getLocaleCalendar(2007, 5, 14, 9, 42, 55);
            SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z", Locale.ROOT);
            f.setTimeZone(LocaleUtil.getUserTimeZone());
            String dateText = f.format(cal.getTime());
            assertContains(text, "Date: " + dateText + "\n");

            assertContains(text, "The quick brown fox jumps over the lazy dog");
        } finally {
            ext.close();
        }
    } finally {
        poifs.close();
    }
}
Use of org.apache.poi.hsmf.MAPIMessage in project poi by apache.
Class TestOutlookTextExtractor, method testConstructors.
/**
 * Checks that the extractor produces the same text whether it is built from
 * an input stream, from an {@code NPOIFSFileSystem}, or from a
 * {@code MAPIMessage} wrapping an input stream.
 * <p>
 * Every extractor, stream and file system is released in a {@code finally}
 * block so that a failure in one of the three extractions does not leave the
 * sample file open.
 *
 * @throws Exception if the sample cannot be opened or parsed
 */
@Test
public void testConstructors() throws Exception {
    // 1) Straight from an input stream
    String inp;
    FileInputStream fis = new FileInputStream(samples.getFile("simple_test_msg.msg"));
    try {
        OutlookTextExtactor ext = new OutlookTextExtactor(fis);
        try {
            inp = ext.getText();
        } finally {
            ext.close();
        }
    } finally {
        fis.close();
    }

    // 2) From a file system
    String poifsTxt;
    NPOIFSFileSystem poifs = new NPOIFSFileSystem(samples.getFile("simple_test_msg.msg"), true);
    try {
        OutlookTextExtactor ext = new OutlookTextExtactor(poifs);
        try {
            poifsTxt = ext.getText();
        } finally {
            ext.close();
        }
    } finally {
        poifs.close();
    }

    // 3) From an already-parsed message
    String mapi;
    fis = new FileInputStream(samples.getFile("simple_test_msg.msg"));
    try {
        OutlookTextExtactor ext = new OutlookTextExtactor(new MAPIMessage(fis));
        try {
            mapi = ext.getText();
        } finally {
            ext.close();
        }
    } finally {
        fis.close();
    }

    assertEquals(inp, poifsTxt);
    assertEquals(inp, mapi);
}
Use of org.apache.poi.hsmf.MAPIMessage in project poi by apache.
Class OLE2ScratchpadExtractorFactory, method identifyEmbeddedResources.
/**
 * Locates the resources embedded in the document behind the given extractor.
 * <p>
 * Nothing is returned; results are appended to the two supplied lists:
 * <ul>
 * <li>for a {@code WordExtractor}, every entry of the root's
 *     {@code ObjectPool} directory whose name starts with {@code "_"} is
 *     added to {@code dirs}; a missing {@code ObjectPool} is silently
 *     ignored;</li>
 * <li>for an {@code OutlookTextExtactor}, each attachment that has attach
 *     data is added to {@code nonPOIFS} as a {@link ByteArrayInputStream}
 *     over its bytes, otherwise, if it has an attachment directory, that
 *     directory is added to {@code dirs}.</li>
 * </ul>
 * Any other extractor type leaves both lists untouched.
 *
 * @param ext      the extractor whose document should be searched
 * @param dirs     receives the embedded entries that were found
 * @param nonPOIFS receives streams over attachment bytes
 * @throws IOException           declared by the signature
 * @throws IllegalStateException if {@code ext.getRoot()} returns {@code null}
 */
public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
    final DirectoryEntry root = ext.getRoot();
    if (root == null) {
        throw new IllegalStateException("The extractor didn't know which POIFS it came from!");
    }

    if (ext instanceof WordExtractor) {
        // Word keeps its embedded objects in ObjectPool -> _... under the root
        try {
            DirectoryEntry objectPool = (DirectoryEntry) root.getEntry("ObjectPool");
            for (Iterator<Entry> children = objectPool.getEntries(); children.hasNext();) {
                Entry child = children.next();
                if (child.getName().startsWith("_")) {
                    dirs.add(child);
                }
            }
        } catch (FileNotFoundException ignored) {
            // No ObjectPool in this document, so there is nothing to collect
        }
        return;
    }

    // TODO: PowerPointExtractor is not handled - tricky, its embedded
    // objects are not stored directly in the POIFS

    if (ext instanceof OutlookTextExtactor) {
        // Outlook keeps its embedded content in the attachment blocks
        MAPIMessage message = ((OutlookTextExtactor) ext).getMAPIMessage();
        for (AttachmentChunks chunks : message.getAttachmentFiles()) {
            if (chunks.getAttachData() != null) {
                nonPOIFS.add(new ByteArrayInputStream(chunks.getAttachData().getValue()));
                continue;
            }
            if (chunks.getAttachmentDirectory() != null) {
                dirs.add(chunks.getAttachmentDirectory().getDirectory());
            }
        }
    }
}
Use of org.apache.poi.hsmf.MAPIMessage in project Xponents by OpenSextant.
Class OLEMessageConverter, method conversionImplementation.
/**
 * Converts an Outlook message read from {@code in} into a
 * {@code ConvertedDocument}.
 * <p>
 * The message's text body becomes the document text, each attachment that
 * carries binary data is added as a raw child, and subject, message date and
 * sender are copied over when present. Attachments without binary data are
 * skipped, so that one such attachment does not make the whole conversion
 * fail.
 *
 * @param in  stream holding the message; always closed before returning
 * @param doc file the resulting document is created for
 * @return the converted document
 * @throws IOException if anything goes wrong while parsing; the original
 *                     failure is attached as the cause
 */
@Override
protected ConvertedDocument conversionImplementation(InputStream in, File doc) throws IOException {
    ConvertedDocument msgDoc = new ConvertedDocument(doc);
    try {
        MAPIMessage msg = new MAPIMessage(in);

        // If your message is Latin-1 text... there is no real easy way to get bytes of raw message text
        // to ensure it is UTF-8. By default this may be UTF-8 text.
        msgDoc.setText(msg.getTextBody());
        /* Would prefer not to set encoding here without knowing or attempting to derive it properly */
        msgDoc.setEncoding(ConvertedDocument.OUTPUT_ENCODING);

        AttachmentChunks[] chunks = msg.getAttachmentFiles();
        for (AttachmentChunks c : chunks) {
            if (c.attachData == null) {
                // No binary payload on this attachment (presumably an embedded
                // message kept as a directory - confirm); there are no bytes to
                // hand on, so skip it rather than fail the whole message.
                continue;
            }
            Content child = new Content();
            child.id = getAttachmentName(c.attachLongFileName, c.attachFileName);
            child.content = c.attachData.getValue();
            msgDoc.addRawChild(child);
        }

        // Get a subject line.
        try {
            msgDoc.addTitle(msg.getSubject());
        } catch (ChunkNotFoundException err) {
            msgDoc.addTitle("(MIME error: unable to get subject)");
        }

        // Get a date line.
        try {
            msgDoc.addCreateDate(msg.getMessageDate());
        } catch (ChunkNotFoundException err) {
            // Deliberately ignored: without a date chunk no create date is added.
        }

        // Get author.
        try {
            msgDoc.addAuthor(msg.getDisplayFrom());
        } catch (ChunkNotFoundException err) {
            msgDoc.addAuthor("(MIME error: unable to get sender)");
        }

        return msgDoc;
    } catch (Exception xerr) {
        throw new IOException("Unable to parse content", xerr);
    } finally {
        in.close();
    }
}
Aggregations