use of org.apache.poi.hmef.attribute.MAPIAttribute in project poi by apache.
the class HMEFContentsExtractor method getBodyAttribute.
/**
 * Looks up the MAPI attribute that carries the message body.
 * <p>
 * The {@code RTF_COMPRESSED} property is tried first. When the message has
 * no such attribute, a custom {@code ASCII_STRING} property with id
 * {@code 0x3fd9} ("Uncompressed Body") is looked up instead.
 *
 * @return the compressed RTF attribute if present, otherwise whatever the
 *         lookup of the uncompressed body property yields (null if that
 *         isn't there either)
 */
protected MAPIAttribute getBodyAttribute() {
    MAPIAttribute compressedRtf = message.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
    if (compressedRtf == null) {
        // See bug #59786 - a test file is still wanted to confirm whether
        // this is the right property, and whether it is truly general or not.
        MAPIProperty plainBodyProperty = MAPIProperty.createCustom(0x3fd9, Types.ASCII_STRING, "Uncompressed Body");
        // Fall back to the uncompressed body; the lookup result is returned as-is
        return message.getMessageMAPIAttribute(plainBodyProperty);
    }
    return compressedRtf;
}
use of org.apache.poi.hmef.attribute.MAPIAttribute in project poi by apache.
the class HMEFContentsExtractor method extractMessageBody.
/**
 * Writes the message body out to a file.
 * <p>
 * If the body is a {@link MAPIStringAttribute}, its text is encoded with
 * {@code StringUtil.UTF8} and saved to a {@code .txt} file whose name is
 * derived from {@code dest} (a trailing {@code .rtf} is stripped first).
 * Otherwise the attribute's raw data, which should be RTF, is written to
 * {@code dest} itself. If no body attribute is found, a message is printed
 * to {@code System.err} and no file is created.
 *
 * @param dest the file to write the RTF body to
 * @throws IOException if the output file cannot be opened or written
 */
public void extractMessageBody(File dest) throws IOException {
    MAPIAttribute body = getBodyAttribute();
    if (body == null) {
        System.err.println("No message body found, " + dest + " not created");
        return;
    }

    // Decide once whether we are dealing with plain text rather than RTF
    final boolean plainText = body instanceof MAPIStringAttribute;

    File target = dest;
    if (plainText) {
        // Plain text bodies go to <name>.txt rather than <name>.rtf
        String path = dest.toString();
        String base = path.endsWith(".rtf") ? path.substring(0, path.length() - 4) : path;
        target = new File(base + ".txt");
    }

    OutputStream fout = new FileOutputStream(target);
    try {
        byte[] payload;
        if (plainText) {
            // Save in a predictable encoding, not raw bytes
            payload = ((MAPIStringAttribute) body).getDataString().getBytes(StringUtil.UTF8);
        } else {
            // Save the raw bytes, should be raw RTF
            payload = body.getData();
        }
        fout.write(payload);
    } finally {
        fout.close();
    }
}
use of org.apache.poi.hmef.attribute.MAPIAttribute in project tika by apache.
the class TNEFParser method parse.
/**
 * Extracts properties and text from an MS Document input stream.
 * <p>
 * The stream is read as an {@link HMEFMessage}. A non-empty subject is
 * recorded in the metadata, the compressed RTF body (when present) is handed
 * to the embedded document extractor as {@code message.rtf}, and then every
 * attachment is handed over in turn.
 *
 * @param stream   the input stream to read the message from
 * @param handler  the content handler passed on to {@code handleEmbedded}
 * @param metadata receives the message subject, if there is one
 * @param context  used to obtain the embedded document extractor
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    // Embedded parts are processed by recursion, so fetch the extractor up front
    EmbeddedDocumentExtractor embeddedExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);

    // Let POI do the actual decoding of the file
    HMEFMessage msg = new HMEFMessage(stream);

    // Record the subject when the message has one
    String subject = msg.getSubject();
    if (subject != null && !subject.isEmpty()) {
        // TODO: Move to title in Tika 2.0
        metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject);
    }

    // Recurse into the RTF message body; instanceof is false for null,
    // so no separate null check is needed
    MAPIAttribute bodyAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
    if (bodyAttr instanceof MAPIRtfAttribute) {
        handleEmbedded("message.rtf", "application/rtf", ((MAPIRtfAttribute) bodyAttr).getData(), embeddedExtractor, handler);
    }

    // Recurse into each attachment in turn
    for (Attachment attachment : msg.getAttachments()) {
        // Name preference: long filename, then filename, then "unknown" + extension
        String name = attachment.getLongFilename();
        if (name == null || name.isEmpty()) {
            name = attachment.getFilename();
            if (name == null || name.isEmpty()) {
                String ext = attachment.getExtension();
                if (ext != null) {
                    name = "unknown" + ext;
                }
                // With no extension either, name is passed on as it is (null or empty)
            }
        }
        handleEmbedded(name, null, attachment.getContents(), embeddedExtractor, handler);
    }
}
Aggregations