use of org.apache.poi.hsmf.datatypes.MAPIProperty in project poi by apache.
the class TestHMEFMessage method testCustomProperty.
/**
 * Checks that the non-standard MAPI properties with IDs 0xE28 and 0xE29
 * in quick-winmail.dat are present on the message, and that they can be
 * looked up through custom {@link MAPIProperty} definitions.
 */
public void testCustomProperty() throws Exception {
    HMEFMessage message = new HMEFMessage(_samples.openResourceAsStream("quick-winmail.dat"));

    // Walk the message-level attributes, noting whether each of the two
    // non-standard property IDs turns up
    boolean sawE28 = false;
    boolean sawE29 = false;
    for (MAPIAttribute attribute : message.getMessageMAPIAttributes()) {
        int propertyId = attribute.getProperty().id;
        sawE28 |= (propertyId == 0xe28);
        sawE29 |= (propertyId == 0xe29);
    }
    assertEquals(true, sawE28);
    assertEquals(true, sawE29);

    // Both must also be retrievable via custom property definitions
    MAPIProperty customE28 = MAPIProperty.createCustom(0xe28, Types.ASCII_STRING, "Custom E28");
    MAPIProperty customE29 = MAPIProperty.createCustom(0xe29, Types.ASCII_STRING, "Custom E29");
    MAPIAttribute fetchedE28 = message.getMessageMAPIAttribute(customE28);
    assertNotNull(fetchedE28);
    assertNotNull(message.getMessageMAPIAttribute(customE29));

    // 0xE28 should come back as a string attribute; only the text after
    // the first 10 characters is compared
    assertEquals(MAPIStringAttribute.class, fetchedE28.getClass());
    String e28Text = ((MAPIStringAttribute) fetchedE28).getDataString();
    assertEquals("Zimbra - Mark Rogers", e28Text.substring(10));
}
use of org.apache.poi.hsmf.datatypes.MAPIProperty in project poi by apache.
the class MAPIAttribute method create.
/**
 * Parses a MAPI Properties TNEF Attribute, and returns
 * the list of MAPI Attributes contained within it.
 *
 * <p>The attribute data is read as a 32-bit little-endian count, followed
 * by that many entries. Each entry starts with a 16-bit type (which may
 * carry the multi-valued flag) and a 16-bit property id. Ids of 0x8000
 * and above additionally carry a 16 byte GUID plus either the id of a
 * base property or a stored name. Multi-valued and variable-length
 * entries then carry a value count ahead of the value(s).</p>
 *
 * @param parent the TNEF attribute to parse; its property must be
 *  {@code TNEFProperty.ID_MAPIPROPERTIES} or {@code TNEFProperty.ID_ATTACHMENT}
 * @return one {@link MAPIAttribute} per value read, in the order read
 * @throws IllegalArgumentException if {@code parent} is any other kind of attribute
 * @throws IOException if the data has already run out at the point where
 *  a GUID, a stored property name or a value is expected
 */
public static List<MAPIAttribute> create(TNEFAttribute parent) throws IOException {
    // Regular MAPI Properties (normally on the message) and the MAPI
    // Properties for an attachment are the only supported parents
    if (parent.getProperty() != TNEFProperty.ID_MAPIPROPERTIES
            && parent.getProperty() != TNEFProperty.ID_ATTACHMENT) {
        // Something else, oh dear...
        throw new IllegalArgumentException("Can only create from a MAPIProperty attribute, " + "instead received a " + parent.getProperty() + " one");
    }

    ByteArrayInputStream inp = new ByteArrayInputStream(parent.getData());

    // First up, get the number of attributes
    int count = LittleEndian.readInt(inp);
    List<MAPIAttribute> attrs = new ArrayList<MAPIAttribute>();

    // Now, read each one in in turn
    for (int i = 0; i < count; i++) {
        int typeAndMV = LittleEndian.readUShort(inp);
        int id = LittleEndian.readUShort(inp);

        // Is it either Multi-Valued or Variable-Length?
        boolean isMV = false;
        boolean isVL = false;
        int typeId = typeAndMV;
        if ((typeAndMV & Types.MULTIVALUED_FLAG) != 0) {
            isMV = true;
            // Strip the flag so only the base type id remains
            typeId -= Types.MULTIVALUED_FLAG;
        }
        if (typeId == Types.ASCII_STRING.getId() || typeId == Types.UNICODE_STRING.getId() || typeId == Types.BINARY.getId() || typeId == Types.DIRECTORY.getId()) {
            isVL = true;
        }

        // Turn the type ID into a strongly typed thing
        MAPIType type = Types.getById(typeId);
        if (type == null) {
            type = Types.createCustom(typeId);
        }

        // If it's a named property, rather than a standard
        // MAPI property, grab the details of it
        MAPIProperty prop = MAPIProperty.get(id);
        if (id >= 0x8000 && id <= 0xFFFF) {
            byte[] guid = new byte[16];
            // The GUID bytes are not used, but must be consumed; fail
            // loudly rather than carry on with a zero-filled buffer
            if (IOUtils.readFully(inp, guid) < 0) {
                throw new IOException("Not enough data to read guid");
            }
            int mptype = LittleEndian.readInt(inp);

            // Get the name of it
            String name;
            if (mptype == 0) {
                // It's based on a normal one
                int mpid = LittleEndian.readInt(inp);
                MAPIProperty base = MAPIProperty.get(mpid);
                name = base.name;
            } else {
                // Custom name was stored
                int mplen = LittleEndian.readInt(inp);
                byte[] mpdata = new byte[mplen];
                if (mplen > 0 && IOUtils.readFully(inp, mpdata) < 0) {
                    throw new IOException("Not enough data to read " + mplen + " bytes for attribute name");
                }
                // Two bytes per character; the final character is left
                // off (presumably a trailing terminator - TODO confirm)
                name = StringUtil.getFromUnicodeLE(mpdata, 0, (mplen / 2) - 1);
                skipToBoundary(mplen, inp);
            }

            // Now create
            prop = MAPIProperty.createCustom(id, type, name);
        }
        if (prop == MAPIProperty.UNKNOWN) {
            prop = MAPIProperty.createCustom(id, type, "(unknown " + Integer.toHexString(id) + ")");
        }

        // Now read in the value(s)
        int values = 1;
        if (isMV || isVL) {
            values = LittleEndian.readInt(inp);
        }
        for (int j = 0; j < values; j++) {
            int len = getLength(type, inp);
            byte[] data = new byte[len];
            // A zero length value needs no read; otherwise running out
            // of data here means the attribute is truncated
            if (len > 0 && IOUtils.readFully(inp, data) < 0) {
                throw new IOException("Not enough data to read " + len + " bytes of attribute value");
            }
            skipToBoundary(len, inp);

            // Create
            MAPIAttribute attr;
            if (type == Types.UNICODE_STRING || type == Types.ASCII_STRING) {
                attr = new MAPIStringAttribute(prop, typeId, data);
            } else if (type == Types.APP_TIME || type == Types.TIME) {
                attr = new MAPIDateAttribute(prop, typeId, data);
            } else if (id == MAPIProperty.RTF_COMPRESSED.id) {
                attr = new MAPIRtfAttribute(prop, typeId, data);
            } else {
                attr = new MAPIAttribute(prop, typeId, data);
            }
            attrs.add(attr);
        }
    }

    // All done
    return attrs;
}
use of org.apache.poi.hsmf.datatypes.MAPIProperty in project poi by apache.
the class HMEFContentsExtractor method getBodyAttribute.
/**
 * Finds the attribute holding the message body.
 *
 * @return the compressed RTF attribute when the message has one,
 *  otherwise whatever is stored under the custom 0x3fd9
 *  "Uncompressed Body" property, which may be null
 */
protected MAPIAttribute getBodyAttribute() {
    final MAPIAttribute compressedRtf = message.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
    if (compressedRtf == null) {
        // See bug #59786 - we'd really like a test file to confirm whether
        // these are the right properties, and whether this is truly general
        // Hand back the uncompressed one, or null if that isn't there either
        return message.getMessageMAPIAttribute(
                MAPIProperty.createCustom(0x3fd9, Types.ASCII_STRING, "Uncompressed Body"));
    }
    return compressedRtf;
}
use of org.apache.poi.hsmf.datatypes.MAPIProperty in project tika by apache.
the class OutlookExtractor method handleFromTo.
/**
 * Copies the sender and recipient details of the message into the metadata.
 *
 * <p>The display from/to/cc/bcc strings are set first, then the
 * sent-by-server type (when that chunk exists), then the sender and
 * sent-representing name and email chunks, and finally the name,
 * display name and email address of every recipient, grouped by
 * recipient type.</p>
 *
 * @param headers not read by this method; kept so the signature is unchanged
 * @param metadata the metadata object to populate
 * @throws ChunkNotFoundException propagated from the display-name lookups on the message
 */
private void handleFromTo(Map<String, String[]> headers, Metadata metadata) throws ChunkNotFoundException {
    String from = msg.getDisplayFrom();
    metadata.set(TikaCoreProperties.CREATOR, from);
    metadata.set(Metadata.MESSAGE_FROM, from);
    metadata.set(Metadata.MESSAGE_TO, msg.getDisplayTo());
    metadata.set(Metadata.MESSAGE_CC, msg.getDisplayCC());
    metadata.set(Metadata.MESSAGE_BCC, msg.getDisplayBCC());

    Chunks chunks = msg.getMainChunks();
    StringChunk sentByServerType = chunks.getSentByServerType();
    if (sentByServerType != null) {
        metadata.set(Office.MAPI_SENT_BY_SERVER_TYPE, sentByServerType.getValue());
    }

    // Reuse the chunks fetched above rather than asking the message again
    Map<MAPIProperty, List<Chunk>> mainChunks = chunks.getAll();

    //sometimes in SMTP .msg files there is an email in the sender name field.
    setFirstChunk(mainChunks.get(MAPIProperty.SENDER_NAME), Message.MESSAGE_FROM_NAME, metadata);
    setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_NAME), Office.MAPI_FROM_REPRESENTING_NAME, metadata);
    setFirstChunk(mainChunks.get(MAPIProperty.SENDER_EMAIL_ADDRESS), Message.MESSAGE_FROM_EMAIL, metadata);
    setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_EMAIL_ADDRESS), Office.MAPI_FROM_REPRESENTING_EMAIL, metadata);

    // Values are added even when null so the name, display name and
    // email entries are always added together for each recipient
    for (Recipient recipient : buildRecipients()) {
        switch (recipient.recipientType) {
            case TO:
                addEvenIfNull(Message.MESSAGE_TO_NAME, recipient.name, metadata);
                addEvenIfNull(Message.MESSAGE_TO_DISPLAY_NAME, recipient.displayName, metadata);
                addEvenIfNull(Message.MESSAGE_TO_EMAIL, recipient.emailAddress, metadata);
                break;
            case CC:
                addEvenIfNull(Message.MESSAGE_CC_NAME, recipient.name, metadata);
                addEvenIfNull(Message.MESSAGE_CC_DISPLAY_NAME, recipient.displayName, metadata);
                addEvenIfNull(Message.MESSAGE_CC_EMAIL, recipient.emailAddress, metadata);
                break;
            case BCC:
                addEvenIfNull(Message.MESSAGE_BCC_NAME, recipient.name, metadata);
                addEvenIfNull(Message.MESSAGE_BCC_DISPLAY_NAME, recipient.displayName, metadata);
                addEvenIfNull(Message.MESSAGE_BCC_EMAIL, recipient.emailAddress, metadata);
                break;
            default:
                //log unknown or undefined?
                break;
        }
    }
}
use of org.apache.poi.hsmf.datatypes.MAPIProperty in project tika by apache.
the class OutlookExtractor method guess7BitEncoding.
/**
 * Tries to identify the correct encoding for 7-bit (non-unicode)
 * strings in the file.
 * <p>Many messages store their strings as unicode, which is
 * nice and easy. Some use one-byte encodings for their
 * strings, but don't always store the encoding anywhere
 * helpful in the file.</p>
 * <p>This method tries, in order: the codepage properties, a charset
 * on a Content-Type header, the encoding detected from the HTML body,
 * and finally a charset detector run over the raw text body. It stops
 * at the first candidate that {@code tryToSet7BitEncoding} accepts.</p>
 * <p>Bug #49441 has more on why this is needed</p>
 * <p>This is adapted from POI (TIKA-1238)
 * as a temporary workaround to prevent unsupported encoding exceptions</p>
 *
 * @param msg the message whose 7-bit string encoding should be set;
 *  nothing is done if it has no main chunks
 */
private void guess7BitEncoding(MAPIMessage msg) {
    Chunks mainChunks = msg.getMainChunks();
    //sanity check
    if (mainChunks == null) {
        return;
    }

    Map<MAPIProperty, List<PropertyValue>> props = mainChunks.getProperties();
    if (props != null) {
        // First choice is a codepage property
        for (MAPIProperty prop : new MAPIProperty[] { MAPIProperty.MESSAGE_CODEPAGE, MAPIProperty.INTERNET_CPID }) {
            List<PropertyValue> val = props.get(prop);
            if (val != null && val.size() > 0) {
                int codepage = ((PropertyValue.LongPropertyValue) val.get(0)).getValue();
                String encoding = null;
                try {
                    encoding = CodePageUtil.codepageToEncoding(codepage, true);
                } catch (UnsupportedEncodingException e) {
                    // Deliberately ignored: encoding stays null and the
                    // next candidate gets its turn
                }
                if (tryToSet7BitEncoding(msg, encoding)) {
                    return;
                }
            }
        }
    }

    // Second choice is a charset on a content type header
    try {
        String[] headers = msg.getHeaders();
        if (headers != null && headers.length > 0) {
            // Look for a content type with a charset
            Pattern p = Pattern.compile("Content-Type:.*?charset=[\"']?([^;'\"]+)[\"']?", Pattern.CASE_INSENSITIVE);
            String headerName = "Content-Type";
            for (String header : headers) {
                // Compare the header name ignoring case, in line with the
                // case-insensitive pattern above
                if (header.regionMatches(true, 0, headerName, 0, headerName.length())) {
                    Matcher m = p.matcher(header);
                    // find() rather than matches(), so that parameters
                    // following the charset don't hide it
                    if (m.find()) {
                        // Found it! Tell all the string chunks
                        String charset = m.group(1).trim();
                        if (charset.length() > 0 && tryToSet7BitEncoding(msg, charset)) {
                            return;
                        }
                    }
                }
            }
        }
    } catch (ChunkNotFoundException e) {
        // Deliberately ignored: without headers, move on to the next heuristic
    }

    // meta header if there is no other information?
    try {
        String html = msg.getHtmlBody();
        if (html != null && html.length() > 0) {
            Charset charset = null;
            try {
                charset = detector.detect(new ByteArrayInputStream(html.getBytes(UTF_8)), EMPTY_METADATA);
            } catch (IOException e) {
                // Deliberately ignored: charset stays null
            }
            if (charset != null && tryToSet7BitEncoding(msg, charset.name())) {
                return;
            }
        }
    } catch (ChunkNotFoundException e) {
        // Deliberately ignored: without an HTML body, move on to the last resort
    }

    //absolute last resort, try charset detector
    StringChunk text = mainChunks.getTextBodyChunk();
    if (text != null) {
        // Named so as not to shadow the detector field used above
        CharsetDetector textDetector = new CharsetDetector();
        textDetector.setText(text.getRawValue());
        CharsetMatch match = textDetector.detect();
        // Only accept a match whose confidence is above 35
        if (match != null && match.getConfidence() > 35 && tryToSet7BitEncoding(msg, match.getName())) {
            return;
        }
    }
}
Aggregations