Use of org.apache.poi.hsmf.exceptions.ChunkNotFoundException in project poi by Apache.
In class MAPIMessage, method getRecipientNamesList.
/**
 * Collects the name of every recipient of this message, one entry
 * per recipient, normally in TO then CC then BCC order.
 * Each name is obtained from the recipient's chunks via
 * {@code getRecipientName()}.
 * See also {@link #getDisplayTo()}, {@link #getDisplayCC()}
 * and {@link #getDisplayBCC()}.
 *
 * @return the recipient names; no entry is ever {@code null}
 * @throws ChunkNotFoundException if there are no recipient chunks at
 *  all, or if any single recipient yields no name
 */
public String[] getRecipientNamesList() throws ChunkNotFoundException {
    final boolean noRecipients = (recipientChunks == null) || (recipientChunks.length == 0);
    if (noRecipients) {
        throw new ChunkNotFoundException("No recipients section present");
    }

    final String[] result = new String[recipientChunks.length];
    int position = 0;
    for (RecipientChunks recipient : recipientChunks) {
        final String displayName = recipient.getRecipientName();
        if (displayName == null) {
            // A single nameless recipient aborts the whole lookup
            throw new ChunkNotFoundException("No display name holding chunks found for the " + (position + 1) + "th recipient");
        }
        result[position] = displayName;
        position++;
    }
    return result;
}
Use of org.apache.poi.hsmf.exceptions.ChunkNotFoundException in project poi by Apache.
In class MAPIMessage, method getRecipientEmailAddressList.
/**
 * Collects the email address of every recipient of this message, one
 * entry per recipient, normally in TO then CC then BCC order.
 * Each address is obtained from the recipient's chunks via
 * {@code getRecipientEmailAddress()}.
 *
 * @return the recipient email addresses; an entry is {@code null} only
 *  when no address was found for that recipient and
 *  {@code returnNullOnMissingChunk} is set
 * @throws ChunkNotFoundException if there are no recipient chunks at
 *  all, or if a recipient yields no address while
 *  {@code returnNullOnMissingChunk} is not set
 */
public String[] getRecipientEmailAddressList() throws ChunkNotFoundException {
    if (recipientChunks == null || recipientChunks.length == 0) {
        throw new ChunkNotFoundException("No recipients section present");
    }

    final String[] addresses = new String[recipientChunks.length];
    for (int idx = 0; idx < addresses.length; idx++) {
        final String address = recipientChunks[idx].getRecipientEmailAddress();
        // A missing address is only fatal when the caller has not opted in to nulls
        if (address == null && !returnNullOnMissingChunk) {
            throw new ChunkNotFoundException("No email address holding chunks found for the " + (idx + 1) + "th recipient");
        }
        addresses[idx] = address;
    }
    return addresses;
}
Use of org.apache.poi.hsmf.exceptions.ChunkNotFoundException in project tika by Apache.
In class OutlookExtractor, method guess7BitEncoding.
/**
 * Tries to identify the correct encoding for 7-bit (non-unicode)
 * strings in the file.
 * <p>Many messages store their strings as unicode, which is
 * nice and easy. Some use one-byte encodings for their
 * strings, but don't always store the encoding anywhere
 * helpful in the file.</p>
 * <p>Candidate encodings are tried in the following order, and the
 * method returns as soon as {@code tryToSet7BitEncoding} accepts one:</p>
 * <ol>
 * <li>the {@code MESSAGE_CODEPAGE} and then {@code INTERNET_CPID}
 *     codepage properties;</li>
 * <li>a {@code charset=} parameter on a Content-Type message header;</li>
 * <li>whatever the encoding detector held in the {@code detector}
 *     field reports for the HTML body;</li>
 * <li>a {@link CharsetDetector} run over the raw bytes of the text
 *     body chunk, accepted only above a confidence of 35.</li>
 * </ol>
 * <p>If nothing is accepted the message is left unchanged. Missing
 * chunks and unsupported codepages are treated as "no information
 * from this source" and never propagate to the caller.</p>
 * <p>Bug #49441 has more on why this is needed</p>
 * <p>This is taken verbatim from POI (TIKA-1238)
 * as a temporary workaround to prevent unsupported encoding exceptions</p>
 *
 * @param msg the message whose 7-bit string encoding should be set
 */
private void guess7BitEncoding(MAPIMessage msg) {
    Chunks mainChunks = msg.getMainChunks();
    //sanity check
    if (mainChunks == null) {
        return;
    }

    Map<MAPIProperty, List<PropertyValue>> props = mainChunks.getProperties();
    if (props != null) {
        // First choice is a codepage property
        for (MAPIProperty prop : new MAPIProperty[] { MAPIProperty.MESSAGE_CODEPAGE, MAPIProperty.INTERNET_CPID }) {
            List<PropertyValue> val = props.get(prop);
            if (val == null || val.isEmpty()) {
                continue;
            }
            PropertyValue first = val.get(0);
            if (!(first instanceof PropertyValue.LongPropertyValue)) {
                // A null or unexpectedly-typed value must not abort the whole
                // guess with a ClassCastException/NullPointerException;
                // move on to the next candidate instead.
                continue;
            }
            int codepage = ((PropertyValue.LongPropertyValue) first).getValue();
            String encoding = null;
            try {
                encoding = CodePageUtil.codepageToEncoding(codepage, true);
            } catch (UnsupportedEncodingException ignored) {
                // Unknown codepage: deliberately ignored so the remaining
                // sources still get a chance.
            }
            if (encoding != null && tryToSet7BitEncoding(msg, encoding)) {
                return;
            }
        }
    }

    // Second choice is a charset on a content type header
    try {
        String[] headers = msg.getHeaders();
        if (headers != null && headers.length > 0) {
            // Look for a content type with a charset
            Pattern p = Pattern.compile("Content-Type:.*?charset=[\"']?([^;'\"]+)[\"']?", Pattern.CASE_INSENSITIVE);
            for (String header : headers) {
                // NOTE(review): this prefix test is case-sensitive although the
                // pattern itself is case-insensitive - confirm that is intended.
                if (header.startsWith("Content-Type")) {
                    Matcher m = p.matcher(header);
                    // NOTE(review): matches() needs the whole header to fit the
                    // pattern, so a charset followed by further parameters is
                    // not picked up - confirm whether find() was intended.
                    if (m.matches()) {
                        // Found it! Tell all the string chunks
                        String charset = m.group(1);
                        if (tryToSet7BitEncoding(msg, charset)) {
                            return;
                        }
                    }
                }
            }
        }
    } catch (ChunkNotFoundException ignored) {
        // No headers chunk: best effort, fall through to the next source.
    }

    // meta header if there is no other information?
    try {
        String html = msg.getHtmlBody();
        if (html != null && html.length() > 0) {
            Charset charset = null;
            try {
                // Uses the detector held in the enclosing class' field
                charset = detector.detect(new ByteArrayInputStream(html.getBytes(UTF_8)), EMPTY_METADATA);
            } catch (IOException ignored) {
                // Detection failure just means no hint from the HTML body.
            }
            if (charset != null && tryToSet7BitEncoding(msg, charset.name())) {
                return;
            }
        }
    } catch (ChunkNotFoundException ignored) {
        // No HTML body chunk: best effort, fall through to the last resort.
    }

    //absolute last resort, try charset detector
    StringChunk text = mainChunks.getTextBodyChunk();
    if (text != null) {
        // Named differently from the 'detector' field used above, so the
        // field is no longer shadowed inside this method.
        CharsetDetector textDetector = new CharsetDetector();
        textDetector.setText(text.getRawValue());
        CharsetMatch match = textDetector.detect();
        if (match != null && match.getConfidence() > 35) {
            // Nothing left to try afterwards, so the result is not needed.
            tryToSet7BitEncoding(msg, match.getName());
        }
    }
}
Aggregations