Use of org.apache.commons.mail.util.MimeMessageParser in the project webofneeds by researchstudio-sat.
Class MailFileNeedProducer, method readNeedFromFile.
/**
 * Parses the given file as a MIME e-mail message and builds a need dataset from it.
 *
 * <p>The mail subject becomes the title. The description is taken from the plain-text
 * body when present; otherwise the HTML body is reduced to its text via Jsoup. If the
 * mail has neither, no description is set.
 *
 * @param file the mail file to read
 * @return a copy of the dataset built from the mail, or {@code null} if the message
 *         could not be parsed (the failure is logged)
 * @throws IOException if the file cannot be opened or the stream cannot be closed
 */
@Override
public synchronized Dataset readNeedFromFile(final File file) throws IOException {
    logger.debug("processing as mail file: {} ", file);
    // try-with-resources closes the stream on every exit path, including a failure
    // while constructing the model wrapper (which previously leaked the stream).
    try (FileInputStream fis = new FileInputStream(file)) {
        DefaultNeedModelWrapper needModelWrapper = new DefaultNeedModelWrapper("no:uri");
        // Parsing errors are handled in an inner try so that an IOException raised by
        // closing the stream still propagates to the caller, as it did before.
        try {
            MimeMessage emailMessage = new MimeMessage(null, fis);
            MimeMessageParser parser = new MimeMessageParser(emailMessage);
            parser.parse();
            needModelWrapper.setTitle(needContentPropertyType, parser.getSubject());
            // Prefer the plain-text body; fall back to the text of the HTML body.
            String content = null;
            if (parser.hasPlainContent()) {
                content = parser.getPlainContent();
            } else if (parser.hasHtmlContent()) {
                Document doc = Jsoup.parse(parser.getHtmlContent());
                content = doc.text();
            }
            if (content != null) {
                needModelWrapper.setDescription(needContentPropertyType, content);
            }
            logger.debug("mail subject : {}", parser.getSubject());
            logger.debug("mail has plain content: {}", parser.hasPlainContent());
            logger.debug("mail has html content : {}", parser.hasHtmlContent());
            logger.debug("mail has attachments : {}", parser.hasAttachments());
            logger.debug("mail plain content : {}", StringUtils.abbreviate(parser.getPlainContent(), 200));
            logger.debug("mail html content : {}", StringUtils.abbreviate(parser.getHtmlContent(), 200));
            return needModelWrapper.copyDataset();
        } catch (Exception e) {
            // Best effort: an unparsable mail is reported and signalled with null.
            logger.error("could not parse email from file {} ", file, e);
        }
    }
    return null;
}
Use of org.apache.commons.mail.util.MimeMessageParser in the project nifi by apache.
Class ExtractEmailAttachments, method onTrigger.
/**
 * Reads one FlowFile as an e-mail message and emits one child FlowFile per attachment.
 *
 * <p>The message must carry a sender and a sent date; otherwise, or on any other
 * parsing error, the original FlowFile is routed to {@code REL_FAILURE}. A valid
 * message is routed to {@code REL_ORIGINAL} and its attachments to
 * {@code REL_ATTACHMENTS}. Each attachment FlowFile carries the attachment's name and
 * content type (when not blank) plus the UUID and file name of the original FlowFile.
 *
 * @param context the process context (not read by this method)
 * @param session the session used to fetch, create, and transfer FlowFiles
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();
    final String requireStrictAddresses = "false";
    session.read(originalFlowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC-2822 determines that a message must have a "From:" header
                // if a message lacks the field, it is flagged as invalid
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessageException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC2822 validation: No Sent Date");
                }
                originalFlowFilesList.add(originalFlowFile);
                if (parser.hasAttachments()) {
                    final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                    try {
                        for (final DataSource data : parser.getAttachmentList()) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();
                            if (StringUtils.isNotBlank(data.getName())) {
                                attributes.put(CoreAttributes.FILENAME.key(), data.getName());
                            }
                            if (StringUtils.isNotBlank(data.getContentType())) {
                                attributes.put(CoreAttributes.MIME_TYPE.key(), data.getContentType());
                            }
                            String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
                            split = session.append(split, new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    // Close the attachment stream once it has been copied.
                                    try (InputStream attachmentIn = data.getInputStream()) {
                                        IOUtils.copy(attachmentIn, out);
                                    }
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong
                        // Removing splits that may have been created
                        session.remove(attachmentsList);
                        // Drop the references as well, so the removed FlowFiles are not
                        // handed to session.transfer(...) after the read completes.
                        attachmentsList.clear();
                        // Removing the original flow from its list
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Another error hit...
                // NOTE(review): splits already collected in attachmentsList are kept and
                // still transferred below on this path - confirm this is intended.
                // Removing the original flow from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                // Message is invalid or triggered an error during parsing
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    session.transfer(attachmentsList, REL_ATTACHMENTS);
    // As per above code, originalFlowfile may be routed to invalid or
    // original depending on RFC2822 compliance.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);
    // Only list the individual FlowFiles in the log when there are few of them.
    if (attachmentsList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
    } else if (attachmentsList.size() > 1) {
        logger.info("Split {} into {} files: {}", new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
    }
}
Use of org.apache.commons.mail.util.MimeMessageParser in the project nifi by apache.
Class ExtractEmailHeaders, method onTrigger.
/**
 * Reads one FlowFile as an e-mail message and copies selected header information into
 * FlowFile attributes.
 *
 * <p>The message must carry a sender and a sent date; otherwise, or on any other
 * parsing error, no attributes are written and the FlowFile is routed to
 * {@code REL_FAILURE}. On success the FlowFile receives the recipient and sender
 * lists, message id, received/sent dates, subject, attachment count, and every header
 * named in the {@code CAPTURED_HEADERS} property (colon-separated, matched
 * case-insensitively) as {@code email.headers.<lower-case name>}, and is routed to
 * {@code REL_SUCCESS}.
 *
 * @param context the process context supplying the parsing and header properties
 * @param session the session used to fetch, update, and transfer the FlowFile
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> processedFlowFilesList = new ArrayList<>();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final String requireStrictAddresses = context.getProperty(STRICT_PARSING).getValue();
    // A missing property value means "capture no extra headers" instead of an NPE.
    // Locale.ROOT keeps the lower-casing of header names independent of the JVM's
    // default locale.
    final String capturedHeadersValue = context.getProperty(CAPTURED_HEADERS).getValue();
    final List<String> capturedHeadersList = capturedHeadersValue == null
            ? new ArrayList<String>()
            : Arrays.asList(capturedHeadersValue.toLowerCase(java.util.Locale.ROOT).split(":"));
    final Map<String, String> attributes = new HashMap<>();
    session.read(originalFlowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC-2822 determines that a message must have a "From:" header
                // if a message lacks the field, it is flagged as invalid
                final Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                final Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessageException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC-2822 validation: No Sent Date");
                }
                if (!capturedHeadersList.isEmpty()) {
                    final Enumeration<?> headers = originalMessage.getAllHeaders();
                    while (headers.hasMoreElements()) {
                        final Header header = (Header) headers.nextElement();
                        final String headerValue = header.getValue();
                        if (StringUtils.isNotEmpty(headerValue)) {
                            final String headerName = header.getName().toLowerCase(java.util.Locale.ROOT);
                            if (capturedHeadersList.contains(headerName)) {
                                attributes.put("email.headers." + headerName, headerValue);
                            }
                        }
                    }
                }
                putAddressListInAttributes(attributes, EMAIL_HEADER_TO, originalMessage.getRecipients(Message.RecipientType.TO));
                putAddressListInAttributes(attributes, EMAIL_HEADER_CC, originalMessage.getRecipients(Message.RecipientType.CC));
                putAddressListInAttributes(attributes, EMAIL_HEADER_BCC, originalMessage.getRecipients(Message.RecipientType.BCC));
                // RFC-2822 specifies "From" as mailbox-list
                putAddressListInAttributes(attributes, EMAIL_HEADER_FROM, from);
                final String messageId = originalMessage.getMessageID();
                if (StringUtils.isNotEmpty(messageId)) {
                    attributes.put(EMAIL_HEADER_MESSAGE_ID, messageId);
                }
                final Date receivedDate = originalMessage.getReceivedDate();
                if (receivedDate != null) {
                    attributes.put(EMAIL_HEADER_RECV_DATE, receivedDate.toString());
                }
                // sentDate was validated as non-null above.
                attributes.put(EMAIL_HEADER_SENT_DATE, sentDate.toString());
                final String subject = originalMessage.getSubject();
                if (StringUtils.isNotEmpty(subject)) {
                    attributes.put(EMAIL_HEADER_SUBJECT, subject);
                }
                // The count attribute is always written: "0" when there are no attachments.
                final int attachmentCount = parser.hasAttachments() ? parser.getAttachmentList().size() : 0;
                attributes.put(EMAIL_ATTACHMENT_COUNT, String.valueOf(attachmentCount));
            } catch (Exception e) {
                // Message is invalid or triggered an error during parsing
                attributes.clear();
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    // attributes is empty only when parsing failed (it was cleared in the catch above).
    if (attributes.size() > 0) {
        FlowFile updatedFlowFile = session.putAllAttributes(originalFlowFile, attributes);
        logger.info("Extracted {} headers into {} file", new Object[] { attributes.size(), updatedFlowFile });
        processedFlowFilesList.add(updatedFlowFile);
    }
    session.transfer(processedFlowFilesList, REL_SUCCESS);
    session.transfer(invalidFlowFilesList, REL_FAILURE);
}
Aggregations