Search in sources :

Example 1 with BufferedInputStream

use of org.apache.nifi.stream.io.BufferedInputStream in project nifi by apache.

the class ExtractEmailAttachments method onTrigger.

/**
 * Parses the incoming FlowFile as a MIME e-mail message and creates one child FlowFile per attachment.
 *
 * <p>Routing performed by this method:
 * <ul>
 *   <li>each attachment FlowFile goes to {@code REL_ATTACHMENTS};</li>
 *   <li>the original FlowFile goes to {@code REL_ORIGINAL} when the message parses and carries both a
 *       "From" header and a sent date;</li>
 *   <li>the original FlowFile goes to {@code REL_FAILURE} when parsing fails, a required header is
 *       missing, or a {@link FlowFileHandlingException} is raised while creating the attachments.</li>
 * </ul>
 *
 * @param context the process context (not read by this method)
 * @param session the session used to read the original FlowFile and to create, remove and transfer FlowFiles
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();
    // Address parsing is always lenient in this processor
    final String requireStrictAddresses = "false";
    session.read(originalFlowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC-2822 determines that a message must have a "From:" header
                // if a message lacks the field, it is flagged as invalid
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessagingException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC2822 validation: No Sent Date");
                }
                originalFlowFilesList.add(originalFlowFile);
                if (parser.hasAttachments()) {
                    final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                    try {
                        for (final DataSource data : parser.getAttachmentList()) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();
                            if (StringUtils.isNotBlank(data.getName())) {
                                attributes.put(CoreAttributes.FILENAME.key(), data.getName());
                            }
                            if (StringUtils.isNotBlank(data.getContentType())) {
                                attributes.put(CoreAttributes.MIME_TYPE.key(), data.getContentType());
                            }
                            String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
                            split = session.append(split, new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    // Close the attachment stream once copied so it is not leaked
                                    try (final InputStream attachmentIn = data.getInputStream()) {
                                        IOUtils.copy(attachmentIn, out);
                                    }
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong
                        // Removing splits that may have been created
                        session.remove(attachmentsList);
                        // Forget the removed splits so they are not handed to session.transfer() below
                        attachmentsList.clear();
                        // Removing the original flow from its list
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Another error hit...
                // Removing the original flow from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                // Message is invalid or triggered an error during parsing
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    session.transfer(attachmentsList, REL_ATTACHMENTS);
    // As per above code, originalFlowfile may be routed to invalid or
    // original depending on RFC2822 compliance.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);
    // Only list the individual attachment FlowFiles in the log when there are few of them
    if (attachmentsList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
    } else if (attachmentsList.size() > 1) {
        logger.info("Split {} into {} files: {}", new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) MessagingException(javax.mail.MessagingException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Properties(java.util.Properties) MimeMessageParser(org.apache.commons.mail.util.MimeMessageParser) ComponentLog(org.apache.nifi.logging.ComponentLog) Date(java.util.Date) MessagingException(javax.mail.MessagingException) FlowFileHandlingException(org.apache.nifi.processor.exception.FlowFileHandlingException) IOException(java.io.IOException) DataSource(javax.activation.DataSource) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) MimeMessage(javax.mail.internet.MimeMessage) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) FlowFileHandlingException(org.apache.nifi.processor.exception.FlowFileHandlingException) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) HashMap(java.util.HashMap) Map(java.util.Map) ProcessSession(org.apache.nifi.processor.ProcessSession) Session(javax.mail.Session)

Example 2 with BufferedInputStream

use of org.apache.nifi.stream.io.BufferedInputStream in project nifi by apache.

the class PutFileTransfer method onTrigger.

/**
 * Sends FlowFiles to the remote host through the file transfer obtained from {@code getFileTransfer(context)}.
 *
 * <p>Starting with one FlowFile pulled from the session, the loop resolves the remote working directory,
 * asks {@code identifyAndResolveConflictFile} what to do with the file, uploads it when the result says so,
 * then transfers the FlowFile to the relationship named by the conflict result and commits the session.
 * The loop keeps pulling FlowFiles while the processor is scheduled, all relationships are available,
 * fewer than the configured batch size have been handled, and the session still has a FlowFile.
 *
 * <p>On {@code IOException}, {@code FlowFileAccessException} or {@code ProcessException} the context yields
 * and the current FlowFile is penalized and routed to {@code REL_FAILURE}.
 *
 * @param context the process context supplying the processor properties
 * @param session the session FlowFiles are read from, transferred on and committed through
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog log = getLogger();
    final String hostname = context.getProperty(FileTransfer.HOSTNAME).evaluateAttributeExpressions(flowFile).getValue();
    final int maxNumberOfFiles = context.getProperty(FileTransfer.BATCH_SIZE).asInteger();
    int fileCount = 0;
    try (final T transfer = getFileTransfer(context)) {
        do {
            // Resolve the remote directory; relative paths are anchored at the remote home directory
            final String configuredRoot = context.getProperty(FileTransfer.REMOTE_PATH).evaluateAttributeExpressions(flowFile).getValue();
            final String remoteDirPath;
            if (configuredRoot != null) {
                File remoteDir = new File(configuredRoot);
                final String configuredPath = remoteDir.getPath();
                final boolean absolute = configuredPath.startsWith("/") || configuredPath.startsWith("\\");
                if (!absolute) {
                    remoteDir = new File(transfer.getHomeDirectory(flowFile), configuredPath);
                }
                // Use forward slashes in the path handed to the transfer
                remoteDirPath = remoteDir.getPath().replace("\\", "/");
            } else {
                remoteDirPath = null;
            }
            final boolean rejectZeroByteFiles = context.getProperty(FileTransfer.REJECT_ZERO_BYTE).asBoolean();
            final String conflictStrategy = context.getProperty(FileTransfer.CONFLICT_RESOLUTION).getValue();
            final ConflictResult resolution = identifyAndResolveConflictFile(conflictStrategy, transfer, remoteDirPath, flowFile, rejectZeroByteFiles, log);
            if (resolution.isTransfer()) {
                final StopWatch timer = new StopWatch();
                timer.start();
                beforePut(flowFile, context, transfer);
                // The callback needs a final reference to the FlowFile being sent
                final FlowFile outgoing = flowFile;
                final AtomicReference<String> remotePathRef = new AtomicReference<>(null);
                session.read(flowFile, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream rawIn) throws IOException {
                        try (final InputStream content = new BufferedInputStream(rawIn)) {
                            if (remoteDirPath != null && context.getProperty(SFTPTransfer.CREATE_DIRECTORY).asBoolean()) {
                                transfer.ensureDirectoryExists(outgoing, new File(remoteDirPath));
                            }
                            final String writtenPath = transfer.put(outgoing, remoteDirPath, resolution.getFileName(), content);
                            remotePathRef.set(writtenPath);
                        }
                    }
                });
                afterPut(flowFile, context, transfer);
                timer.stop();
                final String dataRate = timer.calculateDataRate(flowFile.getSize());
                final long millis = timer.getDuration(TimeUnit.MILLISECONDS);
                log.info("Successfully transferred {} to {} on remote host {} in {} milliseconds at a rate of {}", new Object[] { flowFile, remotePathRef.get(), hostname, millis, dataRate });
                // The provenance URI needs the remote path to start with a slash
                final String remotePath = remotePathRef.get();
                final String slashedPath = remotePath.startsWith("/") ? remotePath : "/" + remotePath;
                final String destinationUri = transfer.getProtocolName() + "://" + hostname + slashedPath;
                session.getProvenanceReporter().send(flowFile, destinationUri, millis);
            }
            if (resolution.isPenalize()) {
                flowFile = session.penalize(flowFile);
            }
            session.transfer(flowFile, resolution.getRelationship());
            session.commit();
        } while (isScheduled() && (getRelationships().size() == context.getAvailableRelationships().size()) && (++fileCount < maxNumberOfFiles) && ((flowFile = session.get()) != null));
    } catch (final IOException ioe) {
        context.yield();
        log.error("Unable to transfer {} to remote host {} due to {}", new Object[] { flowFile, hostname, ioe });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    } catch (final FlowFileAccessException accessFailure) {
        context.yield();
        // Logs the cause carried by the access exception rather than the exception itself
        log.error("Unable to transfer {} to remote host {} due to {}", new Object[] { flowFile, hostname, accessFailure.getCause() });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    } catch (final ProcessException processFailure) {
        context.yield();
        log.error("Unable to transfer {} to remote host {} due to {}: {}; routing to failure", new Object[] { flowFile, hostname, processFailure, processFailure.getCause() });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) StopWatch(org.apache.nifi.util.StopWatch) ProcessException(org.apache.nifi.processor.exception.ProcessException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) FlowFile(org.apache.nifi.flowfile.FlowFile) File(java.io.File)

Example 3 with BufferedInputStream

use of org.apache.nifi.stream.io.BufferedInputStream in project nifi by apache.

the class TransformXml method onTrigger.

/**
 * Applies the configured XSLT to the content of one FlowFile.
 *
 * <p>The stylesheet named by {@code XSLT_FILE_NAME} is taken from {@code cache} when a cache is present and
 * built with {@code newTemplates} otherwise. Every dynamic property is evaluated against the FlowFile and
 * passed to the transformer as a parameter. On success the rewritten FlowFile goes to {@code REL_SUCCESS}
 * and a content-modified provenance event is recorded; when a {@code ProcessException} is raised the
 * original FlowFile goes to {@code REL_FAILURE}.
 *
 * @param context the process context supplying the processor properties
 * @param session the session used to read, rewrite and transfer the FlowFile
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }
    final ComponentLog log = getLogger();
    final StopWatch timer = new StopWatch(true);
    final String xsltFileName = context.getProperty(XSLT_FILE_NAME).evaluateAttributeExpressions(original).getValue();
    final Boolean indentOutput = context.getProperty(INDENT_OUTPUT).asBoolean();
    try {
        final FlowFile transformed = session.write(original, new StreamCallback() {

            @Override
            public void process(final InputStream rawIn, final OutputStream out) throws IOException {
                try (final InputStream bufferedIn = new BufferedInputStream(rawIn)) {
                    // Use the cached compiled stylesheet when a cache is available
                    final Templates stylesheet;
                    if (cache == null) {
                        stylesheet = newTemplates(context, xsltFileName);
                    } else {
                        stylesheet = cache.get(xsltFileName);
                    }
                    final Transformer xslt = stylesheet.newTransformer();
                    final String indentSetting;
                    if (indentOutput) {
                        indentSetting = "yes";
                    } else {
                        indentSetting = "no";
                    }
                    xslt.setOutputProperty(OutputKeys.INDENT, indentSetting);
                    // Forward every dynamic property to the transformer as a stylesheet parameter
                    for (final Map.Entry<PropertyDescriptor, String> property : context.getProperties().entrySet()) {
                        final PropertyDescriptor descriptor = property.getKey();
                        if (!descriptor.isDynamic()) {
                            continue;
                        }
                        final String evaluated = context.newPropertyValue(property.getValue()).evaluateAttributeExpressions(original).getValue();
                        xslt.setParameter(descriptor.getName(), evaluated);
                    }
                    // Stream the FlowFile content through the transformer (StreamSource, per the original Saxon note)
                    xslt.transform(new StreamSource(bufferedIn), new StreamResult(out));
                } catch (final Exception failure) {
                    // The callback may only throw IOException, so wrap anything else
                    throw new IOException(failure);
                }
            }
        });
        session.transfer(transformed, REL_SUCCESS);
        session.getProvenanceReporter().modifyContent(transformed, timer.getElapsed(TimeUnit.MILLISECONDS));
        log.info("Transformed {}", new Object[] { original });
    } catch (ProcessException failure) {
        log.error("Unable to transform {} due to {}", new Object[] { original, failure });
        session.transfer(original, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) Transformer(javax.xml.transform.Transformer) StreamResult(javax.xml.transform.stream.StreamResult) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) StreamSource(javax.xml.transform.stream.StreamSource) Templates(javax.xml.transform.Templates) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) StreamCallback(org.apache.nifi.processor.io.StreamCallback) ProcessException(org.apache.nifi.processor.exception.ProcessException) TransformerConfigurationException(javax.xml.transform.TransformerConfigurationException) IOException(java.io.IOException) StopWatch(org.apache.nifi.util.StopWatch) ProcessException(org.apache.nifi.processor.exception.ProcessException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream)

Example 4 with BufferedInputStream

use of org.apache.nifi.stream.io.BufferedInputStream in project nifi by apache.

the class ZipUnpackerSequenceFileWriter method processInputStream.

/**
 * Reads the given stream as a zip archive and appends every non-directory entry to the writer.
 *
 * <p>Each entry is keyed by the last path segment of its name, and its value is an
 * {@code InputStreamWritable} wrapping the zip stream together with the entry size.
 *
 * @param stream   the raw zip content; it is buffered and closed by this method
 * @param flowFile the FlowFile the content belongs to (not read by this method)
 * @param writer   the writer each entry is appended to
 * @throws IOException if the archive cannot be read, an entry cannot be appended, or an entry reports a
 *                     size larger than {@code Integer.MAX_VALUE} bytes
 */
@Override
protected void processInputStream(InputStream stream, final FlowFile flowFile, final Writer writer) throws IOException {
    try (final ZipInputStream zipIn = new ZipInputStream(new BufferedInputStream(stream))) {
        ZipEntry zipEntry;
        while ((zipEntry = zipIn.getNextEntry()) != null) {
            if (zipEntry.isDirectory()) {
                continue;
            }
            final File file = new File(zipEntry.getName());
            // Only the file name is used as key; any directory part of the entry name is dropped
            final String key = file.getName();
            final long fileSize = zipEntry.getSize();
            // The writable takes an int size, so fail loudly instead of letting the narrowing cast wrap
            if (fileSize > Integer.MAX_VALUE) {
                throw new IOException("Zip entry " + zipEntry.getName() + " has size " + fileSize + " bytes, which exceeds the maximum supported size of " + Integer.MAX_VALUE + " bytes");
            }
            // NOTE(review): getSize() is -1 when the size is unknown; that value is passed through unchanged, as before
            final InputStreamWritable inStreamWritable = new InputStreamWritable(zipIn, (int) fileSize);
            writer.append(new Text(key), inStreamWritable);
            logger.debug("Appending FlowFile {} to Sequence File", new Object[] { key });
        }
    }
}
Also used : InputStreamWritable(org.apache.nifi.processors.hadoop.util.InputStreamWritable) ZipInputStream(java.util.zip.ZipInputStream) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) ZipEntry(java.util.zip.ZipEntry) Text(org.apache.hadoop.io.Text) FlowFile(org.apache.nifi.flowfile.FlowFile) File(java.io.File)

Example 5 with BufferedInputStream

use of org.apache.nifi.stream.io.BufferedInputStream in project nifi by apache.

the class ExtractEmailHeaders method onTrigger.

/**
 * Parses the incoming FlowFile as a MIME e-mail message and copies selected headers into FlowFile attributes.
 *
 * <p>A message without a "From" header or without a sent date is treated as invalid. For a valid message the
 * method records: every header listed in {@code CAPTURED_HEADERS} (colon separated, matched case-insensitively)
 * under {@code email.headers.<lower-cased name>}, the To/CC/BCC/From address lists, the message id, the
 * received and sent dates, the subject, and the attachment count.
 *
 * <p>The FlowFile goes to {@code REL_SUCCESS} with the extracted attributes, or to {@code REL_FAILURE}
 * without them when parsing or validation fails.
 *
 * @param context the process context supplying {@code STRICT_PARSING} and {@code CAPTURED_HEADERS}
 * @param session the session used to read, update and transfer the FlowFile
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> processedFlowFilesList = new ArrayList<>();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final String requireStrictAddresses = context.getProperty(STRICT_PARSING).getValue();
    // Lower-case with Locale.ROOT so matching and attribute names do not depend on the JVM default locale
    final List<String> capturedHeadersList = Arrays.asList(context.getProperty(CAPTURED_HEADERS).getValue().toLowerCase(java.util.Locale.ROOT).split(":"));
    final Map<String, String> attributes = new HashMap<>();
    session.read(originalFlowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC-2822 determines that a message must have a "From:" header
                // if a message lacks the field, it is flagged as invalid
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessagingException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC-2822 validation: No Sent Date");
                } else if (capturedHeadersList.size() > 0) {
                    Enumeration headers = originalMessage.getAllHeaders();
                    while (headers.hasMoreElements()) {
                        Header header = (Header) headers.nextElement();
                        if (StringUtils.isNotEmpty(header.getValue())) {
                            final String headerName = header.getName().toLowerCase(java.util.Locale.ROOT);
                            if (capturedHeadersList.contains(headerName)) {
                                attributes.put("email.headers." + headerName, header.getValue());
                            }
                        }
                    }
                }
                putAddressListInAttributes(attributes, EMAIL_HEADER_TO, originalMessage.getRecipients(Message.RecipientType.TO));
                putAddressListInAttributes(attributes, EMAIL_HEADER_CC, originalMessage.getRecipients(Message.RecipientType.CC));
                putAddressListInAttributes(attributes, EMAIL_HEADER_BCC, originalMessage.getRecipients(Message.RecipientType.BCC));
                // RFC-2822 specifies "From" as mailbox-list
                putAddressListInAttributes(attributes, EMAIL_HEADER_FROM, from);
                final String messageId = originalMessage.getMessageID();
                if (StringUtils.isNotEmpty(messageId)) {
                    attributes.put(EMAIL_HEADER_MESSAGE_ID, messageId);
                }
                final Date receivedDate = originalMessage.getReceivedDate();
                if (receivedDate != null) {
                    attributes.put(EMAIL_HEADER_RECV_DATE, receivedDate.toString());
                }
                // sentDate was validated as non-null above
                attributes.put(EMAIL_HEADER_SENT_DATE, sentDate.toString());
                final String subject = originalMessage.getSubject();
                if (StringUtils.isNotEmpty(subject)) {
                    attributes.put(EMAIL_HEADER_SUBJECT, subject);
                }
                // Zeroes EMAIL_ATTACHMENT_COUNT
                attributes.put(EMAIL_ATTACHMENT_COUNT, "0");
                // But insert correct value if attachments are present
                if (parser.hasAttachments()) {
                    attributes.put(EMAIL_ATTACHMENT_COUNT, String.valueOf(parser.getAttachmentList().size()));
                }
            } catch (Exception e) {
                // Message is invalid or triggered an error during parsing
                // Drop anything collected so far so the FlowFile is not also routed to success
                attributes.clear();
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    // A non-empty attribute map means the message parsed and validated
    if (attributes.size() > 0) {
        FlowFile updatedFlowFile = session.putAllAttributes(originalFlowFile, attributes);
        logger.info("Extracted {} headers into {} file", new Object[] { attributes.size(), updatedFlowFile });
        processedFlowFilesList.add(updatedFlowFile);
    }
    session.transfer(processedFlowFilesList, REL_SUCCESS);
    session.transfer(invalidFlowFilesList, REL_FAILURE);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) Enumeration(java.util.Enumeration) HashMap(java.util.HashMap) MessagingException(javax.mail.MessagingException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Properties(java.util.Properties) MimeMessageParser(org.apache.commons.mail.util.MimeMessageParser) ComponentLog(org.apache.nifi.logging.ComponentLog) Date(java.util.Date) MessagingException(javax.mail.MessagingException) IOException(java.io.IOException) Header(javax.mail.Header) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) MimeMessage(javax.mail.internet.MimeMessage) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) ProcessSession(org.apache.nifi.processor.ProcessSession) Session(javax.mail.Session)

Aggregations

BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream): 14, IOException (java.io.IOException): 12, InputStream (java.io.InputStream): 12, FlowFile (org.apache.nifi.flowfile.FlowFile): 11, ComponentLog (org.apache.nifi.logging.ComponentLog): 10, InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 10, OutputStream (java.io.OutputStream): 6, ArrayList (java.util.ArrayList): 6, AtomicReference (java.util.concurrent.atomic.AtomicReference): 6, ProcessException (org.apache.nifi.processor.exception.ProcessException): 6, HashMap (java.util.HashMap): 5, Map (java.util.Map): 4, OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 4, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 3, Properties (java.util.Properties): 3, BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream): 3, File (java.io.File): 2, StringReader (java.io.StringReader): 2, Date (java.util.Date): 2, List (java.util.List): 2