Example 46 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class DataGeneratorTestProcessor method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile toRemove = session.get();
    if (toRemove != null) {
        LOG.warn("Removing flow file");
        session.remove(toRemove);
    }
    FlowFile flowFile = session.create();
    final Random random = new Random();
    final byte[] data = new byte[4096];
    random.nextBytes(data);
    flowFile = session.write(flowFile, new OutputStreamCallback() {

        @Override
        public void process(final OutputStream out) throws IOException {
            out.write(data);
        }
    });
    LOG.info("{} transferring {} to success", new Object[] { this, flowFile });
    session.transfer(flowFile, REL_SUCCESS);
    session.commit();
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) Random(java.util.Random) OutputStream(java.io.OutputStream) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback)
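
Note: OutputStreamCallback declares a single process(OutputStream) method, so on Java 8+ the anonymous class above can be written as a lambda. A minimal sketch of the same write, assuming the surrounding code targets Java 8 or later:

// Lambda equivalent of the anonymous OutputStreamCallback above;
// 'data' is final, so the capture compiles as-is.
flowFile = session.write(flowFile, out -> out.write(data));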

Example 47 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class QueryCassandra method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile fileToProcess = null;
    if (context.hasIncomingConnection()) {
        fileToProcess = session.get();
        // If there is no FlowFile but the incoming connections are not just self-loops,
        // we should run only when a FlowFile is available, so there is nothing to do.
        if (fileToProcess == null && context.hasNonLoopConnection()) {
            return;
        }
    }
    final ComponentLog logger = getLogger();
    final String selectQuery = context.getProperty(CQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
    final long queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asTimePeriod(TimeUnit.MILLISECONDS);
    final String outputFormat = context.getProperty(OUTPUT_FORMAT).getValue();
    final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(fileToProcess).getValue());
    final StopWatch stopWatch = new StopWatch(true);
    if (fileToProcess == null) {
        fileToProcess = session.create();
    }
    try {
        // The documentation for the driver recommends the session remain open the entire time the processor is running
        // and states that it is thread-safe. This is why connectionSession is not in a try-with-resources.
        final Session connectionSession = cassandraSession.get();
        final ResultSetFuture queryFuture = connectionSession.executeAsync(selectQuery);
        final AtomicLong nrOfRows = new AtomicLong(0L);
        fileToProcess = session.write(fileToProcess, new OutputStreamCallback() {

            @Override
            public void process(final OutputStream out) throws IOException {
                try {
                    logger.debug("Executing CQL query {}", new Object[] { selectQuery });
                    final ResultSet resultSet;
                    if (queryTimeout > 0) {
                        resultSet = queryFuture.getUninterruptibly(queryTimeout, TimeUnit.MILLISECONDS);
                        if (AVRO_FORMAT.equals(outputFormat)) {
                            nrOfRows.set(convertToAvroStream(resultSet, out, queryTimeout, TimeUnit.MILLISECONDS));
                        } else if (JSON_FORMAT.equals(outputFormat)) {
                            nrOfRows.set(convertToJsonStream(resultSet, out, charset, queryTimeout, TimeUnit.MILLISECONDS));
                        }
                    } else {
                        resultSet = queryFuture.getUninterruptibly();
                        if (AVRO_FORMAT.equals(outputFormat)) {
                            nrOfRows.set(convertToAvroStream(resultSet, out, 0, null));
                        } else if (JSON_FORMAT.equals(outputFormat)) {
                            nrOfRows.set(convertToJsonStream(resultSet, out, charset, 0, null));
                        }
                    }
                } catch (final TimeoutException | InterruptedException | ExecutionException e) {
                    throw new ProcessException(e);
                }
            }
        });
        // set an attribute recording how many rows were selected
        fileToProcess = session.putAttribute(fileToProcess, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
        // set mime.type based on the output format
        fileToProcess = session.putAttribute(fileToProcess, CoreAttributes.MIME_TYPE.key(), JSON_FORMAT.equals(outputFormat) ? "application/json" : "application/avro-binary");
        logger.info("{} contains {} records; transferring to 'success'", new Object[] { fileToProcess, nrOfRows.get() });
        session.getProvenanceReporter().modifyContent(fileToProcess, "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(fileToProcess, REL_SUCCESS);
    } catch (final NoHostAvailableException nhae) {
        getLogger().error("No host in the Cassandra cluster can be contacted successfully to execute this query", nhae);
        // Log up to 10 error messages. Otherwise if a 1000-node cluster was specified but there was no connectivity,
        // a thousand error messages would be logged. However we would like information from Cassandra itself, so
        // cap the error limit at 10, format the messages, and don't include the stack trace (it is displayed by the
        // logger message above).
        getLogger().error(nhae.getCustomMessage(10, true, false));
        fileToProcess = session.penalize(fileToProcess);
        session.transfer(fileToProcess, REL_RETRY);
    } catch (final QueryExecutionException qee) {
        logger.error("Cannot execute the query with the requested consistency level successfully", qee);
        fileToProcess = session.penalize(fileToProcess);
        session.transfer(fileToProcess, REL_RETRY);
    } catch (final QueryValidationException qve) {
        if (context.hasIncomingConnection()) {
            logger.error("The CQL query {} is invalid due to syntax error, authorization issue, or another " + "validation problem; routing {} to failure", new Object[] { selectQuery, fileToProcess }, qve);
            fileToProcess = session.penalize(fileToProcess);
            session.transfer(fileToProcess, REL_FAILURE);
        } else {
            // This can happen if any exceptions occur while setting up the connection, statement, etc.
            logger.error("The CQL query {} is invalid due to syntax error, authorization issue, or another " + "validation problem", new Object[] { selectQuery }, qve);
            session.remove(fileToProcess);
            context.yield();
        }
    } catch (final ProcessException e) {
        if (context.hasIncomingConnection()) {
            logger.error("Unable to execute CQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, fileToProcess, e });
            fileToProcess = session.penalize(fileToProcess);
            session.transfer(fileToProcess, REL_FAILURE);
        } else {
            logger.error("Unable to execute CQL select query {} due to {}", new Object[] { selectQuery, e });
            session.remove(fileToProcess);
            context.yield();
        }
    }
}
Also used : FlowFile (org.apache.nifi.flowfile.FlowFile), ResultSetFuture (com.datastax.driver.core.ResultSetFuture), OutputStream (java.io.OutputStream), Charset (java.nio.charset.Charset), ComponentLog (org.apache.nifi.logging.ComponentLog), StopWatch (org.apache.nifi.util.StopWatch), AtomicLong (java.util.concurrent.atomic.AtomicLong), ProcessException (org.apache.nifi.processor.exception.ProcessException), QueryExecutionException (com.datastax.driver.core.exceptions.QueryExecutionException), QueryValidationException (com.datastax.driver.core.exceptions.QueryValidationException), NoHostAvailableException (com.datastax.driver.core.exceptions.NoHostAvailableException), ResultSet (com.datastax.driver.core.ResultSet), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), ExecutionException (java.util.concurrent.ExecutionException), Session (com.datastax.driver.core.Session), ProcessSession (org.apache.nifi.processor.ProcessSession), TimeoutException (java.util.concurrent.TimeoutException)
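
Note on the wrap-and-rethrow inside the callback above: process(OutputStream) may only declare IOException, so the checked driver exceptions (TimeoutException, InterruptedException, ExecutionException) are tunneled out as the unchecked ProcessException and handled by onTrigger's own catch block. A minimal, self-contained sketch of the pattern — fetchRows is a hypothetical stand-in for the CQL fetch, not part of the NiFi or driver API:

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ExecutionException;

import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.OutputStreamCallback;

public class TunnelCheckedExceptions {

    // Hypothetical stand-in for the CQL fetch; any checked exception fits here.
    static void fetchRows(final OutputStream out) throws ExecutionException, IOException {
        out.write("row\n".getBytes(StandardCharsets.UTF_8));
    }

    public static OutputStreamCallback rowWriter() {
        return new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                try {
                    fetchRows(out);
                } catch (final ExecutionException e) {
                    // process() cannot declare ExecutionException, so wrap it in the
                    // unchecked ProcessException; onTrigger catches it further out.
                    throw new ProcessException(e);
                }
            }
        };
    }
}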

Example 48 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class GetCouchbaseKey method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile inFile = session.get();
    if (inFile == null) {
        return;
    }
    final long startNanos = System.nanoTime();
    final ComponentLog logger = getLogger();
    String docId = null;
    if (!StringUtils.isEmpty(context.getProperty(DOC_ID).getValue())) {
        docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(inFile).getValue();
    } else {
        final byte[] content = new byte[(int) inFile.getSize()];
        session.read(inFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, content, true);
            }
        });
        docId = new String(content, StandardCharsets.UTF_8);
    }
    if (StringUtils.isEmpty(docId)) {
        throw new ProcessException("Please check 'Document Id' setting. Couldn't get document id from " + inFile);
    }
    try {
        final Document<?> doc;
        final byte[] content;
        final Bucket bucket = openBucket(context);
        final DocumentType documentType = DocumentType.valueOf(context.getProperty(DOCUMENT_TYPE).getValue());
        switch(documentType) {
            case Json:
                {
                    RawJsonDocument document = bucket.get(docId, RawJsonDocument.class);
                    if (document == null) {
                        doc = null;
                        content = null;
                    } else {
                        content = document.content().getBytes(StandardCharsets.UTF_8);
                        doc = document;
                    }
                    break;
                }
            case Binary:
                {
                    BinaryDocument document = bucket.get(docId, BinaryDocument.class);
                    if (document == null) {
                        doc = null;
                        content = null;
                    } else {
                        content = document.content().array();
                        doc = document;
                    }
                    break;
                }
            default:
                {
                    doc = null;
                    content = null;
                }
        }
        if (doc == null) {
            logger.error("Document {} was not found in {}; routing {} to failure", new Object[] { docId, getTransitUrl(context, docId), inFile });
            inFile = session.putAttribute(inFile, CouchbaseAttributes.Exception.key(), DocumentDoesNotExistException.class.getName());
            session.transfer(inFile, REL_FAILURE);
            return;
        }
        FlowFile outFile = session.create(inFile);
        outFile = session.write(outFile, new OutputStreamCallback() {

            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(content);
            }
        });
        final Map<String, String> updatedAttrs = new HashMap<>();
        updatedAttrs.put(CouchbaseAttributes.Cluster.key(), context.getProperty(COUCHBASE_CLUSTER_SERVICE).getValue());
        updatedAttrs.put(CouchbaseAttributes.Bucket.key(), context.getProperty(BUCKET_NAME).getValue());
        updatedAttrs.put(CouchbaseAttributes.DocId.key(), docId);
        updatedAttrs.put(CouchbaseAttributes.Cas.key(), String.valueOf(doc.cas()));
        updatedAttrs.put(CouchbaseAttributes.Expiry.key(), String.valueOf(doc.expiry()));
        outFile = session.putAllAttributes(outFile, updatedAttrs);
        final long fetchMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        session.getProvenanceReporter().fetch(outFile, getTransitUrl(context, docId), fetchMillis);
        session.transfer(outFile, REL_SUCCESS);
        session.transfer(inFile, REL_ORIGINAL);
    } catch (final CouchbaseException e) {
        String errMsg = String.format("Getting document %s from Couchbase Server using %s failed due to %s", docId, inFile, e);
        handleCouchbaseException(context, session, logger, inFile, e, errMsg);
    }
}
Also used : FlowFile (org.apache.nifi.flowfile.FlowFile), HashMap (java.util.HashMap), InputStream (java.io.InputStream), OutputStream (java.io.OutputStream), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), RawJsonDocument (com.couchbase.client.java.document.RawJsonDocument), BinaryDocument (com.couchbase.client.java.document.BinaryDocument), ProcessException (org.apache.nifi.processor.exception.ProcessException), CouchbaseException (com.couchbase.client.core.CouchbaseException), Bucket (com.couchbase.client.java.Bucket), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback)
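
Note: like OutputStreamCallback, InputStreamCallback has a single process method, so on Java 8+ the read/write pair above collapses to lambdas. A minimal sketch reusing the example's own variables:

// Lambda equivalents of the anonymous classes above (Java 8+).
session.read(inFile, in -> StreamUtils.fillBuffer(in, content, true));
// ... and, once 'content' has been fetched from Couchbase:
outFile = session.write(outFile, out -> out.write(content));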

Example 49 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class ConsumeEWS method transfer.

/**
 * Disposes of the message by converting it to a {@link FlowFile} and transferring
 * it to the REL_SUCCESS relationship.
 */
private void transfer(Message emailMessage, ProcessContext context, ProcessSession processSession) {
    long start = System.nanoTime();
    FlowFile flowFile = processSession.create();
    flowFile = processSession.append(flowFile, new OutputStreamCallback() {

        @Override
        public void process(final OutputStream out) throws IOException {
            try {
                emailMessage.writeTo(out);
            } catch (MessagingException e) {
                throw new IOException(e);
            }
        }
    });
    long executionDuration = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    String fromAddressesString = "";
    try {
        Address[] fromAddresses = emailMessage.getFrom();
        if (fromAddresses != null) {
            fromAddressesString = Arrays.asList(fromAddresses).toString();
        }
    } catch (MessagingException e) {
        this.logger.warn("Faild to retrieve 'From' attribute from Message.");
    }
    processSession.getProvenanceReporter().receive(flowFile, this.displayUrl, "Received message from " + fromAddressesString, executionDuration);
    this.getLogger().info("Successfully received {} from {} in {} millis", new Object[] { flowFile, fromAddressesString, executionDuration });
    processSession.transfer(flowFile, REL_SUCCESS);
    try {
        emailMessage.setFlag(Flags.Flag.DELETED, this.shouldSetDeleteFlag);
    } catch (MessagingException e) {
        this.logger.warn("Failed to set DELETE Flag on the message, data duplication may occur.");
    }
}
Also used : FlowFile (org.apache.nifi.flowfile.FlowFile), Address (javax.mail.Address), MessagingException (javax.mail.MessagingException), OutputStream (java.io.OutputStream), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), IOException (java.io.IOException)
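
Note: on a freshly created FlowFile, append behaves like write; append is presumably used so the same code would still work if content were accumulated across multiple writes. A hedged sketch of the write-based equivalent, with the checked MessagingException rewrapped exactly as above:

flowFile = processSession.write(flowFile, new OutputStreamCallback() {

    @Override
    public void process(final OutputStream out) throws IOException {
        try {
            emailMessage.writeTo(out);
        } catch (MessagingException e) {
            // writeTo declares MessagingException, which process() cannot; rewrap as IOException.
            throw new IOException(e);
        }
    }
});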

Example 50 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class ExtractTNEFAttachments method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();
    session.read(originalFlowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                // Parsing throws an exception if the content is not valid TNEF.
                final HMEFMessage hmefMessage = new HMEFMessage(in);
                // Add the original FlowFile (may be reverted later on in case of errors).
                originalFlowFilesList.add(originalFlowFile);
                if (!hmefMessage.getAttachments().isEmpty()) {
                    final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                    try {
                        for (final Attachment attachment : hmefMessage.getAttachments()) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();
                            if (StringUtils.isNotBlank(attachment.getLongFilename())) {
                                attributes.put(CoreAttributes.FILENAME.key(), attachment.getLongFilename());
                            }
                            final String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
                            // TODO: Extract the MIME type (HMEF doesn't seem to expose this information).
                            split = session.append(split, new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(attachment.getContents());
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong: remove any splits that may have been created
                        session.remove(attachmentsList);
                        // and drop the original FlowFile from the 'original' list.
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error("FlowFile {} triggered error {} while processing message; removing generated FlowFiles from session", new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Another error hit...
                // Removing the original flow from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                // Message is invalid or triggered an error during parsing
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    session.transfer(attachmentsList, REL_ATTACHMENTS);
    // As per the code above, the original FlowFile is routed to either failure or
    // original depending on whether it parsed as valid TNEF.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);
    // check if attachments have been extracted
    if (!attachmentsList.isEmpty()) {
        if (attachmentsList.size() > 10) {
            // If more than 10, summarise log
            logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
        } else {
            // Otherwise be more verbose and list each individual split
            logger.info("Split {} into {} files: {}", new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
        }
    }
}
Also used : FlowFile (org.apache.nifi.flowfile.FlowFile), BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream), InputStream (java.io.InputStream), OutputStream (java.io.OutputStream), ArrayList (java.util.ArrayList), Attachment (org.apache.poi.hmef.Attachment), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), FlowFileHandlingException (org.apache.nifi.processor.exception.FlowFileHandlingException), HMEFMessage (org.apache.poi.hmef.HMEFMessage), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), HashMap (java.util.HashMap), Map (java.util.Map)
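
Note: OutputStreamCallback-based writes like these are typically exercised with the nifi-mock TestRunner (note Test and MockFlowFile in the Aggregations below). A hedged sketch for the TNEF example, assuming the processor's relationship constants are accessible as shown and using a hypothetical sample file path:

import java.nio.file.Paths;

import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;

public class ExtractTNEFAttachmentsTest {

    @Test
    public void testAttachmentsAreSplit() throws Exception {
        final TestRunner runner = TestRunners.newTestRunner(new ExtractTNEFAttachments());
        // Hypothetical sample file; any valid TNEF (winmail.dat) payload works.
        runner.enqueue(Paths.get("src/test/resources/winmail.dat"));
        runner.run();

        runner.assertTransferCount(ExtractTNEFAttachments.REL_ORIGINAL, 1);
        for (final MockFlowFile attachment : runner.getFlowFilesForRelationship(ExtractTNEFAttachments.REL_ATTACHMENTS)) {
            // "filename" is the key of CoreAttributes.FILENAME.
            attachment.assertAttributeExists("filename");
        }
    }
}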

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile): 70
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 70
OutputStream (java.io.OutputStream): 69
IOException (java.io.IOException): 39
ProcessException (org.apache.nifi.processor.exception.ProcessException): 27
HashMap (java.util.HashMap): 25
InputStream (java.io.InputStream): 24
Test (org.junit.Test): 24
MockFlowFile (org.apache.nifi.util.MockFlowFile): 23
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 20
ComponentLog (org.apache.nifi.logging.ComponentLog): 17
FileOutputStream (java.io.FileOutputStream): 16
FilterOutputStream (java.io.FilterOutputStream): 16
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 14
ArrayList (java.util.ArrayList): 12
Map (java.util.Map): 12
ProcessSession (org.apache.nifi.processor.ProcessSession): 12
BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream): 10
AtomicReference (java.util.concurrent.atomic.AtomicReference): 9
StandardContentClaim (org.apache.nifi.controller.repository.claim.StandardContentClaim): 9