Search in sources :

Example 71 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class PutElasticsearch method onTrigger.

/**
 * Pulls up to the configured batch size of FlowFiles from the session and sends their content to
 * Elasticsearch in a single bulk request.
 *
 * <p>FlowFiles that have no value in the configured identifier attribute are routed to
 * {@code REL_FAILURE} immediately and are not added to the bulk request. Every remaining FlowFile is
 * added as an "index", "upsert" or "update" action (depending on the evaluated index operation) and is
 * routed to {@code REL_SUCCESS} or {@code REL_FAILURE} according to its individual bulk item response.
 *
 * <p>Connectivity/timeout exceptions route all not-yet-transferred FlowFiles to {@code REL_RETRY};
 * any other exception routes them to {@code REL_FAILURE}. In both cases the processor yields.
 *
 * @param context provides the processor's property values
 * @param session the session the FlowFiles are read from and transferred through
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    final String id_attribute = context.getProperty(ID_ATTRIBUTE).getValue();
    final int batchSize = context.getProperty(BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final List<FlowFile> flowFiles = session.get(batchSize);
    if (flowFiles.isEmpty()) {
        return;
    }
    // Keep track of the list of flow files that need to be transferred. As they are transferred, remove them from the list.
    List<FlowFile> flowFilesToTransfer = new LinkedList<>(flowFiles);
    try {
        final BulkRequestBuilder bulk = esClient.get().prepareBulk();
        if (authToken != null) {
            bulk.putHeader("Authorization", authToken);
        }
        for (FlowFile file : flowFiles) {
            final String index = context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue();
            final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(file).getValue();
            final String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(file).getValue();
            final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(file).getValue());
            final String id = file.getAttribute(id_attribute);
            if (id == null) {
                // Without an identifier the document cannot be addressed; fail it now and keep it out of the bulk request.
                logger.error("No value in identifier attribute {} for {}, transferring to failure", new Object[] { id_attribute, file });
                flowFilesToTransfer.remove(file);
                session.transfer(file, REL_FAILURE);
            } else {
                session.read(file, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream in) throws IOException {
                        // Flatten the document onto one line by replacing every line break with a space.
                        String json = IOUtils.toString(in, charset).replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
                        if (indexOp.equalsIgnoreCase("index")) {
                            bulk.add(esClient.get().prepareIndex(index, docType, id).setSource(json.getBytes(charset)));
                        } else if (indexOp.equalsIgnoreCase("upsert")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)).setDocAsUpsert(true));
                        } else if (indexOp.equalsIgnoreCase("update")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)));
                        } else {
                            throw new IOException("Index operation: " + indexOp + " not supported.");
                        }
                    }
                });
            }
        }
        // Every FlowFile of the batch may already have been routed to failure above, leaving the bulk
        // request without any action. Only contact Elasticsearch when there is something to send, so an
        // empty request does not end up in the generic failure handler below (spurious error + yield).
        if (bulk.numberOfActions() > 0) {
            final BulkResponse response = bulk.execute().actionGet();
            if (response.hasFailures()) {
                // Responses are guaranteed to be in order, remove them in reverse order
                BulkItemResponse[] responses = response.getItems();
                if (responses != null && responses.length > 0) {
                    for (int i = responses.length - 1; i >= 0; i--) {
                        // flowFilesToTransfer holds exactly the FlowFiles that were added to the bulk
                        // request, in the same order, so index i matches response i.
                        final FlowFile flowFile = flowFilesToTransfer.get(i);
                        if (responses[i].isFailed()) {
                            logger.error("Failed to insert {} into Elasticsearch due to {}, transferring to failure", new Object[] { flowFile, responses[i].getFailure().getMessage() });
                            session.transfer(flowFile, REL_FAILURE);
                        } else {
                            session.getProvenanceReporter().send(flowFile, context.getProperty(HOSTS).evaluateAttributeExpressions().getValue() + "/" + responses[i].getIndex());
                            session.transfer(flowFile, REL_SUCCESS);
                        }
                        flowFilesToTransfer.remove(flowFile);
                    }
                }
            }
        }
        // Transfer any remaining flowfiles to success
        flowFilesToTransfer.forEach(file -> {
            session.transfer(file, REL_SUCCESS);
            // Record provenance event
            session.getProvenanceReporter().send(file, context.getProperty(HOSTS).evaluateAttributeExpressions().getValue() + "/" + context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue());
        });
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        // Authorization errors and other problems are often returned as NoNodeAvailableExceptions without a
        // traceable cause. However the cause seems to be logged, just not available to this caught exception.
        // Since the error message will show up as a bulletin, we make specific mention to check the logs for
        // more details.
        logger.error("Failed to insert into Elasticsearch due to {}. More detailed information may be available in " + "the NiFi logs.", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFilesToTransfer, REL_RETRY);
        context.yield();
    } catch (Exception exceptionToFail) {
        logger.error("Failed to insert into Elasticsearch due to {}, transferring to failure", new Object[] { exceptionToFail.getLocalizedMessage() }, exceptionToFail);
        session.transfer(flowFilesToTransfer, REL_FAILURE);
        context.yield();
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) IOException(java.io.IOException) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) ComponentLog(org.apache.nifi.logging.ComponentLog) LinkedList(java.util.LinkedList) NodeClosedException(org.elasticsearch.node.NodeClosedException) ProcessException(org.apache.nifi.processor.exception.ProcessException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) IOException(java.io.IOException) ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) NodeClosedException(org.elasticsearch.node.NodeClosedException) BulkRequestBuilder(org.elasticsearch.action.bulk.BulkRequestBuilder)

Example 72 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class ScrollElasticsearchHttp method onTrigger.

/**
 * Fetches one page of a scrolling Elasticsearch query over HTTP and hands the response to
 * {@code getPage} together with a newly created FlowFile.
 *
 * <p>Does nothing when the state manager reports the query as finished. The request URL is built
 * from the configured properties and the scroll id loaded from the state manager (which is
 * {@code null} for the first request of a query).
 *
 * <p>On an {@link IOException} the created FlowFile is removed; on any other exception it is routed
 * to {@code REL_FAILURE}. In both cases the processor yields.
 *
 * @param context provides the processor's property values and state manager
 * @param session the session used to create, remove and transfer the FlowFile
 * @throws ProcessException if the "query finished" state cannot be retrieved
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    try {
        if (isQueryFinished(context.getStateManager())) {
            getLogger().trace("Query has been marked finished in the state manager.  " + "To run another query, clear the state.");
            return;
        }
    } catch (IOException e) {
        throw new ProcessException("Could not retrieve state", e);
    }
    OkHttpClient okHttpClient = getClient();
    FlowFile flowFile = session.create();
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String query = context.getProperty(QUERY).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final int pageSize = context.getProperty(PAGE_SIZE).evaluateAttributeExpressions(flowFile).asInteger().intValue();
    final String fields = context.getProperty(FIELDS).isSet() ? context.getProperty(FIELDS).evaluateAttributeExpressions(flowFile).getValue() : null;
    final String sort = context.getProperty(SORT).isSet() ? context.getProperty(SORT).evaluateAttributeExpressions(flowFile).getValue() : null;
    final String scroll = context.getProperty(SCROLL_DURATION).isSet() ? context.getProperty(SCROLL_DURATION).evaluateAttributeExpressions(flowFile).getValue() : null;
    // Authentication
    final String username = context.getProperty(USERNAME).evaluateAttributeExpressions().getValue();
    final String password = context.getProperty(PASSWORD).evaluateAttributeExpressions().getValue();
    final ComponentLog logger = getLogger();
    try {
        final String scrollId = loadScrollId(context.getStateManager());
        // read the url property from the context
        final String urlstr = StringUtils.trimToEmpty(context.getProperty(ES_URL).evaluateAttributeExpressions().getValue());
        if (scrollId == null) {
            // No scroll id stored yet: this is the initial request of the query.
            logger.debug("Querying {}/{} from Elasticsearch: {}", new Object[] { index, docType, query });
        }
        // The initial and the follow-up request are built and sent the same way; only the scroll id differs.
        final URL requestUrl = buildRequestURL(urlstr, query, index, docType, fields, sort, scrollId, pageSize, scroll, context);
        final long startNanos = System.nanoTime();
        // try-with-resources so the HTTP response is released even when getPage throws.
        try (final Response getResponse = sendRequestToElasticsearch(okHttpClient, requestUrl, username, password, "GET", null)) {
            this.getPage(getResponse, requestUrl, context, session, flowFile, logger, startNanos);
        }
    } catch (IOException ioe) {
        logger.error("Failed to read from Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.).", new Object[] { ioe.getLocalizedMessage() }, ioe);
        session.remove(flowFile);
        context.yield();
    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
Also used : Response(okhttp3.Response) FlowFile(org.apache.nifi.flowfile.FlowFile) ProcessException(org.apache.nifi.processor.exception.ProcessException) OkHttpClient(okhttp3.OkHttpClient) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) URL(java.net.URL) ProcessException(org.apache.nifi.processor.exception.ProcessException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException)

Example 73 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class JsonQueryElasticsearch method handleHits.

/**
 * Serializes the search hits to JSON and writes them into new FlowFiles.
 *
 * <p>When the "split up hits" property equals {@code SPLIT_UP_YES}, one FlowFile is produced per
 * hit; otherwise the whole list of hits is serialized into a single FlowFile. Each new FlowFile is
 * created as a child of {@code parent} when a parent is given.
 *
 * @param hits the hits to serialize
 * @param context provides the "split up hits" property
 * @param session the session used to create the FlowFiles
 * @param parent the FlowFile to derive the new FlowFiles from, or {@code null}
 * @param attributes passed through to {@code writeHitFlowFile} for every FlowFile
 * @return the FlowFiles returned by {@code writeHitFlowFile}, in creation order
 * @throws IOException if serialization or writing fails
 */
private List<FlowFile> handleHits(List<Map<String, Object>> hits, ProcessContext context, ProcessSession session, FlowFile parent, Map<String, String> attributes) throws IOException {
    final String splitSetting = context.getProperty(SPLIT_UP_HITS).getValue();
    final List<FlowFile> written = new ArrayList<>();

    if (!splitSetting.equals(SPLIT_UP_YES.getValue())) {
        // Single-FlowFile mode: the complete hit list becomes one JSON document.
        final FlowFile combined;
        if (parent == null) {
            combined = session.create();
        } else {
            combined = session.create(parent);
        }
        final String allHitsJson = mapper.writeValueAsString(hits);
        written.add(writeHitFlowFile(allHitsJson, session, combined, attributes));
        return written;
    }

    // Split mode: every hit gets its own FlowFile.
    for (final Map<String, Object> singleHit : hits) {
        final FlowFile perHit;
        if (parent == null) {
            perHit = session.create();
        } else {
            perHit = session.create(parent);
        }
        final String hitJson = mapper.writeValueAsString(singleHit);
        written.add(writeHitFlowFile(hitJson, session, perHit, attributes));
    }
    return written;
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ArrayList(java.util.ArrayList)

Example 74 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class JsonQueryElasticsearch method onTrigger.

/**
 * Runs a JSON query against Elasticsearch through the client service and routes the results.
 *
 * <p>Hit FlowFiles go to {@code REL_HITS}, aggregation FlowFiles to {@code REL_AGGREGATIONS} and the
 * incoming FlowFile (if any) to {@code REL_ORIGINAL}. A provenance "send" event is recorded for
 * every result FlowFile. If anything throws, the error is logged, the incoming FlowFile (if any) is
 * routed to {@code REL_FAILURE} and the processor yields.
 *
 * @param context provides the processor's property values and connection information
 * @param session the session the FlowFiles are read from and transferred through
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile input = null;
    if (context.hasIncomingConnection()) {
        input = session.get();
        // Nothing queued on a non-loop incoming connection: there is no work to do.
        final boolean nothingQueued = (input == null);
        if (nothingQueued && context.hasNonLoopConnection()) {
            return;
        }
    }

    try {
        final String query = getQuery(input, context, session);
        final String index = context.getProperty(INDEX).evaluateAttributeExpressions(input).getValue();
        final String type = context.getProperty(TYPE).evaluateAttributeExpressions(input).getValue();

        String queryAttr = null;
        if (context.getProperty(QUERY_ATTRIBUTE).isSet()) {
            queryAttr = context.getProperty(QUERY_ATTRIBUTE).evaluateAttributeExpressions(input).getValue();
        }

        final SearchResponse searchResult = clientService.search(query, index, type);

        // Attributes shared by every result FlowFile.
        final Map<String, String> resultAttributes = new HashMap<>();
        resultAttributes.put(CoreAttributes.MIME_TYPE.key(), "application/json");
        if (!StringUtils.isBlank(queryAttr)) {
            resultAttributes.put(queryAttr, query);
        }

        final List<FlowFile> hitResults = handleHits(searchResult.getHits(), context, session, input, resultAttributes);
        final List<FlowFile> aggregationResults = handleAggregations(searchResult.getAggregations(), context, session, input, resultAttributes);
        final String transitUri = clientService.getTransitUrl(index, type);

        if (!hitResults.isEmpty()) {
            session.transfer(hitResults, REL_HITS);
            hitResults.forEach(hit -> session.getProvenanceReporter().send(hit, transitUri));
        }

        if (!aggregationResults.isEmpty()) {
            session.transfer(aggregationResults, REL_AGGREGATIONS);
            aggregationResults.forEach(agg -> session.getProvenanceReporter().send(agg, transitUri));
        }

        if (input != null) {
            session.transfer(input, REL_ORIGINAL);
        }
    } catch (Exception failure) {
        getLogger().error("Error processing flowfile.", failure);
        if (input != null) {
            session.transfer(input, REL_FAILURE);
        }
        context.yield();
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) SearchResponse(org.apache.nifi.elasticsearch.SearchResponse)

Example 75 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class ExtractEmailAttachments method onTrigger.

/**
 * Parses the incoming FlowFile as an email message and emits one child FlowFile per attachment.
 *
 * <p>The message must carry a "From" header and a sent date; otherwise, or when parsing fails for
 * any other reason, the incoming FlowFile is routed to {@code REL_FAILURE}. A valid message is
 * routed to {@code REL_ORIGINAL} and its attachments to {@code REL_ATTACHMENTS}. Each attachment
 * FlowFile receives the attachment's name and content type (when not blank) plus the UUID and
 * filename of the incoming FlowFile as attributes.
 *
 * @param context the process context (not read by this method)
 * @param session the session the FlowFiles are read from, created in and transferred through
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();
    final String requireStrictAddresses = "false";
    session.read(originalFlowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC-2822 determines that a message must have a "From:" header
                // if a message lacks the field, it is flagged as invalid
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessageException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC2822 validation: No Sent Date");
                }
                originalFlowFilesList.add(originalFlowFile);
                if (parser.hasAttachments()) {
                    final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                    try {
                        for (final DataSource data : parser.getAttachmentList()) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();
                            if (StringUtils.isNotBlank(data.getName())) {
                                attributes.put(CoreAttributes.FILENAME.key(), data.getName());
                            }
                            if (StringUtils.isNotBlank(data.getContentType())) {
                                attributes.put(CoreAttributes.MIME_TYPE.key(), data.getContentType());
                            }
                            String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
                            split = session.append(split, new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    // Close the attachment stream once it has been copied.
                                    try (final InputStream attachmentIn = data.getInputStream()) {
                                        IOUtils.copy(attachmentIn, out);
                                    }
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong
                        // Removing splits that may have been created
                        session.remove(attachmentsList);
                        // The removed FlowFiles must not be handed to session.transfer(...) after
                        // the read callback returns, so forget them here.
                        attachmentsList.clear();
                        // Removing the original flow from its list
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Another error hit...
                // Removing the original flow from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                // Message is invalid or triggered an error during parsing
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    session.transfer(attachmentsList, REL_ATTACHMENTS);
    // As per above code, originalFlowfile may be routed to invalid or
    // original depending on RFC2822 compliance.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);
    // Only list the individual attachment FlowFiles in the log when there are few of them.
    if (attachmentsList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
    } else if (attachmentsList.size() > 1) {
        logger.info("Split {} into {} files: {}", new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) MessagingException(javax.mail.MessagingException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Properties(java.util.Properties) MimeMessageParser(org.apache.commons.mail.util.MimeMessageParser) ComponentLog(org.apache.nifi.logging.ComponentLog) Date(java.util.Date) MessagingException(javax.mail.MessagingException) FlowFileHandlingException(org.apache.nifi.processor.exception.FlowFileHandlingException) IOException(java.io.IOException) DataSource(javax.activation.DataSource) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) MimeMessage(javax.mail.internet.MimeMessage) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) FlowFileHandlingException(org.apache.nifi.processor.exception.FlowFileHandlingException) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) HashMap(java.util.HashMap) Map(java.util.Map) ProcessSession(org.apache.nifi.processor.ProcessSession) Session(javax.mail.Session)

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile)500 IOException (java.io.IOException)236 ProcessException (org.apache.nifi.processor.exception.ProcessException)193 HashMap (java.util.HashMap)160 InputStream (java.io.InputStream)145 OutputStream (java.io.OutputStream)131 ComponentLog (org.apache.nifi.logging.ComponentLog)119 Test (org.junit.Test)116 ArrayList (java.util.ArrayList)113 Map (java.util.Map)105 MockFlowFile (org.apache.nifi.util.MockFlowFile)103 ProcessSession (org.apache.nifi.processor.ProcessSession)99 OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback)83 Relationship (org.apache.nifi.processor.Relationship)78 InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)78 HashSet (java.util.HashSet)75 List (java.util.List)67 StopWatch (org.apache.nifi.util.StopWatch)59 Set (java.util.Set)56 PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor)55