Example 66 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

The class PutDruidRecord, method processFlowFile.

/**
 * Parses the record(s), converts each to a Map, and sends via Tranquility to the Druid Indexing Service
 *
 * @param context The process context
 * @param session The process session
 */
@SuppressWarnings("unchecked")
private void processFlowFile(ProcessContext context, final ProcessSession session) {
    final ComponentLog log = getLogger();
    // Get handle on Druid Tranquility session
    DruidTranquilityService tranquilityController = context.getProperty(DRUID_TRANQUILITY_SERVICE).asControllerService(DruidTranquilityService.class);
    Tranquilizer<Map<String, Object>> tranquilizer = tranquilityController.getTranquilizer();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // Create the outgoing flow files and output streams
    FlowFile droppedFlowFile = session.create(flowFile);
    final AtomicInteger droppedFlowFileCount = new AtomicInteger(0);
    FlowFile failedFlowFile = session.create(flowFile);
    final AtomicInteger failedFlowFileCount = new AtomicInteger(0);
    FlowFile successfulFlowFile = session.create(flowFile);
    final AtomicInteger successfulFlowFileCount = new AtomicInteger(0);
    final AtomicInteger recordWriteErrors = new AtomicInteger(0);
    int recordCount = 0;
    final OutputStream droppedOutputStream = session.write(droppedFlowFile);
    final RecordSetWriter droppedRecordWriter;
    final OutputStream failedOutputStream = session.write(failedFlowFile);
    final RecordSetWriter failedRecordWriter;
    final OutputStream successfulOutputStream = session.write(successfulFlowFile);
    final RecordSetWriter successfulRecordWriter;
    try (final InputStream in = session.read(flowFile)) {
        final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
        final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
        final Map<String, String> attributes = flowFile.getAttributes();
        final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger());
        final RecordSchema outSchema = writerFactory.getSchema(attributes, reader.getSchema());
        droppedRecordWriter = writerFactory.createWriter(log, outSchema, droppedOutputStream);
        droppedRecordWriter.beginRecordSet();
        failedRecordWriter = writerFactory.createWriter(log, outSchema, failedOutputStream);
        failedRecordWriter.beginRecordSet();
        successfulRecordWriter = writerFactory.createWriter(log, outSchema, successfulOutputStream);
        successfulRecordWriter.beginRecordSet();
        Record r;
        while ((r = reader.nextRecord()) != null) {
            final Record record = r;
            recordCount++;
            // Convert each Record to a Map and send it to Druid
            Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(r, RecordFieldType.RECORD.getRecordDataType(r.getSchema()));
            log.debug("Tranquilizer Status: {}", new Object[] { tranquilizer.status().toString() });
            // Send data element to Druid asynchronously
            Future<BoxedUnit> future = tranquilizer.send(contentMap);
            log.debug("Sent Payload to Druid: {}", new Object[] { contentMap });
            // Wait for Druid to call back with status
            future.addEventListener(new FutureEventListener<Object>() {

                @Override
                public void onFailure(Throwable cause) {
                    if (cause instanceof MessageDroppedException) {
                        // This happens when event timestamp targets a Druid Indexing task that has closed (Late Arriving Data)
                        log.debug("Record Dropped due to MessageDroppedException: {}, transferring record to dropped.", new Object[] { cause.getMessage() }, cause);
                        try {
                            synchronized (droppedRecordWriter) {
                                droppedRecordWriter.write(record);
                                droppedRecordWriter.flush();
                                droppedFlowFileCount.incrementAndGet();
                            }
                        } catch (final IOException ioe) {
                            log.error("Error transferring record to dropped, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
                            recordWriteErrors.incrementAndGet();
                        }
                    } else {
                        log.error("FlowFile Processing Failed due to: {}", new Object[] { cause.getMessage() }, cause);
                        try {
                            synchronized (failedRecordWriter) {
                                failedRecordWriter.write(record);
                                failedRecordWriter.flush();
                                failedFlowFileCount.incrementAndGet();
                            }
                        } catch (final IOException ioe) {
                            log.error("Error transferring record to failure, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
                            recordWriteErrors.incrementAndGet();
                        }
                    }
                }

                @Override
                public void onSuccess(Object value) {
                    log.debug(" FlowFile Processing Success: {}", new Object[] { value.toString() });
                    try {
                        synchronized (successfulRecordWriter) {
                            successfulRecordWriter.write(record);
                            successfulRecordWriter.flush();
                            successfulFlowFileCount.incrementAndGet();
                        }
                    } catch (final IOException ioe) {
                        log.error("Error transferring record to success, this may result in data loss. " + "However the record was successfully processed by Druid", new Object[] { ioe.getMessage() }, ioe);
                        recordWriteErrors.incrementAndGet();
                    }
                }
            });
        }
    } catch (IOException | SchemaNotFoundException | MalformedRecordException e) {
        log.error("FlowFile Processing Failed due to: {}", new Object[] { e.getMessage() }, e);
        // Route the original FlowFile to failure, then close and remove the outgoing FlowFiles created above
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, Integer.toString(recordCount));
        session.transfer(flowFile, REL_FAILURE);
        try {
            droppedOutputStream.close();
            session.remove(droppedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with dropped records.", ioe);
        }
        try {
            failedOutputStream.close();
            session.remove(failedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with failed records.", ioe);
        }
        try {
            successfulOutputStream.close();
            session.remove(successfulFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with successful records.", ioe);
        }
        session.commit();
        return;
    }
    if (recordCount == 0) {
        // Send original (empty) flow file to success, remove the rest
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, "0");
        session.transfer(flowFile, REL_SUCCESS);
        try {
            droppedOutputStream.close();
            session.remove(droppedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with dropped records.", ioe);
        }
        try {
            failedOutputStream.close();
            session.remove(failedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with failed records.", ioe);
        }
        try {
            successfulOutputStream.close();
            session.remove(successfulFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with successful records.", ioe);
        }
    } else {
        // Wait for all the records to finish processing
        while (recordCount != (droppedFlowFileCount.get() + failedFlowFileCount.get() + successfulFlowFileCount.get() + recordWriteErrors.get())) {
            Thread.yield();
        }
        try {
            droppedRecordWriter.finishRecordSet();
            droppedRecordWriter.close();
        } catch (IOException ioe) {
            log.error("Error closing FlowFile with dropped records: {}", new Object[] { ioe.getMessage() }, ioe);
            session.rollback();
            throw new ProcessException(ioe);
        }
        if (droppedFlowFileCount.get() > 0) {
            droppedFlowFile = session.putAttribute(droppedFlowFile, RECORD_COUNT, Integer.toString(droppedFlowFileCount.get()));
            session.transfer(droppedFlowFile, REL_DROPPED);
        } else {
            session.remove(droppedFlowFile);
        }
        try {
            failedRecordWriter.finishRecordSet();
            failedRecordWriter.close();
        } catch (IOException ioe) {
            log.error("Error closing FlowFile with failed records: {}", new Object[] { ioe.getMessage() }, ioe);
            session.rollback();
            throw new ProcessException(ioe);
        }
        if (failedFlowFileCount.get() > 0) {
            failedFlowFile = session.putAttribute(failedFlowFile, RECORD_COUNT, Integer.toString(failedFlowFileCount.get()));
            session.transfer(failedFlowFile, REL_FAILURE);
        } else {
            session.remove(failedFlowFile);
        }
        try {
            successfulRecordWriter.finishRecordSet();
            successfulRecordWriter.close();
        } catch (IOException ioe) {
            log.error("Error closing FlowFile with successful records: {}", new Object[] { ioe.getMessage() }, ioe);
            session.rollback();
            throw new ProcessException(ioe);
        }
        if (successfulFlowFileCount.get() > 0) {
            successfulFlowFile = session.putAttribute(successfulFlowFile, RECORD_COUNT, Integer.toString(successfulFlowFileCount.get()));
            session.transfer(successfulFlowFile, REL_SUCCESS);
            session.getProvenanceReporter().send(successfulFlowFile, tranquilityController.getTransitUri());
        } else {
            session.remove(successfulFlowFile);
        }
        session.remove(flowFile);
    }
    session.commit();
}
Also used: MessageDroppedException (com.metamx.tranquility.tranquilizer.MessageDroppedException), OutputStream (java.io.OutputStream), RecordReader (org.apache.nifi.serialization.RecordReader), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), DruidTranquilityService (org.apache.nifi.controller.api.druid.DruidTranquilityService), Record (org.apache.nifi.serialization.record.Record), BoxedUnit (scala.runtime.BoxedUnit), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), ProcessException (org.apache.nifi.processor.exception.ProcessException), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), Map (java.util.Map)
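The busy-wait near the end of processFlowFile only exits once every parsed record has been claimed by exactly one of the four counters, which is why each callback path increments a counter even when the record write itself fails. Below is a minimal, self-contained sketch of that completion-counting pattern, with CompletableFuture standing in for Tranquility's Future and a failure injected on every tenth record; all names and numbers here are hypothetical, for illustration only.

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;

public class CompletionCountingSketch {
    public static void main(String[] args) {
        final int recordCount = 100;
        final AtomicInteger succeeded = new AtomicInteger(0);
        final AtomicInteger failed = new AtomicInteger(0);
        final ExecutorService pool = Executors.newFixedThreadPool(4);
        for (int i = 0; i < recordCount; i++) {
            final int n = i;
            // Stand-in for tranquilizer.send(contentMap): completes asynchronously
            CompletableFuture.runAsync(() -> {
                if (n % 10 == 0) {
                    throw new RuntimeException("simulated drop");
                }
            }, pool).whenComplete((unused, cause) -> {
                // Every outcome must increment exactly one counter,
                // otherwise the wait loop below never terminates
                if (cause == null) {
                    succeeded.incrementAndGet();
                } else {
                    failed.incrementAndGet();
                }
            });
        }
        // Same invariant the processor spins on before finishing its record sets
        while (succeeded.get() + failed.get() < recordCount) {
            Thread.yield();
        }
        pool.shutdown();
        System.out.println("succeeded=" + succeeded + ", failed=" + failed);
    }
}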

Example 67 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

The class FetchElasticsearch5, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    synchronized (esClient) {
        if (esClient.get() == null) {
            super.setup(context);
        }
    }
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final ComponentLog logger = getLogger();
    try {
        logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] { index, docType, docId });
        GetRequestBuilder getRequestBuilder = esClient.get().prepareGet(index, docType, docId);
        final GetResponse getResponse = getRequestBuilder.execute().actionGet();
        if (getResponse == null || !getResponse.isExists()) {
            logger.debug("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
            // We couldn't find the document, so penalize it and send it to "not found"
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_NOT_FOUND);
        } else {
            flowFile = session.putAllAttributes(flowFile, new HashMap<String, String>() {

                {
                    put("filename", docId);
                    put("es.index", index);
                    put("es.type", docType);
                }
            });
            flowFile = session.write(flowFile, new OutputStreamCallback() {

                @Override
                public void process(OutputStream out) throws IOException {
                    out.write(getResponse.getSourceAsString().getBytes(charset));
                }
            });
            logger.debug("Elasticsearch document " + docId + " fetched, routing to success");
            // The document is JSON, so update the MIME type of the flow file
            flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
            session.getProvenanceReporter().fetch(flowFile, getResponse.remoteAddress().getAddress());
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        logger.error("Failed to read into Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.), or this issue may be transient. Routing to retry", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFile, REL_RETRY);
        context.yield();
    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), HashMap (java.util.HashMap), OutputStream (java.io.OutputStream), Charset (java.nio.charset.Charset), NoNodeAvailableException (org.elasticsearch.client.transport.NoNodeAvailableException), ComponentLog (org.apache.nifi.logging.ComponentLog), GetResponse (org.elasticsearch.action.get.GetResponse), NodeClosedException (org.elasticsearch.node.NodeClosedException), ProcessException (org.apache.nifi.processor.exception.ProcessException), ElasticsearchTimeoutException (org.elasticsearch.ElasticsearchTimeoutException), ReceiveTimeoutTransportException (org.elasticsearch.transport.ReceiveTimeoutTransportException), IOException (java.io.IOException), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), GetRequestBuilder (org.elasticsearch.action.get.GetRequestBuilder)
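The first catch block enumerates the transport exceptions this processor treats as transient and therefore retryable; any other exception falls through to failure. A hedged sketch of that routing decision as a standalone helper follows; the class name, method name, and Relationship constants are illustrative only, while the exception list is copied from the catch block above.

import org.apache.nifi.processor.Relationship;
import org.elasticsearch.ElasticsearchTimeoutException;
import org.elasticsearch.client.transport.NoNodeAvailableException;
import org.elasticsearch.node.NodeClosedException;
import org.elasticsearch.transport.ReceiveTimeoutTransportException;

public class RetryRoutingSketch {

    static final Relationship REL_RETRY = new Relationship.Builder().name("retry").build();
    static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").build();

    // Transient transport problems are worth retrying; anything else is a hard failure
    static Relationship routeFor(Exception e) {
        if (e instanceof NoNodeAvailableException
                || e instanceof ElasticsearchTimeoutException
                || e instanceof ReceiveTimeoutTransportException
                || e instanceof NodeClosedException) {
            return REL_RETRY;
        }
        return REL_FAILURE;
    }
}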

Example 68 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

The class PutElasticsearch5, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    synchronized (esClient) {
        if (esClient.get() == null) {
            super.setup(context);
        }
    }
    final String id_attribute = context.getProperty(ID_ATTRIBUTE).getValue();
    final int batchSize = context.getProperty(BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final List<FlowFile> flowFiles = session.get(batchSize);
    if (flowFiles.isEmpty()) {
        return;
    }
    final ComponentLog logger = getLogger();
    // Keep track of the list of flow files that need to be transferred. As they are transferred, remove them from the list.
    List<FlowFile> flowFilesToTransfer = new LinkedList<>(flowFiles);
    try {
        final BulkRequestBuilder bulk = esClient.get().prepareBulk();
        for (FlowFile file : flowFiles) {
            final String index = context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue();
            final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(file).getValue();
            final String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(file).getValue();
            final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(file).getValue());
            final String id = file.getAttribute(id_attribute);
            if (id == null) {
                logger.warn("No value in identifier attribute {} for {}, transferring to failure", new Object[] { id_attribute, file });
                flowFilesToTransfer.remove(file);
                session.transfer(file, REL_FAILURE);
            } else {
                session.read(file, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream in) throws IOException {
                        // For the bulk insert, each document has to be on its own line, so remove all CRLF
                        String json = IOUtils.toString(in, charset).replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
                        if (indexOp.equalsIgnoreCase("index")) {
                            bulk.add(esClient.get().prepareIndex(index, docType, id).setSource(json.getBytes(charset)));
                        } else if (indexOp.equalsIgnoreCase("upsert")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)).setDocAsUpsert(true));
                        } else if (indexOp.equalsIgnoreCase("update")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)));
                        } else {
                            throw new IOException("Index operation: " + indexOp + " not supported.");
                        }
                    }
                });
            }
        }
        if (bulk.numberOfActions() > 0) {
            final BulkResponse response = bulk.execute().actionGet();
            if (response.hasFailures()) {
                // Responses are in the same order as the queued actions; removing in reverse order keeps earlier indices valid
                BulkItemResponse[] responses = response.getItems();
                if (responses != null && responses.length > 0) {
                    for (int i = responses.length - 1; i >= 0; i--) {
                        final BulkItemResponse item = responses[i];
                        final FlowFile flowFile = flowFilesToTransfer.get(item.getItemId());
                        if (item.isFailed()) {
                            logger.warn("Failed to insert {} into Elasticsearch due to {}, transferring to failure", new Object[] { flowFile, item.getFailure().getMessage() });
                            session.transfer(flowFile, REL_FAILURE);
                        } else {
                            session.getProvenanceReporter().send(flowFile, response.remoteAddress().getAddress());
                            session.transfer(flowFile, REL_SUCCESS);
                        }
                        flowFilesToTransfer.remove(flowFile);
                    }
                }
            }
            // Transfer any remaining flowfiles to success
            for (FlowFile ff : flowFilesToTransfer) {
                session.getProvenanceReporter().send(ff, response.remoteAddress().getAddress());
                session.transfer(ff, REL_SUCCESS);
            }
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        // Authorization errors and other problems are often returned as NoNodeAvailableExceptions without a
        // traceable cause. However the cause seems to be logged, just not available to this caught exception.
        // Since the error message will show up as a bulletin, we make specific mention to check the logs for
        // more details.
        logger.error("Failed to insert into Elasticsearch due to {}. More detailed information may be available in " + "the NiFi logs.", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFilesToTransfer, REL_RETRY);
        context.yield();
    } catch (Exception exceptionToFail) {
        logger.error("Failed to insert into Elasticsearch due to {}, transferring to failure", new Object[] { exceptionToFail.getLocalizedMessage() }, exceptionToFail);
        session.transfer(flowFilesToTransfer, REL_FAILURE);
        context.yield();
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), Charset (java.nio.charset.Charset), BulkItemResponse (org.elasticsearch.action.bulk.BulkItemResponse), BulkResponse (org.elasticsearch.action.bulk.BulkResponse), IOException (java.io.IOException), NoNodeAvailableException (org.elasticsearch.client.transport.NoNodeAvailableException), ComponentLog (org.apache.nifi.logging.ComponentLog), LinkedList (java.util.LinkedList), NodeClosedException (org.elasticsearch.node.NodeClosedException), ProcessException (org.apache.nifi.processor.exception.ProcessException), ElasticsearchTimeoutException (org.elasticsearch.ElasticsearchTimeoutException), ReceiveTimeoutTransportException (org.elasticsearch.transport.ReceiveTimeoutTransportException), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), BulkRequestBuilder (org.elasticsearch.action.bulk.BulkRequestBuilder)
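The reverse iteration over the bulk responses is what keeps the index lookup valid: BulkItemResponse.getItemId() is the position of the action in the original request, and removing from the tail never shifts an index that has yet to be read. A small standalone simulation of that invariant, with plain strings standing in for FlowFiles (all data hypothetical):

import java.util.LinkedList;
import java.util.List;

public class ReverseRemovalSketch {
    public static void main(String[] args) {
        // Stand-ins for the FlowFiles queued into the bulk request, in order
        List<String> toTransfer = new LinkedList<>(List.of("ff0", "ff1", "ff2", "ff3"));
        // Stand-in for BulkItemResponse.getItemId(): the index of each action
        int[] itemIds = { 0, 1, 2, 3 };
        boolean[] isFailed = { false, true, false, true };
        // Removing in reverse means remove(index) never shifts an index we still need
        for (int i = itemIds.length - 1; i >= 0; i--) {
            String ff = toTransfer.get(itemIds[i]);
            System.out.println(ff + " -> " + (isFailed[i] ? "REL_FAILURE" : "REL_SUCCESS"));
            toTransfer.remove(itemIds[i]);
        }
        // Anything left over would be transferred to success, as in the processor
        System.out.println("remaining: " + toTransfer);
    }
}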

Example 69 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

The class FetchElasticsearch, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final ComponentLog logger = getLogger();
    try {
        logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] { index, docType, docId });
        final long startNanos = System.nanoTime();
        GetRequestBuilder getRequestBuilder = esClient.get().prepareGet(index, docType, docId);
        if (authToken != null) {
            getRequestBuilder.putHeader("Authorization", authToken);
        }
        final GetResponse getResponse = getRequestBuilder.execute().actionGet();
        if (getResponse == null || !getResponse.isExists()) {
            logger.debug("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
            // We couldn't find the document, so penalize it and send it to "not found"
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_NOT_FOUND);
        } else {
            flowFile = session.putAttribute(flowFile, "filename", docId);
            flowFile = session.putAttribute(flowFile, "es.index", index);
            flowFile = session.putAttribute(flowFile, "es.type", docType);
            flowFile = session.write(flowFile, new OutputStreamCallback() {

                @Override
                public void process(OutputStream out) throws IOException {
                    out.write(getResponse.getSourceAsString().getBytes(charset));
                }
            });
            logger.debug("Elasticsearch document " + docId + " fetched, routing to success");
            final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
            final String uri = context.getProperty(HOSTS).evaluateAttributeExpressions().getValue() + "/" + index + "/" + docType + "/" + docId;
            session.getProvenanceReporter().fetch(flowFile, uri, millis);
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        logger.error("Failed to read into Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.). Routing to retry", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFile, REL_RETRY);
        context.yield();
    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), OutputStream (java.io.OutputStream), Charset (java.nio.charset.Charset), NoNodeAvailableException (org.elasticsearch.client.transport.NoNodeAvailableException), ComponentLog (org.apache.nifi.logging.ComponentLog), GetResponse (org.elasticsearch.action.get.GetResponse), NodeClosedException (org.elasticsearch.node.NodeClosedException), ProcessException (org.apache.nifi.processor.exception.ProcessException), ElasticsearchTimeoutException (org.elasticsearch.ElasticsearchTimeoutException), ReceiveTimeoutTransportException (org.elasticsearch.transport.ReceiveTimeoutTransportException), IOException (java.io.IOException), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), GetRequestBuilder (org.elasticsearch.action.get.GetRequestBuilder)
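Unlike the 5.x variant, this processor also reports how long the fetch took and a transit URI to the provenance repository. A tiny self-contained sketch of that timing pattern, with Thread.sleep standing in for the Elasticsearch round trip and a made-up host, index, type, and document id (all values hypothetical):

import java.util.concurrent.TimeUnit;

public class FetchTimingSketch {
    public static void main(String[] args) throws InterruptedException {
        final long startNanos = System.nanoTime(); // taken just before the request is issued
        Thread.sleep(42); // stand-in for getRequestBuilder.execute().actionGet()
        final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        // Same shape as the transit URI handed to session.getProvenanceReporter().fetch(...)
        final String uri = "http://es-host:9200" + "/" + "my-index" + "/" + "my-type" + "/" + "doc-1";
        System.out.println("fetched " + uri + " in " + millis + " ms");
    }
}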

Example 70 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

The class FetchElasticsearchHttp, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = null;
    if (context.hasIncomingConnection()) {
        flowFile = session.get();
        // If we have no FlowFile and all incoming connections are self-loops, we can continue on.
        // However, if we have no FlowFile and we have connections coming from other Processors,
        // then we know that we should run only if we have a FlowFile.
        if (flowFile == null && context.hasNonLoopConnection()) {
            return;
        }
    }
    OkHttpClient okHttpClient = getClient();
    if (flowFile == null) {
        flowFile = session.create();
    }
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final String fields = context.getProperty(FIELDS).isSet() ? context.getProperty(FIELDS).evaluateAttributeExpressions(flowFile).getValue() : null;
    // Authentication
    final String username = context.getProperty(USERNAME).evaluateAttributeExpressions(flowFile).getValue();
    final String password = context.getProperty(PASSWORD).evaluateAttributeExpressions().getValue();
    final ComponentLog logger = getLogger();
    Response getResponse = null;
    try {
        logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] { index, docType, docId });
        // read the url property from the context
        final String urlstr = StringUtils.trimToEmpty(context.getProperty(ES_URL).evaluateAttributeExpressions().getValue());
        final URL url = buildRequestURL(urlstr, docId, index, docType, fields, context);
        final long startNanos = System.nanoTime();
        getResponse = sendRequestToElasticsearch(okHttpClient, url, username, password, "GET", null);
        final int statusCode = getResponse.code();
        if (isSuccess(statusCode)) {
            ResponseBody body = getResponse.body();
            final byte[] bodyBytes = body.bytes();
            JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
            boolean found = responseJson.get("found").asBoolean(false);
            String retrievedIndex = responseJson.get("_index").asText();
            String retrievedType = responseJson.get("_type").asText();
            String retrievedId = responseJson.get("_id").asText();
            if (found) {
                JsonNode source = responseJson.get("_source");
                flowFile = session.putAttribute(flowFile, "filename", retrievedId);
                flowFile = session.putAttribute(flowFile, "es.index", retrievedIndex);
                flowFile = session.putAttribute(flowFile, "es.type", retrievedType);
                if (source != null) {
                    flowFile = session.write(flowFile, out -> {
                        out.write(source.toString().getBytes());
                    });
                }
                logger.debug("Elasticsearch document " + retrievedId + " fetched, routing to success");
                // emit provenance event
                final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
                if (context.hasNonLoopConnection()) {
                    session.getProvenanceReporter().fetch(flowFile, url.toExternalForm(), millis);
                } else {
                    session.getProvenanceReporter().receive(flowFile, url.toExternalForm(), millis);
                }
                session.transfer(flowFile, REL_SUCCESS);
            } else {
                logger.debug("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
                // We couldn't find the document, so send it to "not found"
                session.transfer(flowFile, REL_NOT_FOUND);
            }
        } else {
            if (statusCode == 404) {
                logger.warn("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
                // We couldn't find the document, so penalize it and send it to "not found"
                session.transfer(flowFile, REL_NOT_FOUND);
            } else {
                // 5xx -> RETRY, but a server error might last a while, so yield
                if (statusCode / 100 == 5) {
                    logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to retry. This is likely a server problem, yielding...", new Object[] { statusCode, getResponse.message() });
                    session.transfer(flowFile, REL_RETRY);
                    context.yield();
                } else if (context.hasIncomingConnection()) {
                    // 1xx, 3xx, 4xx -> NO RETRY
                    logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to failure", new Object[] { statusCode, getResponse.message() });
                    session.transfer(flowFile, REL_FAILURE);
                } else {
                    logger.warn("Elasticsearch returned code {} with message {}", new Object[] { statusCode, getResponse.message() });
                    session.remove(flowFile);
                }
            }
        }
    } catch (IOException ioe) {
        logger.error("Failed to read from Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.). Routing to retry", new Object[] { ioe.getLocalizedMessage() }, ioe);
        if (context.hasIncomingConnection()) {
            session.transfer(flowFile, REL_RETRY);
        } else {
            session.remove(flowFile);
        }
        context.yield();
    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
        if (context.hasIncomingConnection()) {
            session.transfer(flowFile, REL_FAILURE);
        } else {
            session.remove(flowFile);
        }
        context.yield();
    } finally {
        if (getResponse != null) {
            getResponse.close();
        }
    }
}
Also used: StandardValidators (org.apache.nifi.processor.util.StandardValidators), CapabilityDescription (org.apache.nifi.annotation.documentation.CapabilityDescription), URL (java.net.URL), EventDriven (org.apache.nifi.annotation.behavior.EventDriven), ComponentLog (org.apache.nifi.logging.ComponentLog), StringUtils (org.apache.commons.lang3.StringUtils), PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor), ProcessException (org.apache.nifi.processor.exception.ProcessException), ArrayList (java.util.ArrayList), HashSet (java.util.HashSet), ByteArrayInputStream (java.io.ByteArrayInputStream), WritesAttributes (org.apache.nifi.annotation.behavior.WritesAttributes), Relationship (org.apache.nifi.processor.Relationship), Map (java.util.Map), Response (okhttp3.Response), JsonNode (com.fasterxml.jackson.databind.JsonNode), ResponseBody (okhttp3.ResponseBody), FlowFile (org.apache.nifi.flowfile.FlowFile), MalformedURLException (java.net.MalformedURLException), ProcessContext (org.apache.nifi.processor.ProcessContext), Set (java.util.Set), ProcessSession (org.apache.nifi.processor.ProcessSession), IOException (java.io.IOException), WritesAttribute (org.apache.nifi.annotation.behavior.WritesAttribute), Collectors (java.util.stream.Collectors), TimeUnit (java.util.concurrent.TimeUnit), InputRequirement (org.apache.nifi.annotation.behavior.InputRequirement), OnScheduled (org.apache.nifi.annotation.lifecycle.OnScheduled), List (java.util.List), OkHttpClient (okhttp3.OkHttpClient), DynamicProperty (org.apache.nifi.annotation.behavior.DynamicProperty), Stream (java.util.stream.Stream), SupportsBatching (org.apache.nifi.annotation.behavior.SupportsBatching), Tags (org.apache.nifi.annotation.documentation.Tags), HttpUrl (okhttp3.HttpUrl), Collections (java.util.Collections)
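The HTTP variant routes on the raw status code rather than on exception type: any 2xx is a success, 404 goes to not-found, 5xx is treated as a likely transient server problem and retried, and the remaining codes fail. A standalone sketch of that decision table (assuming isSuccess() above means any 2xx code; the enum and method names are illustrative only):

public class HttpStatusRoutingSketch {

    enum Route { SUCCESS, NOT_FOUND, RETRY, FAILURE }

    // Mirrors the status handling above: 2xx success, 404 not found,
    // 5xx retry (server side, likely transient), everything else failure
    static Route routeFor(int statusCode) {
        if (statusCode / 100 == 2) {
            return Route.SUCCESS;
        }
        if (statusCode == 404) {
            return Route.NOT_FOUND;
        }
        if (statusCode / 100 == 5) {
            return Route.RETRY;
        }
        return Route.FAILURE;
    }

    public static void main(String[] args) {
        for (int code : new int[] { 200, 404, 400, 500, 503 }) {
            System.out.println(code + " -> " + routeFor(code));
        }
    }
}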

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile): 500 usages
IOException (java.io.IOException): 236 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 193 usages
HashMap (java.util.HashMap): 160 usages
InputStream (java.io.InputStream): 145 usages
OutputStream (java.io.OutputStream): 131 usages
ComponentLog (org.apache.nifi.logging.ComponentLog): 119 usages
Test (org.junit.Test): 116 usages
ArrayList (java.util.ArrayList): 113 usages
Map (java.util.Map): 105 usages
MockFlowFile (org.apache.nifi.util.MockFlowFile): 103 usages
ProcessSession (org.apache.nifi.processor.ProcessSession): 99 usages
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 83 usages
Relationship (org.apache.nifi.processor.Relationship): 78 usages
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 78 usages
HashSet (java.util.HashSet): 75 usages
List (java.util.List): 67 usages
StopWatch (org.apache.nifi.util.StopWatch): 59 usages
Set (java.util.Set): 56 usages
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 55 usages