Search in sources :

Example 11 with BulkItemResponse

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.bulk.BulkItemResponse in project nifi by apache.

the class PutElasticsearch5 method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    synchronized (esClient) {
        if (esClient.get() == null) {
            super.setup(context);
        }
    }
    final String id_attribute = context.getProperty(ID_ATTRIBUTE).getValue();
    final int batchSize = context.getProperty(BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final List<FlowFile> flowFiles = session.get(batchSize);
    if (flowFiles.isEmpty()) {
        return;
    }
    final ComponentLog logger = getLogger();
    // Keep track of the list of flow files that need to be transferred. As they are transferred, remove them from the list.
    List<FlowFile> flowFilesToTransfer = new LinkedList<>(flowFiles);
    try {
        final BulkRequestBuilder bulk = esClient.get().prepareBulk();
        for (FlowFile file : flowFiles) {
            final String index = context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue();
            final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(file).getValue();
            final String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(file).getValue();
            final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(file).getValue());
            final String id = file.getAttribute(id_attribute);
            if (id == null) {
                logger.warn("No value in identifier attribute {} for {}, transferring to failure", new Object[] { id_attribute, file });
                flowFilesToTransfer.remove(file);
                session.transfer(file, REL_FAILURE);
            } else {
                session.read(file, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream in) throws IOException {
                        // For the bulk insert, each document has to be on its own line, so remove all CRLF
                        String json = IOUtils.toString(in, charset).replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
                        if (indexOp.equalsIgnoreCase("index")) {
                            bulk.add(esClient.get().prepareIndex(index, docType, id).setSource(json.getBytes(charset)));
                        } else if (indexOp.equalsIgnoreCase("upsert")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)).setDocAsUpsert(true));
                        } else if (indexOp.equalsIgnoreCase("update")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)));
                        } else {
                            throw new IOException("Index operation: " + indexOp + " not supported.");
                        }
                    }
                });
            }
        }
        if (bulk.numberOfActions() > 0) {
            final BulkResponse response = bulk.execute().actionGet();
            if (response.hasFailures()) {
                // Responses are guaranteed to be in order, remove them in reverse order
                BulkItemResponse[] responses = response.getItems();
                if (responses != null && responses.length > 0) {
                    for (int i = responses.length - 1; i >= 0; i--) {
                        final BulkItemResponse item = responses[i];
                        final FlowFile flowFile = flowFilesToTransfer.get(item.getItemId());
                        if (item.isFailed()) {
                            logger.warn("Failed to insert {} into Elasticsearch due to {}, transferring to failure", new Object[] { flowFile, item.getFailure().getMessage() });
                            session.transfer(flowFile, REL_FAILURE);
                        } else {
                            session.getProvenanceReporter().send(flowFile, response.remoteAddress().getAddress());
                            session.transfer(flowFile, REL_SUCCESS);
                        }
                        flowFilesToTransfer.remove(flowFile);
                    }
                }
            }
            // Transfer any remaining flowfiles to success
            for (FlowFile ff : flowFilesToTransfer) {
                session.getProvenanceReporter().send(ff, response.remoteAddress().getAddress());
                session.transfer(ff, REL_SUCCESS);
            }
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        // Authorization errors and other problems are often returned as NoNodeAvailableExceptions without a
        // traceable cause. However the cause seems to be logged, just not available to this caught exception.
        // Since the error message will show up as a bulletin, we make specific mention to check the logs for
        // more details.
        logger.error("Failed to insert into Elasticsearch due to {}. More detailed information may be available in " + "the NiFi logs.", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFilesToTransfer, REL_RETRY);
        context.yield();
    } catch (Exception exceptionToFail) {
        logger.error("Failed to insert into Elasticsearch due to {}, transferring to failure", new Object[] { exceptionToFail.getLocalizedMessage() }, exceptionToFail);
        session.transfer(flowFilesToTransfer, REL_FAILURE);
        context.yield();
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) IOException(java.io.IOException) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) ComponentLog(org.apache.nifi.logging.ComponentLog) LinkedList(java.util.LinkedList) NodeClosedException(org.elasticsearch.node.NodeClosedException) ProcessException(org.apache.nifi.processor.exception.ProcessException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) IOException(java.io.IOException) ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) NodeClosedException(org.elasticsearch.node.NodeClosedException) BulkRequestBuilder(org.elasticsearch.action.bulk.BulkRequestBuilder)

Example 12 with BulkItemResponse

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.bulk.BulkItemResponse in project nifi by apache.

the class PutElasticsearch method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    final String id_attribute = context.getProperty(ID_ATTRIBUTE).getValue();
    final int batchSize = context.getProperty(BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final List<FlowFile> flowFiles = session.get(batchSize);
    if (flowFiles.isEmpty()) {
        return;
    }
    // Keep track of the list of flow files that need to be transferred. As they are transferred, remove them from the list.
    List<FlowFile> flowFilesToTransfer = new LinkedList<>(flowFiles);
    try {
        final BulkRequestBuilder bulk = esClient.get().prepareBulk();
        if (authToken != null) {
            bulk.putHeader("Authorization", authToken);
        }
        for (FlowFile file : flowFiles) {
            final String index = context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue();
            final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(file).getValue();
            final String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(file).getValue();
            final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(file).getValue());
            final String id = file.getAttribute(id_attribute);
            if (id == null) {
                logger.error("No value in identifier attribute {} for {}, transferring to failure", new Object[] { id_attribute, file });
                flowFilesToTransfer.remove(file);
                session.transfer(file, REL_FAILURE);
            } else {
                session.read(file, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream in) throws IOException {
                        String json = IOUtils.toString(in, charset).replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
                        if (indexOp.equalsIgnoreCase("index")) {
                            bulk.add(esClient.get().prepareIndex(index, docType, id).setSource(json.getBytes(charset)));
                        } else if (indexOp.equalsIgnoreCase("upsert")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)).setDocAsUpsert(true));
                        } else if (indexOp.equalsIgnoreCase("update")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)));
                        } else {
                            throw new IOException("Index operation: " + indexOp + " not supported.");
                        }
                    }
                });
            }
        }
        final BulkResponse response = bulk.execute().actionGet();
        if (response.hasFailures()) {
            // Responses are guaranteed to be in order, remove them in reverse order
            BulkItemResponse[] responses = response.getItems();
            if (responses != null && responses.length > 0) {
                for (int i = responses.length - 1; i >= 0; i--) {
                    final FlowFile flowFile = flowFilesToTransfer.get(i);
                    if (responses[i].isFailed()) {
                        logger.error("Failed to insert {} into Elasticsearch due to {}, transferring to failure", new Object[] { flowFile, responses[i].getFailure().getMessage() });
                        session.transfer(flowFile, REL_FAILURE);
                    } else {
                        session.getProvenanceReporter().send(flowFile, context.getProperty(HOSTS).evaluateAttributeExpressions().getValue() + "/" + responses[i].getIndex());
                        session.transfer(flowFile, REL_SUCCESS);
                    }
                    flowFilesToTransfer.remove(flowFile);
                }
            }
        }
        // Transfer any remaining flowfiles to success
        flowFilesToTransfer.forEach(file -> {
            session.transfer(file, REL_SUCCESS);
            // Record provenance event
            session.getProvenanceReporter().send(file, context.getProperty(HOSTS).evaluateAttributeExpressions().getValue() + "/" + context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue());
        });
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        // Authorization errors and other problems are often returned as NoNodeAvailableExceptions without a
        // traceable cause. However the cause seems to be logged, just not available to this caught exception.
        // Since the error message will show up as a bulletin, we make specific mention to check the logs for
        // more details.
        logger.error("Failed to insert into Elasticsearch due to {}. More detailed information may be available in " + "the NiFi logs.", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFilesToTransfer, REL_RETRY);
        context.yield();
    } catch (Exception exceptionToFail) {
        logger.error("Failed to insert into Elasticsearch due to {}, transferring to failure", new Object[] { exceptionToFail.getLocalizedMessage() }, exceptionToFail);
        session.transfer(flowFilesToTransfer, REL_FAILURE);
        context.yield();
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) IOException(java.io.IOException) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) ComponentLog(org.apache.nifi.logging.ComponentLog) LinkedList(java.util.LinkedList) NodeClosedException(org.elasticsearch.node.NodeClosedException) ProcessException(org.apache.nifi.processor.exception.ProcessException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) IOException(java.io.IOException) ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) NodeClosedException(org.elasticsearch.node.NodeClosedException) BulkRequestBuilder(org.elasticsearch.action.bulk.BulkRequestBuilder)

Example 13 with BulkItemResponse

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.bulk.BulkItemResponse in project metron by apache.

the class ElasticsearchWriterTest method testSingleFailure.

@Test
public void testSingleFailure() throws Exception {
    Tuple tuple1 = mock(Tuple.class);
    BulkResponse response = mock(BulkResponse.class);
    when(response.hasFailures()).thenReturn(true);
    Exception e = new IllegalStateException();
    BulkItemResponse itemResponse = buildBulkItemFailure(e);
    when(response.iterator()).thenReturn(ImmutableList.of(itemResponse).iterator());
    BulkWriterResponse expected = new BulkWriterResponse();
    expected.addError(e, tuple1);
    ElasticsearchWriter esWriter = new ElasticsearchWriter();
    BulkWriterResponse actual = esWriter.buildWriteReponse(ImmutableList.of(tuple1), response);
    assertEquals("Response should have one error and zero successes", expected, actual);
}
Also used : BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) Tuple(org.apache.storm.tuple.Tuple) BulkWriterResponse(org.apache.metron.common.writer.BulkWriterResponse) Test(org.junit.Test)

Example 14 with BulkItemResponse

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.bulk.BulkItemResponse in project metron by apache.

the class ElasticsearchWriterTest method buildBulkItemFailure.

private BulkItemResponse buildBulkItemFailure(Exception e) {
    BulkItemResponse itemResponse = mock(BulkItemResponse.class);
    when(itemResponse.isFailed()).thenReturn(true);
    BulkItemResponse.Failure failure = mock(BulkItemResponse.Failure.class);
    when(itemResponse.getFailure()).thenReturn(failure);
    when(failure.getCause()).thenReturn(e);
    return itemResponse;
}
Also used : BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse)

Example 15 with BulkItemResponse

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.bulk.BulkItemResponse in project nutch by apache.

the class TestElasticIndexWriter method setup.

@Before
public void setup() {
    conf = NutchConfiguration.create();
    conf.addResource("nutch-site-test.xml");
    bulkRequestSuccessful = false;
    clusterSaturated = false;
    curNumFailures = 0;
    maxNumFailures = 0;
    Settings settings = Settings.builder().build();
    ThreadPool threadPool = new ThreadPool(settings);
    // customize the ES client to simulate responses from an ES cluster
    client = new AbstractClient(settings, threadPool) {

        @Override
        public void close() {
        }

        @Override
        protected <Request extends ActionRequest, Response extends ActionResponse, RequestBuilder extends ActionRequestBuilder<Request, Response, RequestBuilder>> void doExecute(Action<Request, Response, RequestBuilder> action, Request request, ActionListener<Response> listener) {
            BulkResponse response = null;
            if (clusterSaturated) {
                // pretend the cluster is saturated
                curNumFailures++;
                if (curNumFailures >= maxNumFailures) {
                    // pretend the cluster is suddenly no longer saturated
                    clusterSaturated = false;
                }
                // respond with a failure
                BulkItemResponse failed = new BulkItemResponse(0, OpType.INDEX, new BulkItemResponse.Failure("nutch", "index", "failure0", new EsRejectedExecutionException("saturated")));
                response = new BulkResponse(new BulkItemResponse[] { failed }, 0);
            } else {
                // respond successfully
                BulkItemResponse success = new BulkItemResponse(0, OpType.INDEX, new IndexResponse(new ShardId("nutch", UUID.randomUUID().toString(), 0), "index", "index0", 0, true));
                response = new BulkResponse(new BulkItemResponse[] { success }, 0);
            }
            listener.onResponse((Response) response);
        }
    };
    // customize the plugin to signal successful bulk operations
    testIndexWriter = new ElasticIndexWriter() {

        @Override
        protected Client makeClient(Configuration conf) {
            return client;
        }

        @Override
        protected BulkProcessor.Listener bulkProcessorListener() {
            return new BulkProcessor.Listener() {

                @Override
                public void afterBulk(long executionId, BulkRequest request, BulkResponse response) {
                    if (!response.hasFailures()) {
                        bulkRequestSuccessful = true;
                    }
                }

                @Override
                public void afterBulk(long executionId, BulkRequest request, Throwable failure) {
                }

                @Override
                public void beforeBulk(long executionId, BulkRequest request) {
                }
            };
        }
    };
}
Also used : ActionListener(org.elasticsearch.action.ActionListener) ActionRequestBuilder(org.elasticsearch.action.ActionRequestBuilder) Configuration(org.apache.hadoop.conf.Configuration) NutchConfiguration(org.apache.nutch.util.NutchConfiguration) AbstractClient(org.elasticsearch.client.support.AbstractClient) ThreadPool(org.elasticsearch.threadpool.ThreadPool) ActionRequest(org.elasticsearch.action.ActionRequest) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) IndexResponse(org.elasticsearch.action.index.IndexResponse) BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) ActionResponse(org.elasticsearch.action.ActionResponse) BulkResponse(org.elasticsearch.action.bulk.BulkResponse) ShardId(org.elasticsearch.index.shard.ShardId) IndexResponse(org.elasticsearch.action.index.IndexResponse) BulkProcessor(org.elasticsearch.action.bulk.BulkProcessor) BulkRequest(org.elasticsearch.action.bulk.BulkRequest) Client(org.elasticsearch.client.Client) AbstractClient(org.elasticsearch.client.support.AbstractClient) Settings(org.elasticsearch.common.settings.Settings) EsRejectedExecutionException(org.elasticsearch.common.util.concurrent.EsRejectedExecutionException) Before(org.junit.Before)

Aggregations

BulkItemResponse (org.elasticsearch.action.bulk.BulkItemResponse)48 BulkResponse (org.elasticsearch.action.bulk.BulkResponse)37 BulkRequestBuilder (org.elasticsearch.action.bulk.BulkRequestBuilder)16 BulkRequest (org.elasticsearch.action.bulk.BulkRequest)14 IOException (java.io.IOException)9 ArrayList (java.util.ArrayList)8 IndexRequest (org.elasticsearch.action.index.IndexRequest)8 ElasticsearchTimeoutException (org.elasticsearch.ElasticsearchTimeoutException)7 NoNodeAvailableException (org.elasticsearch.client.transport.NoNodeAvailableException)7 EsRejectedExecutionException (org.elasticsearch.common.util.concurrent.EsRejectedExecutionException)7 NodeClosedException (org.elasticsearch.node.NodeClosedException)7 ReceiveTimeoutTransportException (org.elasticsearch.transport.ReceiveTimeoutTransportException)7 List (java.util.List)6 DocWriteRequest (org.elasticsearch.action.DocWriteRequest)5 FailedNodeException (org.elasticsearch.action.FailedNodeException)5 DeleteRequest (org.elasticsearch.action.delete.DeleteRequest)5 IndexResponse (org.elasticsearch.action.index.IndexResponse)5 UpdateRequest (org.elasticsearch.action.update.UpdateRequest)5 TransportException (org.elasticsearch.transport.TransportException)5 BulkWriterResponse (org.apache.metron.common.writer.BulkWriterResponse)4