
Example 11 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class FetchElasticsearch5, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    synchronized (esClient) {
        if (esClient.get() == null) {
            super.setup(context);
        }
    }
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final ComponentLog logger = getLogger();
    try {
        logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] { index, docType, docId });
        GetRequestBuilder getRequestBuilder = esClient.get().prepareGet(index, docType, docId);
        final GetResponse getResponse = getRequestBuilder.execute().actionGet();
        if (getResponse == null || !getResponse.isExists()) {
            logger.debug("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
            // We couldn't find the document, so penalize it and send it to "not found"
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_NOT_FOUND);
        } else {
            flowFile = session.putAllAttributes(flowFile, new HashMap<String, String>() {

                {
                    put("filename", docId);
                    put("es.index", index);
                    put("es.type", docType);
                }
            });
            flowFile = session.write(flowFile, new OutputStreamCallback() {

                @Override
                public void process(OutputStream out) throws IOException {
                    out.write(getResponse.getSourceAsString().getBytes(charset));
                }
            });
            logger.debug("Elasticsearch document " + docId + " fetched, routing to success");
            // The document is JSON, so update the MIME type of the flow file
            flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
            session.getProvenanceReporter().fetch(flowFile, getResponse.remoteAddress().getAddress());
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        logger.error("Failed to read into Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.), or this issue may be transient. Routing to retry", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFile, REL_RETRY);
        context.yield();
    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) Charset(java.nio.charset.Charset) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) ComponentLog(org.apache.nifi.logging.ComponentLog) GetResponse(org.elasticsearch.action.get.GetResponse) NodeClosedException(org.elasticsearch.node.NodeClosedException) ProcessException(org.apache.nifi.processor.exception.ProcessException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) IOException(java.io.IOException) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) GetRequestBuilder(org.elasticsearch.action.get.GetRequestBuilder)
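Since OutputStreamCallback declares a single process method, the anonymous class above collapses to a lambda on Java 8+. A minimal sketch of the same write, where writeJson and json are hypothetical stand-ins for the processor method and getResponse.getSourceAsString():

import java.nio.charset.Charset;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

public class WriteJsonSketch {

    // Replaces the FlowFile's content with the given JSON string, encoded with
    // the configured charset; session.write returns the updated FlowFile reference.
    static FlowFile writeJson(ProcessSession session, FlowFile flowFile, String json, Charset charset) {
        return session.write(flowFile, out -> out.write(json.getBytes(charset)));
    }
}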

Example 12 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class FetchElasticsearch, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final ComponentLog logger = getLogger();
    try {
        logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] { index, docType, docId });
        final long startNanos = System.nanoTime();
        GetRequestBuilder getRequestBuilder = esClient.get().prepareGet(index, docType, docId);
        if (authToken != null) {
            getRequestBuilder.putHeader("Authorization", authToken);
        }
        final GetResponse getResponse = getRequestBuilder.execute().actionGet();
        if (getResponse == null || !getResponse.isExists()) {
            logger.debug("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
            // We couldn't find the document, so penalize it and send it to "not found"
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_NOT_FOUND);
        } else {
            flowFile = session.putAttribute(flowFile, "filename", docId);
            flowFile = session.putAttribute(flowFile, "es.index", index);
            flowFile = session.putAttribute(flowFile, "es.type", docType);
            flowFile = session.write(flowFile, new OutputStreamCallback() {

                @Override
                public void process(OutputStream out) throws IOException {
                    out.write(getResponse.getSourceAsString().getBytes(charset));
                }
            });
            logger.debug("Elasticsearch document " + docId + " fetched, routing to success");
            final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
            final String uri = context.getProperty(HOSTS).evaluateAttributeExpressions().getValue() + "/" + index + "/" + docType + "/" + docId;
            session.getProvenanceReporter().fetch(flowFile, uri, millis);
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        logger.error("Failed to read into Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.). Routing to retry", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFile, REL_RETRY);
        context.yield();
    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) OutputStream(java.io.OutputStream) Charset(java.nio.charset.Charset) NoNodeAvailableException(org.elasticsearch.client.transport.NoNodeAvailableException) ComponentLog(org.apache.nifi.logging.ComponentLog) GetResponse(org.elasticsearch.action.get.GetResponse) NodeClosedException(org.elasticsearch.node.NodeClosedException) ProcessException(org.apache.nifi.processor.exception.ProcessException) ElasticsearchTimeoutException(org.elasticsearch.ElasticsearchTimeoutException) ReceiveTimeoutTransportException(org.elasticsearch.transport.ReceiveTimeoutTransportException) IOException(java.io.IOException) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) GetRequestBuilder(org.elasticsearch.action.get.GetRequestBuilder)
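The timing pattern above, nanoTime captured before the request and elapsed milliseconds reported on the provenance fetch event, is reusable on its own. A hedged sketch, with reportFetch as a hypothetical helper name:

import java.util.concurrent.TimeUnit;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

public class ProvenanceTimingSketch {

    // Emits a provenance FETCH event that records how long the remote read took.
    static void reportFetch(ProcessSession session, FlowFile flowFile, String uri, long startNanos) {
        final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        session.getProvenanceReporter().fetch(flowFile, uri, millis);
    }
}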

Example 13 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class ExtractEmailAttachments, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();
    final String requireStrictAddresses = "false";
    session.read(originalFlowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC-2822 requires that a message have a "From:" header;
                // if a message lacks the field, it is flagged as invalid
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessagingException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC-2822 validation: No Sent Date");
                }
                originalFlowFilesList.add(originalFlowFile);
                if (parser.hasAttachments()) {
                    final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                    try {
                        for (final DataSource data : parser.getAttachmentList()) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();
                            if (StringUtils.isNotBlank(data.getName())) {
                                attributes.put(CoreAttributes.FILENAME.key(), data.getName());
                            }
                            if (StringUtils.isNotBlank(data.getContentType())) {
                                attributes.put(CoreAttributes.MIME_TYPE.key(), data.getContentType());
                            }
                            String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
                            split = session.append(split, new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    IOUtils.copy(data.getInputStream(), out);
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong
                        // Removing splits that may have been created
                        session.remove(attachmentsList);
                        // Removing the original flow from its list
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Another error hit...
                // Removing the original flow from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                // Message is invalid or triggered an error during parsing
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    session.transfer(attachmentsList, REL_ATTACHMENTS);
    // As per the code above, the original FlowFile may be routed to failure or
    // original depending on RFC-2822 compliance.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);
    if (attachmentsList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
    } else if (attachmentsList.size() > 1) {
        logger.info("Split {} into {} files: {}", new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) MessagingException(javax.mail.MessagingException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Properties(java.util.Properties) MimeMessageParser(org.apache.commons.mail.util.MimeMessageParser) ComponentLog(org.apache.nifi.logging.ComponentLog) Date(java.util.Date) FlowFileHandlingException(org.apache.nifi.processor.exception.FlowFileHandlingException) DataSource(javax.activation.DataSource) MimeMessage(javax.mail.internet.MimeMessage) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) HashMap(java.util.HashMap) Map(java.util.Map) ProcessSession(org.apache.nifi.processor.ProcessSession) Session(javax.mail.Session)
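The core move above is session.append with an OutputStreamCallback to stream each attachment's bytes into a child FlowFile. A minimal sketch of that step alone, assuming a javax.activation.DataSource and a hypothetical appendAttachment helper:

import javax.activation.DataSource;
import org.apache.commons.io.IOUtils;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

public class AppendAttachmentSketch {

    // Creates a child FlowFile from the parent and copies one attachment's
    // bytes into it; the returned FlowFile reference must be used afterwards.
    static FlowFile appendAttachment(ProcessSession session, FlowFile parent, DataSource data) {
        FlowFile split = session.create(parent);
        return session.append(split, out -> IOUtils.copy(data.getInputStream(), out));
    }
}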

Example 14 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class MalformedChunkHandlerTest, method testHandle.

@Test
public void testHandle() {
    String name = "name";
    byte[] badChunk = { 8 };
    FlowFile original = mock(FlowFile.class);
    FlowFile updated1 = mock(FlowFile.class);
    FlowFile updated2 = mock(FlowFile.class);
    FlowFile updated3 = mock(FlowFile.class);
    FlowFile updated4 = mock(FlowFile.class);
    ProcessSession session = mock(ProcessSession.class);
    when(session.create(original)).thenReturn(updated1);
    when(session.putAttribute(updated1, CoreAttributes.FILENAME.key(), name)).thenReturn(updated2);
    when(session.putAttribute(updated2, CoreAttributes.MIME_TYPE.key(), MediaType.APPLICATION_BINARY.toString())).thenReturn(updated3);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    when(session.write(eq(updated3), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
        ((OutputStreamCallback) invocation.getArguments()[1]).process(out);
        return updated4;
    });
    malformedChunkHandler.handle(original, session, name, badChunk);
    verify(session).transfer(updated4, badChunkRelationship);
    assertArrayEquals(badChunk, out.toByteArray());
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) FlowFile(org.apache.nifi.flowfile.FlowFile) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) Test(org.junit.Test)
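The thenAnswer stub above, which invokes the captured OutputStreamCallback against a ByteArrayOutputStream, is a common way to assert written content without a real content repository. A minimal sketch, with captureWrite as a hypothetical helper name:

import java.io.ByteArrayOutputStream;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.io.OutputStreamCallback;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.eq;
import static org.mockito.Mockito.when;

public class CaptureWriteSketch {

    // Stubs session.write so the callback runs against an in-memory stream,
    // letting the test assert on the bytes the processor would have written.
    static ByteArrayOutputStream captureWrite(ProcessSession session, FlowFile input, FlowFile result) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        when(session.write(eq(input), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
            ((OutputStreamCallback) invocation.getArguments()[1]).process(out);
            return result;
        });
        return out;
    }
}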

Example 15 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class ParseEvtxTest, method testProcessChunkGranularity.

@Test
public void testProcessChunkGranularity() throws IOException, MalformedChunkException, XMLStreamException {
    String basename = "basename";
    int chunkNum = 5;
    int offset = 10001;
    byte[] badChunk = { 8 };
    RootNodeHandler rootNodeHandler1 = mock(RootNodeHandler.class);
    RootNodeHandler rootNodeHandler2 = mock(RootNodeHandler.class);
    OutputStream out2 = mock(OutputStream.class);
    when(rootNodeHandlerFactory.create(out)).thenReturn(rootNodeHandler1);
    when(rootNodeHandlerFactory.create(out2)).thenReturn(rootNodeHandler2);
    ChunkHeader chunkHeader1 = mock(ChunkHeader.class);
    ChunkHeader chunkHeader2 = mock(ChunkHeader.class);
    Record record1 = mock(Record.class);
    Record record2 = mock(Record.class);
    Record record3 = mock(Record.class);
    RootNode rootNode1 = mock(RootNode.class);
    RootNode rootNode2 = mock(RootNode.class);
    RootNode rootNode3 = mock(RootNode.class);
    ProcessSession session = mock(ProcessSession.class);
    FlowFile flowFile = mock(FlowFile.class);
    FlowFile created1 = mock(FlowFile.class);
    FlowFile updated1 = mock(FlowFile.class);
    FlowFile created2 = mock(FlowFile.class);
    FlowFile updated2 = mock(FlowFile.class);
    MalformedChunkException malformedChunkException = new MalformedChunkException("Test", null, offset, chunkNum, badChunk);
    when(session.create(flowFile)).thenReturn(created1).thenReturn(created2).thenReturn(null);
    when(session.write(eq(created1), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
        ((OutputStreamCallback) invocation.getArguments()[1]).process(out);
        return updated1;
    });
    when(session.write(eq(created2), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
        ((OutputStreamCallback) invocation.getArguments()[1]).process(out2);
        return updated2;
    });
    when(record1.getRootNode()).thenReturn(rootNode1);
    when(record2.getRootNode()).thenReturn(rootNode2);
    when(record3.getRootNode()).thenReturn(rootNode3);
    when(fileHeader.hasNext()).thenReturn(true).thenReturn(true).thenReturn(true).thenReturn(false);
    when(fileHeader.next()).thenThrow(malformedChunkException).thenReturn(chunkHeader1).thenReturn(chunkHeader2).thenReturn(null);
    when(chunkHeader1.hasNext()).thenReturn(true).thenReturn(false);
    when(chunkHeader1.next()).thenReturn(record1).thenReturn(null);
    when(chunkHeader2.hasNext()).thenReturn(true).thenReturn(true).thenReturn(false);
    when(chunkHeader2.next()).thenReturn(record2).thenReturn(record3).thenReturn(null);
    parseEvtx.processChunkGranularity(session, componentLog, flowFile, basename, in);
    verify(malformedChunkHandler).handle(flowFile, session, parseEvtx.getName(basename, chunkNum, null, ParseEvtx.EVTX_EXTENSION), badChunk);
    verify(rootNodeHandler1).handle(rootNode1);
    verify(rootNodeHandler1).close();
    verify(rootNodeHandler2).handle(rootNode2);
    verify(rootNodeHandler2).handle(rootNode3);
    verify(rootNodeHandler2).close();
}
Also used : ProcessSession(org.apache.nifi.processor.ProcessSession) RootNode(org.apache.nifi.processors.evtx.parser.bxml.RootNode) FlowFile(org.apache.nifi.flowfile.FlowFile) MockFlowFile(org.apache.nifi.util.MockFlowFile) OutputStream(java.io.OutputStream) ChunkHeader(org.apache.nifi.processors.evtx.parser.ChunkHeader) Record(org.apache.nifi.processors.evtx.parser.Record) Mockito.anyString(org.mockito.Mockito.anyString) MalformedChunkException(org.apache.nifi.processors.evtx.parser.MalformedChunkException) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) Test(org.junit.Test)
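The chained thenReturn calls above script consecutive return values, so fileHeader and the chunk headers behave like iterators with a fixed sequence. A minimal Mockito sketch of the same consecutive-stubbing idea, with scriptedIterator as a hypothetical name:

import java.util.Iterator;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class ConsecutiveStubbingSketch {

    // hasNext() answers true, true, then false; next() answers "a", then "b".
    @SuppressWarnings("unchecked")
    static Iterator<String> scriptedIterator() {
        Iterator<String> it = mock(Iterator.class);
        when(it.hasNext()).thenReturn(true, true, false);
        when(it.next()).thenReturn("a", "b");
        return it;
    }
}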

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile): 70 uses
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 70 uses
OutputStream (java.io.OutputStream): 69 uses
IOException (java.io.IOException): 39 uses
ProcessException (org.apache.nifi.processor.exception.ProcessException): 27 uses
HashMap (java.util.HashMap): 25 uses
InputStream (java.io.InputStream): 24 uses
Test (org.junit.Test): 24 uses
MockFlowFile (org.apache.nifi.util.MockFlowFile): 23 uses
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 20 uses
ComponentLog (org.apache.nifi.logging.ComponentLog): 17 uses
FileOutputStream (java.io.FileOutputStream): 16 uses
FilterOutputStream (java.io.FilterOutputStream): 16 uses
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 14 uses
ArrayList (java.util.ArrayList): 12 uses
Map (java.util.Map): 12 uses
ProcessSession (org.apache.nifi.processor.ProcessSession): 12 uses
BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream): 10 uses
AtomicReference (java.util.concurrent.atomic.AtomicReference): 9 uses
StandardContentClaim (org.apache.nifi.controller.repository.claim.StandardContentClaim): 9 uses