Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class FetchElasticsearch5, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    synchronized (esClient) {
        if (esClient.get() == null) {
            super.setup(context);
        }
    }
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final ComponentLog logger = getLogger();
    try {
        logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] { index, docType, docId });
        GetRequestBuilder getRequestBuilder = esClient.get().prepareGet(index, docType, docId);
        final GetResponse getResponse = getRequestBuilder.execute().actionGet();
        if (getResponse == null || !getResponse.isExists()) {
            logger.debug("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
            // We couldn't find the document, so penalize it and send it to "not found"
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_NOT_FOUND);
        } else {
            flowFile = session.putAllAttributes(flowFile, new HashMap<String, String>() {
                {
                    put("filename", docId);
                    put("es.index", index);
                    put("es.type", docType);
                }
            });
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(OutputStream out) throws IOException {
                    out.write(getResponse.getSourceAsString().getBytes(charset));
                }
            });
            logger.debug("Elasticsearch document " + docId + " fetched, routing to success");
            // The document is JSON, so update the MIME type of the flow file
            flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
            session.getProvenanceReporter().fetch(flowFile, getResponse.remoteAddress().getAddress());
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        logger.error("Failed to read into Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.), or this issue may be transient. Routing to retry", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFile, REL_RETRY);
        context.yield();
    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
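Since OutputStreamCallback declares a single process(OutputStream) method, the anonymous class used in the write above can be collapsed to a lambda on Java 8+. A minimal sketch, reusing the getResponse and charset variables from the example (this is not how the NiFi source is actually written):

    // Equivalent lambda form of the session.write(...) call shown above;
    // getResponse and charset must be effectively final to be captured.
    flowFile = session.write(flowFile, out -> out.write(getResponse.getSourceAsString().getBytes(charset)));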
Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class FetchElasticsearch, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final ComponentLog logger = getLogger();
    try {
        logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] { index, docType, docId });
        final long startNanos = System.nanoTime();
        GetRequestBuilder getRequestBuilder = esClient.get().prepareGet(index, docType, docId);
        if (authToken != null) {
            getRequestBuilder.putHeader("Authorization", authToken);
        }
        final GetResponse getResponse = getRequestBuilder.execute().actionGet();
        if (getResponse == null || !getResponse.isExists()) {
            logger.debug("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
            // We couldn't find the document, so penalize it and send it to "not found"
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_NOT_FOUND);
        } else {
            flowFile = session.putAttribute(flowFile, "filename", docId);
            flowFile = session.putAttribute(flowFile, "es.index", index);
            flowFile = session.putAttribute(flowFile, "es.type", docType);
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(OutputStream out) throws IOException {
                    out.write(getResponse.getSourceAsString().getBytes(charset));
                }
            });
            logger.debug("Elasticsearch document " + docId + " fetched, routing to success");
            final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
            final String uri = context.getProperty(HOSTS).evaluateAttributeExpressions().getValue() + "/" + index + "/" + docType + "/" + docId;
            session.getProvenanceReporter().fetch(flowFile, uri, millis);
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        logger.error("Failed to read into Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.). Routing to retry", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFile, REL_RETRY);
        context.yield();
    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
    }
}
Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class ExtractEmailAttachments, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();
    final String requireStrictAddresses = "false";
    session.read(originalFlowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC-2822 determines that a message must have a "From:" header;
                // if a message lacks the field, it is flagged as invalid
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessagingException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC2822 validation: No Sent Date");
                }
                originalFlowFilesList.add(originalFlowFile);
                if (parser.hasAttachments()) {
                    final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                    try {
                        for (final DataSource data : parser.getAttachmentList()) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();
                            if (StringUtils.isNotBlank(data.getName())) {
                                attributes.put(CoreAttributes.FILENAME.key(), data.getName());
                            }
                            if (StringUtils.isNotBlank(data.getContentType())) {
                                attributes.put(CoreAttributes.MIME_TYPE.key(), data.getContentType());
                            }
                            String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
                            split = session.append(split, new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    IOUtils.copy(data.getInputStream(), out);
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong, so remove any splits that may have been created
                        session.remove(attachmentsList);
                        // Remove the original FlowFile from its list
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Another error was hit, so remove the original FlowFile from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                // Message is invalid or triggered an error during parsing
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    session.transfer(attachmentsList, REL_ATTACHMENTS);
    // As per the above code, originalFlowFile may be routed to invalid or
    // original depending on RFC-2822 compliance.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);
    if (attachmentsList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
    } else if (attachmentsList.size() > 1) {
        logger.info("Split {} into {} files: {}", new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
    }
}
Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class MalformedChunkHandlerTest, method testHandle:
@Test
public void testHandle() {
    String name = "name";
    byte[] badChunk = { 8 };
    FlowFile original = mock(FlowFile.class);
    FlowFile updated1 = mock(FlowFile.class);
    FlowFile updated2 = mock(FlowFile.class);
    FlowFile updated3 = mock(FlowFile.class);
    FlowFile updated4 = mock(FlowFile.class);
    ProcessSession session = mock(ProcessSession.class);
    when(session.create(original)).thenReturn(updated1);
    when(session.putAttribute(updated1, CoreAttributes.FILENAME.key(), name)).thenReturn(updated2);
    when(session.putAttribute(updated2, CoreAttributes.MIME_TYPE.key(), MediaType.APPLICATION_BINARY.toString())).thenReturn(updated3);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    when(session.write(eq(updated3), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
        ((OutputStreamCallback) invocation.getArguments()[1]).process(out);
        return updated4;
    });
    malformedChunkHandler.handle(original, session, name, badChunk);
    verify(session).transfer(updated4, badChunkRelationship);
    assertArrayEquals(badChunk, out.toByteArray());
}
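An alternative way to exercise the callback in a test like this is to capture it with Mockito's ArgumentCaptor instead of stubbing session.write with an Answer. A hedged sketch of that variant (not part of the NiFi test suite; the test method would need to declare the IOException):

    // Stub the write to return the next FlowFile revision, then capture the callback afterwards.
    when(session.write(eq(updated3), any(OutputStreamCallback.class))).thenReturn(updated4);
    malformedChunkHandler.handle(original, session, name, badChunk);

    ArgumentCaptor<OutputStreamCallback> callbackCaptor = ArgumentCaptor.forClass(OutputStreamCallback.class);
    verify(session).write(eq(updated3), callbackCaptor.capture());

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    callbackCaptor.getValue().process(out); // run the captured callback against a real stream
    assertArrayEquals(badChunk, out.toByteArray());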
Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class ParseEvtxTest, method testProcessChunkGranularity:
@Test
public void testProcessChunkGranularity() throws IOException, MalformedChunkException, XMLStreamException {
    String basename = "basename";
    int chunkNum = 5;
    int offset = 10001;
    byte[] badChunk = { 8 };
    RootNodeHandler rootNodeHandler1 = mock(RootNodeHandler.class);
    RootNodeHandler rootNodeHandler2 = mock(RootNodeHandler.class);
    OutputStream out2 = mock(OutputStream.class);
    when(rootNodeHandlerFactory.create(out)).thenReturn(rootNodeHandler1);
    when(rootNodeHandlerFactory.create(out2)).thenReturn(rootNodeHandler2);
    ChunkHeader chunkHeader1 = mock(ChunkHeader.class);
    ChunkHeader chunkHeader2 = mock(ChunkHeader.class);
    Record record1 = mock(Record.class);
    Record record2 = mock(Record.class);
    Record record3 = mock(Record.class);
    RootNode rootNode1 = mock(RootNode.class);
    RootNode rootNode2 = mock(RootNode.class);
    RootNode rootNode3 = mock(RootNode.class);
    ProcessSession session = mock(ProcessSession.class);
    FlowFile flowFile = mock(FlowFile.class);
    FlowFile created1 = mock(FlowFile.class);
    FlowFile updated1 = mock(FlowFile.class);
    FlowFile created2 = mock(FlowFile.class);
    FlowFile updated2 = mock(FlowFile.class);
    MalformedChunkException malformedChunkException = new MalformedChunkException("Test", null, offset, chunkNum, badChunk);
    when(session.create(flowFile)).thenReturn(created1).thenReturn(created2).thenReturn(null);
    when(session.write(eq(created1), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
        ((OutputStreamCallback) invocation.getArguments()[1]).process(out);
        return updated1;
    });
    when(session.write(eq(created2), any(OutputStreamCallback.class))).thenAnswer(invocation -> {
        ((OutputStreamCallback) invocation.getArguments()[1]).process(out2);
        return updated2;
    });
    when(record1.getRootNode()).thenReturn(rootNode1);
    when(record2.getRootNode()).thenReturn(rootNode2);
    when(record3.getRootNode()).thenReturn(rootNode3);
    when(fileHeader.hasNext()).thenReturn(true).thenReturn(true).thenReturn(true).thenReturn(false);
    when(fileHeader.next()).thenThrow(malformedChunkException).thenReturn(chunkHeader1).thenReturn(chunkHeader2).thenReturn(null);
    when(chunkHeader1.hasNext()).thenReturn(true).thenReturn(false);
    when(chunkHeader1.next()).thenReturn(record1).thenReturn(null);
    when(chunkHeader2.hasNext()).thenReturn(true).thenReturn(true).thenReturn(false);
    when(chunkHeader2.next()).thenReturn(record2).thenReturn(record3).thenReturn(null);
    parseEvtx.processChunkGranularity(session, componentLog, flowFile, basename, in);
    verify(malformedChunkHandler).handle(flowFile, session, parseEvtx.getName(basename, chunkNum, null, ParseEvtx.EVTX_EXTENSION), badChunk);
    verify(rootNodeHandler1).handle(rootNode1);
    verify(rootNodeHandler1).close();
    verify(rootNodeHandler2).handle(rootNode2);
    verify(rootNodeHandler2).handle(rootNode3);
    verify(rootNodeHandler2).close();
}
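The common thread in all of these examples is passing an OutputStreamCallback to ProcessSession.write (or append) so that FlowFile content is streamed rather than built up in memory. A minimal, self-contained sketch of that pattern in a hypothetical processor's onTrigger (the payload and the REL_SUCCESS relationship are illustrative, not taken from the examples above):

    // Imports used by this fragment (they would sit at the top of the processor class):
    // java.io.IOException, java.io.OutputStream, java.nio.charset.StandardCharsets,
    // org.apache.nifi.flowfile.FlowFile, org.apache.nifi.processor.io.OutputStreamCallback
    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }
        // Illustrative payload; a real processor would derive this from properties or an external service.
        final byte[] payload = "{\"hello\":\"world\"}".getBytes(StandardCharsets.UTF_8);
        flowFile = session.write(flowFile, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(payload); // replaces the FlowFile content with the payload
            }
        });
        session.transfer(flowFile, REL_SUCCESS); // REL_SUCCESS assumed to be defined on the processor
    }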