Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class PutElasticsearch, method onTrigger.
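For context, every example on this page implements the same single-method callback contract. A minimal sketch of the interface, paraphrased from the NiFi API (the comment wording here is ours, not the project's):

public interface InputStreamCallback {
    // Called with a managed stream over the FlowFile's content. The method
    // returns nothing, so any result must be captured via side effects.
    void process(InputStream in) throws IOException;
}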
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    final String id_attribute = context.getProperty(ID_ATTRIBUTE).getValue();
    final int batchSize = context.getProperty(BATCH_SIZE).evaluateAttributeExpressions().asInteger();
    final List<FlowFile> flowFiles = session.get(batchSize);
    if (flowFiles.isEmpty()) {
        return;
    }
    // Keep track of the list of flow files that need to be transferred. As they are transferred, remove them from the list.
    List<FlowFile> flowFilesToTransfer = new LinkedList<>(flowFiles);
    try {
        final BulkRequestBuilder bulk = esClient.get().prepareBulk();
        if (authToken != null) {
            bulk.putHeader("Authorization", authToken);
        }
        for (FlowFile file : flowFiles) {
            final String index = context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue();
            final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(file).getValue();
            final String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(file).getValue();
            final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(file).getValue());
            final String id = file.getAttribute(id_attribute);
            if (id == null) {
                logger.error("No value in identifier attribute {} for {}, transferring to failure", new Object[] { id_attribute, file });
                flowFilesToTransfer.remove(file);
                session.transfer(file, REL_FAILURE);
            } else {
                session.read(file, new InputStreamCallback() {
                    @Override
                    public void process(final InputStream in) throws IOException {
                        String json = IOUtils.toString(in, charset).replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
                        if (indexOp.equalsIgnoreCase("index")) {
                            bulk.add(esClient.get().prepareIndex(index, docType, id).setSource(json.getBytes(charset)));
                        } else if (indexOp.equalsIgnoreCase("upsert")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)).setDocAsUpsert(true));
                        } else if (indexOp.equalsIgnoreCase("update")) {
                            bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)));
                        } else {
                            throw new IOException("Index operation: " + indexOp + " not supported.");
                        }
                    }
                });
            }
        }
        final BulkResponse response = bulk.execute().actionGet();
        if (response.hasFailures()) {
            // Responses are guaranteed to be in order; remove them in reverse order
            BulkItemResponse[] responses = response.getItems();
            if (responses != null && responses.length > 0) {
                for (int i = responses.length - 1; i >= 0; i--) {
                    final FlowFile flowFile = flowFilesToTransfer.get(i);
                    if (responses[i].isFailed()) {
                        logger.error("Failed to insert {} into Elasticsearch due to {}, transferring to failure", new Object[] { flowFile, responses[i].getFailure().getMessage() });
                        session.transfer(flowFile, REL_FAILURE);
                    } else {
                        session.getProvenanceReporter().send(flowFile, context.getProperty(HOSTS).evaluateAttributeExpressions().getValue() + "/" + responses[i].getIndex());
                        session.transfer(flowFile, REL_SUCCESS);
                    }
                    flowFilesToTransfer.remove(flowFile);
                }
            }
        }
        // Transfer any remaining flowfiles to success
        flowFilesToTransfer.forEach(file -> {
            session.transfer(file, REL_SUCCESS);
            // Record provenance event
            session.getProvenanceReporter().send(file, context.getProperty(HOSTS).evaluateAttributeExpressions().getValue() + "/" + context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue());
        });
    } catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
        // Authorization errors and other problems are often returned as NoNodeAvailableExceptions without a
        // traceable cause. However, the cause seems to be logged, just not available to this caught exception.
        // Since the error message will show up as a bulletin, we make specific mention to check the logs for
        // more details.
        logger.error("Failed to insert into Elasticsearch due to {}. More detailed information may be available in the NiFi logs.", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
        session.transfer(flowFilesToTransfer, REL_RETRY);
        context.yield();
    } catch (Exception exceptionToFail) {
        logger.error("Failed to insert into Elasticsearch due to {}, transferring to failure", new Object[] { exceptionToFail.getLocalizedMessage() }, exceptionToFail);
        session.transfer(flowFilesToTransfer, REL_FAILURE);
        context.yield();
    }
}
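Because process(...) returns void, the example above mutates the shared BulkRequestBuilder from inside the callback, and flowFilesToTransfer stays aligned, by position, with the bulk entries. The reverse-order removal is what keeps that positional alignment valid while items are removed. The following standalone sketch (hypothetical data, not NiFi code) illustrates why iterating from the tail never shifts the indices still to be visited:

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

public class ReverseRemovalSketch {
    public static void main(String[] args) {
        // Stand-in for flowFilesToTransfer: positions match a parallel "responses" array.
        List<String> pending = new LinkedList<>(Arrays.asList("doc-0", "doc-1", "doc-2"));
        boolean[] failed = { false, true, false }; // stand-in for responses[i].isFailed()
        for (int i = pending.size() - 1; i >= 0; i--) {
            String item = pending.get(i);
            System.out.println(item + (failed[i] ? " -> REL_FAILURE" : " -> REL_SUCCESS"));
            // Removing from the tail leaves indices 0..i-1 untouched, so
            // pending.get(i) still lines up with failed[i] on the next pass.
            pending.remove(i);
        }
        // Anything still in the list would be routed to success, mirroring the processor.
        System.out.println("remaining: " + pending);
    }
}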
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class ExtractEmailAttachments, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();
    final String requireStrictAddresses = "false";
    session.read(originalFlowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC 2822 requires that a message have a "From:" header;
                // if the field is missing, the message is flagged as invalid.
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessagingException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC-2822 validation: No Sent Date");
                }
                originalFlowFilesList.add(originalFlowFile);
                if (parser.hasAttachments()) {
                    final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                    try {
                        for (final DataSource data : parser.getAttachmentList()) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();
                            if (StringUtils.isNotBlank(data.getName())) {
                                attributes.put(CoreAttributes.FILENAME.key(), data.getName());
                            }
                            if (StringUtils.isNotBlank(data.getContentType())) {
                                attributes.put(CoreAttributes.MIME_TYPE.key(), data.getContentType());
                            }
                            String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
                            split = session.append(split, new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    IOUtils.copy(data.getInputStream(), out);
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong; remove any splits that may have been created
                        session.remove(attachmentsList);
                        // Remove the original flow file from its list
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error("Flowfile {} triggered error {} while processing message; removing generated FlowFiles from session", new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Message is invalid or triggered an error during parsing;
                // remove the original flow file from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile }, e);
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    session.transfer(attachmentsList, REL_ATTACHMENTS);
    // As per the code above, the original FlowFile may be routed to failure or
    // original depending on RFC 2822 compliance.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);
    if (attachmentsList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
    } else if (attachmentsList.size() > 1) {
        logger.info("Split {} into {} files: {}", new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
    }
}
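A minimal sketch of exercising this processor with NiFi's mock framework. The sample file path and the expected attachment count are hypothetical; the relationship constants follow the names used above:

import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

public class ExtractEmailAttachmentsSketch {
    public static void main(String[] args) throws Exception {
        TestRunner runner = TestRunners.newTestRunner(new ExtractEmailAttachments());
        // Hypothetical sample: any RFC 2822 message with exactly one attachment would do.
        runner.enqueue(Files.readAllBytes(Paths.get("sample-with-attachment.eml")));
        runner.run();
        // The attachment is split to REL_ATTACHMENTS; the parsed message goes to REL_ORIGINAL.
        runner.assertTransferCount(ExtractEmailAttachments.REL_ATTACHMENTS, 1);
        runner.assertTransferCount(ExtractEmailAttachments.REL_ORIGINAL, 1);
        runner.assertTransferCount(ExtractEmailAttachments.REL_FAILURE, 0);
    }
}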
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class ExtractImageMetadata, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowfile = session.get();
    if (flowfile == null) {
        return;
    }
    final ComponentLog logger = this.getLogger();
    final AtomicReference<Metadata> value = new AtomicReference<>(null);
    final Integer max = context.getProperty(MAX_NUMBER_OF_ATTRIBUTES).asInteger();
    try {
        session.read(flowfile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                try {
                    Metadata imageMetadata = ImageMetadataReader.readMetadata(in);
                    value.set(imageMetadata);
                } catch (ImageProcessingException ex) {
                    throw new ProcessException(ex);
                }
            }
        });
        Metadata metadata = value.get();
        Map<String, String> results = getTags(max, metadata);
        // Write the results to attributes
        if (!results.isEmpty()) {
            flowfile = session.putAllAttributes(flowfile, results);
        }
        session.transfer(flowfile, SUCCESS);
    } catch (ProcessException e) {
        logger.error("Failed to extract image metadata from {} due to {}", new Object[] { flowfile, e });
        session.transfer(flowfile, FAILURE);
    }
}
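The AtomicReference here is the standard way to get a value out of the void-returning callback. A stripped-down sketch of the pattern outside NiFi (the interface and names below are stand-ins, not NiFi classes):

import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.atomic.AtomicReference;

public class CaptureSketch {
    interface StreamCallback { // stand-in for InputStreamCallback
        void process(InputStream in) throws IOException;
    }

    static String readFirstByteAsHex(InputStream source) throws IOException {
        // The callback cannot return a value, so the result is smuggled
        // out through an effectively-final reference.
        final AtomicReference<String> result = new AtomicReference<>(null);
        StreamCallback callback = in -> result.set(String.format("%02x", in.read()));
        callback.process(source); // in NiFi this call is session.read(flowFile, callback)
        return result.get();
    }
}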
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class ExtractMediaMetadata, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = this.getLogger();
    final AtomicReference<Map<String, String>> value = new AtomicReference<>(null);
    final Integer maxAttribCount = context.getProperty(MAX_NUMBER_OF_ATTRIBUTES).asInteger();
    final Integer maxAttribLength = context.getProperty(MAX_ATTRIBUTE_LENGTH).asInteger();
    final String prefix = context.getProperty(METADATA_KEY_PREFIX).evaluateAttributeExpressions(flowFile).getValue();
    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                try {
                    Map<String, String> results = tika_parse(in, prefix, maxAttribCount, maxAttribLength);
                    value.set(results);
                } catch (SAXException | TikaException e) {
                    throw new IOException(e);
                }
            }
        });
        // Write the results to attributes
        Map<String, String> results = value.get();
        if (results != null && !results.isEmpty()) {
            flowFile = session.putAllAttributes(flowFile, results);
        }
        session.transfer(flowFile, SUCCESS);
        session.getProvenanceReporter().modifyAttributes(flowFile, "media attributes extracted");
    } catch (ProcessException e) {
        logger.error("Failed to extract media metadata from {} due to {}", new Object[] { flowFile, e });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, FAILURE);
    }
}
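tika_parse is a private helper of the processor and is not shown on this page. A plausible minimal sketch of what such a helper could look like with Apache Tika's auto-detecting parser; the method name, prefix handling, and limit enforcement below are our assumptions, not the processor's actual implementation:

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.SAXException;

public class TikaParseSketch {
    // Hypothetical stand-in for the processor's private tika_parse helper.
    static Map<String, String> tikaParseSketch(InputStream in, String prefix, Integer maxCount, Integer maxLength)
            throws IOException, SAXException, TikaException {
        Metadata metadata = new Metadata();
        // BodyContentHandler(-1) disables the write limit; only the metadata is kept.
        new AutoDetectParser().parse(in, new BodyContentHandler(-1), metadata, new ParseContext());
        Map<String, String> results = new HashMap<>();
        for (String name : metadata.names()) {
            if (maxCount != null && results.size() >= maxCount) {
                break; // honor the attribute-count cap
            }
            String value = metadata.get(name);
            if (maxLength != null && value != null && value.length() > maxLength) {
                value = value.substring(0, maxLength); // honor the attribute-length cap
            }
            results.put((prefix == null ? "" : prefix) + name, value);
        }
        return results;
    }
}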
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class PublishMQTT, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowfile = session.get();
    if (flowfile == null) {
        return;
    }
    if (mqttClient == null || !mqttClient.isConnected()) {
        logger.info("Was disconnected from client or was never connected, attempting to connect.");
        try {
            reconnect();
        } catch (MqttException e) {
            context.yield();
            session.transfer(flowfile, REL_FAILURE);
            logger.error("MQTT client is disconnected and re-connecting failed. Transferring FlowFile to failure and yielding", e);
            return;
        }
    }
    // Get the MQTT topic
    String topic = context.getProperty(PROP_TOPIC).evaluateAttributeExpressions(flowfile).getValue();
    if (topic == null || topic.isEmpty()) {
        logger.warn("Evaluation of the topic property returned null or evaluated to be empty, routing to failure");
        session.transfer(flowfile, REL_FAILURE);
        return;
    }
    // Do the read
    final byte[] messageContent = new byte[(int) flowfile.getSize()];
    session.read(flowfile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, messageContent, true);
        }
    });
    int qos = context.getProperty(PROP_QOS).evaluateAttributeExpressions(flowfile).asInteger();
    final MqttMessage mqttMessage = new MqttMessage(messageContent);
    mqttMessage.setQos(qos);
    mqttMessage.setPayload(messageContent);
    mqttMessage.setRetained(context.getProperty(PROP_RETAIN).evaluateAttributeExpressions(flowfile).asBoolean());
    try {
        mqttClientConnectLock.readLock().lock();
        final StopWatch stopWatch = new StopWatch(true);
        try {
            /*
             * The underlying method waits for the message to publish (according to the set QoS), so it executes synchronously:
             * MqttClient.java:361 aClient.publish(topic, message, null, null).waitForCompletion(getTimeToWait());
             */
            mqttClient.publish(topic, mqttMessage);
        } finally {
            mqttClientConnectLock.readLock().unlock();
        }
        session.getProvenanceReporter().send(flowfile, broker, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        session.transfer(flowfile, REL_SUCCESS);
    } catch (MqttException me) {
        logger.error("Failed to publish message.", me);
        session.transfer(flowfile, REL_FAILURE);
    }
}
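Because a FlowFile's size is known up front, the read callback can fill an exact-size buffer in one pass. Since InputStreamCallback has a single abstract method, the same read can also be written as a lambda; a sketch of the equivalent lines inside this onTrigger:

// Equivalent lambda form of the read above: one exact-size pass over the content.
final byte[] messageContent = new byte[(int) flowfile.getSize()];
session.read(flowfile, in -> StreamUtils.fillBuffer(in, messageContent, true));
// Passing true to fillBuffer makes it throw if the stream ends before the
// buffer is full, so a short read surfaces as an IOException rather than
// a silently truncated payload.

As a side note, Paho's MqttMessage(byte[]) constructor already sets the payload, so the explicit setPayload(messageContent) call in the example above is redundant but harmless.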