Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class GetAzureEventHub, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // Check a partition id out of the shared queue; any ids not present are being processed by other threads.
    final BlockingQueue<String> partitionIds = this.partitionNames;
    final String partitionId = partitionIds.poll();
    if (partitionId == null) {
        getLogger().debug("No partitions available");
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    try {
        final Iterable<EventData> receivedEvents = receiveEvents(context, partitionId);
        if (receivedEvents == null) {
            return;
        }
        for (final EventData eventData : receivedEvents) {
            if (null != eventData) {
                final Map<String, String> attributes = new HashMap<>();
                FlowFile flowFile = session.create();
                // Copy the Event Hubs system properties onto the FlowFile as attributes.
                final EventData.SystemProperties systemProperties = eventData.getSystemProperties();
                if (null != systemProperties) {
                    attributes.put("eventhub.enqueued.timestamp", String.valueOf(systemProperties.getEnqueuedTime()));
                    attributes.put("eventhub.offset", systemProperties.getOffset());
                    attributes.put("eventhub.sequence", String.valueOf(systemProperties.getSequenceNumber()));
                }
                attributes.put("eventhub.name", context.getProperty(EVENT_HUB_NAME).getValue());
                attributes.put("eventhub.partition", partitionId);
                flowFile = session.putAllAttributes(flowFile, attributes);
                flowFile = session.write(flowFile, out -> {
                    out.write(eventData.getBytes());
                });
                session.transfer(flowFile, REL_SUCCESS);
                // Record a RECEIVE provenance event against the Event Hubs transit URI.
                final String namespace = context.getProperty(NAMESPACE).getValue();
                final String eventHubName = context.getProperty(EVENT_HUB_NAME).getValue();
                final String consumerGroup = context.getProperty(CONSUMER_GROUP).getValue();
                final String serviceBusEndPoint = context.getProperty(SERVICE_BUS_ENDPOINT).getValue();
                final String transitUri = "amqps://" + namespace + serviceBusEndPoint + "/" + eventHubName + "/ConsumerGroups/" + consumerGroup + "/Partitions/" + partitionId;
                session.getProvenanceReporter().receive(flowFile, transitUri, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
            }
        }
    } finally {
        // Always return the partition id so it can be polled again on a later trigger.
        partitionIds.offer(partitionId);
    }
}
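This onTrigger checks a partition id out of a shared BlockingQueue and returns it in the finally block, so concurrent triggers never read the same Event Hubs partition at once. A minimal standalone sketch of that check-out/check-in pattern (the queue contents, class, and method names below are illustrative, not part of the processor):

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

public class PartitionCheckoutSketch {

    // One entry per partition; poll() hands a partition to exactly one caller at a time.
    private final BlockingQueue<String> partitionIds = new ArrayBlockingQueue<>(4);

    public PartitionCheckoutSketch() {
        for (int i = 0; i < 4; i++) {
            partitionIds.offer(String.valueOf(i));
        }
    }

    public void processOnePartition() {
        final String partitionId = partitionIds.poll();
        if (partitionId == null) {
            return; // every partition is currently held by another thread
        }
        try {
            System.out.println("processing partition " + partitionId);
        } finally {
            // Always return the id, even on failure, or the partition is never polled again.
            partitionIds.offer(partitionId);
        }
    }

    public static void main(String[] args) {
        new PartitionCheckoutSketch().processOnePartition();
    }
}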
Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class FetchAzureBlobStorage, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final long startNanos = System.nanoTime();
    String containerName = context.getProperty(AzureStorageUtils.CONTAINER).evaluateAttributeExpressions(flowFile).getValue();
    String blobPath = context.getProperty(BLOB).evaluateAttributeExpressions(flowFile).getValue();
    AtomicReference<Exception> storedException = new AtomicReference<>();
    try {
        CloudBlobClient blobClient = AzureStorageUtils.createCloudBlobClient(context, getLogger(), flowFile);
        CloudBlobContainer container = blobClient.getContainerReference(containerName);
        final Map<String, String> attributes = new HashMap<>();
        final CloudBlob blob = container.getBlockBlobReference(blobPath);
        // TODO - we may be able to do fancier things with ranges and
        // distribution of download over threads, investigate
        flowFile = session.write(flowFile, os -> {
            try {
                blob.download(os);
            } catch (StorageException e) {
                storedException.set(e);
                throw new IOException(e);
            }
        });
        long length = blob.getProperties().getLength();
        attributes.put("azure.length", String.valueOf(length));
        if (!attributes.isEmpty()) {
            flowFile = session.putAllAttributes(flowFile, attributes);
        }
        session.transfer(flowFile, REL_SUCCESS);
        final long transferMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        session.getProvenanceReporter().fetch(flowFile, blob.getSnapshotQualifiedUri().toString(), transferMillis);
    } catch (IllegalArgumentException | URISyntaxException | StorageException | ProcessException e) {
        if (e instanceof ProcessException && storedException.get() == null) {
            throw (ProcessException) e;
        } else {
            Exception failureException = Optional.ofNullable(storedException.get()).orElse(e);
            getLogger().error("Failure to fetch Azure blob {}", new Object[] { blobPath }, failureException);
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_FAILURE);
        }
    }
}
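The notable detail above is the error handling inside the write callback: StorageException is checked, so it cannot escape the stream callback directly; the code stores it in an AtomicReference, rethrows it wrapped in an IOException, and later prefers the stored original over the wrapper when reporting the failure. A minimal sketch of that capture-and-unwrap pattern outside NiFi (the callback interface and exception type below are stand-ins, not the real NiFi or Azure classes):

import java.io.IOException;
import java.io.OutputStream;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicReference;

public class CallbackExceptionCaptureSketch {

    // Stand-in for org.apache.nifi.processor.io.OutputStreamCallback.
    interface StreamCallback {
        void process(OutputStream out) throws IOException;
    }

    // Stand-in for a checked SDK exception such as StorageException.
    static class DownloadException extends Exception {
        DownloadException(String message) {
            super(message);
        }
    }

    static void runCallback(OutputStream out, StreamCallback callback) throws IOException {
        callback.process(out);
    }

    public static void main(String[] args) {
        final AtomicReference<Exception> storedException = new AtomicReference<>();
        try {
            runCallback(System.out, out -> {
                try {
                    throw new DownloadException("simulated download failure");
                } catch (DownloadException e) {
                    storedException.set(e);   // keep the original cause
                    throw new IOException(e); // rethrow as the only checked type the callback allows
                }
            });
        } catch (IOException e) {
            // Report the stored SDK exception when present, otherwise the wrapper.
            Exception failureException = Optional.ofNullable(storedException.get()).orElse(e);
            System.err.println("failed: " + failureException);
        }
    }
}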
Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class FetchElasticsearchHttp, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = null;
    if (context.hasIncomingConnection()) {
        flowFile = session.get();
        // With connections from other processors (not just self-loops), only run when a FlowFile is available.
        if (flowFile == null && context.hasNonLoopConnection()) {
            return;
        }
    }
    OkHttpClient okHttpClient = getClient();
    if (flowFile == null) {
        flowFile = session.create();
    }
    final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
    final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
    final String fields = context.getProperty(FIELDS).isSet() ? context.getProperty(FIELDS).evaluateAttributeExpressions(flowFile).getValue() : null;
    // Authentication
    final String username = context.getProperty(USERNAME).evaluateAttributeExpressions(flowFile).getValue();
    final String password = context.getProperty(PASSWORD).evaluateAttributeExpressions().getValue();
    final ComponentLog logger = getLogger();
    Response getResponse = null;
    try {
        logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] { index, docType, docId });
        // read the url property from the context
        final String urlstr = StringUtils.trimToEmpty(context.getProperty(ES_URL).evaluateAttributeExpressions().getValue());
        final URL url = buildRequestURL(urlstr, docId, index, docType, fields, context);
        final long startNanos = System.nanoTime();
        getResponse = sendRequestToElasticsearch(okHttpClient, url, username, password, "GET", null);
        final int statusCode = getResponse.code();
        if (isSuccess(statusCode)) {
            ResponseBody body = getResponse.body();
            final byte[] bodyBytes = body.bytes();
            JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
            boolean found = responseJson.get("found").asBoolean(false);
            String retrievedIndex = responseJson.get("_index").asText();
            String retrievedType = responseJson.get("_type").asText();
            String retrievedId = responseJson.get("_id").asText();
            if (found) {
                JsonNode source = responseJson.get("_source");
                flowFile = session.putAttribute(flowFile, "filename", retrievedId);
                flowFile = session.putAttribute(flowFile, "es.index", retrievedIndex);
                flowFile = session.putAttribute(flowFile, "es.type", retrievedType);
                if (source != null) {
                    flowFile = session.write(flowFile, out -> {
                        out.write(source.toString().getBytes());
                    });
                }
                logger.debug("Elasticsearch document " + retrievedId + " fetched, routing to success");
                // emit provenance event
                final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
                if (context.hasNonLoopConnection()) {
                    session.getProvenanceReporter().fetch(flowFile, url.toExternalForm(), millis);
                } else {
                    session.getProvenanceReporter().receive(flowFile, url.toExternalForm(), millis);
                }
                session.transfer(flowFile, REL_SUCCESS);
            } else {
                logger.debug("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
                // We couldn't find the document, so send it to "not found"
                session.transfer(flowFile, REL_NOT_FOUND);
            }
        } else {
            if (statusCode == 404) {
                logger.warn("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
                // We couldn't find the document, so penalize it and send it to "not found"
                session.transfer(flowFile, REL_NOT_FOUND);
            } else {
                // 5xx -> RETRY, but a server error might last a while, so yield
                if (statusCode / 100 == 5) {
                    logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to retry. This is likely a server problem, yielding...", new Object[] { statusCode, getResponse.message() });
                    session.transfer(flowFile, REL_RETRY);
                    context.yield();
                } else if (context.hasIncomingConnection()) {
                    // 1xx, 3xx, 4xx -> NO RETRY
                    logger.warn("Elasticsearch returned code {} with message {}, transferring flow file to failure", new Object[] { statusCode, getResponse.message() });
                    session.transfer(flowFile, REL_FAILURE);
                } else {
                    logger.warn("Elasticsearch returned code {} with message {}", new Object[] { statusCode, getResponse.message() });
                    session.remove(flowFile);
                }
            }
        }
    } catch (IOException ioe) {
        logger.error("Failed to read from Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.). Routing to retry", new Object[] { ioe.getLocalizedMessage() }, ioe);
        if (context.hasIncomingConnection()) {
            session.transfer(flowFile, REL_RETRY);
        } else {
            session.remove(flowFile);
        }
        context.yield();
    } catch (Exception e) {
        logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
        if (context.hasIncomingConnection()) {
            session.transfer(flowFile, REL_FAILURE);
        } else {
            session.remove(flowFile);
        }
        context.yield();
    } finally {
        if (getResponse != null) {
            getResponse.close();
        }
    }
}
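The helpers buildRequestURL and sendRequestToElasticsearch are not shown above. Assuming they issue a plain HTTP GET against the document URL with optional basic authentication, a self-contained okhttp3 sketch of that request might look like the following (the URL, index, type, and id are placeholders):

import java.io.IOException;
import okhttp3.Credentials;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;

public class EsGetSketch {
    public static void main(String[] args) throws IOException {
        OkHttpClient client = new OkHttpClient();
        // Document URL layout assumed from the snippet above: {es.url}/{index}/{type}/{id}
        String url = "http://localhost:9200/my-index/my-type/my-doc-id";
        Request.Builder builder = new Request.Builder().url(url).get();
        // Optional basic authentication, mirroring the username/password properties.
        String username = System.getenv("ES_USER");
        String password = System.getenv("ES_PASS");
        if (username != null && password != null) {
            builder.header("Authorization", Credentials.basic(username, password));
        }
        try (Response response = client.newCall(builder.build()).execute()) {
            System.out.println("status=" + response.code());
            if (response.body() != null) {
                System.out.println(response.body().string());
            }
        }
    }
}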
Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class ExtractEmailAttachments, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();
    final String requireStrictAddresses = "false";
    session.read(originalFlowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC-2822 determines that a message must have a "From:" header
                // if a message lacks the field, it is flagged as invalid
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessagingException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC2822 validation: No Sent Date");
                }
                originalFlowFilesList.add(originalFlowFile);
                if (parser.hasAttachments()) {
                    final String originalFlowFileName = originalFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                    try {
                        for (final DataSource data : parser.getAttachmentList()) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();
                            if (StringUtils.isNotBlank(data.getName())) {
                                attributes.put(CoreAttributes.FILENAME.key(), data.getName());
                            }
                            if (StringUtils.isNotBlank(data.getContentType())) {
                                attributes.put(CoreAttributes.MIME_TYPE.key(), data.getContentType());
                            }
                            String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);
                            split = session.append(split, new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    IOUtils.copy(data.getInputStream(), out);
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong
                        // Removing splits that may have been created
                        session.remove(attachmentsList);
                        // Removing the original flow from its list
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error("Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Another error hit...
                // Removing the original flow from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                // Message is invalid or triggered an error during parsing
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    session.transfer(attachmentsList, REL_ATTACHMENTS);
    // As per above code, originalFlowfile may be routed to invalid or
    // original depending on RFC2822 compliance.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);
    if (attachmentsList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
    } else if (attachmentsList.size() > 1) {
        logger.info("Split {} into {} files: {}", new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
    }
}
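The same commons-email MimeMessageParser calls can be exercised outside NiFi against an .eml file on disk; a minimal sketch (the file name is a placeholder, and the lenient address setting mirrors the processor above):

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Properties;
import javax.activation.DataSource;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import org.apache.commons.mail.util.MimeMessageParser;

public class ListAttachmentsSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // Same lenient address parsing as the processor above.
        props.put("mail.mime.address.strict", "false");
        Session mailSession = Session.getInstance(props);
        try (InputStream in = new BufferedInputStream(new FileInputStream("message.eml"))) {
            MimeMessage message = new MimeMessage(mailSession, in);
            MimeMessageParser parser = new MimeMessageParser(message).parse();
            if (!parser.hasAttachments()) {
                System.out.println("no attachments");
                return;
            }
            for (DataSource attachment : parser.getAttachmentList()) {
                System.out.println(attachment.getName() + " (" + attachment.getContentType() + ")");
            }
        }
    }
}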
Use of org.apache.nifi.processor.ProcessSession in project nifi by apache.
The class MalformedChunkHandler, method handle:
public void handle(FlowFile original, ProcessSession processSession, String chunkName, byte[] badChunk) {
    FlowFile flowFile = processSession.create(original);
    flowFile = processSession.putAttribute(flowFile, CoreAttributes.FILENAME.key(), chunkName);
    flowFile = processSession.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), MediaType.APPLICATION_BINARY.toString());
    flowFile = processSession.write(flowFile, out -> out.write(badChunk));
    processSession.transfer(flowFile, badChunkRelationship);
}
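A caller hands this method the original FlowFile, the active ProcessSession, a name for the rejected chunk, and its raw bytes. A hypothetical call site inside a processor's onTrigger might look like the sketch below; the processor class, relationship, and chunk bytes are illustrative, and the handler's construction is omitted because it is not shown in the snippet above:

import java.util.Collections;
import java.util.Set;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;

public class MalformedChunkSketchProcessor extends AbstractProcessor {

    static final Relationship REL_BAD_CHUNK = new Relationship.Builder()
            .name("bad chunk")
            .description("Chunks that could not be parsed")
            .build();

    // Assumed to be initialized elsewhere (for example in an @OnScheduled method);
    // its construction is not shown in the snippet above.
    private MalformedChunkHandler malformedChunkHandler;

    @Override
    public Set<Relationship> getRelationships() {
        return Collections.singleton(REL_BAD_CHUNK);
    }

    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        FlowFile original = session.get();
        if (original == null) {
            return;
        }
        // Placeholder for the bytes of a chunk that failed parsing.
        byte[] badChunk = new byte[] { 0x00, 0x01 };
        malformedChunkHandler.handle(original, session, "chunk-0001.bin", badChunk);
        // Route or remove the original as appropriate; removed here for brevity.
        session.remove(original);
    }
}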