Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class ListProcessorTestWatcher, method dumpState:
private void dumpState(Consumer<String> d, final Map<String, String> state, final List<ListableEntity> entities, final List<FlowFile> flowFiles, final long start) {
    final long nTime = System.currentTimeMillis();
    log(d, "--------------------------------------------------------------------");
    log(d, "%-19s %-13s %-23s %s", "", "timestamp", "date from timestamp", "t0 delta");
    log(d, "%-19s %-13s %-23s %s", "-------------------", "-------------", "-----------------------", "--------");
    log(d, "%-19s = %13d %s %8d", "started at", start, dateFormat.format(start), 0);
    log(d, "%-19s = %13d %s %8d", "current time", nTime, dateFormat.format(nTime), 0);
    log(d, "---- processor state -----------------------------------------------");
    if (state.containsKey("processed.timestamp")) {
        final long pTime = Long.parseLong(state.get("processed.timestamp"));
        log(d, "%19s = %13d %s %8d", "processed.timestamp", pTime, dateFormat.format(pTime), pTime - nTime);
    } else {
        log(d, "%19s = na", "processed.timestamp");
    }
    if (state.containsKey("listing.timestamp")) {
        final long lTime = Long.parseLong(state.get("listing.timestamp"));
        log(d, "%19s = %13d %s %8d", "listing.timestamp", lTime, dateFormat.format(lTime), lTime - nTime);
    } else {
        log(d, "%19s = na", "listing.timestamp");
    }
    log(d, "---- input folder contents -----------------------------------------");
    entities.sort(Comparator.comparing(ListableEntity::getIdentifier));
    for (ListableEntity entity : entities) {
        log(d, "%19s = %12d %s %8d", entity.getIdentifier(), entity.getTimestamp(), dateFormat.format(entity.getTimestamp()), entity.getTimestamp() - nTime);
    }
    log(d, "---- output flowfiles ----------------------------------------------");
    final Map<String, Long> fileTimes = entities.stream().collect(Collectors.toMap(ListableEntity::getIdentifier, ListableEntity::getTimestamp));
    for (FlowFile ff : flowFiles) {
        String fName = ff.getAttribute(CoreAttributes.FILENAME.key());
        Long fTime = fileTimes.get(fName);
        log(d, "%19s = %13d %s %8d", fName, fTime, dateFormat.format(fTime), fTime - nTime);
    }
    log(d, "REL_SUCCESS count = " + flowFiles.size());
    log(d, "--------------------------------------------------------------------");
    log(d, "");
}
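The snippet relies on a log(Consumer, format, args...) helper and a dateFormat field that are defined elsewhere in ListProcessorTestWatcher and not shown here. A minimal sketch of what they might look like, inferred from the call sites (the field's date pattern is an assumption, not the project's verbatim code):

// Assumed companions of dumpState: a date formatter and a helper that formats a line
// and hands it to the supplied sink (e.g. System.out::println or a logger method).
private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");

private void log(final Consumer<String> dumper, final String format, final Object... args) {
    dumper.accept(String.format(format, args));
}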
Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class FetchS3Object, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final long startNanos = System.nanoTime();
    final String bucket = context.getProperty(BUCKET).evaluateAttributeExpressions(flowFile).getValue();
    final String key = context.getProperty(KEY).evaluateAttributeExpressions(flowFile).getValue();
    final String versionId = context.getProperty(VERSION_ID).evaluateAttributeExpressions(flowFile).getValue();
    final AmazonS3 client = getClient();
    final GetObjectRequest request;
    if (versionId == null) {
        request = new GetObjectRequest(bucket, key);
    } else {
        request = new GetObjectRequest(bucket, key, versionId);
    }
    final Map<String, String> attributes = new HashMap<>();
    try (final S3Object s3Object = client.getObject(request)) {
        flowFile = session.importFrom(s3Object.getObjectContent(), flowFile);
        attributes.put("s3.bucket", s3Object.getBucketName());
        final ObjectMetadata metadata = s3Object.getObjectMetadata();
        if (metadata.getContentDisposition() != null) {
            final String fullyQualified = metadata.getContentDisposition();
            final int lastSlash = fullyQualified.lastIndexOf("/");
            if (lastSlash > -1 && lastSlash < fullyQualified.length() - 1) {
                attributes.put(CoreAttributes.PATH.key(), fullyQualified.substring(0, lastSlash));
                attributes.put(CoreAttributes.ABSOLUTE_PATH.key(), fullyQualified);
                attributes.put(CoreAttributes.FILENAME.key(), fullyQualified.substring(lastSlash + 1));
            } else {
                attributes.put(CoreAttributes.FILENAME.key(), metadata.getContentDisposition());
            }
        }
        if (metadata.getContentMD5() != null) {
            attributes.put("hash.value", metadata.getContentMD5());
            attributes.put("hash.algorithm", "MD5");
        }
        if (metadata.getContentType() != null) {
            attributes.put(CoreAttributes.MIME_TYPE.key(), metadata.getContentType());
        }
        if (metadata.getETag() != null) {
            attributes.put("s3.etag", metadata.getETag());
        }
        if (metadata.getExpirationTime() != null) {
            attributes.put("s3.expirationTime", String.valueOf(metadata.getExpirationTime().getTime()));
        }
        if (metadata.getExpirationTimeRuleId() != null) {
            attributes.put("s3.expirationTimeRuleId", metadata.getExpirationTimeRuleId());
        }
        if (metadata.getUserMetadata() != null) {
            attributes.putAll(metadata.getUserMetadata());
        }
        if (metadata.getSSEAlgorithm() != null) {
            attributes.put("s3.sseAlgorithm", metadata.getSSEAlgorithm());
        }
        if (metadata.getVersionId() != null) {
            attributes.put("s3.version", metadata.getVersionId());
        }
    } catch (final IOException | AmazonClientException ioe) {
        getLogger().error("Failed to retrieve S3 Object for {}; routing to failure", new Object[] { flowFile, ioe });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    if (!attributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, attributes);
    }
    session.transfer(flowFile, REL_SUCCESS);
    final long transferMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    getLogger().info("Successfully retrieved S3 Object for {} in {} millis; routing to success", new Object[] { flowFile, transferMillis });
    session.getProvenanceReporter().fetch(flowFile, "http://" + bucket + ".amazonaws.com/" + key, transferMillis);
}
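One way to exercise an onTrigger implementation like this outside a full flow is NiFi's org.apache.nifi.util.TestRunner. The sketch below assumes valid AWS credentials and an existing bucket and key; the bucket, key, and credentials path are placeholders, and the property constants (CREDENTIALS_FILE in particular) are assumed to be inherited from the AWS base processor rather than confirmed from this snippet:

// Hypothetical integration-style test; requires real S3 access.
final TestRunner runner = TestRunners.newTestRunner(new FetchS3Object());
runner.setProperty(FetchS3Object.CREDENTIALS_FILE, "/path/to/credentials.properties"); // placeholder path
runner.setProperty(FetchS3Object.BUCKET, "my-test-bucket");                            // placeholder bucket
runner.setProperty(FetchS3Object.KEY, "my-object-key");                                // placeholder key
runner.enqueue(new byte[0]);
runner.run(1);
runner.assertAllFlowFilesTransferred(FetchS3Object.REL_SUCCESS, 1);
// The attributes populated above (s3.bucket, s3.etag, mime.type, ...) can then be asserted on the MockFlowFile.
runner.getFlowFilesForRelationship(FetchS3Object.REL_SUCCESS).get(0).assertAttributeExists("s3.etag");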
Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class GetSQS, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final String queueUrl = context.getProperty(DYNAMIC_QUEUE_URL).evaluateAttributeExpressions().getValue();
    final AmazonSQSClient client = getClient();
    final ReceiveMessageRequest request = new ReceiveMessageRequest();
    request.setAttributeNames(Collections.singleton("All"));
    request.setMessageAttributeNames(Collections.singleton("All"));
    request.setMaxNumberOfMessages(context.getProperty(BATCH_SIZE).asInteger());
    request.setVisibilityTimeout(context.getProperty(VISIBILITY_TIMEOUT).asTimePeriod(TimeUnit.SECONDS).intValue());
    request.setQueueUrl(queueUrl);
    request.setWaitTimeSeconds(context.getProperty(RECEIVE_MSG_WAIT_TIME).asTimePeriod(TimeUnit.SECONDS).intValue());
    final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
    final ReceiveMessageResult result;
    try {
        result = client.receiveMessage(request);
    } catch (final Exception e) {
        getLogger().error("Failed to receive messages from Amazon SQS due to {}", new Object[] { e });
        context.yield();
        return;
    }
    final List<Message> messages = result.getMessages();
    if (messages.isEmpty()) {
        context.yield();
        return;
    }
    final boolean autoDelete = context.getProperty(AUTO_DELETE).asBoolean();
    for (final Message message : messages) {
        FlowFile flowFile = session.create();
        final Map<String, String> attributes = new HashMap<>();
        for (final Map.Entry<String, String> entry : message.getAttributes().entrySet()) {
            attributes.put("sqs." + entry.getKey(), entry.getValue());
        }
        for (final Map.Entry<String, MessageAttributeValue> entry : message.getMessageAttributes().entrySet()) {
            attributes.put("sqs." + entry.getKey(), entry.getValue().getStringValue());
        }
        attributes.put("hash.value", message.getMD5OfBody());
        attributes.put("hash.algorithm", "md5");
        attributes.put("sqs.message.id", message.getMessageId());
        attributes.put("sqs.receipt.handle", message.getReceiptHandle());
        flowFile = session.putAllAttributes(flowFile, attributes);
        flowFile = session.write(flowFile, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(message.getBody().getBytes(charset));
            }
        });
        session.transfer(flowFile, REL_SUCCESS);
        session.getProvenanceReporter().receive(flowFile, queueUrl);
        getLogger().info("Successfully received {} from Amazon SQS", new Object[] { flowFile });
    }
    if (autoDelete) {
        // If we want to auto-delete messages, we must first commit the session to ensure that the data
        // is persisted in NiFi's repositories.
        session.commit();
        final DeleteMessageBatchRequest deleteRequest = new DeleteMessageBatchRequest();
        deleteRequest.setQueueUrl(queueUrl);
        final List<DeleteMessageBatchRequestEntry> deleteRequestEntries = new ArrayList<>();
        for (final Message message : messages) {
            final DeleteMessageBatchRequestEntry entry = new DeleteMessageBatchRequestEntry();
            entry.setId(message.getMessageId());
            entry.setReceiptHandle(message.getReceiptHandle());
            deleteRequestEntries.add(entry);
        }
        deleteRequest.setEntries(deleteRequestEntries);
        try {
            client.deleteMessageBatch(deleteRequest);
        } catch (final Exception e) {
            getLogger().error("Received {} messages from Amazon SQS but failed to delete the messages; these messages" + " may be duplicated. Reason for deletion failure: {}", new Object[] { messages.size(), e });
        }
    }
}
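A TestRunner can drive this processor against a real queue in the same way. The queue URL and credentials path below are placeholders; DYNAMIC_QUEUE_URL comes from the snippet above, while CREDENTIALS_FILE is assumed to be inherited from the AWS base processor:

// Hypothetical integration-style test; requires AWS credentials and an existing SQS queue with messages.
final TestRunner runner = TestRunners.newTestRunner(new GetSQS());
runner.setProperty(GetSQS.CREDENTIALS_FILE, "/path/to/credentials.properties");                     // placeholder path
runner.setProperty(GetSQS.DYNAMIC_QUEUE_URL, "https://sqs.us-east-1.amazonaws.com/123456789/queue"); // placeholder URL
runner.run(1);
// Each received message becomes one FlowFile carrying the sqs.* attributes set above.
for (final MockFlowFile ff : runner.getFlowFilesForRelationship(GetSQS.REL_SUCCESS)) {
    ff.assertAttributeExists("sqs.message.id");
}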
Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class ExtractImageMetadata, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowfile = session.get();
    if (flowfile == null) {
        return;
    }
    final ComponentLog logger = this.getLogger();
    final AtomicReference<Metadata> value = new AtomicReference<>(null);
    final Integer max = context.getProperty(MAX_NUMBER_OF_ATTRIBUTES).asInteger();
    try {
        session.read(flowfile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                try {
                    Metadata imageMetadata = ImageMetadataReader.readMetadata(in);
                    value.set(imageMetadata);
                } catch (ImageProcessingException ex) {
                    throw new ProcessException(ex);
                }
            }
        });
        Metadata metadata = value.get();
        Map<String, String> results = getTags(max, metadata);
        // Write the results to an attribute
        if (!results.isEmpty()) {
            flowfile = session.putAllAttributes(flowfile, results);
        }
        session.transfer(flowfile, SUCCESS);
    } catch (ProcessException e) {
        logger.error("Failed to extract image metadata from {} due to {}", new Object[] { flowfile, e });
        session.transfer(flowfile, FAILURE);
    }
}
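The getTags helper is not shown in this snippet. A minimal sketch of what it could look like, built on the com.drew metadata-extractor types that ImageMetadataReader returns (the body is inferred from the call site and is an assumption, not the project's exact code):

// Assumed helper: flatten Metadata into "DirectoryName.TagName" -> description pairs,
// stopping once the optional maximum attribute count is reached.
private Map<String, String> getTags(final Integer max, final Metadata metadata) {
    final Map<String, String> results = new HashMap<>();
    int count = 0;
    for (final Directory directory : metadata.getDirectories()) {
        for (final Tag tag : directory.getTags()) {
            results.put(directory.getName() + "." + tag.getTagName(), tag.getDescription());
            if (max != null && ++count >= max) {
                return results;
            }
        }
    }
    return results;
}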
Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class ExtractMediaMetadata, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = this.getLogger();
    final AtomicReference<Map<String, String>> value = new AtomicReference<>(null);
    final Integer maxAttribCount = context.getProperty(MAX_NUMBER_OF_ATTRIBUTES).asInteger();
    final Integer maxAttribLength = context.getProperty(MAX_ATTRIBUTE_LENGTH).asInteger();
    final String prefix = context.getProperty(METADATA_KEY_PREFIX).evaluateAttributeExpressions(flowFile).getValue();
    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                try {
                    Map<String, String> results = tika_parse(in, prefix, maxAttribCount, maxAttribLength);
                    value.set(results);
                } catch (SAXException | TikaException e) {
                    throw new IOException(e);
                }
            }
        });
        // Write the results to attributes
        Map<String, String> results = value.get();
        if (results != null && !results.isEmpty()) {
            flowFile = session.putAllAttributes(flowFile, results);
        }
        session.transfer(flowFile, SUCCESS);
        session.getProvenanceReporter().modifyAttributes(flowFile, "media attributes extracted");
    } catch (ProcessException e) {
        logger.error("Failed to extract media metadata from {} due to {}", new Object[] { flowFile, e });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, FAILURE);
    }
}
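Similarly, the tika_parse helper is defined elsewhere in ExtractMediaMetadata. A rough, simplified sketch of such a method using Apache Tika's AutoDetectParser is shown below; it is an assumption inferred from the call site, not the project's exact implementation:

// Assumed helper: parse the stream with Tika and copy each metadata field into a map,
// honoring the optional key prefix, attribute-count limit, and value-length limit.
private Map<String, String> tika_parse(final InputStream in, final String prefix,
        final Integer maxAttribs, final Integer maxAttribLen) throws IOException, SAXException, TikaException {
    final Map<String, String> results = new HashMap<>();
    final org.apache.tika.metadata.Metadata metadata = new org.apache.tika.metadata.Metadata();
    new AutoDetectParser().parse(in, new DefaultHandler(), metadata, new ParseContext());
    int count = 0;
    for (final String name : metadata.names()) {
        if (maxAttribs != null && ++count > maxAttribs) {
            break;
        }
        String val = metadata.get(name);
        if (maxAttribLen != null && val != null && val.length() > maxAttribLen) {
            val = val.substring(0, maxAttribLen);
        }
        results.put((prefix == null ? "" : prefix) + name, val);
    }
    return results;
}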