Example 96 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in the apache/nifi project.

From the class ListProcessorTestWatcher, method dumpState.

private void dumpState(Consumer<String> d, final Map<String, String> state, final List<ListableEntity> entities, final List<FlowFile> flowFiles, final long start) {
    final long nTime = System.currentTimeMillis();
    log(d, "--------------------------------------------------------------------");
    log(d, "%-19s   %-13s %-23s %s", "", "timestamp", "date from timestamp", "t0 delta");
    log(d, "%-19s   %-13s %-23s %s", "-------------------", "-------------", "-----------------------", "--------");
    log(d, "%-19s = %13d %s %8d", "started at", start, dateFormat.format(start), 0);
    log(d, "%-19s = %13d %s %8d", "current time", nTime, dateFormat.format(nTime), 0);
    log(d, "---- processor state -----------------------------------------------");
    if (state.containsKey("processed.timestamp")) {
        final long pTime = Long.parseLong(state.get("processed.timestamp"));
        log(d, "%19s = %13d %s %8d", "processed.timestamp", pTime, dateFormat.format(pTime), pTime - nTime);
    } else {
        log(d, "%19s = na", "processed.timestamp");
    }
    if (state.containsKey("listing.timestamp")) {
        final long lTime = Long.parseLong(state.get("listing.timestamp"));
        log(d, "%19s = %13d %s %8d", "listing.timestamp", lTime, dateFormat.format(lTime), lTime - nTime);
    } else {
        log(d, "%19s = na", "listing.timestamp");
    }
    log(d, "---- input folder contents -----------------------------------------");
    entities.sort(Comparator.comparing(ListableEntity::getIdentifier));
    for (ListableEntity entity : entities) {
        log(d, "%19s = %12d %s %8d", entity.getIdentifier(), entity.getTimestamp(), dateFormat.format(entity.getTimestamp()), entity.getTimestamp() - nTime);
    }
    log(d, "---- output flowfiles ----------------------------------------------");
    final Map<String, Long> fileTimes = entities.stream().collect(Collectors.toMap(ListableEntity::getIdentifier, ListableEntity::getTimestamp));
    for (FlowFile ff : flowFiles) {
        String fName = ff.getAttribute(CoreAttributes.FILENAME.key());
        Long fTime = fileTimes.get(fName);
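        // Note: fTime is null when a flowfile's filename has no matching listed entity;
        // dateFormat.format(fTime) and the fTime - nTime unboxing below would then throw.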
        log(d, "%19s = %13d %s %8d", fName, fTime, dateFormat.format(fTime), fTime - nTime);
    }
    log(d, "REL_SUCCESS count = " + flowFiles.size());
    log(d, "--------------------------------------------------------------------");
    log(d, "");
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile)
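
The dumpState helper relies on a log method and a dateFormat field that are not part of this excerpt. A minimal sketch of what they plausibly look like, inferred from the call sites above (the names are real, but the bodies and the date pattern are assumptions, not the project's verbatim code):

import java.text.SimpleDateFormat;
import java.util.function.Consumer;

// Assumed shape of the helpers used by dumpState; the real
// ListProcessorTestWatcher may define them differently.
private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS");

private void log(Consumer<String> d, String format, Object... args) {
    // Format the message and hand it to the supplied sink,
    // e.g. System.out::println or a captured test logger.
    // DateFormat.format(Object) accepts a Number as epoch millis,
    // which is why dumpState can pass raw long timestamps.
    d.accept(String.format(format, args));
}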

Example 97 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in the apache/nifi project.

From the class FetchS3Object, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final long startNanos = System.nanoTime();
    final String bucket = context.getProperty(BUCKET).evaluateAttributeExpressions(flowFile).getValue();
    final String key = context.getProperty(KEY).evaluateAttributeExpressions(flowFile).getValue();
    final String versionId = context.getProperty(VERSION_ID).evaluateAttributeExpressions(flowFile).getValue();
    final AmazonS3 client = getClient();
    final GetObjectRequest request;
    if (versionId == null) {
        request = new GetObjectRequest(bucket, key);
    } else {
        request = new GetObjectRequest(bucket, key, versionId);
    }
    final Map<String, String> attributes = new HashMap<>();
    try (final S3Object s3Object = client.getObject(request)) {
        flowFile = session.importFrom(s3Object.getObjectContent(), flowFile);
        attributes.put("s3.bucket", s3Object.getBucketName());
        final ObjectMetadata metadata = s3Object.getObjectMetadata();
        if (metadata.getContentDisposition() != null) {
            final String fullyQualified = metadata.getContentDisposition();
            final int lastSlash = fullyQualified.lastIndexOf("/");
            if (lastSlash > -1 && lastSlash < fullyQualified.length() - 1) {
                attributes.put(CoreAttributes.PATH.key(), fullyQualified.substring(0, lastSlash));
                attributes.put(CoreAttributes.ABSOLUTE_PATH.key(), fullyQualified);
                attributes.put(CoreAttributes.FILENAME.key(), fullyQualified.substring(lastSlash + 1));
            } else {
                attributes.put(CoreAttributes.FILENAME.key(), metadata.getContentDisposition());
            }
        }
        if (metadata.getContentMD5() != null) {
            attributes.put("hash.value", metadata.getContentMD5());
            attributes.put("hash.algorithm", "MD5");
        }
        if (metadata.getContentType() != null) {
            attributes.put(CoreAttributes.MIME_TYPE.key(), metadata.getContentType());
        }
        if (metadata.getETag() != null) {
            attributes.put("s3.etag", metadata.getETag());
        }
        if (metadata.getExpirationTime() != null) {
            attributes.put("s3.expirationTime", String.valueOf(metadata.getExpirationTime().getTime()));
        }
        if (metadata.getExpirationTimeRuleId() != null) {
            attributes.put("s3.expirationTimeRuleId", metadata.getExpirationTimeRuleId());
        }
        if (metadata.getUserMetadata() != null) {
            attributes.putAll(metadata.getUserMetadata());
        }
        if (metadata.getSSEAlgorithm() != null) {
            attributes.put("s3.sseAlgorithm", metadata.getSSEAlgorithm());
        }
        if (metadata.getVersionId() != null) {
            attributes.put("s3.version", metadata.getVersionId());
        }
    } catch (final IOException | AmazonClientException ioe) {
        getLogger().error("Failed to retrieve S3 Object for {}; routing to failure", new Object[] { flowFile, ioe });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    if (!attributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, attributes);
    }
    session.transfer(flowFile, REL_SUCCESS);
    final long transferMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    getLogger().info("Successfully retrieved S3 Object for {} in {} millis; routing to success", new Object[] { flowFile, transferMillis });
    session.getProvenanceReporter().fetch(flowFile, "http://" + bucket + ".amazonaws.com/" + key, transferMillis);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) AmazonS3(com.amazonaws.services.s3.AmazonS3) HashMap(java.util.HashMap) AmazonClientException(com.amazonaws.AmazonClientException) IOException(java.io.IOException) S3Object(com.amazonaws.services.s3.model.S3Object) GetObjectRequest(com.amazonaws.services.s3.model.GetObjectRequest) ObjectMetadata(com.amazonaws.services.s3.model.ObjectMetadata)
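
For context, a rough sketch of driving this processor through NiFi's mock test harness. The property display names ("Bucket", "Object Key", "Region") are assumptions based on the descriptors referenced above, and a real run would need valid credentials or a mocked AmazonS3 client behind getClient(), so treat this as illustrative only:

import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

final TestRunner runner = TestRunners.newTestRunner(FetchS3Object.class);
runner.setProperty("Bucket", "my-bucket");          // assumed display name of BUCKET
runner.setProperty("Object Key", "path/to/object"); // assumed display name of KEY
runner.setProperty("Region", "us-east-1");          // assumed required, as in other S3 processors
// An incoming flowfile triggers the fetch; its attributes feed the
// evaluateAttributeExpressions() calls seen in onTrigger above.
runner.enqueue(new byte[0]);
runner.run();
runner.assertAllFlowFilesTransferred(FetchS3Object.REL_SUCCESS, 1);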

Example 98 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in the apache/nifi project.

From the class GetSQS, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final String queueUrl = context.getProperty(DYNAMIC_QUEUE_URL).evaluateAttributeExpressions().getValue();
    final AmazonSQSClient client = getClient();
    final ReceiveMessageRequest request = new ReceiveMessageRequest();
    request.setAttributeNames(Collections.singleton("All"));
    request.setMessageAttributeNames(Collections.singleton("All"));
    request.setMaxNumberOfMessages(context.getProperty(BATCH_SIZE).asInteger());
    request.setVisibilityTimeout(context.getProperty(VISIBILITY_TIMEOUT).asTimePeriod(TimeUnit.SECONDS).intValue());
    request.setQueueUrl(queueUrl);
    request.setWaitTimeSeconds(context.getProperty(RECEIVE_MSG_WAIT_TIME).asTimePeriod(TimeUnit.SECONDS).intValue());
    final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
    final ReceiveMessageResult result;
    try {
        result = client.receiveMessage(request);
    } catch (final Exception e) {
        getLogger().error("Failed to receive messages from Amazon SQS due to {}", new Object[] { e });
        context.yield();
        return;
    }
    final List<Message> messages = result.getMessages();
    if (messages.isEmpty()) {
        context.yield();
        return;
    }
    final boolean autoDelete = context.getProperty(AUTO_DELETE).asBoolean();
    for (final Message message : messages) {
        FlowFile flowFile = session.create();
        final Map<String, String> attributes = new HashMap<>();
        for (final Map.Entry<String, String> entry : message.getAttributes().entrySet()) {
            attributes.put("sqs." + entry.getKey(), entry.getValue());
        }
        for (final Map.Entry<String, MessageAttributeValue> entry : message.getMessageAttributes().entrySet()) {
            attributes.put("sqs." + entry.getKey(), entry.getValue().getStringValue());
        }
        attributes.put("hash.value", message.getMD5OfBody());
        attributes.put("hash.algorithm", "md5");
        attributes.put("sqs.message.id", message.getMessageId());
        attributes.put("sqs.receipt.handle", message.getReceiptHandle());
        flowFile = session.putAllAttributes(flowFile, attributes);
        flowFile = session.write(flowFile, new OutputStreamCallback() {

            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(message.getBody().getBytes(charset));
            }
        });
        session.transfer(flowFile, REL_SUCCESS);
        session.getProvenanceReporter().receive(flowFile, queueUrl);
        getLogger().info("Successfully received {} from Amazon SQS", new Object[] { flowFile });
    }
    if (autoDelete) {
        // If we want to auto-delete messages, we must first commit the session to ensure that the data
        // is persisted in NiFi's repositories.
        session.commit();
        final DeleteMessageBatchRequest deleteRequest = new DeleteMessageBatchRequest();
        deleteRequest.setQueueUrl(queueUrl);
        final List<DeleteMessageBatchRequestEntry> deleteRequestEntries = new ArrayList<>();
        for (final Message message : messages) {
            final DeleteMessageBatchRequestEntry entry = new DeleteMessageBatchRequestEntry();
            entry.setId(message.getMessageId());
            entry.setReceiptHandle(message.getReceiptHandle());
            deleteRequestEntries.add(entry);
        }
        deleteRequest.setEntries(deleteRequestEntries);
        try {
            client.deleteMessageBatch(deleteRequest);
        } catch (final Exception e) {
            getLogger().error("Received {} messages from Amazon SQS but failed to delete the messages; these messages" + " may be duplicated. Reason for deletion failure: {}", new Object[] { messages.size(), e });
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) DeleteMessageBatchRequest(com.amazonaws.services.sqs.model.DeleteMessageBatchRequest) ReceiveMessageRequest(com.amazonaws.services.sqs.model.ReceiveMessageRequest) Message(com.amazonaws.services.sqs.model.Message) HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) Charset(java.nio.charset.Charset) AmazonSQSClient(com.amazonaws.services.sqs.AmazonSQSClient) IOException(java.io.IOException) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) DeleteMessageBatchRequestEntry(com.amazonaws.services.sqs.model.DeleteMessageBatchRequestEntry) Map(java.util.Map) ReceiveMessageResult(com.amazonaws.services.sqs.model.ReceiveMessageResult) MessageAttributeValue(com.amazonaws.services.sqs.model.MessageAttributeValue)
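
When AUTO_DELETE is disabled, the sqs.receipt.handle attribute written above is what lets a later step remove the message once the flowfile has been fully processed (NiFi ships a DeleteSQS processor for this). A minimal sketch of the equivalent call with the AWS SDK v1, where the queue URL is a placeholder and flowFile stands for the flowfile being handled:

import com.amazonaws.services.sqs.AmazonSQS;
import com.amazonaws.services.sqs.AmazonSQSClientBuilder;
import com.amazonaws.services.sqs.model.DeleteMessageRequest;

final AmazonSQS sqs = AmazonSQSClientBuilder.defaultClient();
final String queueUrl = "https://sqs.us-east-1.amazonaws.com/123456789012/my-queue"; // placeholder
// The receipt handle was stored on the flowfile by GetSQS above.
final String receiptHandle = flowFile.getAttribute("sqs.receipt.handle");
sqs.deleteMessage(new DeleteMessageRequest(queueUrl, receiptHandle));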

Example 99 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in the apache/nifi project.

From the class ExtractImageMetadata, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowfile = session.get();
    if (flowfile == null) {
        return;
    }
    final ComponentLog logger = this.getLogger();
    final AtomicReference<Metadata> value = new AtomicReference<>(null);
    final Integer max = context.getProperty(MAX_NUMBER_OF_ATTRIBUTES).asInteger();
    try {
        session.read(flowfile, new InputStreamCallback() {

            @Override
            public void process(InputStream in) throws IOException {
                try {
                    Metadata imageMetadata = ImageMetadataReader.readMetadata(in);
                    value.set(imageMetadata);
                } catch (ImageProcessingException ex) {
                    throw new ProcessException(ex);
                }
            }
        });
        Metadata metadata = value.get();
        Map<String, String> results = getTags(max, metadata);
        // Write the results to an attribute
        if (!results.isEmpty()) {
            flowfile = session.putAllAttributes(flowfile, results);
        }
        session.transfer(flowfile, SUCCESS);
    } catch (ProcessException e) {
        logger.error("Failed to extract image metadata from {} due to {}", new Object[] { flowfile, e });
        session.transfer(flowfile, FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ImageProcessingException(com.drew.imaging.ImageProcessingException) InputStream(java.io.InputStream) Metadata(com.drew.metadata.Metadata) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback)
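
The getTags method is not part of this excerpt; from its call site it flattens the metadata-extractor Metadata object into at most max attribute key/value pairs. A plausible sketch against the com.drew API, with the key naming scheme being an assumption:

import com.drew.metadata.Directory;
import com.drew.metadata.Metadata;
import com.drew.metadata.Tag;
import java.util.HashMap;
import java.util.Map;

private Map<String, String> getTags(Integer max, Metadata metadata) {
    final Map<String, String> results = new HashMap<>();
    int count = 0;
    // Walk every directory (Exif, IPTC, ...) and collect its tags
    // until the configured attribute cap is reached.
    for (Directory directory : metadata.getDirectories()) {
        for (Tag tag : directory.getTags()) {
            if (max != null && count >= max) {
                return results;
            }
            // e.g. "Exif IFD0.Model" -> "Canon EOS 5D"
            results.put(directory.getName() + "." + tag.getTagName(), tag.getDescription());
            count++;
        }
    }
    return results;
}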

Example 100 with FlowFile

Use of org.apache.nifi.flowfile.FlowFile in the apache/nifi project.

From the class ExtractMediaMetadata, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = this.getLogger();
    final AtomicReference<Map<String, String>> value = new AtomicReference<>(null);
    final Integer maxAttribCount = context.getProperty(MAX_NUMBER_OF_ATTRIBUTES).asInteger();
    final Integer maxAttribLength = context.getProperty(MAX_ATTRIBUTE_LENGTH).asInteger();
    final String prefix = context.getProperty(METADATA_KEY_PREFIX).evaluateAttributeExpressions(flowFile).getValue();
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(InputStream in) throws IOException {
                try {
                    Map<String, String> results = tika_parse(in, prefix, maxAttribCount, maxAttribLength);
                    value.set(results);
                } catch (SAXException | TikaException e) {
                    throw new IOException(e);
                }
            }
        });
        // Write the results to attributes
        Map<String, String> results = value.get();
        if (results != null && !results.isEmpty()) {
            flowFile = session.putAllAttributes(flowFile, results);
        }
        session.transfer(flowFile, SUCCESS);
        session.getProvenanceReporter().modifyAttributes(flowFile, "media attributes extracted");
    } catch (ProcessException e) {
        logger.error("Failed to extract media metadata from {} due to {}", new Object[] { flowFile, e });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) TikaInputStream(org.apache.tika.io.TikaInputStream) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) HashMap(java.util.HashMap) Map(java.util.Map)
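
To round out the example, a short sketch of exercising this processor with NiFi's mock framework, run inside a test method that declares throws Exception since enqueue(Path) is checked. The property display name and the test file path are assumptions for illustration:

import java.nio.file.Paths;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

final TestRunner runner = TestRunners.newTestRunner(ExtractMediaMetadata.class);
runner.setProperty("Metadata Key Prefix", "media."); // assumed display name of METADATA_KEY_PREFIX
runner.enqueue(Paths.get("src/test/resources/test.mp3")); // hypothetical test file
runner.run();
runner.assertAllFlowFilesTransferred(ExtractMediaMetadata.SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ExtractMediaMetadata.SUCCESS).get(0);
// Tika typically reports a Content-Type, which lands under the configured prefix.
out.assertAttributeExists("media.Content-Type");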

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile): 500
IOException (java.io.IOException): 236
ProcessException (org.apache.nifi.processor.exception.ProcessException): 193
HashMap (java.util.HashMap): 160
InputStream (java.io.InputStream): 145
OutputStream (java.io.OutputStream): 131
ComponentLog (org.apache.nifi.logging.ComponentLog): 119
Test (org.junit.Test): 116
ArrayList (java.util.ArrayList): 113
Map (java.util.Map): 105
MockFlowFile (org.apache.nifi.util.MockFlowFile): 103
ProcessSession (org.apache.nifi.processor.ProcessSession): 99
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 83
Relationship (org.apache.nifi.processor.Relationship): 78
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 78
HashSet (java.util.HashSet): 75
List (java.util.List): 67
StopWatch (org.apache.nifi.util.StopWatch): 59
Set (java.util.Set): 56
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 55