Example 56 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class CompareFuzzyHash method onTrigger.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
    final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
    String inputHash = flowFile.getAttribute(attributeName);
    if (inputHash == null) {
        getLogger().info("FlowFile {} lacks the required '{}' attribute, routing to failure.", new Object[] { flowFile, attributeName });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    FuzzyHashMatcher fuzzyHashMatcher = null;
    switch(algorithm) {
        case tlsh:
            fuzzyHashMatcher = new TLSHHashMatcher(getLogger());
            break;
        case ssdeep:
            fuzzyHashMatcher = new SSDeepHashMatcher(getLogger());
            break;
        default:
            getLogger().error("Seems like the processor is configured to use unsupported algorithm '{}' ? Yielding.", new Object[] { algorithm });
            context.yield();
            return;
    }
    if (!fuzzyHashMatcher.isValidHash(inputHash)) {
        // The provided hash is not valid for the selected algorithm, so log the problem
        logger.error("Invalid hash provided. Sending to failure");
        // and route the FlowFile to failure
        session.transfer(flowFile, REL_FAILURE);
        session.commit();
        return;
    }
    double similarity = 0;
    double matchThreshold = context.getProperty(MATCH_THRESHOLD).asDouble();
    try {
        Map<String, Double> matched = new ConcurrentHashMap<String, Double>();
        BufferedReader reader = fuzzyHashMatcher.getReader(context.getProperty(HASH_LIST_FILE).getValue());
        String line = null;
        iterateFile: while ((line = reader.readLine()) != null) {
            if (line != null) {
                similarity = fuzzyHashMatcher.getSimilarity(inputHash, line);
                if (fuzzyHashMatcher.matchExceedsThreshold(similarity, matchThreshold)) {
                    String match = fuzzyHashMatcher.getMatch(line);
                    // A malformed entry can produce an empty match string; because that would simply look odd, we ignore such an entry and log it instead
                    if (!StringUtils.isEmpty(match)) {
                        matched.put(match, similarity);
                    } else {
                        logger.error("Found a match against a malformed entry '{}'. Please inspect the contents of" + "the {} file and ensure they are properly formatted", new Object[] { line, HASH_LIST_FILE.getDisplayName() });
                    }
                }
            }
            // Check if single match is desired and if a match has been made
            if (singleMatch.getValue().equals(context.getProperty(MATCHING_MODE).getValue()) && (matched.size() > 0)) {
                // and save time by breaking the outer loop
                break iterateFile;
            }
        }
        // Whether or not the loop was broken out of early, continue processing
        // First by creating a new map to hold attributes
        Map<String, String> attributes = new ConcurrentHashMap<String, String>();
        // Then by iterating over the hashmap of matches
        if (matched.size() > 0) {
            int x = 0;
            for (Map.Entry<String, Double> entry : matched.entrySet()) {
                // defining attributes accordingly
                attributes.put(attributeName + "." + x + ".match", entry.getKey());
                attributes.put(attributeName + "." + x + ".similarity", String.valueOf(entry.getValue()));
                x++;
            }
            // Finally, append the attributes to the FlowFile and send it to the match relationship
            flowFile = session.putAllAttributes(flowFile, attributes);
            session.transfer(flowFile, REL_FOUND);
            session.commit();
            return;
        } else {
            // Otherwise send it to non-match
            session.transfer(flowFile, REL_NOT_FOUND);
            session.commit();
            return;
        }
    } catch (IOException e) {
        logger.error("Error while reading the hash input source");
        context.yield();
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), FuzzyHashMatcher (org.apache.nifi.processors.cybersecurity.matchers.FuzzyHashMatcher), TLSHHashMatcher (org.apache.nifi.processors.cybersecurity.matchers.TLSHHashMatcher), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), BufferedReader (java.io.BufferedReader), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), SSDeepHashMatcher (org.apache.nifi.processors.cybersecurity.matchers.SSDeepHashMatcher), Map (java.util.Map)
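
For context, here is a minimal sketch of how this comparison could be exercised with NiFi's TestRunner. It is not taken from the project: the "ssdeep" algorithm value, the attribute name, the hash placeholder, and the list-file path are illustrative assumptions; the property and relationship constants are the ones referenced in onTrigger above.

import java.util.HashMap;
import java.util.Map;
import org.apache.nifi.processors.cybersecurity.CompareFuzzyHash;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;

public class CompareFuzzyHashSketchTest {

    @Test
    public void routesMatchingHashToFound() {
        TestRunner runner = TestRunners.newTestRunner(CompareFuzzyHash.class);
        // "ssdeep" as the algorithm value and the list file path are assumptions for this sketch
        runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, "ssdeep");
        runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");
        runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "src/test/resources/ssdeep.list");
        runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, "80");

        Map<String, String> attributes = new HashMap<>();
        attributes.put("fuzzyhash.value", "<ssdeep hash to compare>"); // placeholder hash
        runner.enqueue(new byte[0], attributes);
        runner.run();

        // A match routes to REL_FOUND and adds "<attribute>.<n>.match" and
        // "<attribute>.<n>.similarity" attributes; otherwise the FlowFile goes to REL_NOT_FOUND.
        runner.assertAllFlowFilesTransferred(CompareFuzzyHash.REL_FOUND, 1);
    }
}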

Example 57 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class GetAzureEventHub method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final BlockingQueue<String> partitionIds = this.partitionNames;
    final String partitionId = partitionIds.poll();
    if (partitionId == null) {
        getLogger().debug("No partitions available");
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    try {
        final Iterable<EventData> receivedEvents = receiveEvents(context, partitionId);
        if (receivedEvents == null) {
            return;
        }
        for (final EventData eventData : receivedEvents) {
            if (null != eventData) {
                final Map<String, String> attributes = new HashMap<>();
                FlowFile flowFile = session.create();
                final EventData.SystemProperties systemProperties = eventData.getSystemProperties();
                if (null != systemProperties) {
                    attributes.put("eventhub.enqueued.timestamp", String.valueOf(systemProperties.getEnqueuedTime()));
                    attributes.put("eventhub.offset", systemProperties.getOffset());
                    attributes.put("eventhub.sequence", String.valueOf(systemProperties.getSequenceNumber()));
                }
                attributes.put("eventhub.name", context.getProperty(EVENT_HUB_NAME).getValue());
                attributes.put("eventhub.partition", partitionId);
                flowFile = session.putAllAttributes(flowFile, attributes);
                flowFile = session.write(flowFile, out -> {
                    out.write(eventData.getBytes());
                });
                session.transfer(flowFile, REL_SUCCESS);
                final String namespace = context.getProperty(NAMESPACE).getValue();
                final String eventHubName = context.getProperty(EVENT_HUB_NAME).getValue();
                final String consumerGroup = context.getProperty(CONSUMER_GROUP).getValue();
                final String serviceBusEndPoint = context.getProperty(SERVICE_BUS_ENDPOINT).getValue();
                final String transitUri = "amqps://" + namespace + serviceBusEndPoint + "/" + eventHubName + "/ConsumerGroups/" + consumerGroup + "/Partitions/" + partitionId;
                session.getProvenanceReporter().receive(flowFile, transitUri, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
            }
        }
    } finally {
        partitionIds.offer(partitionId);
    }
}
Also used: StandardValidators (org.apache.nifi.processor.util.StandardValidators), CapabilityDescription (org.apache.nifi.annotation.documentation.CapabilityDescription), URISyntaxException (java.net.URISyntaxException), HashMap (java.util.HashMap), PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor), ProcessException (org.apache.nifi.processor.exception.ProcessException), ArrayList (java.util.ArrayList), ConcurrentMap (java.util.concurrent.ConcurrentMap), HashSet (java.util.HashSet), WritesAttributes (org.apache.nifi.annotation.behavior.WritesAttributes), Relationship (org.apache.nifi.processor.Relationship), Duration (java.time.Duration), Map (java.util.Map), ServiceBusException (com.microsoft.azure.servicebus.ServiceBusException), Requirement (org.apache.nifi.annotation.behavior.InputRequirement.Requirement), URI (java.net.URI), ConnectionStringBuilder (com.microsoft.azure.servicebus.ConnectionStringBuilder), FlowFile (org.apache.nifi.flowfile.FlowFile), PartitionReceiver (com.microsoft.azure.eventhubs.PartitionReceiver), ProcessContext (org.apache.nifi.processor.ProcessContext), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), Set (java.util.Set), ProcessSession (org.apache.nifi.processor.ProcessSession), IOException (java.io.IOException), BlockingQueue (java.util.concurrent.BlockingQueue), WritesAttribute (org.apache.nifi.annotation.behavior.WritesAttribute), EventData (com.microsoft.azure.eventhubs.EventData), Instant (java.time.Instant), LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue), ExecutionException (java.util.concurrent.ExecutionException), TimeUnit (java.util.concurrent.TimeUnit), InputRequirement (org.apache.nifi.annotation.behavior.InputRequirement), OnScheduled (org.apache.nifi.annotation.lifecycle.OnScheduled), List (java.util.List), EventHubClient (com.microsoft.azure.eventhubs.EventHubClient), StopWatch (org.apache.nifi.util.StopWatch), AbstractProcessor (org.apache.nifi.processor.AbstractProcessor), Tags (org.apache.nifi.annotation.documentation.Tags), OnStopped (org.apache.nifi.annotation.lifecycle.OnStopped), Collections (java.util.Collections)
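
The transit URI reported to provenance above follows a fixed amqps:// pattern. A small standalone helper (not part of the processor; the sample arguments are placeholders) makes that format explicit:

public class TransitUriSketch {

    // Mirrors the concatenation performed in onTrigger() before calling
    // session.getProvenanceReporter().receive(...).
    static String transitUri(String namespace, String serviceBusEndpoint,
                             String eventHubName, String consumerGroup, String partitionId) {
        return "amqps://" + namespace + serviceBusEndpoint
                + "/" + eventHubName
                + "/ConsumerGroups/" + consumerGroup
                + "/Partitions/" + partitionId;
    }

    public static void main(String[] args) {
        // Prints: amqps://myns.servicebus.windows.net/myhub/ConsumerGroups/$Default/Partitions/0
        System.out.println(transitUri("myns", ".servicebus.windows.net", "myhub", "$Default", "0"));
    }
}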

Example 58 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class FetchAzureBlobStorage method onTrigger.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final long startNanos = System.nanoTime();
    String containerName = context.getProperty(AzureStorageUtils.CONTAINER).evaluateAttributeExpressions(flowFile).getValue();
    String blobPath = context.getProperty(BLOB).evaluateAttributeExpressions(flowFile).getValue();
    AtomicReference<Exception> storedException = new AtomicReference<>();
    try {
        CloudBlobClient blobClient = AzureStorageUtils.createCloudBlobClient(context, getLogger(), flowFile);
        CloudBlobContainer container = blobClient.getContainerReference(containerName);
        final Map<String, String> attributes = new HashMap<>();
        final CloudBlob blob = container.getBlockBlobReference(blobPath);
        // TODO - we may be able to do fancier things with ranges and
        // distribution of the download over threads, investigate
        flowFile = session.write(flowFile, os -> {
            try {
                blob.download(os);
            } catch (StorageException e) {
                storedException.set(e);
                throw new IOException(e);
            }
        });
        long length = blob.getProperties().getLength();
        attributes.put("azure.length", String.valueOf(length));
        if (!attributes.isEmpty()) {
            flowFile = session.putAllAttributes(flowFile, attributes);
        }
        session.transfer(flowFile, REL_SUCCESS);
        final long transferMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        session.getProvenanceReporter().fetch(flowFile, blob.getSnapshotQualifiedUri().toString(), transferMillis);
    } catch (IllegalArgumentException | URISyntaxException | StorageException | ProcessException e) {
        if (e instanceof ProcessException && storedException.get() == null) {
            throw (ProcessException) e;
        } else {
            Exception failureException = Optional.ofNullable(storedException.get()).orElse(e);
            getLogger().error("Failure to fetch Azure blob {}", new Object[] { blobPath }, failureException);
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_FAILURE);
        }
    }
}
Also used: CapabilityDescription (org.apache.nifi.annotation.documentation.CapabilityDescription), FlowFile (org.apache.nifi.flowfile.FlowFile), URISyntaxException (java.net.URISyntaxException), CloudBlobClient (com.microsoft.azure.storage.blob.CloudBlobClient), ProcessContext (org.apache.nifi.processor.ProcessContext), IOException (java.io.IOException), HashMap (java.util.HashMap), ProcessSession (org.apache.nifi.processor.ProcessSession), WritesAttribute (org.apache.nifi.annotation.behavior.WritesAttribute), SeeAlso (org.apache.nifi.annotation.documentation.SeeAlso), AtomicReference (java.util.concurrent.atomic.AtomicReference), ProcessException (org.apache.nifi.processor.exception.ProcessException), TimeUnit (java.util.concurrent.TimeUnit), StorageException (com.microsoft.azure.storage.StorageException), InputRequirement (org.apache.nifi.annotation.behavior.InputRequirement), WritesAttributes (org.apache.nifi.annotation.behavior.WritesAttributes), CloudBlobContainer (com.microsoft.azure.storage.blob.CloudBlobContainer), Map (java.util.Map), Requirement (org.apache.nifi.annotation.behavior.InputRequirement.Requirement), Optional (java.util.Optional), Tags (org.apache.nifi.annotation.documentation.Tags), AbstractAzureBlobProcessor (org.apache.nifi.processors.azure.AbstractAzureBlobProcessor), AzureStorageUtils (org.apache.nifi.processors.azure.storage.utils.AzureStorageUtils), CloudBlob (com.microsoft.azure.storage.blob.CloudBlob)
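
The catch block above has to distinguish a download failure raised inside the session.write() callback from an unrelated ProcessException, so the checked exception is stashed in an AtomicReference before being rethrown as an IOException. A stripped-down sketch of that pattern (illustrative only), with a hypothetical Download interface standing in for CloudBlob#download:

import java.io.IOException;
import java.io.OutputStream;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

final class StreamingErrorCapture {

    // Stand-in for CloudBlob#download(OutputStream); declared here so the sketch compiles on its own.
    interface Download {
        void to(OutputStream out) throws Exception;
    }

    static FlowFile writeWithCapture(ProcessSession session, FlowFile flowFile,
                                     Download download, AtomicReference<Exception> stored) {
        return session.write(flowFile, out -> {
            try {
                download.to(out);
            } catch (Exception e) {
                stored.set(e);              // remember the root cause for the caller's catch block
                throw new IOException(e);   // propagates out of session.write(...) to the caller
            }
        });
    }
}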

Example 59 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class PutAzureBlobStorage method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final long startNanos = System.nanoTime();
    String containerName = context.getProperty(AzureStorageUtils.CONTAINER).evaluateAttributeExpressions(flowFile).getValue();
    String blobPath = context.getProperty(BLOB).evaluateAttributeExpressions(flowFile).getValue();
    AtomicReference<Exception> storedException = new AtomicReference<>();
    try {
        CloudBlobClient blobClient = AzureStorageUtils.createCloudBlobClient(context, getLogger(), flowFile);
        CloudBlobContainer container = blobClient.getContainerReference(containerName);
        CloudBlob blob = container.getBlockBlobReference(blobPath);
        final Map<String, String> attributes = new HashMap<>();
        long length = flowFile.getSize();
        session.read(flowFile, rawIn -> {
            InputStream in = rawIn;
            if (!(in instanceof BufferedInputStream)) {
                // do not double-wrap
                in = new BufferedInputStream(rawIn);
            }
            try {
                blob.upload(in, length);
                BlobProperties properties = blob.getProperties();
                attributes.put("azure.container", containerName);
                attributes.put("azure.primaryUri", blob.getSnapshotQualifiedUri().toString());
                attributes.put("azure.etag", properties.getEtag());
                attributes.put("azure.length", String.valueOf(length));
                attributes.put("azure.timestamp", String.valueOf(properties.getLastModified()));
            } catch (StorageException | URISyntaxException e) {
                storedException.set(e);
                throw new IOException(e);
            }
        });
        if (!attributes.isEmpty()) {
            flowFile = session.putAllAttributes(flowFile, attributes);
        }
        session.transfer(flowFile, REL_SUCCESS);
        final long transferMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        session.getProvenanceReporter().send(flowFile, blob.getSnapshotQualifiedUri().toString(), transferMillis);
    } catch (IllegalArgumentException | URISyntaxException | StorageException | ProcessException e) {
        if (e instanceof ProcessException && storedException.get() == null) {
            throw (ProcessException) e;
        } else {
            Exception failureException = Optional.ofNullable(storedException.get()).orElse(e);
            getLogger().error("Failed to put Azure blob {}", new Object[] { blobPath }, failureException);
            flowFile = session.penalize(flowFile);
            session.transfer(flowFile, REL_FAILURE);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), CloudBlobClient (com.microsoft.azure.storage.blob.CloudBlobClient), HashMap (java.util.HashMap), BufferedInputStream (java.io.BufferedInputStream), InputStream (java.io.InputStream), AtomicReference (java.util.concurrent.atomic.AtomicReference), URISyntaxException (java.net.URISyntaxException), IOException (java.io.IOException), ProcessException (org.apache.nifi.processor.exception.ProcessException), StorageException (com.microsoft.azure.storage.StorageException), CloudBlob (com.microsoft.azure.storage.blob.CloudBlob), BlobProperties (com.microsoft.azure.storage.blob.BlobProperties), CloudBlobContainer (com.microsoft.azure.storage.blob.CloudBlobContainer)
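
The read callback above guards against wrapping an already buffered stream a second time before uploading. The same guard as a tiny standalone sketch (illustrative only, not project code):

import java.io.BufferedInputStream;
import java.io.InputStream;

final class Streams {
    // Wrap in a BufferedInputStream only if the stream is not already buffered,
    // mirroring the check inside session.read() above.
    static InputStream buffered(InputStream rawIn) {
        return (rawIn instanceof BufferedInputStream) ? rawIn : new BufferedInputStream(rawIn);
    }
}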

Example 60 with FlowFile

use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.

the class ConvertAvroToJSON method onTrigger.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String containerOption = context.getProperty(CONTAINER_OPTIONS).getValue();
    final boolean useContainer = containerOption.equals(CONTAINER_ARRAY);
    // Wrap a single record (inclusive of no records) only when a container is being used
    final boolean wrapSingleRecord = context.getProperty(WRAP_SINGLE_RECORD).asBoolean() && useContainer;
    final String stringSchema = context.getProperty(SCHEMA).getValue();
    final boolean schemaLess = stringSchema != null;
    try {
        flowFile = session.write(flowFile, new StreamCallback() {

            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                final GenericData genericData = GenericData.get();
                if (schemaLess) {
                    if (schema == null) {
                        schema = new Schema.Parser().parse(stringSchema);
                    }
                    try (final InputStream in = new BufferedInputStream(rawIn);
                        final OutputStream out = new BufferedOutputStream(rawOut)) {
                        final DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
                        final BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(in, null);
                        final GenericRecord record = reader.read(null, decoder);
                        // A schemaless payload holds a single record, so both useContainer and wrapSingleRecord need to be true before we wrap it with an array
                        if (useContainer && wrapSingleRecord) {
                            out.write('[');
                        }
                        final byte[] outputBytes = (record == null) ? EMPTY_JSON_OBJECT : genericData.toString(record).getBytes(StandardCharsets.UTF_8);
                        out.write(outputBytes);
                        if (useContainer && wrapSingleRecord) {
                            out.write(']');
                        }
                    }
                } else {
                    try (final InputStream in = new BufferedInputStream(rawIn);
                        final OutputStream out = new BufferedOutputStream(rawOut);
                        final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
                        int recordCount = 0;
                        GenericRecord currRecord = null;
                        if (reader.hasNext()) {
                            currRecord = reader.next();
                            recordCount++;
                        }
                        // Open the array container if output is an array format and there are multiple records, or if configured to wrap a single record
                        if (reader.hasNext() && useContainer || wrapSingleRecord) {
                            out.write('[');
                        }
                        // Determine the initial output record, even if the set of Avro records is empty
                        final byte[] outputBytes = (currRecord == null) ? EMPTY_JSON_OBJECT : genericData.toString(currRecord).getBytes(StandardCharsets.UTF_8);
                        out.write(outputBytes);
                        while (reader.hasNext()) {
                            if (useContainer) {
                                out.write(',');
                            } else {
                                out.write('\n');
                            }
                            currRecord = reader.next(currRecord);
                            out.write(genericData.toString(currRecord).getBytes(StandardCharsets.UTF_8));
                            recordCount++;
                        }
                        // Close the array container if output is an array format and there are multiple records, or if configured to wrap a single record
                        if (recordCount > 1 && useContainer || wrapSingleRecord) {
                            out.write(']');
                        }
                    }
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to convert {} from Avro to JSON due to {}; transferring to failure", new Object[] { flowFile, pe });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
    session.transfer(flowFile, REL_SUCCESS);
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), BufferedInputStream (java.io.BufferedInputStream), InputStream (java.io.InputStream), GenericDatumReader (org.apache.avro.generic.GenericDatumReader), BufferedOutputStream (java.io.BufferedOutputStream), OutputStream (java.io.OutputStream), DataFileStream (org.apache.avro.file.DataFileStream), GenericData (org.apache.avro.generic.GenericData), StreamCallback (org.apache.nifi.processor.io.StreamCallback), BinaryDecoder (org.apache.avro.io.BinaryDecoder), ProcessException (org.apache.nifi.processor.exception.ProcessException), GenericRecord (org.apache.avro.generic.GenericRecord)
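
The datafile branch above reduces to streaming records out of an Avro container and rendering each one with GenericData#toString. A standalone sketch of that core loop (illustrative, not the processor itself; it reads a local .avro file passed as the first argument and prints the records as a JSON array):

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class AvroToJsonSketch {
    public static void main(String[] args) throws Exception {
        try (InputStream in = new BufferedInputStream(new FileInputStream(args[0]));
             DataFileStream<GenericRecord> reader =
                     new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
            GenericData genericData = GenericData.get();
            boolean first = true;
            System.out.print('[');                            // container ("array") output mode
            for (GenericRecord record : reader) {
                if (!first) {
                    System.out.print(',');                    // separate records inside the array
                }
                System.out.print(genericData.toString(record));
                first = false;
            }
            System.out.print(']');
        }
    }
}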

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile): 500
IOException (java.io.IOException): 236
ProcessException (org.apache.nifi.processor.exception.ProcessException): 193
HashMap (java.util.HashMap): 160
InputStream (java.io.InputStream): 145
OutputStream (java.io.OutputStream): 131
ComponentLog (org.apache.nifi.logging.ComponentLog): 119
Test (org.junit.Test): 116
ArrayList (java.util.ArrayList): 113
Map (java.util.Map): 105
MockFlowFile (org.apache.nifi.util.MockFlowFile): 103
ProcessSession (org.apache.nifi.processor.ProcessSession): 99
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 83
Relationship (org.apache.nifi.processor.Relationship): 78
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 78
HashSet (java.util.HashSet): 75
List (java.util.List): 67
StopWatch (org.apache.nifi.util.StopWatch): 59
Set (java.util.Set): 56
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 55