Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class CompareFuzzyHash, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final ComponentLog logger = getLogger();
String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
String inputHash = flowFile.getAttribute(attributeName);
if (inputHash == null) {
getLogger().info("FlowFile {} lacks the required '{}' attribute, routing to failure.", new Object[] { flowFile, attributeName });
session.transfer(flowFile, REL_FAILURE);
return;
}
FuzzyHashMatcher fuzzyHashMatcher = null;
switch(algorithm) {
case tlsh:
fuzzyHashMatcher = new TLSHHashMatcher(getLogger());
break;
case ssdeep:
fuzzyHashMatcher = new SSDeepHashMatcher(getLogger());
break;
default:
getLogger().error("Seems like the processor is configured to use unsupported algorithm '{}' ? Yielding.", new Object[] { algorithm });
context.yield();
return;
}
// Check that the incoming hash is valid
if (!fuzzyHashMatcher.isValidHash(inputHash)) {
// if it is not, log the problem
logger.error("Invalid hash provided. Sending to failure");
// and route the FlowFile to failure
session.transfer(flowFile, REL_FAILURE);
session.commit();
return;
}
double similarity = 0;
double matchThreshold = context.getProperty(MATCH_THRESHOLD).asDouble();
try {
Map<String, Double> matched = new ConcurrentHashMap<String, Double>();
BufferedReader reader = fuzzyHashMatcher.getReader(context.getProperty(HASH_LIST_FILE).getValue());
String line = null;
iterateFile: while ((line = reader.readLine()) != null) {
similarity = fuzzyHashMatcher.getSimilarity(inputHash, line);
if (fuzzyHashMatcher.matchExceedsThreshold(similarity, matchThreshold)) {
String match = fuzzyHashMatcher.getMatch(line);
// A malformed hash list entry can produce a match with an empty name; ignore such an entry and log it instead of recording it
if (!StringUtils.isEmpty(match)) {
matched.put(match, similarity);
} else {
logger.error("Found a match against a malformed entry '{}'. Please inspect the contents of" + "the {} file and ensure they are properly formatted", new Object[] { line, HASH_LIST_FILE.getDisplayName() });
}
}
// Check if single match is desired and if a match has been made
if (singleMatch.getValue().equals(context.getProperty(MATCHING_MODE).getValue()) && (matched.size() > 0)) {
// and save time by breaking the outer loop
break iterateFile;
}
}
// Whether or not the loop exited early, continue processing
// First by creating a new map to hold attributes
Map<String, String> attributes = new ConcurrentHashMap<String, String>();
// Then by iterating over the hashmap of matches
if (matched.size() > 0) {
int x = 0;
for (Map.Entry<String, Double> entry : matched.entrySet()) {
// defining attributes accordingly
attributes.put(attributeName + "." + x + ".match", entry.getKey());
attributes.put(attributeName + "." + x + ".similarity", String.valueOf(entry.getValue()));
x++;
}
// Finally, append the attributes to the FlowFile and send it to the found relationship
flowFile = session.putAllAttributes(flowFile, attributes);
session.transfer(flowFile, REL_FOUND);
session.commit();
return;
} else {
// Otherwise send it to non-match
session.transfer(flowFile, REL_NOT_FOUND);
session.commit();
return;
}
} catch (IOException e) {
logger.error("Error while reading the hash input source");
context.yield();
}
}
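One thing the loop above does not do is close the BufferedReader returned by fuzzyHashMatcher.getReader(). A minimal sketch of the same read loop using try-with-resources, reusing the names from the snippet and assuming the caller owns the reader (the malformed-entry logging is omitted for brevity):

try (BufferedReader reader = fuzzyHashMatcher.getReader(context.getProperty(HASH_LIST_FILE).getValue())) {
    String line;
    while ((line = reader.readLine()) != null) {
        final double similarity = fuzzyHashMatcher.getSimilarity(inputHash, line);
        if (!fuzzyHashMatcher.matchExceedsThreshold(similarity, matchThreshold)) {
            continue;
        }
        final String match = fuzzyHashMatcher.getMatch(line);
        if (!StringUtils.isEmpty(match)) {
            matched.put(match, similarity);
        }
        // stop early when single-match mode is configured and a match has been recorded
        if (singleMatch.getValue().equals(context.getProperty(MATCHING_MODE).getValue()) && !matched.isEmpty()) {
            break;
        }
    }
} // the reader is closed here even when an IOException is thrown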
Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class GetAzureEventHub, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final BlockingQueue<String> partitionIds = this.partitionNames;
final String partitionId = partitionIds.poll();
if (partitionId == null) {
getLogger().debug("No partitions available");
return;
}
final StopWatch stopWatch = new StopWatch(true);
try {
final Iterable<EventData> receivedEvents = receiveEvents(context, partitionId);
if (receivedEvents == null) {
return;
}
for (final EventData eventData : receivedEvents) {
if (null != eventData) {
final Map<String, String> attributes = new HashMap<>();
FlowFile flowFile = session.create();
final EventData.SystemProperties systemProperties = eventData.getSystemProperties();
if (null != systemProperties) {
attributes.put("eventhub.enqueued.timestamp", String.valueOf(systemProperties.getEnqueuedTime()));
attributes.put("eventhub.offset", systemProperties.getOffset());
attributes.put("eventhub.sequence", String.valueOf(systemProperties.getSequenceNumber()));
}
attributes.put("eventhub.name", context.getProperty(EVENT_HUB_NAME).getValue());
attributes.put("eventhub.partition", partitionId);
flowFile = session.putAllAttributes(flowFile, attributes);
flowFile = session.write(flowFile, out -> {
out.write(eventData.getBytes());
});
session.transfer(flowFile, REL_SUCCESS);
final String namespace = context.getProperty(NAMESPACE).getValue();
final String eventHubName = context.getProperty(EVENT_HUB_NAME).getValue();
final String consumerGroup = context.getProperty(CONSUMER_GROUP).getValue();
final String serviceBusEndPoint = context.getProperty(SERVICE_BUS_ENDPOINT).getValue();
final String transitUri = "amqps://" + namespace + serviceBusEndPoint + "/" + eventHubName + "/ConsumerGroups/" + consumerGroup + "/Partitions/" + partitionId;
session.getProvenanceReporter().receive(flowFile, transitUri, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
}
}
} finally {
partitionIds.offer(partitionId);
}
}
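The poll()/offer() pair above implements a simple round-robin over Event Hub partitions: each onTrigger call removes one partition id from the queue, reads from it, and returns it in the finally block, so concurrent tasks never read the same partition at the same time. A stripped-down, self-contained sketch of that pattern (class and method names here are illustrative, not part of the processor):

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

class PartitionRoundRobin {

    private final BlockingQueue<String> partitionIds = new ArrayBlockingQueue<>(4);

    PartitionRoundRobin() {
        // seed the queue once with the known partition ids
        for (int i = 0; i < 4; i++) {
            partitionIds.offer(String.valueOf(i));
        }
    }

    void pollOnce() {
        final String partitionId = partitionIds.poll(); // null means every partition is currently taken
        if (partitionId == null) {
            return;
        }
        try {
            // receive and process events for partitionId here
        } finally {
            partitionIds.offer(partitionId); // hand the partition back for the next trigger
        }
    }
}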
Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class FetchAzureBlobStorage, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final long startNanos = System.nanoTime();
String containerName = context.getProperty(AzureStorageUtils.CONTAINER).evaluateAttributeExpressions(flowFile).getValue();
String blobPath = context.getProperty(BLOB).evaluateAttributeExpressions(flowFile).getValue();
AtomicReference<Exception> storedException = new AtomicReference<>();
try {
CloudBlobClient blobClient = AzureStorageUtils.createCloudBlobClient(context, getLogger(), flowFile);
CloudBlobContainer container = blobClient.getContainerReference(containerName);
final Map<String, String> attributes = new HashMap<>();
final CloudBlob blob = container.getBlockBlobReference(blobPath);
// TODO - we may be able to do fancier things with ranges and
// distribution of the download over threads; investigate
flowFile = session.write(flowFile, os -> {
try {
blob.download(os);
} catch (StorageException e) {
storedException.set(e);
throw new IOException(e);
}
});
long length = blob.getProperties().getLength();
attributes.put("azure.length", String.valueOf(length));
if (!attributes.isEmpty()) {
flowFile = session.putAllAttributes(flowFile, attributes);
}
session.transfer(flowFile, REL_SUCCESS);
final long transferMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
session.getProvenanceReporter().fetch(flowFile, blob.getSnapshotQualifiedUri().toString(), transferMillis);
} catch (IllegalArgumentException | URISyntaxException | StorageException | ProcessException e) {
if (e instanceof ProcessException && storedException.get() == null) {
throw (ProcessException) e;
} else {
Exception failureException = Optional.ofNullable(storedException.get()).orElse(e);
getLogger().error("Failure to fetch Azure blob {}", new Object[] { blobPath }, failureException);
flowFile = session.penalize(flowFile);
session.transfer(flowFile, REL_FAILURE);
}
}
}
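The AtomicReference<Exception> is how the root cause escapes the session.write() callback, which may only throw IOException: the StorageException is remembered, wrapped in an IOException to abort the write, and later preferred over the wrapping exception when logging. A self-contained sketch of that pattern, with made-up names standing in for the NiFi callback and the Azure call:

import java.io.IOException;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicReference;

public class StoredExceptionSketch {

    // stands in for a callback that is only allowed to throw IOException
    interface IoCallback {
        void run() throws IOException;
    }

    static void runAndReport(AtomicReference<Exception> stored, IoCallback callback) {
        try {
            callback.run();
        } catch (IOException wrapper) {
            // prefer the remembered root cause over the IOException wrapper
            final Exception failure = Optional.ofNullable(stored.get()).orElse(wrapper);
            System.err.println("Fetch failed: " + failure);
        }
    }

    public static void main(String[] args) {
        final AtomicReference<Exception> storedException = new AtomicReference<>();
        runAndReport(storedException, () -> {
            try {
                // pretend this is blob.download(os) failing with a StorageException
                throw new IllegalStateException("storage error");
            } catch (IllegalStateException e) {
                storedException.set(e);    // remember the real cause
                throw new IOException(e);  // abort the callback with the only checked type allowed
            }
        });
    }
}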
Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class PutAzureBlobStorage, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final long startNanos = System.nanoTime();
String containerName = context.getProperty(AzureStorageUtils.CONTAINER).evaluateAttributeExpressions(flowFile).getValue();
String blobPath = context.getProperty(BLOB).evaluateAttributeExpressions(flowFile).getValue();
AtomicReference<Exception> storedException = new AtomicReference<>();
try {
CloudBlobClient blobClient = AzureStorageUtils.createCloudBlobClient(context, getLogger(), flowFile);
CloudBlobContainer container = blobClient.getContainerReference(containerName);
CloudBlob blob = container.getBlockBlobReference(blobPath);
final Map<String, String> attributes = new HashMap<>();
long length = flowFile.getSize();
session.read(flowFile, rawIn -> {
InputStream in = rawIn;
if (!(in instanceof BufferedInputStream)) {
// wrap only when the stream is not already buffered, to avoid double-wrapping
in = new BufferedInputStream(rawIn);
}
try {
blob.upload(in, length);
BlobProperties properties = blob.getProperties();
attributes.put("azure.container", containerName);
attributes.put("azure.primaryUri", blob.getSnapshotQualifiedUri().toString());
attributes.put("azure.etag", properties.getEtag());
attributes.put("azure.length", String.valueOf(length));
attributes.put("azure.timestamp", String.valueOf(properties.getLastModified()));
} catch (StorageException | URISyntaxException e) {
storedException.set(e);
throw new IOException(e);
}
});
if (!attributes.isEmpty()) {
flowFile = session.putAllAttributes(flowFile, attributes);
}
session.transfer(flowFile, REL_SUCCESS);
final long transferMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
session.getProvenanceReporter().send(flowFile, blob.getSnapshotQualifiedUri().toString(), transferMillis);
} catch (IllegalArgumentException | URISyntaxException | StorageException | ProcessException e) {
if (e instanceof ProcessException && storedException.get() == null) {
throw (ProcessException) e;
} else {
Exception failureException = Optional.ofNullable(storedException.get()).orElse(e);
getLogger().error("Failed to put Azure blob {}", new Object[] { blobPath }, failureException);
flowFile = session.penalize(flowFile);
session.transfer(flowFile, REL_FAILURE);
}
}
}
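Two small details in the snippet above: flowFile.getSize() is captured into length before the read callback so that blob.upload() can be told the exact stream size up front, and the instanceof check keeps the framework's stream from being wrapped in a second BufferedInputStream. That guard can be factored into a tiny helper (purely illustrative, not part of the processor):

import java.io.BufferedInputStream;
import java.io.InputStream;

final class Streams {

    private Streams() {
    }

    // Wrap in a BufferedInputStream only when the stream is not already buffered.
    static InputStream buffered(InputStream in) {
        return (in instanceof BufferedInputStream) ? in : new BufferedInputStream(in);
    }
}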
Use of org.apache.nifi.flowfile.FlowFile in project nifi by apache.
The class ConvertAvroToJSON, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final String containerOption = context.getProperty(CONTAINER_OPTIONS).getValue();
final boolean useContainer = containerOption.equals(CONTAINER_ARRAY);
// Wrap a single record (inclusive of no records) only when a container is being used
final boolean wrapSingleRecord = context.getProperty(WRAP_SINGLE_RECORD).asBoolean() && useContainer;
final String stringSchema = context.getProperty(SCHEMA).getValue();
final boolean schemaLess = stringSchema != null;
try {
flowFile = session.write(flowFile, new StreamCallback() {
@Override
public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
final GenericData genericData = GenericData.get();
if (schemaLess) {
if (schema == null) {
schema = new Schema.Parser().parse(stringSchema);
}
try (final InputStream in = new BufferedInputStream(rawIn);
final OutputStream out = new BufferedOutputStream(rawOut)) {
final DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
final BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(in, null);
final GenericRecord record = reader.read(null, decoder);
// A schema-less payload carries a single record, so both useContainer and wrapSingleRecord need to be true before we wrap it with an array
if (useContainer && wrapSingleRecord) {
out.write('[');
}
final byte[] outputBytes = (record == null) ? EMPTY_JSON_OBJECT : genericData.toString(record).getBytes(StandardCharsets.UTF_8);
out.write(outputBytes);
if (useContainer && wrapSingleRecord) {
out.write(']');
}
}
} else {
try (final InputStream in = new BufferedInputStream(rawIn);
final OutputStream out = new BufferedOutputStream(rawOut);
final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
int recordCount = 0;
GenericRecord currRecord = null;
if (reader.hasNext()) {
currRecord = reader.next();
recordCount++;
}
// Open the container if the desired output is an array format and there are multiple records, or if configured to wrap a single record
if ((reader.hasNext() && useContainer) || wrapSingleRecord) {
out.write('[');
}
// Write the initial output record, or an empty JSON object when the Avro content contains no records
final byte[] outputBytes = (currRecord == null) ? EMPTY_JSON_OBJECT : genericData.toString(currRecord).getBytes(StandardCharsets.UTF_8);
out.write(outputBytes);
while (reader.hasNext()) {
if (useContainer) {
out.write(',');
} else {
out.write('\n');
}
currRecord = reader.next(currRecord);
out.write(genericData.toString(currRecord).getBytes(StandardCharsets.UTF_8));
recordCount++;
}
// Close the container if the desired output is an array format and there are multiple records, or if configured to wrap a single record
if ((recordCount > 1 && useContainer) || wrapSingleRecord) {
out.write(']');
}
}
}
}
});
} catch (final ProcessException pe) {
getLogger().error("Failed to convert {} from Avro to JSON due to {}; transferring to failure", new Object[] { flowFile, pe });
session.transfer(flowFile, REL_FAILURE);
return;
}
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
session.transfer(flowFile, REL_SUCCESS);
}