Use of org.apache.nifi.logging.ComponentLog in project nifi by apache.
The class CompareFuzzyHash, method onTrigger.
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final ComponentLog logger = getLogger();
String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
String inputHash = flowFile.getAttribute(attributeName);
if (inputHash == null) {
logger.info("FlowFile {} lacks the required '{}' attribute, routing to failure.", new Object[] { flowFile, attributeName });
session.transfer(flowFile, REL_FAILURE);
return;
}
FuzzyHashMatcher fuzzyHashMatcher = null;
switch(algorithm) {
case tlsh:
fuzzyHashMatcher = new TLSHHashMatcher(getLogger());
break;
case ssdeep:
fuzzyHashMatcher = new SSDeepHashMatcher(getLogger());
break;
default:
getLogger().error("Seems like the processor is configured to use unsupported algorithm '{}' ? Yielding.", new Object[] { algorithm });
context.yield();
return;
}
if (!fuzzyHashMatcher.isValidHash(inputHash)) {
// Log the invalid hash and route the FlowFile to failure
logger.error("Invalid hash provided. Sending to failure");
session.transfer(flowFile, REL_FAILURE);
session.commit();
return;
}
double similarity = 0;
double matchThreshold = context.getProperty(MATCH_THRESHOLD).asDouble();
try (BufferedReader reader = fuzzyHashMatcher.getReader(context.getProperty(HASH_LIST_FILE).getValue())) {
Map<String, Double> matched = new ConcurrentHashMap<>();
String line;
iterateFile: while ((line = reader.readLine()) != null) {
similarity = fuzzyHashMatcher.getSimilarity(inputHash, line);
if (fuzzyHashMatcher.matchExceedsThreshold(similarity, matchThreshold)) {
String match = fuzzyHashMatcher.getMatch(line);
// An empty match indicates a malformed entry, so skip it and log instead of adding it
if (!StringUtils.isEmpty(match)) {
matched.put(match, similarity);
} else {
logger.error("Found a match against a malformed entry '{}'. Please inspect the contents of" + "the {} file and ensure they are properly formatted", new Object[] { line, HASH_LIST_FILE.getDisplayName() });
}
}
// Check if single-match mode is enabled and a match has already been made
if (singleMatch.getValue().equals(context.getProperty(MATCHING_MODE).getValue()) && (matched.size() > 0)) {
// and save time by breaking out of the loop early
break iterateFile;
}
}
// Whether or not the loop was exited early, continue processing
// First by creating a new map to hold attributes
Map<String, String> attributes = new ConcurrentHashMap<>();
// Then by iterating over the hashmap of matches
if (matched.size() > 0) {
int x = 0;
for (Map.Entry<String, Double> entry : matched.entrySet()) {
// defining attributes accordingly
attributes.put(attributeName + "." + x + ".match", entry.getKey());
attributes.put(attributeName + "." + x + ".similarity", String.valueOf(entry.getValue()));
x++;
}
// Finally, append the attributes to the FlowFile and send it to the found relationship
flowFile = session.putAllAttributes(flowFile, attributes);
session.transfer(flowFile, REL_FOUND);
session.commit();
return;
} else {
// Otherwise send it to non-match
session.transfer(flowFile, REL_NOT_FOUND);
session.commit();
return;
}
} catch (IOException e) {
logger.error("Error while reading the hash input source");
context.yield();
}
}
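The match/no-match routing above can be exercised with NiFi's mock framework (nifi-mock) without a running flow. Below is a minimal sketch, assuming the property descriptors and relationships referenced in the snippet are exposed as public constants on CompareFuzzyHash, as is conventional for NiFi processors; the attribute name, hash value, hash list path, and the "ssdeep" allowable value are illustrative placeholders.

import java.util.Collections;

import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

public class CompareFuzzyHashSketch {

    public static void main(String[] args) {
        // Wrap the processor in a mock test runner (the CompareFuzzyHash import from the
        // nifi-cybersecurity bundle is assumed to be on the classpath)
        TestRunner runner = TestRunners.newTestRunner(CompareFuzzyHash.class);

        // Property descriptors referenced by onTrigger() above; all values are illustrative
        runner.setProperty(CompareFuzzyHash.HASH_ALGORITHM, "ssdeep");           // assumed allowable value
        runner.setProperty(CompareFuzzyHash.ATTRIBUTE_NAME, "fuzzyhash.value");  // attribute read by onTrigger()
        runner.setProperty(CompareFuzzyHash.HASH_LIST_FILE, "/tmp/ssdeep.list"); // hypothetical hash list file
        runner.setProperty(CompareFuzzyHash.MATCH_THRESHOLD, "80");

        // Enqueue a FlowFile carrying the hash attribute and trigger the processor once
        runner.enqueue(new byte[0], Collections.singletonMap("fuzzyhash.value", "<ssdeep hash here>"));
        runner.run();

        // Depending on hash validity and the list contents, the FlowFile lands on found, not-found, or failure
        System.out.println("found=" + runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FOUND).size()
                + " not-found=" + runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_NOT_FOUND).size()
                + " failure=" + runner.getFlowFilesForRelationship(CompareFuzzyHash.REL_FAILURE).size());
    }
}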
Use of org.apache.nifi.logging.ComponentLog in project nifi by apache.
The class PutDruidRecord, method processFlowFile.
/**
* Parses the record(s), converts each to a Map, and sends via Tranquility to the Druid Indexing Service
*
* @param context The process context
* @param session The process session
*/
@SuppressWarnings("unchecked")
private void processFlowFile(ProcessContext context, final ProcessSession session) {
final ComponentLog log = getLogger();
// Get handle on Druid Tranquility session
DruidTranquilityService tranquilityController = context.getProperty(DRUID_TRANQUILITY_SERVICE).asControllerService(DruidTranquilityService.class);
Tranquilizer<Map<String, Object>> tranquilizer = tranquilityController.getTranquilizer();
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
// Create the outgoing flow files and output streams
FlowFile droppedFlowFile = session.create(flowFile);
final AtomicInteger droppedFlowFileCount = new AtomicInteger(0);
FlowFile failedFlowFile = session.create(flowFile);
final AtomicInteger failedFlowFileCount = new AtomicInteger(0);
FlowFile successfulFlowFile = session.create(flowFile);
final AtomicInteger successfulFlowFileCount = new AtomicInteger(0);
final AtomicInteger recordWriteErrors = new AtomicInteger(0);
int recordCount = 0;
final OutputStream droppedOutputStream = session.write(droppedFlowFile);
final RecordSetWriter droppedRecordWriter;
final OutputStream failedOutputStream = session.write(failedFlowFile);
final RecordSetWriter failedRecordWriter;
final OutputStream successfulOutputStream = session.write(successfulFlowFile);
final RecordSetWriter successfulRecordWriter;
try (final InputStream in = session.read(flowFile)) {
final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
final Map<String, String> attributes = flowFile.getAttributes();
final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger());
final RecordSchema outSchema = writerFactory.getSchema(attributes, reader.getSchema());
droppedRecordWriter = writerFactory.createWriter(log, outSchema, droppedOutputStream);
droppedRecordWriter.beginRecordSet();
failedRecordWriter = writerFactory.createWriter(log, outSchema, failedOutputStream);
failedRecordWriter.beginRecordSet();
successfulRecordWriter = writerFactory.createWriter(log, outSchema, successfulOutputStream);
successfulRecordWriter.beginRecordSet();
Record r;
while ((r = reader.nextRecord()) != null) {
final Record record = r;
recordCount++;
// Convert each Record to HashMap and send to Druid
Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(r, RecordFieldType.RECORD.getRecordDataType(r.getSchema()));
log.debug("Tranquilizer Status: {}", new Object[] { tranquilizer.status().toString() });
// Send data element to Druid asynchronously
Future<BoxedUnit> future = tranquilizer.send(contentMap);
log.debug("Sent Payload to Druid: {}", new Object[] { contentMap });
// Wait for Druid to call back with status
future.addEventListener(new FutureEventListener<Object>() {
@Override
public void onFailure(Throwable cause) {
if (cause instanceof MessageDroppedException) {
// This happens when event timestamp targets a Druid Indexing task that has closed (Late Arriving Data)
log.debug("Record Dropped due to MessageDroppedException: {}, transferring record to dropped.", new Object[] { cause.getMessage() }, cause);
try {
synchronized (droppedRecordWriter) {
droppedRecordWriter.write(record);
droppedRecordWriter.flush();
droppedFlowFileCount.incrementAndGet();
}
} catch (final IOException ioe) {
log.error("Error transferring record to dropped, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
recordWriteErrors.incrementAndGet();
}
} else {
log.error("FlowFile Processing Failed due to: {}", new Object[] { cause.getMessage() }, cause);
try {
synchronized (failedRecordWriter) {
failedRecordWriter.write(record);
failedRecordWriter.flush();
failedFlowFileCount.incrementAndGet();
}
} catch (final IOException ioe) {
log.error("Error transferring record to failure, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
recordWriteErrors.incrementAndGet();
}
}
}
@Override
public void onSuccess(Object value) {
log.debug(" FlowFile Processing Success: {}", new Object[] { value.toString() });
try {
synchronized (successfulRecordWriter) {
successfulRecordWriter.write(record);
successfulRecordWriter.flush();
successfulFlowFileCount.incrementAndGet();
}
} catch (final IOException ioe) {
log.error("Error transferring record to success, this may result in data loss. " + "However the record was successfully processed by Druid", new Object[] { ioe.getMessage() }, ioe);
recordWriteErrors.incrementAndGet();
}
}
});
}
} catch (IOException | SchemaNotFoundException | MalformedRecordException e) {
log.error("FlowFile Processing Failed due to: {}", new Object[] { e.getMessage() }, e);
// Route the original FlowFile to failure and clean up the per-outcome FlowFiles created above
flowFile = session.putAttribute(flowFile, RECORD_COUNT, Integer.toString(recordCount));
session.transfer(flowFile, REL_FAILURE);
try {
droppedOutputStream.close();
session.remove(droppedFlowFile);
} catch (IOException ioe) {
log.error("Error closing output stream for FlowFile with dropped records.", ioe);
}
try {
failedOutputStream.close();
session.remove(failedFlowFile);
} catch (IOException ioe) {
log.error("Error closing output stream for FlowFile with failed records.", ioe);
}
try {
successfulOutputStream.close();
session.remove(successfulFlowFile);
} catch (IOException ioe) {
log.error("Error closing output stream for FlowFile with successful records.", ioe);
}
session.commit();
return;
}
if (recordCount == 0) {
// Send original (empty) flow file to success, remove the rest
flowFile = session.putAttribute(flowFile, RECORD_COUNT, "0");
session.transfer(flowFile, REL_SUCCESS);
try {
droppedOutputStream.close();
session.remove(droppedFlowFile);
} catch (IOException ioe) {
log.error("Error closing output stream for FlowFile with dropped records.", ioe);
}
try {
failedOutputStream.close();
session.remove(failedFlowFile);
} catch (IOException ioe) {
log.error("Error closing output stream for FlowFile with failed records.", ioe);
}
try {
successfulOutputStream.close();
session.remove(successfulFlowFile);
} catch (IOException ioe) {
log.error("Error closing output stream for FlowFile with successful records.", ioe);
}
} else {
// Wait for all the records to finish processing
while (recordCount != (droppedFlowFileCount.get() + failedFlowFileCount.get() + successfulFlowFileCount.get() + recordWriteErrors.get())) {
Thread.yield();
}
try {
droppedRecordWriter.finishRecordSet();
droppedRecordWriter.close();
} catch (IOException ioe) {
log.error("Error closing FlowFile with dropped records: {}", new Object[] { ioe.getMessage() }, ioe);
session.rollback();
throw new ProcessException(ioe);
}
if (droppedFlowFileCount.get() > 0) {
droppedFlowFile = session.putAttribute(droppedFlowFile, RECORD_COUNT, Integer.toString(droppedFlowFileCount.get()));
session.transfer(droppedFlowFile, REL_DROPPED);
} else {
session.remove(droppedFlowFile);
}
try {
failedRecordWriter.finishRecordSet();
failedRecordWriter.close();
} catch (IOException ioe) {
log.error("Error closing FlowFile with failed records: {}", new Object[] { ioe.getMessage() }, ioe);
session.rollback();
throw new ProcessException(ioe);
}
if (failedFlowFileCount.get() > 0) {
failedFlowFile = session.putAttribute(failedFlowFile, RECORD_COUNT, Integer.toString(failedFlowFileCount.get()));
session.transfer(failedFlowFile, REL_FAILURE);
} else {
session.remove(failedFlowFile);
}
try {
successfulRecordWriter.finishRecordSet();
successfulRecordWriter.close();
} catch (IOException ioe) {
log.error("Error closing FlowFile with successful records: {}", new Object[] { ioe.getMessage() }, ioe);
session.rollback();
throw new ProcessException(ioe);
}
if (successfulFlowFileCount.get() > 0) {
successfulFlowFile = session.putAttribute(successfulFlowFile, RECORD_COUNT, Integer.toString(successfulFlowFileCount.get()));
session.transfer(successfulFlowFile, REL_SUCCESS);
session.getProvenanceReporter().send(successfulFlowFile, tranquilityController.getTransitUri());
} else {
session.remove(successfulFlowFile);
}
session.remove(flowFile);
}
session.commit();
}
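The core step in processFlowFile() is flattening each NiFi Record into a plain Map before it is handed to Tranquility. The standalone sketch below isolates just that conversion, assuming the standard nifi-record API classes (SimpleRecordSchema, MapRecord, DataTypeUtils) in their usual packages; the schema and field values are illustrative.

import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.util.DataTypeUtils;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class RecordToMapSketch {

    public static void main(String[] args) {
        // Build a two-field schema: a timestamp column and a metric value
        RecordSchema schema = new SimpleRecordSchema(Arrays.asList(
                new RecordField("timestamp", RecordFieldType.STRING.getDataType()),
                new RecordField("value", RecordFieldType.INT.getDataType())));

        // Create a record with illustrative values
        Map<String, Object> values = new HashMap<>();
        values.put("timestamp", "2018-01-01T00:00:00Z");
        values.put("value", 42);
        Record record = new MapRecord(schema, values);

        // The same conversion used in processFlowFile() above: Record -> Map<String, Object>
        @SuppressWarnings("unchecked")
        Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(
                record, RecordFieldType.RECORD.getRecordDataType(record.getSchema()));

        System.out.println(contentMap); // e.g. {timestamp=2018-01-01T00:00:00Z, value=42}
    }
}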
Use of org.apache.nifi.logging.ComponentLog in project nifi by apache.
The class AbstractElasticsearch5TransportClientProcessor, method createElasticsearchClient.
/**
* Instantiates the Elasticsearch client. This should be called by subclasses' @OnScheduled method to create a client
* if one does not yet exist. If a client was created when the processor was scheduled, closeClient() should be called by the subclasses' @OnStopped
* method so the client will be destroyed when the processor is stopped.
*
* @param context The context for this processor
* @throws ProcessException if an error occurs while creating an Elasticsearch client
*/
@Override
protected void createElasticsearchClient(ProcessContext context) throws ProcessException {
ComponentLog log = getLogger();
if (esClient.get() != null) {
return;
}
log.debug("Creating ElasticSearch Client");
try {
final String clusterName = context.getProperty(CLUSTER_NAME).evaluateAttributeExpressions().getValue();
final String pingTimeout = context.getProperty(PING_TIMEOUT).evaluateAttributeExpressions().getValue();
final String samplerInterval = context.getProperty(SAMPLER_INTERVAL).evaluateAttributeExpressions().getValue();
final String username = context.getProperty(USERNAME).evaluateAttributeExpressions().getValue();
final String password = context.getProperty(PASSWORD).getValue();
final SSLContextService sslService = context.getProperty(PROP_SSL_CONTEXT_SERVICE).asControllerService(SSLContextService.class);
Settings.Builder settingsBuilder = Settings.builder().put("cluster.name", clusterName).put("client.transport.ping_timeout", pingTimeout).put("client.transport.nodes_sampler_interval", samplerInterval);
String xPackUrl = context.getProperty(PROP_XPACK_LOCATION).evaluateAttributeExpressions().getValue();
if (sslService != null) {
settingsBuilder.put("xpack.security.transport.ssl.enabled", "true");
if (!StringUtils.isEmpty(sslService.getKeyStoreFile())) {
settingsBuilder.put("xpack.ssl.keystore.path", sslService.getKeyStoreFile());
}
if (!StringUtils.isEmpty(sslService.getKeyStorePassword())) {
settingsBuilder.put("xpack.ssl.keystore.password", sslService.getKeyStorePassword());
}
if (!StringUtils.isEmpty(sslService.getKeyPassword())) {
settingsBuilder.put("xpack.ssl.keystore.key_password", sslService.getKeyPassword());
}
if (!StringUtils.isEmpty(sslService.getTrustStoreFile())) {
settingsBuilder.put("xpack.ssl.truststore.path", sslService.getTrustStoreFile());
}
if (!StringUtils.isEmpty(sslService.getTrustStorePassword())) {
settingsBuilder.put("xpack.ssl.truststore.password", sslService.getTrustStorePassword());
}
}
// Set username and password for X-Pack
if (!StringUtils.isEmpty(username)) {
StringBuilder secureUser = new StringBuilder(username);
if (!StringUtils.isEmpty(password)) {
secureUser.append(":");
secureUser.append(password);
}
settingsBuilder.put("xpack.security.user", secureUser.toString());
}
final String hosts = context.getProperty(HOSTS).evaluateAttributeExpressions().getValue();
esHosts = getEsHosts(hosts);
Client transportClient = getTransportClient(settingsBuilder, xPackUrl, username, password, esHosts, log);
esClient.set(transportClient);
} catch (Exception e) {
log.error("Failed to create Elasticsearch client due to {}", new Object[] { e }, e);
throw new ProcessException(e);
}
}
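The Javadoc above describes the intended lifecycle: create the client when the processor is scheduled and release it when the processor is stopped. A minimal sketch of a subclass following that contract is shown below; the class and method names are hypothetical, closeClient() is assumed to exist on the abstract class as the Javadoc states, and the class is left abstract because onTrigger() and the other required members are omitted.

import org.apache.nifi.annotation.lifecycle.OnScheduled;
import org.apache.nifi.annotation.lifecycle.OnStopped;
import org.apache.nifi.processor.ProcessContext;

// Hypothetical subclass illustrating the lifecycle described in the Javadoc above
public abstract class MyElasticsearch5Processor extends AbstractElasticsearch5TransportClientProcessor {

    @OnScheduled
    public void scheduleClient(ProcessContext context) {
        // Creates the transport client; later invocations return early because esClient is already set
        createElasticsearchClient(context);
    }

    @OnStopped
    public void shutdownClient() {
        // closeClient() is the tear-down method referenced in the Javadoc; assumed to be provided by the base class
        closeClient();
    }
}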
Use of org.apache.nifi.logging.ComponentLog in project nifi by apache.
The class FetchElasticsearch5, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
synchronized (esClient) {
if (esClient.get() == null) {
super.setup(context);
}
}
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final String index = context.getProperty(INDEX).evaluateAttributeExpressions(flowFile).getValue();
final String docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(flowFile).getValue();
final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(flowFile).getValue());
final ComponentLog logger = getLogger();
try {
logger.debug("Fetching {}/{}/{} from Elasticsearch", new Object[] { index, docType, docId });
GetRequestBuilder getRequestBuilder = esClient.get().prepareGet(index, docType, docId);
final GetResponse getResponse = getRequestBuilder.execute().actionGet();
if (getResponse == null || !getResponse.isExists()) {
logger.debug("Failed to read {}/{}/{} from Elasticsearch: Document not found", new Object[] { index, docType, docId });
// We couldn't find the document, so penalize it and send it to "not found"
flowFile = session.penalize(flowFile);
session.transfer(flowFile, REL_NOT_FOUND);
} else {
flowFile = session.putAllAttributes(flowFile, new HashMap<String, String>() {
{
put("filename", docId);
put("es.index", index);
put("es.type", docType);
}
});
flowFile = session.write(flowFile, new OutputStreamCallback() {
@Override
public void process(OutputStream out) throws IOException {
out.write(getResponse.getSourceAsString().getBytes(charset));
}
});
logger.debug("Elasticsearch document " + docId + " fetched, routing to success");
// The document is JSON, so update the MIME type of the flow file
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
session.getProvenanceReporter().fetch(flowFile, getResponse.remoteAddress().getAddress());
session.transfer(flowFile, REL_SUCCESS);
}
} catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
logger.error("Failed to read into Elasticsearch due to {}, this may indicate an error in configuration " + "(hosts, username/password, etc.), or this issue may be transient. Routing to retry", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
session.transfer(flowFile, REL_RETRY);
context.yield();
} catch (Exception e) {
logger.error("Failed to read {} from Elasticsearch due to {}", new Object[] { flowFile, e.getLocalizedMessage() }, e);
session.transfer(flowFile, REL_FAILURE);
context.yield();
}
}
Use of org.apache.nifi.logging.ComponentLog in project nifi by apache.
The class PutElasticsearch5, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
synchronized (esClient) {
if (esClient.get() == null) {
super.setup(context);
}
}
final String id_attribute = context.getProperty(ID_ATTRIBUTE).getValue();
final int batchSize = context.getProperty(BATCH_SIZE).evaluateAttributeExpressions().asInteger();
final List<FlowFile> flowFiles = session.get(batchSize);
if (flowFiles.isEmpty()) {
return;
}
final ComponentLog logger = getLogger();
// Keep track of the list of flow files that need to be transferred. As they are transferred, remove them from the list.
List<FlowFile> flowFilesToTransfer = new LinkedList<>(flowFiles);
try {
final BulkRequestBuilder bulk = esClient.get().prepareBulk();
for (FlowFile file : flowFiles) {
final String index = context.getProperty(INDEX).evaluateAttributeExpressions(file).getValue();
final String docType = context.getProperty(TYPE).evaluateAttributeExpressions(file).getValue();
final String indexOp = context.getProperty(INDEX_OP).evaluateAttributeExpressions(file).getValue();
final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(file).getValue());
final String id = file.getAttribute(id_attribute);
if (id == null) {
logger.warn("No value in identifier attribute {} for {}, transferring to failure", new Object[] { id_attribute, file });
flowFilesToTransfer.remove(file);
session.transfer(file, REL_FAILURE);
} else {
session.read(file, new InputStreamCallback() {
@Override
public void process(final InputStream in) throws IOException {
// For the bulk insert, each document has to be on its own line, so remove all CRLF
String json = IOUtils.toString(in, charset).replace("\r\n", " ").replace('\n', ' ').replace('\r', ' ');
if (indexOp.equalsIgnoreCase("index")) {
bulk.add(esClient.get().prepareIndex(index, docType, id).setSource(json.getBytes(charset)));
} else if (indexOp.equalsIgnoreCase("upsert")) {
bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)).setDocAsUpsert(true));
} else if (indexOp.equalsIgnoreCase("update")) {
bulk.add(esClient.get().prepareUpdate(index, docType, id).setDoc(json.getBytes(charset)));
} else {
throw new IOException("Index operation: " + indexOp + " not supported.");
}
}
});
}
}
if (bulk.numberOfActions() > 0) {
final BulkResponse response = bulk.execute().actionGet();
if (response.hasFailures()) {
// Responses are guaranteed to be in the same order as the bulk requests; iterate in reverse so that removing a FlowFile does not shift the indices of items not yet processed
BulkItemResponse[] responses = response.getItems();
if (responses != null && responses.length > 0) {
for (int i = responses.length - 1; i >= 0; i--) {
final BulkItemResponse item = responses[i];
final FlowFile flowFile = flowFilesToTransfer.get(item.getItemId());
if (item.isFailed()) {
logger.warn("Failed to insert {} into Elasticsearch due to {}, transferring to failure", new Object[] { flowFile, item.getFailure().getMessage() });
session.transfer(flowFile, REL_FAILURE);
} else {
session.getProvenanceReporter().send(flowFile, response.remoteAddress().getAddress());
session.transfer(flowFile, REL_SUCCESS);
}
flowFilesToTransfer.remove(flowFile);
}
}
}
// Transfer any remaining flowfiles to success
for (FlowFile ff : flowFilesToTransfer) {
session.getProvenanceReporter().send(ff, response.remoteAddress().getAddress());
session.transfer(ff, REL_SUCCESS);
}
}
} catch (NoNodeAvailableException | ElasticsearchTimeoutException | ReceiveTimeoutTransportException | NodeClosedException exceptionToRetry) {
// Authorization errors and other problems are often returned as NoNodeAvailableExceptions without a
// traceable cause. However, the cause seems to be logged, just not available to this caught exception.
// Since the error message will show up as a bulletin, we make specific mention to check the logs for
// more details.
logger.error("Failed to insert into Elasticsearch due to {}. More detailed information may be available in " + "the NiFi logs.", new Object[] { exceptionToRetry.getLocalizedMessage() }, exceptionToRetry);
session.transfer(flowFilesToTransfer, REL_RETRY);
context.yield();
} catch (Exception exceptionToFail) {
logger.error("Failed to insert into Elasticsearch due to {}, transferring to failure", new Object[] { exceptionToFail.getLocalizedMessage() }, exceptionToFail);
session.transfer(flowFilesToTransfer, REL_FAILURE);
context.yield();
}
}
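The reverse iteration over the bulk item responses above matters because transferred FlowFiles are removed from flowFilesToTransfer as they are handled: removing an element shifts every later element one position to the left, so walking forward would break the alignment between item.getItemId() and the list index. The small standalone sketch below illustrates the index-shift effect.

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

public class ReverseRemovalSketch {

    public static void main(String[] args) {
        // Forward removal: once index 0 is removed, the element originally at index 2 is found at index 1
        List<String> forward = new LinkedList<>(Arrays.asList("ff-0", "ff-1", "ff-2"));
        forward.remove(forward.get(0));
        System.out.println(forward.get(1)); // prints "ff-2", no longer the FlowFile for bulk item 1

        // Reverse removal: indices of the earlier, not-yet-processed elements are untouched
        List<String> reverse = new LinkedList<>(Arrays.asList("ff-0", "ff-1", "ff-2"));
        for (int i = reverse.size() - 1; i >= 0; i--) {
            String ff = reverse.get(i); // still matches bulk item i
            reverse.remove(ff);
        }
        System.out.println(reverse.isEmpty()); // prints "true"
    }
}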