Use of org.apache.nifi.components.state.StateManager in project nifi by apache.
Example from the class GetHDFSEvents, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final StateManager stateManager = context.getStateManager();

    try {
        // Restore the last processed transaction id from cluster-scoped state.
        StateMap state = stateManager.getState(Scope.CLUSTER);
        String txIdAsString = state.get(LAST_TX_ID);

        if (txIdAsString != null && !"".equals(txIdAsString)) {
            lastTxId = Long.parseLong(txIdAsString);
        }
    } catch (IOException e) {
        getLogger().error("Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.", e);
        context.yield();
        return;
    }

    try {
        final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
        final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
        final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);

        // Resume the HDFS inotify stream from the stored transaction id, or from the start if none is stored.
        final DFSInotifyEventInputStream eventStream = lastTxId == -1L
                ? getHdfsAdmin().getInotifyEventStream()
                : getHdfsAdmin().getInotifyEventStream(lastTxId);
        final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);

        if (eventBatch != null && eventBatch.getEvents() != null) {
            if (eventBatch.getEvents().length > 0) {
                List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);

                // Build one JSON FlowFile per event that toProcessEvent accepts.
                for (Event e : eventBatch.getEvents()) {
                    if (toProcessEvent(context, e)) {
                        getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
                        final String path = getPath(e);

                        FlowFile flowFile = session.create();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE, e.getEventType().name());
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {
                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(OBJECT_MAPPER.writeValueAsBytes(e));
                            }
                        });

                        flowFiles.add(flowFile);
                    }
                }

                // Transfer the FlowFiles and record a provenance receive event for each.
                for (FlowFile flowFile : flowFiles) {
                    final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
                    final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
                    getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.", new Object[] { flowFile, transitUri });

                    session.transfer(flowFile, REL_SUCCESS);
                    session.getProvenanceReporter().receive(flowFile, transitUri);
                }
            }

            lastTxId = eventBatch.getTxid();
        }
    } catch (IOException | InterruptedException e) {
        getLogger().error("Unable to get notification information: {}", new Object[] { e });
        context.yield();
        return;
    } catch (MissingEventsException e) {
        // Set lastTxId to -1 and update state. This may cause events not to be processed. The reason this exception is thrown is described in the
        // org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream API. It suggests tuning a couple of parameters if this API is used.
        lastTxId = -1L;
        getLogger().error("Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. "
                + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}", new Object[] { e });
    }

    // Persist the last processed transaction id back to cluster-scoped state.
    updateClusterStateForTxId(stateManager);
}
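
The helper updateClusterStateForTxId(stateManager) called at the end of onTrigger is not shown above. A minimal sketch of how the processor could write the last transaction id back to cluster-scoped state with StateManager, assuming the same LAST_TX_ID key and lastTxId field used in onTrigger (the actual NiFi implementation may differ):

// Sketch only: persists lastTxId so the next onTrigger (or another node) can resume the stream.
private void updateClusterStateForTxId(final StateManager stateManager) {
    try {
        // Copy the existing cluster state and overwrite the last processed transaction id.
        final Map<String, String> newState = new HashMap<>(stateManager.getState(Scope.CLUSTER).toMap());
        newState.put(LAST_TX_ID, String.valueOf(lastTxId));
        stateManager.setState(newState, Scope.CLUSTER);
    } catch (IOException e) {
        // Hypothetical handling: log and continue; the next run retries from whatever id is stored.
        getLogger().warn("Failed to update cluster state with the last transaction id.", e);
    }
}

Because the id is stored with Scope.CLUSTER, the processor can resume from the last processed transaction after a restart, or on another node in the cluster, instead of replaying the whole inotify stream.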