
Example 26 with StateManager

Use of org.apache.nifi.components.state.StateManager in project nifi by apache.

From the class GetHDFSEvents, the onTrigger method:

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final StateManager stateManager = context.getStateManager();
    try {
        // Restore the last processed inotify transaction id from cluster-scoped state.
        StateMap state = stateManager.getState(Scope.CLUSTER);
        String txIdAsString = state.get(LAST_TX_ID);
        if (txIdAsString != null && !"".equals(txIdAsString)) {
            lastTxId = Long.parseLong(txIdAsString);
        }
    } catch (IOException e) {
        getLogger().error("Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.", e);
        context.yield();
        return;
    }
    try {
        final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
        final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
        final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);
        // Open the inotify stream from the beginning when no transaction id has been recorded yet.
        final DFSInotifyEventInputStream eventStream = lastTxId == -1L ? getHdfsAdmin().getInotifyEventStream() : getHdfsAdmin().getInotifyEventStream(lastTxId);
        final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);
        if (eventBatch != null && eventBatch.getEvents() != null) {
            if (eventBatch.getEvents().length > 0) {
                List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);
                for (Event e : eventBatch.getEvents()) {
                    if (toProcessEvent(context, e)) {
                        getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
                        final String path = getPath(e);
                        FlowFile flowFile = session.create();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE, e.getEventType().name());
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(OBJECT_MAPPER.writeValueAsBytes(e));
                            }
                        });
                        flowFiles.add(flowFile);
                    }
                }
                for (FlowFile flowFile : flowFiles) {
                    final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
                    // Build the hdfs transit URI for the provenance event from the event path.
                    final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
                    getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.", new Object[] { flowFile, transitUri });
                    session.transfer(flowFile, REL_SUCCESS);
                    session.getProvenanceReporter().receive(flowFile, transitUri);
                }
            }
            lastTxId = eventBatch.getTxid();
        }
    } catch (IOException | InterruptedException e) {
        getLogger().error("Unable to get notification information: {}", new Object[] { e });
        context.yield();
        return;
    } catch (MissingEventsException e) {
        // Set lastTxId to -1 and update state. This may cause events not to be processed. The reason this exception is thrown is described in the
        // org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream API, which suggests tuning a couple of parameters if this API is used.
        lastTxId = -1L;
        getLogger().error("Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. " + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}", new Object[] { e });
    }
    updateClusterStateForTxId(stateManager);
}
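The method ends with a call to updateClusterStateForTxId, which is not included in this excerpt. A minimal sketch of such a helper, assuming it only persists lastTxId under the LAST_TX_ID key in cluster-scoped state (an illustration, not the verbatim project source):

private void updateClusterStateForTxId(StateManager stateManager) {
    try {
        // Sketch (assumption): copy the current cluster state and overwrite the last transaction id.
        Map<String, String> newState = new HashMap<>(stateManager.getState(Scope.CLUSTER).toMap());
        newState.put(LAST_TX_ID, String.valueOf(lastTxId));
        stateManager.setState(newState, Scope.CLUSTER);
    } catch (IOException e) {
        getLogger().warn("Failed to update cluster state with the last transaction id.", e);
    }
}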
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), StateMap (org.apache.nifi.components.state.StateMap), OutputStream (java.io.OutputStream), ArrayList (java.util.ArrayList), IOException (java.io.IOException), MissingEventsException (org.apache.hadoop.hdfs.inotify.MissingEventsException), StateManager (org.apache.nifi.components.state.StateManager), TimeUnit (java.util.concurrent.TimeUnit), Event (org.apache.hadoop.hdfs.inotify.Event), DFSInotifyEventInputStream (org.apache.hadoop.hdfs.DFSInotifyEventInputStream), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), EventBatch (org.apache.hadoop.hdfs.inotify.EventBatch)
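Similarly, getEventBatch (called near the top of onTrigger) is a private helper that wraps DFSInotifyEventInputStream.poll(long, TimeUnit), whose body is also not shown above. A hedged sketch of a bounded-retry version matching the signature used in onTrigger; the retry and logging details here are assumptions, not the project source:

private EventBatch getEventBatch(DFSInotifyEventInputStream eventStream, long duration, TimeUnit timeUnit, int retries)
        throws IOException, InterruptedException, MissingEventsException {
    int attempt = 0;
    while (true) {
        try {
            attempt++;
            // poll may return null when no events arrive within the requested duration.
            return eventStream.poll(duration, timeUnit);
        } catch (IOException e) {
            // Sketch (assumption): retry transient poll failures up to the configured number of retries.
            if (attempt > retries) {
                throw e;
            }
            getLogger().debug("Attempt {} to poll for an event batch failed. Retrying.", new Object[] { attempt });
        }
    }
}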

Aggregations

StateManager (org.apache.nifi.components.state.StateManager): 26 usages
IOException (java.io.IOException): 13 usages
StateMap (org.apache.nifi.components.state.StateMap): 12 usages
HashMap (java.util.HashMap): 11 usages
SQLException (java.sql.SQLException): 8 usages
ComponentLog (org.apache.nifi.logging.ComponentLog): 8 usages
Connection (java.sql.Connection): 7 usages
Statement (java.sql.Statement): 7 usages
HashSet (java.util.HashSet): 7 usages
DBCPService (org.apache.nifi.dbcp.DBCPService): 7 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 7 usages
Map (java.util.Map): 6 usages
ArrayList (java.util.ArrayList): 5 usages
TimeUnit (java.util.concurrent.TimeUnit): 5 usages
OnScheduled (org.apache.nifi.annotation.lifecycle.OnScheduled): 5 usages
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 5 usages
ValidationResult (org.apache.nifi.components.ValidationResult): 5 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 5 usages
Test (org.junit.Test): 5 usages
Collections (java.util.Collections): 4 usages