Search in sources :

Example 26 with StateMap

use of org.apache.nifi.components.state.StateMap in project nifi by apache.

the class MockStateManager method assertStateSet.

/**
 * Ensures that the state was set for the given scope, regardless of what the value was.
 * State is considered set when the selected state map reports a version other than {@code -1}.
 *
 * @param scope the scope; {@code Scope.CLUSTER} selects the cluster state map, any other value
 *              selects the local state map
 */
public void assertStateSet(final Scope scope) {
    final StateMap stateMap = (scope == Scope.CLUSTER) ? clusterStateMap : localStateMap;
    // Compare by value. assertNotSame would autobox both longs and compare object identity,
    // which only gives the right answer while both boxes come from the Long cache.
    Assert.assertNotEquals("Expected state to be set for Scope " + scope + ", but it was not set", -1L, stateMap.getVersion());
}
Also used : StateMap(org.apache.nifi.components.state.StateMap)

Example 27 with StateMap

use of org.apache.nifi.components.state.StateMap in project nifi by apache.

the class GetHBase method parseColumns.

/**
 * Prepares the processor when it is scheduled: migrates any state still held in the
 * DistributedMapCacheClient service into the State Manager (only when the State Manager
 * reports a negative version), then rebuilds {@code this.columns} from the COLUMNS property.
 *
 * <p>The property value is split on commas. An entry containing ':' is split on ':' and its
 * first two parts become the column family and qualifier; any other entry is used as a
 * family with a {@code null} qualifier.
 *
 * @param context the process context supplying the state manager and property values
 * @throws IOException declared by the state and cache operations invoked here
 */
@OnScheduled
public void parseColumns(final ProcessContext context) throws IOException {
    final StateMap currentState = context.getStateManager().getState(Scope.CLUSTER);
    if (currentState.getVersion() < 0) {
        // no state has been stored in the State Manager - check if we have state stored in the
        // DistributedMapCacheClient service and migrate it if so
        final DistributedMapCacheClient cacheClient = context.getProperty(DISTRIBUTED_CACHE_SERVICE).asControllerService(DistributedMapCacheClient.class);
        final ScanResult legacyResult = getState(cacheClient);
        if (legacyResult != null) {
            storeState(legacyResult, context.getStateManager());
        }
        clearState(cacheClient);
    }

    final String rawColumns = context.getProperty(COLUMNS).getValue();
    final String[] columnSpecs;
    if (rawColumns == null || rawColumns.isEmpty()) {
        columnSpecs = new String[0];
    } else {
        columnSpecs = rawColumns.split(",");
    }

    this.columns.clear();
    final Charset utf8 = Charset.forName("UTF-8");
    for (final String spec : columnSpecs) {
        if (!spec.contains(":")) {
            // Family only: no qualifier was given for this entry.
            this.columns.add(new Column(spec.getBytes(utf8), null));
            continue;
        }
        final String[] familyAndQualifier = spec.split(":");
        final byte[] family = familyAndQualifier[0].getBytes(utf8);
        final byte[] qualifier = familyAndQualifier[1].getBytes(utf8);
        this.columns.add(new Column(family, qualifier));
    }
}
Also used : DistributedMapCacheClient(org.apache.nifi.distributed.cache.client.DistributedMapCacheClient) Column(org.apache.nifi.hbase.scan.Column) StateMap(org.apache.nifi.components.state.StateMap) OnScheduled(org.apache.nifi.annotation.lifecycle.OnScheduled)

Example 28 with StateMap

use of org.apache.nifi.components.state.StateMap in project nifi by apache.

the class ListGCSBucket method restoreState.

/**
 * Loads the listing position from cluster-scoped state into {@code currentTimestamp} and
 * {@code currentKeys}.
 *
 * <p>If the state map reports version {@code -1}, or lacks either the CURRENT_TIMESTAMP entry
 * or the first key entry (CURRENT_KEY_PREFIX + "0"), both fields are reset: the timestamp to
 * {@code 0} and the keys to an empty set.
 *
 * @param context the process context whose state manager is queried
 * @throws IOException if the state manager fails to return the state
 */
void restoreState(final ProcessContext context) throws IOException {
    final StateMap persisted = context.getStateManager().getState(Scope.CLUSTER);

    final boolean hasUsableState = persisted.getVersion() != -1L
            && persisted.get(CURRENT_TIMESTAMP) != null
            && persisted.get(CURRENT_KEY_PREFIX + "0") != null;

    if (hasUsableState) {
        currentTimestamp = Long.parseLong(persisted.get(CURRENT_TIMESTAMP));
        currentKeys = extractKeys(persisted);
    } else {
        currentTimestamp = 0L;
        currentKeys = new HashSet<>();
    }
}
Also used : StateMap(org.apache.nifi.components.state.StateMap)

Example 29 with StateMap

use of org.apache.nifi.components.state.StateMap in project nifi by apache.

the class ListHDFS method onTrigger.

/**
 * Performs one listing pass: refreshes the listing/emitted timestamps from cluster-scoped
 * state, lists the configured directory, emits one attribute-only FlowFile per listable file
 * to REL_SUCCESS, and writes the resulting timestamps back to cluster-scoped state.
 *
 * <p>The method yields and returns early when it is invoked again within LISTING_LAG_NANOS of
 * the previous invocation, or when the stored state cannot be read. It returns without
 * yielding when the listing itself fails or is interrupted.
 *
 * @param context the process context supplying properties and the state manager
 * @param session the session used to create, transfer and commit FlowFiles
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // We have to ensure that we don't continually perform listings, because if we perform two listings within
    // the same millisecond, our algorithm for comparing timestamps will not work. So we ensure here that we do
    // not let that happen.
    final long now = System.nanoTime();
    if (now - lastRunTimestamp < LISTING_LAG_NANOS) {
        lastRunTimestamp = now;
        context.yield();
        return;
    }
    lastRunTimestamp = now;
    final String directory = context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue();
    // Ensure that we are using the latest listing information before we try to perform a listing of HDFS files.
    try {
        final StateMap stateMap = context.getStateManager().getState(Scope.CLUSTER);
        if (stateMap.getVersion() == -1L) {
            latestTimestampEmitted = -1L;
            latestTimestampListed = -1L;
            getLogger().debug("Found no state stored");
        } else {
            // Determine if state is stored in the 'new' format or the 'old' format
            final String emittedString = stateMap.get(EMITTED_TIMESTAMP_KEY);
            if (emittedString == null) {
                latestTimestampEmitted = -1L;
                latestTimestampListed = -1L;
                getLogger().debug("Found no recognized state keys; assuming no relevant state and resetting listing/emitted time to -1");
            } else {
                // state is stored in the new format, using just two timestamps
                latestTimestampEmitted = Long.parseLong(emittedString);
                final String listingTimestampString = stateMap.get(LISTING_TIMESTAMP_KEY);
                // When no listing timestamp is stored, the current in-memory value is kept as is.
                if (listingTimestampString != null) {
                    latestTimestampListed = Long.parseLong(listingTimestampString);
                }
                getLogger().debug("Found new-style state stored, latesting timestamp emitted = {}, latest listed = {}", new Object[] { latestTimestampEmitted, latestTimestampListed });
            }
        }
    } catch (final IOException ioe) {
        // Log the cause as well; without it the reason for the failure is lost.
        getLogger().error("Failed to retrieve timestamp of last listing from the State Manager. Will not perform listing until this is accomplished.", ioe);
        context.yield();
        return;
    }
    // Pull in any file that is newer than the timestamp that we have.
    final FileSystem hdfs = getFileSystem();
    final boolean recursive = context.getProperty(RECURSE_SUBDIRS).asBoolean();
    final Set<FileStatus> statuses;
    try {
        final Path rootPath = new Path(directory);
        statuses = getStatuses(rootPath, recursive, hdfs, createPathFilter(context));
        getLogger().debug("Found a total of {} files in HDFS", new Object[] { statuses.size() });
    } catch (final IOException | IllegalArgumentException e) {
        getLogger().error("Failed to perform listing of HDFS due to {}", new Object[] { e });
        return;
    } catch (final InterruptedException e) {
        // Restore the interrupt flag so callers further up can observe the interruption.
        Thread.currentThread().interrupt();
        getLogger().error("Interrupted while performing listing of HDFS", e);
        return;
    }
    final Set<FileStatus> listable = determineListable(statuses, context);
    getLogger().debug("Of the {} files found in HDFS, {} are listable", new Object[] { statuses.size(), listable.size() });
    for (final FileStatus status : listable) {
        final Map<String, String> attributes = createAttributes(status);
        FlowFile flowFile = session.create();
        flowFile = session.putAllAttributes(flowFile, attributes);
        session.transfer(flowFile, REL_SUCCESS);
        // Track the newest modification time among the files emitted so far.
        final long fileModTime = status.getModificationTime();
        if (fileModTime > latestTimestampEmitted) {
            latestTimestampEmitted = fileModTime;
        }
    }
    final int listCount = listable.size();
    if (listCount > 0) {
        getLogger().info("Successfully created listing with {} new files from HDFS", new Object[] { listCount });
        session.commit();
    } else {
        getLogger().debug("There is no data to list. Yielding.");
        context.yield();
    }
    // Two entries are always stored, so let the map use its default capacity.
    final Map<String, String> updatedState = new HashMap<>();
    updatedState.put(LISTING_TIMESTAMP_KEY, String.valueOf(latestTimestampListed));
    updatedState.put(EMITTED_TIMESTAMP_KEY, String.valueOf(latestTimestampEmitted));
    getLogger().debug("New state map: {}", new Object[] { updatedState });
    try {
        context.getStateManager().setState(updatedState, Scope.CLUSTER);
    } catch (final IOException ioe) {
        getLogger().warn("Failed to save cluster-wide state. If NiFi is restarted, data duplication may occur", ioe);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FlowFile(org.apache.nifi.flowfile.FlowFile) FileStatus(org.apache.hadoop.fs.FileStatus) HashMap(java.util.HashMap) StateMap(org.apache.nifi.components.state.StateMap) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem)

Example 30 with StateMap

use of org.apache.nifi.components.state.StateMap in project nifi by apache.

the class ITAbstractListProcessor method testOnlyNewStateStored.

/**
 * Verifies what is held in cluster-scoped state after successive listings: after the first
 * two entities are transferred the state holds both timestamps plus two identifiers, and
 * after a newer entity is transferred it holds both timestamps plus only the new identifier.
 */
@Test
public void testOnlyNewStateStored() throws Exception {
    // Nothing has been added yet, so the first run transfers nothing.
    runner.run();
    final long baseTimestamp = System.currentTimeMillis();
    runner.assertAllFlowFilesTransferred(ConcreteListProcessor.REL_SUCCESS, 0);

    // Two entities sharing one timestamp: still nothing transferred on the run right after adding them.
    proc.addEntity("name", "id", baseTimestamp);
    proc.addEntity("name", "id2", baseTimestamp);
    runner.run();
    runner.assertAllFlowFilesTransferred(ConcreteListProcessor.REL_SUCCESS, 0);
    runner.clearTransferState();

    // After sleeping, both entities are transferred.
    Thread.sleep(DEFAULT_SLEEP_MILLIS);
    runner.run();
    runner.assertAllFlowFilesTransferred(ConcreteListProcessor.REL_SUCCESS, 2);
    runner.clearTransferState();

    final StateMap stateAfterFirstListing = runner.getStateManager().getState(Scope.CLUSTER);
    assertEquals(2, stateAfterFirstListing.getVersion());
    final Map<String, String> storedEntries = stateAfterFirstListing.toMap();
    // Ensure timestamp and identifiers are migrated
    assertEquals(4, storedEntries.size());
    final String expectedFirstTimestamp = Long.toString(baseTimestamp);
    assertEquals(expectedFirstTimestamp, storedEntries.get(AbstractListProcessor.LATEST_LISTED_ENTRY_TIMESTAMP_KEY));
    assertEquals(expectedFirstTimestamp, storedEntries.get(AbstractListProcessor.LAST_PROCESSED_LATEST_ENTRY_TIMESTAMP_KEY));
    assertEquals("id", storedEntries.get(AbstractListProcessor.IDENTIFIER_PREFIX + ".0"));
    assertEquals("id2", storedEntries.get(AbstractListProcessor.IDENTIFIER_PREFIX + ".1"));

    // A strictly newer entity is transferred on the next run.
    final long laterTimestamp = baseTimestamp + 1;
    proc.addEntity("new name", "new id", laterTimestamp);
    runner.run();
    runner.assertAllFlowFilesTransferred(ConcreteListProcessor.REL_SUCCESS, 1);
    runner.clearTransferState();

    final StateMap stateAfterSecondListing = runner.getStateManager().getState(Scope.CLUSTER);
    assertEquals(3, stateAfterSecondListing.getVersion());
    assertEquals(3, stateAfterSecondListing.toMap().size());
    final String expectedSecondTimestamp = Long.toString(laterTimestamp);
    assertEquals(expectedSecondTimestamp, stateAfterSecondListing.get(AbstractListProcessor.LATEST_LISTED_ENTRY_TIMESTAMP_KEY));
    // Processed timestamp is now caught up
    assertEquals(expectedSecondTimestamp, stateAfterSecondListing.get(AbstractListProcessor.LAST_PROCESSED_LATEST_ENTRY_TIMESTAMP_KEY));
    assertEquals("new id", stateAfterSecondListing.get(AbstractListProcessor.IDENTIFIER_PREFIX + ".0"));
}
Also used : StateMap(org.apache.nifi.components.state.StateMap) Test(org.junit.Test)

Aggregations

StateMap (org.apache.nifi.components.state.StateMap): 70; HashMap (java.util.HashMap): 31; Test (org.junit.Test): 29; IOException (java.io.IOException): 18; StateProvider (org.apache.nifi.components.state.StateProvider): 14; ArrayList (java.util.ArrayList): 11; StateManager (org.apache.nifi.components.state.StateManager): 11; FlowFile (org.apache.nifi.flowfile.FlowFile): 10; TestRunner (org.apache.nifi.util.TestRunner): 10; OnScheduled (org.apache.nifi.annotation.lifecycle.OnScheduled): 9; ComponentLog (org.apache.nifi.logging.ComponentLog): 8; Map (java.util.Map): 7; PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 7; ProcessException (org.apache.nifi.processor.exception.ProcessException): 7; Date (java.util.Date): 6; List (java.util.List): 6; TimeUnit (java.util.concurrent.TimeUnit): 6; Scope (org.apache.nifi.components.state.Scope): 6; ProcessSession (org.apache.nifi.processor.ProcessSession): 6; Collections (java.util.Collections): 5