Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class CaptureChangeMySQL, method setup:
public void setup(ProcessContext context) {
    final ComponentLog logger = getLogger();
    final StateManager stateManager = context.getStateManager();
    final StateMap stateMap;
    try {
        stateMap = stateManager.getState(Scope.CLUSTER);
    } catch (final IOException ioe) {
        logger.error("Failed to retrieve observed maximum values from the State Manager. Will not attempt "
                + "connection until this is accomplished.", ioe);
        context.yield();
        return;
    }
    PropertyValue dbNameValue = context.getProperty(DATABASE_NAME_PATTERN);
    databaseNamePattern = dbNameValue.isSet() ? Pattern.compile(dbNameValue.getValue()) : null;
    PropertyValue tableNameValue = context.getProperty(TABLE_NAME_PATTERN);
    tableNamePattern = tableNameValue.isSet() ? Pattern.compile(tableNameValue.getValue()) : null;
    stateUpdateInterval = context.getProperty(STATE_UPDATE_INTERVAL).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS);
    boolean getAllRecords = context.getProperty(RETRIEVE_ALL_RECORDS).asBoolean();
    includeBeginCommit = context.getProperty(INCLUDE_BEGIN_COMMIT).asBoolean();
    includeDDLEvents = context.getProperty(INCLUDE_DDL_EVENTS).asBoolean();
    // Set the current binlog filename to whatever is in State, falling back to Retrieve All Records,
    // then the Initial Binlog Filename, if no State variable is present
    currentBinlogFile = stateMap.get(BinlogEventInfo.BINLOG_FILENAME_KEY);
    if (currentBinlogFile == null) {
        if (!getAllRecords) {
            if (context.getProperty(INIT_BINLOG_FILENAME).isSet()) {
                currentBinlogFile = context.getProperty(INIT_BINLOG_FILENAME).evaluateAttributeExpressions().getValue();
            }
        } else {
            // If we're starting from the beginning of all binlogs, the binlog filename must be the empty string (not null)
            currentBinlogFile = "";
        }
    }
    // Set the current binlog position to whatever is in State, falling back to Retrieve All Records,
    // then the Initial Binlog Position, if no State variable is present
    String binlogPosition = stateMap.get(BinlogEventInfo.BINLOG_POSITION_KEY);
    if (binlogPosition != null) {
        currentBinlogPosition = Long.valueOf(binlogPosition);
    } else if (!getAllRecords) {
        if (context.getProperty(INIT_BINLOG_POSITION).isSet()) {
            currentBinlogPosition = context.getProperty(INIT_BINLOG_POSITION).evaluateAttributeExpressions().asLong();
        } else {
            currentBinlogPosition = DO_NOT_SET;
        }
    } else {
        currentBinlogPosition = -1;
    }
    // Get the current sequence ID from state
    String seqIdString = stateMap.get(EventWriter.SEQUENCE_ID_KEY);
    if (StringUtils.isEmpty(seqIdString)) {
        // Use the Initial Sequence ID property if none is found in state
        PropertyValue seqIdProp = context.getProperty(INIT_SEQUENCE_ID);
        if (seqIdProp.isSet()) {
            currentSequenceId.set(seqIdProp.evaluateAttributeExpressions().asInteger());
        }
    } else {
        currentSequenceId.set(Integer.parseInt(seqIdString));
    }
    // Get a reference to the Distributed Cache if one exists. If it does not, no enrichment
    // (e.g. resolution of column names) will be performed
    boolean createEnrichmentConnection = false;
    if (context.getProperty(DIST_CACHE_CLIENT).isSet()) {
        cacheClient = context.getProperty(DIST_CACHE_CLIENT).asControllerService(DistributedMapCacheClient.class);
        createEnrichmentConnection = true;
    } else {
        logger.warn("No Distributed Map Cache Client is specified, so no event enrichment (e.g. resolution of column names) will be performed.");
        cacheClient = null;
    }
    // Save off MySQL cluster and JDBC driver information; it will be used to connect for event enrichment
    // as well as for the binlog connector
    try {
        List<InetSocketAddress> hosts = getHosts(context.getProperty(HOSTS).evaluateAttributeExpressions().getValue());
        String username = context.getProperty(USERNAME).evaluateAttributeExpressions().getValue();
        String password = context.getProperty(PASSWORD).evaluateAttributeExpressions().getValue();
        // BinaryLogClient expects a non-null password, so set it to the empty string if it is not provided
        if (password == null) {
            password = "";
        }
        long connectTimeout = context.getProperty(CONNECT_TIMEOUT).evaluateAttributeExpressions().asTimePeriod(TimeUnit.MILLISECONDS);
        String driverLocation = context.getProperty(DRIVER_LOCATION).evaluateAttributeExpressions().getValue();
        String driverName = context.getProperty(DRIVER_NAME).evaluateAttributeExpressions().getValue();
        Long serverId = context.getProperty(SERVER_ID).evaluateAttributeExpressions().asLong();
        connect(hosts, username, password, serverId, createEnrichmentConnection, driverLocation, driverName, connectTimeout);
    } catch (IOException | IllegalStateException e) {
        context.yield();
        binlogClient = null;
        throw new ProcessException(e.getMessage(), e);
    }
}
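The method above only reads state; the write path goes through the same StateManager. A minimal sketch of such an update follows, using the fields and key constants from the snippet above. The updateState name is an assumption for illustration, not lifted from the processor:

    // Sketch: persist the binlog coordinates read in setup() back to cluster-scoped state,
    // so a newly elected primary node can resume from the same position.
    // Assumes the same fields and key constants as the snippet above.
    private void updateState(StateManager stateManager) throws IOException {
        final Map<String, String> newState = new HashMap<>();
        if (currentBinlogFile != null) {
            newState.put(BinlogEventInfo.BINLOG_FILENAME_KEY, currentBinlogFile);
        }
        newState.put(BinlogEventInfo.BINLOG_POSITION_KEY, Long.toString(currentBinlogPosition));
        newState.put(EventWriter.SEQUENCE_ID_KEY, String.valueOf(currentSequenceId.get()));
        // setState overwrites the component's entire cluster-scoped state map
        stateManager.setState(newState, Scope.CLUSTER);
    }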
Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class ScrollElasticsearchHttp, method loadScrollId:
private String loadScrollId(StateManager stateManager) throws IOException {
    final StateMap stateMap = stateManager.getState(Scope.LOCAL);
    if (stateMap.getVersion() < 0) {
        getLogger().debug("No previous state found");
        return null;
    }
    final String scrollId = stateMap.get(SCROLL_ID_STATE);
    getLogger().debug("Loaded state with scrollId {}", new Object[] { scrollId });
    return scrollId;
}
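For context, a matching writer would merge the scroll ID into the existing local state via setState rather than overwrite other keys. The saveScrollId helper shown here is a hypothetical sketch, not the processor's actual method:

    // Hypothetical sketch: store the scroll ID in local state without dropping other keys.
    private void saveScrollId(StateManager stateManager, String scrollId) throws IOException {
        final Map<String, String> state = new HashMap<>(stateManager.getState(Scope.LOCAL).toMap());
        state.put(SCROLL_ID_STATE, scrollId);
        stateManager.setState(state, Scope.LOCAL);
    }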
Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class ScrollElasticsearchHttp, method isQueryFinished:
private boolean isQueryFinished(StateManager stateManager) throws IOException {
    final StateMap stateMap = stateManager.getState(Scope.LOCAL);
    if (stateMap.getVersion() < 0) {
        getLogger().debug("No previous state found");
        return false;
    }
    final String isQueryFinished = stateMap.get(FINISHED_QUERY_STATE);
    getLogger().debug("Loaded state with finishedQuery = {}", new Object[] { isQueryFinished });
    return "true".equals(isQueryFinished);
}
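The counterpart write, marking the query finished so the check above returns true, could look like the sketch below. markQueryFinished is an illustrative name; the flag value matches the "true".equals comparison above:

    // Hypothetical sketch: set the finished-query flag in local state.
    private void markQueryFinished(StateManager stateManager) throws IOException {
        final Map<String, String> state = new HashMap<>(stateManager.getState(Scope.LOCAL).toMap());
        state.put(FINISHED_QUERY_STATE, "true");
        stateManager.setState(state, Scope.LOCAL);
    }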
Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class AbstractListProcessor, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    Long minTimestampToListMillis = lastListedLatestEntryTimestampMillis;
    if (this.lastListedLatestEntryTimestampMillis == null || this.lastProcessedLatestEntryTimestampMillis == null || justElectedPrimaryNode) {
        try {
            // Attempt to retrieve state from the state manager if a last listing was not yet established or
            // if just elected the primary node
            final StateMap stateMap = context.getStateManager().getState(getStateScope(context));
            latestIdentifiersProcessed.clear();
            for (Map.Entry<String, String> state : stateMap.toMap().entrySet()) {
                final String k = state.getKey();
                final String v = state.getValue();
                if (v == null || v.isEmpty()) {
                    continue;
                }
                if (LATEST_LISTED_ENTRY_TIMESTAMP_KEY.equals(k)) {
                    minTimestampToListMillis = Long.parseLong(v);
                    // If our determined timestamp is the same as that of our last listing, skip this
                    // execution as there are no updates
                    if (minTimestampToListMillis.equals(this.lastListedLatestEntryTimestampMillis)) {
                        context.yield();
                        return;
                    } else {
                        this.lastListedLatestEntryTimestampMillis = minTimestampToListMillis;
                    }
                } else if (LAST_PROCESSED_LATEST_ENTRY_TIMESTAMP_KEY.equals(k)) {
                    this.lastProcessedLatestEntryTimestampMillis = Long.parseLong(v);
                } else if (k.startsWith(IDENTIFIER_PREFIX)) {
                    latestIdentifiersProcessed.add(v);
                }
            }
            justElectedPrimaryNode = false;
        } catch (final IOException ioe) {
            getLogger().error("Failed to retrieve timestamp of last listing from the State Manager. Will not perform listing until this is accomplished.");
            context.yield();
            return;
        }
    }
    final List<T> entityList;
    final long currentRunTimeNanos = System.nanoTime();
    final long currentRunTimeMillis = System.currentTimeMillis();
    try {
        // Perform the listing, keeping track of when this run executed for consideration of the lag nanos
        entityList = performListing(context, minTimestampToListMillis);
    } catch (final IOException e) {
        getLogger().error("Failed to perform listing on remote host due to {}", e);
        context.yield();
        return;
    }
    if (entityList == null || entityList.isEmpty()) {
        context.yield();
        return;
    }
    Long latestListedEntryTimestampThisCycleMillis = null;
    final TreeMap<Long, List<T>> orderedEntries = new TreeMap<>();
    // Build a sorted map to determine the latest possible entries
    boolean targetSystemHasMilliseconds = false;
    boolean targetSystemHasSeconds = false;
    for (final T entity : entityList) {
        final long entityTimestampMillis = entity.getTimestamp();
        if (!targetSystemHasMilliseconds) {
            targetSystemHasMilliseconds = entityTimestampMillis % 1000 > 0;
        }
        if (!targetSystemHasSeconds) {
            targetSystemHasSeconds = entityTimestampMillis % 60_000 > 0;
        }
        // New entries are all those that occur at or after the associated timestamp
        final boolean newEntry = minTimestampToListMillis == null
                || (entityTimestampMillis >= minTimestampToListMillis && entityTimestampMillis >= lastProcessedLatestEntryTimestampMillis);
        if (newEntry) {
            List<T> entitiesForTimestamp = orderedEntries.get(entity.getTimestamp());
            if (entitiesForTimestamp == null) {
                entitiesForTimestamp = new ArrayList<T>();
                orderedEntries.put(entity.getTimestamp(), entitiesForTimestamp);
            }
            entitiesForTimestamp.add(entity);
        }
    }
    int flowfilesCreated = 0;
    if (orderedEntries.size() > 0) {
        latestListedEntryTimestampThisCycleMillis = orderedEntries.lastKey();
        // Determine target system time precision.
        String specifiedPrecision = context.getProperty(TARGET_SYSTEM_TIMESTAMP_PRECISION).getValue();
        if (StringUtils.isBlank(specifiedPrecision)) {
            // If TARGET_SYSTEM_TIMESTAMP_PRECISION is not supported by the Processor, then
            // specifiedPrecision can be null, instead of its default value.
            specifiedPrecision = getDefaultTimePrecision();
        }
        final TimeUnit targetSystemTimePrecision =
                PRECISION_AUTO_DETECT.getValue().equals(specifiedPrecision)
                        ? targetSystemHasMilliseconds ? TimeUnit.MILLISECONDS : targetSystemHasSeconds ? TimeUnit.SECONDS : TimeUnit.MINUTES
                        : PRECISION_MILLIS.getValue().equals(specifiedPrecision) ? TimeUnit.MILLISECONDS
                        : PRECISION_SECONDS.getValue().equals(specifiedPrecision) ? TimeUnit.SECONDS
                        : TimeUnit.MINUTES;
        final Long listingLagMillis = LISTING_LAG_MILLIS.get(targetSystemTimePrecision);
        // The latest timestamp is the same as the last run's: another iteration has occurred without
        // new files, and special handling is needed to avoid starvation
        if (latestListedEntryTimestampThisCycleMillis.equals(lastListedLatestEntryTimestampMillis)) {
            /* We need to wait for another cycle when either:
             * - We have not eclipsed the minimal listing lag needed, due to being triggered too soon after the last run
             * - The latest listed entity timestamp is equal to the last processed time, meaning we handled those items
             *   originally passed over. No need to process them again.
             */
            final long listingLagNanos = TimeUnit.MILLISECONDS.toNanos(listingLagMillis);
            if (currentRunTimeNanos - lastRunTimeNanos < listingLagNanos
                    || (latestListedEntryTimestampThisCycleMillis.equals(lastProcessedLatestEntryTimestampMillis)
                            && orderedEntries.get(latestListedEntryTimestampThisCycleMillis).stream()
                                    .allMatch(entity -> latestIdentifiersProcessed.contains(entity.getIdentifier())))) {
                context.yield();
                return;
            }
        } else {
            // Convert the minimum reliable timestamp into the target system time unit, in order to truncate unreliable digits.
            final long minimumReliableTimestampInFilesystemTimeUnit = targetSystemTimePrecision.convert(currentRunTimeMillis - listingLagMillis, TimeUnit.MILLISECONDS);
            final long minimumReliableTimestampMillis = targetSystemTimePrecision.toMillis(minimumReliableTimestampInFilesystemTimeUnit);
            // The minimum timestamp should be reliable enough to determine that no further entries will be
            // added with the same timestamp, based on the target system time precision.
            if (minimumReliableTimestampMillis < latestListedEntryTimestampThisCycleMillis) {
                // Otherwise, the newest entries are held back one cycle, to avoid missing data when writes
                // occur at exactly the moment the listing is being performed
                orderedEntries.remove(latestListedEntryTimestampThisCycleMillis);
            }
        }
        for (Map.Entry<Long, List<T>> timestampEntities : orderedEntries.entrySet()) {
            List<T> entities = timestampEntities.getValue();
            if (timestampEntities.getKey().equals(lastProcessedLatestEntryTimestampMillis)) {
                // Filter out previously processed entities.
                entities = entities.stream().filter(entity -> !latestIdentifiersProcessed.contains(entity.getIdentifier())).collect(Collectors.toList());
            }
            for (T entity : entities) {
                // Create the FlowFile for this path.
                final Map<String, String> attributes = createAttributes(entity, context);
                FlowFile flowFile = session.create();
                flowFile = session.putAllAttributes(flowFile, attributes);
                session.transfer(flowFile, REL_SUCCESS);
                flowfilesCreated++;
            }
        }
    }
    // As long as we have a listing timestamp, there is meaningful state to capture, regardless of any outputs generated
    if (latestListedEntryTimestampThisCycleMillis != null) {
        boolean processedNewFiles = flowfilesCreated > 0;
        if (processedNewFiles) {
            // Compare against orderedEntries.lastKey() rather than latestListedEntryTimestampThisCycleMillis,
            // because latestListedEntryTimestampThisCycleMillis might be removed if it's not old enough.
            if (!orderedEntries.lastKey().equals(lastProcessedLatestEntryTimestampMillis)) {
                // If the latest timestamp at this cycle becomes different than the previous one, we need to clear identifiers.
                // If it didn't change, we need to add identifiers.
                latestIdentifiersProcessed.clear();
            }
            // Capture latestIdentifiersProcessed.
            latestIdentifiersProcessed.addAll(orderedEntries.lastEntry().getValue().stream().map(T::getIdentifier).collect(Collectors.toList()));
            lastProcessedLatestEntryTimestampMillis = orderedEntries.lastKey();
            getLogger().info("Successfully created listing with {} new objects", new Object[] { flowfilesCreated });
            session.commit();
        }
        lastRunTimeNanos = currentRunTimeNanos;
        if (!latestListedEntryTimestampThisCycleMillis.equals(lastListedLatestEntryTimestampMillis) || processedNewFiles) {
            // Persist the latest timestamps and identifiers so that a newly elected primary node can pick up
            // where the last one left off; if the node cannot contact
            // the distributed state cache, the node can continue to run (if it is primary node).
            try {
                lastListedLatestEntryTimestampMillis = latestListedEntryTimestampThisCycleMillis;
                persist(latestListedEntryTimestampThisCycleMillis, lastProcessedLatestEntryTimestampMillis, latestIdentifiersProcessed, context.getStateManager(), getStateScope(context));
            } catch (final IOException ioe) {
                getLogger().warn("Unable to save state due to {}. If NiFi is restarted before state is saved, or "
                        + "if another node begins executing this Processor, data duplication may occur.", ioe);
            }
        }
    } else {
        getLogger().debug("There is no data to list. Yielding.");
        context.yield();
        // Set the last listed timestamp to 0 so that we don't continually poll the distributed cache / local file system
        if (lastListedLatestEntryTimestampMillis == null) {
            lastListedLatestEntryTimestampMillis = 0L;
        }
        return;
    }
}
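The persist call near the end writes the captured timestamps and processed identifiers through the StateManager, using the same keys the loop at the top reads back. A minimal sketch of what such a method might look like follows; the parameter list mirrors the call site, and the indexed-identifier key layout is an assumption consistent with the k.startsWith(IDENTIFIER_PREFIX) check above:

    // Sketch: save listing state so the next run (or a new primary node) can resume.
    private void persist(final long latestListedEntryTimestampMillis, final long lastProcessedLatestEntryTimestampMillis,
            final Collection<String> identifiers, final StateManager stateManager, final Scope scope) throws IOException {
        final Map<String, String> updatedState = new HashMap<>(identifiers.size() + 2);
        updatedState.put(LATEST_LISTED_ENTRY_TIMESTAMP_KEY, String.valueOf(latestListedEntryTimestampMillis));
        updatedState.put(LAST_PROCESSED_LATEST_ENTRY_TIMESTAMP_KEY, String.valueOf(lastProcessedLatestEntryTimestampMillis));
        // Store each processed identifier under an indexed key so it can be skipped next run
        int i = 0;
        for (final String identifier : identifiers) {
            updatedState.put(IDENTIFIER_PREFIX + "." + i++, identifier);
        }
        stateManager.setState(updatedState, scope);
    }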
Use of org.apache.nifi.components.state.StateMap in project nifi by apache.
The class MonitorActivity, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final long thresholdMillis = context.getProperty(THRESHOLD).asTimePeriod(TimeUnit.MILLISECONDS);
    final long now = System.currentTimeMillis();
    final ComponentLog logger = getLogger();
    final boolean copyAttributes = context.getProperty(COPY_ATTRIBUTES).asBoolean();
    final boolean isClusterScope = isClusterScope(context, false);
    final boolean shouldReportOnlyOnPrimary = shouldReportOnlyOnPrimary(isClusterScope, context);
    final List<FlowFile> flowFiles = session.get(50);
    boolean isInactive = false;
    long updatedLatestSuccessTransfer = -1;
    StateMap clusterState = null;
    if (flowFiles.isEmpty()) {
        final long previousSuccessMillis = latestSuccessTransfer.get();
        boolean sendInactiveMarker = false;
        isInactive = (now >= previousSuccessMillis + thresholdMillis);
        logger.debug("isInactive={}, previousSuccessMillis={}, now={}", new Object[] { isInactive, previousSuccessMillis, now });
        if (isInactive && isClusterScope) {
            // This node has been inactive, but other nodes may have been active; check cluster state.
            // (If this node were active, we wouldn't have to look at cluster state.)
            try {
                clusterState = context.getStateManager().getState(Scope.CLUSTER);
                if (clusterState != null && !StringUtils.isEmpty(clusterState.get(STATE_KEY_LATEST_SUCCESS_TRANSFER))) {
                    final long latestReportedClusterActivity = Long.valueOf(clusterState.get(STATE_KEY_LATEST_SUCCESS_TRANSFER));
                    isInactive = (now >= latestReportedClusterActivity + thresholdMillis);
                    if (!isInactive) {
                        // This node has been inactive, but another node has more recent activity.
                        updatedLatestSuccessTransfer = latestReportedClusterActivity;
                    }
                    logger.debug("isInactive={}, latestReportedClusterActivity={}", new Object[] { isInactive, latestReportedClusterActivity });
                }
            } catch (IOException e) {
                logger.error("Failed to access cluster state. Activity will not be monitored properly until this is addressed.", e);
            }
        }
        if (isInactive) {
            final boolean continual = context.getProperty(CONTINUALLY_SEND_MESSAGES).asBoolean();
            sendInactiveMarker = !inactive.getAndSet(true) || (continual && (now > lastInactiveMessage.get() + thresholdMillis));
        }
        if (sendInactiveMarker && shouldThisNodeReport(isClusterScope, shouldReportOnlyOnPrimary)) {
            lastInactiveMessage.set(System.currentTimeMillis());
            FlowFile inactiveFlowFile = session.create();
            inactiveFlowFile = session.putAttribute(inactiveFlowFile, "inactivityStartMillis", String.valueOf(previousSuccessMillis));
            inactiveFlowFile = session.putAttribute(inactiveFlowFile, "inactivityDurationMillis", String.valueOf(now - previousSuccessMillis));
            final byte[] outBytes = context.getProperty(INACTIVITY_MESSAGE).evaluateAttributeExpressions(inactiveFlowFile).getValue().getBytes(UTF8);
            inactiveFlowFile = session.write(inactiveFlowFile, new OutputStreamCallback() {
                @Override
                public void process(final OutputStream out) throws IOException {
                    out.write(outBytes);
                }
            });
            session.getProvenanceReporter().create(inactiveFlowFile);
            session.transfer(inactiveFlowFile, REL_INACTIVE);
            logger.info("Transferred {} to 'inactive'", new Object[] { inactiveFlowFile });
        } else {
            // No need to dominate CPU checking times; let other processors run for a bit.
            context.yield();
        }
    } else {
        session.transfer(flowFiles, REL_SUCCESS);
        updatedLatestSuccessTransfer = now;
        logger.info("Transferred {} FlowFiles to 'success'", new Object[] { flowFiles.size() });
        final long latestStateReportTimestamp = latestReportedNodeState.get();
        if (isClusterScope && (now - latestStateReportTimestamp) > (thresholdMillis / 3)) {
            // We don't want to hit the state manager on every onTrigger(), but often enough to detect activeness.
            try {
                final StateManager stateManager = context.getStateManager();
                final StateMap state = stateManager.getState(Scope.CLUSTER);
                final Map<String, String> newValues = new HashMap<>();
                // Persist attributes so that other nodes can copy them
                if (copyAttributes) {
                    newValues.putAll(flowFiles.get(0).getAttributes());
                }
                newValues.put(STATE_KEY_LATEST_SUCCESS_TRANSFER, String.valueOf(now));
                if (state == null || state.getVersion() == -1) {
                    stateManager.setState(newValues, Scope.CLUSTER);
                } else {
                    final String existingTimestamp = state.get(STATE_KEY_LATEST_SUCCESS_TRANSFER);
                    if (StringUtils.isEmpty(existingTimestamp) || Long.parseLong(existingTimestamp) < now) {
                        // If this returns false due to a race condition, it's not a problem, since we just need
                        // the latest active timestamp.
                        stateManager.replace(state, newValues, Scope.CLUSTER);
                    } else {
                        logger.debug("Existing state has a more recent timestamp; did not update state.");
                    }
                }
                latestReportedNodeState.set(now);
            } catch (IOException e) {
                logger.error("Failed to access cluster state. Activity will not be monitored properly until this is addressed.", e);
            }
        }
    }
    if (!isInactive) {
        final long inactivityStartMillis = latestSuccessTransfer.get();
        if (updatedLatestSuccessTransfer > -1) {
            latestSuccessTransfer.set(updatedLatestSuccessTransfer);
        }
        if (inactive.getAndSet(false) && shouldThisNodeReport(isClusterScope, shouldReportOnlyOnPrimary)) {
            FlowFile activityRestoredFlowFile = session.create();
            if (copyAttributes) {
                final Map<String, String> attributes = new HashMap<>();
                if (flowFiles.size() > 0) {
                    // Copy attributes from the first FlowFile in the list
                    attributes.putAll(flowFiles.get(0).getAttributes());
                } else if (clusterState != null) {
                    attributes.putAll(clusterState.toMap());
                    attributes.remove(STATE_KEY_LATEST_SUCCESS_TRANSFER);
                }
                // Don't copy the UUID
                attributes.remove(CoreAttributes.UUID.key());
                activityRestoredFlowFile = session.putAllAttributes(activityRestoredFlowFile, attributes);
            }
            activityRestoredFlowFile = session.putAttribute(activityRestoredFlowFile, "inactivityStartMillis", String.valueOf(inactivityStartMillis));
            activityRestoredFlowFile = session.putAttribute(activityRestoredFlowFile, "inactivityDurationMillis", String.valueOf(now - inactivityStartMillis));
            final byte[] outBytes = context.getProperty(ACTIVITY_RESTORED_MESSAGE).evaluateAttributeExpressions(activityRestoredFlowFile).getValue().getBytes(UTF8);
            activityRestoredFlowFile = session.write(activityRestoredFlowFile, out -> out.write(outBytes));
            session.getProvenanceReporter().create(activityRestoredFlowFile);
            session.transfer(activityRestoredFlowFile, REL_ACTIVITY_RESTORED);
            logger.info("Transferred {} to 'activity.restored'", new Object[] { activityRestoredFlowFile });
        }
    }
}
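The stateManager.replace(state, newValues, Scope.CLUSTER) call above is an optimistic compare-and-set: it succeeds only if the stored state still matches the version of the StateMap that was read. MonitorActivity deliberately ignores a lost race, but where the update must land, a small retry loop is the usual pattern. A standalone illustration, not part of the processor:

    // Illustrative retry loop around the optimistic replace: re-read the current StateMap
    // and retry when another node has written in between, giving up after a few attempts.
    boolean updated = false;
    for (int attempt = 0; attempt < 3 && !updated; attempt++) {
        final StateMap current = stateManager.getState(Scope.CLUSTER);
        final Map<String, String> newValues = new HashMap<>(current.toMap());
        newValues.put(STATE_KEY_LATEST_SUCCESS_TRANSFER, String.valueOf(System.currentTimeMillis()));
        // replace() returns false if the stored version no longer matches 'current'
        updated = stateManager.replace(current, newValues, Scope.CLUSTER);
    }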