Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
Class AbstractListProcessor, method onTrigger().
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
Long minTimestampToListMillis = lastListedLatestEntryTimestampMillis;
if (this.lastListedLatestEntryTimestampMillis == null || this.lastProcessedLatestEntryTimestampMillis == null || justElectedPrimaryNode) {
try {
// Attempt to retrieve state from the state manager if a last listing was not yet established or
// if just elected the primary node
final StateMap stateMap = context.getStateManager().getState(getStateScope(context));
latestIdentifiersProcessed.clear();
for (Map.Entry<String, String> state : stateMap.toMap().entrySet()) {
final String k = state.getKey();
final String v = state.getValue();
if (v == null || v.isEmpty()) {
continue;
}
if (LATEST_LISTED_ENTRY_TIMESTAMP_KEY.equals(k)) {
minTimestampToListMillis = Long.parseLong(v);
// If our determined timestamp is the same as that of our last listing, skip this execution as there are no updates
if (minTimestampToListMillis.equals(this.lastListedLatestEntryTimestampMillis)) {
context.yield();
return;
} else {
this.lastListedLatestEntryTimestampMillis = minTimestampToListMillis;
}
} else if (LAST_PROCESSED_LATEST_ENTRY_TIMESTAMP_KEY.equals(k)) {
this.lastProcessedLatestEntryTimestampMillis = Long.parseLong(v);
} else if (k.startsWith(IDENTIFIER_PREFIX)) {
latestIdentifiersProcessed.add(v);
}
}
justElectedPrimaryNode = false;
} catch (final IOException ioe) {
getLogger().error("Failed to retrieve timestamp of last listing from the State Manager. Will not perform listing until this is accomplished.");
context.yield();
return;
}
}
final List<T> entityList;
final long currentRunTimeNanos = System.nanoTime();
final long currentRunTimeMillis = System.currentTimeMillis();
try {
// Keep track of when this was last executed, for consideration of the lag nanos
entityList = performListing(context, minTimestampToListMillis);
} catch (final IOException e) {
getLogger().error("Failed to perform listing on remote host due to {}", e);
context.yield();
return;
}
if (entityList == null || entityList.isEmpty()) {
context.yield();
return;
}
Long latestListedEntryTimestampThisCycleMillis = null;
final TreeMap<Long, List<T>> orderedEntries = new TreeMap<>();
// Build a sorted map to determine the latest possible entries
boolean targetSystemHasMilliseconds = false;
boolean targetSystemHasSeconds = false;
for (final T entity : entityList) {
final long entityTimestampMillis = entity.getTimestamp();
if (!targetSystemHasMilliseconds) {
targetSystemHasMilliseconds = entityTimestampMillis % 1000 > 0;
}
if (!targetSystemHasSeconds) {
targetSystemHasSeconds = entityTimestampMillis % 60_000 > 0;
}
// New entries are all those that occur at or after the associated timestamp
final boolean newEntry = minTimestampToListMillis == null || entityTimestampMillis >= minTimestampToListMillis && entityTimestampMillis >= lastProcessedLatestEntryTimestampMillis;
if (newEntry) {
List<T> entitiesForTimestamp = orderedEntries.get(entity.getTimestamp());
if (entitiesForTimestamp == null) {
entitiesForTimestamp = new ArrayList<T>();
orderedEntries.put(entity.getTimestamp(), entitiesForTimestamp);
}
entitiesForTimestamp.add(entity);
}
}
int flowfilesCreated = 0;
if (orderedEntries.size() > 0) {
latestListedEntryTimestampThisCycleMillis = orderedEntries.lastKey();
// Determine target system time precision.
String specifiedPrecision = context.getProperty(TARGET_SYSTEM_TIMESTAMP_PRECISION).getValue();
if (StringUtils.isBlank(specifiedPrecision)) {
// If TARGET_SYSTEM_TIMESTAMP_PRECISION is not supported by the Processor, then specifiedPrecision can be null, instead of its default value.
specifiedPrecision = getDefaultTimePrecision();
}
final TimeUnit targetSystemTimePrecision
        = PRECISION_AUTO_DETECT.getValue().equals(specifiedPrecision)
            ? targetSystemHasMilliseconds ? TimeUnit.MILLISECONDS : targetSystemHasSeconds ? TimeUnit.SECONDS : TimeUnit.MINUTES
        : PRECISION_MILLIS.getValue().equals(specifiedPrecision) ? TimeUnit.MILLISECONDS
        : PRECISION_SECONDS.getValue().equals(specifiedPrecision) ? TimeUnit.SECONDS
        : TimeUnit.MINUTES;
final Long listingLagMillis = LISTING_LAG_MILLIS.get(targetSystemTimePrecision);
// If the latest listed entry's timestamp equals that of the last listing, another iteration has occurred without new files and special handling is needed to avoid starvation
if (latestListedEntryTimestampThisCycleMillis.equals(lastListedLatestEntryTimestampMillis)) {
/* We need to wait for another cycle when either:
* - we have not eclipsed the minimal listing lag because this run was triggered too soon after the last one, or
* - the latest listed entity timestamp equals the last processed time, meaning we already handled the items originally passed over; no need to process them again.
*/
final long listingLagNanos = TimeUnit.MILLISECONDS.toNanos(listingLagMillis);
if (currentRunTimeNanos - lastRunTimeNanos < listingLagNanos || (latestListedEntryTimestampThisCycleMillis.equals(lastProcessedLatestEntryTimestampMillis) && orderedEntries.get(latestListedEntryTimestampThisCycleMillis).stream().allMatch(entity -> latestIdentifiersProcessed.contains(entity.getIdentifier())))) {
context.yield();
return;
}
} else {
// Convert minimum reliable timestamp into target system time unit, in order to truncate unreliable digits.
final long minimumReliableTimestampInFilesystemTimeUnit = targetSystemTimePrecision.convert(currentRunTimeMillis - listingLagMillis, TimeUnit.MILLISECONDS);
final long minimumReliableTimestampMillis = targetSystemTimePrecision.toMillis(minimumReliableTimestampInFilesystemTimeUnit);
// The minimum timestamp should be reliable to determine that no further entries will be added with the same timestamp based on the target system time precision.
if (minimumReliableTimestampMillis < latestListedEntryTimestampThisCycleMillis) {
// Otherwise, the newest entries are held back one cycle, so that writes occurring exactly while the listing is being performed do not cause data to be missed
orderedEntries.remove(latestListedEntryTimestampThisCycleMillis);
}
}
for (Map.Entry<Long, List<T>> timestampEntities : orderedEntries.entrySet()) {
List<T> entities = timestampEntities.getValue();
if (timestampEntities.getKey().equals(lastProcessedLatestEntryTimestampMillis)) {
// Filter out previously processed entities.
entities = entities.stream().filter(entity -> !latestIdentifiersProcessed.contains(entity.getIdentifier())).collect(Collectors.toList());
}
for (T entity : entities) {
// Create the FlowFile for this path.
final Map<String, String> attributes = createAttributes(entity, context);
FlowFile flowFile = session.create();
flowFile = session.putAllAttributes(flowFile, attributes);
session.transfer(flowFile, REL_SUCCESS);
flowfilesCreated++;
}
}
}
// As long as we have a listing timestamp, there is meaningful state to capture regardless of any outputs generated
if (latestListedEntryTimestampThisCycleMillis != null) {
boolean processedNewFiles = flowfilesCreated > 0;
if (processedNewFiles) {
// Use orderedEntries.lastKey() rather than latestListedEntryTimestampThisCycleMillis, because the latter may have been removed from orderedEntries if it was not old enough.
if (!orderedEntries.lastKey().equals(lastProcessedLatestEntryTimestampMillis)) {
// If the latest timestamp of this cycle is different from the previous one, we need to clear identifiers.
// If it didn't change, we need to add identifiers.
latestIdentifiersProcessed.clear();
}
// Capture latestIdentifiersProcessed.
latestIdentifiersProcessed.addAll(orderedEntries.lastEntry().getValue().stream().map(T::getIdentifier).collect(Collectors.toList()));
lastProcessedLatestEntryTimestampMillis = orderedEntries.lastKey();
getLogger().info("Successfully created listing with {} new objects", new Object[] { flowfilesCreated });
session.commit();
}
lastRunTimeNanos = currentRunTimeNanos;
if (!latestListedEntryTimestampThisCycleMillis.equals(lastListedLatestEntryTimestampMillis) || processedNewFiles) {
// Persist the state both locally and remotely so that, if a new Primary Node is elected or this node restarts and cannot reach
// the distributed state cache, the node can continue to run (if it is primary node).
try {
lastListedLatestEntryTimestampMillis = latestListedEntryTimestampThisCycleMillis;
persist(latestListedEntryTimestampThisCycleMillis, lastProcessedLatestEntryTimestampMillis, latestIdentifiersProcessed, context.getStateManager(), getStateScope(context));
} catch (final IOException ioe) {
getLogger().warn("Unable to save state due to {}. If NiFi is restarted before state is saved, or " + "if another node begins executing this Processor, data duplication may occur.", ioe);
}
}
} else {
getLogger().debug("There is no data to list. Yielding.");
context.yield();
// lastListingTime = 0 so that we don't continually poll the distributed cache / local file system
if (lastListedLatestEntryTimestampMillis == null) {
lastListedLatestEntryTimestampMillis = 0L;
}
return;
}
}
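For context, a minimal sketch of what the persist(...) helper invoked above might look like, assuming it simply writes the two timestamps plus one prefixed key per processed identifier into the StateManager (the same keys onTrigger() reads back). This is an illustrative sketch, not the actual NiFi implementation.
// Illustrative sketch only; the real AbstractListProcessor.persist(...) may differ.
private void persist(final long latestListedEntryTimestampMillis, final long lastProcessedLatestEntryTimestampMillis,
        final Set<String> identifiersProcessed, final StateManager stateManager, final Scope scope) throws IOException {
final Map<String, String> updatedState = new HashMap<>(identifiersProcessed.size() + 2);
updatedState.put(LATEST_LISTED_ENTRY_TIMESTAMP_KEY, String.valueOf(latestListedEntryTimestampMillis));
updatedState.put(LAST_PROCESSED_LATEST_ENTRY_TIMESTAMP_KEY, String.valueOf(lastProcessedLatestEntryTimestampMillis));
// Store each identifier under a prefixed key so onTrigger() can recover it via k.startsWith(IDENTIFIER_PREFIX).
int index = 0;
for (final String identifier : identifiersProcessed) {
updatedState.put(IDENTIFIER_PREFIX + index++, identifier);
}
stateManager.setState(updatedState, scope);
}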
Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
Class ListFile, method createFileFilter().
private BiPredicate<Path, BasicFileAttributes> createFileFilter(final ProcessContext context) {
final long minSize = context.getProperty(MIN_SIZE).asDataSize(DataUnit.B).longValue();
final Double maxSize = context.getProperty(MAX_SIZE).asDataSize(DataUnit.B);
final long minAge = context.getProperty(MIN_AGE).asTimePeriod(TimeUnit.MILLISECONDS);
final Long maxAge = context.getProperty(MAX_AGE).asTimePeriod(TimeUnit.MILLISECONDS);
final boolean ignoreHidden = context.getProperty(IGNORE_HIDDEN_FILES).asBoolean();
final Pattern filePattern = Pattern.compile(context.getProperty(FILE_FILTER).getValue());
final String indir = context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue();
final boolean recurseDirs = context.getProperty(RECURSE).asBoolean();
final String pathPatternStr = context.getProperty(PATH_FILTER).getValue();
final Pattern pathPattern = (!recurseDirs || pathPatternStr == null) ? null : Pattern.compile(pathPatternStr);
return (path, attributes) -> {
if (minSize > attributes.size()) {
return false;
}
if (maxSize != null && maxSize < attributes.size()) {
return false;
}
final long fileAge = System.currentTimeMillis() - attributes.lastModifiedTime().toMillis();
if (minAge > fileAge) {
return false;
}
if (maxAge != null && maxAge < fileAge) {
return false;
}
if (ignoreHidden && path.toFile().isHidden()) {
return false;
}
if (pathPattern != null) {
Path reldir = Paths.get(indir).relativize(path).getParent();
if (reldir != null && !reldir.toString().isEmpty()) {
if (!pathPattern.matcher(reldir.toString()).matches()) {
return false;
}
}
}
// Verify that we have at least read permissions on the file we're considering grabbing
if (!Files.isReadable(path)) {
return false;
}
return filePattern.matcher(path.getFileName().toString()).matches();
};
}
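A hedged usage sketch (not taken from ListFile itself): the BiPredicate returned above is shaped to plug directly into java.nio.file.Files.find(...), for example:
// Hypothetical caller; variable names are illustrative, not from the NiFi source.
final BiPredicate<Path, BasicFileAttributes> fileFilter = createFileFilter(context);
final Path basePath = Paths.get(context.getProperty(DIRECTORY).evaluateAttributeExpressions().getValue());
final boolean recurse = context.getProperty(RECURSE).asBoolean();
final int maxDepth = recurse ? Integer.MAX_VALUE : 1;
try (final Stream<Path> matched = Files.find(basePath, maxDepth, fileFilter)) {
final List<Path> listing = matched.collect(Collectors.toList());
getLogger().debug("Found {} matching files under {}", new Object[] { listing.size(), basePath });
} catch (final IOException e) {
getLogger().error("Failed to list {} due to {}", new Object[] { basePath, e });
}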
Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
Class LogAttribute, method getAttributesToLog().
private Set<String> getAttributesToLog(final Set<String> flowFileAttrKeys, final ProcessContext context) {
// collect properties
final String attrsToLogValue = context.getProperty(ATTRIBUTES_TO_LOG_CSV).getValue();
final String attrsToRemoveValue = context.getProperty(ATTRIBUTES_TO_IGNORE_CSV).getValue();
final Set<String> attrsToLog = StringUtils.isBlank(attrsToLogValue) ? Sets.newHashSet(flowFileAttrKeys) : Sets.newHashSet(attrsToLogValue.split("\\s*,\\s*"));
final Set<String> attrsToRemove = StringUtils.isBlank(attrsToRemoveValue) ? Sets.newHashSet() : Sets.newHashSet(attrsToRemoveValue.split("\\s*,\\s*"));
final Pattern attrsToLogRegex = Pattern.compile(context.getProperty(ATTRIBUTES_TO_LOG_REGEX).getValue());
final String attrsToRemoveRegexValue = context.getProperty(ATTRIBUTES_TO_IGNORE_REGEX).getValue();
final Pattern attrsToRemoveRegex = attrsToRemoveRegexValue == null ? null : Pattern.compile(context.getProperty(ATTRIBUTES_TO_IGNORE_REGEX).getValue());
return flowFileAttrKeys.stream().filter(candidate -> {
// if this property was configured to be logged, or if the regular expression of properties to log matches
if ((attrsToLog.isEmpty() || attrsToLog.contains(candidate)) && attrsToLogRegex.matcher(candidate).matches()) {
// log properties we've _not_ configured either explicitly or by regular expression to be ignored.
if ((attrsToRemove.isEmpty() || !attrsToRemove.contains(candidate)) && (attrsToRemoveRegex == null || !attrsToRemoveRegex.matcher(candidate).matches())) {
return true;
}
}
return false;
}).collect(Collectors.toCollection(TreeSet::new));
}
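As a hedged illustration of how the returned set might be consumed (hypothetical caller, not the actual LogAttribute code):
// Hypothetical caller: render the selected attribute keys into a single log message.
final Set<String> keysToLog = getAttributesToLog(flowFile.getAttributes().keySet(), context);
final StringBuilder message = new StringBuilder("FlowFile attributes to log:");
for (final String key : keysToLog) {
message.append(System.lineSeparator()).append(key).append('=').append(flowFile.getAttribute(key));
}
getLogger().info(message.toString());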
Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
Class LookupRecord, method route().
@Override
protected Set<Relationship> route(final Record record, final RecordSchema writeSchema, final FlowFile flowFile, final ProcessContext context, final Tuple<Map<String, RecordPath>, RecordPath> flowFileContext) {
final Map<String, RecordPath> recordPaths = flowFileContext.getKey();
final Map<String, Object> lookupCoordinates = new HashMap<>(recordPaths.size());
for (final Map.Entry<String, RecordPath> entry : recordPaths.entrySet()) {
final String coordinateKey = entry.getKey();
final RecordPath recordPath = entry.getValue();
final RecordPathResult pathResult = recordPath.evaluate(record);
final List<FieldValue> lookupFieldValues = pathResult.getSelectedFields().filter(fieldVal -> fieldVal.getValue() != null).collect(Collectors.toList());
if (lookupFieldValues.isEmpty()) {
final Set<Relationship> rels = routeToMatchedUnmatched ? UNMATCHED_COLLECTION : SUCCESS_COLLECTION;
getLogger().debug("RecordPath for property '{}' did not match any fields in a record for {}; routing record to {}", new Object[] { coordinateKey, flowFile, rels });
return rels;
}
if (lookupFieldValues.size() > 1) {
final Set<Relationship> rels = routeToMatchedUnmatched ? UNMATCHED_COLLECTION : SUCCESS_COLLECTION;
getLogger().debug("RecordPath for property '{}' matched {} fields in a record for {}; routing record to {}", new Object[] { coordinateKey, lookupFieldValues.size(), flowFile, rels });
return rels;
}
final FieldValue fieldValue = lookupFieldValues.get(0);
final Object coordinateValue = (fieldValue.getValue() instanceof Number || fieldValue.getValue() instanceof Boolean) ? fieldValue.getValue() : DataTypeUtils.toString(fieldValue.getValue(), (String) null);
lookupCoordinates.put(coordinateKey, coordinateValue);
}
final Optional<?> lookupValueOption;
try {
lookupValueOption = lookupService.lookup(lookupCoordinates);
} catch (final Exception e) {
throw new ProcessException("Failed to lookup coordinates " + lookupCoordinates + " in Lookup Service", e);
}
if (!lookupValueOption.isPresent()) {
final Set<Relationship> rels = routeToMatchedUnmatched ? UNMATCHED_COLLECTION : SUCCESS_COLLECTION;
return rels;
}
// Ensure that the Record has the appropriate schema to account for the newly added values
final RecordPath resultPath = flowFileContext.getValue();
if (resultPath != null) {
record.incorporateSchema(writeSchema);
final Object lookupValue = lookupValueOption.get();
final RecordPathResult resultPathResult = flowFileContext.getValue().evaluate(record);
final String resultContentsValue = context.getProperty(RESULT_CONTENTS).getValue();
if (RESULT_RECORD_FIELDS.getValue().equals(resultContentsValue) && lookupValue instanceof Record) {
final Record lookupRecord = (Record) lookupValue;
// User wants to add all fields of the resultant Record to the specified Record Path.
// If the destination Record Path returns to us a Record, then we will add all field values of
// the Lookup Record to the destination Record. However, if the destination Record Path returns
// something other than a Record, then we can't add the fields to it. We can only replace it,
// because it doesn't make sense to add fields to anything but a Record.
resultPathResult.getSelectedFields().forEach(fieldVal -> {
final Object destinationValue = fieldVal.getValue();
if (destinationValue instanceof Record) {
final Record destinationRecord = (Record) destinationValue;
for (final String fieldName : lookupRecord.getRawFieldNames()) {
final Object value = lookupRecord.getValue(fieldName);
destinationRecord.setValue(fieldName, value);
}
} else {
final Optional<Record> parentOption = fieldVal.getParentRecord();
if (parentOption.isPresent()) {
parentOption.get().setValue(fieldVal.getField().getFieldName(), lookupRecord);
}
}
});
} else {
resultPathResult.getSelectedFields().forEach(fieldVal -> fieldVal.updateValue(lookupValue));
}
}
final Set<Relationship> rels = routeToMatchedUnmatched ? MATCHED_COLLECTION : SUCCESS_COLLECTION;
return rels;
}
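For orientation, a hedged sketch of how the Tuple passed in as flowFileContext could be assembled; the RecordPath expressions and variable names here are hypothetical, not the actual LookupRecord configuration.
// Hypothetical setup: one coordinate key mapped to a RecordPath, plus an optional result path.
final Map<String, RecordPath> coordinatePaths = new HashMap<>();
coordinatePaths.put("key", RecordPath.compile("/account/id"));
final RecordPath resultPath = RecordPath.compile("/enrichment");
final Tuple<Map<String, RecordPath>, RecordPath> flowFileContext = new Tuple<>(coordinatePaths, resultPath);
// route(...) evaluates each coordinate RecordPath against the record, performs the lookup,
// and (if a result path is configured) writes the looked-up value back into the record.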
Use of org.apache.nifi.processor.ProcessContext in project nifi by apache.
Class MonitorActivity, method onTrigger().
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
final long thresholdMillis = context.getProperty(THRESHOLD).asTimePeriod(TimeUnit.MILLISECONDS);
final long now = System.currentTimeMillis();
final ComponentLog logger = getLogger();
final boolean copyAttributes = context.getProperty(COPY_ATTRIBUTES).asBoolean();
final boolean isClusterScope = isClusterScope(context, false);
final boolean shouldReportOnlyOnPrimary = shouldReportOnlyOnPrimary(isClusterScope, context);
final List<FlowFile> flowFiles = session.get(50);
boolean isInactive = false;
long updatedLatestSuccessTransfer = -1;
StateMap clusterState = null;
if (flowFiles.isEmpty()) {
final long previousSuccessMillis = latestSuccessTransfer.get();
boolean sendInactiveMarker = false;
isInactive = (now >= previousSuccessMillis + thresholdMillis);
logger.debug("isInactive={}, previousSuccessMillis={}, now={}", new Object[] { isInactive, previousSuccessMillis, now });
if (isInactive && isClusterScope) {
// Even though this node has been inactive, other nodes may have transferred data recently; check cluster state.
// (If this node were active, there would be no need to consult cluster state.)
try {
clusterState = context.getStateManager().getState(Scope.CLUSTER);
if (clusterState != null && !StringUtils.isEmpty(clusterState.get(STATE_KEY_LATEST_SUCCESS_TRANSFER))) {
final long latestReportedClusterActivity = Long.valueOf(clusterState.get(STATE_KEY_LATEST_SUCCESS_TRANSFER));
isInactive = (now >= latestReportedClusterActivity + thresholdMillis);
if (!isInactive) {
// This node has been inactive, but other node has more recent activity.
updatedLatestSuccessTransfer = latestReportedClusterActivity;
}
logger.debug("isInactive={}, latestReportedClusterActivity={}", new Object[] { isInactive, latestReportedClusterActivity });
}
} catch (IOException e) {
logger.error("Failed to access cluster state. Activity will not be monitored properly until this is addressed.", e);
}
}
if (isInactive) {
final boolean continual = context.getProperty(CONTINUALLY_SEND_MESSAGES).asBoolean();
sendInactiveMarker = !inactive.getAndSet(true) || (continual && (now > lastInactiveMessage.get() + thresholdMillis));
}
if (sendInactiveMarker && shouldThisNodeReport(isClusterScope, shouldReportOnlyOnPrimary)) {
lastInactiveMessage.set(System.currentTimeMillis());
FlowFile inactiveFlowFile = session.create();
inactiveFlowFile = session.putAttribute(inactiveFlowFile, "inactivityStartMillis", String.valueOf(previousSuccessMillis));
inactiveFlowFile = session.putAttribute(inactiveFlowFile, "inactivityDurationMillis", String.valueOf(now - previousSuccessMillis));
final byte[] outBytes = context.getProperty(INACTIVITY_MESSAGE).evaluateAttributeExpressions(inactiveFlowFile).getValue().getBytes(UTF8);
inactiveFlowFile = session.write(inactiveFlowFile, new OutputStreamCallback() {
@Override
public void process(final OutputStream out) throws IOException {
out.write(outBytes);
}
});
session.getProvenanceReporter().create(inactiveFlowFile);
session.transfer(inactiveFlowFile, REL_INACTIVE);
logger.info("Transferred {} to 'inactive'", new Object[] { inactiveFlowFile });
} else {
// no need to dominate CPU checking times; let other processors run for a bit.
context.yield();
}
} else {
session.transfer(flowFiles, REL_SUCCESS);
updatedLatestSuccessTransfer = now;
logger.info("Transferred {} FlowFiles to 'success'", new Object[] { flowFiles.size() });
final long latestStateReportTimestamp = latestReportedNodeState.get();
if (isClusterScope && (now - latestStateReportTimestamp) > (thresholdMillis / 3)) {
// We don't want to hit the state manager every onTrigger(), but often enough to detect activeness.
try {
final StateManager stateManager = context.getStateManager();
final StateMap state = stateManager.getState(Scope.CLUSTER);
final Map<String, String> newValues = new HashMap<>();
// Persist attributes so that other nodes can copy it
if (copyAttributes) {
newValues.putAll(flowFiles.get(0).getAttributes());
}
newValues.put(STATE_KEY_LATEST_SUCCESS_TRANSFER, String.valueOf(now));
if (state == null || state.getVersion() == -1) {
stateManager.setState(newValues, Scope.CLUSTER);
} else {
final String existingTimestamp = state.get(STATE_KEY_LATEST_SUCCESS_TRANSFER);
if (StringUtils.isEmpty(existingTimestamp) || Long.parseLong(existingTimestamp) < now) {
// If this returns false due to race condition, it's not a problem since we just need
// the latest active timestamp.
stateManager.replace(state, newValues, Scope.CLUSTER);
} else {
logger.debug("Existing state has more recent timestamp, didn't update state.");
}
}
latestReportedNodeState.set(now);
} catch (IOException e) {
logger.error("Failed to access cluster state. Activity will not be monitored properly until this is addressed.", e);
}
}
}
if (!isInactive) {
final long inactivityStartMillis = latestSuccessTransfer.get();
if (updatedLatestSuccessTransfer > -1) {
latestSuccessTransfer.set(updatedLatestSuccessTransfer);
}
if (inactive.getAndSet(false) && shouldThisNodeReport(isClusterScope, shouldReportOnlyOnPrimary)) {
FlowFile activityRestoredFlowFile = session.create();
if (copyAttributes) {
final Map<String, String> attributes = new HashMap<>();
if (flowFiles.size() > 0) {
// copy attributes from the first flow file in the list
attributes.putAll(flowFiles.get(0).getAttributes());
} else if (clusterState != null) {
attributes.putAll(clusterState.toMap());
attributes.remove(STATE_KEY_LATEST_SUCCESS_TRANSFER);
}
// don't copy the UUID
attributes.remove(CoreAttributes.UUID.key());
activityRestoredFlowFile = session.putAllAttributes(activityRestoredFlowFile, attributes);
}
activityRestoredFlowFile = session.putAttribute(activityRestoredFlowFile, "inactivityStartMillis", String.valueOf(inactivityStartMillis));
activityRestoredFlowFile = session.putAttribute(activityRestoredFlowFile, "inactivityDurationMillis", String.valueOf(now - inactivityStartMillis));
final byte[] outBytes = context.getProperty(ACTIVITY_RESTORED_MESSAGE).evaluateAttributeExpressions(activityRestoredFlowFile).getValue().getBytes(UTF8);
activityRestoredFlowFile = session.write(activityRestoredFlowFile, out -> out.write(outBytes));
session.getProvenanceReporter().create(activityRestoredFlowFile);
session.transfer(activityRestoredFlowFile, REL_ACTIVITY_RESTORED);
logger.info("Transferred {} to 'activity.restored'", new Object[] { activityRestoredFlowFile });
}
}
}
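A hedged sketch of one way the shouldThisNodeReport(...) check used above could be implemented, assuming a primary-node flag maintained via @OnPrimaryNodeStateChange; this is illustrative and not necessarily the actual MonitorActivity code.
private volatile boolean isPrimaryNode;
@OnPrimaryNodeStateChange
public void onPrimaryNodeStateChange(final PrimaryNodeState newState) {
isPrimaryNode = (newState == PrimaryNodeState.ELECTED_PRIMARY_NODE);
}
private boolean shouldThisNodeReport(final boolean isClusterScope, final boolean reportOnlyOnPrimary) {
// At local scope every node reports; at cluster scope either every node reports or only the primary node does.
return !isClusterScope || !reportOnlyOnPrimary || isPrimaryNode;
}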