Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.
From class StandardProcessSession, method registerJoinEvent.
private void registerJoinEvent(final FlowFile child, final Collection<FlowFile> parents) {
    final ProvenanceEventRecord eventRecord = provenanceReporter.generateJoinEvent(parents, child);
    List<ProvenanceEventRecord> existingRecords = generatedProvenanceEvents.get(child);
    if (existingRecords == null) {
        existingRecords = new ArrayList<>();
        generatedProvenanceEvents.put(child, existingRecords);
    }
    existingRecords.add(eventRecord);
}
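The get-then-put pattern above predates the framework's adoption of Java 8 idioms; on Java 8+ the same bookkeeping can be expressed with Map.computeIfAbsent. A minimal equivalent sketch:

private void registerJoinEvent(final FlowFile child, final Collection<FlowFile> parents) {
    final ProvenanceEventRecord eventRecord = provenanceReporter.generateJoinEvent(parents, child);
    // computeIfAbsent creates and stores the list only when the child has no entry yet
    generatedProvenanceEvents.computeIfAbsent(child, ff -> new ArrayList<>()).add(eventRecord);
}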
Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.
From class StandardProcessSession, method migrate.
private void migrate(final StandardProcessSession newOwner, Collection<FlowFile> flowFiles) {
    // We don't call validateRecordState() here because we want to allow migration of FlowFiles
    // that have already been marked as removed or transferred, etc.
    flowFiles = flowFiles.stream().map(this::getMostRecent).collect(Collectors.toList());

    for (final FlowFile flowFile : flowFiles) {
        if (openInputStreams.containsKey(flowFile)) {
            throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently "
                + "has an open InputStream for the FlowFile, created by calling ProcessSession.read(FlowFile)");
        }
        if (openOutputStreams.containsKey(flowFile)) {
            throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently "
                + "has an open OutputStream for the FlowFile, created by calling ProcessSession.write(FlowFile)");
        }
        if (readRecursionSet.containsKey(flowFile)) {
            throw new IllegalStateException(flowFile + " already in use for an active callback or InputStream created by ProcessSession.read(FlowFile) has not been closed");
        }
        if (writeRecursionSet.contains(flowFile)) {
            throw new IllegalStateException(flowFile + " already in use for an active callback or OutputStream created by ProcessSession.write(FlowFile) has not been closed");
        }
        final StandardRepositoryRecord record = records.get(flowFile);
        if (record == null) {
            throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")");
        }
    }
    // If we have a FORK event for one of the given FlowFiles, then all children must also be migrated. Otherwise, we
    // could have a case where we have FlowFile A transferred and eventually exiting the flow, and later the 'newOwner'
    // ProcessSession is committed, claiming to have created FlowFiles from the parent, which is no longer even in
    // the flow. This would be very confusing when looking at the provenance for the FlowFile, so it is best to avoid this.
    final Set<String> flowFileIds = flowFiles.stream().map(ff -> ff.getAttribute(CoreAttributes.UUID.key())).collect(Collectors.toSet());
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
        final FlowFile eventFlowFile = entry.getKey();
        if (flowFiles.contains(eventFlowFile)) {
            final ProvenanceEventBuilder eventBuilder = entry.getValue();
            for (final String childId : eventBuilder.getChildFlowFileIds()) {
                if (!flowFileIds.contains(childId)) {
                    throw new IllegalStateException("Cannot migrate " + eventFlowFile + " to a new session because it was forked to create "
                        + eventBuilder.getChildFlowFileIds().size() + " children and not all children are being migrated. "
                        + "If any FlowFile is forked, all of its children must also be migrated at the same time as the forked FlowFile");
                }
            }
        }
    }
    // If a FlowFile being migrated is a child in one of our FORK events, we need to create a copy of the fork
    // event builder for the new owner of the FlowFile and remove the child from our fork event builder.
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
        final FlowFile eventFlowFile = entry.getKey();
        final ProvenanceEventBuilder eventBuilder = entry.getValue();
        final Set<String> childrenIds = new HashSet<>(eventBuilder.getChildFlowFileIds());

        ProvenanceEventBuilder copy = null;
        for (final FlowFile flowFile : flowFiles) {
            final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
            if (childrenIds.contains(flowFileId)) {
                eventBuilder.removeChildFlowFile(flowFile);
                if (copy == null) {
                    copy = eventBuilder.copy();
                    copy.getChildFlowFileIds().clear();
                }
                copy.addChildFlowFile(flowFileId);
            }
        }

        if (copy != null) {
            newOwner.forkEventBuilders.put(eventFlowFile, copy);
        }
    }
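    // At this point, each migrated child has been removed from this session's fork event builder and added
    // to a copy owned by newOwner, so each session's eventual FORK event references only its own children.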
    newOwner.processingStartTime = Math.min(newOwner.processingStartTime, processingStartTime);

    for (final FlowFile flowFile : flowFiles) {
        final FlowFileRecord flowFileRecord = (FlowFileRecord) flowFile;
        final StandardRepositoryRecord repoRecord = this.records.remove(flowFile);
        newOwner.records.put(flowFileRecord, repoRecord);

        // Adjust the counts for Connections for each FlowFile that was pulled from a Connection.
        // We do not have to worry about accounting for 'input counts' on connections because those
        // are incremented only during a checkpoint, and anything that's been checkpointed has
        // also been committed above.
        final FlowFileQueue inputQueue = repoRecord.getOriginalQueue();
        if (inputQueue != null) {
            final String connectionId = inputQueue.getIdentifier();
            incrementConnectionOutputCounts(connectionId, -1, -repoRecord.getOriginal().getSize());
            newOwner.incrementConnectionOutputCounts(connectionId, 1, repoRecord.getOriginal().getSize());
            unacknowledgedFlowFiles.get(inputQueue).remove(flowFile);
            newOwner.unacknowledgedFlowFiles.computeIfAbsent(inputQueue, queue -> new HashSet<>()).add(flowFileRecord);
            flowFilesIn--;
            contentSizeIn -= flowFile.getSize();
            newOwner.flowFilesIn++;
            newOwner.contentSizeIn += flowFile.getSize();
        }

        final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
        if (removedFlowFiles.remove(flowFileId)) {
            newOwner.removedFlowFiles.add(flowFileId);
            newOwner.removedCount++;
            newOwner.removedBytes += flowFile.getSize();
            removedCount--;
            removedBytes -= flowFile.getSize();
        }

        if (createdFlowFiles.remove(flowFileId)) {
            newOwner.createdFlowFiles.add(flowFileId);
        }

        if (repoRecord.getTransferRelationship() != null) {
            flowFilesOut--;
            contentSizeOut -= flowFile.getSize();
            newOwner.flowFilesOut++;
            newOwner.contentSizeOut += flowFile.getSize();
        }

        final List<ProvenanceEventRecord> events = generatedProvenanceEvents.remove(flowFile);
        if (events != null) {
            newOwner.generatedProvenanceEvents.put(flowFile, events);
        }

        final ContentClaim currentClaim = repoRecord.getCurrentClaim();
        if (currentClaim != null) {
            final ByteCountingOutputStream appendableStream = appendableStreams.remove(currentClaim);
            if (appendableStream != null) {
                newOwner.appendableStreams.put(currentClaim, appendableStream);
            }
        }

        final Path toDelete = deleteOnCommit.remove(flowFile);
        if (toDelete != null) {
            newOwner.deleteOnCommit.put(flowFile, toDelete);
        }
    }

    provenanceReporter.migrate(newOwner.provenanceReporter, flowFileIds);
}
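Session migration is what lets a processor hold FlowFiles across onTrigger invocations, for example when binning FlowFiles for a later merge. A hypothetical fragment of a processor extending AbstractSessionFactoryProcessor (the names binSession and binnedFlowFiles are illustrative, not NiFi API):

// Illustrative only: accumulate FlowFiles into a longer-lived session across invocations.
private ProcessSession binSession;
private final List<FlowFile> binnedFlowFiles = new ArrayList<>();

@Override
public void onTrigger(final ProcessContext context, final ProcessSessionFactory sessionFactory) {
    if (binSession == null) {
        binSession = sessionFactory.createSession();
    }
    final ProcessSession session = sessionFactory.createSession();
    final List<FlowFile> flowFiles = session.get(10);
    if (flowFiles.isEmpty()) {
        session.rollback();
        return;
    }
    // Transfer ownership of the FlowFiles (repository records, connection counts,
    // provenance state) to binSession; per the checks in migrate(), none of them
    // may have an open stream or active callback in the source session.
    session.migrate(binSession, flowFiles);
    session.commit();
    binnedFlowFiles.addAll(flowFiles);
}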
Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.
From class StandardProcessSession, method updateProvenanceRepo.
private void updateProvenanceRepo(final Checkpoint checkpoint) {
    // Update Provenance Repository
    final ProvenanceEventRepository provenanceRepo = context.getProvenanceRepository();

    // We need to de-dupe the events that we've created and those reported to the provenance reporter,
    // in case the Processor developer submitted the same events to the reporter. So we use a LinkedHashSet
    // for this, so that we are able to ensure that the events are submitted in the proper order.
    final Set<ProvenanceEventRecord> recordsToSubmit = new LinkedHashSet<>();
    final Map<String, Set<ProvenanceEventType>> eventTypesPerFlowFileId = new HashMap<>();
    final Set<ProvenanceEventRecord> processorGenerated = checkpoint.reportedEvents;

    // First, build our FORK events and check whether the set of events reported
    // by the Processor contains any of the FORK events that we generated.
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : checkpoint.forkEventBuilders.entrySet()) {
        final ProvenanceEventBuilder builder = entry.getValue();
        final FlowFile flowFile = entry.getKey();
        updateEventContentClaims(builder, flowFile, checkpoint.records.get(flowFile));
        final ProvenanceEventRecord event = builder.build();

        if (!event.getChildUuids().isEmpty() && !isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
            // If the framework generated the event, add it to the 'recordsToSubmit' Set.
            if (!processorGenerated.contains(event)) {
                recordsToSubmit.add(event);
            }

            // Register the FORK event for each child and each parent.
            for (final String childUuid : event.getChildUuids()) {
                addEventType(eventTypesPerFlowFileId, childUuid, event.getEventType());
            }
            for (final String parentUuid : event.getParentUuids()) {
                addEventType(eventTypesPerFlowFileId, parentUuid, event.getEventType());
            }
        }
    }
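    // Because 'recordsToSubmit' is a LinkedHashSet, insertion order is preserved and any equal event
    // added again below (e.g., one also reported by the Processor) is silently de-duplicated.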
    // Now add any Processor-reported events.
    for (final ProvenanceEventRecord event : processorGenerated) {
        if (isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
            continue;
        }
        // Check whether the event indicates that the FlowFile was routed to the same
        // connection from which it was pulled (and only this connection). If so, discard the event.
        if (isSpuriousRouteEvent(event, checkpoint.records)) {
            continue;
        }
        recordsToSubmit.add(event);
        addEventType(eventTypesPerFlowFileId, event.getFlowFileUuid(), event.getEventType());
    }
    // Finally, add any other events that we may have generated.
    for (final List<ProvenanceEventRecord> eventList : checkpoint.generatedProvenanceEvents.values()) {
        for (final ProvenanceEventRecord event : eventList) {
            if (isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
                continue;
            }
            recordsToSubmit.add(event);
            addEventType(eventTypesPerFlowFileId, event.getFlowFileUuid(), event.getEventType());
        }
    }
    // Check if content or attributes changed. If so, register the appropriate events.
    for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
        final ContentClaim original = repoRecord.getOriginalClaim();
        final ContentClaim current = repoRecord.getCurrentClaim();

        boolean contentChanged = false;
        if (original == null && current != null) {
            contentChanged = true;
        }
        if (original != null && current == null) {
            contentChanged = true;
        }
        if (original != null && current != null && !original.equals(current)) {
            contentChanged = true;
        }
        final FlowFileRecord curFlowFile = repoRecord.getCurrent();
        final String flowFileId = curFlowFile.getAttribute(CoreAttributes.UUID.key());
        boolean eventAdded = false;

        if (checkpoint.removedFlowFiles.contains(flowFileId)) {
            continue;
        }

        final boolean newFlowFile = repoRecord.getOriginal() == null;
        if (contentChanged && !newFlowFile) {
            recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.CONTENT_MODIFIED).build());
            addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.CONTENT_MODIFIED);
            eventAdded = true;
        }

        if (checkpoint.createdFlowFiles.contains(flowFileId)) {
            final Set<ProvenanceEventType> registeredTypes = eventTypesPerFlowFileId.get(flowFileId);
            boolean creationEventRegistered = false;
            if (registeredTypes != null) {
                if (registeredTypes.contains(ProvenanceEventType.CREATE)
                    || registeredTypes.contains(ProvenanceEventType.FORK)
                    || registeredTypes.contains(ProvenanceEventType.JOIN)
                    || registeredTypes.contains(ProvenanceEventType.RECEIVE)
                    || registeredTypes.contains(ProvenanceEventType.FETCH)) {
                    creationEventRegistered = true;
                }
            }
            if (!creationEventRegistered) {
                recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.CREATE).build());
                eventAdded = true;
            }
        }
        if (!eventAdded && !repoRecord.getUpdatedAttributes().isEmpty()) {
            // We generate an ATTRIBUTES_MODIFIED event only if no other event has been registered
            // for the FlowFile, because other events already capture the attribute changes, so the
            // event is redundant if another already exists.
            if (!eventTypesPerFlowFileId.containsKey(flowFileId)) {
                recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.ATTRIBUTES_MODIFIED).build());
                addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.ATTRIBUTES_MODIFIED);
            }
        }
    }
    // We want to submit the 'recordsToSubmit' collection, followed by the auto-terminated events, to the Provenance Repository.
    // We want to do this with a single call to ProvenanceEventRepository#registerEvents because it may be much more efficient
    // to do so.
    // However, we want to modify the events in 'recordsToSubmit' to obtain the data from the most recent version of the FlowFiles
    // (except for SEND events); see the note on the enrich() call below for why.
    // Therefore, we create an Iterable that can iterate over each of these events, modifying them as needed, and returning them
    // in the appropriate order. This avoids the unnecessary step of creating an intermediate List and adding all of those values
    // to the List.
    // This is done in a similar vein to how Java 8's streams work, iterating over the events and returning a processed version
    // one at a time, as opposed to iterating over the entire Collection and putting the results in another Collection. However,
    // we don't want to change the Framework to require Java 8 at this time, because it's not yet as prevalent as we would desire.
    final Map<String, FlowFileRecord> flowFileRecordMap = new HashMap<>();
    for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
        final FlowFileRecord flowFile = repoRecord.getCurrent();
        flowFileRecordMap.put(flowFile.getAttribute(CoreAttributes.UUID.key()), flowFile);
    }

    final List<ProvenanceEventRecord> autoTermEvents = checkpoint.autoTerminatedEvents;
    final Iterable<ProvenanceEventRecord> iterable = new Iterable<ProvenanceEventRecord>() {

        final Iterator<ProvenanceEventRecord> recordsToSubmitIterator = recordsToSubmit.iterator();
        final Iterator<ProvenanceEventRecord> autoTermIterator = autoTermEvents == null ? null : autoTermEvents.iterator();
        @Override
        public Iterator<ProvenanceEventRecord> iterator() {
            return new Iterator<ProvenanceEventRecord>() {
                @Override
                public boolean hasNext() {
                    return recordsToSubmitIterator.hasNext() || (autoTermIterator != null && autoTermIterator.hasNext());
                }

                @Override
                public ProvenanceEventRecord next() {
                    if (recordsToSubmitIterator.hasNext()) {
                        final ProvenanceEventRecord rawEvent = recordsToSubmitIterator.next();
                        // Enrich the event with the most recent FlowFile information before it is
                        // exposed. SEND events are not updated, so that they reflect the FlowFile as it was sent.
                        return enrich(rawEvent, flowFileRecordMap, checkpoint.records, rawEvent.getEventType() != ProvenanceEventType.SEND);
                    } else if (autoTermIterator != null && autoTermIterator.hasNext()) {
                        return enrich(autoTermIterator.next(), flowFileRecordMap, checkpoint.records, true);
                    }
                    throw new NoSuchElementException();
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    };

    provenanceRepo.registerEvents(iterable);
}
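The addEventType helper is not included in this snippet. Based on how it is called above, a minimal sketch of what it presumably looks like (inferred from its call sites, not copied from the NiFi source):

private void addEventType(final Map<String, Set<ProvenanceEventType>> map, final String flowFileId, final ProvenanceEventType eventType) {
    // Record that an event of this type was registered for the given FlowFile UUID
    map.computeIfAbsent(flowFileId, id -> new HashSet<>()).add(eventType);
}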
Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.
From class StandardProvenanceReporter, method drop.
ProvenanceEventRecord drop(final FlowFile flowFile, final String reason) {
    try {
        final ProvenanceEventBuilder builder = build(flowFile, ProvenanceEventType.DROP);
        if (reason != null) {
            builder.setDetails("Discard reason: " + reason);
        }
        final ProvenanceEventRecord record = builder.build();
        events.add(record);
        return record;
    } catch (final Exception e) {
        logger.error("Failed to generate Provenance Event due to " + e);
        if (logger.isDebugEnabled()) {
            logger.error("", e);
        }
        return null;
    }
}
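Note that drop is package-private, so it is invoked by the framework rather than by processor code. A hypothetical caller sketch (the reason string is illustrative; the actual call sites are not shown in this snippet):

// Hypothetical framework-side call:
final ProvenanceEventRecord dropEvent = provenanceReporter.drop(flowFile, "FlowFile expired before processing");
if (dropEvent == null) {
    // Event generation failed and was already logged; processing continues without a DROP event.
}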
Use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.
From class StandardProvenanceReporter, method send.
@Override
public void send(final FlowFile flowFile, final String transitUri, final String details, final long transmissionMillis, final boolean force) {
    try {
        final ProvenanceEventRecord record = build(flowFile, ProvenanceEventType.SEND)
            .setTransitUri(transitUri)
            .setEventDuration(transmissionMillis)
            .setDetails(details)
            .build();
        final ProvenanceEventRecord enriched = eventEnricher == null ? record : eventEnricher.enrich(record, flowFile);
        if (force) {
            repository.registerEvent(enriched);
        } else {
            events.add(enriched);
        }
    } catch (final Exception e) {
        logger.error("Failed to generate Provenance Event due to " + e);
        if (logger.isDebugEnabled()) {
            logger.error("", e);
        }
    }
}
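From processor code, this overload is reached through the session's reporter. A hypothetical onTrigger fragment (the URI, details string, and REL_SUCCESS relationship are placeholders; assumes java.util.concurrent.TimeUnit is imported):

// Illustrative only: record a SEND event after transmitting a FlowFile's content.
final long startNanos = System.nanoTime();
// ... transmit the FlowFile's content to the remote system here ...
final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);

// force=true registers the event with the repository immediately rather than at session
// commit, which matters when the data has already left the system and a later rollback
// must not discard the record of the transfer.
session.getProvenanceReporter().send(flowFile, "https://example.com/ingest", "POSTed payload", transmissionMillis, true);
session.transfer(flowFile, REL_SUCCESS);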