Use of org.apache.nifi.provenance.ProvenanceEventRepository in the Apache NiFi project.
Class StandardProcessSession, method updateProvenanceRepo.
/**
 * Collects every provenance event produced during the session described by the given
 * checkpoint and hands them to the Provenance Repository in a single
 * {@code registerEvents} call.
 *
 * <p>Events are gathered in this order: framework-built fork events, events reported by
 * the Processor, other framework-generated events, and finally events derived from
 * content/attribute changes on each repository record. Auto-terminated events are
 * emitted after all of those.
 *
 * @param checkpoint the checkpoint whose records and events should be registered
 */
private void updateProvenanceRepo(final Checkpoint checkpoint) {
    final ProvenanceEventRepository provenanceRepo = context.getProvenanceRepository();

    // The same event may be created by the framework and also reported by the Processor.
    // A LinkedHashSet drops such duplicates while preserving the order of first insertion,
    // so events are still submitted in the proper order.
    final Set<ProvenanceEventRecord> recordsToSubmit = new LinkedHashSet<>();
    final Map<String, Set<ProvenanceEventType>> eventTypesPerFlowFileId = new HashMap<>();
    final Set<ProvenanceEventRecord> processorGenerated = checkpoint.reportedEvents;

    // Step 1: events built from the checkpoint's fork event builders.
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> forkEntry : checkpoint.forkEventBuilders.entrySet()) {
        final FlowFile keyFlowFile = forkEntry.getKey();
        final ProvenanceEventBuilder forkBuilder = forkEntry.getValue();
        updateEventContentClaims(forkBuilder, keyFlowFile, checkpoint.records.get(keyFlowFile));

        final ProvenanceEventRecord forkEvent = forkBuilder.build();
        if (forkEvent.getChildUuids().isEmpty() || isSpuriousForkEvent(forkEvent, checkpoint.removedFlowFiles)) {
            continue;
        }

        // Only queue the event here if the Processor did not report an equal one itself;
        // Processor-reported events are queued in Step 2.
        if (!processorGenerated.contains(forkEvent)) {
            recordsToSubmit.add(forkEvent);
        }

        // Remember the event type for every child and every parent of the event.
        for (final String childUuid : forkEvent.getChildUuids()) {
            addEventType(eventTypesPerFlowFileId, childUuid, forkEvent.getEventType());
        }
        for (final String parentUuid : forkEvent.getParentUuids()) {
            addEventType(eventTypesPerFlowFileId, parentUuid, forkEvent.getEventType());
        }
    }

    // Step 2: events reported by the Processor, minus spurious fork and route events.
    for (final ProvenanceEventRecord reported : processorGenerated) {
        if (isSpuriousForkEvent(reported, checkpoint.removedFlowFiles)) {
            continue;
        }
        if (isSpuriousRouteEvent(reported, checkpoint.records)) {
            continue;
        }
        recordsToSubmit.add(reported);
        addEventType(eventTypesPerFlowFileId, reported.getFlowFileUuid(), reported.getEventType());
    }

    // Step 3: any other events generated by the framework.
    for (final List<ProvenanceEventRecord> generatedEvents : checkpoint.generatedProvenanceEvents.values()) {
        for (final ProvenanceEventRecord generated : generatedEvents) {
            if (isSpuriousForkEvent(generated, checkpoint.removedFlowFiles)) {
                continue;
            }
            recordsToSubmit.add(generated);
            addEventType(eventTypesPerFlowFileId, generated.getFlowFileUuid(), generated.getEventType());
        }
    }

    // Step 4: derive CONTENT_MODIFIED / CREATE / ATTRIBUTES_MODIFIED events from the records.
    for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
        final ContentClaim originalClaim = repoRecord.getOriginalClaim();
        final ContentClaim currentClaim = repoRecord.getCurrentClaim();

        // Content changed if exactly one claim is missing, or both exist and differ.
        final boolean contentChanged = originalClaim == null
            ? currentClaim != null
            : currentClaim == null || !originalClaim.equals(currentClaim);

        final FlowFileRecord curFlowFile = repoRecord.getCurrent();
        final String flowFileId = curFlowFile.getAttribute(CoreAttributes.UUID.key());
        if (checkpoint.removedFlowFiles.contains(flowFileId)) {
            continue;
        }

        boolean eventAdded = false;
        final boolean newFlowFile = repoRecord.getOriginal() == null;

        if (contentChanged && !newFlowFile) {
            final ProvenanceEventRecord contentModified = provenanceReporter.build(curFlowFile, ProvenanceEventType.CONTENT_MODIFIED).build();
            recordsToSubmit.add(contentModified);
            addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.CONTENT_MODIFIED);
            eventAdded = true;
        }

        if (checkpoint.createdFlowFiles.contains(flowFileId)) {
            // A created FlowFile needs a CREATE event unless an event type that already
            // accounts for its creation has been registered for it.
            final Set<ProvenanceEventType> registeredTypes = eventTypesPerFlowFileId.get(flowFileId);
            final boolean creationEventRegistered = registeredTypes != null
                && (registeredTypes.contains(ProvenanceEventType.CREATE)
                    || registeredTypes.contains(ProvenanceEventType.FORK)
                    || registeredTypes.contains(ProvenanceEventType.JOIN)
                    || registeredTypes.contains(ProvenanceEventType.RECEIVE)
                    || registeredTypes.contains(ProvenanceEventType.FETCH));

            if (!creationEventRegistered) {
                final ProvenanceEventRecord created = provenanceReporter.build(curFlowFile, ProvenanceEventType.CREATE).build();
                recordsToSubmit.add(created);
                eventAdded = true;
            }
        }

        // An ATTRIBUTES_MODIFIED event is redundant if any other event already exists for
        // this FlowFile, so it is only added when nothing at all has been registered.
        if (!eventAdded
            && !repoRecord.getUpdatedAttributes().isEmpty()
            && !eventTypesPerFlowFileId.containsKey(flowFileId)) {
            final ProvenanceEventRecord attributesModified = provenanceReporter.build(curFlowFile, ProvenanceEventType.ATTRIBUTES_MODIFIED).build();
            recordsToSubmit.add(attributesModified);
            addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.ATTRIBUTES_MODIFIED);
        }
    }

    // Index the current version of every FlowFile by UUID for use when enriching events.
    final Map<String, FlowFileRecord> flowFileRecordMap = new HashMap<>();
    for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
        final FlowFileRecord current = repoRecord.getCurrent();
        flowFileRecordMap.put(current.getAttribute(CoreAttributes.UUID.key()), current);
    }

    // 'recordsToSubmit' followed by the auto-terminated events are submitted with a single
    // registerEvents call, since that may be much more efficient. Each event is enriched
    // lazily as it is pulled, which avoids building an intermediate List of enriched events.
    // Anonymous classes are used instead of Java 8 streams so the framework does not
    // require Java 8.
    //
    // NOTE(review): the underlying iterators are created once, as fields of the Iterable,
    // so every Iterator returned by iterator() shares them. The Iterable is therefore
    // effectively single-use; confirm registerEvents only iterates once.
    final List<ProvenanceEventRecord> autoTermEvents = checkpoint.autoTerminatedEvents;
    final Iterable<ProvenanceEventRecord> iterable = new Iterable<ProvenanceEventRecord>() {
        final Iterator<ProvenanceEventRecord> submitIterator = recordsToSubmit.iterator();
        final Iterator<ProvenanceEventRecord> autoTermIterator = autoTermEvents == null ? null : autoTermEvents.iterator();

        @Override
        public Iterator<ProvenanceEventRecord> iterator() {
            return new Iterator<ProvenanceEventRecord>() {
                @Override
                public boolean hasNext() {
                    if (submitIterator.hasNext()) {
                        return true;
                    }
                    return autoTermIterator != null && autoTermIterator.hasNext();
                }

                @Override
                public ProvenanceEventRecord next() {
                    if (submitIterator.hasNext()) {
                        final ProvenanceEventRecord rawEvent = submitIterator.next();
                        // The last argument is false only for SEND events; every other event
                        // is enriched from the most recent version of the FlowFile.
                        final boolean notSendEvent = rawEvent.getEventType() != ProvenanceEventType.SEND;
                        return enrich(rawEvent, flowFileRecordMap, checkpoint.records, notSendEvent);
                    }
                    if (autoTermIterator != null && autoTermIterator.hasNext()) {
                        return enrich(autoTermIterator.next(), flowFileRecordMap, checkpoint.records, true);
                    }
                    throw new NoSuchElementException();
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    };

    provenanceRepo.registerEvents(iterable);
}
Use of org.apache.nifi.provenance.ProvenanceEventRepository in the Apache NiFi project.
Class TestStandardProvenanceReporter, method testDuplicatesIgnored.
/**
 * Forks the same child from the same parents twice and expects the reporter to hold no
 * duplicate events, then forks once more from a single parent and expects exactly one
 * additional event. Currently disabled via {@code @Ignore}.
 */
@Test
@Ignore
public void testDuplicatesIgnored() {
    final ProvenanceEventRepository mockRepo = Mockito.mock(ProvenanceEventRepository.class);
    final StandardProvenanceReporter reporter = new StandardProvenanceReporter(null, "1234", "TestProc", mockRepo, null);

    // Ten parents with ids/uuids 0..9 and one child with id/uuid 10.
    final List<FlowFile> parents = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        parents.add(new StandardFlowFileRecord.Builder().id(i).addAttribute("uuid", String.valueOf(i)).build());
    }
    final FlowFile child = new StandardFlowFileRecord.Builder().id(10L).addAttribute("uuid", "10").build();

    // Reporting the identical fork twice must not add events beyond the first report.
    reporter.fork(child, parents);
    reporter.fork(child, parents);

    // 1 for each parent in the spawn and 1 for the spawn itself
    assertEquals(11, reporter.getEvents().size());

    // Shrink the same list down to just its first parent and fork again.
    parents.subList(1, parents.size()).clear();
    reporter.fork(child, parents);

    // 1 more emitted for the spawn event containing the child but not for the parent
    // because that one has already been emitted
    assertEquals(12, reporter.getEvents().size());
}
Use of org.apache.nifi.provenance.ProvenanceEventRepository in the Apache NiFi project.
Class TestSiteToSiteProvenanceReportingTask, method testWhenProvenanceMaxIdEqualToLastEventIdInStateManager.
/**
 * Runs the reporting task when the last event id stored in state equals the provenance
 * repository's max event id, and expects that no data is sent.
 *
 * NOTE(review): the {@code lastIdStateManager} and {@code repoEventAccess} objects built
 * below are never handed to {@code context}, {@code confContext}, {@code initContext} or
 * the task within this method. Confirm whether they were meant to be wired in; as written
 * the assertion may not depend on them.
 */
@Test
public void testWhenProvenanceMaxIdEqualToLastEventIdInStateManager() throws IOException, InitializationException {
    final long maxEventId = 2500;

    // Configure the task with every supported property set to its default value.
    final Map<PropertyDescriptor, String> defaultProperties = new HashMap<>();
    for (final PropertyDescriptor descriptor : new MockSiteToSiteProvenanceReportingTask().getSupportedPropertyDescriptors()) {
        defaultProperties.put(descriptor, descriptor.getDefaultValue());
    }
    final MockSiteToSiteProvenanceReportingTask task = setup(null, defaultProperties);

    // State manager whose stored last event id is the same value as maxEventId.
    final MockStateManager lastIdStateManager = new MockStateManager(task);
    final Map<String, String> lastIdState = new HashMap<>();
    lastIdState.put(SiteToSiteProvenanceReportingTask.LAST_EVENT_ID_KEY, String.valueOf(maxEventId));
    lastIdStateManager.setState(lastIdState, Scope.LOCAL);

    // Mock provenance repository that reports maxEventId as its max event id.
    final ProvenanceEventRepository provenanceRepository = Mockito.mock(ProvenanceEventRepository.class);
    when(provenanceRepository.getMaxEventId()).thenReturn(maxEventId);

    // Mock EventAccess that returns the mock provenance repository.
    final EventAccess repoEventAccess = Mockito.mock(EventAccess.class);
    when(repoEventAccess.getProvenanceRepository()).thenReturn(provenanceRepository);

    task.initialize(initContext);

    // Execute the reporting task; it should not produce any data because the max id is
    // the same as the previously sent id.
    task.onScheduled(confContext);
    task.onTrigger(context);

    assertEquals(0, task.dataSent.size());
}
Use of org.apache.nifi.provenance.ProvenanceEventRepository in the Apache NiFi project.
Class TestSiteToSiteProvenanceReportingTask, method setup.
/**
 * Creates a mock reporting task and stubs the shared {@code context}, {@code confContext}
 * and {@code initContext} mocks around it.
 *
 * @param event      the event returned for every requested provenance record; when
 *                   {@code null} the returned pages are empty
 * @param properties property values served by both contexts' {@code getProperty}
 * @param maxEventId value reported by the mock repository's {@code getMaxEventId} and the
 *                   cap on how many events the mock {@code EventAccess} counts in total
 * @return the newly created mock task
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private MockSiteToSiteProvenanceReportingTask setup(ProvenanceEventRecord event, Map<PropertyDescriptor, String> properties, long maxEventId) throws IOException {
    final MockSiteToSiteProvenanceReportingTask task = new MockSiteToSiteProvenanceReportingTask();
    when(context.getStateManager()).thenReturn(new MockStateManager(task));

    // Both contexts resolve a property by looking its descriptor up in 'properties'.
    final Answer<PropertyValue> propertyLookup = invocation ->
        new MockPropertyValue(properties.get(invocation.getArgumentAt(0, PropertyDescriptor.class)));
    Mockito.doAnswer(propertyLookup).when(context).getProperty(Mockito.any(PropertyDescriptor.class));
    Mockito.doAnswer(propertyLookup).when(confContext).getProperty(Mockito.any(PropertyDescriptor.class));

    // Serve pages of 'event', counting served slots across calls and stopping once
    // maxEventId of them have been counted.
    final AtomicInteger totalEvents = new AtomicInteger(0);
    final EventAccess eventAccess = Mockito.mock(EventAccess.class);
    final Answer<List<ProvenanceEventRecord>> eventPager = invocation -> {
        final long startId = invocation.getArgumentAt(0, long.class);
        final int maxRecords = invocation.getArgumentAt(1, int.class);
        final int firstIndex = (int) Math.max(0, startId);
        final int endIndex = (int) (startId + maxRecords);

        final List<ProvenanceEventRecord> page = new ArrayList<>();
        for (int i = firstIndex; i < endIndex && totalEvents.get() < maxEventId; i++) {
            if (event != null) {
                page.add(event);
            }
            totalEvents.getAndIncrement();
        }
        return page;
    };
    Mockito.doAnswer(eventPager).when(eventAccess).getProvenanceEvents(Mockito.anyLong(), Mockito.anyInt());

    // Process Group hierarchy: Root -> (A, B -> (B2 -> (B3)))
    ProcessGroupStatus pgRoot = new ProcessGroupStatus();
    pgRoot.setId("root");
    when(eventAccess.getControllerStatus()).thenReturn(pgRoot);

    final ProcessGroupStatus pgA = new ProcessGroupStatus();
    pgA.setId("pgA");
    final ProcessGroupStatus pgB = new ProcessGroupStatus();
    pgB.setId("pgB");
    final ProcessGroupStatus pgB2 = new ProcessGroupStatus();
    pgB2.setId("pgB2");
    final ProcessGroupStatus pgB3 = new ProcessGroupStatus();
    pgB3.setId("pgB3");

    pgRoot.getProcessGroupStatus().add(pgA);
    pgRoot.getProcessGroupStatus().add(pgB);
    pgB.getProcessGroupStatus().add(pgB2);
    pgB2.getProcessGroupStatus().add(pgB3);

    // One Processor per Process Group.
    final ProcessorStatus prcRoot = new ProcessorStatus();
    prcRoot.setId("1234");
    pgRoot.getProcessorStatus().add(prcRoot);

    final ProcessorStatus prcA = new ProcessorStatus();
    prcA.setId("A001");
    prcA.setName("Processor in PGA");
    pgA.getProcessorStatus().add(prcA);

    final ProcessorStatus prcB = new ProcessorStatus();
    prcB.setId("B001");
    prcB.setName("Processor in PGB");
    pgB.getProcessorStatus().add(prcB);

    final ProcessorStatus prcB2 = new ProcessorStatus();
    prcB2.setId("B201");
    prcB2.setName("Processor in PGB2");
    pgB2.getProcessorStatus().add(prcB2);

    final ProcessorStatus prcB3 = new ProcessorStatus();
    prcB3.setId("B301");
    prcB3.setName("Processor in PGB3");
    pgB3.getProcessorStatus().add(prcB3);

    // Connection statuses used to test Remote Input/Output Ports.
    final ConnectionStatus b2RemoteInputPort = new ConnectionStatus();
    b2RemoteInputPort.setGroupId("pgB2");
    b2RemoteInputPort.setSourceId("B201");
    b2RemoteInputPort.setDestinationId("riB2");
    b2RemoteInputPort.setDestinationName("Remote Input Port name");
    pgB2.getConnectionStatus().add(b2RemoteInputPort);

    final ConnectionStatus b3RemoteOutputPort = new ConnectionStatus();
    b3RemoteOutputPort.setGroupId("pgB3");
    b3RemoteOutputPort.setSourceId("roB3");
    b3RemoteOutputPort.setSourceName("Remote Output Port name");
    b3RemoteOutputPort.setDestinationId("B301");
    pgB3.getConnectionStatus().add(b3RemoteOutputPort);

    // Provenance repository mock that reports maxEventId, reachable through the context.
    final ProvenanceEventRepository provenanceRepository = Mockito.mock(ProvenanceEventRepository.class);
    when(provenanceRepository.getMaxEventId()).thenReturn(maxEventId);
    when(context.getEventAccess()).thenReturn(eventAccess);
    when(eventAccess.getProvenanceRepository()).thenReturn(provenanceRepository);

    // Initialization context: random identifier and a mock logger.
    final ComponentLog logger = Mockito.mock(ComponentLog.class);
    when(initContext.getIdentifier()).thenReturn(UUID.randomUUID().toString());
    when(initContext.getLogger()).thenReturn(logger);

    return task;
}
Aggregations