Search in sources :

Example 1 with ProvenanceEventRepository

use of org.apache.nifi.provenance.ProvenanceEventRepository in project nifi by apache.

the class StandardProcessSession method updateProvenanceRepo.

/**
 * Collects every provenance event associated with the given checkpoint and hands them to the
 * Provenance Repository in a single {@code registerEvents} call.
 *
 * <p>Events are gathered in this order: framework-built FORK events (unless the Processor reported an
 * equal event), Processor-reported events, other framework-generated events, and finally
 * CONTENT_MODIFIED / CREATE / ATTRIBUTES_MODIFIED events derived from the repository records. The
 * checkpoint's auto-terminated events are appended after all of those. Each event is passed through
 * {@code enrich} lazily, as the repository iterates.</p>
 *
 * @param checkpoint the checkpoint whose records, reported events and generated events are submitted
 */
private void updateProvenanceRepo(final Checkpoint checkpoint) {
    // Update Provenance Repository
    final ProvenanceEventRepository provenanceRepo = context.getProvenanceRepository();
    // We need to de-dupe the events that we've created and those reported to the provenance reporter,
    // in case the Processor developer submitted the same events to the reporter. So we use a LinkedHashSet
    // for this, so that we are able to ensure that the events are submitted in the proper order.
    final Set<ProvenanceEventRecord> recordsToSubmit = new LinkedHashSet<>();
    // Tracks which event types have been registered against each FlowFile UUID so that redundant
    // CREATE / ATTRIBUTES_MODIFIED events can be suppressed further down.
    final Map<String, Set<ProvenanceEventType>> eventTypesPerFlowFileId = new HashMap<>();
    final Set<ProvenanceEventRecord> processorGenerated = checkpoint.reportedEvents;
    // Build the FORK events first. A framework-built FORK event is only submitted if the set of events
    // reported by the Processor does not already contain an equal event.
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : checkpoint.forkEventBuilders.entrySet()) {
        final ProvenanceEventBuilder builder = entry.getValue();
        final FlowFile flowFile = entry.getKey();
        updateEventContentClaims(builder, flowFile, checkpoint.records.get(flowFile));
        final ProvenanceEventRecord event = builder.build();
        if (!event.getChildUuids().isEmpty() && !isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
            // If framework generated the event, add it to the 'recordsToSubmit' Set.
            if (!processorGenerated.contains(event)) {
                recordsToSubmit.add(event);
            }
            // Register the FORK event for each child and each parent.
            for (final String childUuid : event.getChildUuids()) {
                addEventType(eventTypesPerFlowFileId, childUuid, event.getEventType());
            }
            for (final String parentUuid : event.getParentUuids()) {
                addEventType(eventTypesPerFlowFileId, parentUuid, event.getEventType());
            }
        }
    }
    // Now add any Processor-reported events.
    for (final ProvenanceEventRecord event : processorGenerated) {
        if (isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
            continue;
        }
        // Check whether the event indicates that the FlowFile was routed to the same
        // connection from which it was pulled (and only this connection). If so, discard the event.
        if (isSpuriousRouteEvent(event, checkpoint.records)) {
            continue;
        }
        recordsToSubmit.add(event);
        addEventType(eventTypesPerFlowFileId, event.getFlowFileUuid(), event.getEventType());
    }
    // Finally, add any other events that we may have generated.
    for (final List<ProvenanceEventRecord> eventList : checkpoint.generatedProvenanceEvents.values()) {
        for (final ProvenanceEventRecord event : eventList) {
            if (isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
                continue;
            }
            recordsToSubmit.add(event);
            addEventType(eventTypesPerFlowFileId, event.getFlowFileUuid(), event.getEventType());
        }
    }
    // Check if content or attributes changed. If so, register the appropriate events.
    for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
        final ContentClaim original = repoRecord.getOriginalClaim();
        final ContentClaim current = repoRecord.getCurrentClaim();
        // Content counts as changed when exactly one claim is null or when both exist but are not equal.
        boolean contentChanged = false;
        if (original == null && current != null) {
            contentChanged = true;
        }
        if (original != null && current == null) {
            contentChanged = true;
        }
        if (original != null && current != null && !original.equals(current)) {
            contentChanged = true;
        }
        final FlowFileRecord curFlowFile = repoRecord.getCurrent();
        final String flowFileId = curFlowFile.getAttribute(CoreAttributes.UUID.key());
        boolean eventAdded = false;
        // No derived events are produced for FlowFiles that were removed.
        if (checkpoint.removedFlowFiles.contains(flowFileId)) {
            continue;
        }
        final boolean newFlowFile = repoRecord.getOriginal() == null;
        if (contentChanged && !newFlowFile) {
            recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.CONTENT_MODIFIED).build());
            addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.CONTENT_MODIFIED);
            eventAdded = true;
        }
        if (checkpoint.createdFlowFiles.contains(flowFileId)) {
            // A created FlowFile needs a CREATE event unless a CREATE, FORK, JOIN, RECEIVE or FETCH
            // event has already been registered for it.
            final Set<ProvenanceEventType> registeredTypes = eventTypesPerFlowFileId.get(flowFileId);
            boolean creationEventRegistered = false;
            if (registeredTypes != null) {
                if (registeredTypes.contains(ProvenanceEventType.CREATE) || registeredTypes.contains(ProvenanceEventType.FORK) || registeredTypes.contains(ProvenanceEventType.JOIN) || registeredTypes.contains(ProvenanceEventType.RECEIVE) || registeredTypes.contains(ProvenanceEventType.FETCH)) {
                    creationEventRegistered = true;
                }
            }
            if (!creationEventRegistered) {
                recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.CREATE).build());
                eventAdded = true;
            }
        }
        if (!eventAdded && !repoRecord.getUpdatedAttributes().isEmpty()) {
            // We generate an ATTRIBUTES_MODIFIED event only if no other event has been registered
            // for the FlowFile; generating an ATTRIBUTES_MODIFIED event is redundant if another
            // already exists.
            if (!eventTypesPerFlowFileId.containsKey(flowFileId)) {
                recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.ATTRIBUTES_MODIFIED).build());
                addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.ATTRIBUTES_MODIFIED);
            }
        }
    }
    // We want to submit the 'recordsToSubmit' collection, followed by the auto-terminated events to the Provenance Repository.
    // We want to do this with a single call to ProvenanceEventRepository#registerEvents because it may be much more efficient
    // to do so.
    // However, we want to modify the events in 'recordsToSubmit' to obtain the data from the most recent version of the FlowFiles
    // (except for SEND events); see note below as to why this is.
    // Therefore, we create an Iterable that can iterate over each of these events, modifying them as needed, and returning them
    // in the appropriate order. This prevents an unnecessary step of creating an intermediate List and adding all of those values
    // to the List.
    // This is done in a similar vein to how Java 8's streams work, iterating over the events and returning a processed version
    // one-at-a-time as opposed to iterating over the entire Collection and putting the results in another Collection. However,
    // we don't want to change the Framework to require Java 8 at this time, because it's not yet as prevalent as we would desire.
    final Map<String, FlowFileRecord> flowFileRecordMap = new HashMap<>();
    for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
        final FlowFileRecord flowFile = repoRecord.getCurrent();
        flowFileRecordMap.put(flowFile.getAttribute(CoreAttributes.UUID.key()), flowFile);
    }
    final List<ProvenanceEventRecord> autoTermEvents = checkpoint.autoTerminatedEvents;
    final Iterable<ProvenanceEventRecord> iterable = new Iterable<ProvenanceEventRecord>() {

        @Override
        public Iterator<ProvenanceEventRecord> iterator() {
            // Create the backing iterators per call. If they were shared across calls, a repository
            // that asks for an Iterator more than once (for example to probe for emptiness and then
            // traverse, or to traverse twice) would find the sequence partially or fully consumed
            // and would silently miss events.
            final Iterator<ProvenanceEventRecord> recordsToSubmitIterator = recordsToSubmit.iterator();
            final Iterator<ProvenanceEventRecord> autoTermIterator = autoTermEvents == null ? null : autoTermEvents.iterator();

            return new Iterator<ProvenanceEventRecord>() {

                @Override
                public boolean hasNext() {
                    return recordsToSubmitIterator.hasNext() || (autoTermIterator != null && autoTermIterator.hasNext());
                }

                @Override
                public ProvenanceEventRecord next() {
                    if (recordsToSubmitIterator.hasNext()) {
                        final ProvenanceEventRecord rawEvent = recordsToSubmitIterator.next();
                        // SEND events are enriched with the final flag set to false, every other event
                        // type with true. NOTE(review): presumably the flag controls whether the event
                        // is refreshed from the committed FlowFile, so that a SEND event keeps the
                        // FlowFile as it was sent -- confirm against enrich().
                        return enrich(rawEvent, flowFileRecordMap, checkpoint.records, rawEvent.getEventType() != ProvenanceEventType.SEND);
                    } else if (autoTermIterator != null && autoTermIterator.hasNext()) {
                        return enrich(autoTermIterator.next(), flowFileRecordMap, checkpoint.records, true);
                    }
                    throw new NoSuchElementException();
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    };
    provenanceRepo.registerEvents(iterable);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Iterator(java.util.Iterator) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType) ProvenanceEventBuilder(org.apache.nifi.provenance.ProvenanceEventBuilder) FlowFile(org.apache.nifi.flowfile.FlowFile) ProvenanceEventRepository(org.apache.nifi.provenance.ProvenanceEventRepository) ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) NoSuchElementException(java.util.NoSuchElementException)

Example 2 with ProvenanceEventRepository

use of org.apache.nifi.provenance.ProvenanceEventRepository in project nifi by apache.

the class TestStandardProvenanceReporter method testDuplicatesIgnored.

/**
 * Forks one child FlowFile from the same set of parents repeatedly and checks that the reporter
 * does not accumulate duplicate events. Currently disabled via {@code @Ignore}.
 */
@Test
@Ignore
public void testDuplicatesIgnored() {
    final ProvenanceEventRepository repository = Mockito.mock(ProvenanceEventRepository.class);
    final StandardProvenanceReporter provenanceReporter = new StandardProvenanceReporter(null, "1234", "TestProc", repository, null);

    // Ten parent FlowFiles; each one's "uuid" attribute is its numeric id rendered as a String.
    final List<FlowFile> parentFlowFiles = new ArrayList<>();
    int parentId = 0;
    while (parentId < 10) {
        parentFlowFiles.add(new StandardFlowFileRecord.Builder().id(parentId).addAttribute("uuid", String.valueOf(parentId)).build());
        parentId++;
    }
    final FlowFile child = new StandardFlowFileRecord.Builder().id(10L).addAttribute("uuid", "10").build();

    // Reporting the identical fork twice must not add anything the second time.
    provenanceReporter.fork(child, parentFlowFiles);
    provenanceReporter.fork(child, parentFlowFiles);
    // Expected: 1 for each parent in the spawn and 1 for the spawn itself.
    assertEquals(11, provenanceReporter.getEvents().size());

    // Shrink the very same list down to its first parent and fork again.
    final FlowFile soleParent = parentFlowFiles.get(0);
    parentFlowFiles.clear();
    parentFlowFiles.add(soleParent);
    provenanceReporter.fork(child, parentFlowFiles);
    // Expected: 1 more for the spawn event containing the child, but none for the parent because
    // that one has already been emitted.
    assertEquals(12, provenanceReporter.getEvents().size());
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) ArrayList(java.util.ArrayList) ProvenanceEventRepository(org.apache.nifi.provenance.ProvenanceEventRepository) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 3 with ProvenanceEventRepository

use of org.apache.nifi.provenance.ProvenanceEventRepository in project nifi by apache.

the class TestSiteToSiteProvenanceReportingTask method testWhenProvenanceMaxIdEqualToLastEventIdInStateManager.

/**
 * Runs the reporting task when the repository's max event id equals the last event id stored in
 * state, and expects that no data is sent.
 */
@Test
public void testWhenProvenanceMaxIdEqualToLastEventIdInStateManager() throws IOException, InitializationException {
    final long latestEventId = 2500;

    // Configure the task with the default value of every property it supports.
    final Map<PropertyDescriptor, String> defaults = new HashMap<>();
    for (final PropertyDescriptor supported : new MockSiteToSiteProvenanceReportingTask().getSupportedPropertyDescriptors()) {
        defaults.put(supported, supported.getDefaultValue());
    }
    final MockSiteToSiteProvenanceReportingTask reportingTask = setup(null, defaults);

    // Record the last processed event id as being identical to the repository's max event id.
    // NOTE(review): within this method the state manager and the EventAccess mock created below are
    // never handed to the task or to 'context' -- confirm that they are wired up elsewhere.
    final MockStateManager mockStateManager = new MockStateManager(reportingTask);
    final Map<String, String> storedState = new HashMap<>();
    storedState.put(SiteToSiteProvenanceReportingTask.LAST_EVENT_ID_KEY, String.valueOf(latestEventId));
    mockStateManager.setState(storedState, Scope.LOCAL);

    // Mock provenance repository that reports latestEventId as its max event id.
    final ProvenanceEventRepository repository = Mockito.mock(ProvenanceEventRepository.class);
    when(repository.getMaxEventId()).thenReturn(latestEventId);

    // Mock EventAccess that exposes the mock provenance repository.
    final EventAccess access = Mockito.mock(EventAccess.class);
    when(access.getProvenanceRepository()).thenReturn(repository);

    reportingTask.initialize(initContext);

    // Triggering the task should not produce any data because the max id equals the previous id.
    reportingTask.onScheduled(confContext);
    reportingTask.onTrigger(context);
    assertEquals(0, reportingTask.dataSent.size());
}
Also used : PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) HashMap(java.util.HashMap) ProvenanceEventRepository(org.apache.nifi.provenance.ProvenanceEventRepository) MockStateManager(org.apache.nifi.state.MockStateManager) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)

Example 4 with ProvenanceEventRepository

use of org.apache.nifi.provenance.ProvenanceEventRepository in project nifi by apache.

the class TestSiteToSiteProvenanceReportingTask method setup.

/**
 * Creates a mock reporting task and stubs the shared {@code context}, {@code confContext} and
 * {@code initContext} mocks around it.
 *
 * @param event the event returned for every id served by the mocked {@code getProvenanceEvents};
 *              when {@code null}, the returned lists stay empty
 * @param properties values used to answer {@code getProperty} on both contexts
 * @param maxEventId value reported by the mock repository's {@code getMaxEventId}; also caps the
 *                   total number of event slots served across all {@code getProvenanceEvents} calls
 * @return the newly created mock reporting task
 * @throws IOException declared by the signature; nothing in this body is seen to throw it
 */
private MockSiteToSiteProvenanceReportingTask setup(ProvenanceEventRecord event, Map<PropertyDescriptor, String> properties, long maxEventId) throws IOException {
    final MockSiteToSiteProvenanceReportingTask reportingTask = new MockSiteToSiteProvenanceReportingTask();
    when(context.getStateManager()).thenReturn(new MockStateManager(reportingTask));

    // Both contexts answer getProperty by looking the requested descriptor up in 'properties'.
    final Answer<PropertyValue> propertyLookup =
            invocation -> new MockPropertyValue(properties.get(invocation.getArgumentAt(0, PropertyDescriptor.class)));
    Mockito.doAnswer(propertyLookup).when(context).getProperty(Mockito.any(PropertyDescriptor.class));
    Mockito.doAnswer(propertyLookup).when(confContext).getProperty(Mockito.any(PropertyDescriptor.class));

    // getProvenanceEvents(startId, maxRecords): walk the requested id window, counting every slot
    // towards the overall maxEventId cap and adding 'event' for each slot when it is non-null.
    final AtomicInteger servedEvents = new AtomicInteger(0);
    final Answer<List<ProvenanceEventRecord>> eventPage = invocation -> {
        final long firstId = invocation.getArgumentAt(0, long.class);
        final int pageSize = invocation.getArgumentAt(1, int.class);
        final List<ProvenanceEventRecord> page = new ArrayList<>();
        final int endExclusive = (int) (firstId + pageSize);
        int index = (int) Math.max(0, firstId);
        while (index < endExclusive && servedEvents.get() < maxEventId) {
            if (event != null) {
                page.add(event);
            }
            servedEvents.getAndIncrement();
            index++;
        }
        return page;
    };
    final EventAccess eventAccess = Mockito.mock(EventAccess.class);
    Mockito.doAnswer(eventPage).when(eventAccess).getProvenanceEvents(Mockito.anyLong(), Mockito.anyInt());

    // Process Group hierarchy: Root -> (A, B -> (B2 -> (B3)))
    final ProcessGroupStatus rootGroup = new ProcessGroupStatus();
    rootGroup.setId("root");
    final ProcessGroupStatus groupA = new ProcessGroupStatus();
    groupA.setId("pgA");
    final ProcessGroupStatus groupB = new ProcessGroupStatus();
    groupB.setId("pgB");
    final ProcessGroupStatus groupB2 = new ProcessGroupStatus();
    groupB2.setId("pgB2");
    final ProcessGroupStatus groupB3 = new ProcessGroupStatus();
    groupB3.setId("pgB3");

    final Collection<ProcessGroupStatus> rootChildren = rootGroup.getProcessGroupStatus();
    rootChildren.add(groupA);
    rootChildren.add(groupB);
    groupB.getProcessGroupStatus().add(groupB2);
    groupB2.getProcessGroupStatus().add(groupB3);

    // One Processor per Process Group; the root-level Processor is given an id but no name.
    final ProcessorStatus rootProcessor = new ProcessorStatus();
    rootProcessor.setId("1234");
    rootGroup.getProcessorStatus().add(rootProcessor);

    final ProcessorStatus processorA = new ProcessorStatus();
    processorA.setId("A001");
    processorA.setName("Processor in PGA");
    groupA.getProcessorStatus().add(processorA);

    final ProcessorStatus processorB = new ProcessorStatus();
    processorB.setId("B001");
    processorB.setName("Processor in PGB");
    groupB.getProcessorStatus().add(processorB);

    final ProcessorStatus processorB2 = new ProcessorStatus();
    processorB2.setId("B201");
    processorB2.setName("Processor in PGB2");
    groupB2.getProcessorStatus().add(processorB2);

    final ProcessorStatus processorB3 = new ProcessorStatus();
    processorB3.setId("B301");
    processorB3.setName("Processor in PGB3");
    groupB3.getProcessorStatus().add(processorB3);

    // Connection statuses used to test Remote Input/Output Ports.
    final ConnectionStatus toRemoteInputPort = new ConnectionStatus();
    toRemoteInputPort.setGroupId("pgB2");
    toRemoteInputPort.setSourceId("B201");
    toRemoteInputPort.setDestinationId("riB2");
    toRemoteInputPort.setDestinationName("Remote Input Port name");
    groupB2.getConnectionStatus().add(toRemoteInputPort);

    final ConnectionStatus fromRemoteOutputPort = new ConnectionStatus();
    fromRemoteOutputPort.setGroupId("pgB3");
    fromRemoteOutputPort.setSourceId("roB3");
    fromRemoteOutputPort.setSourceName("Remote Output Port name");
    fromRemoteOutputPort.setDestinationId("B301");
    groupB3.getConnectionStatus().add(fromRemoteOutputPort);

    // Mock repository that reports maxEventId as its max event id.
    final ProvenanceEventRepository repository = Mockito.mock(ProvenanceEventRepository.class);
    final Answer<Long> maxIdAnswer = invocation -> maxEventId;
    Mockito.doAnswer(maxIdAnswer).when(repository).getMaxEventId();

    // Expose the status tree and the repository through EventAccess, and EventAccess through context.
    when(eventAccess.getControllerStatus()).thenReturn(rootGroup);
    when(eventAccess.getProvenanceRepository()).thenReturn(repository);
    when(context.getEventAccess()).thenReturn(eventAccess);

    // The initialization context supplies a random identifier and a mock logger.
    final ComponentLog mockLogger = Mockito.mock(ComponentLog.class);
    when(initContext.getIdentifier()).thenReturn(UUID.randomUUID().toString());
    when(initContext.getLogger()).thenReturn(mockLogger);
    return reportingTask;
}
Also used : ProcessGroupStatus(org.apache.nifi.controller.status.ProcessGroupStatus) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) MockPropertyValue(org.apache.nifi.util.MockPropertyValue) PropertyValue(org.apache.nifi.components.PropertyValue) MockPropertyValue(org.apache.nifi.util.MockPropertyValue) ProcessorStatus(org.apache.nifi.controller.status.ProcessorStatus) ComponentLog(org.apache.nifi.logging.ComponentLog) ProvenanceEventRepository(org.apache.nifi.provenance.ProvenanceEventRepository) MockStateManager(org.apache.nifi.state.MockStateManager) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ArrayList(java.util.ArrayList) List(java.util.List) ConnectionStatus(org.apache.nifi.controller.status.ConnectionStatus)

Aggregations

ProvenanceEventRepository (org.apache.nifi.provenance.ProvenanceEventRepository)4 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor)2 FlowFile (org.apache.nifi.flowfile.FlowFile)2 ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord)2 MockStateManager (org.apache.nifi.state.MockStateManager)2 Test (org.junit.Test)2 InvocationOnMock (org.mockito.invocation.InvocationOnMock)2 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 LinkedHashSet (java.util.LinkedHashSet)1 List (java.util.List)1 Map (java.util.Map)1 NoSuchElementException (java.util.NoSuchElementException)1 Set (java.util.Set)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 PropertyValue (org.apache.nifi.components.PropertyValue)1 ContentClaim (org.apache.nifi.controller.repository.claim.ContentClaim)1