Use of org.apache.nifi.provenance.ProvenanceEventType in project nifi by apache.
From class TestLuceneEventIndex, method testUnauthorizedEventsGetPlaceholdersForFindParents:
@Test(timeout = 60000)
public void testUnauthorizedEventsGetPlaceholdersForFindParents() throws InterruptedException {
    assumeFalse(isWindowsEnvironment());
    final RepositoryConfiguration repoConfig = createConfig(1);
    repoConfig.setDesiredIndexSize(1L);
    final IndexManager indexManager = new SimpleIndexManager(repoConfig);
    final ArrayListEventStore eventStore = new ArrayListEventStore();
    final LuceneEventIndex index = new LuceneEventIndex(repoConfig, indexManager, 3, EventReporter.NO_OP);
    index.initialize(eventStore);
    final ProvenanceEventRecord firstEvent = createEvent("4444");
    final Map<String, String> previousAttributes = new HashMap<>();
    previousAttributes.put("uuid", "4444");
    final Map<String, String> updatedAttributes = new HashMap<>();
    updatedAttributes.put("updated", "true");
    final ProvenanceEventRecord join = new StandardProvenanceEventRecord.Builder()
        .setEventType(ProvenanceEventType.JOIN)
        .setAttributes(previousAttributes, updatedAttributes)
        .addParentUuid("4444")
        .addChildFlowFile("1234")
        .setComponentId("component-1")
        .setComponentType("unit test")
        .setEventId(idGenerator.getAndIncrement())
        .setEventTime(System.currentTimeMillis())
        .setFlowFileEntryDate(System.currentTimeMillis())
        .setFlowFileUUID("1234")
        .setLineageStartDate(System.currentTimeMillis())
        .setCurrentContentClaim("container", "section", "unit-test-id", 0L, 1024L)
        .build();
    index.addEvents(eventStore.addEvent(firstEvent).getStorageLocations());
    index.addEvents(eventStore.addEvent(join).getStorageLocations());
    for (int i = 0; i < 3; i++) {
        final ProvenanceEventRecord event = createEvent("1234");
        final StorageResult storageResult = eventStore.addEvent(event);
        index.addEvents(storageResult.getStorageLocations());
    }
    final NiFiUser user = createUser();
    final EventAuthorizer allowJoinEvents = new EventAuthorizer() {
        @Override
        public boolean isAuthorized(ProvenanceEventRecord event) {
            return event.getEventType() == ProvenanceEventType.JOIN;
        }

        @Override
        public void authorize(ProvenanceEventRecord event) throws AccessDeniedException {
        }
    };
    List<LineageNode> nodes = Collections.emptyList();
    while (nodes.size() < 2) {
        final ComputeLineageSubmission submission = index.submitExpandParents(1L, user, allowJoinEvents);
        assertTrue(submission.getResult().awaitCompletion(5, TimeUnit.SECONDS));
        nodes = submission.getResult().getNodes();
        Thread.sleep(25L);
    }
    assertEquals(2, nodes.size());
    final Map<ProvenanceEventType, List<LineageNode>> eventMap = nodes.stream()
        .filter(n -> n.getNodeType() == LineageNodeType.PROVENANCE_EVENT_NODE)
        .collect(Collectors.groupingBy(n -> ((ProvenanceEventLineageNode) n).getEventType()));
    assertEquals(2, eventMap.size());
    assertEquals(1, eventMap.get(ProvenanceEventType.JOIN).size());
    assertEquals(1, eventMap.get(ProvenanceEventType.UNKNOWN).size());
    assertEquals("4444", eventMap.get(ProvenanceEventType.UNKNOWN).get(0).getFlowFileUuid());
}
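The createEvent(...) and createUser() helpers are not part of this excerpt. For orientation, here is a minimal sketch of what createEvent plausibly builds, using only the Builder calls already shown above; the event type, component id, component type, and content claim values are illustrative assumptions, not the test's actual values:

// Hypothetical reconstruction of the test helper, not taken from the NiFi source.
private ProvenanceEventRecord createEvent(final String uuid) {
    return new StandardProvenanceEventRecord.Builder()
        .setEventType(ProvenanceEventType.CREATE)   // assumed event type
        .setComponentId("component-1")              // assumed value
        .setComponentType("unit test")              // assumed value
        .setEventId(idGenerator.getAndIncrement())
        .setEventTime(System.currentTimeMillis())
        .setFlowFileEntryDate(System.currentTimeMillis())
        .setFlowFileUUID(uuid)
        .setLineageStartDate(System.currentTimeMillis())
        .setCurrentContentClaim("container", "section", "unit-test-id", 0L, 1024L)
        .build();
}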
Use of org.apache.nifi.provenance.ProvenanceEventType in project nifi by apache.
From class ConvertEventToLuceneDocument, method convert:
public Document convert(final ProvenanceEventRecord record, final StorageSummary persistedEvent) {
    final Document doc = new Document();
    addField(doc, SearchableFields.FlowFileUUID, record.getFlowFileUuid());
    addField(doc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key()));
    addField(doc, SearchableFields.ComponentID, record.getComponentId());
    addField(doc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri());
    addField(doc, SearchableFields.EventType, record.getEventType().name());
    addField(doc, SearchableFields.Relationship, record.getRelationship());
    addField(doc, SearchableFields.Details, record.getDetails());
    addField(doc, SearchableFields.ContentClaimSection, record.getContentClaimSection());
    addField(doc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer());
    addField(doc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier());
    addField(doc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier());
    addField(doc, SearchableFields.TransitURI, record.getTransitUri());
    for (final SearchableField searchableField : searchableAttributeFields) {
        addField(doc, searchableField, LuceneUtil.truncateIndexField(record.getAttribute(searchableField.getSearchableFieldName())));
    }
    // Index the fields that we always index (unless there's nothing else to index at all)
    if (!doc.getFields().isEmpty()) {
        // Always include Lineage Start Date because it allows us to make our Lineage queries more efficient.
        doc.add(new LongField(SearchableFields.LineageStartDate.getSearchableFieldName(), record.getLineageStartDate(), Store.NO));
        // Always include Event Time because most queries are bound by a start and end time.
        doc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), record.getEventTime(), Store.NO));
        // We always include File Size because the UI wants to always render the controls for specifying this. This idea could be revisited.
        doc.add(new LongField(SearchableFields.FileSize.getSearchableFieldName(), record.getFileSize(), Store.NO));
        // We always store the Event ID in the Document but do not index it. It doesn't make sense to query based on Event ID because
        // if we want a particular Event ID, we can just obtain it directly from the EventStore. But when we obtain a Document, this info
        // must be stored so that we know how to look up the event in the store.
        doc.add(new UnIndexedLongField(SearchableFields.Identifier.getSearchableFieldName(), persistedEvent.getEventId()));
        // If the event is a FORK, CLONE, REPLAY, or JOIN, add the FlowFileUUID for all child/parent UUIDs.
        final ProvenanceEventType eventType = record.getEventType();
        if (eventType == ProvenanceEventType.FORK || eventType == ProvenanceEventType.CLONE || eventType == ProvenanceEventType.REPLAY) {
            for (final String uuid : record.getChildUuids()) {
                if (!uuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, uuid);
                }
            }
        } else if (eventType == ProvenanceEventType.JOIN) {
            for (final String uuid : record.getParentUuids()) {
                if (!uuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, uuid);
                }
            }
        } else if (eventType == ProvenanceEventType.RECEIVE && record.getSourceSystemFlowFileIdentifier() != null) {
            // If we get a RECEIVE with a Source System FlowFile Identifier, we add another field that shows the UUID
            // that the Source System uses to refer to the data.
            final String sourceIdentifier = record.getSourceSystemFlowFileIdentifier();
            final String sourceFlowFileUUID;
            final int lastColon = sourceIdentifier.lastIndexOf(":");
            if (lastColon > -1 && lastColon < sourceIdentifier.length() - 2) {
                sourceFlowFileUUID = sourceIdentifier.substring(lastColon + 1);
            } else {
                sourceFlowFileUUID = null;
            }
            if (sourceFlowFileUUID != null) {
                addField(doc, SearchableFields.FlowFileUUID, sourceFlowFileUUID);
            }
        }
        return doc;
    }
    return null;
}
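The addField(...) helper is not shown in this excerpt. Here is a minimal sketch of its likely shape, assuming it skips null values and indexes each value as an un-stored StringField; the lowercasing is an assumption about how search terms are normalized:

// Hypothetical sketch of the helper; the real implementation may differ.
private void addField(final Document doc, final SearchableField field, final String value) {
    if (value == null) {
        return; // nothing to index for this field
    }
    // Index the value for searching, but do not store it in the Document.
    doc.add(new StringField(field.getSearchableFieldName(), value.toLowerCase(), Store.NO));
}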
Use of org.apache.nifi.provenance.ProvenanceEventType in project nifi by apache.
From class IndexingAction, method index:
public void index(final StandardProvenanceEventRecord record, final IndexWriter indexWriter, final Integer blockIndex) throws IOException {
    final Document doc = new Document();
    addField(doc, SearchableFields.FlowFileUUID, record.getFlowFileUuid(), Store.NO);
    addField(doc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key()), Store.NO);
    addField(doc, SearchableFields.ComponentID, record.getComponentId(), Store.NO);
    addField(doc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri(), Store.NO);
    addField(doc, SearchableFields.EventType, record.getEventType().name(), Store.NO);
    addField(doc, SearchableFields.Relationship, record.getRelationship(), Store.NO);
    addField(doc, SearchableFields.Details, record.getDetails(), Store.NO);
    addField(doc, SearchableFields.ContentClaimSection, record.getContentClaimSection(), Store.NO);
    addField(doc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer(), Store.NO);
    addField(doc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier(), Store.NO);
    addField(doc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier(), Store.NO);
    addField(doc, SearchableFields.TransitURI, record.getTransitUri(), Store.NO);
    for (final SearchableField searchableField : searchableAttributeFields) {
        addField(doc, searchableField, LuceneUtil.truncateIndexField(record.getAttribute(searchableField.getSearchableFieldName())), Store.NO);
    }
    final String storageFilename = LuceneUtil.substringBefore(record.getStorageFilename(), ".");
    // Index the fields that we always index (unless there's nothing else to index at all)
    if (!doc.getFields().isEmpty()) {
        doc.add(new LongField(SearchableFields.LineageStartDate.getSearchableFieldName(), record.getLineageStartDate(), Store.NO));
        doc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), record.getEventTime(), Store.NO));
        doc.add(new LongField(SearchableFields.FileSize.getSearchableFieldName(), record.getFileSize(), Store.NO));
        doc.add(new StringField(FieldNames.STORAGE_FILENAME, storageFilename, Store.YES));
        if (blockIndex == null) {
            doc.add(new LongField(FieldNames.STORAGE_FILE_OFFSET, record.getStorageByteOffset(), Store.YES));
        } else {
            doc.add(new IntField(FieldNames.BLOCK_INDEX, blockIndex, Store.YES));
            doc.add(new LongField(SearchableFields.Identifier.getSearchableFieldName(), record.getEventId(), Store.YES));
        }
        // If the event is a FORK, CLONE, REPLAY, or JOIN, add the FlowFileUUID for all child/parent UUIDs.
        final ProvenanceEventType eventType = record.getEventType();
        if (eventType == ProvenanceEventType.FORK || eventType == ProvenanceEventType.CLONE || eventType == ProvenanceEventType.REPLAY) {
            for (final String uuid : record.getChildUuids()) {
                if (!uuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, uuid, Store.NO);
                }
            }
        } else if (eventType == ProvenanceEventType.JOIN) {
            for (final String uuid : record.getParentUuids()) {
                if (!uuid.equals(record.getFlowFileUuid())) {
                    addField(doc, SearchableFields.FlowFileUUID, uuid, Store.NO);
                }
            }
        } else if (eventType == ProvenanceEventType.RECEIVE && record.getSourceSystemFlowFileIdentifier() != null) {
            // If we get a RECEIVE with a Source System FlowFile Identifier, we add another field that shows the UUID
            // that the Source System uses to refer to the data.
            final String sourceIdentifier = record.getSourceSystemFlowFileIdentifier();
            final String sourceFlowFileUUID;
            final int lastColon = sourceIdentifier.lastIndexOf(":");
            if (lastColon > -1 && lastColon < sourceIdentifier.length() - 2) {
                sourceFlowFileUUID = sourceIdentifier.substring(lastColon + 1);
            } else {
                sourceFlowFileUUID = null;
            }
            if (sourceFlowFileUUID != null) {
                addField(doc, SearchableFields.FlowFileUUID, sourceFlowFileUUID, Store.NO);
            }
        }
        indexWriter.addDocument(doc);
    }
}
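For context, a caller would open a Lucene IndexWriter over the index directory, run each event through this action, and commit. A rough usage sketch assuming Lucene 4.x-era APIs; indexDirectory, writerConfig, indexingAction, record, and blockIndex are placeholders, not names from this excerpt:

// Illustrative only; in NiFi the repository manages its IndexWriters itself.
try (final IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDirectory), writerConfig)) {
    indexingAction.index(record, indexWriter, blockIndex);
    indexWriter.commit();
}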
Use of org.apache.nifi.provenance.ProvenanceEventType in project nifi by apache.
From class StandardProcessSession, method updateProvenanceRepo:
private void updateProvenanceRepo(final Checkpoint checkpoint) {
    // Update Provenance Repository
    final ProvenanceEventRepository provenanceRepo = context.getProvenanceRepository();
    // We need to de-dupe the events that we've created and those reported to the provenance reporter,
    // in case the Processor developer submitted the same events to the reporter. So we use a LinkedHashSet
    // for this, so that we are able to ensure that the events are submitted in the proper order.
    final Set<ProvenanceEventRecord> recordsToSubmit = new LinkedHashSet<>();
    final Map<String, Set<ProvenanceEventType>> eventTypesPerFlowFileId = new HashMap<>();
    final Set<ProvenanceEventRecord> processorGenerated = checkpoint.reportedEvents;
    // Submit the FORK events that the framework generated, unless the set of events reported
    // by the Processor contains any of the FORK events that we generated.
    for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : checkpoint.forkEventBuilders.entrySet()) {
        final ProvenanceEventBuilder builder = entry.getValue();
        final FlowFile flowFile = entry.getKey();
        updateEventContentClaims(builder, flowFile, checkpoint.records.get(flowFile));
        final ProvenanceEventRecord event = builder.build();
        if (!event.getChildUuids().isEmpty() && !isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
            // If the framework generated the event, add it to the 'recordsToSubmit' Set.
            if (!processorGenerated.contains(event)) {
                recordsToSubmit.add(event);
            }
            // Register the FORK event for each child and each parent.
            for (final String childUuid : event.getChildUuids()) {
                addEventType(eventTypesPerFlowFileId, childUuid, event.getEventType());
            }
            for (final String parentUuid : event.getParentUuids()) {
                addEventType(eventTypesPerFlowFileId, parentUuid, event.getEventType());
            }
        }
    }
    // Now add any Processor-reported events.
    for (final ProvenanceEventRecord event : processorGenerated) {
        if (isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
            continue;
        }
        // Check whether the event indicates that the FlowFile was routed to the same
        // connection from which it was pulled (and only this connection). If so, discard the event.
        if (isSpuriousRouteEvent(event, checkpoint.records)) {
            continue;
        }
        recordsToSubmit.add(event);
        addEventType(eventTypesPerFlowFileId, event.getFlowFileUuid(), event.getEventType());
    }
    // Finally, add any other events that we may have generated.
    for (final List<ProvenanceEventRecord> eventList : checkpoint.generatedProvenanceEvents.values()) {
        for (final ProvenanceEventRecord event : eventList) {
            if (isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
                continue;
            }
            recordsToSubmit.add(event);
            addEventType(eventTypesPerFlowFileId, event.getFlowFileUuid(), event.getEventType());
        }
    }
    // Check if content or attributes changed. If so, register the appropriate events.
    for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
        final ContentClaim original = repoRecord.getOriginalClaim();
        final ContentClaim current = repoRecord.getCurrentClaim();
        boolean contentChanged = false;
        if (original == null && current != null) {
            contentChanged = true;
        }
        if (original != null && current == null) {
            contentChanged = true;
        }
        if (original != null && current != null && !original.equals(current)) {
            contentChanged = true;
        }
        final FlowFileRecord curFlowFile = repoRecord.getCurrent();
        final String flowFileId = curFlowFile.getAttribute(CoreAttributes.UUID.key());
        boolean eventAdded = false;
        if (checkpoint.removedFlowFiles.contains(flowFileId)) {
            continue;
        }
        final boolean newFlowFile = repoRecord.getOriginal() == null;
        if (contentChanged && !newFlowFile) {
            recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.CONTENT_MODIFIED).build());
            addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.CONTENT_MODIFIED);
            eventAdded = true;
        }
        if (checkpoint.createdFlowFiles.contains(flowFileId)) {
            final Set<ProvenanceEventType> registeredTypes = eventTypesPerFlowFileId.get(flowFileId);
            boolean creationEventRegistered = false;
            if (registeredTypes != null) {
                if (registeredTypes.contains(ProvenanceEventType.CREATE) || registeredTypes.contains(ProvenanceEventType.FORK)
                        || registeredTypes.contains(ProvenanceEventType.JOIN) || registeredTypes.contains(ProvenanceEventType.RECEIVE)
                        || registeredTypes.contains(ProvenanceEventType.FETCH)) {
                    creationEventRegistered = true;
                }
            }
            if (!creationEventRegistered) {
                recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.CREATE).build());
                eventAdded = true;
            }
        }
        if (!eventAdded && !repoRecord.getUpdatedAttributes().isEmpty()) {
            // An ATTRIBUTES_MODIFIED event is redundant if another event already exists for this
            // FlowFile, so register one only when no other event has been recorded.
            if (!eventTypesPerFlowFileId.containsKey(flowFileId)) {
                recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.ATTRIBUTES_MODIFIED).build());
                addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.ATTRIBUTES_MODIFIED);
            }
        }
    }
    // We want to submit the 'recordsToSubmit' collection, followed by the auto-terminated events, to the Provenance Repository.
    // We want to do this with a single call to ProvenanceEventRepository#registerEvents because it may be much more efficient
    // to do so.
    // However, we want to modify the events in 'recordsToSubmit' to obtain the data from the most recent version of the FlowFiles
    // (except for SEND events); see the note below as to why this is the case.
    // Therefore, we create an Iterable that can iterate over each of these events, modifying them as needed, and returning them
    // in the appropriate order. This prevents an unnecessary step of creating an intermediate List and adding all of those values
    // to the List.
    // This is done in a similar vein to how Java 8's streams work, iterating over the events and returning a processed version
    // one-at-a-time as opposed to iterating over the entire Collection and putting the results in another Collection. However,
    // we don't want to change the Framework to require Java 8 at this time, because it's not yet as prevalent as we would desire.
    final Map<String, FlowFileRecord> flowFileRecordMap = new HashMap<>();
    for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
        final FlowFileRecord flowFile = repoRecord.getCurrent();
        flowFileRecordMap.put(flowFile.getAttribute(CoreAttributes.UUID.key()), flowFile);
    }
    final List<ProvenanceEventRecord> autoTermEvents = checkpoint.autoTerminatedEvents;
    final Iterable<ProvenanceEventRecord> iterable = new Iterable<ProvenanceEventRecord>() {
        final Iterator<ProvenanceEventRecord> recordsToSubmitIterator = recordsToSubmit.iterator();
        final Iterator<ProvenanceEventRecord> autoTermIterator = autoTermEvents == null ? null : autoTermEvents.iterator();

        @Override
        public Iterator<ProvenanceEventRecord> iterator() {
            return new Iterator<ProvenanceEventRecord>() {
                @Override
                public boolean hasNext() {
                    return recordsToSubmitIterator.hasNext() || autoTermIterator != null && autoTermIterator.hasNext();
                }

                @Override
                public ProvenanceEventRecord next() {
                    if (recordsToSubmitIterator.hasNext()) {
                        final ProvenanceEventRecord rawEvent = recordsToSubmitIterator.next();
                        // Enrich the event with the most recent FlowFile data, except for SEND events: a SEND event
                        // should reflect the FlowFile as it was when it was sent, since that is what was actually exposed.
                        return enrich(rawEvent, flowFileRecordMap, checkpoint.records, rawEvent.getEventType() != ProvenanceEventType.SEND);
                    } else if (autoTermIterator != null && autoTermIterator.hasNext()) {
                        return enrich(autoTermIterator.next(), flowFileRecordMap, checkpoint.records, true);
                    }
                    throw new NoSuchElementException();
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    };
    provenanceRepo.registerEvents(iterable);
}
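The addEventType(...) helper is not shown here. Given its call sites, it presumably records an event type in the per-FlowFile map, creating the Set on first use. A minimal sketch, kept Java 7-compatible to match the comment above about not requiring Java 8:

// Hypothetical sketch inferred from the call sites above.
private void addEventType(final Map<String, Set<ProvenanceEventType>> map, final String flowFileId, final ProvenanceEventType eventType) {
    Set<ProvenanceEventType> eventTypes = map.get(flowFileId);
    if (eventTypes == null) {
        eventTypes = new HashSet<>();
        map.put(flowFileId, eventTypes);
    }
    eventTypes.add(eventType);
}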
Use of org.apache.nifi.provenance.ProvenanceEventType in project nifi by apache.
From class FlowController, method getReplayFailureReason:
private String getReplayFailureReason(final ProvenanceEventRecord event) {
    // Check that the event is a valid type.
    final ProvenanceEventType type = event.getEventType();
    if (type == ProvenanceEventType.JOIN) {
        return "Cannot replay events that are created from multiple parents";
    }
    // Make sure the event has the Content Claim info.
    final Long contentSize = event.getPreviousFileSize();
    final String contentClaimId = event.getPreviousContentClaimIdentifier();
    final String contentClaimSection = event.getPreviousContentClaimSection();
    final String contentClaimContainer = event.getPreviousContentClaimContainer();
    if (contentSize == null || contentClaimId == null || contentClaimSection == null || contentClaimContainer == null) {
        return "Cannot replay data from Provenance Event because the event does not contain the required Content Claim";
    }
    try {
        final ResourceClaim resourceClaim = resourceClaimManager.newResourceClaim(contentClaimContainer, contentClaimSection, contentClaimId, false, false);
        final ContentClaim contentClaim = new StandardContentClaim(resourceClaim, event.getPreviousContentClaimOffset());
        if (!contentRepository.isAccessible(contentClaim)) {
            return "Content is no longer available in Content Repository";
        }
    } catch (final IOException ioe) {
        return "Failed to determine whether or not content was available in Content Repository due to " + ioe.toString();
    }
    // Make sure that the source queue exists.
    if (event.getSourceQueueIdentifier() == null) {
        return "Cannot replay data from Provenance Event because the event does not specify the Source FlowFile Queue";
    }
    final List<Connection> connections = getGroup(getRootGroupId()).findAllConnections();
    FlowFileQueue queue = null;
    for (final Connection connection : connections) {
        if (event.getSourceQueueIdentifier().equals(connection.getIdentifier())) {
            queue = connection.getFlowFileQueue();
            break;
        }
    }
    if (queue == null) {
        return "Cannot replay data from Provenance Event because the Source FlowFile Queue with ID " + event.getSourceQueueIdentifier() + " no longer exists";
    }
    return null;
}
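A null return means the event is replayable; any non-null String is the user-facing reason replay must be refused. A plausible caller-side guard (hypothetical; the surrounding replay method is not part of this excerpt):

// Illustrative guard before attempting a replay.
final String failureReason = getReplayFailureReason(event);
if (failureReason != null) {
    throw new IllegalArgumentException(failureReason);
}
// proceed with replaying the event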