Use of org.apache.nifi.provenance.lineage.LineageNode in project nifi by apache.
Class TestPersistentProvenanceRepository, method testLineageReceiveDrop:
@Test
public void testLineageReceiveDrop() throws IOException, InterruptedException, ParseException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfiguration();
    config.setMaxRecordLife(3, TimeUnit.SECONDS);
    config.setMaxStorageCapacity(1024L * 1024L);
    config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);
    config.setMaxEventFileCapacity(1024L * 1024L);
    config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields()));

    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS);
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);

    final String uuid = "00000000-0000-0000-0000-000000000001";
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("abc", "xyz");
    attributes.put("uuid", uuid);
    attributes.put("filename", "file-" + uuid);

    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    attributes.put("uuid", uuid);
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    repo.registerEvent(builder.build());

    builder.setEventTime(System.currentTimeMillis() + 1);
    builder.setEventType(ProvenanceEventType.DROP);
    builder.setTransitUri(null);
    repo.registerEvent(builder.build());

    repo.waitForRollover();

    final Lineage lineage = repo.computeLineage(uuid, createUser());
    assertNotNull(lineage);

    // Nodes should consist of a RECEIVE followed by FlowFileNode, followed by a DROP
    final List<LineageNode> nodes = lineage.getNodes();
    final List<LineageEdge> edges = lineage.getEdges();
    assertEquals(3, nodes.size());

    for (final LineageEdge edge : edges) {
        if (edge.getSource().getNodeType() == LineageNodeType.FLOWFILE_NODE) {
            assertTrue(edge.getDestination().getNodeType() == LineageNodeType.PROVENANCE_EVENT_NODE);
            assertTrue(((EventNode) edge.getDestination()).getEventType() == ProvenanceEventType.DROP);
        } else {
            assertTrue(((EventNode) edge.getSource()).getEventType() == ProvenanceEventType.RECEIVE);
            assertTrue(edge.getDestination().getNodeType() == LineageNodeType.FLOWFILE_NODE);
        }
    }
}
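The traversal pattern in the assertions above (walking getEdges() and inspecting each node's type and FlowFile UUID) is also useful outside of tests. Below is a minimal sketch of such a helper; the name describeLineage is illustrative and not part of NiFi, and it relies only on the accessors already exercised in this test.

// Hypothetical helper: prints each edge of a computed lineage as "source -> destination".
// Assumes the same imports used in the test above (Lineage, LineageEdge, LineageNode).
private static void describeLineage(final Lineage lineage) {
    for (final LineageEdge edge : lineage.getEdges()) {
        final LineageNode source = edge.getSource();
        final LineageNode destination = edge.getDestination();
        System.out.printf("%s [%s] -> %s [%s]%n",
            source.getFlowFileUuid(), source.getNodeType(),
            destination.getFlowFileUuid(), destination.getNodeType());
    }
}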
Use of org.apache.nifi.provenance.lineage.LineageNode in project nifi by apache.
Class TestLuceneEventIndex, method testUnauthorizedEventsGetPlaceholdersForFindParents:
@Test(timeout = 60000)
public void testUnauthorizedEventsGetPlaceholdersForFindParents() throws InterruptedException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration repoConfig = createConfig(1);
    repoConfig.setDesiredIndexSize(1L);

    final IndexManager indexManager = new SimpleIndexManager(repoConfig);
    final ArrayListEventStore eventStore = new ArrayListEventStore();
    final LuceneEventIndex index = new LuceneEventIndex(repoConfig, indexManager, 3, EventReporter.NO_OP);
    index.initialize(eventStore);

    final ProvenanceEventRecord firstEvent = createEvent("4444");

    final Map<String, String> previousAttributes = new HashMap<>();
    previousAttributes.put("uuid", "4444");
    final Map<String, String> updatedAttributes = new HashMap<>();
    updatedAttributes.put("updated", "true");

    final ProvenanceEventRecord join = new StandardProvenanceEventRecord.Builder()
        .setEventType(ProvenanceEventType.JOIN)
        .setAttributes(previousAttributes, updatedAttributes)
        .addParentUuid("4444")
        .addChildFlowFile("1234")
        .setComponentId("component-1")
        .setComponentType("unit test")
        .setEventId(idGenerator.getAndIncrement())
        .setEventTime(System.currentTimeMillis())
        .setFlowFileEntryDate(System.currentTimeMillis())
        .setFlowFileUUID("1234")
        .setLineageStartDate(System.currentTimeMillis())
        .setCurrentContentClaim("container", "section", "unit-test-id", 0L, 1024L)
        .build();

    index.addEvents(eventStore.addEvent(firstEvent).getStorageLocations());
    index.addEvents(eventStore.addEvent(join).getStorageLocations());

    for (int i = 0; i < 3; i++) {
        final ProvenanceEventRecord event = createEvent("1234");
        final StorageResult storageResult = eventStore.addEvent(event);
        index.addEvents(storageResult.getStorageLocations());
    }

    final NiFiUser user = createUser();

    final EventAuthorizer allowJoinEvents = new EventAuthorizer() {
        @Override
        public boolean isAuthorized(ProvenanceEventRecord event) {
            return event.getEventType() == ProvenanceEventType.JOIN;
        }

        @Override
        public void authorize(ProvenanceEventRecord event) throws AccessDeniedException {
        }
    };

    List<LineageNode> nodes = Collections.emptyList();
    while (nodes.size() < 2) {
        final ComputeLineageSubmission submission = index.submitExpandParents(1L, user, allowJoinEvents);
        assertTrue(submission.getResult().awaitCompletion(5, TimeUnit.SECONDS));
        nodes = submission.getResult().getNodes();
        Thread.sleep(25L);
    }
    assertEquals(2, nodes.size());

    final Map<ProvenanceEventType, List<LineageNode>> eventMap = nodes.stream()
        .filter(n -> n.getNodeType() == LineageNodeType.PROVENANCE_EVENT_NODE)
        .collect(Collectors.groupingBy(n -> ((ProvenanceEventLineageNode) n).getEventType()));

    assertEquals(2, eventMap.size());
    assertEquals(1, eventMap.get(ProvenanceEventType.JOIN).size());
    assertEquals(1, eventMap.get(ProvenanceEventType.UNKNOWN).size());
    assertEquals("4444", eventMap.get(ProvenanceEventType.UNKNOWN).get(0).getFlowFileUuid());
}
Use of org.apache.nifi.provenance.lineage.LineageNode in project nifi by apache.
Class TestLuceneEventIndex, method testUnauthorizedEventsGetPlaceholdersForLineage:
@Test(timeout = 60000)
public void testUnauthorizedEventsGetPlaceholdersForLineage() throws InterruptedException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration repoConfig = createConfig(1);
    repoConfig.setDesiredIndexSize(1L);

    final IndexManager indexManager = new SimpleIndexManager(repoConfig);
    final ArrayListEventStore eventStore = new ArrayListEventStore();
    final LuceneEventIndex index = new LuceneEventIndex(repoConfig, indexManager, 3, EventReporter.NO_OP);
    index.initialize(eventStore);

    for (int i = 0; i < 3; i++) {
        final ProvenanceEventRecord event = createEvent("1234");
        final StorageResult storageResult = eventStore.addEvent(event);
        index.addEvents(storageResult.getStorageLocations());
    }

    final NiFiUser user = createUser();

    List<LineageNode> nodes = Collections.emptyList();
    while (nodes.size() < 3) {
        final ComputeLineageSubmission submission = index.submitLineageComputation(1L, user, EventAuthorizer.DENY_ALL);
        assertTrue(submission.getResult().awaitCompletion(5, TimeUnit.SECONDS));
        nodes = submission.getResult().getNodes();
        Thread.sleep(25L);
    }
    assertEquals(3, nodes.size());

    for (final LineageNode node : nodes) {
        assertEquals(LineageNodeType.PROVENANCE_EVENT_NODE, node.getNodeType());
        final ProvenanceEventLineageNode eventNode = (ProvenanceEventLineageNode) node;
        assertEquals(ProvenanceEventType.UNKNOWN, eventNode.getEventType());
    }
}
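EventAuthorizer.DENY_ALL is the simplest possible authorizer; the previous example shows a JOIN-only authorizer built against the same two-method interface. As another illustration, a component-scoped authorizer might look like the sketch below. The component id "1234" and the exception message are purely illustrative; the sketch assumes a ProvenanceEventRecord.getComponentId() accessor matching the setComponentId(...) builder call shown earlier, and an AccessDeniedException(String) constructor.

// Hypothetical authorizer: only events emitted by one component are visible to the user;
// every other event would surface as an UNKNOWN placeholder node, as asserted above.
final EventAuthorizer allowComponent1234 = new EventAuthorizer() {
    @Override
    public boolean isAuthorized(final ProvenanceEventRecord event) {
        return "1234".equals(event.getComponentId());
    }

    @Override
    public void authorize(final ProvenanceEventRecord event) throws AccessDeniedException {
        if (!isAuthorized(event)) {
            throw new AccessDeniedException("Not authorized to view event from component " + event.getComponentId());
        }
    }
};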
Use of org.apache.nifi.provenance.lineage.LineageNode in project nifi by apache.
Class StandardLineageResult, method computeLineage:
/**
 * Computes the lineage from the relevant Provenance Event Records. This
 * method must be called with the write lock held and is only going to be
 * useful after all of the records have been successfully obtained.
 */
private void computeLineage() {
    logger.debug("Computing lineage with the following events: {}", relevantRecords);
    final long startNanos = System.nanoTime();

    nodes.clear();
    edges.clear();

    // maps FlowFile UUID to the last event for that FlowFile
    Map<String, LineageNode> lastEventMap = new HashMap<>();

    final List<ProvenanceEventRecord> sortedRecords = new ArrayList<>(relevantRecords);
    Collections.sort(sortedRecords, new Comparator<ProvenanceEventRecord>() {
        @Override
        public int compare(final ProvenanceEventRecord o1, final ProvenanceEventRecord o2) {
            // Sort on Event Time, then Event ID.
            final int eventTimeComparison = Long.compare(o1.getEventTime(), o2.getEventTime());
            if (eventTimeComparison == 0) {
                return Long.compare(o1.getEventId(), o2.getEventId());
            } else {
                return eventTimeComparison;
            }
        }
    });

    // convert the StandardProvenanceRecord objects into Lineage nodes (FlowFileNodes, EventNodes).
    for (final ProvenanceEventRecord record : sortedRecords) {
        final LineageNode lineageNode = new EventNode(record);
        final boolean added = nodes.add(lineageNode);
        if (!added) {
            logger.debug("Did not add {} because it already exists in the 'nodes' set", lineageNode);
        }

        // Create an edge that connects this node to the previous node for the same FlowFile UUID.
        final LineageNode lastNode = lastEventMap.get(record.getFlowFileUuid());
        if (lastNode != null) {
            // We calculate the Edge UUID based on whether or not this event spawned new FlowFiles
            // (JOIN, CLONE, REPLAY). For such an event we use the previous node's UUID, because the
            // event's own UUID pertains to only one of (potentially) many UUIDs associated with the
            // event. Otherwise, we know that the UUID of this record is appropriate, so we just use it.
            final String edgeUuid;
            switch (record.getEventType()) {
                case JOIN:
                case CLONE:
                case REPLAY:
                    edgeUuid = lastNode.getFlowFileUuid();
                    break;
                default:
                    edgeUuid = record.getFlowFileUuid();
                    break;
            }

            edges.add(new EdgeNode(edgeUuid, lastNode, lineageNode));
        }

        lastEventMap.put(record.getFlowFileUuid(), lineageNode);

        switch (record.getEventType()) {
            case FORK:
            case JOIN:
            case REPLAY:
            case FETCH:
            case CLONE: {
                // For events that create FlowFile nodes, we need to create the FlowFile Nodes
                // and associated Edges, as appropriate.
                for (final String childUuid : record.getChildUuids()) {
                    if (flowFileUuids.contains(childUuid)) {
                        final FlowFileNode childNode = new FlowFileNode(childUuid, record.getEventTime());
                        final boolean isNewFlowFile = nodes.add(childNode);
                        if (!isNewFlowFile) {
                            final String msg = "Unable to generate Lineage Graph because multiple events were registered "
                                + "claiming to have generated the same FlowFile (UUID = " + childNode.getFlowFileUuid() + ")";
                            logger.error(msg);
                            setError(msg);
                            return;
                        }

                        edges.add(new EdgeNode(childNode.getFlowFileUuid(), lineageNode, childNode));
                        lastEventMap.put(childUuid, childNode);
                    }
                }

                for (final String parentUuid : record.getParentUuids()) {
                    LineageNode lastNodeForParent = lastEventMap.get(parentUuid);
                    if (lastNodeForParent != null && !lastNodeForParent.equals(lineageNode)) {
                        edges.add(new EdgeNode(parentUuid, lastNodeForParent, lineageNode));
                    }

                    lastEventMap.put(parentUuid, lineageNode);
                }
            }
            break;
            case RECEIVE:
            case CREATE: {
                // for a RECEIVE or CREATE event, we want to create a FlowFile Node that represents the
                // FlowFile received/created and create an edge from the Event Node to the FlowFile Node
                final LineageNode flowFileNode = new FlowFileNode(record.getFlowFileUuid(), record.getEventTime());
                final boolean isNewFlowFile = nodes.add(flowFileNode);
                if (!isNewFlowFile) {
                    final String msg = "Found cycle in graph. This indicates that multiple events were registered "
                        + "claiming to have generated the same FlowFile (UUID = " + flowFileNode.getFlowFileUuid() + ")";
                    setError(msg);
                    logger.error(msg);
                    return;
                }

                edges.add(new EdgeNode(record.getFlowFileUuid(), lineageNode, flowFileNode));
                lastEventMap.put(record.getFlowFileUuid(), flowFileNode);
            }
            break;
            default:
                break;
        }
    }

    final long nanos = System.nanoTime() - startNanos;
    logger.debug("Finished building lineage with {} nodes and {} edges in {} millis",
        nodes.size(), edges.size(), TimeUnit.NANOSECONDS.toMillis(nanos));
}
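The Javadoc's requirement that the write lock be held suggests a calling pattern in which records are accumulated under the lock and computeLineage() is invoked once the final batch arrives. A sketch of such a caller follows, assuming a ReentrantReadWriteLock field named rwLock and a remainingCount counter; these field names are assumptions for illustration and are not necessarily those used by StandardLineageResult.

// Hypothetical caller: events arrive in batches; once all expected records have been
// collected, compute the lineage while the write lock is still held, as the Javadoc requires.
public void update(final Collection<ProvenanceEventRecord> records) {
    rwLock.writeLock().lock();
    try {
        relevantRecords.addAll(records);
        remainingCount -= records.size();   // assumed count of records still outstanding
        if (remainingCount == 0) {
            computeLineage();               // safe: the write lock is held here
        }
    } finally {
        rwLock.writeLock().unlock();
    }
}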
Use of org.apache.nifi.provenance.lineage.LineageNode in project nifi by apache.
Class TestPersistentProvenanceRepository, method testLineageReceiveDropAsync:
@Test
public void testLineageReceiveDropAsync() throws IOException, InterruptedException, ParseException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfiguration();
    config.setMaxRecordLife(3, TimeUnit.SECONDS);
    config.setMaxStorageCapacity(1024L * 1024L);
    config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);
    config.setMaxEventFileCapacity(1024L * 1024L);
    config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields()));

    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS);
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);

    final String uuid = "00000000-0000-0000-0000-000000000001";
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("abc", "xyz");
    attributes.put("uuid", uuid);
    attributes.put("filename", "file-" + uuid);

    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    attributes.put("uuid", uuid);
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    repo.registerEvent(builder.build());

    builder.setEventTime(System.currentTimeMillis() + 1);
    builder.setEventType(ProvenanceEventType.DROP);
    builder.setTransitUri(null);
    repo.registerEvent(builder.build());

    repo.waitForRollover();

    final AsyncLineageSubmission submission = repo.submitLineageComputation(uuid, createUser());
    while (!submission.getResult().isFinished()) {
        Thread.sleep(100L);
    }
    assertNotNull(submission);

    // Nodes should consist of a RECEIVE followed by FlowFileNode, followed by a DROP
    final List<LineageNode> nodes = submission.getResult().getNodes();
    final List<LineageEdge> edges = submission.getResult().getEdges();
    assertEquals(3, nodes.size());

    for (final LineageEdge edge : edges) {
        if (edge.getSource().getNodeType() == LineageNodeType.FLOWFILE_NODE) {
            assertTrue(edge.getDestination().getNodeType() == LineageNodeType.PROVENANCE_EVENT_NODE);
            assertTrue(((EventNode) edge.getDestination()).getEventType() == ProvenanceEventType.DROP);
        } else {
            assertTrue(((EventNode) edge.getSource()).getEventType() == ProvenanceEventType.RECEIVE);
            assertTrue(edge.getDestination().getNodeType() == LineageNodeType.FLOWFILE_NODE);
        }
    }
}
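The polling loop on isFinished() above works, but it could also be replaced with the blocking awaitCompletion(...) call used in the TestLuceneEventIndex examples earlier, assuming the result returned by submitLineageComputation exposes the same method. A sketch:

// Alternative to polling: block until the lineage computation completes or the timeout elapses,
// mirroring the awaitCompletion(...) usage in the Lucene index tests above.
final AsyncLineageSubmission submission = repo.submitLineageComputation(uuid, createUser());
assertTrue(submission.getResult().awaitCompletion(5, TimeUnit.SECONDS));
final List<LineageNode> nodes = submission.getResult().getNodes();
final List<LineageEdge> edges = submission.getResult().getEdges();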