Use of org.apache.nifi.provenance.lineage.EventNode in project nifi by apache.
The class ITReportLineageToAtlas, method createEdge:
private EdgeNode createEdge(ProvenanceRecords prs, int srcIdx, int tgtIdx) {
    // Generate C created a FlowFile
    final ProvenanceEventRecord srcR = prs.get(srcIdx);
    // Then Remote Input Port sent it
    final ProvenanceEventRecord tgtR = prs.get(tgtIdx);
    final EventNode src = new EventNode(srcR);
    final EventNode tgt = new EventNode(tgtR);
    final EdgeNode edge = new EdgeNode(srcR.getComponentType() + " to " + tgtR.getEventType(), src, tgt);
    return edge;
}
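A helper like this is typically used to build the edges a test expects and then compare them against a computed lineage. The following usage is only a sketch: the ProvenanceRecords indices, the lineage variable, and matching edges by node identifier are assumptions, not code from ITReportLineageToAtlas.

    // Hypothetical usage sketch (not from the original test): build expected edges from
    // consecutive provenance events, then check that the computed lineage contains an edge
    // with the same source and destination identifiers.
    final List<EdgeNode> expectedEdges = new ArrayList<>();
    expectedEdges.add(createEdge(prs, 0, 1));
    expectedEdges.add(createEdge(prs, 1, 2));

    for (final EdgeNode expected : expectedEdges) {
        final boolean found = lineage.getEdges().stream().anyMatch(e ->
            e.getSource().getIdentifier().equals(expected.getSource().getIdentifier())
                && e.getDestination().getIdentifier().equals(expected.getDestination().getIdentifier()));
        assertTrue("Expected edge not found in computed lineage", found);
    }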
Use of org.apache.nifi.provenance.lineage.EventNode in project nifi by apache.
The class TestPersistentProvenanceRepository, method testLineageReceiveDrop:
@Test
public void testLineageReceiveDrop() throws IOException, InterruptedException, ParseException {
    assumeFalse(isWindowsEnvironment());
    final RepositoryConfiguration config = createConfiguration();
    config.setMaxRecordLife(3, TimeUnit.SECONDS);
    config.setMaxStorageCapacity(1024L * 1024L);
    config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);
    config.setMaxEventFileCapacity(1024L * 1024L);
    config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields()));
    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS);
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);

    final String uuid = "00000000-0000-0000-0000-000000000001";
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("abc", "xyz");
    attributes.put("uuid", uuid);
    attributes.put("filename", "file-" + uuid);

    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    attributes.put("uuid", uuid);
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    repo.registerEvent(builder.build());

    builder.setEventTime(System.currentTimeMillis() + 1);
    builder.setEventType(ProvenanceEventType.DROP);
    builder.setTransitUri(null);
    repo.registerEvent(builder.build());

    repo.waitForRollover();

    final Lineage lineage = repo.computeLineage(uuid, createUser());
    assertNotNull(lineage);

    // Nodes should consist of a RECEIVE followed by FlowFileNode, followed by a DROP
    final List<LineageNode> nodes = lineage.getNodes();
    final List<LineageEdge> edges = lineage.getEdges();
    assertEquals(3, nodes.size());

    for (final LineageEdge edge : edges) {
        if (edge.getSource().getNodeType() == LineageNodeType.FLOWFILE_NODE) {
            assertTrue(edge.getDestination().getNodeType() == LineageNodeType.PROVENANCE_EVENT_NODE);
            assertTrue(((EventNode) edge.getDestination()).getEventType() == ProvenanceEventType.DROP);
        } else {
            assertTrue(((EventNode) edge.getSource()).getEventType() == ProvenanceEventType.RECEIVE);
            assertTrue(edge.getDestination().getNodeType() == LineageNodeType.FLOWFILE_NODE);
        }
    }
}
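The loop above checks each edge independently; a stricter check can also pin the FlowFile node to both edges. A sketch of such a helper (it assumes the LineageNode accessors shown above, including getIdentifier(), and is not part of TestPersistentProvenanceRepository):

    // Sketch: assert the full RECEIVE -> FlowFileNode -> DROP chain explicitly.
    // The helper itself and comparing nodes via getIdentifier() are assumptions.
    private void assertReceiveFlowFileDropChain(final List<LineageNode> nodes, final List<LineageEdge> edges) {
        assertEquals(3, nodes.size());
        assertEquals(2, edges.size());

        final LineageNode flowFileNode = nodes.stream()
            .filter(n -> n.getNodeType() == LineageNodeType.FLOWFILE_NODE)
            .findFirst()
            .orElseThrow(() -> new AssertionError("No FlowFile node in lineage"));

        // The FlowFile node should be the destination of the RECEIVE edge...
        assertTrue(edges.stream().anyMatch(e ->
            e.getDestination().getIdentifier().equals(flowFileNode.getIdentifier())
                && ((EventNode) e.getSource()).getEventType() == ProvenanceEventType.RECEIVE));

        // ...and the source of the DROP edge.
        assertTrue(edges.stream().anyMatch(e ->
            e.getSource().getIdentifier().equals(flowFileNode.getIdentifier())
                && ((EventNode) e.getDestination()).getEventType() == ProvenanceEventType.DROP));
    }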
Use of org.apache.nifi.provenance.lineage.EventNode in project nifi by apache.
The class StandardLineageResult, method computeLineage:
/**
 * Computes the lineage from the relevant Provenance Event Records. This
 * method must be called with the write lock held and is only going to be
 * useful after all of the records have been successfully obtained.
 */
private void computeLineage() {
    logger.debug("Computing lineage with the following events: {}", relevantRecords);
    final long startNanos = System.nanoTime();

    nodes.clear();
    edges.clear();

    // maps FlowFile UUID to last event for that FlowFile
    Map<String, LineageNode> lastEventMap = new HashMap<>();

    final List<ProvenanceEventRecord> sortedRecords = new ArrayList<>(relevantRecords);
    Collections.sort(sortedRecords, new Comparator<ProvenanceEventRecord>() {
        @Override
        public int compare(final ProvenanceEventRecord o1, final ProvenanceEventRecord o2) {
            // Sort on Event Time, then Event ID.
            final int eventTimeComparison = Long.compare(o1.getEventTime(), o2.getEventTime());
            if (eventTimeComparison == 0) {
                return Long.compare(o1.getEventId(), o2.getEventId());
            } else {
                return eventTimeComparison;
            }
        }
    });

    // convert the StandardProvenanceRecord objects into Lineage nodes (FlowFileNode, EventNodes).
    for (final ProvenanceEventRecord record : sortedRecords) {
        final LineageNode lineageNode = new EventNode(record);
        final boolean added = nodes.add(lineageNode);
        if (!added) {
            logger.debug("Did not add {} because it already exists in the 'nodes' set", lineageNode);
        }

        // Create an edge that connects this node to the previous node for the same FlowFile UUID.
        final LineageNode lastNode = lastEventMap.get(record.getFlowFileUuid());
        if (lastNode != null) {
            // We calculate the Edge UUID based on whether or not this event is a SPAWN.
            // If this event is a SPAWN, then we want to use the previous node's UUID because a
            // SPAWN Event's UUID is not necessarily what we want, since a SPAWN Event's UUID pertains to
            // only one of (potentially) many UUIDs associated with the event. Otherwise, we know that
            // the UUID of this record is appropriate, so we just use it.
            final String edgeUuid;
            switch (record.getEventType()) {
                case JOIN:
                case CLONE:
                case REPLAY:
                    edgeUuid = lastNode.getFlowFileUuid();
                    break;
                default:
                    edgeUuid = record.getFlowFileUuid();
                    break;
            }

            edges.add(new EdgeNode(edgeUuid, lastNode, lineageNode));
        }

        lastEventMap.put(record.getFlowFileUuid(), lineageNode);

        switch (record.getEventType()) {
            case FORK:
            case JOIN:
            case REPLAY:
            case FETCH:
            case CLONE: {
                // For events that create FlowFile nodes, we need to create the FlowFile Nodes and associated Edges, as appropriate
                for (final String childUuid : record.getChildUuids()) {
                    if (flowFileUuids.contains(childUuid)) {
                        final FlowFileNode childNode = new FlowFileNode(childUuid, record.getEventTime());
                        final boolean isNewFlowFile = nodes.add(childNode);
                        if (!isNewFlowFile) {
                            final String msg = "Unable to generate Lineage Graph because multiple " + "events were registered claiming to have generated the same FlowFile (UUID = " + childNode.getFlowFileUuid() + ")";
                            logger.error(msg);
                            setError(msg);
                            return;
                        }

                        edges.add(new EdgeNode(childNode.getFlowFileUuid(), lineageNode, childNode));
                        lastEventMap.put(childUuid, childNode);
                    }
                }

                for (final String parentUuid : record.getParentUuids()) {
                    LineageNode lastNodeForParent = lastEventMap.get(parentUuid);
                    if (lastNodeForParent != null && !lastNodeForParent.equals(lineageNode)) {
                        edges.add(new EdgeNode(parentUuid, lastNodeForParent, lineageNode));
                    }

                    lastEventMap.put(parentUuid, lineageNode);
                }
            }
            break;
            case RECEIVE:
            case CREATE: {
                // for a receive event, we want to create a FlowFile Node that represents the FlowFile received
                // and create an edge from the Receive Event to the FlowFile Node
                final LineageNode flowFileNode = new FlowFileNode(record.getFlowFileUuid(), record.getEventTime());
                final boolean isNewFlowFile = nodes.add(flowFileNode);
                if (!isNewFlowFile) {
                    final String msg = "Found cycle in graph. This indicates that multiple events " + "were registered claiming to have generated the same FlowFile (UUID = " + flowFileNode.getFlowFileUuid() + ")";
                    setError(msg);
                    logger.error(msg);
                    return;
                }

                edges.add(new EdgeNode(record.getFlowFileUuid(), lineageNode, flowFileNode));
                lastEventMap.put(record.getFlowFileUuid(), flowFileNode);
            }
            break;
            default:
                break;
        }
    }

    final long nanos = System.nanoTime() - startNanos;
    logger.debug("Finished building lineage with {} nodes and {} edges in {} millis", nodes.size(), edges.size(), TimeUnit.NANOSECONDS.toMillis(nanos));
}
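As a worked illustration of the chaining logic above (derived from the code shown, not taken from the repository): three events on a single FlowFile with UUID "A" produce one EventNode per event, a FlowFileNode created by the RECEIVE case, and an edge from each node to its successor via lastEventMap.

    // Hypothetical trace of computeLineage() for RECEIVE, ATTRIBUTES_MODIFIED, DROP on FlowFile "A":
    //
    // RECEIVE (event 1)             -> add EventNode(RECEIVE)
    //                                  add FlowFileNode("A")
    //                                  add edge EventNode(RECEIVE) -> FlowFileNode("A")
    //                                  lastEventMap["A"] = FlowFileNode("A")
    //
    // ATTRIBUTES_MODIFIED (event 2) -> add EventNode(ATTRIBUTES_MODIFIED)
    //                                  add edge FlowFileNode("A") -> EventNode(ATTRIBUTES_MODIFIED)
    //                                  lastEventMap["A"] = EventNode(ATTRIBUTES_MODIFIED)
    //
    // DROP (event 3)                -> add EventNode(DROP)
    //                                  add edge EventNode(ATTRIBUTES_MODIFIED) -> EventNode(DROP)
    //                                  lastEventMap["A"] = EventNode(DROP)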
Use of org.apache.nifi.provenance.lineage.EventNode in project nifi by apache.
The class TestPersistentProvenanceRepository, method testLineageReceiveDropAsync:
@Test
public void testLineageReceiveDropAsync() throws IOException, InterruptedException, ParseException {
    assumeFalse(isWindowsEnvironment());
    final RepositoryConfiguration config = createConfiguration();
    config.setMaxRecordLife(3, TimeUnit.SECONDS);
    config.setMaxStorageCapacity(1024L * 1024L);
    config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);
    config.setMaxEventFileCapacity(1024L * 1024L);
    config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields()));
    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS);
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);

    final String uuid = "00000000-0000-0000-0000-000000000001";
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("abc", "xyz");
    attributes.put("uuid", uuid);
    attributes.put("filename", "file-" + uuid);

    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    attributes.put("uuid", uuid);
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    repo.registerEvent(builder.build());

    builder.setEventTime(System.currentTimeMillis() + 1);
    builder.setEventType(ProvenanceEventType.DROP);
    builder.setTransitUri(null);
    repo.registerEvent(builder.build());

    repo.waitForRollover();

    final AsyncLineageSubmission submission = repo.submitLineageComputation(uuid, createUser());
    while (!submission.getResult().isFinished()) {
        Thread.sleep(100L);
    }
    assertNotNull(submission);

    // Nodes should consist of a RECEIVE followed by FlowFileNode, followed by a DROP
    final List<LineageNode> nodes = submission.getResult().getNodes();
    final List<LineageEdge> edges = submission.getResult().getEdges();
    assertEquals(3, nodes.size());

    for (final LineageEdge edge : edges) {
        if (edge.getSource().getNodeType() == LineageNodeType.FLOWFILE_NODE) {
            assertTrue(edge.getDestination().getNodeType() == LineageNodeType.PROVENANCE_EVENT_NODE);
            assertTrue(((EventNode) edge.getDestination()).getEventType() == ProvenanceEventType.DROP);
        } else {
            assertTrue(((EventNode) edge.getSource()).getEventType() == ProvenanceEventType.RECEIVE);
            assertTrue(edge.getDestination().getNodeType() == LineageNodeType.FLOWFILE_NODE);
        }
    }
}
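The polling loop above will spin indefinitely if the submission never finishes. A bounded-wait variant (a sketch only; the timeout value and the use of JUnit's fail() are assumptions, not part of the original test):

    // Sketch: bound the wait for the asynchronous lineage computation so a stalled
    // submission fails the test instead of hanging it. The 10-second limit is an assumption.
    final long deadline = System.currentTimeMillis() + TimeUnit.SECONDS.toMillis(10);
    while (!submission.getResult().isFinished()) {
        if (System.currentTimeMillis() > deadline) {
            fail("Lineage computation did not finish within 10 seconds");
        }
        Thread.sleep(100L);
    }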