Use of org.apache.nifi.provenance.lineage.FlowFileNode in the Apache NiFi project.
From the class StandardLineageResult, method computeLineage.
/**
 * Rebuilds the lineage graph from the relevant Provenance Event Records.
 * <p>
 * The existing {@code nodes} and {@code edges} are discarded, the records are
 * walked in chronological order (event time, then event ID), and for each record
 * an event node is added and linked to the most recent node seen for the same
 * FlowFile UUID. Events that produce FlowFiles additionally get FlowFile nodes
 * and the corresponding edges.
 * <p>
 * If two events claim to have produced the same FlowFile, the problem is reported
 * through {@code setError} and the method returns immediately, leaving the graph
 * as built up to that point.
 * <p>
 * This method must be called with the write lock held, and is only useful once
 * all of the records have been successfully obtained.
 */
private void computeLineage() {
    logger.debug("Computing lineage with the following events: {}", relevantRecords);
    final long startNanos = System.nanoTime();

    nodes.clear();
    edges.clear();

    // Chronological ordering: Event Time first, Event ID as the tie-breaker.
    final Comparator<ProvenanceEventRecord> chronological = new Comparator<ProvenanceEventRecord>() {
        @Override
        public int compare(final ProvenanceEventRecord left, final ProvenanceEventRecord right) {
            final int byTime = Long.compare(left.getEventTime(), right.getEventTime());
            if (byTime != 0) {
                return byTime;
            }
            return Long.compare(left.getEventId(), right.getEventId());
        }
    };
    final List<ProvenanceEventRecord> orderedEvents = new ArrayList<>(relevantRecords);
    Collections.sort(orderedEvents, chronological);

    // FlowFile UUID -> the most recent node (event or FlowFile) recorded for that FlowFile
    final Map<String, LineageNode> latestNodeByUuid = new HashMap<>();

    // Turn each provenance record into lineage nodes (EventNode, FlowFileNode) and edges.
    for (final ProvenanceEventRecord event : orderedEvents) {
        final LineageNode eventNode = new EventNode(event);
        if (!nodes.add(eventNode)) {
            logger.debug("Did not add {} because it already exists in the 'nodes' set", eventNode);
        }

        // Link this event to whatever node was last seen for the same FlowFile UUID.
        final LineageNode previousNode = latestNodeByUuid.get(event.getFlowFileUuid());
        if (previousNode != null) {
            // For JOIN, CLONE and REPLAY events the edge carries the previous node's FlowFile UUID,
            // since the event's own UUID pertains to only one of (potentially) many FlowFiles
            // associated with the event. For every other event type the record's own UUID is used.
            final String edgeUuid;
            switch (event.getEventType()) {
                case JOIN:
                case CLONE:
                case REPLAY:
                    edgeUuid = previousNode.getFlowFileUuid();
                    break;
                default:
                    edgeUuid = event.getFlowFileUuid();
                    break;
            }
            edges.add(new EdgeNode(edgeUuid, previousNode, eventNode));
        }
        latestNodeByUuid.put(event.getFlowFileUuid(), eventNode);

        switch (event.getEventType()) {
            case FORK:
            case JOIN:
            case REPLAY:
            case FETCH:
            case CLONE: {
                // These events give rise to FlowFile nodes: add a node and an edge for every
                // child that belongs to this lineage.
                for (final String childUuid : event.getChildUuids()) {
                    if (!flowFileUuids.contains(childUuid)) {
                        continue;
                    }

                    final FlowFileNode spawnedNode = new FlowFileNode(childUuid, event.getEventTime());
                    if (!nodes.add(spawnedNode)) {
                        final String msg = "Unable to generate Lineage Graph because multiple "
                            + "events were registered claiming to have generated the same FlowFile (UUID = "
                            + spawnedNode.getFlowFileUuid() + ")";
                        logger.error(msg);
                        setError(msg);
                        return;
                    }

                    edges.add(new EdgeNode(spawnedNode.getFlowFileUuid(), eventNode, spawnedNode));
                    latestNodeByUuid.put(childUuid, spawnedNode);
                }

                // Connect each parent's most recent node to this event, skipping the case where
                // that node is this event itself, then make this event the parent's latest node.
                for (final String parentUuid : event.getParentUuids()) {
                    final LineageNode parentTail = latestNodeByUuid.get(parentUuid);
                    final boolean needsEdge = parentTail != null && !parentTail.equals(eventNode);
                    if (needsEdge) {
                        edges.add(new EdgeNode(parentUuid, parentTail, eventNode));
                    }
                    latestNodeByUuid.put(parentUuid, eventNode);
                }
                break;
            }
            case RECEIVE:
            case CREATE: {
                // The event introduces a FlowFile: add a FlowFile node for it and an edge
                // running from the event to that FlowFile node.
                final LineageNode receivedNode = new FlowFileNode(event.getFlowFileUuid(), event.getEventTime());
                if (!nodes.add(receivedNode)) {
                    final String msg = "Found cycle in graph. This indicates that multiple events "
                        + "were registered claiming to have generated the same FlowFile (UUID = "
                        + receivedNode.getFlowFileUuid() + ")";
                    setError(msg);
                    logger.error(msg);
                    return;
                }

                edges.add(new EdgeNode(event.getFlowFileUuid(), eventNode, receivedNode));
                latestNodeByUuid.put(event.getFlowFileUuid(), receivedNode);
                break;
            }
            default:
                break;
        }
    }

    final long elapsedNanos = System.nanoTime() - startNanos;
    final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
    logger.debug("Finished building lineage with {} nodes and {} edges in {} millis", nodes.size(), edges.size(), elapsedMillis);
}
Aggregations