Search in sources :

Example 1 with FlowFileNode

Use of org.apache.nifi.provenance.lineage.FlowFileNode in project nifi by apache.

From the class StandardLineageResult, method computeLineage.

/**
 * Rebuilds the lineage graph (the {@code nodes} and {@code edges} collections) from the
 * Provenance Event Records held in {@code relevantRecords}.
 *
 * <p>Both collections are cleared first. The records are then processed in ascending order
 * of event time (ties broken by event ID); each record becomes an {@link EventNode} that is
 * chained to the most recent node seen for the same FlowFile UUID. Events that produce
 * FlowFiles additionally contribute {@link FlowFileNode}s and the edges that connect them.
 *
 * <p>If a FlowFile node that is about to be created already exists in {@code nodes}, the
 * problem is reported through {@code setError} and the logger, and the method returns
 * immediately, leaving the graph as built up to that point.
 *
 * <p>This method must be called with the write lock held and is only going to be useful
 * after all of the records have been successfully obtained.
 */
private void computeLineage() {
    logger.debug("Computing lineage with the following events: {}", relevantRecords);
    final long startNanos = System.nanoTime();

    nodes.clear();
    edges.clear();

    // For every FlowFile UUID, the node most recently attached to that FlowFile's chain.
    final Map<String, LineageNode> latestNodeByUuid = new HashMap<>();

    // Work on a sorted copy so that relevantRecords itself is left untouched.
    final List<ProvenanceEventRecord> chronological = new ArrayList<>(relevantRecords);
    Collections.sort(chronological, new Comparator<ProvenanceEventRecord>() {

        @Override
        public int compare(final ProvenanceEventRecord left, final ProvenanceEventRecord right) {
            // Primary key: event time. Secondary key: event ID.
            final int byTime = Long.compare(left.getEventTime(), right.getEventTime());
            return byTime != 0 ? byTime : Long.compare(left.getEventId(), right.getEventId());
        }
    });

    for (final ProvenanceEventRecord event : chronological) {
        // Every record is represented in the graph by an EventNode.
        final LineageNode eventNode = new EventNode(event);
        if (!nodes.add(eventNode)) {
            logger.debug("Did not add {} because it already exists in the 'nodes' set", eventNode);
        }

        // Chain this event onto whatever node was last seen for the same FlowFile UUID.
        final String eventUuid = event.getFlowFileUuid();
        final LineageNode previous = latestNodeByUuid.get(eventUuid);
        if (previous != null) {
            edges.add(new EdgeNode(selectEdgeUuid(event, previous), previous, eventNode));
        }
        latestNodeByUuid.put(eventUuid, eventNode);

        // Some event types also introduce FlowFile nodes; a failed helper has already
        // recorded the error, so simply stop.
        switch (event.getEventType()) {
            case FORK:
            case JOIN:
            case REPLAY:
            case FETCH:
            case CLONE:
                if (!linkChildrenAndParents(event, eventNode, latestNodeByUuid)) {
                    return;
                }
                break;
            case RECEIVE:
            case CREATE:
                if (!linkNewlyCreatedFlowFile(event, eventNode, latestNodeByUuid)) {
                    return;
                }
                break;
            default:
                break;
        }
    }

    final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    logger.debug("Finished building lineage with {} nodes and {} edges in {} millis", nodes.size(), edges.size(), elapsedMillis);
}

/**
 * Chooses the UUID used to label the edge from {@code previous} to the node of {@code event}.
 *
 * <p>For JOIN, CLONE and REPLAY events the previous node's FlowFile UUID is used; for every
 * other event type the event's own FlowFile UUID is used. (Per the original author's note,
 * the UUID carried by such a multi-FlowFile event pertains to only one of potentially many
 * UUIDs associated with it, so it is not necessarily the right label.)
 */
private String selectEdgeUuid(final ProvenanceEventRecord event, final LineageNode previous) {
    switch (event.getEventType()) {
        case JOIN:
        case CLONE:
        case REPLAY:
            return previous.getFlowFileUuid();
        default:
            return event.getFlowFileUuid();
    }
}

/**
 * Wires up the child and parent FlowFiles of an event that creates FlowFile nodes.
 *
 * <p>Each child UUID that is contained in {@code flowFileUuids} gets a new
 * {@link FlowFileNode} with an edge from the event to it. Each parent UUID gets an edge from
 * its latest known node to the event (unless that node is the event itself), and the event
 * then becomes the parent's latest node.
 *
 * @return {@code true} on success; {@code false} if a child FlowFile node already existed,
 *         in which case the error has been logged and passed to {@code setError}
 */
private boolean linkChildrenAndParents(final ProvenanceEventRecord event, final LineageNode eventNode,
        final Map<String, LineageNode> latestNodeByUuid) {
    for (final String childUuid : event.getChildUuids()) {
        if (!flowFileUuids.contains(childUuid)) {
            // Children outside the tracked set of FlowFile UUIDs are not drawn.
            continue;
        }

        final FlowFileNode childNode = new FlowFileNode(childUuid, event.getEventTime());
        if (!nodes.add(childNode)) {
            final String msg = "Unable to generate Lineage Graph because multiple "
                + "events were registered claiming to have generated the same FlowFile (UUID = "
                + childNode.getFlowFileUuid() + ")";
            logger.error(msg);
            setError(msg);
            return false;
        }

        edges.add(new EdgeNode(childNode.getFlowFileUuid(), eventNode, childNode));
        latestNodeByUuid.put(childUuid, childNode);
    }

    for (final String parentUuid : event.getParentUuids()) {
        final LineageNode parentTail = latestNodeByUuid.get(parentUuid);
        final boolean needsEdge = parentTail != null && !parentTail.equals(eventNode);
        if (needsEdge) {
            edges.add(new EdgeNode(parentUuid, parentTail, eventNode));
        }
        latestNodeByUuid.put(parentUuid, eventNode);
    }
    return true;
}

/**
 * Handles a RECEIVE or CREATE event: adds a {@link FlowFileNode} for the event's FlowFile,
 * connects the event to it, and records that node as the latest one for the FlowFile UUID.
 *
 * @return {@code true} on success; {@code false} if the FlowFile node already existed, in
 *         which case the error has been passed to {@code setError} and logged
 */
private boolean linkNewlyCreatedFlowFile(final ProvenanceEventRecord event, final LineageNode eventNode,
        final Map<String, LineageNode> latestNodeByUuid) {
    final LineageNode flowFileNode = new FlowFileNode(event.getFlowFileUuid(), event.getEventTime());
    if (!nodes.add(flowFileNode)) {
        final String msg = "Found cycle in graph. This indicates that multiple events "
            + "were registered claiming to have generated the same FlowFile (UUID = "
            + flowFileNode.getFlowFileUuid() + ")";
        setError(msg);
        logger.error(msg);
        return false;
    }

    edges.add(new EdgeNode(event.getFlowFileUuid(), eventNode, flowFileNode));
    latestNodeByUuid.put(event.getFlowFileUuid(), flowFileNode);
    return true;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EdgeNode(org.apache.nifi.provenance.lineage.EdgeNode) EventNode(org.apache.nifi.provenance.lineage.EventNode) FlowFileNode(org.apache.nifi.provenance.lineage.FlowFileNode) LineageNode(org.apache.nifi.provenance.lineage.LineageNode)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 EdgeNode (org.apache.nifi.provenance.lineage.EdgeNode)1 EventNode (org.apache.nifi.provenance.lineage.EventNode)1 FlowFileNode (org.apache.nifi.provenance.lineage.FlowFileNode)1 LineageNode (org.apache.nifi.provenance.lineage.LineageNode)1