Search in sources :

Example 1 with EventNode

use of org.apache.nifi.provenance.lineage.EventNode in project nifi by apache.

the class ITReportLineageToAtlas method createEdge.

/**
 * Builds an edge connecting the provenance events found at two positions of {@code prs}.
 *
 * <p>The first argument passed to {@code EdgeNode} is the source event's component type,
 * the literal {@code " to "}, and the target event's event type, concatenated.</p>
 *
 * @param prs    the provenance records from which both events are looked up
 * @param srcIdx index of the record wrapped as the edge's source node
 * @param tgtIdx index of the record wrapped as the edge's target node
 * @return a new edge from the source event node to the target event node
 */
private EdgeNode createEdge(ProvenanceRecords prs, int srcIdx, int tgtIdx) {
    final ProvenanceEventRecord sourceRecord = prs.get(srcIdx);
    final ProvenanceEventRecord targetRecord = prs.get(tgtIdx);

    // Wrap both records as lineage event nodes before assembling the edge.
    final EventNode sourceNode = new EventNode(sourceRecord);
    final EventNode targetNode = new EventNode(targetRecord);

    final String edgeLabel = sourceRecord.getComponentType() + " to " + targetRecord.getEventType();
    return new EdgeNode(edgeLabel, sourceNode, targetNode);
}
Also used : EventNode(org.apache.nifi.provenance.lineage.EventNode) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) EdgeNode(org.apache.nifi.provenance.lineage.EdgeNode)

Example 2 with EventNode

use of org.apache.nifi.provenance.lineage.EventNode in project nifi by apache.

the class TestPersistentProvenanceRepository method testLineageReceiveDrop.

/**
 * Registers a RECEIVE event followed by a DROP event for a single FlowFile UUID, waits for
 * the repository to roll over, and verifies the lineage computed for that UUID.
 *
 * <p>Expected shape: three nodes, and every edge either runs from the RECEIVE event to a
 * FlowFile node or from a FlowFile node to the DROP event. The test is skipped when
 * {@code isWindowsEnvironment()} reports true.</p>
 *
 * @throws IOException          declared by the repository calls used here
 * @throws InterruptedException declared by the repository calls used here
 * @throws ParseException       declared by the repository calls used here
 */
@Test
public void testLineageReceiveDrop() throws IOException, InterruptedException, ParseException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfiguration();
    config.setMaxRecordLife(3, TimeUnit.SECONDS);
    config.setMaxStorageCapacity(1024L * 1024L);
    config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);
    config.setMaxEventFileCapacity(1024L * 1024L);
    config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields()));

    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS);
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);

    final String uuid = "00000000-0000-0000-0000-000000000001";
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("abc", "xyz");
    attributes.put("uuid", uuid);
    attributes.put("filename", "file-" + uuid);

    // First event: the FlowFile is received.
    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    repo.registerEvent(builder.build());

    // Second event: the same builder is reused, one millisecond later, as a DROP without a transit URI.
    builder.setEventTime(System.currentTimeMillis() + 1);
    builder.setEventType(ProvenanceEventType.DROP);
    builder.setTransitUri(null);
    repo.registerEvent(builder.build());

    repo.waitForRollover();

    final Lineage lineage = repo.computeLineage(uuid, createUser());
    assertNotNull(lineage);

    // Nodes should consist of a RECEIVE followed by FlowFileNode, followed by a DROP
    final List<LineageNode> nodes = lineage.getNodes();
    final List<LineageEdge> edges = lineage.getEdges();
    assertEquals(3, nodes.size());

    for (final LineageEdge edge : edges) {
        if (edge.getSource().getNodeType() == LineageNodeType.FLOWFILE_NODE) {
            // FlowFile -> DROP event
            assertEquals(LineageNodeType.PROVENANCE_EVENT_NODE, edge.getDestination().getNodeType());
            assertEquals(ProvenanceEventType.DROP, ((EventNode) edge.getDestination()).getEventType());
        } else {
            // RECEIVE event -> FlowFile
            assertEquals(ProvenanceEventType.RECEIVE, ((EventNode) edge.getSource()).getEventType());
            assertEquals(LineageNodeType.FLOWFILE_NODE, edge.getDestination().getNodeType());
        }
    }
}
Also used : HashMap(java.util.HashMap) Lineage(org.apache.nifi.provenance.lineage.Lineage) EventNode(org.apache.nifi.provenance.lineage.EventNode) LineageEdge(org.apache.nifi.provenance.lineage.LineageEdge) LineageNode(org.apache.nifi.provenance.lineage.LineageNode) Test(org.junit.Test)

Example 3 with EventNode

use of org.apache.nifi.provenance.lineage.EventNode in project nifi by apache.

the class StandardLineageResult method computeLineage.

/**
 * Computes the lineage from the relevant Provenance Event Records. This
 * method must be called with the write lock held and is only going to be
 * useful after all of the records have been successfully obtained.
 *
 * <p>The {@code nodes} and {@code edges} collections are cleared and rebuilt from
 * {@code relevantRecords}, processed in order of event time and then event ID. Each record
 * becomes an {@link EventNode}; events that introduce FlowFiles additionally produce
 * {@link FlowFileNode}s, and edges connect each node to the most recent node seen for the
 * same FlowFile UUID.</p>
 *
 * <p>If a FlowFile node that is about to be added is already present in {@code nodes}, an
 * error is logged and recorded via {@code setError} and the method returns immediately,
 * leaving whatever nodes and edges had been built up to that point.</p>
 */
private void computeLineage() {
    logger.debug("Computing lineage with the following events: {}", relevantRecords);
    final long startNanos = System.nanoTime();
    // Start from scratch: any previously computed graph is discarded.
    nodes.clear();
    edges.clear();
    // maps FlowFile UUID to last event for that FlowFile
    Map<String, LineageNode> lastEventMap = new HashMap<>();
    // Sort a copy so that relevantRecords itself is left in its original order.
    final List<ProvenanceEventRecord> sortedRecords = new ArrayList<>(relevantRecords);
    Collections.sort(sortedRecords, new Comparator<ProvenanceEventRecord>() {

        @Override
        public int compare(final ProvenanceEventRecord o1, final ProvenanceEventRecord o2) {
            // Sort on Event Time, then Event ID.
            final int eventTimeComparison = Long.compare(o1.getEventTime(), o2.getEventTime());
            if (eventTimeComparison == 0) {
                return Long.compare(o1.getEventId(), o2.getEventId());
            } else {
                return eventTimeComparison;
            }
        }
    });
    // convert the StandardProvenanceRecord objects into Lineage nodes (FlowFileNode, EventNodes).
    for (final ProvenanceEventRecord record : sortedRecords) {
        final LineageNode lineageNode = new EventNode(record);
        // A false return from add() is only logged; processing of this record continues.
        final boolean added = nodes.add(lineageNode);
        if (!added) {
            logger.debug("Did not add {} because it already exists in the 'nodes' set", lineageNode);
        }
        // Create an edge that connects this node to the previous node for the same FlowFile UUID.
        final LineageNode lastNode = lastEventMap.get(record.getFlowFileUuid());
        if (lastNode != null) {
            // We calculate the Edge UUID based on the type of this event.
            // If this event is a JOIN, CLONE or REPLAY, then we use the previous node's UUID because
            // such an event's UUID is not necessarily what we want, since it pertains to
            // only one of (potentially) many UUIDs associated with the event. Otherwise, we know that
            // the UUID of this record is appropriate, so we just use it.
            final String edgeUuid;
            switch(record.getEventType()) {
                case JOIN:
                case CLONE:
                case REPLAY:
                    edgeUuid = lastNode.getFlowFileUuid();
                    break;
                default:
                    edgeUuid = record.getFlowFileUuid();
                    break;
            }
            edges.add(new EdgeNode(edgeUuid, lastNode, lineageNode));
        }
        // This event is now the most recent node for its FlowFile UUID; the cases below may
        // replace that entry with a FlowFile node.
        lastEventMap.put(record.getFlowFileUuid(), lineageNode);
        switch(record.getEventType()) {
            case FORK:
            case JOIN:
            case REPLAY:
            case FETCH:
            case CLONE:
                {
                    // For events that create FlowFile nodes, we need to create the FlowFile Nodes and associated Edges, as appropriate
                    for (final String childUuid : record.getChildUuids()) {
                        // Only children whose UUID is in flowFileUuids get a node; others are ignored.
                        if (flowFileUuids.contains(childUuid)) {
                            final FlowFileNode childNode = new FlowFileNode(childUuid, record.getEventTime());
                            final boolean isNewFlowFile = nodes.add(childNode);
                            if (!isNewFlowFile) {
                                // The child FlowFile node already exists: abort and report the error.
                                final String msg = "Unable to generate Lineage Graph because multiple " + "events were registered claiming to have generated the same FlowFile (UUID = " + childNode.getFlowFileUuid() + ")";
                                logger.error(msg);
                                setError(msg);
                                return;
                            }
                            // Edge from this event to the child FlowFile; the child node becomes the
                            // latest node for the child's UUID.
                            edges.add(new EdgeNode(childNode.getFlowFileUuid(), lineageNode, childNode));
                            lastEventMap.put(childUuid, childNode);
                        }
                    }
                    // Link each parent's most recent node to this event (unless that node is this
                    // event itself), then make this event the latest node for the parent UUID.
                    for (final String parentUuid : record.getParentUuids()) {
                        LineageNode lastNodeForParent = lastEventMap.get(parentUuid);
                        if (lastNodeForParent != null && !lastNodeForParent.equals(lineageNode)) {
                            edges.add(new EdgeNode(parentUuid, lastNodeForParent, lineageNode));
                        }
                        lastEventMap.put(parentUuid, lineageNode);
                    }
                }
                break;
            case RECEIVE:
            case CREATE:
                {
                    // for a receive or create event, we want to create a FlowFile Node that represents the
                    // FlowFile received/created and create an edge from the Event to the FlowFile Node
                    final LineageNode flowFileNode = new FlowFileNode(record.getFlowFileUuid(), record.getEventTime());
                    final boolean isNewFlowFile = nodes.add(flowFileNode);
                    if (!isNewFlowFile) {
                        // The FlowFile node already exists: abort and report the error.
                        final String msg = "Found cycle in graph. This indicates that multiple events " + "were registered claiming to have generated the same FlowFile (UUID = " + flowFileNode.getFlowFileUuid() + ")";
                        setError(msg);
                        logger.error(msg);
                        return;
                    }
                    edges.add(new EdgeNode(record.getFlowFileUuid(), lineageNode, flowFileNode));
                    // Later events for this UUID attach to the FlowFile node rather than to this event.
                    lastEventMap.put(record.getFlowFileUuid(), flowFileNode);
                }
                break;
            default:
                // Other event types add no FlowFile nodes.
                break;
        }
    }
    final long nanos = System.nanoTime() - startNanos;
    logger.debug("Finished building lineage with {} nodes and {} edges in {} millis", nodes.size(), edges.size(), TimeUnit.NANOSECONDS.toMillis(nanos));
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EdgeNode(org.apache.nifi.provenance.lineage.EdgeNode) EventNode(org.apache.nifi.provenance.lineage.EventNode) FlowFileNode(org.apache.nifi.provenance.lineage.FlowFileNode) LineageNode(org.apache.nifi.provenance.lineage.LineageNode)

Example 4 with EventNode

use of org.apache.nifi.provenance.lineage.EventNode in project nifi by apache.

the class TestPersistentProvenanceRepository method testLineageReceiveDropAsync.

/**
 * Asynchronous variant of the RECEIVE/DROP lineage test: registers a RECEIVE event followed
 * by a DROP event for a single FlowFile UUID, submits a lineage computation, polls until
 * its result reports finished, and verifies the resulting graph.
 *
 * <p>Expected shape: three nodes, and every edge either runs from the RECEIVE event to a
 * FlowFile node or from a FlowFile node to the DROP event. The test is skipped when
 * {@code isWindowsEnvironment()} reports true.</p>
 *
 * @throws IOException          declared by the repository calls used here
 * @throws InterruptedException if the thread is interrupted while sleeping between polls
 * @throws ParseException       declared by the repository calls used here
 */
@Test
public void testLineageReceiveDropAsync() throws IOException, InterruptedException, ParseException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfiguration();
    config.setMaxRecordLife(3, TimeUnit.SECONDS);
    config.setMaxStorageCapacity(1024L * 1024L);
    config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);
    config.setMaxEventFileCapacity(1024L * 1024L);
    config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields()));

    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS);
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);

    final String uuid = "00000000-0000-0000-0000-000000000001";
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("abc", "xyz");
    attributes.put("uuid", uuid);
    attributes.put("filename", "file-" + uuid);

    // First event: the FlowFile is received.
    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    repo.registerEvent(builder.build());

    // Second event: the same builder is reused, one millisecond later, as a DROP without a transit URI.
    builder.setEventTime(System.currentTimeMillis() + 1);
    builder.setEventType(ProvenanceEventType.DROP);
    builder.setTransitUri(null);
    repo.registerEvent(builder.build());

    repo.waitForRollover();

    final AsyncLineageSubmission submission = repo.submitLineageComputation(uuid, createUser());
    // Check for null before the submission is dereferenced by the polling loop below.
    assertNotNull(submission);

    // NOTE(review): this poll has no upper bound; if the computation never finishes the test hangs.
    while (!submission.getResult().isFinished()) {
        Thread.sleep(100L);
    }

    // Nodes should consist of a RECEIVE followed by FlowFileNode, followed by a DROP
    final List<LineageNode> nodes = submission.getResult().getNodes();
    final List<LineageEdge> edges = submission.getResult().getEdges();
    assertEquals(3, nodes.size());

    for (final LineageEdge edge : edges) {
        if (edge.getSource().getNodeType() == LineageNodeType.FLOWFILE_NODE) {
            // FlowFile -> DROP event
            assertEquals(LineageNodeType.PROVENANCE_EVENT_NODE, edge.getDestination().getNodeType());
            assertEquals(ProvenanceEventType.DROP, ((EventNode) edge.getDestination()).getEventType());
        } else {
            // RECEIVE event -> FlowFile
            assertEquals(ProvenanceEventType.RECEIVE, ((EventNode) edge.getSource()).getEventType());
            assertEquals(LineageNodeType.FLOWFILE_NODE, edge.getDestination().getNodeType());
        }
    }
}
Also used : HashMap(java.util.HashMap) EventNode(org.apache.nifi.provenance.lineage.EventNode) LineageEdge(org.apache.nifi.provenance.lineage.LineageEdge) LineageNode(org.apache.nifi.provenance.lineage.LineageNode) Test(org.junit.Test)

Aggregations

EventNode (org.apache.nifi.provenance.lineage.EventNode): 4, HashMap (java.util.HashMap): 3, LineageNode (org.apache.nifi.provenance.lineage.LineageNode): 3, EdgeNode (org.apache.nifi.provenance.lineage.EdgeNode): 2, LineageEdge (org.apache.nifi.provenance.lineage.LineageEdge): 2, Test (org.junit.Test): 2, ArrayList (java.util.ArrayList): 1, ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord): 1, FlowFileNode (org.apache.nifi.provenance.lineage.FlowFileNode): 1, Lineage (org.apache.nifi.provenance.lineage.Lineage): 1