Search in sources :

Example 1 with ComputeLineageResult

use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by apache.

the class ITReportLineageToAtlas method testRecordAndDataSetLevel.

/**
 * Exercises lineage reporting with the {@code LINEAGE_STRATEGY_COMPLETE_PATH} strategy for a flow made of two parts:
 * a publish part (GetFile, PutFile, PublishKafkaRecord_0_10) run for two input files, and a consume part
 * (ConsumeKafkaRecord_0_10, PartitionRecord, UpdateAttribute, PutFile) where one consumed FlowFile is forked into two.
 * <p>
 * The provenance records are registered in a fixed order. The numeric comments give each record's position, and
 * those positions are the indices handed to {@code createLineage} further down, so the order of the
 * {@code prs.add(...)} calls must not be changed.
 */
@Test
public void testRecordAndDataSetLevel() throws Exception {
    final TestConfiguration tc = new TestConfiguration("RecordAndDataSetLevel");
    // Use the complete-path lineage strategy for this test.
    tc.properties.put(NIFI_LINEAGE_STRATEGY, LINEAGE_STRATEGY_COMPLETE_PATH.getValue());
    final ProvenanceRecords prs = tc.provenanceRecords;
    // Publish part: two FlowFiles (A1, B1) each go through RECEIVE, SEND (file), SEND (Kafka) and DROP.
    final String ffIdA1 = "A1000000";
    final String ffIdB1 = "B1000000";
    // 0: GetFile RECEIVE for A1
    prs.add(pr("22be62d9-c4a1-3056", "GetFile", RECEIVE, "file:/tmp/input/A1.csv", ffIdA1));
    // 1: GetFile RECEIVE for B1
    prs.add(pr("22be62d9-c4a1-3056", "GetFile", RECEIVE, "file:/tmp/input/B1.csv", ffIdB1));
    // 2: PutFile SEND for A1
    prs.add(pr("eaf013c1-aec5-39b0", "PutFile", SEND, "file:/tmp/output/A1.csv", ffIdA1));
    // 3: PutFile SEND for B1
    prs.add(pr("eaf013c1-aec5-39b0", "PutFile", SEND, "file:/tmp/output/B1.csv", ffIdB1));
    // 4: PublishKafkaRecord_0_10 SEND for A1
    prs.add(pr("97641de3-fb76-3d95", "PublishKafkaRecord_0_10", SEND, "PLAINTEXT://localhost:9092/nifi-test", ffIdA1));
    // 5: PublishKafkaRecord_0_10 SEND for B1
    prs.add(pr("97641de3-fb76-3d95", "PublishKafkaRecord_0_10", SEND, "PLAINTEXT://localhost:9092/nifi-test", ffIdB1));
    // 6: PublishKafkaRecord_0_10 DROP for A1
    prs.add(pr("97641de3-fb76-3d95", "PublishKafkaRecord_0_10", DROP, ffIdA1));
    // 7: PublishKafkaRecord_0_10 DROP for B1
    prs.add(pr("97641de3-fb76-3d95", "PublishKafkaRecord_0_10", DROP, ffIdB1));
    // Consume part: K1 is received from Kafka and forked into A2 and B2.
    final String ffIdK1 = "K1000000";
    // Forked child A2
    final String ffIdA2 = "A2000000";
    // Forked child B2
    final String ffIdB2 = "B2000000";
    // 8: ConsumeKafkaRecord_0_10 RECEIVE for K1
    prs.add(pr("529e6722-9b49-3b66", "ConsumeKafkaRecord_0_10", RECEIVE, "PLAINTEXT://localhost:9092/nifi-test", ffIdK1));
    // 9: PartitionRecord FORK for K1
    prs.add(pr("3f6d405e-6e3d-38c9", "PartitionRecord", FORK, ffIdK1));
    // 10: UpdateAttribute ATTRIBUTES_MODIFIED for A2
    prs.add(pr("db8bb12c-5cd3-3011", "UpdateAttribute", ATTRIBUTES_MODIFIED, ffIdA2));
    // 11: UpdateAttribute ATTRIBUTES_MODIFIED for B2
    prs.add(pr("db8bb12c-5cd3-3011", "UpdateAttribute", ATTRIBUTES_MODIFIED, ffIdB2));
    // 12: PutFile SEND for A2
    prs.add(pr("062caf95-da40-3a57", "PutFile", SEND, "file:/tmp/consumed/A_20171101_100701.csv", ffIdA2));
    // 13: PutFile SEND for B2
    prs.add(pr("062caf95-da40-3a57", "PutFile", SEND, "file:/tmp/consumed/B_20171101_100701.csv", ffIdB2));
    // 14: PutFile DROP for A2
    prs.add(pr("062caf95-da40-3a57", "PutFile", DROP, ffIdA2));
    // 15: PutFile DROP for B2
    prs.add(pr("062caf95-da40-3a57", "PutFile", DROP, ffIdB2));
    // Each map key below equals the position of the last record in its chain
    // (presumably that record's event id -- confirm against pr() / ProvenanceRecords).
    Map<Long, ComputeLineageResult> lineages = tc.lineageResults;
    Map<Long, ComputeLineageResult> parents = tc.parentLineageResults;
    // Publish A1: records 0 -> 2 -> 4 -> 6
    lineages.put(6L, createLineage(prs, 0, 2, 4, 6));
    // Publish B1: records 1 -> 3 -> 5 -> 7
    lineages.put(7L, createLineage(prs, 1, 3, 5, 7));
    // Consumed and forked K1: registered as the parent lineage of the FORK record (8 -> 9)
    parents.put(9L, createLineage(prs, 8, 9));
    // Processed A2: records 9 -> 10 -> 12 -> 14
    lineages.put(14L, createLineage(prs, 9, 10, 12, 14));
    // Processed B2: records 9 -> 11 -> 13 -> 15
    lineages.put(15L, createLineage(prs, 9, 11, 13, 15));
    test(tc);
    waitNotificationsGetDelivered();
    final Lineage lineage = getLineage();
    // Publish part: each input file must be linked to its own flow path.
    final Node inputFileA1 = lineage.findNode("fs_path", "/tmp/input/A1.csv@example");
    final Node inputFileB1 = lineage.findNode("fs_path", "/tmp/input/B1.csv@example");
    // These two flow paths are derived from the same set of Processors but from different input files, which results in different hashes.
    final Node getFileToPublishKafkaA = lineage.findNode("nifi_flow_path", "GetFile, PutFile, PublishKafkaRecord_0_10", "22be62d9-c4a1-3056-0000-000000000000::2823953997@example");
    final Node getFileToPublishKafkaB = lineage.findNode("nifi_flow_path", "GetFile, PutFile, PublishKafkaRecord_0_10", "22be62d9-c4a1-3056-0000-000000000000::568010061@example");
    lineage.assertLink(inputFileA1, getFileToPublishKafkaA);
    lineage.assertLink(inputFileB1, getFileToPublishKafkaB);
    // Both flow paths publish to the same Kafka topic, but write to separate output files.
    final Node nifiTestTopic = lineage.findNode("kafka_topic", "nifi-test@example");
    final Node outputFileA = lineage.findNode("fs_path", "/tmp/output/A1.csv@example");
    final Node outputFileB = lineage.findNode("fs_path", "/tmp/output/B1.csv@example");
    lineage.assertLink(getFileToPublishKafkaA, nifiTestTopic);
    lineage.assertLink(getFileToPublishKafkaB, nifiTestTopic);
    lineage.assertLink(getFileToPublishKafkaA, outputFileA);
    lineage.assertLink(getFileToPublishKafkaB, outputFileB);
    // Consume part: the consuming flow path feeds one FORK queue per forked child.
    final Node consumeNifiTestTopic = lineage.findNode("nifi_flow_path", "ConsumeKafkaRecord_0_10", "529e6722-9b49-3b66-0000-000000000000::3649132843@example");
    final Node forkedA = lineage.findNode("nifi_queue", "FORK", "3f6d405e-6e3d-38c9-0000-000000000000::234149075@example");
    final Node forkedB = lineage.findNode("nifi_queue", "FORK", "3f6d405e-6e3d-38c9-0000-000000000000::2377021542@example");
    lineage.assertLink(consumeNifiTestTopic, forkedA);
    lineage.assertLink(consumeNifiTestTopic, forkedB);
    // Each downstream flow path is looked up with the same qualified name as the FORK queue that feeds it.
    final Node partitionToPutA = lineage.findNode("nifi_flow_path", "PartitionRecord, UpdateAttribute, PutFile", "3f6d405e-6e3d-38c9-0000-000000000000::234149075@example");
    final Node partitionToPutB = lineage.findNode("nifi_flow_path", "PartitionRecord, UpdateAttribute, PutFile", "3f6d405e-6e3d-38c9-0000-000000000000::2377021542@example");
    final Node consumedFileA = lineage.findNode("fs_path", "/tmp/consumed/A_20171101_100701.csv@example");
    final Node consumedFileB = lineage.findNode("fs_path", "/tmp/consumed/B_20171101_100701.csv@example");
    lineage.assertLink(forkedA, partitionToPutA);
    lineage.assertLink(forkedB, partitionToPutB);
    lineage.assertLink(partitionToPutA, consumedFileA);
    lineage.assertLink(partitionToPutB, consumedFileB);
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) EdgeNode(org.apache.nifi.provenance.lineage.EdgeNode) LineageNode(org.apache.nifi.provenance.lineage.LineageNode) EventNode(org.apache.nifi.provenance.lineage.EventNode) Node(org.apache.nifi.atlas.emulator.Node) Matchers.anyLong(org.mockito.Matchers.anyLong) Lineage(org.apache.nifi.atlas.emulator.Lineage) Test(org.junit.Test)

Example 2 with ComputeLineageResult

use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by apache.

the class ITReportLineageToAtlas method createLineage.

/**
 * Builds a mocked {@link ComputeLineageResult} that chains the given provenance records together.
 * <p>
 * Every pair of consecutive entries in {@code indices} becomes one edge (created by {@code createEdge}).
 * The mock reports those edges in order, and reports the edges' source and destination nodes once each,
 * in first-seen order. {@code awaitCompletion} is stubbed to return {@code true}.
 *
 * @param prs     the provenance records the indices refer to
 * @param indices the records to connect, in traversal order; fewer than two entries produces no edges
 * @return the mocked lineage result
 * @throws InterruptedException declared because stubbing {@code awaitCompletion} requires it
 */
private ComputeLineageResult createLineage(ProvenanceRecords prs, int... indices) throws InterruptedException {
    final List<LineageEdge> chainedEdges = new ArrayList<>();
    // A LinkedHashSet de-duplicates nodes shared by adjacent edges while keeping insertion order.
    final Set<LineageNode> visitedNodes = new LinkedHashSet<>();

    // Walk consecutive (previous, next) index pairs.
    for (int next = 1; next < indices.length; next++) {
        final EdgeNode link = createEdge(prs, indices[next - 1], indices[next]);
        chainedEdges.add(link);
        visitedNodes.add(link.getSource());
        visitedNodes.add(link.getDestination());
    }
    final List<LineageNode> nodeList = new ArrayList<>(visitedNodes);

    final ComputeLineageResult result = mock(ComputeLineageResult.class);
    when(result.awaitCompletion(anyLong(), any())).thenReturn(true);
    when(result.getEdges()).thenReturn(chainedEdges);
    when(result.getNodes()).thenReturn(nodeList);
    return result;
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) EdgeNode(org.apache.nifi.provenance.lineage.EdgeNode) LineageEdge(org.apache.nifi.provenance.lineage.LineageEdge) LineageNode(org.apache.nifi.provenance.lineage.LineageNode)

Example 3 with ComputeLineageResult

use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by apache.

the class ITReportLineageToAtlas method testS2SDirect.

/**
 * Site-to-Site pass-through: a client NiFi receives FlowFiles from a remote output port and sends them
 * straight to a remote input port, with no Processor in between.
 */
@Test
public void testS2SDirect() throws Exception {
    final TestConfiguration config = new TestConfiguration("S2SDirect");
    final ProvenanceRecords records = config.provenanceRecords;

    final String outputPortTransitUri = "http://nifi.example.com:8080/nifi-api/data-transfer/output-ports" + "/015f1040-dcd7-17bd-5c1f-e31afe0a09a4/transactions/tx-1/flow-files";
    final String inputPortTransitUri = "http://nifi.example.com:8080/nifi-api/data-transfer/input-ports" + "/015f101e-dcd7-17bd-8899-1a723733521a/transactions/tx-2/flow-files";
    // Record 0: received from the remote output port.
    records.add(pr("d73d9115-b987-4ffc", "Remote Output Port", RECEIVE, outputPortTransitUri));
    // Record 1: sent to the remote input port.
    records.add(pr("a4f14247-89aa-4e6c", "Remote Input Port", SEND, inputPortTransitUri));

    // Lineage for record 1: received from the remote output port, then sent via the remote input port.
    final Map<Long, ComputeLineageResult> lineageResults = config.lineageResults;
    lineageResults.put(1L, createLineage(records, 0, 1));

    test(config);
    waitNotificationsGetDelivered();

    final Lineage reported = getLineage();
    final Node rootFlow = reported.findNode("nifi_flow", "S2SDirect", "S2SDirect@example");
    final Node outputPort = reported.findNode("nifi_output_port", "output", "015f1040-dcd7-17bd-5c1f-e31afe0a09a4@example");
    final Node outputPortPath = reported.findNode("nifi_flow_path", "Remote Output Port", "d73d9115-b987-4ffc");
    final Node inputPortQueue = reported.findNode("nifi_queue", "queue", "a4f14247-89aa-4e6c");
    final Node inputPortPath = reported.findNode("nifi_flow_path", "Remote Input Port", "a4f14247-89aa-4e6c");
    final Node inputPort = reported.findNode("nifi_input_port", "input", "015f101e-dcd7-17bd-8899-1a723733521a@example");

    // Even if there is no Processor, lineage can be reported using root flow_path.
    reported.assertLink(rootFlow, outputPortPath);
    reported.assertLink(outputPort, outputPortPath);
    reported.assertLink(outputPortPath, inputPortQueue);
    reported.assertLink(inputPortQueue, inputPortPath);
    reported.assertLink(inputPortPath, inputPort);
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) EdgeNode(org.apache.nifi.provenance.lineage.EdgeNode) LineageNode(org.apache.nifi.provenance.lineage.LineageNode) EventNode(org.apache.nifi.provenance.lineage.EventNode) Node(org.apache.nifi.atlas.emulator.Node) Matchers.anyLong(org.mockito.Matchers.anyLong) Lineage(org.apache.nifi.atlas.emulator.Lineage) Test(org.junit.Test)

Example 4 with ComputeLineageResult

use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by apache.

the class CompleteFlowPathLineage method extractLineagePaths.

/**
 * Recursively walks the lineage upwards from {@code lastEvent}, recording each visited event in
 * {@code lineagePath}.
 * <p>
 * When {@code lineagePath.shouldCreateSeparatePath(...)} is true for the event's type, every parent event
 * starts a new {@link LineagePath} that is added to {@code lineagePath.getParents()}. Otherwise the single
 * parent is appended to the same path. Traversal stops when an event has no parent.
 *
 * @param context     used to expand parent lineage and to resolve provenance events by id
 * @param lineageTree the lineage tree consulted by {@code findParentEvents}; it may be extended here
 *                    when parent lineage has to be expanded
 * @param lineagePath the path being built; {@code lastEvent} is appended to its events
 * @param lastEvent   the event whose parents are looked up next
 * @throws IllegalStateException if an event that does not create separate paths has more than one parent
 */
private void extractLineagePaths(AnalysisContext context, Map<String, List<LineageNode>> lineageTree, LineagePath lineagePath, ProvenanceEventRecord lastEvent) {
    lineagePath.getEvents().add(lastEvent);
    List<LineageNode> parentEvents = findParentEvents(lineageTree, lastEvent);
    final boolean createSeparateParentPath = lineagePath.shouldCreateSeparatePath(lastEvent.getEventType());
    if (createSeparateParentPath && (parentEvents == null || parentEvents.isEmpty())) {
        // Try expanding the lineage.
        // This is for FlowFiles that were FORKed (or JOINed, etc.) from other FlowFile(s).
        // FlowFiles routed to 'original' may have these event types too, but their parents are fetched together.
        // For example, with these inputs: CREATE(F1), FORK (F1 -> F2, F3), DROP(F1), SEND (F2), SEND(F3), DROP(F2), DROP(F3)
        // Then when DROP(F1) is queried, FORK(F1) and CREATE(F1) are returned.
        // For DROP(F2), SEND(F2) and FORK(F2) are returned.
        // For DROP(F3), SEND(F3) and FORK(F3) are returned.
        // In this case, FORK(F2) and FORK(F3) have to query their parents again, to get CREATE(F1).
        final ComputeLineageResult joinedParents = context.findParents(lastEvent.getEventId());
        if (joinedParents == null) {
            // The parent lineage may be unavailable; skip the expansion instead of passing null on.
            // The no-parent check below then ends this path.
            logger.warn("Parent lineage was not found: {}", lastEvent);
        } else {
            analyzeLineageTree(joinedParents, lineageTree);
            parentEvents = findParentEvents(lineageTree, lastEvent);
        }
    }
    if (parentEvents == null || parentEvents.isEmpty()) {
        logger.debug("{} does not have any parent, stop extracting lineage path.", lastEvent);
        return;
    }
    if (createSeparateParentPath) {
        // Treat each parent as a separate lineage_path; parents whose event cannot be resolved are skipped.
        parentEvents.stream().map(parentEvent -> context.getProvenanceEvent(Long.parseLong(parentEvent.getIdentifier()))).filter(Objects::nonNull).forEach(parent -> {
            final LineagePath parentPath = new LineagePath();
            lineagePath.getParents().add(parentPath);
            extractLineagePaths(context, lineageTree, parentPath, parent);
        });
    } else {
        // Simply traverse upwards.
        if (parentEvents.size() > 1) {
            throw new IllegalStateException(String.format("Having more than 1 parents for event type %s" + " is not expected. Should ask NiFi developer for investigation. %s", lastEvent.getEventType(), lastEvent));
        }
        final ProvenanceEventRecord parentEvent = context.getProvenanceEvent(Long.parseLong(parentEvents.get(0).getIdentifier()));
        if (parentEvent != null) {
            extractLineagePaths(context, lineageTree, lineagePath, parentEvent);
        }
    }
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) LineageNode(org.apache.nifi.provenance.lineage.LineageNode)

Example 5 with ComputeLineageResult

use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by apache.

the class SimpleFlowPathLineage method findPreviousProvenanceEvent.

/**
 * Looks up the provenance event that precedes {@code event} in its lineage.
 *
 * @param context used to query the lineage and to resolve the preceding event by id
 * @param event   the event whose predecessor is wanted
 * @return the preceding event as returned by {@code context.getProvenanceEvent}, or {@code null} when the
 *         lineage is unavailable or contains no preceding node (a warning is logged in both cases)
 */
private ProvenanceEventRecord findPreviousProvenanceEvent(AnalysisContext context, ProvenanceEventRecord event) {
    final ComputeLineageResult computedLineage = context.queryLineage(event.getEventId());
    if (computedLineage != null) {
        final LineageNode predecessor = traverseLineage(computedLineage, String.valueOf(event.getEventId()));
        if (predecessor != null) {
            // The node identifier carries the numeric id of the preceding event.
            return context.getProvenanceEvent(Long.parseLong(predecessor.getIdentifier()));
        }
        // No preceding node was found (expired or other reasons): log a warning and give up.
        logger.warn("Traverse lineage could not find any preceding provenance event node: {}", new Object[] { event });
        return null;
    }
    logger.warn("Lineage was not found: {}", new Object[] { event });
    return null;
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) LineageNode(org.apache.nifi.provenance.lineage.LineageNode)

Aggregations

ComputeLineageResult (org.apache.nifi.provenance.lineage.ComputeLineageResult)10 LineageNode (org.apache.nifi.provenance.lineage.LineageNode)9 EdgeNode (org.apache.nifi.provenance.lineage.EdgeNode)5 ArrayList (java.util.ArrayList)4 Lineage (org.apache.nifi.atlas.emulator.Lineage)4 Node (org.apache.nifi.atlas.emulator.Node)4 EventNode (org.apache.nifi.provenance.lineage.EventNode)4 Test (org.junit.Test)4 Matchers.anyLong (org.mockito.Matchers.anyLong)4 LinkedHashSet (java.util.LinkedHashSet)3 LineageEdge (org.apache.nifi.provenance.lineage.LineageEdge)3 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 NiFiFlowPath (org.apache.nifi.atlas.NiFiFlowPath)1 DataSetRefs (org.apache.nifi.atlas.provenance.DataSetRefs)1 ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord)1 ProvenanceEventLineageNode (org.apache.nifi.provenance.lineage.ProvenanceEventLineageNode)1 Tuple (org.apache.nifi.util.Tuple)1 LineageDTO (org.apache.nifi.web.api.dto.provenance.lineage.LineageDTO)1