Use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by Apache.
Class ITReportLineageToAtlas, method testRecordAndDataSetLevel.
/**
 * Verifies the lineage reported when the complete-path lineage strategy is configured, using a
 * publish flow (GetFile, PutFile, PublishKafkaRecord_0_10) and a consume flow
 * (ConsumeKafkaRecord_0_10, PartitionRecord, UpdateAttribute, PutFile) fed by two input files.
 *
 * The numeric comments (0..15) give each provenance record's position in {@code prs}; the same
 * positions are passed to {@code createLineage} further down, so records must not be reordered.
 */
@Test
public void testRecordAndDataSetLevel() throws Exception {
final TestConfiguration tc = new TestConfiguration("RecordAndDataSetLevel");
// Select the complete-path strategy instead of whatever the configuration defaults to.
tc.properties.put(NIFI_LINEAGE_STRATEGY, LINEAGE_STRATEGY_COMPLETE_PATH.getValue());
final ProvenanceRecords prs = tc.provenanceRecords;
// Publish part: two FlowFiles (A1, B1) travel through the same three processors.
final String ffIdA1 = "A1000000";
final String ffIdB1 = "B1000000";
// 0
prs.add(pr("22be62d9-c4a1-3056", "GetFile", RECEIVE, "file:/tmp/input/A1.csv", ffIdA1));
// 1
prs.add(pr("22be62d9-c4a1-3056", "GetFile", RECEIVE, "file:/tmp/input/B1.csv", ffIdB1));
// 2
prs.add(pr("eaf013c1-aec5-39b0", "PutFile", SEND, "file:/tmp/output/A1.csv", ffIdA1));
// 3
prs.add(pr("eaf013c1-aec5-39b0", "PutFile", SEND, "file:/tmp/output/B1.csv", ffIdB1));
// 4
prs.add(pr("97641de3-fb76-3d95", "PublishKafkaRecord_0_10", SEND, "PLAINTEXT://localhost:9092/nifi-test", ffIdA1));
// 5
prs.add(pr("97641de3-fb76-3d95", "PublishKafkaRecord_0_10", SEND, "PLAINTEXT://localhost:9092/nifi-test", ffIdB1));
// 6
prs.add(pr("97641de3-fb76-3d95", "PublishKafkaRecord_0_10", DROP, ffIdA1));
// 7
prs.add(pr("97641de3-fb76-3d95", "PublishKafkaRecord_0_10", DROP, ffIdB1));
// Consume part: one FlowFile (K1) is received from the topic and forked.
final String ffIdK1 = "K1000000";
// Forked children
final String ffIdA2 = "A2000000";
// Forked children
final String ffIdB2 = "B2000000";
// 8
prs.add(pr("529e6722-9b49-3b66", "ConsumeKafkaRecord_0_10", RECEIVE, "PLAINTEXT://localhost:9092/nifi-test", ffIdK1));
// 9
prs.add(pr("3f6d405e-6e3d-38c9", "PartitionRecord", FORK, ffIdK1));
// 10
prs.add(pr("db8bb12c-5cd3-3011", "UpdateAttribute", ATTRIBUTES_MODIFIED, ffIdA2));
// 11
prs.add(pr("db8bb12c-5cd3-3011", "UpdateAttribute", ATTRIBUTES_MODIFIED, ffIdB2));
// 12
prs.add(pr("062caf95-da40-3a57", "PutFile", SEND, "file:/tmp/consumed/A_20171101_100701.csv", ffIdA2));
// 13
prs.add(pr("062caf95-da40-3a57", "PutFile", SEND, "file:/tmp/consumed/B_20171101_100701.csv", ffIdB2));
// 14
prs.add(pr("062caf95-da40-3a57", "PutFile", DROP, ffIdA2));
// 15
prs.add(pr("062caf95-da40-3a57", "PutFile", DROP, ffIdB2));
// NOTE(review): the map keys below equal the positions of the DROP / FORK records above, so they
// are presumably event ids assigned in insertion order — confirm against ProvenanceRecords.
Map<Long, ComputeLineageResult> lineages = tc.lineageResults;
Map<Long, ComputeLineageResult> parents = tc.parentLineageResults;
// Publish A1
lineages.put(6L, createLineage(prs, 0, 2, 4, 6));
// Publish B1
lineages.put(7L, createLineage(prs, 1, 3, 5, 7));
// Consumed and Forked K1 (registered as a parent lineage, keyed by the FORK record)
parents.put(9L, createLineage(prs, 8, 9));
// Processed A2
lineages.put(14L, createLineage(prs, 9, 10, 12, 14));
// Processed B2
lineages.put(15L, createLineage(prs, 9, 11, 13, 15));
// Run the scenario, wait for the notifications, then inspect the resulting lineage graph.
test(tc);
waitNotificationsGetDelivered();
final Lineage lineage = getLineage();
// Publish part
final Node inputFileA1 = lineage.findNode("fs_path", "/tmp/input/A1.csv@example");
final Node inputFileB1 = lineage.findNode("fs_path", "/tmp/input/B1.csv@example");
// These two flow paths are derived from the same set of Processors, but with different input files, which results in different hashes.
final Node getFileToPublishKafkaA = lineage.findNode("nifi_flow_path", "GetFile, PutFile, PublishKafkaRecord_0_10", "22be62d9-c4a1-3056-0000-000000000000::2823953997@example");
final Node getFileToPublishKafkaB = lineage.findNode("nifi_flow_path", "GetFile, PutFile, PublishKafkaRecord_0_10", "22be62d9-c4a1-3056-0000-000000000000::568010061@example");
lineage.assertLink(inputFileA1, getFileToPublishKafkaA);
lineage.assertLink(inputFileB1, getFileToPublishKafkaB);
final Node nifiTestTopic = lineage.findNode("kafka_topic", "nifi-test@example");
final Node outputFileA = lineage.findNode("fs_path", "/tmp/output/A1.csv@example");
final Node outputFileB = lineage.findNode("fs_path", "/tmp/output/B1.csv@example");
// Both publish paths write to the shared topic, and each writes to its own output file.
lineage.assertLink(getFileToPublishKafkaA, nifiTestTopic);
lineage.assertLink(getFileToPublishKafkaB, nifiTestTopic);
lineage.assertLink(getFileToPublishKafkaA, outputFileA);
lineage.assertLink(getFileToPublishKafkaB, outputFileB);
// Consume part
final Node consumeNifiTestTopic = lineage.findNode("nifi_flow_path", "ConsumeKafkaRecord_0_10", "529e6722-9b49-3b66-0000-000000000000::3649132843@example");
// One FORK queue per forked child connects the consuming path to the per-child paths.
final Node forkedA = lineage.findNode("nifi_queue", "FORK", "3f6d405e-6e3d-38c9-0000-000000000000::234149075@example");
final Node forkedB = lineage.findNode("nifi_queue", "FORK", "3f6d405e-6e3d-38c9-0000-000000000000::2377021542@example");
lineage.assertLink(consumeNifiTestTopic, forkedA);
lineage.assertLink(consumeNifiTestTopic, forkedB);
// The per-child flow paths are looked up with the same qualified names as their FORK queues.
final Node partitionToPutA = lineage.findNode("nifi_flow_path", "PartitionRecord, UpdateAttribute, PutFile", "3f6d405e-6e3d-38c9-0000-000000000000::234149075@example");
final Node partitionToPutB = lineage.findNode("nifi_flow_path", "PartitionRecord, UpdateAttribute, PutFile", "3f6d405e-6e3d-38c9-0000-000000000000::2377021542@example");
final Node consumedFileA = lineage.findNode("fs_path", "/tmp/consumed/A_20171101_100701.csv@example");
final Node consumedFileB = lineage.findNode("fs_path", "/tmp/consumed/B_20171101_100701.csv@example");
lineage.assertLink(forkedA, partitionToPutA);
lineage.assertLink(forkedB, partitionToPutB);
lineage.assertLink(partitionToPutA, consumedFileA);
lineage.assertLink(partitionToPutB, consumedFileB);
}
Use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by Apache.
Class ITReportLineageToAtlas, method createLineage.
/**
 * Builds a mocked {@link ComputeLineageResult} that chains the given provenance records.
 *
 * Each consecutive pair of {@code indices} becomes one edge (via {@code createEdge}); the node
 * list contains every edge source and destination once, in first-seen order.
 *
 * @param prs the provenance records the indices refer to
 * @param indices positions in {@code prs}, in chain order; fewer than two yields no edges and no nodes
 * @return a mock whose {@code awaitCompletion} reports {@code true} and whose edges and nodes are the chain built here
 * @throws InterruptedException declared because stubbing {@code awaitCompletion} requires it
 */
private ComputeLineageResult createLineage(ProvenanceRecords prs, int... indices) throws InterruptedException {
    final List<LineageEdge> chainEdges = new ArrayList<>();
    // LinkedHashSet de-duplicates nodes shared by adjacent edges while keeping insertion order.
    final Set<LineageNode> chainNodes = new LinkedHashSet<>();
    for (int next = 1; next < indices.length; next++) {
        final EdgeNode link = createEdge(prs, indices[next - 1], indices[next]);
        chainEdges.add(link);
        chainNodes.add(link.getSource());
        chainNodes.add(link.getDestination());
    }

    final ComputeLineageResult result = mock(ComputeLineageResult.class);
    when(result.awaitCompletion(anyLong(), any())).thenReturn(true);
    when(result.getEdges()).thenReturn(chainEdges);
    when(result.getNodes()).thenReturn(new ArrayList<>(chainNodes));
    return result;
}
Use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by Apache.
Class ITReportLineageToAtlas, method testS2SDirect.
/**
 * Site-to-Site pass-through scenario: a client NiFi receives FlowFiles from a remote output port
 * and forwards them to a remote input port, with no processing in between.
 */
@Test
public void testS2SDirect() throws Exception {
    final TestConfiguration config = new TestConfiguration("S2SDirect");
    final ProvenanceRecords records = config.provenanceRecords;

    final String outputPortTransitUri = "http://nifi.example.com:8080/nifi-api/data-transfer/output-ports" + "/015f1040-dcd7-17bd-5c1f-e31afe0a09a4/transactions/tx-1/flow-files";
    final String inputPortTransitUri = "http://nifi.example.com:8080/nifi-api/data-transfer/input-ports" + "/015f101e-dcd7-17bd-8899-1a723733521a/transactions/tx-2/flow-files";
    records.add(pr("d73d9115-b987-4ffc", "Remote Output Port", RECEIVE, outputPortTransitUri));
    records.add(pr("a4f14247-89aa-4e6c", "Remote Input Port", SEND, inputPortTransitUri));

    // Received from the remote output port, then sent on via the remote input port.
    final Map<Long, ComputeLineageResult> lineageResults = config.lineageResults;
    lineageResults.put(1L, createLineage(records, 0, 1));

    test(config);
    waitNotificationsGetDelivered();

    final Lineage reported = getLineage();
    final Node rootFlow = reported.findNode("nifi_flow", "S2SDirect", "S2SDirect@example");
    final Node outputPort = reported.findNode("nifi_output_port", "output", "015f1040-dcd7-17bd-5c1f-e31afe0a09a4@example");
    final Node outputPortPath = reported.findNode("nifi_flow_path", "Remote Output Port", "d73d9115-b987-4ffc");
    final Node inputPortQueue = reported.findNode("nifi_queue", "queue", "a4f14247-89aa-4e6c");
    final Node inputPortPath = reported.findNode("nifi_flow_path", "Remote Input Port", "a4f14247-89aa-4e6c");
    final Node inputPort = reported.findNode("nifi_input_port", "input", "015f101e-dcd7-17bd-8899-1a723733521a@example");

    // Lineage is still reported through the root flow_path even though no Processor is involved.
    reported.assertLink(rootFlow, outputPortPath);
    reported.assertLink(outputPort, outputPortPath);
    reported.assertLink(outputPortPath, inputPortQueue);
    reported.assertLink(inputPortQueue, inputPortPath);
    reported.assertLink(inputPortPath, inputPort);
}
Use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by Apache.
Class CompleteFlowPathLineage, method extractLineagePaths.
/**
 * Recursively walks upwards from {@code lastEvent}, appending each visited event to
 * {@code lineagePath} and starting a separate parent path wherever
 * {@link LineagePath#shouldCreateSeparatePath} says the event type requires one.
 *
 * When a separate parent path is required but {@code lineageTree} holds no parent for the event,
 * the parents are fetched through {@code context.findParents} and merged into {@code lineageTree}
 * before looking again. If that lookup yields no result, the walk stops at this event.
 *
 * @param context source of parent lineage and of provenance events by id
 * @param lineageTree lookup structure consulted by {@code findParentEvents}; may be extended by this method
 * @param lineagePath the path being populated; receives {@code lastEvent} and possibly new parent paths
 * @param lastEvent the event to add and continue upwards from
 * @throws IllegalStateException if an event that does not start a separate path has more than one parent
 */
private void extractLineagePaths(AnalysisContext context, Map<String, List<LineageNode>> lineageTree, LineagePath lineagePath, ProvenanceEventRecord lastEvent) {
    lineagePath.getEvents().add(lastEvent);
    List<LineageNode> parentEvents = findParentEvents(lineageTree, lastEvent);
    final boolean createSeparateParentPath = lineagePath.shouldCreateSeparatePath(lastEvent.getEventType());
    if (createSeparateParentPath && (parentEvents == null || parentEvents.isEmpty())) {
        // Try expanding the lineage.
        // This is for FlowFiles that were FORKed (or JOINed, etc.) from other FlowFile(s).
        // FlowFiles routed to 'original' may have these event types too, however their parents are fetched together.
        // For example, with these inputs: CREATE(F1), FORK (F1 -> F2, F3), DROP(F1), SEND (F2), SEND(F3), DROP(F2), DROP(F3)
        // Then when DROP(F1) is queried, FORK(F1) and CREATE(F1) are returned.
        // For DROP(F2), SEND(F2) and FORK(F2) are returned.
        // For DROP(F3), SEND(F3) and FORK(F3) are returned.
        // In this case, FORK(F2) and FORK(F3) have to query their parents again, to get CREATE(F1).
        final ComputeLineageResult joinedParents = context.findParents(lastEvent.getEventId());
        if (joinedParents == null) {
            // Lineage lookups through AnalysisContext can come back empty-handed (the simple-path
            // strategy guards its lookup the same way); treat that as "no parents" rather than
            // dereferencing null.
            logger.warn("Parent lineage was not found: {}", lastEvent);
        } else {
            analyzeLineageTree(joinedParents, lineageTree);
            parentEvents = findParentEvents(lineageTree, lastEvent);
        }
    }
    if (parentEvents == null || parentEvents.isEmpty()) {
        logger.debug("{} does not have any parent, stop extracting lineage path.", lastEvent);
        return;
    }
    if (createSeparateParentPath) {
        // Treat each resolvable parent as the start of its own lineage_path.
        parentEvents.stream()
                .map(parentEvent -> context.getProvenanceEvent(Long.parseLong(parentEvent.getIdentifier())))
                .filter(Objects::nonNull)
                .forEach(parent -> {
                    final LineagePath parentPath = new LineagePath();
                    lineagePath.getParents().add(parentPath);
                    extractLineagePaths(context, lineageTree, parentPath, parent);
                });
    } else {
        // Simply traverse upwards.
        if (parentEvents.size() > 1) {
            throw new IllegalStateException(String.format("Having more than 1 parents for event type %s" + " is not expected. Should ask NiFi developer for investigation. %s", lastEvent.getEventType(), lastEvent));
        }
        final ProvenanceEventRecord parentEvent = context.getProvenanceEvent(Long.parseLong(parentEvents.get(0).getIdentifier()));
        // A parent whose event can no longer be resolved ends the walk silently.
        if (parentEvent != null) {
            extractLineagePaths(context, lineageTree, lineagePath, parentEvent);
        }
    }
}
Use of org.apache.nifi.provenance.lineage.ComputeLineageResult in project nifi by Apache.
Class SimpleFlowPathLineage, method findPreviousProvenanceEvent.
/**
 * Resolves the provenance event that precedes {@code event}, as determined by
 * {@code traverseLineage} over the lineage queried for the event's id.
 *
 * @param context used to query the lineage and to fetch the preceding event by id
 * @param event the event whose predecessor is wanted
 * @return whatever {@code context.getProvenanceEvent} yields for the preceding node's id, or
 *         {@code null} (after logging a warning) when no lineage or no preceding node is found
 */
private ProvenanceEventRecord findPreviousProvenanceEvent(AnalysisContext context, ProvenanceEventRecord event) {
    final ComputeLineageResult lineageResult = context.queryLineage(event.getEventId());
    if (lineageResult != null) {
        final LineageNode predecessor = traverseLineage(lineageResult, String.valueOf(event.getEventId()));
        if (predecessor != null) {
            return context.getProvenanceEvent(Long.parseLong(predecessor.getIdentifier()));
        }
        // No preceding node (expired or otherwise missing): warn and give up without failing.
        logger.warn("Traverse lineage could not find any preceding provenance event node: {}", new Object[] { event });
        return null;
    }
    logger.warn("Lineage was not found: {}", new Object[] { event });
    return null;
}
Aggregations