Search in sources :

Example 21 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestHive2JDBC method testTableLineageWithDefaultTableName.

/**
 * Checks Hive table lineage for a PutHiveQL SEND event whose JDBC transit URI names no database.
 * Input tables given without a database are expected to be qualified with 'default',
 * while the output table, which names its database explicitly, keeps that database.
 */
@Test
public void testTableLineageWithDefaultTableName() {
    final String componentType = "PutHiveQL";
    final String uri = "jdbc:hive2://0.example.com:10000";

    // Host names under example.com are resolved to 'cluster1'.
    final ClusterResolvers resolvers = Mockito.mock(ClusterResolvers.class);
    when(resolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");

    final AnalysisContext analysisContext = Mockito.mock(AnalysisContext.class);
    when(analysisContext.getClusterResolver()).thenReturn(resolvers);

    final ProvenanceEventRecord event = Mockito.mock(ProvenanceEventRecord.class);
    when(event.getComponentType()).thenReturn(componentType);
    when(event.getTransitUri()).thenReturn(uri);
    when(event.getEventType()).thenReturn(ProvenanceEventType.SEND);
    // Models a statement such as:
    // insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id
    when(event.getAttribute(ATTR_INPUT_TABLES)).thenReturn("tableA1, tableA2");
    when(event.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseB.tableB1");

    // Expected inputs, keyed by qualified name and mapped to the plain table name.
    final Map<String, String> nameByQualifiedName = new HashMap<>();
    nameByQualifiedName.put("default.tableA1@cluster1", "tableA1");
    nameByQualifiedName.put("default.tableA2@cluster1", "tableA2");

    final NiFiProvenanceEventAnalyzer eventAnalyzer =
            NiFiProvenanceEventAnalyzerFactory.getAnalyzer(componentType, uri, event.getEventType());
    assertNotNull(eventAnalyzer);

    final DataSetRefs dataSetRefs = eventAnalyzer.analyze(analysisContext, event);

    // Both input tables must be reported, each under a known qualified name.
    assertEquals(2, dataSetRefs.getInputs().size());
    for (Referenceable input : dataSetRefs.getInputs()) {
        final String qualifiedName = (String) input.get(ATTR_QUALIFIED_NAME);
        assertTrue(nameByQualifiedName.containsKey(qualifiedName));
        assertEquals(nameByQualifiedName.get(qualifiedName), input.get(ATTR_NAME));
    }

    // The single output keeps the database given in the attribute.
    assertEquals(1, dataSetRefs.getOutputs().size());
    final Referenceable output = dataSetRefs.getOutputs().iterator().next();
    assertEquals("hive_table", output.getTypeName());
    assertEquals("tableB1", output.get(ATTR_NAME));
    assertEquals("databaseB.tableB1@cluster1", output.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 22 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestKafkaTopic method testPublishKafkaMultipleBrokers.

/**
 * Checks that a PublishKafka SEND event whose transit URI lists two brokers
 * yields no inputs and exactly one output reference for the topic, qualified by the resolved cluster name.
 */
@Test
public void testPublishKafkaMultipleBrokers() {
    final String componentType = "PublishKafka";
    final String uri = "PLAINTEXT://0.example.com:6667,1.example.com:6667/topicA";

    // The resolver is stubbed only for the exact pair of broker host names in the URI.
    final ClusterResolvers resolvers = Mockito.mock(ClusterResolvers.class);
    when(resolvers.fromHostNames(eq("0.example.com"), eq("1.example.com"))).thenReturn("cluster1");

    final AnalysisContext analysisContext = Mockito.mock(AnalysisContext.class);
    when(analysisContext.getClusterResolver()).thenReturn(resolvers);

    final ProvenanceEventRecord event = Mockito.mock(ProvenanceEventRecord.class);
    when(event.getComponentType()).thenReturn(componentType);
    when(event.getTransitUri()).thenReturn(uri);
    when(event.getEventType()).thenReturn(ProvenanceEventType.SEND);

    final NiFiProvenanceEventAnalyzer eventAnalyzer =
            NiFiProvenanceEventAnalyzerFactory.getAnalyzer(componentType, uri, event.getEventType());
    assertNotNull(eventAnalyzer);

    final DataSetRefs dataSetRefs = eventAnalyzer.analyze(analysisContext, event);
    assertEquals(0, dataSetRefs.getInputs().size());
    assertEquals(1, dataSetRefs.getOutputs().size());

    final Referenceable topic = dataSetRefs.getOutputs().iterator().next();
    assertEquals("topicA", topic.get(ATTR_NAME));
    assertEquals("topicA", topic.get("topic"));
    assertEquals("topicA@cluster1", topic.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 23 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestNiFiRemotePort method testRemoteOutputPort.

/**
 * Checks that a RECEIVE event from a "Remote Output Port" component yields a single input reference
 * of the NiFi output port type and no outputs. The expected name is the source name of the
 * connection stubbed for the port id.
 */
@Test
public void testRemoteOutputPort() {
    final String portType = "Remote Output Port";
    final String uri = "http://0.example.com:8080/nifi-api/data-transfer/output-ports/port-guid/transactions/tx-guid/flow-files";

    // A single connection whose source is the port under test.
    final ConnectionStatus fromPort = new ConnectionStatus();
    fromPort.setSourceId("port-guid");
    fromPort.setSourceName("outputPortA");
    final List<ConnectionStatus> outgoing = new ArrayList<>();
    outgoing.add(fromPort);

    // Host names under example.com are resolved to 'cluster1'.
    final ClusterResolvers resolvers = Mockito.mock(ClusterResolvers.class);
    when(resolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");

    final AnalysisContext analysisContext = Mockito.mock(AnalysisContext.class);
    when(analysisContext.getClusterResolver()).thenReturn(resolvers);
    when(analysisContext.findConnectionFrom(matches("port-guid"))).thenReturn(outgoing);

    final ProvenanceEventRecord event = Mockito.mock(ProvenanceEventRecord.class);
    when(event.getComponentId()).thenReturn("port-guid");
    when(event.getComponentType()).thenReturn(portType);
    when(event.getTransitUri()).thenReturn(uri);
    when(event.getEventType()).thenReturn(ProvenanceEventType.RECEIVE);

    final NiFiProvenanceEventAnalyzer eventAnalyzer =
            NiFiProvenanceEventAnalyzerFactory.getAnalyzer(portType, uri, event.getEventType());
    assertNotNull(eventAnalyzer);

    final DataSetRefs dataSetRefs = eventAnalyzer.analyze(analysisContext, event);
    assertEquals(1, dataSetRefs.getInputs().size());
    assertEquals(0, dataSetRefs.getOutputs().size());

    final Referenceable port = dataSetRefs.getInputs().iterator().next();
    assertEquals(TYPE_NIFI_OUTPUT_PORT, port.getTypeName());
    assertEquals("outputPortA", port.get(ATTR_NAME));
    assertEquals("port-guid@cluster1", port.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) ArrayList(java.util.ArrayList) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) ConnectionStatus(org.apache.nifi.controller.status.ConnectionStatus) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 24 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestPutHiveStreaming method testTableLineage.

/**
 * Checks that a PutHiveStreaming SEND event carrying an output-table attribute yields
 * no inputs and exactly one 'hive_table' output, qualified by database and resolved cluster name.
 */
@Test
public void testTableLineage() {
    final String componentType = "PutHiveStreaming";
    final String uri = "thrift://0.example.com:9083";

    // Host names under example.com are resolved to 'cluster1'.
    final ClusterResolvers resolvers = Mockito.mock(ClusterResolvers.class);
    when(resolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");

    final AnalysisContext analysisContext = Mockito.mock(AnalysisContext.class);
    when(analysisContext.getClusterResolver()).thenReturn(resolvers);

    final ProvenanceEventRecord event = Mockito.mock(ProvenanceEventRecord.class);
    when(event.getComponentType()).thenReturn(componentType);
    when(event.getTransitUri()).thenReturn(uri);
    when(event.getEventType()).thenReturn(ProvenanceEventType.SEND);
    when(event.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseA.tableA");

    final NiFiProvenanceEventAnalyzer eventAnalyzer =
            NiFiProvenanceEventAnalyzerFactory.getAnalyzer(componentType, uri, event.getEventType());
    assertNotNull(eventAnalyzer);

    final DataSetRefs dataSetRefs = eventAnalyzer.analyze(analysisContext, event);
    assertEquals(0, dataSetRefs.getInputs().size());
    assertEquals(1, dataSetRefs.getOutputs().size());

    final Referenceable table = dataSetRefs.getOutputs().iterator().next();
    assertEquals("hive_table", table.getTypeName());
    assertEquals("tableA", table.get(ATTR_NAME));
    assertEquals("databaseA.tableA@cluster1", table.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 25 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestUnknownDataSet method testSomethingHavingIncomingConnection.

/**
 * Checks that analysing a CREATE event for a processor which has an incoming connection
 * produces no data set references at all (the analyzer returns null).
 */
@Test
public void testSomethingHavingIncomingConnection() {
    final String componentType = "SomeProcessor";
    final String componentId = "processor-1234";

    // Only the presence of an incoming connection matters, so an empty status is enough.
    final List<ConnectionStatus> incoming = new ArrayList<>();
    incoming.add(new ConnectionStatus());

    final ClusterResolvers resolvers = Mockito.mock(ClusterResolvers.class);
    when(resolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");

    final AnalysisContext analysisContext = Mockito.mock(AnalysisContext.class);
    when(analysisContext.getClusterResolver()).thenReturn(resolvers);
    when(analysisContext.findConnectionTo(componentId)).thenReturn(incoming);

    final ProvenanceEventRecord event = Mockito.mock(ProvenanceEventRecord.class);
    when(event.getComponentType()).thenReturn(componentType);
    when(event.getComponentId()).thenReturn(componentId);
    when(event.getEventType()).thenReturn(ProvenanceEventType.CREATE);

    // No transit URI is supplied when looking up the analyzer.
    final NiFiProvenanceEventAnalyzer eventAnalyzer =
            NiFiProvenanceEventAnalyzerFactory.getAnalyzer(componentType, null, event.getEventType());
    assertNotNull(eventAnalyzer);

    final DataSetRefs dataSetRefs = eventAnalyzer.analyze(analysisContext, event);
    assertNull("If the processor has incoming connections, no refs should be created", dataSetRefs);
}
Also used : ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) ArrayList(java.util.ArrayList) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) ConnectionStatus(org.apache.nifi.controller.status.ConnectionStatus) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Aggregations

DataSetRefs (org.apache.nifi.atlas.provenance.DataSetRefs): 26; Referenceable (org.apache.atlas.typesystem.Referenceable): 22; ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord): 21; AnalysisContext (org.apache.nifi.atlas.provenance.AnalysisContext): 19; NiFiProvenanceEventAnalyzer (org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer): 18; ClusterResolvers (org.apache.nifi.atlas.resolver.ClusterResolvers): 18; Test (org.junit.Test): 18; HashMap (java.util.HashMap): 7; ArrayList (java.util.ArrayList): 6; ConnectionStatus (org.apache.nifi.controller.status.ConnectionStatus): 5; Tuple (org.apache.nifi.util.Tuple): 4; NiFiFlowPath (org.apache.nifi.atlas.NiFiFlowPath): 3; List (java.util.List): 2; ComputeLineageResult (org.apache.nifi.provenance.lineage.ComputeLineageResult): 2; URI (java.net.URI): 1; StandardCharsets (java.nio.charset.StandardCharsets): 1; Collections (java.util.Collections): 1; Map (java.util.Map): 1; Objects (java.util.Objects): 1; Matcher (java.util.regex.Matcher): 1