Search in sources :

Example 1 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestHBaseTable method testHBaseTableWithMultipleZkHosts.

/**
 * Verifies that a FETCH event whose HBase transit URI lists several
 * comma-separated ZooKeeper hosts is analyzed into exactly one
 * {@code hbase_table} input (and no outputs), named after the table in the URI
 * and qualified with the cluster name returned by the cluster resolver.
 */
@Test
public void testHBaseTableWithMultipleZkHosts() {
    final String processorName = "FetchHBaseRow";
    final String transitUri = "hbase://zk0.example.com,zk2.example.com,zk3.example.com/tableA/rowB";
    final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
    when(record.getComponentType()).thenReturn(processorName);
    when(record.getTransitUri()).thenReturn(transitUri);
    when(record.getEventType()).thenReturn(ProvenanceEventType.FETCH);
    final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
    // matches() interprets its argument as a regular expression, so the dots are
    // escaped; otherwise each '.' would accept any character and the stub would
    // also answer for host names other than the three listed in the transit URI.
    when(clusterResolvers.fromHostNames(matches("zk0\\.example\\.com"), matches("zk2\\.example\\.com"), matches("zk3\\.example\\.com"))).thenReturn("cluster1");
    final AnalysisContext context = Mockito.mock(AnalysisContext.class);
    when(context.getClusterResolver()).thenReturn(clusterResolvers);
    final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
    assertNotNull(analyzer);
    final DataSetRefs refs = analyzer.analyze(context, record);
    // A fetch only reads from HBase: one input table, nothing written.
    assertEquals(1, refs.getInputs().size());
    assertEquals(0, refs.getOutputs().size());
    Referenceable ref = refs.getInputs().iterator().next();
    assertEquals("hbase_table", ref.getTypeName());
    assertEquals("tableA", ref.get(ATTR_NAME));
    assertEquals("tableA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 2 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestHive2JDBC method testTableLineageWithDefaultTableNameWithConnectionParams.

/**
 * Connection parameters may follow the host in a Hive connection URL,
 * separated by semicolons. The URL used here names no database, and the
 * unqualified input tables are expected to be reported under {@code default}.
 */
@Test
public void testTableLineageWithDefaultTableNameWithConnectionParams() {
    final String processorName = "PutHiveQL";
    final String transitUri = "jdbc:hive2://0.example.com:10000;transportMode=http;httpPath=cliservice";

    // Analysis context whose resolver answers "cluster1" for any *.example.com host.
    final ClusterResolvers resolvers = Mockito.mock(ClusterResolvers.class);
    when(resolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
    final AnalysisContext analysisContext = Mockito.mock(AnalysisContext.class);
    when(analysisContext.getClusterResolver()).thenReturn(resolvers);

    // SEND event carrying table attributes, e.g. for:
    // insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id
    final ProvenanceEventRecord event = Mockito.mock(ProvenanceEventRecord.class);
    when(event.getEventType()).thenReturn(ProvenanceEventType.SEND);
    when(event.getTransitUri()).thenReturn(transitUri);
    when(event.getComponentType()).thenReturn(processorName);
    when(event.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseB.tableB1");
    when(event.getAttribute(ATTR_INPUT_TABLES)).thenReturn("tableA1, tableA2");

    final NiFiProvenanceEventAnalyzer eventAnalyzer =
            NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, event.getEventType());
    assertNotNull(eventAnalyzer);
    final DataSetRefs dataSetRefs = eventAnalyzer.analyze(analysisContext, event);

    // Expected inputs, keyed by qualified name with the plain table name as value.
    final Map<String, String> nameByQualifiedName = new HashMap<>();
    nameByQualifiedName.put("default.tableA1@cluster1", "tableA1");
    nameByQualifiedName.put("default.tableA2@cluster1", "tableA2");

    assertEquals(2, dataSetRefs.getInputs().size());
    for (Referenceable input : dataSetRefs.getInputs()) {
        final String qualifiedName = (String) input.get(ATTR_QUALIFIED_NAME);
        assertTrue(nameByQualifiedName.containsKey(qualifiedName));
        final String expectedName = nameByQualifiedName.get(qualifiedName);
        assertEquals(expectedName, input.get(ATTR_NAME));
    }

    // The output table carries its own database name.
    assertEquals(1, dataSetRefs.getOutputs().size());
    final Referenceable output = dataSetRefs.getOutputs().iterator().next();
    assertEquals("hive_table", output.getTypeName());
    assertEquals("tableB1", output.get(ATTR_NAME));
    assertEquals("databaseB.tableB1@cluster1", output.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 3 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestHive2JDBC method testTableLineageWithZookeeperDiscovery.

/**
 * A Hive connection URL may list several ZooKeeper host:port pairs and carry
 * several semicolon-delimited parameters, with the database name left out.
 * The three hosts are expected to be handed to the cluster resolver without
 * their ports, and unqualified input tables to be reported under {@code default}.
 */
@Test
public void testTableLineageWithZookeeperDiscovery() {
    final String processorName = "PutHiveQL";
    final String transitUri = "jdbc:hive2://0.example.com:2181,1.example.com:2181,2.example.com:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2";

    // The resolver is stubbed only for exactly these three host names.
    final ClusterResolvers resolvers = Mockito.mock(ClusterResolvers.class);
    when(resolvers.fromHostNames(eq("0.example.com"), eq("1.example.com"), eq("2.example.com"))).thenReturn("cluster1");
    final AnalysisContext analysisContext = Mockito.mock(AnalysisContext.class);
    when(analysisContext.getClusterResolver()).thenReturn(resolvers);

    // SEND event carrying table attributes, e.g. for:
    // insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id
    final ProvenanceEventRecord event = Mockito.mock(ProvenanceEventRecord.class);
    when(event.getEventType()).thenReturn(ProvenanceEventType.SEND);
    when(event.getTransitUri()).thenReturn(transitUri);
    when(event.getComponentType()).thenReturn(processorName);
    when(event.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseB.tableB1");
    when(event.getAttribute(ATTR_INPUT_TABLES)).thenReturn("tableA1, tableA2");

    final NiFiProvenanceEventAnalyzer eventAnalyzer =
            NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, event.getEventType());
    assertNotNull(eventAnalyzer);
    final DataSetRefs dataSetRefs = eventAnalyzer.analyze(analysisContext, event);

    // Expected inputs, keyed by qualified name with the plain table name as value.
    final Map<String, String> nameByQualifiedName = new HashMap<>();
    nameByQualifiedName.put("default.tableA1@cluster1", "tableA1");
    nameByQualifiedName.put("default.tableA2@cluster1", "tableA2");

    assertEquals(2, dataSetRefs.getInputs().size());
    for (Referenceable input : dataSetRefs.getInputs()) {
        final String qualifiedName = (String) input.get(ATTR_QUALIFIED_NAME);
        assertTrue(nameByQualifiedName.containsKey(qualifiedName));
        final String expectedName = nameByQualifiedName.get(qualifiedName);
        assertEquals(expectedName, input.get(ATTR_NAME));
    }

    // The output table carries its own database name.
    assertEquals(1, dataSetRefs.getOutputs().size());
    final Referenceable output = dataSetRefs.getOutputs().iterator().next();
    assertEquals("hive_table", output.getTypeName());
    assertEquals("tableB1", output.get(ATTR_NAME));
    assertEquals("databaseB.tableB1@cluster1", output.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 4 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestHive2JDBC method testTableLineageWithZookeeperDiscoverySpecificDatabase.

/**
 * A ZooKeeper-discovery Hive connection URL that also names a database.
 * Unqualified input tables are expected to be qualified with that database,
 * while the output table keeps the database given in its own attribute.
 */
@Test
public void testTableLineageWithZookeeperDiscoverySpecificDatabase() {
    final String processorName = "PutHiveQL";
    final String transitUri = "jdbc:hive2://0.example.com:2181,1.example.com:2181/some_database;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2";

    // The resolver is stubbed only for exactly these two host names.
    final ClusterResolvers resolvers = Mockito.mock(ClusterResolvers.class);
    when(resolvers.fromHostNames(eq("0.example.com"), eq("1.example.com"))).thenReturn("cluster1");
    final AnalysisContext analysisContext = Mockito.mock(AnalysisContext.class);
    when(analysisContext.getClusterResolver()).thenReturn(resolvers);

    // SEND event carrying table attributes, e.g. for:
    // insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id
    final ProvenanceEventRecord event = Mockito.mock(ProvenanceEventRecord.class);
    when(event.getEventType()).thenReturn(ProvenanceEventType.SEND);
    when(event.getTransitUri()).thenReturn(transitUri);
    when(event.getComponentType()).thenReturn(processorName);
    when(event.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseB.tableB1");
    when(event.getAttribute(ATTR_INPUT_TABLES)).thenReturn("tableA1, tableA2");

    final NiFiProvenanceEventAnalyzer eventAnalyzer =
            NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, event.getEventType());
    assertNotNull(eventAnalyzer);
    final DataSetRefs dataSetRefs = eventAnalyzer.analyze(analysisContext, event);

    // Expected inputs, keyed by qualified name with the plain table name as value.
    final Map<String, String> nameByQualifiedName = new HashMap<>();
    nameByQualifiedName.put("some_database.tableA1@cluster1", "tableA1");
    nameByQualifiedName.put("some_database.tableA2@cluster1", "tableA2");

    assertEquals(2, dataSetRefs.getInputs().size());
    for (Referenceable input : dataSetRefs.getInputs()) {
        final String qualifiedName = (String) input.get(ATTR_QUALIFIED_NAME);
        assertTrue(nameByQualifiedName.containsKey(qualifiedName));
        final String expectedName = nameByQualifiedName.get(qualifiedName);
        assertEquals(expectedName, input.get(ATTR_NAME));
    }

    // The output table carries its own database name.
    assertEquals(1, dataSetRefs.getOutputs().size());
    final Referenceable output = dataSetRefs.getOutputs().iterator().next();
    assertEquals("hive_table", output.getTypeName());
    assertEquals("tableB1", output.get(ATTR_NAME));
    assertEquals("databaseB.tableB1@cluster1", output.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 5 with DataSetRefs

use of org.apache.nifi.atlas.provenance.DataSetRefs in project nifi by apache.

the class TestHive2JDBC method testDatabaseLineage.

/**
 * When the provenance event carries no table name attributes, the lineage is
 * expected at database level: a single {@code hive_db} output and no inputs.
 */
@Test
public void testDatabaseLineage() {
    final String processorName = "PutHiveQL";
    final String transitUri = "jdbc:hive2://0.example.com:10000/databaseA";

    // Analysis context whose resolver answers "cluster1" for any *.example.com host.
    final ClusterResolvers resolvers = Mockito.mock(ClusterResolvers.class);
    when(resolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
    final AnalysisContext analysisContext = Mockito.mock(AnalysisContext.class);
    when(analysisContext.getClusterResolver()).thenReturn(resolvers);

    // SEND event with no input/output table attributes stubbed.
    final ProvenanceEventRecord event = Mockito.mock(ProvenanceEventRecord.class);
    when(event.getEventType()).thenReturn(ProvenanceEventType.SEND);
    when(event.getTransitUri()).thenReturn(transitUri);
    when(event.getComponentType()).thenReturn(processorName);

    final NiFiProvenanceEventAnalyzer eventAnalyzer =
            NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, event.getEventType());
    assertNotNull(eventAnalyzer);
    final DataSetRefs dataSetRefs = eventAnalyzer.analyze(analysisContext, event);

    assertEquals(0, dataSetRefs.getInputs().size());
    assertEquals(1, dataSetRefs.getOutputs().size());

    // The only output is the database named in the transit URI.
    final Referenceable database = dataSetRefs.getOutputs().iterator().next();
    assertEquals("hive_db", database.getTypeName());
    assertEquals("databaseA", database.get(ATTR_NAME));
    assertEquals("databaseA@cluster1", database.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Aggregations

DataSetRefs (org.apache.nifi.atlas.provenance.DataSetRefs): 26, Referenceable (org.apache.atlas.typesystem.Referenceable): 22, ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord): 21, AnalysisContext (org.apache.nifi.atlas.provenance.AnalysisContext): 19, NiFiProvenanceEventAnalyzer (org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer): 18, ClusterResolvers (org.apache.nifi.atlas.resolver.ClusterResolvers): 18, Test (org.junit.Test): 18, HashMap (java.util.HashMap): 7, ArrayList (java.util.ArrayList): 6, ConnectionStatus (org.apache.nifi.controller.status.ConnectionStatus): 5, Tuple (org.apache.nifi.util.Tuple): 4, NiFiFlowPath (org.apache.nifi.atlas.NiFiFlowPath): 3, List (java.util.List): 2, ComputeLineageResult (org.apache.nifi.provenance.lineage.ComputeLineageResult): 2, URI (java.net.URI): 1, StandardCharsets (java.nio.charset.StandardCharsets): 1, Collections (java.util.Collections): 1, Map (java.util.Map): 1, Objects (java.util.Objects): 1, Matcher (java.util.regex.Matcher): 1