Search in sources :

Example 21 with ProvenanceEventRecord

use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

the class TestHive2JDBC method testTableLineageWithDefaultTableNameWithConnectionParams.

/**
 * A Hive connection URL can have connection strings delimited by semicolons.
 */
@Test
public void testTableLineageWithDefaultTableNameWithConnectionParams() {
    final String processorName = "PutHiveQL";
    final String transitUri = "jdbc:hive2://0.example.com:10000;transportMode=http;httpPath=cliservice";
    final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
    when(record.getComponentType()).thenReturn(processorName);
    when(record.getTransitUri()).thenReturn(transitUri);
    when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
    // E.g. insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id
    when(record.getAttribute(ATTR_INPUT_TABLES)).thenReturn("tableA1, tableA2");
    when(record.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseB.tableB1");
    final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
    when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
    final AnalysisContext context = Mockito.mock(AnalysisContext.class);
    when(context.getClusterResolver()).thenReturn(clusterResolvers);
    final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
    assertNotNull(analyzer);
    final DataSetRefs refs = analyzer.analyze(context, record);
    assertEquals(2, refs.getInputs().size());
    // QualifiedName : Name
    final Map<String, String> expectedInputRefs = new HashMap<>();
    expectedInputRefs.put("default.tableA1@cluster1", "tableA1");
    expectedInputRefs.put("default.tableA2@cluster1", "tableA2");
    for (Referenceable ref : refs.getInputs()) {
        final String qName = (String) ref.get(ATTR_QUALIFIED_NAME);
        assertTrue(expectedInputRefs.containsKey(qName));
        assertEquals(expectedInputRefs.get(qName), ref.get(ATTR_NAME));
    }
    assertEquals(1, refs.getOutputs().size());
    Referenceable ref = refs.getOutputs().iterator().next();
    assertEquals("hive_table", ref.getTypeName());
    assertEquals("tableB1", ref.get(ATTR_NAME));
    assertEquals("databaseB.tableB1@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 22 with ProvenanceEventRecord

use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

the class TestHive2JDBC method testTableLineageWithZookeeperDiscovery.

/**
 * Hive connection URL can have multiple zookeeper host ports
 * and multiple parameters delimited with semicolons.
 * Database name can be omitted.
 */
@Test
public void testTableLineageWithZookeeperDiscovery() {
    final String processorName = "PutHiveQL";
    final String transitUri = "jdbc:hive2://0.example.com:2181,1.example.com:2181,2.example.com:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2";
    final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
    when(record.getComponentType()).thenReturn(processorName);
    when(record.getTransitUri()).thenReturn(transitUri);
    when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
    // E.g. insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id
    when(record.getAttribute(ATTR_INPUT_TABLES)).thenReturn("tableA1, tableA2");
    when(record.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseB.tableB1");
    final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
    when(clusterResolvers.fromHostNames(eq("0.example.com"), eq("1.example.com"), eq("2.example.com"))).thenReturn("cluster1");
    final AnalysisContext context = Mockito.mock(AnalysisContext.class);
    when(context.getClusterResolver()).thenReturn(clusterResolvers);
    final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
    assertNotNull(analyzer);
    final DataSetRefs refs = analyzer.analyze(context, record);
    assertEquals(2, refs.getInputs().size());
    // QualifiedName : Name
    final Map<String, String> expectedInputRefs = new HashMap<>();
    expectedInputRefs.put("default.tableA1@cluster1", "tableA1");
    expectedInputRefs.put("default.tableA2@cluster1", "tableA2");
    for (Referenceable ref : refs.getInputs()) {
        final String qName = (String) ref.get(ATTR_QUALIFIED_NAME);
        assertTrue(expectedInputRefs.containsKey(qName));
        assertEquals(expectedInputRefs.get(qName), ref.get(ATTR_NAME));
    }
    assertEquals(1, refs.getOutputs().size());
    Referenceable ref = refs.getOutputs().iterator().next();
    assertEquals("hive_table", ref.getTypeName());
    assertEquals("tableB1", ref.get(ATTR_NAME));
    assertEquals("databaseB.tableB1@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 23 with ProvenanceEventRecord

use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

the class TestHive2JDBC method testTableLineageWithZookeeperDiscoverySpecificDatabase.

/**
 * Hive connection URL using zookeeper and database name.
 */
@Test
public void testTableLineageWithZookeeperDiscoverySpecificDatabase() {
    final String processorName = "PutHiveQL";
    final String transitUri = "jdbc:hive2://0.example.com:2181,1.example.com:2181/some_database;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2";
    final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
    when(record.getComponentType()).thenReturn(processorName);
    when(record.getTransitUri()).thenReturn(transitUri);
    when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
    // E.g. insert into databaseB.tableB1 select something from tableA1 a1 inner join tableA2 a2 where a1.id = a2.id
    when(record.getAttribute(ATTR_INPUT_TABLES)).thenReturn("tableA1, tableA2");
    when(record.getAttribute(ATTR_OUTPUT_TABLES)).thenReturn("databaseB.tableB1");
    final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
    when(clusterResolvers.fromHostNames(eq("0.example.com"), eq("1.example.com"))).thenReturn("cluster1");
    final AnalysisContext context = Mockito.mock(AnalysisContext.class);
    when(context.getClusterResolver()).thenReturn(clusterResolvers);
    final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
    assertNotNull(analyzer);
    final DataSetRefs refs = analyzer.analyze(context, record);
    assertEquals(2, refs.getInputs().size());
    // QualifiedName : Name
    final Map<String, String> expectedInputRefs = new HashMap<>();
    expectedInputRefs.put("some_database.tableA1@cluster1", "tableA1");
    expectedInputRefs.put("some_database.tableA2@cluster1", "tableA2");
    for (Referenceable ref : refs.getInputs()) {
        final String qName = (String) ref.get(ATTR_QUALIFIED_NAME);
        assertTrue(expectedInputRefs.containsKey(qName));
        assertEquals(expectedInputRefs.get(qName), ref.get(ATTR_NAME));
    }
    assertEquals(1, refs.getOutputs().size());
    Referenceable ref = refs.getOutputs().iterator().next();
    assertEquals("hive_table", ref.getTypeName());
    assertEquals("tableB1", ref.get(ATTR_NAME));
    assertEquals("databaseB.tableB1@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 24 with ProvenanceEventRecord

use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

the class TestHive2JDBC method testDatabaseLineage.

/**
 * If a provenance event does not have table name attributes,
 * then a database lineage should be created.
 */
@Test
public void testDatabaseLineage() {
    final String processorName = "PutHiveQL";
    final String transitUri = "jdbc:hive2://0.example.com:10000/databaseA";
    final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
    when(record.getComponentType()).thenReturn(processorName);
    when(record.getTransitUri()).thenReturn(transitUri);
    when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
    final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
    when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
    final AnalysisContext context = Mockito.mock(AnalysisContext.class);
    when(context.getClusterResolver()).thenReturn(clusterResolvers);
    final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
    assertNotNull(analyzer);
    final DataSetRefs refs = analyzer.analyze(context, record);
    assertEquals(0, refs.getInputs().size());
    assertEquals(1, refs.getOutputs().size());
    Referenceable ref = refs.getOutputs().iterator().next();
    assertEquals("hive_db", ref.getTypeName());
    assertEquals("databaseA", ref.get(ATTR_NAME));
    assertEquals("databaseA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Example 25 with ProvenanceEventRecord

use of org.apache.nifi.provenance.ProvenanceEventRecord in project nifi by apache.

the class TestKafkaTopic method testPublishKafka.

@Test
public void testPublishKafka() {
    final String processorName = "PublishKafka";
    final String transitUri = "PLAINTEXT://0.example.com:6667/topicA";
    final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
    when(record.getComponentType()).thenReturn(processorName);
    when(record.getTransitUri()).thenReturn(transitUri);
    when(record.getEventType()).thenReturn(ProvenanceEventType.SEND);
    final ClusterResolvers clusterResolvers = Mockito.mock(ClusterResolvers.class);
    when(clusterResolvers.fromHostNames(matches(".+\\.example\\.com"))).thenReturn("cluster1");
    final AnalysisContext context = Mockito.mock(AnalysisContext.class);
    when(context.getClusterResolver()).thenReturn(clusterResolvers);
    final NiFiProvenanceEventAnalyzer analyzer = NiFiProvenanceEventAnalyzerFactory.getAnalyzer(processorName, transitUri, record.getEventType());
    assertNotNull(analyzer);
    final DataSetRefs refs = analyzer.analyze(context, record);
    assertEquals(0, refs.getInputs().size());
    assertEquals(1, refs.getOutputs().size());
    Referenceable ref = refs.getOutputs().iterator().next();
    assertEquals("topicA", ref.get(ATTR_NAME));
    assertEquals("topicA", ref.get("topic"));
    assertEquals("topicA@cluster1", ref.get(ATTR_QUALIFIED_NAME));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) ClusterResolvers(org.apache.nifi.atlas.resolver.ClusterResolvers) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) NiFiProvenanceEventAnalyzer(org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer) Test(org.junit.Test)

Aggregations

ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord)194 Test (org.junit.Test)118 StandardProvenanceEventRecord (org.apache.nifi.provenance.StandardProvenanceEventRecord)69 HashMap (java.util.HashMap)57 MockFlowFile (org.apache.nifi.util.MockFlowFile)52 ArrayList (java.util.ArrayList)36 IOException (java.io.IOException)32 TestRunner (org.apache.nifi.util.TestRunner)24 FlowFileHandlingException (org.apache.nifi.processor.exception.FlowFileHandlingException)23 DataSetRefs (org.apache.nifi.atlas.provenance.DataSetRefs)21 AnalysisContext (org.apache.nifi.atlas.provenance.AnalysisContext)20 Referenceable (org.apache.atlas.typesystem.Referenceable)19 NiFiProvenanceEventAnalyzer (org.apache.nifi.atlas.provenance.NiFiProvenanceEventAnalyzer)18 ClusterResolvers (org.apache.nifi.atlas.resolver.ClusterResolvers)18 RepositoryConfiguration (org.apache.nifi.provenance.RepositoryConfiguration)17 File (java.io.File)16 List (java.util.List)16 AtomicLong (java.util.concurrent.atomic.AtomicLong)16 PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor)15 Map (java.util.Map)12