Search in sources :

Example 31 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class HiveSourceTest method testGetWorkUnitsForTable.

/**
 * Checks that the Hive source emits exactly one work unit for a single Avro test table,
 * and that the work unit carries the expected database name, table name and schema URL.
 */
@Test
public void testGetWorkUnitsForTable() throws Exception {
    final String database = "testdb2";
    final String table = "testtable2";
    final String storageLocation = "/tmp/testtable2";

    // Drop any database left over from an earlier run before building the fixture.
    // NOTE(review): the three boolean flags presumably mean deleteData=false,
    // ignoreUnknownDb=true, cascade=true - confirm against the metastore client API.
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(database, false, true, true);

    SourceState sourceState = getTestState(database);
    this.hiveMetastoreTestUtils.createTestAvroTable(database, table, storageLocation, Optional.<String>absent());

    List<WorkUnit> produced = hiveSource.getWorkunits(sourceState);

    // One workunit for the table, no dummy workunits
    Assert.assertEquals(produced.size(), 1);

    // Wrap the generic work unit so the Hive-specific accessors become available.
    HiveWorkUnit hiveWorkUnit = new HiveWorkUnit(produced.get(0));
    Assert.assertEquals(hiveWorkUnit.getHiveDataset().getDbAndTable().getDb(), database);
    Assert.assertEquals(hiveWorkUnit.getHiveDataset().getDbAndTable().getTable(), table);
    Assert.assertEquals(hiveWorkUnit.getTableSchemaUrl(), new Path("/tmp/dummy"));
}
Also used : Path(org.apache.hadoop.fs.Path) SourceState(org.apache.gobblin.configuration.SourceState) HiveWorkUnit(org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Example 32 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class HiveMaterializerTest method testMaterializeTablePartition.

/**
 * Materializes a single partition ({@code <partitionColumn>=part1}) of the test dataset into a
 * new Avro table and checks that the destination table holds exactly the four expected rows.
 */
@Test
public void testMaterializeTablePartition() throws Exception {
    String destinationTable = "materializeTablePartition";

    File stagingDir = Files.createTempDir();
    stagingDir.deleteOnExit();

    // Describe the destination table and the partition to materialize.
    TableLikeStageableTableMetadata destinationMetadata =
        new TableLikeStageableTableMetadata(this.dataset.getTable(), this.dbName, destinationTable,
            stagingDir.getAbsolutePath());
    String partitionSpec = String.format("%s=part1", this.partitionColumn);

    WorkUnit materializationUnit = HiveMaterializer.viewMaterializationWorkUnit(this.dataset,
        HiveConverterUtils.StorageFormat.AVRO, destinationMetadata, partitionSpec);

    // The task must report success both after running and after committing.
    HiveMaterializer materializer = new HiveMaterializer(getTaskContextForRun(materializationUnit));
    materializer.run();
    Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
    materializer.commit();
    Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

    // Read the destination table back.
    // NOTE(review): the trailing 3 is presumably the number of columns to fetch - confirm.
    String query = String.format("SELECT * FROM %s.%s", this.dbName, destinationTable);
    List<List<String>> rows = executeStatementAndGetResults(this.jdbcConnector, query, 3);
    Assert.assertEquals(rows.size(), 4);

    List<String> firstColumn = rows.stream().map(row -> row.get(0)).collect(Collectors.toList());
    Assert.assertEquals(firstColumn, Lists.newArrayList("101", "102", "103", "104"));
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) TableLikeStageableTableMetadata(org.apache.gobblin.data.management.conversion.hive.entities.TableLikeStageableTableMetadata) File(java.io.File) Test(org.testng.annotations.Test)

Example 33 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class HiveMaterializerTest method testMaterializeTable.

/**
 * Materializes the whole test dataset (no partition filter) into a new Avro table and checks
 * that the destination table holds all eight expected rows.
 */
@Test
public void testMaterializeTable() throws Exception {
    String destinationTable = "materializeTable";

    File stagingDir = Files.createTempDir();
    stagingDir.deleteOnExit();

    // Describe the destination table; the null partition argument below means no partition is named.
    TableLikeStageableTableMetadata destinationMetadata =
        new TableLikeStageableTableMetadata(this.dataset.getTable(), this.dbName, destinationTable,
            stagingDir.getAbsolutePath());

    WorkUnit materializationUnit = HiveMaterializer.viewMaterializationWorkUnit(this.dataset,
        HiveConverterUtils.StorageFormat.AVRO, destinationMetadata, null);

    // The task must report success both after running and after committing.
    HiveMaterializer materializer = new HiveMaterializer(getTaskContextForRun(materializationUnit));
    materializer.run();
    Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
    materializer.commit();
    Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

    // Read the destination table back.
    // NOTE(review): the trailing 3 is presumably the number of columns to fetch - confirm.
    String query = String.format("SELECT * FROM %s.%s", this.dbName, destinationTable);
    List<List<String>> rows = executeStatementAndGetResults(this.jdbcConnector, query, 3);
    Assert.assertEquals(rows.size(), 8);

    List<String> firstColumn = rows.stream().map(row -> row.get(0)).collect(Collectors.toList());
    Assert.assertEquals(firstColumn,
        Lists.newArrayList("101", "102", "103", "104", "201", "202", "203", "204"));
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) TableLikeStageableTableMetadata(org.apache.gobblin.data.management.conversion.hive.entities.TableLikeStageableTableMetadata) File(java.io.File) Test(org.testng.annotations.Test)

Example 34 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class HiveMaterializerTest method testCopyTable.

/**
 * Copies a single partition ({@code <partitionColumn>=part1}) of the test dataset into a new
 * table via a table-copy work unit and checks that the destination holds the four expected rows.
 */
@Test
public void testCopyTable() throws Exception {
    String destinationTable = "copyTable";

    File stagingDir = Files.createTempDir();
    stagingDir.deleteOnExit();

    // Describe the destination table and the partition to copy.
    TableLikeStageableTableMetadata destinationMetadata =
        new TableLikeStageableTableMetadata(this.dataset.getTable(), this.dbName, destinationTable,
            stagingDir.getAbsolutePath());
    String partitionSpec = String.format("%s=part1", this.partitionColumn);

    WorkUnit copyUnit = HiveMaterializer.tableCopyWorkUnit(this.dataset, destinationMetadata, partitionSpec);

    // The task must report success both after running and after committing.
    HiveMaterializer materializer = new HiveMaterializer(getTaskContextForRun(copyUnit));
    materializer.run();
    Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
    materializer.commit();
    Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

    // Read the destination table back.
    // NOTE(review): the trailing 3 is presumably the number of columns to fetch - confirm.
    String query = String.format("SELECT * FROM %s.%s", this.dbName, destinationTable);
    List<List<String>> rows = executeStatementAndGetResults(this.jdbcConnector, query, 3);
    Assert.assertEquals(rows.size(), 4);

    List<String> firstColumn = rows.stream().map(row -> row.get(0)).collect(Collectors.toList());
    Assert.assertEquals(firstColumn, Lists.newArrayList("101", "102", "103", "104"));
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) TableLikeStageableTableMetadata(org.apache.gobblin.data.management.conversion.hive.entities.TableLikeStageableTableMetadata) File(java.io.File) Test(org.testng.annotations.Test)

Example 35 with WorkUnit

use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.

the class DatasetFinderSourceTest method testDrilledDown.

/**
 * Checks the work units produced by a {@code MySource} built with its first constructor
 * argument set to {@code true}: the two plain datasets each yield one work unit with no
 * partition URN, and the partitionable dataset yields one work unit per partition. The
 * stream variant must produce the same work units as the list variant.
 */
@Test
public void testDrilledDown() {
    Dataset plainFirst = new SimpleDatasetForTesting("dataset1");
    Dataset partitioned = new SimplePartitionableDatasetForTesting("dataset2",
        Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2")));
    Dataset plainLast = new SimpleDatasetForTesting("dataset3");

    IterableDatasetFinder datasetFinder =
        new StaticDatasetsFinderForTesting(Lists.newArrayList(plainFirst, partitioned, plainLast));
    MySource source = new MySource(true, datasetFinder);

    List<WorkUnit> produced = source.getWorkunits(new SourceState());
    Assert.assertEquals(produced.size(), 4);

    // Expected (dataset URN, partition URN) per work unit, in emission order.
    // A null partition entry means the work unit must not carry a partition URN at all.
    String[] expectedDatasetUrns = { "dataset1", "dataset2", "dataset2", "dataset3" };
    String[] expectedPartitionUrns = { null, "p1", "p2", null };
    for (int i = 0; i < expectedDatasetUrns.length; i++) {
        WorkUnit unit = produced.get(i);
        Assert.assertEquals(unit.getProp(DATASET_URN), expectedDatasetUrns[i]);
        if (expectedPartitionUrns[i] == null) {
            Assert.assertNull(unit.getProp(PARTITION_URN));
        } else {
            Assert.assertEquals(unit.getProp(PARTITION_URN), expectedPartitionUrns[i]);
        }
    }

    // The streaming API must agree with the list API for a fresh source state.
    WorkUnitStream stream = source.getWorkunitStream(new SourceState());
    Assert.assertEquals(Lists.newArrayList(stream.getWorkUnits()), produced);
}
Also used : SimpleDatasetPartitionForTesting(org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting) WorkUnitStream(org.apache.gobblin.source.workunit.WorkUnitStream) SimpleDatasetForTesting(org.apache.gobblin.dataset.test.SimpleDatasetForTesting) SourceState(org.apache.gobblin.configuration.SourceState) IterableDatasetFinder(org.apache.gobblin.dataset.IterableDatasetFinder) PartitionableDataset(org.apache.gobblin.dataset.PartitionableDataset) Dataset(org.apache.gobblin.dataset.Dataset) SimplePartitionableDatasetForTesting(org.apache.gobblin.dataset.test.SimplePartitionableDatasetForTesting) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) StaticDatasetsFinderForTesting(org.apache.gobblin.dataset.test.StaticDatasetsFinderForTesting) Test(org.testng.annotations.Test)

Aggregations

WorkUnit (org.apache.gobblin.source.workunit.WorkUnit) 133, Test (org.testng.annotations.Test) 59, SourceState (org.apache.gobblin.configuration.SourceState) 40, WorkUnitState (org.apache.gobblin.configuration.WorkUnitState) 40, MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit) 35, Extract (org.apache.gobblin.source.workunit.Extract) 24, Path (org.apache.hadoop.fs.Path) 19, State (org.apache.gobblin.configuration.State) 13, IOException (java.io.IOException) 11, ArrayList (java.util.ArrayList) 10, Closer (com.google.common.io.Closer) 9, Properties (java.util.Properties) 9, WatermarkInterval (org.apache.gobblin.source.extractor.WatermarkInterval) 8, List (java.util.List) 7, Table (org.apache.hadoop.hive.ql.metadata.Table) 7, ImmutableMap (com.google.common.collect.ImmutableMap) 6, Config (com.typesafe.config.Config) 6, File (java.io.File) 6, IterableDatasetFinder (org.apache.gobblin.dataset.IterableDatasetFinder) 6, WorkUnitStream (org.apache.gobblin.source.workunit.WorkUnitStream) 6