use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.
the class HiveSourceTest method testGetWorkUnitsForTable.
/**
 * Checks that {@code hiveSource.getWorkunits} produces exactly one work unit for a database that
 * contains a single, freshly created Avro test table, and that the work unit carries the expected
 * database name, table name and table schema URL.
 *
 * @throws Exception if any metastore or source call fails
 */
@Test
public void testGetWorkUnitsForTable() throws Exception {
  final String database = "testdb2";
  final String table = "testtable2";
  final String storageLocation = "/tmp/testtable2";

  // Drop any database left behind by an earlier run before building the fixture.
  // NOTE(review): the flags look like (deleteData=false, ignoreUnknownDb=true, cascade=true)
  // per the Hive metastore client signature -- confirm.
  this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(database, false, true, true);

  SourceState state = getTestState(database);
  this.hiveMetastoreTestUtils.createTestAvroTable(database, table, storageLocation, Optional.<String>absent());

  List<WorkUnit> produced = hiveSource.getWorkunits(state);

  // Exactly one work unit is expected: the one for the table, with no dummy work units.
  Assert.assertEquals(produced.size(), 1);

  HiveWorkUnit hiveWorkUnit = new HiveWorkUnit(produced.get(0));
  Assert.assertEquals(hiveWorkUnit.getHiveDataset().getDbAndTable().getDb(), database);
  Assert.assertEquals(hiveWorkUnit.getHiveDataset().getDbAndTable().getTable(), table);
  // presumably "/tmp/dummy" is the schema URL configured by the test fixture -- verify against the helpers
  Assert.assertEquals(hiveWorkUnit.getTableSchemaUrl(), new Path("/tmp/dummy"));
}
use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.
the class HiveMaterializerTest method testMaterializeTablePartition.
/**
 * Builds a view-materialization work unit in AVRO format restricted to the partition
 * {@code <partitionColumn>=part1}, runs and commits it through a {@link HiveMaterializer}, and
 * verifies that the destination table ends up with four rows whose first column is 101..104.
 *
 * @throws Exception if materialization or the verification query fails
 */
@Test
public void testMaterializeTablePartition() throws Exception {
  final String targetTable = "materializeTablePartition";

  // Temporary directory whose absolute path is handed to the destination table metadata.
  File stagingDir = Files.createTempDir();
  stagingDir.deleteOnExit();

  TableLikeStageableTableMetadata targetMetadata = new TableLikeStageableTableMetadata(
      this.dataset.getTable(), this.dbName, targetTable, stagingDir.getAbsolutePath());
  String partitionSpec = String.format("%s=part1", this.partitionColumn);
  WorkUnit materializationUnit = HiveMaterializer.viewMaterializationWorkUnit(
      this.dataset, HiveConverterUtils.StorageFormat.AVRO, targetMetadata, partitionSpec);

  HiveMaterializer materializer = new HiveMaterializer(getTaskContextForRun(materializationUnit));

  // The task must report success both after running and after committing.
  materializer.run();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
  materializer.commit();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

  // NOTE(review): the trailing 3 is presumably the number of columns to read per row -- confirm.
  String query = String.format("SELECT * FROM %s.%s", this.dbName, targetTable);
  List<List<String>> rows = executeStatementAndGetResults(this.jdbcConnector, query, 3);
  Assert.assertEquals(rows.size(), 4);

  List<String> firstColumn = rows.stream().map(row -> row.get(0)).collect(Collectors.toList());
  Assert.assertEquals(firstColumn, Lists.newArrayList("101", "102", "103", "104"));
}
use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.
the class HiveMaterializerTest method testMaterializeTable.
/**
 * Builds a view-materialization work unit in AVRO format with no partition restriction
 * ({@code null} partition argument), runs and commits it through a {@link HiveMaterializer}, and
 * verifies that the destination table ends up with eight rows whose first column is
 * 101..104 followed by 201..204.
 *
 * @throws Exception if materialization or the verification query fails
 */
@Test
public void testMaterializeTable() throws Exception {
  final String targetTable = "materializeTable";

  // Temporary directory whose absolute path is handed to the destination table metadata.
  File stagingDir = Files.createTempDir();
  stagingDir.deleteOnExit();

  TableLikeStageableTableMetadata targetMetadata = new TableLikeStageableTableMetadata(
      this.dataset.getTable(), this.dbName, targetTable, stagingDir.getAbsolutePath());
  WorkUnit materializationUnit = HiveMaterializer.viewMaterializationWorkUnit(
      this.dataset, HiveConverterUtils.StorageFormat.AVRO, targetMetadata, null);

  HiveMaterializer materializer = new HiveMaterializer(getTaskContextForRun(materializationUnit));

  // The task must report success both after running and after committing.
  materializer.run();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
  materializer.commit();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

  // NOTE(review): the trailing 3 is presumably the number of columns to read per row -- confirm.
  String query = String.format("SELECT * FROM %s.%s", this.dbName, targetTable);
  List<List<String>> rows = executeStatementAndGetResults(this.jdbcConnector, query, 3);
  Assert.assertEquals(rows.size(), 8);

  List<String> firstColumn = rows.stream().map(row -> row.get(0)).collect(Collectors.toList());
  Assert.assertEquals(firstColumn,
      Lists.newArrayList("101", "102", "103", "104", "201", "202", "203", "204"));
}
use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.
the class HiveMaterializerTest method testCopyTable.
/**
 * Builds a table-copy work unit restricted to the partition {@code <partitionColumn>=part1},
 * runs and commits it through a {@link HiveMaterializer}, and verifies that the destination
 * table ends up with four rows whose first column is 101..104.
 *
 * @throws Exception if the copy or the verification query fails
 */
@Test
public void testCopyTable() throws Exception {
  final String targetTable = "copyTable";

  // Temporary directory whose absolute path is handed to the destination table metadata.
  File stagingDir = Files.createTempDir();
  stagingDir.deleteOnExit();

  TableLikeStageableTableMetadata targetMetadata = new TableLikeStageableTableMetadata(
      this.dataset.getTable(), this.dbName, targetTable, stagingDir.getAbsolutePath());
  String partitionSpec = String.format("%s=part1", this.partitionColumn);
  WorkUnit copyUnit = HiveMaterializer.tableCopyWorkUnit(this.dataset, targetMetadata, partitionSpec);

  HiveMaterializer materializer = new HiveMaterializer(getTaskContextForRun(copyUnit));

  // The task must report success both after running and after committing.
  materializer.run();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
  materializer.commit();
  Assert.assertEquals(materializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

  // NOTE(review): the trailing 3 is presumably the number of columns to read per row -- confirm.
  String query = String.format("SELECT * FROM %s.%s", this.dbName, targetTable);
  List<List<String>> rows = executeStatementAndGetResults(this.jdbcConnector, query, 3);
  Assert.assertEquals(rows.size(), 4);

  List<String> firstColumn = rows.stream().map(row -> row.get(0)).collect(Collectors.toList());
  Assert.assertEquals(firstColumn, Lists.newArrayList("101", "102", "103", "104"));
}
use of org.apache.gobblin.source.workunit.WorkUnit in project incubator-gobblin by apache.
the class DatasetFinderSourceTest method testDrilledDown.
/**
 * Feeds three datasets (the middle one partitionable with partitions "p1" and "p2") through
 * {@code MySource} constructed with its first argument set to {@code true}, and verifies that
 * four work units come back in order: one per plain dataset and one per partition of the
 * partitionable dataset. Also checks that the work unit stream yields the same work units as
 * the list-returning call.
 */
@Test
public void testDrilledDown() {
  Dataset first = new SimpleDatasetForTesting("dataset1");
  Dataset partitioned = new SimplePartitionableDatasetForTesting("dataset2",
      Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2")));
  Dataset last = new SimpleDatasetForTesting("dataset3");

  IterableDatasetFinder finder = new StaticDatasetsFinderForTesting(Lists.newArrayList(first, partitioned, last));
  // NOTE(review): the boolean presumably enables drilling down into partitions -- confirm against MySource.
  MySource mySource = new MySource(true, finder);

  List<WorkUnit> workUnits = mySource.getWorkunits(new SourceState());
  Assert.assertEquals(workUnits.size(), 4);

  // Expected (dataset URN, partition URN) per work unit, in order; a null partition means
  // the work unit must not carry a partition URN at all.
  String[] expectedDatasetUrns = {"dataset1", "dataset2", "dataset2", "dataset3"};
  String[] expectedPartitionUrns = {null, "p1", "p2", null};
  for (int i = 0; i < expectedDatasetUrns.length; i++) {
    Assert.assertEquals(workUnits.get(i).getProp(DATASET_URN), expectedDatasetUrns[i]);
    if (expectedPartitionUrns[i] == null) {
      Assert.assertNull(workUnits.get(i).getProp(PARTITION_URN));
    } else {
      Assert.assertEquals(workUnits.get(i).getProp(PARTITION_URN), expectedPartitionUrns[i]);
    }
  }

  // The streaming API must agree with the list-based API.
  WorkUnitStream stream = mySource.getWorkunitStream(new SourceState());
  Assert.assertEquals(Lists.newArrayList(stream.getWorkUnits()), workUnits);
}
Aggregations