Use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.
Class HiveMaterializer, method tableCopyWorkUnit:
/**
 * Create a work unit to copy a source table to a target table using a staging table in between.
 * @param dataset {@link HiveDataset} for the source table.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target table metadata.
 * @param partitionName optional partition to copy; set on the work unit only when non-empty.
 */
public static HiveWorkUnit tableCopyWorkUnit(HiveDataset dataset, StageableTableMetadata destinationTable, @Nullable String partitionName) {
  HiveWorkUnit workUnit = new HiveWorkUnit(dataset);
  workUnit.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.TABLE_COPY.name());
  workUnit.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
  if (!Strings.isNullOrEmpty(partitionName)) {
    workUnit.setPartitionName(partitionName);
  }
  TaskUtils.setTaskFactoryClass(workUnit, HiveMaterializerTaskFactory.class);
  return workUnit;
}
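For context, a minimal caller sketch using only the calls shown above; the helper name, variable names, and the partition value are illustrative rather than part of the project API, and imports are omitted to match the snippets on this page.
// Hypothetical helper: planTableCopy and the "datepartition=2024-01-01" value are
// illustrative. Assumes a HiveDataset and StageableTableMetadata already exist.
public static HiveWorkUnit planTableCopy(HiveDataset sourceDataset, StageableTableMetadata destination) {
  // tableCopyWorkUnit stamps the TABLE_COPY materializer mode, serializes the
  // destination metadata into the work unit, and registers HiveMaterializerTaskFactory.
  HiveWorkUnit copyWorkUnit = HiveMaterializer.tableCopyWorkUnit(sourceDataset, destination, "datepartition=2024-01-01");
  // A non-empty partition name round-trips through the work unit as a Guava Optional.
  if (!copyWorkUnit.getPartitionName().isPresent()) {
    throw new IllegalStateException("Expected a partition-scoped copy work unit");
  }
  return copyWorkUnit;
}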
Use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.
Class HiveSourceTest, method testGetWorkUnitsForTable:
@Test
public void testGetWorkUnitsForTable() throws Exception {
  String dbName = "testdb2";
  String tableName = "testtable2";
  String tableSdLoc = "/tmp/testtable2";
  this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
  SourceState testState = getTestState(dbName);
  this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.<String>absent());
  List<WorkUnit> workUnits = hiveSource.getWorkunits(testState);
  // One workunit for the table, no dummy workunits
  Assert.assertEquals(workUnits.size(), 1);
  WorkUnit wu = workUnits.get(0);
  HiveWorkUnit hwu = new HiveWorkUnit(wu);
  Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
  Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName);
  Assert.assertEquals(hwu.getTableSchemaUrl(), new Path("/tmp/dummy"));
}
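A minimal sketch of how a caller might inspect the returned work units, using only the accessors exercised in this test; the loop and logging are illustrative.
// Illustrative inspection loop: each plain WorkUnit wraps into a HiveWorkUnit,
// which exposes the Hive metadata (database and table name) it was planned for.
for (WorkUnit workUnit : workUnits) {
  HiveWorkUnit hiveWorkUnit = new HiveWorkUnit(workUnit);
  String db = hiveWorkUnit.getHiveDataset().getDbAndTable().getDb();
  String table = hiveWorkUnit.getHiveDataset().getDbAndTable().getTable();
  System.out.println("Planned work unit for " + db + "." + table);
}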
Use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.
Class HiveMaterializer, method viewMaterializationWorkUnit:
/**
 * Create a work unit to materialize a table / view to a target table using a staging table in between.
 * @param dataset {@link HiveDataset} for the source table.
 * @param storageFormat format in which the target table should be written.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target table metadata.
 * @param partitionName optional partition to materialize; set on the work unit only when non-empty.
 */
public static HiveWorkUnit viewMaterializationWorkUnit(HiveDataset dataset, HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable, @Nullable String partitionName) {
  HiveWorkUnit workUnit = new HiveWorkUnit(dataset);
  workUnit.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.TABLE_MATERIALIZATION.name());
  workUnit.setProp(STORAGE_FORMAT_KEY, storageFormat.name());
  workUnit.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
  if (!Strings.isNullOrEmpty(partitionName)) {
    workUnit.setPartitionName(partitionName);
  }
  TaskUtils.setTaskFactoryClass(workUnit, HiveMaterializerTaskFactory.class);
  return workUnit;
}
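A hedged caller sketch; the method and parameter names below are illustrative, and the storage format is taken as a parameter because the available HiveConverterUtils.StorageFormat constants are not shown on this page.
// Hypothetical helper: materializeView and its parameter names are illustrative.
public static HiveWorkUnit materializeView(HiveDataset viewDataset,
    HiveConverterUtils.StorageFormat format, StageableTableMetadata destination) {
  // Passing null for partitionName leaves the work unit unpartitioned: the
  // partition name is only set on the work unit when it is non-empty.
  return HiveMaterializer.viewMaterializationWorkUnit(viewDataset, format, destination, null);
}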
Use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.
Class HiveSourceTest, method testGetWorkunitsAfterWatermark:
@Test
public void testGetWorkunitsAfterWatermark() throws Exception {
  String dbName = "testdb4";
  String tableName1 = "testtable1";
  String tableSdLoc1 = "/tmp/testtable1";
  String tableName2 = "testtable2";
  String tableSdLoc2 = "/tmp/testtable2";
  this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
  this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName1, tableSdLoc1, Optional.<String>absent());
  this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName2, tableSdLoc2, Optional.<String>absent(), true);
  List<WorkUnitState> previousWorkUnitStates = Lists.newArrayList();
  Table table1 = this.hiveMetastoreTestUtils.getLocalMetastoreClient().getTable(dbName, tableName1);
  previousWorkUnitStates.add(ConversionHiveTestUtils.createWus(dbName, tableName1, TimeUnit.MILLISECONDS.convert(table1.getCreateTime(), TimeUnit.SECONDS)));
  SourceState testState = new SourceState(getTestState(dbName), previousWorkUnitStates);
  testState.setProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, TableLevelWatermarker.Factory.class.getName());
  List<WorkUnit> workUnits = this.hiveSource.getWorkunits(testState);
  Assert.assertEquals(workUnits.size(), 1);
  WorkUnit wu = workUnits.get(0);
  HiveWorkUnit hwu = new HiveWorkUnit(wu);
  Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
  Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName2);
}
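A conceptual sketch, not the watermarker's actual API, of the table-level skip rule this test exercises: a table yields a new work unit only when its update time is strictly newer than the watermark committed for it in the previous run, and Hive reports times in seconds while the watermark here is a millisecond epoch.
// Conceptual helper (not project API): mirrors the skip decision observed in the
// test, including the seconds-to-milliseconds conversion used in the setup above.
static boolean isNewerThanWatermark(int hiveUpdateTimeSecs, long previousWatermarkMillis) {
  long updateTimeMillis = TimeUnit.MILLISECONDS.convert(hiveUpdateTimeSecs, TimeUnit.SECONDS);
  return updateTimeMillis > previousWatermarkMillis;
}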
Use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.
Class HiveSourceTest, method testGetWorkUnitsForPartitions:
@Test
public void testGetWorkUnitsForPartitions() throws Exception {
  String dbName = "testdb3";
  String tableName = "testtable3";
  String tableSdLoc = "/tmp/testtable3";
  this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
  SourceState testState = getTestState(dbName);
  Table tbl = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.of("field"));
  this.hiveMetastoreTestUtils.addTestPartition(tbl, ImmutableList.of("f1"), (int) System.currentTimeMillis());
  List<WorkUnit> workUnits = this.hiveSource.getWorkunits(testState);
  // One workunit for the partition + 1 dummy watermark workunit
  Assert.assertEquals(workUnits.size(), 2);
  WorkUnit wu = workUnits.get(0);
  WorkUnit wu2 = workUnits.get(1);
  HiveWorkUnit hwu = null;
  if (!wu.contains(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
    hwu = new HiveWorkUnit(wu);
  } else {
    hwu = new HiveWorkUnit(wu2);
  }
  Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
  Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName);
  Assert.assertEquals(hwu.getPartitionName().get(), "field=f1");
}
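A minimal sketch generalizing the selection above: filter out the dummy watermark work unit before wrapping the rest, using only the key and calls already shown in this test.
// Illustrative filtering loop: keep only the real (non-watermark) work units.
List<HiveWorkUnit> hiveWorkUnits = Lists.newArrayList();
for (WorkUnit candidate : workUnits) {
  if (!candidate.contains(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
    hiveWorkUnits.add(new HiveWorkUnit(candidate));
  }
}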