Example 1 with HiveWorkUnit

use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.

the class HiveMaterializer method tableCopyWorkUnit.

/**
 * Create a work unit to copy a source table to a target table using a staging table in between.
 * @param dataset {@link HiveDataset} for the source table.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata.
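 * @param partitionName if non-null and non-empty, the partition to which the copy work unit is scoped.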
 */
public static HiveWorkUnit tableCopyWorkUnit(HiveDataset dataset, StageableTableMetadata destinationTable, @Nullable String partitionName) {
    HiveWorkUnit workUnit = new HiveWorkUnit(dataset);
    workUnit.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.TABLE_COPY.name());
    workUnit.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
    if (!Strings.isNullOrEmpty(partitionName)) {
        workUnit.setPartitionName(partitionName);
    }
    TaskUtils.setTaskFactoryClass(workUnit, HiveMaterializerTaskFactory.class);
    return workUnit;
}
Also used : HiveWorkUnit(org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit)
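
For reference, a caller could produce both a whole-table copy and a partition-scoped copy as in the sketch below. The helper is hypothetical (not part of incubator-gobblin), relies only on the tableCopyWorkUnit call shown above, and assumes the HiveDataset and StageableTableMetadata instances are constructed elsewhere; the partition name literal is just an illustrative value.

public static List<HiveWorkUnit> buildCopies(HiveDataset dataset, StageableTableMetadata destinationTable) {
    List<HiveWorkUnit> copies = Lists.newArrayList();
    // Whole-table copy: a null partition name leaves the work unit unpartitioned.
    copies.add(HiveMaterializer.tableCopyWorkUnit(dataset, destinationTable, null));
    // Partition-scoped copy: the name is recorded on the work unit via setPartitionName.
    copies.add(HiveMaterializer.tableCopyWorkUnit(dataset, destinationTable, "field=f1"));
    return copies;
}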

Example 2 with HiveWorkUnit

use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.

the class HiveSourceTest method testGetWorkUnitsForTable.

@Test
public void testGetWorkUnitsForTable() throws Exception {
    String dbName = "testdb2";
    String tableName = "testtable2";
    String tableSdLoc = "/tmp/testtable2";
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    SourceState testState = getTestState(dbName);
    this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.<String>absent());
    List<WorkUnit> workUnits = hiveSource.getWorkunits(testState);
    // One workunit for the table, no dummy workunits
    Assert.assertEquals(workUnits.size(), 1);
    WorkUnit wu = workUnits.get(0);
    HiveWorkUnit hwu = new HiveWorkUnit(wu);
    Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
    Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName);
    Assert.assertEquals(hwu.getTableSchemaUrl(), new Path("/tmp/dummy"));
}
Also used : Path(org.apache.hadoop.fs.Path) SourceState(org.apache.gobblin.configuration.SourceState) HiveWorkUnit(org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)
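
The wrapping pattern from this test can be reused outside of tests: each plain WorkUnit returned by HiveSource.getWorkunits is re-wrapped in a HiveWorkUnit to read back which database and table it targets. The helper below is a hypothetical sketch that uses only the calls visible in the test and assumes the list contains no dummy watermark work units (compare Example 5).

public static void logWorkUnitTargets(List<WorkUnit> workUnits) {
    for (WorkUnit wu : workUnits) {
        // HiveWorkUnit is a view over the properties of the underlying WorkUnit.
        HiveWorkUnit hwu = new HiveWorkUnit(wu);
        System.out.println(hwu.getHiveDataset().getDbAndTable().getDb() + "."
            + hwu.getHiveDataset().getDbAndTable().getTable());
    }
}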

Example 3 with HiveWorkUnit

use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.

the class HiveMaterializer method viewMaterializationWorkUnit.

/**
 * Create a work unit to materialize a table / view to a target table using a staging table in between.
 * @param dataset {@link HiveDataset} for the source table.
 * @param storageFormat format in which target table should be written.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata.
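 * @param partitionName if non-null and non-empty, the partition to which the materialization work unit is scoped.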
 */
public static HiveWorkUnit viewMaterializationWorkUnit(HiveDataset dataset, HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable, @Nullable String partitionName) {
    HiveWorkUnit workUnit = new HiveWorkUnit(dataset);
    workUnit.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.TABLE_MATERIALIZATION.name());
    workUnit.setProp(STORAGE_FORMAT_KEY, storageFormat.name());
    workUnit.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
    if (!Strings.isNullOrEmpty(partitionName)) {
        workUnit.setPartitionName(partitionName);
    }
    TaskUtils.setTaskFactoryClass(workUnit, HiveMaterializerTaskFactory.class);
    return workUnit;
}
Also used : HiveWorkUnit(org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit)
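
As with tableCopyWorkUnit, a caller only needs a dataset, a storage format and the destination metadata. The sketch below is hypothetical and simply forwards to the viewMaterializationWorkUnit call shown above with no partition restriction.

public static HiveWorkUnit materializeWholeView(HiveDataset viewDataset,
        HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable) {
    // Null partition name: materialize the entire view into the target table.
    return HiveMaterializer.viewMaterializationWorkUnit(viewDataset, storageFormat, destinationTable, null);
}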

Example 4 with HiveWorkUnit

use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.

the class HiveSourceTest method testGetWorkunitsAfterWatermark.

@Test
public void testGetWorkunitsAfterWatermark() throws Exception {
    String dbName = "testdb4";
    String tableName1 = "testtable1";
    String tableSdLoc1 = "/tmp/testtable1";
    String tableName2 = "testtable2";
    String tableSdLoc2 = "/tmp/testtable2";
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName1, tableSdLoc1, Optional.<String>absent());
    this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName2, tableSdLoc2, Optional.<String>absent(), true);
    List<WorkUnitState> previousWorkUnitStates = Lists.newArrayList();
    Table table1 = this.hiveMetastoreTestUtils.getLocalMetastoreClient().getTable(dbName, tableName1);
    previousWorkUnitStates.add(ConversionHiveTestUtils.createWus(dbName, tableName1, TimeUnit.MILLISECONDS.convert(table1.getCreateTime(), TimeUnit.SECONDS)));
    SourceState testState = new SourceState(getTestState(dbName), previousWorkUnitStates);
    testState.setProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, TableLevelWatermarker.Factory.class.getName());
    List<WorkUnit> workUnits = this.hiveSource.getWorkunits(testState);
    Assert.assertEquals(workUnits.size(), 1);
    WorkUnit wu = workUnits.get(0);
    HiveWorkUnit hwu = new HiveWorkUnit(wu);
    Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
    Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName2);
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) Table(org.apache.hadoop.hive.metastore.api.Table) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) HiveWorkUnit(org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)
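
The pattern this test exercises is: seed the SourceState with previous WorkUnitStates carrying per-table watermarks, select the TableLevelWatermarker factory, and only tables modified after their recorded watermark yield work units. The helper below is a hypothetical sketch of that pattern, reusing only the calls visible in the test (including the test-only ConversionHiveTestUtils.createWus).

public static List<WorkUnit> workUnitsAfterWatermark(SourceState baseState, HiveSource source,
        String dbName, String tableName, long lastProcessedMillis) {
    List<WorkUnitState> previousStates = Lists.newArrayList();
    // Record the table as already processed up to lastProcessedMillis.
    previousStates.add(ConversionHiveTestUtils.createWus(dbName, tableName, lastProcessedMillis));
    SourceState state = new SourceState(baseState, previousStates);
    state.setProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, TableLevelWatermarker.Factory.class.getName());
    // Tables not modified since their recorded watermark produce no work units.
    return source.getWorkunits(state);
}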

Example 5 with HiveWorkUnit

use of org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit in project incubator-gobblin by apache.

the class HiveSourceTest method testGetWorkUnitsForPartitions.

@Test
public void testGetWorkUnitsForPartitions() throws Exception {
    String dbName = "testdb3";
    String tableName = "testtable3";
    String tableSdLoc = "/tmp/testtable3";
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    SourceState testState = getTestState(dbName);
    Table tbl = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.of("field"));
    this.hiveMetastoreTestUtils.addTestPartition(tbl, ImmutableList.of("f1"), (int) System.currentTimeMillis());
    List<WorkUnit> workUnits = this.hiveSource.getWorkunits(testState);
    // One workunit for the partition + 1 dummy watermark workunit
    Assert.assertEquals(workUnits.size(), 2);
    WorkUnit wu = workUnits.get(0);
    WorkUnit wu2 = workUnits.get(1);
    HiveWorkUnit hwu = null;
    if (!wu.contains(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
        hwu = new HiveWorkUnit(wu);
    } else {
        hwu = new HiveWorkUnit(wu2);
    }
    Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
    Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName);
    Assert.assertEquals(hwu.getPartitionName().get(), "field=f1");
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) Table(org.apache.hadoop.hive.metastore.api.Table) HiveWorkUnit(org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)
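
The if/else in the test exists only to skip the dummy watermark work unit that the PartitionLevelWatermarker appends. A hypothetical helper applying the same check to an arbitrary list, using only the calls shown in the test:

public static List<HiveWorkUnit> partitionWorkUnitsOnly(List<WorkUnit> workUnits) {
    List<HiveWorkUnit> result = Lists.newArrayList();
    for (WorkUnit wu : workUnits) {
        // Skip the dummy watermark work unit added by the PartitionLevelWatermarker.
        if (!wu.contains(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)) {
            result.add(new HiveWorkUnit(wu));
        }
    }
    return result;
}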

Aggregations

HiveWorkUnit (org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit): 5 uses
SourceState (org.apache.gobblin.configuration.SourceState): 3 uses
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 3 uses
Test (org.testng.annotations.Test): 3 uses
Table (org.apache.hadoop.hive.metastore.api.Table): 2 uses
WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 1 use
Path (org.apache.hadoop.fs.Path): 1 use