Search in sources :

Example 61 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class AbstractSourceTest method testGetPreviousWorkUnitStatesOnPartialRetryFullCommit.

/**
 * Test when work unit retry policy is on partial, but the job commit policy is "full".
 *
 * <p>With this combination the source is expected to return no previous work unit states
 * for retry, i.e. an empty list.
 */
@Test
public void testGetPreviousWorkUnitStatesOnPartialRetryFullCommit() {
    SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
    sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY, "onpartial");
    sourceState.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "full");
    // Collections.emptyList() is the type-safe form of the raw-typed Collections.EMPTY_LIST constant.
    Assert.assertEquals(this.testSource.getPreviousWorkUnitStatesForRetry(sourceState), Collections.emptyList());
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) WorkingState(org.apache.gobblin.configuration.WorkUnitState.WorkingState) Test(org.testng.annotations.Test)

Example 62 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class HiveSourceTest method testGetWorkunitsAfterWatermark.

/**
 * Creates two Avro test tables, supplies a previous work unit state built from the first
 * table's create time, and checks that the only work unit produced refers to the second table.
 */
@Test
public void testGetWorkunitsAfterWatermark() throws Exception {
    final String database = "testdb4";
    final String firstTable = "testtable1";
    final String secondTable = "testtable2";

    // Start from a clean database, then register both tables.
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(database, false, true, true);
    this.hiveMetastoreTestUtils.createTestAvroTable(database, firstTable, "/tmp/testtable1", Optional.<String>absent());
    this.hiveMetastoreTestUtils.createTestAvroTable(database, secondTable, "/tmp/testtable2", Optional.<String>absent(), true);

    // The metastore reports the create time in seconds; the previous state is built from milliseconds.
    Table registeredFirstTable = this.hiveMetastoreTestUtils.getLocalMetastoreClient().getTable(database, firstTable);
    long firstTableCreateMillis = TimeUnit.SECONDS.toMillis(registeredFirstTable.getCreateTime());
    List<WorkUnitState> priorStates = Lists.newArrayList();
    priorStates.add(ConversionHiveTestUtils.createWus(database, firstTable, firstTableCreateMillis));

    SourceState state = new SourceState(getTestState(database), priorStates);
    state.setProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, TableLevelWatermarker.Factory.class.getName());

    List<WorkUnit> produced = this.hiveSource.getWorkunits(state);
    Assert.assertEquals(produced.size(), 1);

    HiveWorkUnit hiveWorkUnit = new HiveWorkUnit(produced.get(0));
    Assert.assertEquals(hiveWorkUnit.getHiveDataset().getDbAndTable().getDb(), database);
    Assert.assertEquals(hiveWorkUnit.getHiveDataset().getDbAndTable().getTable(), secondTable);
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) Table(org.apache.hadoop.hive.metastore.api.Table) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) HiveWorkUnit(org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Example 63 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class HiveSourceTest method testShouldCreateWorkunitsOlderThanLookback.

/**
 * Checks that a dummy partition created 35 days ago is reported as older than the lookback
 * by a newly initialized {@link HiveSource}.
 */
@Test
public void testShouldCreateWorkunitsOlderThanLookback() throws Exception {
    long thirtyFiveDaysAgoMillis = new DateTime(System.currentTimeMillis()).minusDays(35).getMillis();
    org.apache.hadoop.hive.ql.metadata.Partition stalePartition =
        this.hiveMetastoreTestUtils.createDummyPartition(thirtyFiveDaysAgoMillis);

    SourceState state = getTestState("testDb6");
    HiveSource source = new HiveSource();
    source.initialize(state);

    Assert.assertTrue(source.isOlderThanLookback(stalePartition), "Should not create workunits older than lookback");
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) HiveSource(org.apache.gobblin.data.management.conversion.hive.source.HiveSource) DateTime(org.joda.time.DateTime) Test(org.testng.annotations.Test)

Example 64 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class HiveSourceTest method testIsOlderThanLookbackForDistcpGenerationTime.

/**
 * Checks that a partition whose distcp registration generation time parameter is two days old
 * is not reported as older than the lookback.
 */
@Test
public void testIsOlderThanLookbackForDistcpGenerationTime() throws Exception {
    // Default lookback time is 3 days
    long twoDaysAgoMillis = new DateTime(System.currentTimeMillis()).minusDays(2).getMillis();
    Map<String, String> partitionParameters = Maps.newHashMap();
    partitionParameters.put(HiveSource.DISTCP_REGISTRATION_GENERATION_TIME_KEY, String.valueOf(twoDaysAgoMillis));

    // The dummy partition is created with 0; the recent timestamp is supplied only via the parameter.
    org.apache.hadoop.hive.ql.metadata.Partition partition = this.hiveMetastoreTestUtils.createDummyPartition(0);
    partition.getTPartition().setParameters(partitionParameters);

    SourceState state = getTestState("testDb6");
    HiveSource source = new HiveSource();
    source.initialize(state);

    Assert.assertFalse(source.isOlderThanLookback(partition), "Should create workunits newer than lookback");
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) HiveSource(org.apache.gobblin.data.management.conversion.hive.source.HiveSource) DateTime(org.joda.time.DateTime) Test(org.testng.annotations.Test)

Example 65 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class HiveSourceTest method testGetWorkUnitsForPartitions.

/**
 * Adds a single partition ("field=f1") to a newly created Avro test table and checks that the
 * source returns two work units, one of which describes that partition of that table.
 */
@Test
public void testGetWorkUnitsForPartitions() throws Exception {
    String dbName = "testdb3";
    String tableName = "testtable3";
    String tableSdLoc = "/tmp/testtable3";
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    SourceState testState = getTestState(dbName);
    Table tbl = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.of("field"));
    // Casting the millisecond clock straight to int discards the high bits and produces an
    // arbitrary, possibly negative value. Whole seconds since the epoch fit in an int.
    // NOTE(review): assumes addTestPartition takes a create time in seconds, like the Hive
    // table create time used elsewhere in these tests - confirm against the helper.
    int partitionCreateTimeSeconds = (int) (System.currentTimeMillis() / 1000L);
    this.hiveMetastoreTestUtils.addTestPartition(tbl, ImmutableList.of("f1"), partitionCreateTimeSeconds);
    List<WorkUnit> workUnits = this.hiveSource.getWorkunits(testState);
    // One workunit for the partition + 1 dummy watermark workunit
    Assert.assertEquals(workUnits.size(), 2);
    WorkUnit wu = workUnits.get(0);
    WorkUnit wu2 = workUnits.get(1);
    // The position of the watermark work unit is not assumed: use whichever entry is not flagged as one.
    HiveWorkUnit hwu = wu.contains(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)
        ? new HiveWorkUnit(wu2)
        : new HiveWorkUnit(wu);
    Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getDb(), dbName);
    Assert.assertEquals(hwu.getHiveDataset().getDbAndTable().getTable(), tableName);
    Assert.assertEquals(hwu.getPartitionName().get(), "field=f1");
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) Table(org.apache.hadoop.hive.metastore.api.Table) HiveWorkUnit(org.apache.gobblin.data.management.conversion.hive.source.HiveWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Aggregations

SourceState (org.apache.gobblin.configuration.SourceState)90 Test (org.testng.annotations.Test)76 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)44 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)38 State (org.apache.gobblin.configuration.State)30 WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState)11 Partition (org.apache.hadoop.hive.ql.metadata.Partition)8 Table (org.apache.hadoop.hive.ql.metadata.Table)8 IterableDatasetFinder (org.apache.gobblin.dataset.IterableDatasetFinder)7 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)7 Extract (org.apache.gobblin.source.workunit.Extract)7 DateTime (org.joda.time.DateTime)7 Dataset (org.apache.gobblin.dataset.Dataset)6 PartitionableDataset (org.apache.gobblin.dataset.PartitionableDataset)6 MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit)6 WorkUnitStream (org.apache.gobblin.source.workunit.WorkUnitStream)6 IOException (java.io.IOException)5 Path (org.apache.hadoop.fs.Path)5 Gson (com.google.gson.Gson)4 JsonObject (com.google.gson.JsonObject)4