use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class AbstractSourceTest method testGetPreviousWorkUnitStatesOnPartialRetryFullCommit.
/**
 * Verifies that no previous work unit states are returned for retry when the work unit retry
 * policy is set to "onpartial" while the job commit policy is set to "full".
 */
@Test
public void testGetPreviousWorkUnitStatesOnPartialRetryFullCommit() {
    // Seed the source state with the fixture's previous work unit states.
    SourceState stateUnderTest = new SourceState(new State(), this.previousWorkUnitStates);

    // The two policies are independent properties on the state.
    stateUnderTest.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "full");
    stateUnderTest.setProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY, "onpartial");

    // With this combination nothing is expected to be handed back for retry.
    Assert.assertEquals(
        this.testSource.getPreviousWorkUnitStatesForRetry(stateUnderTest),
        Collections.EMPTY_LIST);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class HiveSourceTest method testGetWorkunitsAfterWatermark.
/**
 * Creates two Avro tables in a fresh database, registers a previous work unit state for the
 * first table stamped with that table's create time, and checks that the source (configured
 * with the table-level watermarker factory) returns exactly one work unit, for the second table.
 */
@Test
public void testGetWorkunitsAfterWatermark() throws Exception {
    String dbName = "testdb4";
    String firstTable = "testtable1";
    String firstTableLocation = "/tmp/testtable1";
    String secondTable = "testtable2";
    String secondTableLocation = "/tmp/testtable2";

    // Start from a clean database, then create both tables.
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    this.hiveMetastoreTestUtils.createTestAvroTable(dbName, firstTable, firstTableLocation, Optional.<String>absent());
    this.hiveMetastoreTestUtils.createTestAvroTable(dbName, secondTable, secondTableLocation, Optional.<String>absent(), true);

    // Build the previous state for the first table; its create time is converted from seconds to milliseconds.
    List<WorkUnitState> priorStates = Lists.newArrayList();
    Table firstTableMeta = this.hiveMetastoreTestUtils.getLocalMetastoreClient().getTable(dbName, firstTable);
    long firstTableCreateMillis = TimeUnit.SECONDS.toMillis(firstTableMeta.getCreateTime());
    priorStates.add(ConversionHiveTestUtils.createWus(dbName, firstTable, firstTableCreateMillis));

    SourceState state = new SourceState(getTestState(dbName), priorStates);
    state.setProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, TableLevelWatermarker.Factory.class.getName());

    List<WorkUnit> produced = this.hiveSource.getWorkunits(state);
    Assert.assertEquals(produced.size(), 1);

    // The single work unit must refer to the second table.
    HiveWorkUnit onlyWorkUnit = new HiveWorkUnit(produced.get(0));
    Assert.assertEquals(onlyWorkUnit.getHiveDataset().getDbAndTable().getDb(), dbName);
    Assert.assertEquals(onlyWorkUnit.getHiveDataset().getDbAndTable().getTable(), secondTable);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class HiveSourceTest method testShouldCreateWorkunitsOlderThanLookback.
/**
 * Checks that a dummy partition whose create time is 35 days in the past is reported as older
 * than the lookback by a freshly initialized {@code HiveSource}.
 */
@Test
public void testShouldCreateWorkunitsOlderThanLookback() throws Exception {
    // Create time is placed 35 days before "now".
    long createdMillis = new DateTime(System.currentTimeMillis()).minusDays(35).getMillis();
    org.apache.hadoop.hive.ql.metadata.Partition oldPartition =
        this.hiveMetastoreTestUtils.createDummyPartition(createdMillis);

    SourceState state = getTestState("testDb6");
    HiveSource freshSource = new HiveSource();
    freshSource.initialize(state);

    Assert.assertEquals(
        freshSource.isOlderThanLookback(oldPartition),
        true,
        "Should not create workunits older than lookback");
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class HiveSourceTest method testIsOlderThanLookbackForDistcpGenerationTime.
/**
 * Checks that a dummy partition created with time 0, but carrying a distcp registration
 * generation time parameter of two days ago, is not reported as older than the lookback.
 */
@Test
public void testIsOlderThanLookbackForDistcpGenerationTime() throws Exception {
    // Default lookback time is 3 days
    long generationMillis = new DateTime(System.currentTimeMillis()).minusDays(2).getMillis();

    // The generation time is supplied as a string-valued partition parameter.
    Map<String, String> partitionParams = Maps.newHashMap();
    partitionParams.put(HiveSource.DISTCP_REGISTRATION_GENERATION_TIME_KEY, Long.toString(generationMillis));

    org.apache.hadoop.hive.ql.metadata.Partition dummyPartition =
        this.hiveMetastoreTestUtils.createDummyPartition(0);
    dummyPartition.getTPartition().setParameters(partitionParams);

    SourceState state = getTestState("testDb6");
    HiveSource freshSource = new HiveSource();
    freshSource.initialize(state);

    Assert.assertEquals(
        freshSource.isOlderThanLookback(dummyPartition),
        false,
        "Should create workunits newer than lookback");
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class HiveSourceTest method testGetWorkUnitsForPartitions.
/**
 * Creates a table partitioned on "field", adds a single partition with value "f1", and checks
 * that the source returns two work units, one of which describes that partition.
 */
@Test
public void testGetWorkUnitsForPartitions() throws Exception {
    String dbName = "testdb3";
    String tableName = "testtable3";
    String tableLocation = "/tmp/testtable3";

    // Start from a clean database.
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    SourceState state = getTestState(dbName);

    Table partitionedTable =
        this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableLocation, Optional.of("field"));
    // NOTE(review): the millisecond clock is narrowed to int here, which truncates the value;
    // confirm what unit addTestPartition expects for this argument.
    this.hiveMetastoreTestUtils.addTestPartition(partitionedTable, ImmutableList.of("f1"), (int) System.currentTimeMillis());

    List<WorkUnit> produced = this.hiveSource.getWorkunits(state);
    // One workunit for the partition + 1 dummy watermark workunit
    Assert.assertEquals(produced.size(), 2);

    // Pick whichever of the two work units is not flagged as the watermark work unit.
    WorkUnit first = produced.get(0);
    WorkUnit second = produced.get(1);
    HiveWorkUnit partitionWorkUnit = first.contains(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY)
        ? new HiveWorkUnit(second)
        : new HiveWorkUnit(first);

    Assert.assertEquals(partitionWorkUnit.getHiveDataset().getDbAndTable().getDb(), dbName);
    Assert.assertEquals(partitionWorkUnit.getHiveDataset().getDbAndTable().getTable(), tableName);
    Assert.assertEquals(partitionWorkUnit.getPartitionName().get(), "field=f1");
}
Aggregations