use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testExpectedHighWatermarkNoPreviousState.
/**
 * With an empty {@link SourceState} (no previous workunit states), processes one partition in each
 * of two tables and checks that:
 * <ul>
 *   <li>the previous high watermark reported for every table and partition is 0,</li>
 *   <li>exactly two workunits are emitted, both flagged as watermark workunits, and</li>
 *   <li>each workunit carries its table's complete name as dataset URN and an expected high
 *       watermark holding the update time passed for that table's partition.</li>
 * </ul>
 */
@Test
public void testExpectedHighWatermarkNoPreviousState() throws Exception {
  final String dbName = "testExpectedHighWatermarkNoPreviousState";
  LocalHiveMetastoreTestUtils.getInstance().dropDatabaseIfExists(dbName);

  final long baseMillis = new DateTime().getMillis();
  final long firstPartitionUpdateTime = baseMillis + 2015L;
  final long secondPartitionUpdateTime = baseMillis + 16L;

  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(new SourceState());

  // First table with a single partition.
  Table firstTable = localTestTable(dbName, "testTable1", true);
  Partition firstPartition = localTestPartition(firstTable, Lists.newArrayList("2015"));
  watermarker.onTableProcessBegin(firstTable, 0L);
  watermarker.onPartitionProcessBegin(firstPartition, 0L, firstPartitionUpdateTime);

  // Second table with a single partition.
  Table secondTable = localTestTable(dbName, "testTable2", true);
  Partition secondPartition = localTestPartition(secondTable, Lists.newArrayList("2016"));
  watermarker.onTableProcessBegin(secondTable, 0L);
  watermarker.onPartitionProcessBegin(secondPartition, 0L, secondPartitionUpdateTime);

  List<WorkUnit> emittedWorkunits = Lists.newArrayList();
  watermarker.onGetWorkunitsEnd(emittedWorkunits);

  // Nothing was carried over from a previous run, so every previous high watermark is 0.
  Assert.assertEquals(watermarker.getPreviousHighWatermark(firstPartition).getValue(), 0L);
  Assert.assertEquals(watermarker.getPreviousHighWatermark(firstTable).getValue(), 0L);
  Assert.assertEquals(watermarker.getPreviousHighWatermark(secondPartition).getValue(), 0L);
  Assert.assertEquals(watermarker.getPreviousHighWatermark(secondTable).getValue(), 0L);

  Assert.assertEquals(emittedWorkunits.size(), 2);
  for (WorkUnit emitted : emittedWorkunits) {
    Assert.assertTrue(emitted.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY));
  }

  // Order the workunits by dataset URN so the positional assertions below are deterministic.
  Comparator<WorkUnit> byDatasetUrn = new Comparator<WorkUnit>() {
    @Override
    public int compare(WorkUnit left, WorkUnit right) {
      String leftUrn = left.getProp(ConfigurationKeys.DATASET_URN_KEY);
      String rightUrn = right.getProp(ConfigurationKeys.DATASET_URN_KEY);
      return leftUrn.compareTo(rightUrn);
    }
  };
  Collections.sort(emittedWorkunits, byDatasetUrn);

  WorkUnit firstTableWorkunit = emittedWorkunits.get(0);
  WorkUnit secondTableWorkunit = emittedWorkunits.get(1);

  Assert.assertEquals(firstTableWorkunit.getProp(ConfigurationKeys.DATASET_URN_KEY), firstTable.getCompleteName());
  Assert.assertEquals(secondTableWorkunit.getProp(ConfigurationKeys.DATASET_URN_KEY), secondTable.getCompleteName());

  Assert.assertEquals(
      firstTableWorkunit.getExpectedHighWatermark(MultiKeyValueLongWatermark.class).getWatermarks(),
      ImmutableMap.of(PartitionLevelWatermarker.partitionKey(firstPartition), firstPartitionUpdateTime));
  Assert.assertEquals(
      secondTableWorkunit.getExpectedHighWatermark(MultiKeyValueLongWatermark.class).getWatermarks(),
      ImmutableMap.of(PartitionLevelWatermarker.partitionKey(secondPartition), secondPartitionUpdateTime));
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testMoreThanOneWatermarkWorkunits.
/**
 * Builds a previous state containing two watermark workunits that share the same dataset URN and
 * expects constructing a {@link PartitionLevelWatermarker} from it to throw
 * {@link IllegalStateException}.
 */
@Test(expectedExceptions = IllegalStateException.class)
public void testMoreThanOneWatermarkWorkunits() throws Exception {
  final String datasetUrn = "test_dataset_urn";

  WorkUnitState firstWatermarkState = new WorkUnitState();
  firstWatermarkState.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetUrn);
  firstWatermarkState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  firstWatermarkState.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L)));

  // Second watermark workunit for the very same dataset URN.
  WorkUnitState duplicateWatermarkState = new WorkUnitState();
  duplicateWatermarkState.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetUrn);
  duplicateWatermarkState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  duplicateWatermarkState.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2016", 101L)));

  SourceState stateWithDuplicates =
      new SourceState(new State(), Lists.newArrayList(firstWatermarkState, duplicateWatermarkState));

  // The constructor is expected to throw IllegalStateException.
  new PartitionLevelWatermarker(stateWithDuplicates);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testRecentlyModifiedPartitionWatermarksWithPreviousState.
/**
 * The previous state holds 3 partition watermarks and 3 new partitions are processed. With the
 * least watermark to persist set to "5 days ago", the emitted watermark workunit is expected to
 * contain exactly 3 entries: "2012" from the previous state plus the new "2013" and "2014".
 * The entries whose values lie before that instant ("2010", "2011", "2009") are expected to be
 * absent.
 */
@Test
public void testRecentlyModifiedPartitionWatermarksWithPreviousState() throws Exception {
  final String dbName = "testRecentlyModifiedPartitionWatermarksWithPreviousState";
  LocalHiveMetastoreTestUtils.getInstance().dropDatabaseIfExists(dbName);

  final long cutoffMillis = new DateTime().minusDays(5).getMillis();

  WorkUnitState priorWatermarkState = new WorkUnitState();
  priorWatermarkState.setProp(ConfigurationKeys.DATASET_URN_KEY, dbName + "@testTable2");
  priorWatermarkState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  priorWatermarkState.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of(
      "2010", cutoffMillis - 100L, // Do not retain
      "2011", cutoffMillis - 101L, // Do not retain
      "2012", cutoffMillis + 102L))); // Do retain

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(priorWatermarkState));
  sourceState.setProp(HiveSource.HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS_KEY, 3);

  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(sourceState);
  // Explicitly pin the least watermark to persist to the 5-days-ago instant.
  watermarker.setLeastWatermarkToPersistInState(cutoffMillis);

  Table table = localTestTable(dbName, "testTable2", true);
  // Watermark not retained
  Partition partition2009 = localTestPartition(table, ImmutableList.of("2009"));
  // Watermarks retained
  Partition partition2013 = localTestPartition(table, ImmutableList.of("2013"));
  Partition partition2014 = localTestPartition(table, ImmutableList.of("2014"));

  watermarker.onTableProcessBegin(table, 0L);
  // Watermark not retained
  watermarker.onPartitionProcessBegin(partition2009, 0L, cutoffMillis - 99L);
  // Watermarks retained
  watermarker.onPartitionProcessBegin(partition2013, 0L, cutoffMillis + 103L);
  watermarker.onPartitionProcessBegin(partition2014, 0L, cutoffMillis + 104L);

  List<WorkUnit> emittedWorkunits = Lists.newArrayList();
  watermarker.onGetWorkunitsEnd(emittedWorkunits);
  Assert.assertEquals(emittedWorkunits.size(), 1);

  Map<String, Long> actualWatermarks =
      emittedWorkunits.get(0).getExpectedHighWatermark(MultiKeyValueLongWatermark.class).getWatermarks();
  Assert.assertEquals(actualWatermarks.size(), 3, "expectedHighWatermarks size");

  ImmutableMap<String, Long> retainedWatermarks = ImmutableMap.of(
      "2014", cutoffMillis + 104L,
      "2013", cutoffMillis + 103L,
      "2012", cutoffMillis + 102L);
  Assert.assertEquals(actualWatermarks, retainedWatermarks);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testPartitionBeginBegoreTableBegin.
/**
 * Calls {@code onPartitionProcessBegin} for a partition without a preceding
 * {@code onTableProcessBegin} for its table and expects {@link IllegalStateException}.
 */
@Test(expectedExceptions = IllegalStateException.class)
public void testPartitionBeginBegoreTableBegin() throws Exception {
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(new SourceState());

  Table mockedTable = mockTable("test_dataset_urn");
  Partition mockedPartition = mockPartition(mockedTable, ImmutableList.of(""));

  // onTableProcessBegin is deliberately never invoked for mockedTable.
  watermarker.onPartitionProcessBegin(mockedPartition, 0L, 0L);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testReadPreviousWatermarks.
/**
 * Seeds the source state with a single watermark workunit and checks that a new
 * {@link PartitionLevelWatermarker} exposes its partition watermarks under the workunit's dataset
 * URN, and that the expected high watermarks start out equal to the previous watermarks.
 */
@Test
public void testReadPreviousWatermarks() throws Exception {
  final String datasetUrn = "test_dataset_urn";

  WorkUnitState priorWatermarkState = new WorkUnitState();
  priorWatermarkState.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetUrn);
  priorWatermarkState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  priorWatermarkState.setActualHighWatermark(
      new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L, "2016", 101L)));

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(priorWatermarkState));
  PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(sourceState);

  // Exactly one dataset is known, carrying both partition watermarks from the previous run.
  Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 1);
  Assert.assertEquals(
      watermarker.getPreviousWatermarks().get(datasetUrn),
      ImmutableMap.of("2015", 100L, "2016", 101L));

  // Make sure all the previousWatermarks are added into current expectedHighWatermarks
  Assert.assertEquals(watermarker.getPreviousWatermarks(), watermarker.getExpectedHighWatermarks());
}
Aggregations