use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testReadPreviousNullWatermarks.
/**
 * A previous watermark workunit state on which no actual high watermark was set must
 * result in an empty collection of previous watermarks.
 */
@Test
public void testReadPreviousNullWatermarks() throws Exception {
  // Flagged as a watermark workunit, but no watermark value is ever attached to it.
  WorkUnitState priorWatermarkState = new WorkUnitState();
  priorWatermarkState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
  priorWatermarkState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn");

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(priorWatermarkState));
  PartitionLevelWatermarker partitionWatermarker = new PartitionLevelWatermarker(sourceState);

  int previousWatermarkCount = partitionWatermarker.getPreviousWatermarks().size();
  Assert.assertEquals(previousWatermarkCount, 0);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionLevelWatermarkerTest method testRecentlyModifiedPartitionWatermarks.
// No previous state. Five partitions are reported as modified, but only the three most
// recently modified ones are expected to be retained in getExpectedHighWatermark.
@Test
public void testRecentlyModifiedPartitionWatermarks() throws Exception {
  final String database = "testRecentlyModifiedPartitionWatermarks";
  LocalHiveMetastoreTestUtils.getInstance().dropDatabaseIfExists(database);

  SourceState sourceState = new SourceState();
  sourceState.setProp(HiveSource.HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS_KEY, 3);

  final long fiveDaysAgoMillis = new DateTime().minusDays(5).getMillis();
  PartitionLevelWatermarker partitionWatermarker = new PartitionLevelWatermarker(sourceState);
  partitionWatermarker.setLeastWatermarkToPersistInState(fiveDaysAgoMillis);

  Table table = localTestTable(database, "testTable2", true);

  // Partition values and their update times, index-aligned. The first two update times are
  // earlier than the value given to setLeastWatermarkToPersistInState; the last three are later.
  final String[] partitionValues = {"2010", "2011", "2012", "2013", "2014"};
  final long[] updateTimes = {
      fiveDaysAgoMillis - 100L,
      fiveDaysAgoMillis - 101L,
      fiveDaysAgoMillis + 102L,
      fiveDaysAgoMillis + 103L,
      fiveDaysAgoMillis + 104L
  };

  // All partitions are created before any processing callback is fired.
  final Partition[] partitions = new Partition[partitionValues.length];
  for (int i = 0; i < partitionValues.length; i++) {
    partitions[i] = localTestPartition(table, ImmutableList.of(partitionValues[i]));
  }

  partitionWatermarker.onTableProcessBegin(table, 0L);
  for (int i = 0; i < partitions.length; i++) {
    partitionWatermarker.onPartitionProcessBegin(partitions[i], 0L, updateTimes[i]);
  }

  List<WorkUnit> collectedWorkUnits = Lists.newArrayList();
  partitionWatermarker.onGetWorkunitsEnd(collectedWorkUnits);
  Assert.assertEquals(collectedWorkUnits.size(), 1);

  WorkUnit watermarkWorkUnit = collectedWorkUnits.get(0);
  Map<String, Long> actualWatermarks =
      watermarkWorkUnit.getExpectedHighWatermark(MultiKeyValueLongWatermark.class).getWatermarks();
  Assert.assertEquals(actualWatermarks.size(), 3, "expectedHighWatermarks size");

  ImmutableMap<String, Long> retainedWatermarks = ImmutableMap.of(
      "2014", fiveDaysAgoMillis + 104L,
      "2013", fiveDaysAgoMillis + 103L,
      "2012", fiveDaysAgoMillis + 102L);
  Assert.assertEquals(actualWatermarks, retainedWatermarks);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class TableLevelWatermarkerTest method testPartitionWatermarks.
/**
 * The table itself and every one of its partitions must report the same previous high
 * watermark, namely the watermark recorded for the table.
 */
@Test
public void testPartitionWatermarks() throws Exception {
  WorkUnitState priorTableState = new WorkUnitState();
  priorTableState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
  priorTableState.setActualHighWatermark(new LongWatermark(100L));

  SourceState sourceState = new SourceState(new State(), Lists.newArrayList(priorTableState));
  TableLevelWatermarker tableWatermarker = new TableLevelWatermarker(sourceState);

  Table table = mockTable("test_table");
  // Separate instance from the one stored in the previous state; used only for comparison.
  final LongWatermark expectedWatermark = new LongWatermark(100L);

  Assert.assertEquals(tableWatermarker.getPreviousHighWatermark(table), expectedWatermark);
  for (String partitionValue : new String[] {"2015", "2016"}) {
    Assert.assertEquals(
        tableWatermarker.getPreviousHighWatermark(mockPartition(table, ImmutableList.of(partitionValue))),
        expectedWatermark);
  }
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class CopySourcePrioritizationTest method testNoPrioritization.
/**
 * With no copy limits configured, the flattened workunit count must equal
 * {@code MyFinder.DATASETS * MyDataset.FILE_SETS * MyFileSet.FILES}.
 */
@Test
public void testNoPrioritization() throws Exception {
  SourceState jobState = new SourceState();
  jobState.setProp(DatasetUtils.DATASET_PROFILE_CLASS_KEY, MyFinder.class.getName());
  jobState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///");
  jobState.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "file:///");
  jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/target/dir");

  CopySource copySource = new CopySource();
  List<WorkUnit> flattenedWorkUnits = JobLauncherUtils.flattenWorkUnits(copySource.getWorkunits(jobState));

  Assert.assertEquals(flattenedWorkUnits.size(), MyFinder.DATASETS * MyDataset.FILE_SETS * MyFileSet.FILES);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class CopySourcePrioritizationTest method testUnprioritizedFileLimit.
/**
 * With the maximum copy entity count set to 10, exactly ten workunits must be produced and
 * they must cover the expected paths listed below.
 */
@Test
public void testUnprioritizedFileLimit() throws Exception {
  final String maxCopyPrefix = CopyConfiguration.MAX_COPY_PREFIX + ".";

  SourceState jobState = new SourceState();
  jobState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///");
  jobState.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "file:///");
  jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/target/dir");
  jobState.setProp(DatasetUtils.DATASET_PROFILE_CLASS_KEY, MyFinder.class.getName());
  // A single listing service disables parallel listing so that work unit selection is deterministic.
  jobState.setProp(CopySource.MAX_CONCURRENT_LISTING_SERVICES, 1);
  jobState.setProp(maxCopyPrefix + CopyResourcePool.ENTITIES_KEY, 10);
  jobState.setProp(maxCopyPrefix + CopyResourcePool.TOLERANCE_KEY, 1);

  CopySource copySource = new CopySource();
  List<WorkUnit> flattenedWorkUnits = JobLauncherUtils.flattenWorkUnits(copySource.getWorkunits(jobState));

  // The result must be limited to 10 entities.
  Assert.assertEquals(flattenedWorkUnits.size(), 10);

  List<String> selectedPaths = extractPaths(flattenedWorkUnits);
  final String[] expectedPaths = {
      "d0.fs0.f1", "d0.fs0.f2",
      "d0.fs1.f1", "d0.fs1.f2",
      "d0.fs2.f1", "d0.fs2.f2",
      "d0.fs3.f1", "d0.fs3.f2",
      "d1.fs0.f1", "d1.fs0.f2"
  };
  for (String expectedPath : expectedPaths) {
    Assert.assertTrue(selectedPaths.contains(expectedPath));
  }
}
Aggregations