Search in sources :

Example 71 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testReadPreviousWatermarksMultipleTables.

// Two watermark workunit states belonging to different dataset URNs are handed to the
// watermarker as previous state; each must be readable back under its own URN.
@Test
public void testReadPreviousWatermarksMultipleTables() throws Exception {
    ImmutableMap<String, Long> yearWatermarks = ImmutableMap.of("2015", 100L, "2016", 101L);
    ImmutableMap<String, Long> monthWatermarks = ImmutableMap.of("01", 1L, "02", 2L);

    // Previous watermark workunit for the first dataset.
    WorkUnitState firstDatasetState = new WorkUnitState();
    firstDatasetState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn");
    firstDatasetState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    firstDatasetState.setActualHighWatermark(new MultiKeyValueLongWatermark(yearWatermarks));

    // Previous watermark workunit for the second dataset.
    WorkUnitState secondDatasetState = new WorkUnitState();
    secondDatasetState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn2");
    secondDatasetState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    secondDatasetState.setActualHighWatermark(new MultiKeyValueLongWatermark(monthWatermarks));

    SourceState sourceState =
        new SourceState(new State(), Lists.newArrayList(firstDatasetState, secondDatasetState));
    PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(sourceState);

    // One entry per dataset URN, each carrying the partition watermarks it was created with.
    Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 2);
    Assert.assertEquals(watermarker.getPreviousWatermarks().get("test_dataset_urn"), yearWatermarks);
    Assert.assertEquals(watermarker.getPreviousWatermarks().get("test_dataset_urn2"), monthWatermarks);

    // Every previous watermark must also have been carried over into the expected high watermarks.
    Assert.assertEquals(watermarker.getPreviousWatermarks(), watermarker.getExpectedHighWatermarks());
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) SourceState(org.apache.gobblin.configuration.SourceState) Test(org.testng.annotations.Test)

Example 72 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testGetPreviousHighWatermarkForPartition.

// A single previous watermark workunit holds per-partition watermarks for one dataset;
// the watermarker is then asked for the previous high watermark of each mocked partition.
@Test
public void testGetPreviousHighWatermarkForPartition() throws Exception {
    WorkUnitState previousState = new WorkUnitState();
    previousState.setProp(ConfigurationKeys.DATASET_URN_KEY, "db@test_dataset_urn");
    previousState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    previousState.setActualHighWatermark(
        new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L, "2016", 101L)));

    PartitionLevelWatermarker watermarker =
        new PartitionLevelWatermarker(new SourceState(new State(), Lists.newArrayList(previousState)));

    // Both partitions hang off the same mocked table and are identified by their single value.
    Table mockedTable = mockTable("test_dataset_urn");
    Partition year2015 = mockPartition(mockedTable, ImmutableList.of("2015"));
    Partition year2016 = mockPartition(mockedTable, ImmutableList.of("2016"));

    LongWatermark expectedFor2015 = new LongWatermark(100L);
    LongWatermark expectedFor2016 = new LongWatermark(101L);
    Assert.assertEquals(watermarker.getPreviousHighWatermark(year2015), expectedFor2015);
    Assert.assertEquals(watermarker.getPreviousHighWatermark(year2016), expectedFor2016);
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) SourceState(org.apache.gobblin.configuration.SourceState) Table(org.apache.hadoop.hive.ql.metadata.Table) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) SourceState(org.apache.gobblin.configuration.SourceState) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 73 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class TableLevelWatermarkerTest method testPreviousState.

// Two previous workunit states for the same table carry different high watermarks; the
// watermarker is expected to report the lower of the two as the table's previous watermark.
@Test
public void testPreviousState() throws Exception {
    WorkUnitState previousWus = new WorkUnitState();
    previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
    previousWus.setActualHighWatermark(new LongWatermark(100L));
    // Watermark will be lowest of 100L and 101L
    WorkUnitState previousWus1 = new WorkUnitState();
    previousWus1.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
    previousWus1.setActualHighWatermark(new LongWatermark(101L));
    // Both states must be part of the previous state. Previously only previousWus was passed,
    // so the 101L state was never seen and the "lowest of" behavior was not exercised.
    SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus, previousWus1));
    TableLevelWatermarker watermarker = new TableLevelWatermarker(state);
    Assert.assertEquals(watermarker.getPreviousHighWatermark(mockTable("test_table")), new LongWatermark(100L));
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) SourceState(org.apache.gobblin.configuration.SourceState) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 74 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class TableLevelWatermarkerTest method testPreviousStateWithPartitionWatermark.

// A regular table-level workunit state and a partition-level watermark workunit state exist
// for the same table; only the regular one should contribute to the table's previous watermark.
@Test
public void testPreviousStateWithPartitionWatermark() throws Exception {
    WorkUnitState previousWus = new WorkUnitState();
    previousWus.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
    previousWus.setActualHighWatermark(new LongWatermark(100L));
    // Watermark workunits created by PartitionLevelWatermarker need to be ignored.
    WorkUnitState previousWus1 = new WorkUnitState();
    previousWus1.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
    previousWus1.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    previousWus1.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("part1", 200L)));
    // The watermark workunit has to be present in the previous state for "ignored" to mean
    // anything. Previously only previousWus was passed, so nothing was there to ignore.
    SourceState state = new SourceState(new State(), Lists.newArrayList(previousWus, previousWus1));
    TableLevelWatermarker watermarker = new TableLevelWatermarker(state);
    Assert.assertEquals(watermarker.getPreviousHighWatermark(mockTable("test_table")), new LongWatermark(100L));
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) SourceState(org.apache.gobblin.configuration.SourceState) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 75 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class CopySourcePrioritizationTest method testPrioritization.

// This test uses a prioritizer that preferentially copies the lower file sets of each dataset.
// The copy resource pool is capped at 8 entities, so exactly 8 flattened workunits are expected.
@Test
public void testPrioritization() throws Exception {
    String maxCopyKeyPrefix = CopyConfiguration.MAX_COPY_PREFIX + ".";

    SourceState jobState = new SourceState();
    // Source and writer both point at the local file system.
    jobState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///");
    jobState.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "file:///");
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/target/dir");
    // Plug in the test's dataset finder and prioritizer.
    jobState.setProp(DatasetUtils.DATASET_PROFILE_CLASS_KEY, MyFinder.class.getName());
    jobState.setProp(CopyConfiguration.PRIORITIZER_ALIAS_KEY, MyPrioritizer.class.getName());
    // Resource limits applied to the copy.
    jobState.setProp(maxCopyKeyPrefix + CopyResourcePool.ENTITIES_KEY, 8);
    jobState.setProp(maxCopyKeyPrefix + CopyResourcePool.TOLERANCE_KEY, 1);

    CopySource copySource = new CopySource();
    List<WorkUnit> flattened = JobLauncherUtils.flattenWorkUnits(copySource.getWorkunits(jobState));
    Assert.assertEquals(flattened.size(), 8);

    List<String> copiedPaths = extractPaths(flattened);
    String[] expectedPaths = {
        "d0.fs0.f1", "d0.fs0.f2",
        "d0.fs1.f1", "d0.fs1.f2",
        "d1.fs0.f1", "d1.fs0.f2",
        "d1.fs1.f1", "d1.fs1.f2"
    };
    for (String expectedPath : expectedPaths) {
        Assert.assertTrue(copiedPaths.contains(expectedPath));
    }
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Aggregations

SourceState (org.apache.gobblin.configuration.SourceState)90 Test (org.testng.annotations.Test)76 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)44 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)38 State (org.apache.gobblin.configuration.State)30 WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState)11 Partition (org.apache.hadoop.hive.ql.metadata.Partition)8 Table (org.apache.hadoop.hive.ql.metadata.Table)8 IterableDatasetFinder (org.apache.gobblin.dataset.IterableDatasetFinder)7 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)7 Extract (org.apache.gobblin.source.workunit.Extract)7 DateTime (org.joda.time.DateTime)7 Dataset (org.apache.gobblin.dataset.Dataset)6 PartitionableDataset (org.apache.gobblin.dataset.PartitionableDataset)6 MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit)6 WorkUnitStream (org.apache.gobblin.source.workunit.WorkUnitStream)6 IOException (java.io.IOException)5 Path (org.apache.hadoop.fs.Path)5 Gson (com.google.gson.Gson)4 JsonObject (com.google.gson.JsonObject)4