Search in sources :

Example 66 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testExpectedHighWatermarkNoPreviousState.

/**
 * Starts from an empty {@link SourceState}, processes one partition in each of two tables and
 * asserts that all previous high watermarks read as 0 and that one watermark workunit per table
 * is emitted carrying the watermark supplied for that table's partition.
 */
@Test
public void testExpectedHighWatermarkNoPreviousState() throws Exception {
    final String dbName = "testExpectedHighWatermarkNoPreviousState";
    LocalHiveMetastoreTestUtils.getInstance().dropDatabaseIfExists(dbName);
    final long now = new DateTime().getMillis();
    final long firstExpectedWatermark = now + 2015L;
    final long secondExpectedWatermark = now + 16L;

    PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(new SourceState());

    // First table with a single partition.
    Table table = localTestTable(dbName, "testTable1", true);
    Partition part1 = localTestPartition(table, Lists.newArrayList("2015"));
    watermarker.onTableProcessBegin(table, 0L);
    watermarker.onPartitionProcessBegin(part1, 0L, firstExpectedWatermark);

    // Second table with a single partition.
    Table table2 = localTestTable(dbName, "testTable2", true);
    Partition part2 = localTestPartition(table2, Lists.newArrayList("2016"));
    watermarker.onTableProcessBegin(table2, 0L);
    watermarker.onPartitionProcessBegin(part2, 0L, secondExpectedWatermark);

    List<WorkUnit> workunits = Lists.newArrayList();
    watermarker.onGetWorkunitsEnd(workunits);

    // Previous high watermarks are all expected to be 0.
    Assert.assertEquals(watermarker.getPreviousHighWatermark(part1).getValue(), 0L);
    Assert.assertEquals(watermarker.getPreviousHighWatermark(table).getValue(), 0L);
    Assert.assertEquals(watermarker.getPreviousHighWatermark(part2).getValue(), 0L);
    Assert.assertEquals(watermarker.getPreviousHighWatermark(table2).getValue(), 0L);

    // Exactly two workunits were added, both flagged as watermark workunits.
    Assert.assertEquals(workunits.size(), 2);
    for (WorkUnit workunit : workunits) {
        Assert.assertTrue(workunit.getPropAsBoolean(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY));
    }

    // Sort by dataset URN so the index-based assertions below are deterministic.
    Comparator<WorkUnit> byDatasetUrn = new Comparator<WorkUnit>() {

        @Override
        public int compare(WorkUnit left, WorkUnit right) {
            String leftUrn = left.getProp(ConfigurationKeys.DATASET_URN_KEY);
            String rightUrn = right.getProp(ConfigurationKeys.DATASET_URN_KEY);
            return leftUrn.compareTo(rightUrn);
        }
    };
    Collections.sort(workunits, byDatasetUrn);

    WorkUnit firstWorkunit = workunits.get(0);
    WorkUnit secondWorkunit = workunits.get(1);
    Assert.assertEquals(firstWorkunit.getProp(ConfigurationKeys.DATASET_URN_KEY), table.getCompleteName());
    Assert.assertEquals(secondWorkunit.getProp(ConfigurationKeys.DATASET_URN_KEY), table2.getCompleteName());
    Assert.assertEquals(firstWorkunit.getExpectedHighWatermark(MultiKeyValueLongWatermark.class).getWatermarks(), ImmutableMap.of(PartitionLevelWatermarker.partitionKey(part1), firstExpectedWatermark));
    Assert.assertEquals(secondWorkunit.getExpectedHighWatermark(MultiKeyValueLongWatermark.class).getWatermarks(), ImmutableMap.of(PartitionLevelWatermarker.partitionKey(part2), secondExpectedWatermark));
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) SourceState(org.apache.gobblin.configuration.SourceState) Table(org.apache.hadoop.hive.ql.metadata.Table) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) DateTime(org.joda.time.DateTime) Test(org.testng.annotations.Test)

Example 67 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testMoreThanOneWatermarkWorkunits.

/**
 * Builds a {@link SourceState} holding two previous watermark workunit states for the same
 * dataset URN and expects constructing the watermarker to throw {@link IllegalStateException}.
 */
@Test(expectedExceptions = IllegalStateException.class)
public void testMoreThanOneWatermarkWorkunits() throws Exception {
    final String datasetUrn = "test_dataset_urn";

    // First watermark workunit state for the dataset.
    WorkUnitState firstPrevious = new WorkUnitState();
    firstPrevious.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetUrn);
    firstPrevious.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    firstPrevious.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2015", 100L)));

    // Second watermark workunit state carrying the very same dataset URN.
    WorkUnitState secondPrevious = new WorkUnitState();
    secondPrevious.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetUrn);
    secondPrevious.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    secondPrevious.setActualHighWatermark(new MultiKeyValueLongWatermark(ImmutableMap.of("2016", 101L)));

    SourceState state = new SourceState(new State(), Lists.newArrayList(firstPrevious, secondPrevious));

    // The constructor call is what is expected to raise IllegalStateException.
    new PartitionLevelWatermarker(state);
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) Test(org.testng.annotations.Test)

Example 68 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testRecentlyModifiedPartitionWatermarksWithPreviousState.

// Previous state 3. New partitions 3. 2 from new state retained
@Test
public void testRecentlyModifiedPartitionWatermarksWithPreviousState() throws Exception {
    final String dbName = "testRecentlyModifiedPartitionWatermarksWithPreviousState";
    LocalHiveMetastoreTestUtils.getInstance().dropDatabaseIfExists(dbName);
    final long fiveDaysAgo = new DateTime().minusDays(5).getMillis();

    // Previous watermark workunit state for testTable2 with three partition watermarks.
    WorkUnitState priorState = new WorkUnitState();
    priorState.setProp(ConfigurationKeys.DATASET_URN_KEY, dbName + "@testTable2");
    priorState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    ImmutableMap<String, Long> priorWatermarks = ImmutableMap.of(
        // Do not retain
        "2010", fiveDaysAgo - 100L,
        // Do not retain
        "2011", fiveDaysAgo - 101L,
        // Do retain
        "2012", fiveDaysAgo + 102L);
    priorState.setActualHighWatermark(new MultiKeyValueLongWatermark(priorWatermarks));

    SourceState state = new SourceState(new State(), Lists.newArrayList(priorState));
    state.setProp(HiveSource.HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS_KEY, 3);
    PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);
    watermarker.setLeastWatermarkToPersistInState(fiveDaysAgo);

    Table table = localTestTable(dbName, "testTable2", true);
    // Watermark not retained
    Partition part2009 = localTestPartition(table, ImmutableList.of("2009"));
    // Watermark retained
    Partition part2013 = localTestPartition(table, ImmutableList.of("2013"));
    Partition part2014 = localTestPartition(table, ImmutableList.of("2014"));

    watermarker.onTableProcessBegin(table, 0L);
    // Watermark not retained
    watermarker.onPartitionProcessBegin(part2009, 0L, fiveDaysAgo - 99L);
    // Watermark retained
    watermarker.onPartitionProcessBegin(part2013, 0L, fiveDaysAgo + 103L);
    watermarker.onPartitionProcessBegin(part2014, 0L, fiveDaysAgo + 104L);

    List<WorkUnit> workunits = Lists.newArrayList();
    watermarker.onGetWorkunitsEnd(workunits);
    Assert.assertEquals(workunits.size(), 1);

    // The single workunit should carry the one retained previous watermark plus the two new ones.
    Map<String, Long> actualWatermarks =
        workunits.get(0).getExpectedHighWatermark(MultiKeyValueLongWatermark.class).getWatermarks();
    Assert.assertEquals(actualWatermarks.size(), 3, "expectedHighWatermarks size");
    ImmutableMap<String, Long> expectedWatermarks = ImmutableMap.of(
        "2012", fiveDaysAgo + 102L,
        "2013", fiveDaysAgo + 103L,
        "2014", fiveDaysAgo + 104L);
    Assert.assertEquals(actualWatermarks, expectedWatermarks);
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) SourceState(org.apache.gobblin.configuration.SourceState) Table(org.apache.hadoop.hive.ql.metadata.Table) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) DateTime(org.joda.time.DateTime) State(org.apache.gobblin.configuration.State) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Example 69 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testPartitionBeginBegoreTableBegin.

/**
 * Calls {@code onPartitionProcessBegin} without any preceding {@code onTableProcessBegin} call
 * and expects an {@link IllegalStateException}.
 */
@Test(expectedExceptions = IllegalStateException.class)
public void testPartitionBeginBegoreTableBegin() throws Exception {
    PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(new SourceState());
    Table mockedTable = mockTable("test_dataset_urn");
    Partition mockedPartition = mockPartition(mockedTable, ImmutableList.of(""));

    // onTableProcessBegin has deliberately not been called for mockedTable.
    watermarker.onPartitionProcessBegin(mockedPartition, 0L, 0L);
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) SourceState(org.apache.gobblin.configuration.SourceState) Table(org.apache.hadoop.hive.ql.metadata.Table) Test(org.testng.annotations.Test)

Example 70 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testReadPreviousWatermarks.

/**
 * Seeds a {@link SourceState} with one previous watermark workunit state and asserts that the
 * watermarker exposes its watermarks under the dataset URN, and that the previous watermarks
 * equal the current expected high watermarks.
 */
@Test
public void testReadPreviousWatermarks() throws Exception {
    final String datasetUrn = "test_dataset_urn";
    ImmutableMap<String, Long> storedWatermarks = ImmutableMap.of("2015", 100L, "2016", 101L);

    WorkUnitState priorState = new WorkUnitState();
    priorState.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetUrn);
    priorState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    priorState.setActualHighWatermark(new MultiKeyValueLongWatermark(storedWatermarks));

    SourceState state = new SourceState(new State(), Lists.newArrayList(priorState));
    PartitionLevelWatermarker watermarker = new PartitionLevelWatermarker(state);

    Assert.assertEquals(watermarker.getPreviousWatermarks().size(), 1);
    Assert.assertEquals(watermarker.getPreviousWatermarks().get(datasetUrn), storedWatermarks);
    // Make sure all the previousWatermarks are added into current expectedHighWatermarks
    Assert.assertEquals(watermarker.getPreviousWatermarks(), watermarker.getExpectedHighWatermarks());
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) Test(org.testng.annotations.Test)

Aggregations

SourceState (org.apache.gobblin.configuration.SourceState)90 Test (org.testng.annotations.Test)76 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)44 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)38 State (org.apache.gobblin.configuration.State)30 WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState)11 Partition (org.apache.hadoop.hive.ql.metadata.Partition)8 Table (org.apache.hadoop.hive.ql.metadata.Table)8 IterableDatasetFinder (org.apache.gobblin.dataset.IterableDatasetFinder)7 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)7 Extract (org.apache.gobblin.source.workunit.Extract)7 DateTime (org.joda.time.DateTime)7 Dataset (org.apache.gobblin.dataset.Dataset)6 PartitionableDataset (org.apache.gobblin.dataset.PartitionableDataset)6 MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit)6 WorkUnitStream (org.apache.gobblin.source.workunit.WorkUnitStream)6 IOException (java.io.IOException)5 Path (org.apache.hadoop.fs.Path)5 Gson (com.google.gson.Gson)4 JsonObject (com.google.gson.JsonObject)4