Search in sources :

Example 66 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testNoPreviousWatermarkWorkunits.

/**
 * Feeds the watermarker two previous work unit states - one flagged with
 * {@code IS_WATERMARK_WORKUNIT_KEY} and carrying a {@code MultiKeyValueLongWatermark}, the other
 * unflagged and carrying a plain {@code LongWatermark} - and asserts that exactly one previous
 * watermark entry is reported, stored under the flagged state's dataset URN.
 */
@Test
public void testNoPreviousWatermarkWorkunits() throws Exception {
    ImmutableMap<String, Long> partitionWatermarks = ImmutableMap.of("2015", 100L);

    // Previous state explicitly flagged as a watermark workunit
    WorkUnitState flaggedState = new WorkUnitState();
    flaggedState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn");
    flaggedState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    flaggedState.setActualHighWatermark(new MultiKeyValueLongWatermark(partitionWatermarks));

    // Previous state that never sets IS_WATERMARK_WORKUNIT_KEY
    WorkUnitState unflaggedState = new WorkUnitState();
    unflaggedState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn2");
    unflaggedState.setActualHighWatermark(new LongWatermark(101L));

    SourceState sourceState = new SourceState(new State(), Lists.newArrayList(flaggedState, unflaggedState));
    PartitionLevelWatermarker underTest = new PartitionLevelWatermarker(sourceState);

    Assert.assertEquals(underTest.getPreviousWatermarks().size(), 1);
    Assert.assertEquals(underTest.getPreviousWatermarks().get("test_dataset_urn"), partitionWatermarks);
}
Also used: SourceState (org.apache.gobblin.configuration.SourceState), WorkUnitState (org.apache.gobblin.configuration.WorkUnitState), State (org.apache.gobblin.configuration.State), LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark), Test (org.testng.annotations.Test)

Example 67 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testStateStoreReadWrite.

/**
 * Round-trips a partition watermark through work unit state: a first watermarker processes one
 * table and one partition ("2016"), its generated work unit is wrapped in a
 * {@code WorkUnitState} and given the actual high watermark, and a second watermarker built from
 * that state must report the same partition/timestamp pair under the {@code db@table} key.
 */
@Test
public void testStateStoreReadWrite() throws Exception {
    String dbName = "testStateStoreReadWrite";
    String expectedDatasetKey = dbName + "@table1";
    LocalHiveMetastoreTestUtils.getInstance().dropDatabaseIfExists(dbName);

    // First pass: record a watermark for partition "2016"
    PartitionLevelWatermarker firstPass = new PartitionLevelWatermarker(new SourceState());
    Table table = localTestTable(dbName, "table1", true);
    firstPass.onTableProcessBegin(table, 0L);
    long nowMillis = new DateTime().getMillis();
    firstPass.onPartitionProcessBegin(localTestPartition(table, ImmutableList.of("2016")), 0L, nowMillis);

    List<WorkUnit> generatedWorkUnits = Lists.newArrayList();
    firstPass.onGetWorkunitsEnd(generatedWorkUnits);

    @SuppressWarnings("deprecation")
    WorkUnitState carriedState = new WorkUnitState(generatedWorkUnits.get(0));
    firstPass.setActualHighWatermark(carriedState);

    // Second pass: read the watermark back from the carried-over state
    PartitionLevelWatermarker secondPass =
        new PartitionLevelWatermarker(new SourceState(new State(), Lists.newArrayList(carriedState)));

    Assert.assertEquals(secondPass.getPreviousWatermarks().size(), 1);
    Assert.assertEquals(secondPass.getPreviousWatermarks().get(expectedDatasetKey), ImmutableMap.of("2016", nowMillis));
}
Also used: SourceState (org.apache.gobblin.configuration.SourceState), Table (org.apache.hadoop.hive.ql.metadata.Table), WorkUnitState (org.apache.gobblin.configuration.WorkUnitState), State (org.apache.gobblin.configuration.State), WorkUnit (org.apache.gobblin.source.workunit.WorkUnit), DateTime (org.joda.time.DateTime), Test (org.testng.annotations.Test)

Example 68 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testDroppedPartitions.

/**
 * Starts from a previous state holding watermarks for partitions "2015-01" and "2015-02", then
 * begins processing a mocked partition "2015" whose parameters list those two partitions under
 * {@code REPLACED_PARTITIONS_HIVE_METASTORE_KEY}. Asserts that the expected high watermarks for
 * the dataset contain only the "2015" entry.
 */
@Test
public void testDroppedPartitions() throws Exception {
    String datasetUrn = "db@test_dataset_urn";

    WorkUnitState priorState = new WorkUnitState();
    priorState.setProp(ConfigurationKeys.DATASET_URN_KEY, datasetUrn);
    priorState.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);
    priorState.setActualHighWatermark(
        new MultiKeyValueLongWatermark(ImmutableMap.of("2015-01", 100L, "2015-02", 101L)));

    PartitionLevelWatermarker underTest =
        new PartitionLevelWatermarker(new SourceState(new State(), Lists.newArrayList(priorState)));

    Table table = mockTable("test_dataset_urn");
    FieldSchema yearKey = new FieldSchema("year", "string", "");
    Mockito.when(table.getPartitionKeys()).thenReturn(ImmutableList.of(yearKey));

    // The yearly partition "2015" stands in for the monthly partitions 2015-01 and 2015-02
    Partition yearlyPartition = mockPartition(table, ImmutableList.of("2015"));
    String replacedPartitions = "2015-01|2015-02";
    Mockito.when(yearlyPartition.getParameters()).thenReturn(
        ImmutableMap.of(AbstractAvroToOrcConverter.REPLACED_PARTITIONS_HIVE_METASTORE_KEY, replacedPartitions));

    underTest.onPartitionProcessBegin(yearlyPartition, 0L, 0L);

    Assert.assertEquals(underTest.getExpectedHighWatermarks().get(datasetUrn), ImmutableMap.of("2015", 0L));
}
Also used: Partition (org.apache.hadoop.hive.ql.metadata.Partition), SourceState (org.apache.gobblin.configuration.SourceState), Table (org.apache.hadoop.hive.ql.metadata.Table), WorkUnitState (org.apache.gobblin.configuration.WorkUnitState), State (org.apache.gobblin.configuration.State), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), Test (org.testng.annotations.Test)

Example 69 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class PartitionLevelWatermarkerTest method testReadPreviousNullWatermarks.

/**
 * Supplies a previous state that is flagged with {@code IS_WATERMARK_WORKUNIT_KEY} but has no
 * actual high watermark set, and asserts that the watermarker reports no previous watermarks.
 */
@Test
public void testReadPreviousNullWatermarks() throws Exception {
    // Flagged as a watermark workunit, but setActualHighWatermark is never called
    WorkUnitState stateWithoutWatermark = new WorkUnitState();
    stateWithoutWatermark.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_dataset_urn");
    stateWithoutWatermark.setProp(PartitionLevelWatermarker.IS_WATERMARK_WORKUNIT_KEY, true);

    PartitionLevelWatermarker underTest = new PartitionLevelWatermarker(
        new SourceState(new State(), Lists.newArrayList(stateWithoutWatermark)));

    Assert.assertEquals(underTest.getPreviousWatermarks().size(), 0);
}
Also used: SourceState (org.apache.gobblin.configuration.SourceState), WorkUnitState (org.apache.gobblin.configuration.WorkUnitState), State (org.apache.gobblin.configuration.State), Test (org.testng.annotations.Test)

Example 70 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class TableLevelWatermarkerTest method testPartitionWatermarks.

/**
 * Verifies that the table's previous high watermark is returned both for the table itself and
 * for each of its partitions ("2015" and "2016"), i.e. every partition sees the table-level value.
 */
@Test
public void testPartitionWatermarks() throws Exception {
    WorkUnitState priorState = new WorkUnitState();
    priorState.setProp(ConfigurationKeys.DATASET_URN_KEY, "test_table");
    priorState.setActualHighWatermark(new LongWatermark(100L));

    TableLevelWatermarker underTest =
        new TableLevelWatermarker(new SourceState(new State(), Lists.newArrayList(priorState)));

    Table table = mockTable("test_table");
    LongWatermark tableWatermark = new LongWatermark(100L);

    Assert.assertEquals(underTest.getPreviousHighWatermark(table), tableWatermark);
    // Each partition is expected to report the same table-level watermark
    for (String partitionValue : new String[] {"2015", "2016"}) {
        Assert.assertEquals(
            underTest.getPreviousHighWatermark(mockPartition(table, ImmutableList.of(partitionValue))),
            tableWatermark);
    }
}
Also used: SourceState (org.apache.gobblin.configuration.SourceState), Table (org.apache.hadoop.hive.ql.metadata.Table), WorkUnitState (org.apache.gobblin.configuration.WorkUnitState), State (org.apache.gobblin.configuration.State), LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark), Test (org.testng.annotations.Test)

Aggregations

State (org.apache.gobblin.configuration.State) 195, Test (org.testng.annotations.Test) 103, WorkUnitState (org.apache.gobblin.configuration.WorkUnitState) 74, SourceState (org.apache.gobblin.configuration.SourceState) 38, Path (org.apache.hadoop.fs.Path) 30, File (java.io.File) 20, IOException (java.io.IOException) 16, Map (java.util.Map) 14, WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState) 14, WorkUnit (org.apache.gobblin.source.workunit.WorkUnit) 14, TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState) 13, Properties (java.util.Properties) 12, FinalState (org.apache.gobblin.util.FinalState) 12, Configuration (org.apache.hadoop.conf.Configuration) 12, TaskLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults) 9, Config (com.typesafe.config.Config) 8, ArrayList (java.util.ArrayList) 8, GenericRecord (org.apache.avro.generic.GenericRecord) 8, LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark) 7, FileInputStream (java.io.FileInputStream) 6