Search in sources :

Example 1 with ExtractType

use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by apache.

the class PartitionerTest method testGetHighWatermarkOnAppendExtract.

/**
 * Test getHighWatermark. Extract type: Append.
 *
 * <p>Scenarios exercised, in order:
 * <ol>
 *   <li>Full extract with an end value configured: the end value is returned and the
 *       partitioner reports the high watermark as user specified.</li>
 *   <li>Non-full extract, {@code APPEND_BATCH}: the default watermark value is returned
 *       and the high watermark is not reported as user specified.</li>
 *   <li>Non-full extract, {@code APPEND_DAILY}, no max-watermark limit: the partitioner's
 *       current time is returned and the high watermark is not reported as user specified.</li>
 *   <li>Non-full extract, {@code APPEND_DAILY}, with each supported {@code CURRENT<unit>-1}
 *       max-watermark limit: the limit-derived value is returned and the high watermark
 *       is reported as user specified.</li>
 * </ol>
 */
@Test
public void testGetHighWatermarkOnAppendExtract() {
    String endValue = "20140101000000";
    SourceState sourceState = new SourceState();
    sourceState.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true);
    sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, endValue);
    ExtractType extractType = ExtractType.APPEND_DAILY;
    TestPartitioner partitioner = new TestPartitioner(sourceState);
    // Full dump: the configured end value is used as the high watermark
    Assert.assertEquals(partitioner.getHighWatermark(extractType, null), Long.parseLong(endValue), "High watermark should be " + endValue);
    Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), true, "Should mark as user specified high watermark");
    partitioner.reset();
    // Test non-full-dump cases below
    sourceState.removeProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY);
    // No limit type
    Assert.assertEquals(partitioner.getHighWatermark(ExtractType.APPEND_BATCH, null), ConfigurationKeys.DEFAULT_WATERMARK_VALUE, "High watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
    Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), false, "Should not mark as user specified high watermark");
    // No limit delta
    long expected = Long.parseLong(TestPartitioner.currentTimeString);
    Assert.assertEquals(partitioner.getHighWatermark(extractType, null), expected, "High watermark should be " + expected);
    Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), false, "Should not mark as user specified high watermark");
    // CURRENTDATE - 1, CURRENTHOUR - 1, CURRENTMINUTE - 1, CURRENTSECOND - 1:
    // every limit is expected to yield the same high watermark.
    // NOTE(review): presumably because TestPartitioner pins "now" to 2017-01-01 00:00:00 - confirm.
    String[] maxLimits = {"CURRENTDATE-1", "CURRENTHOUR-1", "CURRENTMINUTE-1", "CURRENTSECOND-1"};
    for (String maxLimit : maxLimits) {
        sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit);
        Assert.assertEquals(partitioner.getHighWatermark(extractType, null), 20161231235959L, "High watermark should be 20161231235959 for max limit " + maxLimit);
        // The assertion expects true, so the failure message must say "should mark"
        Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), true, "Should mark as user specified high watermark for max limit " + maxLimit);
        // Reset between cases so one limit's state cannot leak into the next
        partitioner.reset();
    }
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) ExtractType(org.apache.gobblin.source.extractor.extract.ExtractType) Test(org.testng.annotations.Test)

Example 2 with ExtractType

use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by apache.

the class PartitionerTest method testGetHighWatermarkOnSnapshotExtract.

/**
 * Test getHighWatermark. Extract type: Snapshot.
 *
 * <p>With a full extract and an end value configured, a SIMPLE watermark is expected to
 * yield the default watermark value and a TIMESTAMP watermark the partitioner's current
 * time; in neither case is the high watermark reported as user specified.
 */
@Test
public void testGetHighWatermarkOnSnapshotExtract() {
    final String configuredEnd = "20140101000000";
    final String notUserSpecifiedMessage = "Should not mark as user specified high watermark";

    final SourceState state = new SourceState();
    // The configured end value is expected to be ignored for a full extract
    state.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true);
    state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, configuredEnd);

    final ExtractType snapshot = ExtractType.SNAPSHOT;
    final TestPartitioner snapshotPartitioner = new TestPartitioner(state);

    // SIMPLE watermark: falls back to the default watermark value
    Assert.assertEquals(
        snapshotPartitioner.getHighWatermark(snapshot, WatermarkType.SIMPLE),
        ConfigurationKeys.DEFAULT_WATERMARK_VALUE,
        "High watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
    Assert.assertEquals(snapshotPartitioner.getUserSpecifiedHighWatermark(), false, notUserSpecifiedMessage);

    // TIMESTAMP watermark: uses the partitioner's current time
    final long currentTime = Long.parseLong(TestPartitioner.currentTimeString);
    Assert.assertEquals(
        snapshotPartitioner.getHighWatermark(snapshot, WatermarkType.TIMESTAMP),
        currentTime,
        "High watermark should be " + currentTime);
    Assert.assertEquals(snapshotPartitioner.getUserSpecifiedHighWatermark(), false, notUserSpecifiedMessage);
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) ExtractType(org.apache.gobblin.source.extractor.extract.ExtractType) Test(org.testng.annotations.Test)

Example 3 with ExtractType

use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by apache.

the class PartitionerTest method testGetLowWatermarkOnAppendExtract.

/**
 * Test getLowWatermark. Extract type: Append.
 *
 * <p>Without a previous watermark the configured start value is expected. With a previous
 * watermark the result is expected to be previous watermark plus delta, and setting
 * SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS is expected not to change that.
 */
@Test
public void testGetLowWatermarkOnAppendExtract() {
    final String configuredStart = "20140101000000";
    final SourceState state = new SourceState();
    state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE, configuredStart);

    final ExtractType appendDaily = ExtractType.APPEND_DAILY;
    final TestPartitioner appendPartitioner = new TestPartitioner(state);
    final int step = 1;

    // No previous watermark: the configured start value is used
    Assert.assertEquals(
        appendPartitioner.getLowWatermark(appendDaily, null, ConfigurationKeys.DEFAULT_WATERMARK_VALUE, step),
        Long.parseLong(configuredStart),
        "Low watermark should be " + configuredStart);

    // With previous watermark: advance it by the delta
    final long lastWatermark = 20140101000050L;
    final long advanced = lastWatermark + step;
    final String advancedMessage = "Low watermark should be " + advanced;
    Assert.assertEquals(
        appendPartitioner.getLowWatermark(appendDaily, WatermarkType.SIMPLE, lastWatermark, step),
        advanced,
        advancedMessage);
    Assert.assertEquals(
        appendPartitioner.getLowWatermark(appendDaily, WatermarkType.TIMESTAMP, lastWatermark, step),
        advanced,
        advancedMessage);

    // The result has nothing to do with SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS
    int backupSecs = 10;
    state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, backupSecs);
    Assert.assertEquals(
        appendPartitioner.getLowWatermark(appendDaily, WatermarkType.SIMPLE, lastWatermark, step),
        advanced,
        advancedMessage);
    Assert.assertEquals(
        appendPartitioner.getLowWatermark(appendDaily, WatermarkType.TIMESTAMP, lastWatermark, step),
        advanced,
        advancedMessage);
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) ExtractType(org.apache.gobblin.source.extractor.extract.ExtractType) Test(org.testng.annotations.Test)

Example 4 with ExtractType

use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by apache.

the class PartitionerTest method testGetLowWatermarkOnSnapshotExtract.

/**
 * Test getLowWatermark. Extract type: Snapshot.
 *
 * <p>Without a previous watermark the configured start value is expected. With a previous
 * watermark the result is expected to be previous watermark plus delta, further reduced by
 * SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS once that property is set.
 */
@Test
public void testGetLowWatermarkOnSnapshotExtract() {
    final String configuredStart = "20140101000000";
    final SourceState state = new SourceState();
    state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE, configuredStart);

    final ExtractType snapshot = ExtractType.SNAPSHOT;
    final TestPartitioner snapshotPartitioner = new TestPartitioner(state);
    final int step = 1;

    // No previous watermark: the configured start value is used
    Assert.assertEquals(
        snapshotPartitioner.getLowWatermark(snapshot, null, ConfigurationKeys.DEFAULT_WATERMARK_VALUE, step),
        Long.parseLong(configuredStart),
        "Low watermark should be " + configuredStart);

    // With previous watermark: advance it by the delta
    final long lastWatermark = 20140101000050L;
    final long advanced = lastWatermark + step;
    final String advancedMessage = "Low watermark should be " + advanced;
    Assert.assertEquals(
        snapshotPartitioner.getLowWatermark(snapshot, WatermarkType.SIMPLE, lastWatermark, step),
        advanced,
        advancedMessage);
    Assert.assertEquals(
        snapshotPartitioner.getLowWatermark(snapshot, WatermarkType.TIMESTAMP, lastWatermark, step),
        advanced,
        advancedMessage);

    // With SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS: the backup amount is subtracted as well
    int backupSecs = 10;
    final long backedUp = lastWatermark + step - backupSecs;
    final String backedUpMessage = "Low watermark should be " + backedUp;
    state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, backupSecs);
    Assert.assertEquals(
        snapshotPartitioner.getLowWatermark(snapshot, WatermarkType.SIMPLE, lastWatermark, step),
        backedUp,
        backedUpMessage);
    Assert.assertEquals(
        snapshotPartitioner.getLowWatermark(snapshot, WatermarkType.TIMESTAMP, lastWatermark, step),
        backedUp,
        backedUpMessage);
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) ExtractType(org.apache.gobblin.source.extractor.extract.ExtractType) Test(org.testng.annotations.Test)

Example 5 with ExtractType

use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by apache.

the class Partitioner method createUserSpecifiedPartitions.

/**
 * Generate the partitions based on the lists specified by the user in job config.
 *
 * <p>The watermark points are read from {@code USER_SPECIFIED_PARTITIONS} and each one is
 * passed through {@code adjustWatermark} for the configured watermark type:
 * <ul>
 *   <li>No points: a single partition whose low and high watermark are both
 *       {@code ConfigurationKeys.DEFAULT_WATERMARK_VALUE}.</li>
 *   <li>One point: a single partition starting at that point. Its high watermark is the
 *       adjusted current time, or the default watermark value for a SIMPLE watermark type.</li>
 *   <li>Two or more points: one partition per pair of consecutive points.</li>
 * </ul>
 *
 * <p>NOTE(review): the fourth {@code Partition} constructor argument appears to control
 * whether the upper bound of the partition is kept (per the inline comments below);
 * confirm against {@code Partition}.
 *
 * @return the partitions derived from the user-specified points; never empty
 * @throws IllegalArgumentException if the configured watermark type or extract type does
 *         not name a constant of the corresponding enum (thrown by {@code Enum.valueOf})
 */
private List<Partition> createUserSpecifiedPartitions() {
    List<Partition> partitions = new ArrayList<>();
    List<String> watermarkPoints = state.getPropAsList(USER_SPECIFIED_PARTITIONS);
    boolean isEarlyStopped = state.getPropAsBoolean(IS_EARLY_STOPPED);
    if (watermarkPoints == null || watermarkPoints.isEmpty()) {
        LOG.info("There should be some partition points");
        long defaultWatermark = ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
        partitions.add(new Partition(defaultWatermark, defaultWatermark, true, true));
        return partitions;
    }
    // Upper-case with Locale.ROOT: enum names are ASCII, and the default locale may map
    // 'i' to a dotted capital I (e.g. Turkish), which would make valueOf fail.
    WatermarkType watermarkType = WatermarkType.valueOf(
        state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE)
            .toUpperCase(java.util.Locale.ROOT));
    long lowWatermark = adjustWatermark(watermarkPoints.get(0), watermarkType);
    long highWatermark = ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
    // Only one partition point specified
    if (watermarkPoints.size() == 1) {
        if (watermarkType != WatermarkType.SIMPLE) {
            String timeZone = this.state.getProp(ConfigurationKeys.SOURCE_TIMEZONE);
            String currentTime = Utils.dateTimeToString(getCurrentTime(timeZone), WATERMARKTIMEFORMAT, timeZone);
            highWatermark = adjustWatermark(currentTime, watermarkType);
        }
        partitions.add(new Partition(lowWatermark, highWatermark, true, false));
        return partitions;
    }
    // Every point except the first and the last closes one partition and opens the next
    int lastIndex = watermarkPoints.size() - 1;
    for (int i = 1; i < lastIndex; i++) {
        highWatermark = adjustWatermark(watermarkPoints.get(i), watermarkType);
        partitions.add(new Partition(lowWatermark, highWatermark, true));
        lowWatermark = highWatermark;
    }
    // Last partition
    highWatermark = adjustWatermark(watermarkPoints.get(lastIndex), watermarkType);
    // NOTE(review): no default is supplied for the extract type, so this presumably fails
    // with a NullPointerException when the property is unset - confirm getProp's contract.
    ExtractType extractType = ExtractType.valueOf(
        this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE).toUpperCase(java.util.Locale.ROOT));
    // If it is early stop, we should not remove upper bounds
    if ((isFullDump() || isSnapshot(extractType)) && !isEarlyStopped) {
        // The upper bounds can be removed for last work unit
        partitions.add(new Partition(lowWatermark, highWatermark, true, false));
    } else {
        // The upper bounds can not be removed for last work unit
        partitions.add(new Partition(lowWatermark, highWatermark, true, true));
    }
    return partitions;
}
Also used : WatermarkType(org.apache.gobblin.source.extractor.watermark.WatermarkType) ArrayList(java.util.ArrayList) ExtractType(org.apache.gobblin.source.extractor.extract.ExtractType)

Aggregations

ExtractType (org.apache.gobblin.source.extractor.extract.ExtractType): 7, SourceState (org.apache.gobblin.configuration.SourceState): 4, Test (org.testng.annotations.Test): 4, WatermarkType (org.apache.gobblin.source.extractor.watermark.WatermarkType): 3, WatermarkPredicate (org.apache.gobblin.source.extractor.watermark.WatermarkPredicate): 2, ArrayList (java.util.ArrayList): 1