Use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by Apache.
The class PartitionerTest, method testGetHighWatermarkOnAppendExtract.
/**
 * Test getHighWatermark. Extract type: Append.
 *
 * <p>Exercises, in order: a full dump with an explicit end value, a batch append with no limit
 * type, a daily append with no limit delta, and a daily append with each of the
 * {@code CURRENTDATE-1}, {@code CURRENTHOUR-1}, {@code CURRENTMINUTE-1} and
 * {@code CURRENTSECOND-1} max watermark limits.
 */
@Test
public void testGetHighWatermarkOnAppendExtract() {
  String endValue = "20140101000000";
  SourceState sourceState = new SourceState();
  sourceState.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true);
  sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, endValue);

  ExtractType extractType = ExtractType.APPEND_DAILY;
  TestPartitioner partitioner = new TestPartitioner(sourceState);

  // Full dump: the configured end value is returned and flagged as user specified
  Assert.assertEquals(partitioner.getHighWatermark(extractType, null), Long.parseLong(endValue),
      "High watermark should be " + endValue);
  Assert.assertTrue(partitioner.getUserSpecifiedHighWatermark(),
      "Should mark as user specified high watermark");
  partitioner.reset();

  // Test non-full-dump cases below
  sourceState.removeProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY);

  // No limit type
  Assert.assertEquals(partitioner.getHighWatermark(ExtractType.APPEND_BATCH, null),
      ConfigurationKeys.DEFAULT_WATERMARK_VALUE,
      "High watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
  Assert.assertFalse(partitioner.getUserSpecifiedHighWatermark(),
      "Should not mark as user specified high watermark");

  // No limit delta
  long expected = Long.parseLong(TestPartitioner.currentTimeString);
  Assert.assertEquals(partitioner.getHighWatermark(extractType, null), expected,
      "High watermark should be " + expected);
  Assert.assertFalse(partitioner.getUserSpecifiedHighWatermark(),
      "Should not mark as user specified high watermark");

  // CURRENT<unit> - 1: every limit is expected to produce the same high watermark and to be
  // flagged as user specified. The limit is included in the message so a failure names its case.
  String[] maxLimits = {"CURRENTDATE-1", "CURRENTHOUR-1", "CURRENTMINUTE-1", "CURRENTSECOND-1"};
  for (String maxLimit : maxLimits) {
    sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit);
    Assert.assertEquals(partitioner.getHighWatermark(extractType, null), 20161231235959L,
        "High watermark should be 20161231235959 for limit " + maxLimit);
    Assert.assertTrue(partitioner.getUserSpecifiedHighWatermark(),
        "Should mark as user specified high watermark for limit " + maxLimit);
    // Reset between cases so one limit's result cannot leak into the next assertion
    partitioner.reset();
  }
}
Use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by Apache.
The class PartitionerTest, method testGetHighWatermarkOnSnapshotExtract.
/**
 * Checks getHighWatermark for a snapshot extract.
 *
 * <p>With a full extract and an end value configured, a SIMPLE watermark is expected to yield
 * the default watermark value and a TIMESTAMP watermark the partitioner's current time; neither
 * result should be flagged as user specified.
 */
@Test
public void testGetHighWatermarkOnSnapshotExtract() {
  final String configuredEnd = "20140101000000";
  final SourceState state = new SourceState();
  // A full extract is expected to ignore SOURCE_QUERYBASED_END_VALUE
  state.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true);
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, configuredEnd);

  final ExtractType snapshot = ExtractType.SNAPSHOT;
  final TestPartitioner snapshotPartitioner = new TestPartitioner(state);
  final String notUserSpecified = "Should not mark as user specified high watermark";

  // SIMPLE watermark: falls back to the default watermark value
  Assert.assertEquals(
      snapshotPartitioner.getHighWatermark(snapshot, WatermarkType.SIMPLE),
      ConfigurationKeys.DEFAULT_WATERMARK_VALUE,
      "High watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
  Assert.assertEquals(snapshotPartitioner.getUserSpecifiedHighWatermark(), false, notUserSpecified);

  // TIMESTAMP watermark: uses the partitioner's current time
  long currentTime = Long.parseLong(TestPartitioner.currentTimeString);
  Assert.assertEquals(
      snapshotPartitioner.getHighWatermark(snapshot, WatermarkType.TIMESTAMP),
      currentTime,
      "High watermark should be " + currentTime);
  Assert.assertEquals(snapshotPartitioner.getUserSpecifiedHighWatermark(), false, notUserSpecified);
}
Use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by Apache.
The class PartitionerTest, method testGetLowWatermarkOnAppendExtract.
/**
 * Checks getLowWatermark for an append extract.
 *
 * <p>Without a previous watermark the configured start value is expected. With a previous
 * watermark the result is expected to be that watermark plus the delta, for both SIMPLE and
 * TIMESTAMP types, whether or not SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS is set.
 */
@Test
public void testGetLowWatermarkOnAppendExtract() {
  final SourceState state = new SourceState();
  final String configuredStart = "20140101000000";
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE, configuredStart);

  final TestPartitioner appendPartitioner = new TestPartitioner(state);
  final ExtractType appendDaily = ExtractType.APPEND_DAILY;
  final int step = 1;
  final WatermarkType[] watermarkTypes = {WatermarkType.SIMPLE, WatermarkType.TIMESTAMP};

  // No previous watermark: fall back to the configured start value
  Assert.assertEquals(
      appendPartitioner.getLowWatermark(appendDaily, null, ConfigurationKeys.DEFAULT_WATERMARK_VALUE, step),
      Long.parseLong(configuredStart),
      "Low watermark should be " + configuredStart);

  // With previous watermark: advance it by the delta
  final long lastWatermark = 20140101000050L;
  final long advanced = lastWatermark + step;
  final String advancedMessage = "Low watermark should be " + advanced;
  for (WatermarkType type : watermarkTypes) {
    Assert.assertEquals(
        appendPartitioner.getLowWatermark(appendDaily, type, lastWatermark, step), advanced, advancedMessage);
  }

  // The result has nothing to do with SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS
  final int backupSeconds = 10;
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, backupSeconds);
  for (WatermarkType type : watermarkTypes) {
    Assert.assertEquals(
        appendPartitioner.getLowWatermark(appendDaily, type, lastWatermark, step), advanced, advancedMessage);
  }
}
Use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by Apache.
The class PartitionerTest, method testGetLowWatermarkOnSnapshotExtract.
/**
 * Checks getLowWatermark for a snapshot extract.
 *
 * <p>Without a previous watermark the configured start value is expected. With a previous
 * watermark the result is expected to be that watermark plus the delta, and once
 * SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS is set, that sum minus the backup seconds. Both
 * SIMPLE and TIMESTAMP types are checked.
 */
@Test
public void testGetLowWatermarkOnSnapshotExtract() {
  final SourceState state = new SourceState();
  final String configuredStart = "20140101000000";
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE, configuredStart);

  final TestPartitioner snapshotPartitioner = new TestPartitioner(state);
  final ExtractType snapshot = ExtractType.SNAPSHOT;
  final int step = 1;
  final WatermarkType[] watermarkTypes = {WatermarkType.SIMPLE, WatermarkType.TIMESTAMP};

  // No previous watermark: fall back to the configured start value
  Assert.assertEquals(
      snapshotPartitioner.getLowWatermark(snapshot, null, ConfigurationKeys.DEFAULT_WATERMARK_VALUE, step),
      Long.parseLong(configuredStart),
      "Low watermark should be " + configuredStart);

  // With previous watermark: advance it by the delta
  final long lastWatermark = 20140101000050L;
  final long advanced = lastWatermark + step;
  for (WatermarkType type : watermarkTypes) {
    Assert.assertEquals(
        snapshotPartitioner.getLowWatermark(snapshot, type, lastWatermark, step),
        advanced,
        "Low watermark should be " + advanced);
  }

  // With SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS: the advanced watermark is moved back
  final int backupSeconds = 10;
  final long backedUp = lastWatermark + step - backupSeconds;
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, backupSeconds);
  for (WatermarkType type : watermarkTypes) {
    Assert.assertEquals(
        snapshotPartitioner.getLowWatermark(snapshot, type, lastWatermark, step),
        backedUp,
        "Low watermark should be " + backedUp);
  }
}
Use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by Apache.
The class Partitioner, method createUserSpecifiedPartitions.
/**
 * Builds the partition list from the watermark points the user listed in the job config.
 *
 * <ul>
 *   <li>No points: a single partition whose low and high watermarks are both the default
 *       watermark value.</li>
 *   <li>One point: a single partition starting at that point; its high watermark is the default
 *       value for SIMPLE watermarks and the adjusted current time otherwise.</li>
 *   <li>Several points: one partition per pair of consecutive points.</li>
 * </ul>
 *
 * @return the generated partitions, in the order of the configured points
 */
private List<Partition> createUserSpecifiedPartitions() {
  List<Partition> result = new ArrayList<>();
  List<String> points = state.getPropAsList(USER_SPECIFIED_PARTITIONS);
  boolean earlyStopped = state.getPropAsBoolean(IS_EARLY_STOPPED);

  if (points == null || points.isEmpty()) {
    LOG.info("There should be some partition points");
    result.add(new Partition(ConfigurationKeys.DEFAULT_WATERMARK_VALUE,
        ConfigurationKeys.DEFAULT_WATERMARK_VALUE, true, true));
    return result;
  }

  String configuredType = state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE,
      ConfigurationKeys.DEFAULT_WATERMARK_TYPE);
  WatermarkType type = WatermarkType.valueOf(configuredType.toUpperCase());
  long lower = adjustWatermark(points.get(0), type);

  // Only one partition point specified
  if (points.size() == 1) {
    long upper = ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
    if (type != WatermarkType.SIMPLE) {
      // Non-SIMPLE watermarks are bounded by "now" in the configured time zone
      String zone = this.state.getProp(ConfigurationKeys.SOURCE_TIMEZONE);
      String now = Utils.dateTimeToString(getCurrentTime(zone), WATERMARKTIMEFORMAT, zone);
      upper = adjustWatermark(now, type);
    }
    result.add(new Partition(lower, upper, true, false));
    return result;
  }

  // Every point except the first and the last closes one partition and opens the next
  int lastIndex = points.size() - 1;
  for (int idx = 1; idx < lastIndex; idx++) {
    long boundary = adjustWatermark(points.get(idx), type);
    result.add(new Partition(lower, boundary, true));
    lower = boundary;
  }

  // Last partition
  long finalUpper = adjustWatermark(points.get(lastIndex), type);
  String configuredExtract = this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE);
  ExtractType extractType = ExtractType.valueOf(configuredExtract.toUpperCase());

  // The upper bound of the last work unit can be removed for a full dump or a snapshot,
  // unless this is an early stop, in which case it must be kept.
  boolean upperBoundRemovable = (isFullDump() || isSnapshot(extractType)) && !earlyStopped;
  result.add(new Partition(lower, finalUpper, true, !upperBoundRemovable));
  return result;
}
Aggregations