use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by apache.
the class Partitioner method getPartitions.
/**
* Get partitions with low and high water marks
*
* @param previousWatermark previous water mark from metadata
* @return map of partition intervals.
* map's key is interval begin time (in format {@link Partitioner#WATERMARKTIMEFORMAT})
* map's value is interval end time (in format {@link Partitioner#WATERMARKTIMEFORMAT})
*/
@Deprecated
public HashMap<Long, Long> getPartitions(long previousWatermark) {
HashMap<Long, Long> defaultPartition = Maps.newHashMap();
if (!isWatermarkExists()) {
defaultPartition.put(ConfigurationKeys.DEFAULT_WATERMARK_VALUE, ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
LOG.info("Watermark column or type not found - Default partition with low watermark and high watermark as " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
return defaultPartition;
}
ExtractType extractType = ExtractType.valueOf(this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE).toUpperCase());
WatermarkType watermarkType = WatermarkType.valueOf(this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE).toUpperCase());
int interval = getUpdatedInterval(this.state.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_PARTITION_INTERVAL, 0), extractType, watermarkType);
int sourceMaxAllowedPartitions = this.state.getPropAsInt(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 0);
int maxPartitions = (sourceMaxAllowedPartitions != 0 ? sourceMaxAllowedPartitions : ConfigurationKeys.DEFAULT_MAX_NUMBER_OF_PARTITIONS);
WatermarkPredicate watermark = new WatermarkPredicate(null, watermarkType);
int deltaForNextWatermark = watermark.getDeltaNumForNextWatermark();
LOG.info("is watermark override: " + this.isWatermarkOverride());
LOG.info("is full extract: " + this.isFullDump());
long lowWatermark = this.getLowWatermark(extractType, watermarkType, previousWatermark, deltaForNextWatermark);
long highWatermark = this.getHighWatermark(extractType, watermarkType);
if (lowWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE || highWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
LOG.info("Low watermark or high water mark is not found. Hence cannot generate partitions - Default partition with low watermark: " + lowWatermark + " and high watermark: " + highWatermark);
defaultPartition.put(lowWatermark, highWatermark);
return defaultPartition;
}
LOG.info("Generate partitions with low watermark: " + lowWatermark + "; high watermark: " + highWatermark + "; partition interval in hours: " + interval + "; Maximum number of allowed partitions: " + maxPartitions);
return watermark.getPartitions(lowWatermark, highWatermark, interval, maxPartitions);
}
use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by apache.
the class Partitioner method getGlobalPartition.
/**
* Get the global partition of the whole data set, which has the global low and high watermarks
*
* @param previousWatermark previous watermark for computing the low watermark of current run
* @return a Partition instance
*/
public Partition getGlobalPartition(long previousWatermark) {
ExtractType extractType = ExtractType.valueOf(state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE).toUpperCase());
WatermarkType watermarkType = WatermarkType.valueOf(state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE).toUpperCase());
WatermarkPredicate watermark = new WatermarkPredicate(null, watermarkType);
int deltaForNextWatermark = watermark.getDeltaNumForNextWatermark();
long lowWatermark = getLowWatermark(extractType, watermarkType, previousWatermark, deltaForNextWatermark);
long highWatermark = getHighWatermark(extractType, watermarkType);
return new Partition(lowWatermark, highWatermark, true, hasUserSpecifiedHighWatermark);
}
Aggregations