Search in sources :

Example 6 with ExtractType

use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by apache.

From the class Partitioner, method getPartitions.

/**
 * Get partitions with low and high water marks.
 *
 * <p>Two situations short-circuit to a single-entry "default" partition map:
 * <ul>
 *   <li>{@code isWatermarkExists()} is false: the map holds
 *       {@link ConfigurationKeys#DEFAULT_WATERMARK_VALUE} as both key and value;</li>
 *   <li>the computed low or high watermark equals
 *       {@link ConfigurationKeys#DEFAULT_WATERMARK_VALUE}: the map holds the computed
 *       low watermark as key and the computed high watermark as value.</li>
 * </ul>
 * Otherwise the partitions are produced by
 * {@code WatermarkPredicate.getPartitions(low, high, interval, maxPartitions)}.
 *
 * <p>The extract type and watermark type are read from the job state and resolved to enum
 * constants case-insensitively. The upper-casing uses {@link java.util.Locale#ROOT} so the
 * lookup does not depend on the JVM default locale.
 *
 * <p>NOTE(review): the extract type property is read without a default; presumably a missing
 * value fails at the {@code toUpperCase} call - confirm against {@code State#getProp}.
 *
 * @param previousWatermark previous water mark from metadata
 * @return map of partition intervals.
 *         map's key is interval begin time (in format {@link Partitioner#WATERMARKTIMEFORMAT})
 *         map's value is interval end time (in format {@link Partitioner#WATERMARKTIMEFORMAT})
 * @throws IllegalArgumentException if the configured extract type or watermark type does not
 *         name a constant of the corresponding enum
 */
@Deprecated
public HashMap<Long, Long> getPartitions(long previousWatermark) {
    HashMap<Long, Long> defaultPartition = Maps.newHashMap();
    if (!isWatermarkExists()) {
        defaultPartition.put(ConfigurationKeys.DEFAULT_WATERMARK_VALUE, ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
        LOG.info("Watermark column or type not found - Default partition with low watermark and high watermark as " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
        return defaultPartition;
    }

    // Locale.ROOT keeps the enum lookup independent of the default locale; with e.g. a Turkish
    // default locale, toUpperCase() maps 'i' to a dotted capital I and valueOf(...) would fail.
    String extractTypeName = this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE);
    ExtractType extractType = ExtractType.valueOf(extractTypeName.toUpperCase(java.util.Locale.ROOT));
    String watermarkTypeName = this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE);
    WatermarkType watermarkType = WatermarkType.valueOf(watermarkTypeName.toUpperCase(java.util.Locale.ROOT));

    int interval = getUpdatedInterval(this.state.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_PARTITION_INTERVAL, 0), extractType, watermarkType);

    // A configured maximum of 0 (also the fallback when the property is absent) selects the built-in default.
    int sourceMaxAllowedPartitions = this.state.getPropAsInt(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 0);
    int maxPartitions = (sourceMaxAllowedPartitions != 0 ? sourceMaxAllowedPartitions : ConfigurationKeys.DEFAULT_MAX_NUMBER_OF_PARTITIONS);

    WatermarkPredicate watermark = new WatermarkPredicate(null, watermarkType);
    int deltaForNextWatermark = watermark.getDeltaNumForNextWatermark();

    LOG.info("is watermark override: " + this.isWatermarkOverride());
    LOG.info("is full extract: " + this.isFullDump());

    long lowWatermark = this.getLowWatermark(extractType, watermarkType, previousWatermark, deltaForNextWatermark);
    long highWatermark = this.getHighWatermark(extractType, watermarkType);

    // Without both bounds no interval can be split; hand back the bounds as one partition.
    if (lowWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE || highWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
        LOG.info("Low watermark or high water mark is not found. Hence cannot generate partitions - Default partition with low watermark:  " + lowWatermark + " and high watermark: " + highWatermark);
        defaultPartition.put(lowWatermark, highWatermark);
        return defaultPartition;
    }

    LOG.info("Generate partitions with low watermark: " + lowWatermark + "; high watermark: " + highWatermark + "; partition interval in hours: " + interval + "; Maximum number of allowed partitions: " + maxPartitions);
    return watermark.getPartitions(lowWatermark, highWatermark, interval, maxPartitions);
}
Also used : WatermarkType(org.apache.gobblin.source.extractor.watermark.WatermarkType) WatermarkPredicate(org.apache.gobblin.source.extractor.watermark.WatermarkPredicate) ExtractType(org.apache.gobblin.source.extractor.extract.ExtractType)

Example 7 with ExtractType

use of org.apache.gobblin.source.extractor.extract.ExtractType in project incubator-gobblin by apache.

From the class Partitioner, method getGlobalPartition.

/**
 * Get the global partition of the whole data set, which has the global low and high watermarks.
 *
 * <p>The extract type and watermark type are read from the job state and resolved to enum
 * constants case-insensitively. The upper-casing uses {@link java.util.Locale#ROOT} so the
 * lookup does not depend on the JVM default locale.
 *
 * <p>NOTE(review): the extract type property is read without a default; presumably a missing
 * value fails at the {@code toUpperCase} call - confirm against {@code State#getProp}.
 *
 * @param previousWatermark previous watermark for computing the low watermark of current run
 * @return a Partition instance built from the computed low and high watermarks
 * @throws IllegalArgumentException if the configured extract type or watermark type does not
 *         name a constant of the corresponding enum
 */
public Partition getGlobalPartition(long previousWatermark) {
    // Locale.ROOT keeps the enum lookup independent of the default locale; with e.g. a Turkish
    // default locale, toUpperCase() maps 'i' to a dotted capital I and valueOf(...) would fail.
    String extractTypeName = state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE);
    ExtractType extractType = ExtractType.valueOf(extractTypeName.toUpperCase(java.util.Locale.ROOT));
    String watermarkTypeName = state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE);
    WatermarkType watermarkType = WatermarkType.valueOf(watermarkTypeName.toUpperCase(java.util.Locale.ROOT));

    WatermarkPredicate watermark = new WatermarkPredicate(null, watermarkType);
    int deltaForNextWatermark = watermark.getDeltaNumForNextWatermark();

    long lowWatermark = getLowWatermark(extractType, watermarkType, previousWatermark, deltaForNextWatermark);
    // NOTE(review): hasUserSpecifiedHighWatermark is read after this call; presumably
    // getHighWatermark updates it - confirm before reordering these statements.
    long highWatermark = getHighWatermark(extractType, watermarkType);

    return new Partition(lowWatermark, highWatermark, true, hasUserSpecifiedHighWatermark);
}
Also used : WatermarkType(org.apache.gobblin.source.extractor.watermark.WatermarkType) WatermarkPredicate(org.apache.gobblin.source.extractor.watermark.WatermarkPredicate) ExtractType(org.apache.gobblin.source.extractor.extract.ExtractType)

Aggregations

ExtractType (org.apache.gobblin.source.extractor.extract.ExtractType) 7, SourceState (org.apache.gobblin.configuration.SourceState) 4, Test (org.testng.annotations.Test) 4, WatermarkType (org.apache.gobblin.source.extractor.watermark.WatermarkType) 3, WatermarkPredicate (org.apache.gobblin.source.extractor.watermark.WatermarkPredicate) 2, ArrayList (java.util.ArrayList) 1