Search in sources :

Example 1 with WatermarkPredicate

use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.

the class QueryBasedExtractor method getLowWatermarkWithNoDelta.

/**
 * Removes the "delta for next watermark" from a low watermark.
 *
 * <p>The default watermark value is returned unchanged. The watermark type is read from the work unit
 * state (falling back to {@code "TIMESTAMP"}). For {@code SIMPLE} watermarks the delta is subtracted
 * numerically; for every other type the value is converted with the {@code yyyyMMddHHmmss} pattern,
 * passed through {@code Utils.addSecondsToDate} with the negated delta, and converted back.
 *
 * @param lwm low watermark to adjust
 * @return the adjusted low watermark, or the default watermark value when {@code lwm} is the default
 * @throws IllegalArgumentException if the configured watermark type is not a {@code WatermarkType} constant
 */
private long getLowWatermarkWithNoDelta(long lwm) {
    if (lwm == ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
        return ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
    }
    String watermarkType = this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, "TIMESTAMP");
    // Upper-case with Locale.ROOT: the default-locale overload maps 'i' to a dotted capital I under
    // Turkish/Azeri locales, which would make the enum lookup fail for values such as "simple".
    WatermarkType wmType = WatermarkType.valueOf(watermarkType.toUpperCase(java.util.Locale.ROOT));
    int deltaNum = new WatermarkPredicate(wmType).getDeltaNumForNextWatermark();
    switch (wmType) {
        case SIMPLE:
            return lwm - deltaNum;
        default:
            Date lowWaterMarkDate = Utils.toDate(lwm, "yyyyMMddHHmmss");
            return Long.parseLong(Utils.dateToString(Utils.addSecondsToDate(lowWaterMarkDate, -deltaNum), "yyyyMMddHHmmss"));
    }
}
Also used : WatermarkType(org.apache.gobblin.source.extractor.watermark.WatermarkType) WatermarkPredicate(org.apache.gobblin.source.extractor.watermark.WatermarkPredicate) Date(java.util.Date)

Example 2 with WatermarkPredicate

use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.

the class Partitioner method getPartitions.

/**
 * Get partitions with low and high water marks.
 *
 * <p>When no watermark is configured ({@code isWatermarkExists()} is false), or when either the computed
 * low or high watermark equals the default watermark value, a single-entry map is returned instead of a
 * generated partition list.
 *
 * @param previousWatermark previous water mark from metadata
 * @return map of partition intervals.
 *         map's key is interval begin time (in format {@link Partitioner#WATERMARKTIMEFORMAT})
 *         map's value is interval end time (in format {@link Partitioner#WATERMARKTIMEFORMAT})
 * @throws IllegalArgumentException if the configured extract type or watermark type does not name an
 *         enum constant
 */
@Deprecated
public HashMap<Long, Long> getPartitions(long previousWatermark) {
    HashMap<Long, Long> defaultPartition = Maps.newHashMap();
    if (!isWatermarkExists()) {
        defaultPartition.put(ConfigurationKeys.DEFAULT_WATERMARK_VALUE, ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
        LOG.info("Watermark column or type not found - Default partition with low watermark and high watermark as " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
        return defaultPartition;
    }
    // Enum names are matched after upper-casing with Locale.ROOT; the default-locale overload would turn
    // 'i' into a dotted capital I under Turkish/Azeri locales and break Enum.valueOf.
    ExtractType extractType = ExtractType.valueOf(this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE).toUpperCase(java.util.Locale.ROOT));
    WatermarkType watermarkType = WatermarkType.valueOf(this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE).toUpperCase(java.util.Locale.ROOT));
    int interval = getUpdatedInterval(this.state.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_PARTITION_INTERVAL, 0), extractType, watermarkType);
    // A configured value of 0 (also the fallback when the property is absent) means "use the default cap".
    int sourceMaxAllowedPartitions = this.state.getPropAsInt(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 0);
    int maxPartitions = (sourceMaxAllowedPartitions != 0 ? sourceMaxAllowedPartitions : ConfigurationKeys.DEFAULT_MAX_NUMBER_OF_PARTITIONS);
    WatermarkPredicate watermark = new WatermarkPredicate(null, watermarkType);
    int deltaForNextWatermark = watermark.getDeltaNumForNextWatermark();
    LOG.info("is watermark override: " + this.isWatermarkOverride());
    LOG.info("is full extract: " + this.isFullDump());
    long lowWatermark = this.getLowWatermark(extractType, watermarkType, previousWatermark, deltaForNextWatermark);
    long highWatermark = this.getHighWatermark(extractType, watermarkType);
    if (lowWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE || highWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
        LOG.info("Low watermark or high water mark is not found. Hence cannot generate partitions - Default partition with low watermark:  " + lowWatermark + " and high watermark: " + highWatermark);
        defaultPartition.put(lowWatermark, highWatermark);
        return defaultPartition;
    }
    LOG.info("Generate partitions with low watermark: " + lowWatermark + "; high watermark: " + highWatermark + "; partition interval in hours: " + interval + "; Maximum number of allowed partitions: " + maxPartitions);
    return watermark.getPartitions(lowWatermark, highWatermark, interval, maxPartitions);
}
Also used : WatermarkType(org.apache.gobblin.source.extractor.watermark.WatermarkType) WatermarkPredicate(org.apache.gobblin.source.extractor.watermark.WatermarkPredicate) ExtractType(org.apache.gobblin.source.extractor.extract.ExtractType)

Example 3 with WatermarkPredicate

use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.

the class Partitioner method getGlobalPartition.

/**
 * Get the global partition of the whole data set, which has the global low and high watermarks.
 *
 * <p>The extract type and watermark type are read from the job state; the low watermark is derived from
 * {@code previousWatermark} and the delta reported by {@link WatermarkPredicate#getDeltaNumForNextWatermark()}.
 *
 * @param previousWatermark previous watermark for computing the low watermark of current run
 * @return a Partition instance
 * @throws IllegalArgumentException if the configured extract type or watermark type does not name an
 *         enum constant
 */
public Partition getGlobalPartition(long previousWatermark) {
    // Upper-case with Locale.ROOT so the enum lookup does not depend on the JVM default locale
    // (Turkish/Azeri locales map 'i' to a dotted capital I, which Enum.valueOf rejects).
    ExtractType extractType = ExtractType.valueOf(state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE).toUpperCase(java.util.Locale.ROOT));
    WatermarkType watermarkType = WatermarkType.valueOf(state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE).toUpperCase(java.util.Locale.ROOT));
    WatermarkPredicate watermark = new WatermarkPredicate(null, watermarkType);
    int deltaForNextWatermark = watermark.getDeltaNumForNextWatermark();
    long lowWatermark = getLowWatermark(extractType, watermarkType, previousWatermark, deltaForNextWatermark);
    long highWatermark = getHighWatermark(extractType, watermarkType);
    return new Partition(lowWatermark, highWatermark, true, hasUserSpecifiedHighWatermark);
}
Also used : WatermarkType(org.apache.gobblin.source.extractor.watermark.WatermarkType) WatermarkPredicate(org.apache.gobblin.source.extractor.watermark.WatermarkPredicate) ExtractType(org.apache.gobblin.source.extractor.extract.ExtractType)

Example 4 with WatermarkPredicate

use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.

the class QueryBasedExtractor method getLatestWatermark.

/**
 * Determines the high watermark value for the current extract.
 *
 * <p>When {@code ConfigurationKeys.SOURCE_QUERYBASED_SKIP_HIGH_WATERMARK_CALC} is set to true in the work
 * unit state, {@code hwmValue} is returned as is. Otherwise the result of {@code getMaxWatermark} is
 * returned, restricted by whichever of the low/high watermark predicates could be built (non-null).
 *
 * @param watermarkColumn name of the watermark column
 * @param watermarkType type of the watermark
 * @param lwmValue low watermark value used for the lower-bound predicate
 * @param hwmValue high watermark value used for the upper-bound predicate, and the fallback result
 * @return the latest watermark
 * @throws HighWatermarkException declared by this method; presumably raised by the max-watermark lookup
 * @throws IOException declared by this method; presumably raised by the max-watermark lookup
 */
private long getLatestWatermark(String watermarkColumn, WatermarkType watermarkType, long lwmValue, long hwmValue) throws HighWatermarkException, IOException {
    boolean skipCalculation = Boolean.parseBoolean(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SKIP_HIGH_WATERMARK_CALC));
    if (skipCalculation) {
        return hwmValue;
    }

    log.info("Getting high watermark");
    List<Predicate> bounds = new ArrayList<>();
    WatermarkPredicate predicateBuilder = new WatermarkPredicate(watermarkColumn, watermarkType);

    // The lower bound is inclusive only when the partition says so.
    String lowerOperator;
    if (partition.isLowWatermarkInclusive()) {
        lowerOperator = ">=";
    } else {
        lowerOperator = ">";
    }

    // The upper bound is inclusive for the last partition or when explicitly marked inclusive.
    boolean upperInclusive = partition.isLastPartition() || partition.isHighWatermarkInclusive();
    String upperOperator;
    if (upperInclusive) {
        upperOperator = "<=";
    } else {
        upperOperator = "<";
    }

    Predicate lowerBound = predicateBuilder.getPredicate(this, lwmValue, lowerOperator, Predicate.PredicateType.LWM);
    Predicate upperBound = predicateBuilder.getPredicate(this, hwmValue, upperOperator, Predicate.PredicateType.HWM);
    for (Predicate candidate : new Predicate[] {lowerBound, upperBound}) {
        if (candidate != null) {
            bounds.add(candidate);
        }
    }

    String sourceFormat = predicateBuilder.getWatermarkSourceFormat(this);
    return this.getMaxWatermark(this.schema, this.entity, watermarkColumn, bounds, sourceFormat);
}
Also used : ArrayList(java.util.ArrayList) WatermarkPredicate(org.apache.gobblin.source.extractor.watermark.WatermarkPredicate) Predicate(org.apache.gobblin.source.extractor.watermark.Predicate)

Example 5 with WatermarkPredicate

use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.

the class QueryBasedExtractor method setRangePredicates.

/**
 * Adds low/high range predicates for the watermark column and, when the extract is flagged as hourly
 * and an hour column is configured, the same range predicates for that hour column.
 *
 * @param watermarkColumn name of the column used as watermark
 * @param watermarkType watermark type
 * @param lwmValue estimated low watermark value
 * @param hwmValue estimated high watermark value
 */
private void setRangePredicates(String watermarkColumn, WatermarkType watermarkType, long lwmValue, long hwmValue) {
    log.debug("Getting range predicates");

    final boolean lowerInclusive = partition.isLowWatermarkInclusive();
    final String lowerOperator = lowerInclusive ? ">=" : ">";
    // Upper bound is inclusive for the last partition or when explicitly marked inclusive.
    final boolean upperInclusive = partition.isLastPartition() || partition.isHighWatermarkInclusive();
    final String upperOperator = upperInclusive ? "<=" : "<";

    WatermarkPredicate columnWatermark = new WatermarkPredicate(watermarkColumn, watermarkType);
    Predicate lowerBound = columnWatermark.getPredicate(this, lwmValue, lowerOperator, Predicate.PredicateType.LWM);
    this.addPredicates(lowerBound);
    Predicate upperBound = columnWatermark.getPredicate(this, hwmValue, upperOperator, Predicate.PredicateType.HWM);
    this.addPredicates(upperBound);

    boolean hourlyExtract = Boolean.parseBoolean(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_HOURLY_EXTRACT));
    if (!hourlyExtract) {
        return;
    }

    String hourColumn = this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_HOUR_COLUMN);
    if (!StringUtils.isNotBlank(hourColumn)) {
        return;
    }

    // Same bounds and operators, applied to the hour column with the HOUR watermark type.
    WatermarkPredicate hourWatermark = new WatermarkPredicate(hourColumn, WatermarkType.HOUR);
    Predicate hourLowerBound = hourWatermark.getPredicate(this, lwmValue, lowerOperator, Predicate.PredicateType.LWM);
    this.addPredicates(hourLowerBound);
    Predicate hourUpperBound = hourWatermark.getPredicate(this, hwmValue, upperOperator, Predicate.PredicateType.HWM);
    this.addPredicates(hourUpperBound);
}
Also used : WatermarkPredicate(org.apache.gobblin.source.extractor.watermark.WatermarkPredicate)

Aggregations

WatermarkPredicate (org.apache.gobblin.source.extractor.watermark.WatermarkPredicate)5 WatermarkType (org.apache.gobblin.source.extractor.watermark.WatermarkType)3 ExtractType (org.apache.gobblin.source.extractor.extract.ExtractType)2 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1 Predicate (org.apache.gobblin.source.extractor.watermark.Predicate)1