use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.
the class QueryBasedExtractor method getLowWatermarkWithNoDelta.
/**
 * Removes the "delta for next watermark" from a low watermark value.
 *
 * <p>The watermark type is read from the work unit state
 * ({@link ConfigurationKeys#SOURCE_QUERYBASED_WATERMARK_TYPE}, defaulting to {@code "TIMESTAMP"}).
 * For {@code SIMPLE} watermarks the delta is subtracted numerically; for every other type the value
 * is interpreted as a {@code yyyyMMddHHmmss} timestamp and the delta is subtracted in seconds.
 *
 * @param lwm low watermark value
 * @return {@link ConfigurationKeys#DEFAULT_WATERMARK_VALUE} unchanged when {@code lwm} equals it,
 *         otherwise {@code lwm} with the delta removed
 * @throws IllegalArgumentException if the configured watermark type is not a {@code WatermarkType} name
 */
private long getLowWatermarkWithNoDelta(long lwm) {
  if (lwm == ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
    return ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
  }

  String watermarkType =
      this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, "TIMESTAMP");
  // Upper-case with Locale.ROOT: the default-locale variant maps 'i' to a dotted capital I under
  // Turkish/Azeri locales, which would make the enum lookup fail for "timestamp" or "simple".
  WatermarkType wmType = WatermarkType.valueOf(watermarkType.toUpperCase(java.util.Locale.ROOT));
  int deltaNum = new WatermarkPredicate(wmType).getDeltaNumForNextWatermark();

  switch (wmType) {
    case SIMPLE:
      // Simple watermarks are plain numbers, so the delta is removed arithmetically.
      return lwm - deltaNum;
    default:
      // All other types: treat the value as a yyyyMMddHHmmss timestamp and step back deltaNum seconds.
      Date lowWaterMarkDate = Utils.toDate(lwm, "yyyyMMddHHmmss");
      Date withoutDelta = Utils.addSecondsToDate(lowWaterMarkDate, deltaNum * -1);
      return Long.parseLong(Utils.dateToString(withoutDelta, "yyyyMMddHHmmss"));
  }
}
use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.
the class Partitioner method getPartitions.
/**
 * Get partitions with low and high water marks
 *
 * <p>When no watermark is configured ({@code isWatermarkExists()} is false), or when either the
 * computed low or high watermark equals {@link ConfigurationKeys#DEFAULT_WATERMARK_VALUE}, a
 * single-entry map is returned instead of delegating to
 * {@code WatermarkPredicate#getPartitions}.
 *
 * @param previousWatermark previous water mark from metadata
 * @return map of partition intervals.
 *         map's key is interval begin time (in format {@link Partitioner#WATERMARKTIMEFORMAT})
 *         map's value is interval end time (in format {@link Partitioner#WATERMARKTIMEFORMAT})
 * @throws IllegalArgumentException if the configured extract type or watermark type is not a valid
 *         enum constant name
 */
@Deprecated
public HashMap<Long, Long> getPartitions(long previousWatermark) {
  HashMap<Long, Long> defaultPartition = Maps.newHashMap();
  if (!isWatermarkExists()) {
    defaultPartition.put(ConfigurationKeys.DEFAULT_WATERMARK_VALUE, ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
    LOG.info("Watermark column or type not found - Default partition with low watermark and high watermark as " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
    return defaultPartition;
  }

  // Upper-case with Locale.ROOT so the enum lookups do not depend on the JVM default locale
  // (e.g. Turkish 'i' -> dotted capital I would break names containing 'i').
  ExtractType extractType = ExtractType.valueOf(
      this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE).toUpperCase(java.util.Locale.ROOT));
  WatermarkType watermarkType = WatermarkType.valueOf(
      this.state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE)
          .toUpperCase(java.util.Locale.ROOT));

  int interval = getUpdatedInterval(
      this.state.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_PARTITION_INTERVAL, 0), extractType, watermarkType);

  // A configured value of 0 (also the fallback when the property is absent) selects the default cap.
  int sourceMaxAllowedPartitions = this.state.getPropAsInt(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 0);
  int maxPartitions = (sourceMaxAllowedPartitions != 0 ? sourceMaxAllowedPartitions : ConfigurationKeys.DEFAULT_MAX_NUMBER_OF_PARTITIONS);

  WatermarkPredicate watermark = new WatermarkPredicate(null, watermarkType);
  int deltaForNextWatermark = watermark.getDeltaNumForNextWatermark();

  LOG.info("is watermark override: " + this.isWatermarkOverride());
  LOG.info("is full extract: " + this.isFullDump());

  long lowWatermark = this.getLowWatermark(extractType, watermarkType, previousWatermark, deltaForNextWatermark);
  long highWatermark = this.getHighWatermark(extractType, watermarkType);

  if (lowWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE || highWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
    LOG.info("Low watermark or high water mark is not found. Hence cannot generate partitions - Default partition with low watermark: " + lowWatermark + " and high watermark: " + highWatermark);
    defaultPartition.put(lowWatermark, highWatermark);
    return defaultPartition;
  }

  LOG.info("Generate partitions with low watermark: " + lowWatermark + "; high watermark: " + highWatermark + "; partition interval in hours: " + interval + "; Maximum number of allowed partitions: " + maxPartitions);
  return watermark.getPartitions(lowWatermark, highWatermark, interval, maxPartitions);
}
use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.
the class Partitioner method getGlobalPartition.
/**
 * Get the global partition of the whole data set, which has the global low and high watermarks
 *
 * <p>The extract type and watermark type are read from {@code state}; the low watermark is computed
 * from {@code previousWatermark} and the watermark type's delta for the next watermark.
 *
 * @param previousWatermark previous watermark for computing the low watermark of current run
 * @return a Partition instance
 * @throws IllegalArgumentException if the configured extract type or watermark type is not a valid
 *         enum constant name
 */
public Partition getGlobalPartition(long previousWatermark) {
  // Upper-case with Locale.ROOT so the enum lookups do not depend on the JVM default locale
  // (e.g. Turkish 'i' -> dotted capital I would break names containing 'i').
  ExtractType extractType = ExtractType.valueOf(
      state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE).toUpperCase(java.util.Locale.ROOT));
  WatermarkType watermarkType = WatermarkType.valueOf(
      state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, ConfigurationKeys.DEFAULT_WATERMARK_TYPE)
          .toUpperCase(java.util.Locale.ROOT));

  WatermarkPredicate watermark = new WatermarkPredicate(null, watermarkType);
  int deltaForNextWatermark = watermark.getDeltaNumForNextWatermark();

  // Low watermark is computed before the high watermark, matching the original call order.
  long lowWatermark = getLowWatermark(extractType, watermarkType, previousWatermark, deltaForNextWatermark);
  long highWatermark = getHighWatermark(extractType, watermarkType);

  return new Partition(lowWatermark, highWatermark, true, hasUserSpecifiedHighWatermark);
}
use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.
the class QueryBasedExtractor method getLatestWatermark.
/**
 * Determines the high watermark value to use for this extract.
 *
 * <p>If {@link ConfigurationKeys#SOURCE_QUERYBASED_SKIP_HIGH_WATERMARK_CALC} is set to true in the
 * work unit state, {@code hwmValue} is returned as is. Otherwise low and high watermark predicates
 * are built for {@code watermarkColumn} and the result of {@code getMaxWatermark} is returned.
 *
 * @param watermarkColumn name of the watermark column
 * @param watermarkType watermark type
 * @param lwmValue low watermark value
 * @param hwmValue high watermark value
 * @return the value from {@code getMaxWatermark}, or {@code hwmValue} when the calculation is skipped
 * @throws HighWatermarkException declared by this method; presumably raised by {@code getMaxWatermark}
 * @throws IOException declared by this method; presumably raised by {@code getMaxWatermark}
 */
private long getLatestWatermark(String watermarkColumn, WatermarkType watermarkType, long lwmValue, long hwmValue)
    throws HighWatermarkException, IOException {
  boolean skipCalculation = Boolean.parseBoolean(
      this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SKIP_HIGH_WATERMARK_CALC));
  if (skipCalculation) {
    return hwmValue;
  }

  log.info("Getting high watermark");
  WatermarkPredicate watermark = new WatermarkPredicate(watermarkColumn, watermarkType);

  // Comparison operators follow the inclusiveness flags on the partition.
  String lowerBoundOp = partition.isLowWatermarkInclusive() ? ">=" : ">";
  boolean upperBoundInclusive = partition.isLastPartition() || partition.isHighWatermarkInclusive();
  String upperBoundOp = upperBoundInclusive ? "<=" : "<";

  Predicate lowerBound = watermark.getPredicate(this, lwmValue, lowerBoundOp, Predicate.PredicateType.LWM);
  Predicate upperBound = watermark.getPredicate(this, hwmValue, upperBoundOp, Predicate.PredicateType.HWM);

  // Only non-null predicates are handed to getMaxWatermark.
  List<Predicate> predicates = new ArrayList<>();
  for (Predicate candidate : new Predicate[] {lowerBound, upperBound}) {
    if (candidate != null) {
      predicates.add(candidate);
    }
  }

  return this.getMaxWatermark(this.schema, this.entity, watermarkColumn, predicates,
      watermark.getWatermarkSourceFormat(this));
}
use of org.apache.gobblin.source.extractor.watermark.WatermarkPredicate in project incubator-gobblin by apache.
the class QueryBasedExtractor method setRangePredicates.
/**
 * Registers the low/high range predicates for the watermark column and, for hourly extracts with a
 * configured hour column, the same range on that hour column.
 *
 * @param watermarkColumn name of the column used as watermark
 * @param watermarkType watermark type
 * @param lwmValue estimated low watermark value
 * @param hwmValue estimated high watermark value
 */
private void setRangePredicates(String watermarkColumn, WatermarkType watermarkType, long lwmValue, long hwmValue) {
  log.debug("Getting range predicates");

  // Comparison operators follow the inclusiveness flags on the partition.
  String lowerBoundOp = partition.isLowWatermarkInclusive() ? ">=" : ">";
  boolean upperBoundInclusive = partition.isLastPartition() || partition.isHighWatermarkInclusive();
  String upperBoundOp = upperBoundInclusive ? "<=" : "<";

  WatermarkPredicate watermark = new WatermarkPredicate(watermarkColumn, watermarkType);
  Predicate lowerBound = watermark.getPredicate(this, lwmValue, lowerBoundOp, Predicate.PredicateType.LWM);
  this.addPredicates(lowerBound);
  Predicate upperBound = watermark.getPredicate(this, hwmValue, upperBoundOp, Predicate.PredicateType.HWM);
  this.addPredicates(upperBound);

  boolean hourlyExtract = Boolean.parseBoolean(
      this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_HOURLY_EXTRACT));
  if (!hourlyExtract) {
    return;
  }

  // Hourly extracts reuse the same bounds and operators on the hour column, when one is configured.
  String hourColumn = this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_HOUR_COLUMN);
  if (StringUtils.isNotBlank(hourColumn)) {
    WatermarkPredicate hourlyWatermark = new WatermarkPredicate(hourColumn, WatermarkType.HOUR);
    Predicate hourlyLowerBound = hourlyWatermark.getPredicate(this, lwmValue, lowerBoundOp, Predicate.PredicateType.LWM);
    this.addPredicates(hourlyLowerBound);
    Predicate hourlyUpperBound = hourlyWatermark.getPredicate(this, hwmValue, upperBoundOp, Predicate.PredicateType.HWM);
    this.addPredicates(hourlyUpperBound);
  }
}
Aggregations