Use of org.apache.gobblin.source.extractor.watermark.WatermarkType in the incubator-gobblin project by Apache.
Class Partitioner, method getGlobalPartition.
/**
 * Get the global partition of the whole data set, which has the global low and high watermarks.
 *
 * <p>The extract type is read from {@code ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE} (no default is
 * supplied here) and the watermark type from {@code ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE},
 * falling back to {@code ConfigurationKeys.DEFAULT_WATERMARK_TYPE}. Both values are upper-cased with
 * {@link java.util.Locale#ROOT} before the {@code valueOf} lookup so that the result does not depend on
 * the JVM's default locale.
 *
 * @param previousWatermark previous watermark for computing the low watermark of current run
 * @return a Partition instance built from the computed low and high watermarks
 * @throws NullPointerException if no value is available for the extract type property
 * @throws IllegalArgumentException if a configured value is rejected by the corresponding {@code valueOf}
 */
public Partition getGlobalPartition(long previousWatermark) {
  // Fail with a descriptive message (same exception type as before) when the extract type is not configured.
  String extractTypeName = java.util.Objects.requireNonNull(
      state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE),
      "Missing required property: " + ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE);
  // Locale.ROOT keeps the case mapping stable (e.g. 'i' -> 'I') regardless of the default locale.
  ExtractType extractType = ExtractType.valueOf(extractTypeName.toUpperCase(java.util.Locale.ROOT));

  String watermarkTypeName = state.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE,
      ConfigurationKeys.DEFAULT_WATERMARK_TYPE);
  WatermarkType watermarkType = WatermarkType.valueOf(watermarkTypeName.toUpperCase(java.util.Locale.ROOT));

  // The predicate is created only to obtain the delta used when advancing to the next watermark.
  WatermarkPredicate watermark = new WatermarkPredicate(null, watermarkType);
  int deltaForNextWatermark = watermark.getDeltaNumForNextWatermark();

  long lowWatermark = getLowWatermark(extractType, watermarkType, previousWatermark, deltaForNextWatermark);
  long highWatermark = getHighWatermark(extractType, watermarkType);

  // hasUserSpecifiedHighWatermark is deliberately read after getHighWatermark(...) has run.
  // NOTE(review): presumably that call updates the flag - confirm before reordering.
  return new Partition(lowWatermark, highWatermark, true, hasUserSpecifiedHighWatermark);
}
Use of org.apache.gobblin.source.extractor.watermark.WatermarkType in the incubator-gobblin project by Apache.
Class QueryBasedExtractor, method build.
/**
 * Build schema, record count and high water mark.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Read the delta (watermark) column and the partition's low/high watermarks.</li>
 *   <li>Resolve the watermark type; it is {@code null} when the property is blank. The configured name is
 *       upper-cased with {@link java.util.Locale#ROOT} so the lookup does not depend on the default locale.</li>
 *   <li>Set the connection timeout and extract metadata.</li>
 *   <li>If a watermark column is configured, determine the high watermark for this run and set the range
 *       predicates.</li>
 *   <li>Compute the source record count unless count calculation is configured to be skipped, in which
 *       case the count is set to -1. A count of 0 turns the fetch status off.</li>
 * </ol>
 *
 * @return this extractor
 * @throws ExtractPrepareException if schema extraction, high watermark retrieval, record counting or any
 *         other preparation step inside the guarded section fails; the original exception is the cause
 */
public Extractor<S, D> build() throws ExtractPrepareException {
  String watermarkColumn = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY);
  long lwm = partition.getLowWatermark();
  long hwm = partition.getHighWatermark();
  log.info("Low water mark: " + lwm + "; and High water mark: " + hwm);

  // Read the property once; a blank value means "no watermark type".
  String watermarkTypeName = this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE);
  WatermarkType watermarkType;
  if (StringUtils.isBlank(watermarkTypeName)) {
    watermarkType = null;
  } else {
    // Locale.ROOT avoids locale-specific case mappings breaking the enum lookup.
    watermarkType = WatermarkType.valueOf(watermarkTypeName.toUpperCase(java.util.Locale.ROOT));
  }

  log.info("Source Entity is " + this.entity);
  try {
    this.setTimeOut(this.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_CONN_TIMEOUT,
        ConfigurationKeys.DEFAULT_CONN_TIMEOUT));
    this.extractMetadata(this.schema, this.entity, this.workUnit);

    if (StringUtils.isNotBlank(watermarkColumn)) {
      if (partition.isLastPartition()) {
        // Get a more accurate high watermark from the source
        long adjustedHighWatermark = this.getLatestWatermark(watermarkColumn, watermarkType, lwm, hwm);
        log.info("High water mark from source: " + adjustedHighWatermark);
        // If the source reports the default watermark value, use the low watermark (with no delta)
        // as the high watermark, i.e. don't move the pointer.
        if (adjustedHighWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
          adjustedHighWatermark = getLowWatermarkWithNoDelta(lwm);
        }
        this.highWatermark = adjustedHighWatermark;
      } else {
        // Not the last partition: keep the partition's own high watermark.
        this.highWatermark = hwm;
      }
      log.info("High water mark for the current run: " + highWatermark);
      this.setRangePredicates(watermarkColumn, watermarkType, lwm, highWatermark);
    }

    // If skip-count is set to true, skip count calculation and set source count to -1.
    // parseBoolean treats a missing (null) value as false, same as the previous Boolean.valueOf call.
    boolean skipCountCalculation =
        Boolean.parseBoolean(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SKIP_COUNT_CALC));
    if (skipCountCalculation) {
      log.info("Skip count calculation");
      this.sourceRecordCount = -1;
    } else {
      this.sourceRecordCount = this.getSourceCount(this.schema, this.entity, this.workUnit, this.predicateList);
    }

    if (this.sourceRecordCount == 0) {
      log.info("Record count is 0; Setting fetch status to false to skip readRecord()");
      this.setFetchStatus(false);
    }
  } catch (SchemaException e) {
    throw new ExtractPrepareException("Failed to get schema for this object; error - " + e.getMessage(), e);
  } catch (HighWatermarkException e) {
    throw new ExtractPrepareException("Failed to get high watermark; error - " + e.getMessage(), e);
  } catch (RecordCountException e) {
    throw new ExtractPrepareException("Failed to get record count; error - " + e.getMessage(), e);
  } catch (Exception e) {
    throw new ExtractPrepareException("Failed to prepare the extract build; error - " + e.getMessage(), e);
  }
  return this;
}
Aggregations