use of org.apache.gobblin.source.extractor.exception.HighWatermarkException in project incubator-gobblin by apache.
the class JdbcExtractor method getHighWatermark.
/**
 * Extracts the high watermark from the first result set contained in the command output.
 *
 * <p>The value is read as a string from column 1 of the first row. When
 * {@code watermarkColumnFormat} is given, the value is parsed with that
 * {@link SimpleDateFormat} pattern and re-rendered as {@code yyyyMMddHHmmss} before being
 * converted to a long; otherwise the raw string is converted directly.
 *
 * @param response command output whose first result value is the {@link ResultSet} to read
 * @param watermarkColumn name of the watermark column (not referenced by this implementation)
 * @param watermarkColumnFormat date pattern of the watermark value, or {@code null} when the
 *        value is already numeric
 * @return the watermark as a long, or {@code ConfigurationKeys.DEFAULT_WATERMARK_VALUE} when the
 *         result set has no row or the value in column 1 is {@code null}
 * @throws HighWatermarkException if the output contains no result set, or if the value cannot be
 *         read, parsed as a date, or converted to a long
 */
@Override
public long getHighWatermark(CommandOutput<?, ?> response, String watermarkColumn, String watermarkColumnFormat) throws HighWatermarkException {
  this.log.debug("Extract high watermark from resultset");
  Iterator<ResultSet> itr = (Iterator<ResultSet>) response.getResults().values().iterator();
  if (!itr.hasNext()) {
    throw new HighWatermarkException("Failed to get high watermark from database - Resultset has no records");
  }
  ResultSet resultset = itr.next();
  try {
    String watermark = resultset.next() ? resultset.getString(1) : null;
    if (watermark == null) {
      return ConfigurationKeys.DEFAULT_WATERMARK_VALUE;
    }
    if (watermarkColumnFormat == null) {
      return Long.parseLong(watermark);
    }
    SimpleDateFormat inFormat = new SimpleDateFormat(watermarkColumnFormat);
    Date date;
    try {
      date = inFormat.parse(watermark);
    } catch (ParseException e) {
      this.log.error("ParseException: " + e.getMessage(), e);
      // Rethrow instead of continuing with a null date: formatting null would raise a
      // NullPointerException and the caller would only see "error - null".
      throw e;
    }
    SimpleDateFormat outFormat = new SimpleDateFormat("yyyyMMddHHmmss");
    return Long.parseLong(outFormat.format(date));
  } catch (Exception e) {
    throw new HighWatermarkException("Failed to get high watermark from database; error - " + e.getMessage(), e);
  }
}
use of org.apache.gobblin.source.extractor.exception.HighWatermarkException in project incubator-gobblin by apache.
the class SalesforceExtractor method getHighWatermark.
/**
 * Extracts the high watermark from the first string result in the command output, which is
 * parsed as a JSON object with a {@code records} array.
 *
 * <p>The watermark is the {@code watermarkColumn} member of the first record. When
 * {@code format} is given, the value is parsed with that {@link SimpleDateFormat} pattern and
 * re-rendered as {@code yyyyMMddHHmmss} before being converted to a long; otherwise the raw
 * string is converted directly.
 *
 * @param response command output whose first result value is the JSON response body
 * @param watermarkColumn name of the member to read from the first record
 * @param format date pattern of the watermark value, or {@code null} when the value is already
 *        numeric
 * @return the watermark as a long, or {@code -1} when the {@code records} array is empty
 * @throws HighWatermarkException if the output has no results, or if the value cannot be
 *         extracted, parsed as a date, or converted to a long
 */
@Override
public long getHighWatermark(CommandOutput<?, ?> response, String watermarkColumn, String format) throws HighWatermarkException {
  log.info("Get high watermark from salesforce");
  Iterator<String> itr = (Iterator<String>) response.getResults().values().iterator();
  if (!itr.hasNext()) {
    throw new HighWatermarkException("Failed to get high watermark from salesforce; REST response has no output");
  }
  String output = itr.next();
  JsonElement element = GSON.fromJson(output, JsonObject.class);
  try {
    JsonArray records = element.getAsJsonObject().getAsJsonArray("records");
    if (records.size() == 0) {
      return -1;
    }
    String value = records.get(0).getAsJsonObject().get(watermarkColumn).getAsString();
    if (format == null) {
      return Long.parseLong(value);
    }
    SimpleDateFormat inFormat = new SimpleDateFormat(format);
    Date date;
    try {
      date = inFormat.parse(value);
    } catch (ParseException e) {
      log.error("ParseException: " + e.getMessage(), e);
      // Rethrow instead of continuing with a null date: formatting null would raise a
      // NullPointerException and the caller would only see "error - null".
      throw e;
    }
    SimpleDateFormat outFormat = new SimpleDateFormat("yyyyMMddHHmmss");
    return Long.parseLong(outFormat.format(date));
  } catch (Exception e) {
    throw new HighWatermarkException("Failed to get high watermark from salesforce; error - " + e.getMessage(), e);
  }
}
use of org.apache.gobblin.source.extractor.exception.HighWatermarkException in project incubator-gobblin by apache.
the class SalesforceExtractor method getHighWatermarkMetadata.
/**
 * Builds the GET command(s) for a query that selects the largest non-null value of the
 * watermark column.
 *
 * <p>The query selects {@code watermarkColumn} from {@code entity}, carries over everything from
 * the first {@code " where "} onward in {@code updatedQuery} (if present), strips the limit
 * string reported by {@code getLimitFromInputQuery}, appends every condition in
 * {@code predicateList} plus a {@code != null} check on the watermark column, and finally orders
 * descending with {@code LIMIT 1}.
 *
 * @param schema not referenced by this implementation
 * @param entity name placed in the {@code FROM} clause
 * @param watermarkColumn column to select, filter on and sort by
 * @param predicateList predicates whose conditions are added to the query
 * @return the commands produced by {@code constructGetCommand} for the query's full URI
 * @throws HighWatermarkException if building the URI or the command fails
 */
@Override
public List<Command> getHighWatermarkMetadata(String schema, String entity, String watermarkColumn, List<Predicate> predicateList) throws HighWatermarkException {
  log.debug("Build url to retrieve high watermark");

  // Reuse the WHERE clause (and whatever follows it) of the updated query, when there is one.
  String whereClause = "";
  if (this.updatedQuery != null) {
    int whereIndex = this.updatedQuery.toLowerCase().indexOf(" where ");
    if (whereIndex > 0) {
      whereClause = this.updatedQuery.substring(whereIndex);
    }
  }

  String soql = "SELECT " + watermarkColumn + " FROM " + entity + whereClause;
  // Drop any limit carried over from the input query; the sort order below adds its own.
  soql = soql.replace(getLimitFromInputQuery(soql), "");

  for (Predicate predicate : predicateList) {
    soql = SqlQueryUtils.addPredicate(soql, predicate.getCondition());
  }
  soql = SqlQueryUtils.addPredicate(soql, " " + watermarkColumn + " != null");
  soql = soql + " ORDER BY " + watermarkColumn + " desc LIMIT 1";
  log.info("QUERY: " + soql);

  try {
    return constructGetCommand(this.sfConnector.getFullUri(getSoqlUrl(soql)));
  } catch (Exception e) {
    throw new HighWatermarkException("Failed to get salesforce url for high watermark; error - " + e.getMessage(), e);
  }
}
use of org.apache.gobblin.source.extractor.exception.HighWatermarkException in project incubator-gobblin by apache.
the class RestApiExtractor method getMaxWatermark.
/**
 * Connects to the source, runs the high-watermark commands and returns the resulting watermark.
 *
 * <p>Every failure inside the method, including the "Failed to connect." exception raised
 * here, is caught and wrapped in a new {@link HighWatermarkException}.
 *
 * @param schema passed through to {@code getHighWatermarkMetadata}
 * @param entity passed through to {@code getHighWatermarkMetadata}
 * @param watermarkColumn watermark column name
 * @param predicateList predicates passed through to {@code getHighWatermarkMetadata}
 * @param watermarkSourceFormat format passed to {@code getHighWatermark}
 * @return the value returned by {@code getHighWatermark} for the connector's response
 * @throws HighWatermarkException if connecting, building the commands, fetching the response or
 *         extracting the watermark fails
 */
@Override
public long getMaxWatermark(String schema, String entity, String watermarkColumn, List<Predicate> predicateList, String watermarkSourceFormat) throws HighWatermarkException {
  log.info("Get high watermark using Rest Api");
  try {
    if (!this.connector.connect()) {
      throw new HighWatermarkException("Failed to connect.");
    }
    log.debug("Connected successfully.");

    List<Command> commands = this.getHighWatermarkMetadata(schema, entity, watermarkColumn, predicateList);
    CommandOutput<?, ?> output = this.connector.getResponse(commands);
    long highWatermark = this.getHighWatermark(output, watermarkColumn, watermarkSourceFormat);
    log.info("High watermark:" + highWatermark);
    return highWatermark;
  } catch (Exception e) {
    throw new HighWatermarkException("Failed to get high watermark using rest api; error - " + e.getMessage(), e);
  }
}
use of org.apache.gobblin.source.extractor.exception.HighWatermarkException in project incubator-gobblin by apache.
the class QueryBasedExtractor method build.
/**
 * Prepares this extractor: sets the connection timeout, extracts metadata, resolves the high
 * watermark and range predicates (when a watermark column is configured) and determines the
 * source record count.
 *
 * <p>If the record count is 0, the fetch status is set to {@code false}.
 *
 * @return this extractor
 * @throws ExtractPrepareException if schema extraction, high watermark retrieval, record
 *         counting or any other preparation step fails
 */
public Extractor<S, D> build() throws ExtractPrepareException {
  String watermarkColumn = this.workUnitState.getProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY);
  long lowWatermark = partition.getLowWatermark();
  long partitionHighWatermark = partition.getHighWatermark();
  log.info("Low water mark: " + lowWatermark + "; and High water mark: " + partitionHighWatermark);

  // The watermark type stays null when none is configured.
  WatermarkType watermarkType = null;
  if (!StringUtils.isBlank(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE))) {
    watermarkType = WatermarkType.valueOf(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE).toUpperCase());
  }
  log.info("Source Entity is " + this.entity);

  try {
    this.setTimeOut(this.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_CONN_TIMEOUT, ConfigurationKeys.DEFAULT_CONN_TIMEOUT));
    this.extractMetadata(this.schema, this.entity, this.workUnit);

    if (StringUtils.isNotBlank(watermarkColumn)) {
      long resolvedHighWatermark;
      if (partition.isLastPartition()) {
        // Ask the source for a more accurate high watermark.
        long sourceHighWatermark = this.getLatestWatermark(watermarkColumn, watermarkType, lowWatermark, partitionHighWatermark);
        log.info("High water mark from source: " + sourceHighWatermark);
        // When the source reports the default value, fall back to the low watermark
        // (with no delta), i.e. don't move the pointer.
        resolvedHighWatermark = (sourceHighWatermark == ConfigurationKeys.DEFAULT_WATERMARK_VALUE)
            ? getLowWatermarkWithNoDelta(lowWatermark)
            : sourceHighWatermark;
      } else {
        resolvedHighWatermark = partitionHighWatermark;
      }
      this.highWatermark = resolvedHighWatermark;
      log.info("High water mark for the current run: " + this.highWatermark);
      this.setRangePredicates(watermarkColumn, watermarkType, lowWatermark, this.highWatermark);
    }

    // When skipping is enabled, no count is calculated and the source count is set to -1.
    boolean skipCount = Boolean.parseBoolean(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SKIP_COUNT_CALC));
    if (skipCount) {
      log.info("Skip count calculation");
      this.sourceRecordCount = -1;
    } else {
      this.sourceRecordCount = this.getSourceCount(this.schema, this.entity, this.workUnit, this.predicateList);
    }

    if (this.sourceRecordCount == 0) {
      log.info("Record count is 0; Setting fetch status to false to skip readRecord()");
      this.setFetchStatus(false);
    }
  } catch (SchemaException e) {
    throw new ExtractPrepareException("Failed to get schema for this object; error - " + e.getMessage(), e);
  } catch (HighWatermarkException e) {
    throw new ExtractPrepareException("Failed to get high watermark; error - " + e.getMessage(), e);
  } catch (RecordCountException e) {
    throw new ExtractPrepareException("Failed to get record count; error - " + e.getMessage(), e);
  } catch (Exception e) {
    throw new ExtractPrepareException("Failed to prepare the extract build; error - " + e.getMessage(), e);
  }
  return this;
}
Aggregations