Use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by Apache.
From the class SalesforceExtractor, method getBulkData.
/**
 * Reads the next batch of records from the bulk api result streams.
 *
 * <p>When the current reader is missing or has nothing ready, the method either
 * opens the stream for the next unprocessed result id or, when every result id
 * has already been consumed, marks the bulk job as finished and returns an
 * empty record set.
 *
 * @return record set with each record as a JsonObject
 * @throws DataRecordException wrapping any exception raised while switching streams or fetching the batch
 */
private RecordSet<JsonElement> getBulkData() throws DataRecordException {
  log.debug("Processing bulk api batch...");
  RecordSetList<JsonElement> batch = new RecordSetList<>();

  try {
    // An absent or drained reader means we have to move on to the next result set id.
    boolean bufferExhausted = this.bulkBufferedReader == null || !this.bulkBufferedReader.ready();

    if (bufferExhausted) {
      // Report the size of the result set that was just completed (none before the first one).
      if (this.bulkResultIdCount > 0) {
        log.info("Result set {} had {} records", this.bulkResultIdCount, this.bulkRecordCount - this.prevBulkRecordCount);
      }

      // Every result set id has been streamed: flag the job as done and hand back the empty set.
      if (this.bulkResultIdCount >= this.bulkResultIdList.size()) {
        log.info("Bulk job is finished");
        this.setBulkJobFinished(true);
        return batch;
      }

      // Otherwise switch over to the stream of the next unprocessed result set id.
      log.info("Stream resultset for resultId:" + this.bulkResultIdList.get(this.bulkResultIdCount));
      this.setNewBulkResultSet(true);

      // Release the previous stream before replacing it.
      if (this.bulkBufferedReader != null) {
        this.bulkBufferedReader.close();
      }

      this.bulkBufferedReader = getBulkBufferedReader(this.bulkResultIdCount);
      this.bulkResultIdCount++;
      // Remember the running total so the next log line can report a per-result-set delta.
      this.prevBulkRecordCount = bulkRecordCount;
    }

    // fetch a batch of results with retry for network errors
    fetchResultBatchWithRetry(batch);
  } catch (Exception e) {
    throw new DataRecordException("Failed to get records from salesforce; error - " + e.getMessage(), e);
  }

  return batch;
}
Use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by Apache.
From the class SalesforceExtractor, method getDataMetadata.
/**
 * Builds the GET command(s) used to retrieve data records.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>If a next url is available and the pull status is true, that url is used.</li>
 *   <li>If {@code isNullPredicate(predicateList)} holds, the stored query is used unchanged.</li>
 *   <li>Otherwise the limit string is removed from the query, each predicate condition is
 *       added, an {@code IsDeleted = true} predicate is added when
 *       {@code SOURCE_QUERYBASED_IS_SPECIFIC_API_ACTIVE} is set to true, and the limit
 *       string is appended again.</li>
 * </ol>
 *
 * @param schema not read by this method
 * @param entity not read by this method
 * @param workUnit not read by this method
 * @param predicateList predicates whose conditions are added to the query
 * @return the command list produced by {@code constructGetCommand} for the resolved url
 * @throws DataRecordException wrapping any exception raised while building the url
 */
@Override
public List<Command> getDataMetadata(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws DataRecordException {
  log.debug("Build url to retrieve data records");
  String query = this.updatedQuery;

  try {
    // Continue from the follow-up url when one is pending and pulling is still active.
    if (this.getNextUrl() != null && this.pullStatus) {
      return constructGetCommand(this.getNextUrl());
    }

    // Nothing to add to the query: run it exactly as stored.
    if (isNullPredicate(predicateList)) {
      log.info("QUERY:" + query);
      return constructGetCommand(this.sfConnector.getFullUri(getSoqlUrl(query)));
    }

    // Take the limit string out so predicates can be added, then put it back at the end.
    String limitClause = getLimitFromInputQuery(query);
    query = query.replace(limitClause, "");

    for (Predicate predicate : predicateList) {
      query = SqlQueryUtils.addPredicate(query, predicate.getCondition());
    }

    if (Boolean.valueOf(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_SPECIFIC_API_ACTIVE))) {
      query = SqlQueryUtils.addPredicate(query, "IsDeleted = true");
    }

    query = query + limitClause;
    log.info("QUERY: " + query);

    return constructGetCommand(this.sfConnector.getFullUri(getSoqlUrl(query)));
  } catch (Exception e) {
    throw new DataRecordException("Failed to get salesforce url for data records; error - " + e.getMessage(), e);
  }
}
Use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by Apache.
From the class RestApiExtractor, method getRecordSet.
/**
 * Retrieves data records through the Rest Api.
 *
 * <p>Connects first if the connection is closed. Returns {@code null} when the pull
 * status is false. Otherwise the request commands come from the pending next url when
 * there is one, or from {@link #getDataMetadata} when there is none, and the response
 * is converted with {@code getData}.
 *
 * @param schema passed through to {@code getDataMetadata}
 * @param entity passed through to {@code getDataMetadata}
 * @param workUnit passed through to {@code getDataMetadata}
 * @param predicateList passed through to {@code getDataMetadata}
 * @return iterator over the retrieved records, or {@code null} when the pull status is false
 * @throws DataRecordException wrapping any exception raised while connecting or fetching,
 *         including the connection failure raised inside this method
 */
@Override
public Iterator<JsonElement> getRecordSet(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws DataRecordException {
  log.debug("Get data records using Rest Api");

  try {
    // Only attempt to connect when the connection is closed; a failed attempt aborts the call.
    if (this.connector.isConnectionClosed() && !this.connector.connect()) {
      throw new DataRecordException("Failed to connect.");
    }
    log.debug("Connected successfully.");

    // Callers receive null (not an empty iterator) when pulling has been switched off.
    if (!this.getPullStatus()) {
      return null;
    }

    List<Command> commands;
    if (this.getNextUrl() != null) {
      // A follow-up url is pending: request it directly.
      commands = RestApiConnector.constructGetCommand(this.getNextUrl());
    } else {
      // No follow-up url: build the request from the query and predicates.
      commands = this.getDataMetadata(schema, entity, workUnit, predicateList);
    }

    CommandOutput<?, ?> response = this.connector.getResponse(commands);
    return this.getData(response);
  } catch (Exception e) {
    throw new DataRecordException("Failed to get records using rest api; error - " + e.getMessage(), e);
  }
}
Aggregations