Use of org.apache.gobblin.source.extractor.resultset.RecordSetList in the project incubator-gobblin by Apache.
The example below is the getBulkData method of the SalesforceExtractor class.
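Before the extractor method itself, here is a minimal sketch of how a RecordSetList is typically filled and read back. It assumes RecordSetList exposes add(...) and can be iterated over (as the extractor does when it returns records to its caller); the class and data here are illustrative only.

    import java.util.Iterator;

    import org.apache.gobblin.source.extractor.resultset.RecordSetList;

    import com.google.gson.JsonElement;
    import com.google.gson.JsonParser;

    public class RecordSetListSketch {
      public static void main(String[] args) {
        // Collect parsed records into a RecordSetList, as getBulkData() does below.
        RecordSetList<JsonElement> rs = new RecordSetList<>();
        rs.add(new JsonParser().parse("{\"Id\":\"001\",\"Name\":\"Acme\"}"));

        // Hand the buffered records back one at a time (assumes the record set is iterable).
        Iterator<JsonElement> it = rs.iterator();
        while (it.hasNext()) {
          System.out.println(it.next());
        }
      }
    }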
/**
 * Get data from the bulk api input stream
 * @return record set with each record as a JsonObject
 */
private RecordSet<JsonElement> getBulkData() throws DataRecordException {
  log.debug("Processing bulk api batch...");
  RecordSetList<JsonElement> rs = new RecordSetList<>();
  try {
    // if the buffer is empty, get a stream for the next result set id
    if (this.bulkBufferedReader == null || !this.bulkBufferedReader.ready()) {
      // log the number of records from each result set after it is processed (bulkResultIdCount > 0)
      if (this.bulkResultIdCount > 0) {
        log.info("Result set {} had {} records", this.bulkResultIdCount,
            this.bulkRecordCount - this.prevBulkRecordCount);
      }
      // if there is an unprocessed result set id, get the result stream for that id
      if (this.bulkResultIdCount < this.bulkResultIdList.size()) {
        log.info("Stream resultset for resultId:" + this.bulkResultIdList.get(this.bulkResultIdCount));
        this.setNewBulkResultSet(true);
        if (this.bulkBufferedReader != null) {
          this.bulkBufferedReader.close();
        }
        this.bulkBufferedReader = getBulkBufferedReader(this.bulkResultIdCount);
        this.bulkResultIdCount++;
        this.prevBulkRecordCount = bulkRecordCount;
      } else {
        // all result set ids have been processed, so finish the bulk job
        log.info("Bulk job is finished");
        this.setBulkJobFinished(true);
        return rs;
      }
    }
    // fetch a batch of results, retrying on transient network errors
    fetchResultBatchWithRetry(rs);
  } catch (Exception e) {
    throw new DataRecordException("Failed to get records from salesforce; error - " + e.getMessage(), e);
  }
  return rs;
}
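A caller would typically keep invoking getBulkData() until the extractor reports the bulk job as finished, processing each returned batch in turn. The loop below is a hedged sketch of that pattern inside the same class; isBulkJobFinished() and processRecord(...) are assumed helpers for illustration and are not shown in the snippet above.

    // Hypothetical consumer: drain bulk batches until the job is marked finished.
    private void drainBulkResults() throws DataRecordException {
      while (!isBulkJobFinished()) {                  // assumed accessor paired with setBulkJobFinished(true)
        RecordSet<JsonElement> batch = getBulkData(); // may be empty when the job just finished
        for (JsonElement record : batch) {            // assumes RecordSet is iterable
          processRecord(record);                      // assumed helper, e.g. convert and emit downstream
        }
      }
    }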