Use of com.sforce.async.BatchInfoList in project incubator-gobblin by Apache.
The class SalesforceExtractor, method getQueryResultIds.
/**
 * Get record set using the Salesforce-specific Bulk API.
 * @param entity name of the entity/table to query
 * @param predicateList list of all predicate conditions
 * @return list of batch id and result id pairs for the query result sets
 */
private List<BatchIdAndResultId> getQueryResultIds(String entity, List<Predicate> predicateList) throws Exception {
  if (!bulkApiLogin()) {
    throw new IllegalArgumentException("Invalid Login");
  }
  try {
    boolean usingPkChunking = false;

    // Set bulk job attributes
    this.bulkJob.setObject(entity);
    this.bulkJob.setOperation(OperationEnum.query);
    this.bulkJob.setConcurrencyMode(ConcurrencyMode.Parallel);

    // Use pk chunking if it is configured and the expected record count is larger than the pk chunking size
    if (this.pkChunking && getExpectedRecordCount() > this.pkChunkingSize) {
      log.info("Enabling pk chunking with size {}", this.pkChunkingSize);
      this.bulkConnection.addHeader("Sforce-Enable-PKChunking", "chunkSize=" + this.pkChunkingSize);
      usingPkChunking = true;
    }

    // Result type as CSV
    this.bulkJob.setContentType(ContentType.CSV);

    this.bulkJob = this.bulkConnection.createJob(this.bulkJob);
    this.bulkJob = this.bulkConnection.getJobStatus(this.bulkJob.getId());

    // Construct query with the predicates
    String query = this.updatedQuery;
    if (!isNullPredicate(predicateList)) {
      String limitString = getLimitFromInputQuery(query);
      query = query.replace(limitString, "");
      for (Predicate predicate : predicateList) {
        query = SqlQueryUtils.addPredicate(query, predicate.getCondition());
      }
      query = query + limitString;
    }
    log.info("QUERY:" + query);

    ByteArrayInputStream bout = new ByteArrayInputStream(query.getBytes(ConfigurationKeys.DEFAULT_CHARSET_ENCODING));
    BatchInfo bulkBatchInfo = this.bulkConnection.createBatchFromStream(this.bulkJob, bout);

    long expectedSizePerBatch = usingPkChunking ? this.pkChunkingSize : this.getExpectedRecordCount();
    int retryInterval = Math.min(MAX_RETRY_INTERVAL_SECS,
        30 + (int) Math.ceil((float) expectedSizePerBatch / 10000) * 2);
    log.info("Salesforce bulk api retry interval in seconds:" + retryInterval);

    // Get batch info with complete result set (info id refers to the result set id corresponding to the entire result set)
    bulkBatchInfo = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), bulkBatchInfo.getId());

    // Wait for completion, failure, or formation of PK chunking batches
    while ((bulkBatchInfo.getState() != BatchStateEnum.Completed)
        && (bulkBatchInfo.getState() != BatchStateEnum.Failed)
        && (!usingPkChunking || bulkBatchInfo.getState() != BatchStateEnum.NotProcessed)) {
      Thread.sleep(retryInterval * 1000);
      bulkBatchInfo = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), bulkBatchInfo.getId());
      log.debug("Bulk Api Batch Info:" + bulkBatchInfo);
      log.info("Waiting for bulk resultSetIds");
    }

    // Wait for pk chunking batches
    BatchInfoList batchInfoList = this.bulkConnection.getBatchInfoList(this.bulkJob.getId());
    if (usingPkChunking && bulkBatchInfo.getState() == BatchStateEnum.NotProcessed) {
      bulkBatchInfo = waitForPkBatches(batchInfoList, retryInterval);
    }

    if (bulkBatchInfo.getState() == BatchStateEnum.Failed) {
      log.error("Bulk batch failed: " + bulkBatchInfo.toString());
      throw new RuntimeException("Failed to get bulk batch info for jobId " + bulkBatchInfo.getJobId()
          + " error - " + bulkBatchInfo.getStateMessage());
    }

    // Get result set ids of all the batches from the batch info list
    List<BatchIdAndResultId> batchIdAndResultIdList = Lists.newArrayList();
    for (BatchInfo bi : batchInfoList.getBatchInfo()) {
      QueryResultList list = this.bulkConnection.getQueryResultList(this.bulkJob.getId(), bi.getId());
      for (String result : list.getResult()) {
        batchIdAndResultIdList.add(new BatchIdAndResultId(bi.getId(), result));
      }
    }

    log.info("QueryResultList: " + batchIdAndResultIdList);
    return batchIdAndResultIdList;
  } catch (RuntimeException | AsyncApiException | InterruptedException e) {
    throw new RuntimeException("Failed to get query result ids from salesforce using bulk api; error - "
        + e.getMessage(), e);
  }
}
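
The waitForPkBatches helper invoked above is not part of this excerpt. Below is a minimal sketch of what it plausibly does, assuming standard Bulk API semantics: in PK chunking mode Salesforce leaves the original batch in the NotProcessed state, so only the remaining batches need to be polled. The name and fields match the caller above, but the body is a reconstruction, not the project's actual implementation:

private BatchInfo waitForPkBatches(BatchInfoList batchInfoList, int retryInterval)
    throws AsyncApiException, InterruptedException {
  BatchInfo batchInfo = null;
  BatchInfo[] batchInfos = batchInfoList.getBatchInfo();
  // Sketch: skip index 0, the original batch, which stays NotProcessed under pk chunking
  for (int i = 1; i < batchInfos.length; i++) {
    BatchInfo bi = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), batchInfos[i].getId());
    // Poll each chunked batch until it reaches a terminal state
    while (bi.getState() != BatchStateEnum.Completed && bi.getState() != BatchStateEnum.Failed) {
      Thread.sleep(retryInterval * 1000);
      bi = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), bi.getId());
    }
    batchInfo = bi;
  }
  return batchInfo;
}

Each (batchId, resultId) pair returned by getQueryResultIds can later be exchanged for the actual CSV data, for example via BulkConnection.getQueryResultStream(jobId, batchId, resultId).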
Use of com.sforce.async.BatchInfoList in project components by Talend.
The class SalesforceBulkRuntime, method retrieveResultsOfQuery.
/**
 * Retrieve resultId(s) from the job's batch info.
 * Results are retrieved only from completed batches.
 *
 * When pk chunking is enabled, we need to go through all batches in the job.
 * More information in the Salesforce documentation:
 * https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/asynch_api_code_curl_walkthrough_pk_chunking.htm
 *
 * If some batches are queued or in progress, we must wait until they complete, fail, or are not processed.
 * Quick instructions for the primary key chunking flow can be found here:
 * https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/asynch_api_bulk_query_processing.htm
 *
 * @param info batch info from the created job.
 * @throws AsyncApiException
 * @throws ConnectionException
 * @throws InterruptedException
 */
private void retrieveResultsOfQuery(BatchInfo info) throws AsyncApiException, ConnectionException, InterruptedException {
  if (BatchStateEnum.Completed == info.getState()) {
    QueryResultList list = getQueryResultList(job.getId(), info.getId());
    queryResultIDs = new HashSet<String>(Arrays.asList(list.getResult())).iterator();
    this.batchInfoList = Collections.singletonList(info);
    return;
  }
  BatchInfoList batchInfoList = null;
  Set<String> resultSet = new HashSet<>();
  boolean isInProgress = true;
  while (isInProgress) {
    batchInfoList = getBatchInfoList(job.getId());
    isInProgress = isJobBatchesInProgress(batchInfoList, info);
    if (isInProgress) {
      Thread.sleep(chunkSleepTime);
      long processingTime = System.currentTimeMillis() - job.getCreatedDate().getTimeInMillis();
      if (processingTime > MAX_BATCH_EXECUTION_TIME) {
        // Break processing and return processed data if any batch was processed.
        LOGGER.warn(MESSAGES.getMessage("warn.batch.timeout"));
        break;
      }
    }
  }
  for (BatchInfo batch : batchInfoList.getBatchInfo()) {
    if (batch.getId().equals(info.getId())) {
      continue;
    }
    resultSet.addAll(Arrays.asList(getQueryResultList(job.getId(), batch.getId()).getResult()));
  }
  queryResultIDs = resultSet.iterator();
  this.batchInfoList = Arrays.asList(batchInfoList.getBatchInfo());
}
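
The isJobBatchesInProgress helper is likewise not shown in this excerpt. A minimal sketch of what it plausibly checks, assuming a batch counts as in progress while it is Queued or InProgress, and that the original batch (left NotProcessed under pk chunking) is skipped, just as the result-collection loop above skips it. This is a reconstruction, not the project's actual code:

private boolean isJobBatchesInProgress(BatchInfoList batchInfoList, BatchInfo info) {
  for (BatchInfo batch : batchInfoList.getBatchInfo()) {
    // Sketch: the original batch stays NotProcessed when pk chunking is on, so ignore it
    if (batch.getId().equals(info.getId())) {
      continue;
    }
    // Any remaining batch that is still queued or running means the job has work outstanding
    if (batch.getState() == BatchStateEnum.Queued || batch.getState() == BatchStateEnum.InProgress) {
      return true;
    }
  }
  return false;
}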