Example 1 with BatchInfoList

Use of com.sforce.async.BatchInfoList in the project incubator-gobblin by Apache.

From the class SalesforceExtractor, the method getQueryResultIds:

/**
 * Gets the record set using the Salesforce-specific Bulk API.
 * @param entity the entity/table name to query
 * @param predicateList list of all predicate conditions to apply
 * @return list of batch id and result id pairs, one entry per result set
 */
private List<BatchIdAndResultId> getQueryResultIds(String entity, List<Predicate> predicateList) throws Exception {
    if (!bulkApiLogin()) {
        throw new IllegalArgumentException("Invalid Login");
    }
    try {
        boolean usingPkChunking = false;
        // Set bulk job attributes
        this.bulkJob.setObject(entity);
        this.bulkJob.setOperation(OperationEnum.query);
        this.bulkJob.setConcurrencyMode(ConcurrencyMode.Parallel);
        // use pk chunking if pk chunking is configured and the expected record count is larger than the pk chunking size
        if (this.pkChunking && getExpectedRecordCount() > this.pkChunkingSize) {
            log.info("Enabling pk chunking with size {}", this.pkChunkingSize);
            this.bulkConnection.addHeader("Sforce-Enable-PKChunking", "chunkSize=" + this.pkChunkingSize);
            usingPkChunking = true;
        }
        // Result type as CSV
        this.bulkJob.setContentType(ContentType.CSV);
        this.bulkJob = this.bulkConnection.createJob(this.bulkJob);
        this.bulkJob = this.bulkConnection.getJobStatus(this.bulkJob.getId());
        // Construct query with the predicates
        String query = this.updatedQuery;
        if (!isNullPredicate(predicateList)) {
            String limitString = getLimitFromInputQuery(query);
            query = query.replace(limitString, "");
            for (Predicate predicate : predicateList) {
                query = SqlQueryUtils.addPredicate(query, predicate.getCondition());
            }
            query = query + limitString;
        }
        log.info("QUERY:" + query);
        ByteArrayInputStream bout = new ByteArrayInputStream(query.getBytes(ConfigurationKeys.DEFAULT_CHARSET_ENCODING));
        BatchInfo bulkBatchInfo = this.bulkConnection.createBatchFromStream(this.bulkJob, bout);
        long expectedSizePerBatch = usingPkChunking ? this.pkChunkingSize : this.getExpectedRecordCount();
        int retryInterval = Math.min(MAX_RETRY_INTERVAL_SECS, 30 + (int) Math.ceil((float) expectedSizePerBatch / 10000) * 2);
        log.info("Salesforce bulk api retry interval in seconds:" + retryInterval);
        // Refresh the batch info; its result set covers the entire query
        // unless PK chunking split the work into multiple batches
        bulkBatchInfo = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), bulkBatchInfo.getId());
        // Wait for completion, failure, or formation of PK chunking batches
        while (bulkBatchInfo.getState() != BatchStateEnum.Completed
                && bulkBatchInfo.getState() != BatchStateEnum.Failed
                && (!usingPkChunking || bulkBatchInfo.getState() != BatchStateEnum.NotProcessed)) {
            Thread.sleep(retryInterval * 1000);
            bulkBatchInfo = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), bulkBatchInfo.getId());
            log.debug("Bulk Api Batch Info:" + bulkBatchInfo);
            log.info("Waiting for bulk resultSetIds");
        }
        // If PK chunking kicked in, the original batch stays NotProcessed; wait for the chunked batches instead
        BatchInfoList batchInfoList = this.bulkConnection.getBatchInfoList(this.bulkJob.getId());
        if (usingPkChunking && bulkBatchInfo.getState() == BatchStateEnum.NotProcessed) {
            bulkBatchInfo = waitForPkBatches(batchInfoList, retryInterval);
        }
        if (bulkBatchInfo.getState() == BatchStateEnum.Failed) {
            log.error("Bulk batch failed: " + bulkBatchInfo.toString());
            throw new RuntimeException("Failed to get bulk batch info for jobId " + bulkBatchInfo.getJobId() + " error - " + bulkBatchInfo.getStateMessage());
        }
        // Get resultset ids of all the batches from the batch info list
        List<BatchIdAndResultId> batchIdAndResultIdList = Lists.newArrayList();
        for (BatchInfo bi : batchInfoList.getBatchInfo()) {
            QueryResultList list = this.bulkConnection.getQueryResultList(this.bulkJob.getId(), bi.getId());
            for (String result : list.getResult()) {
                batchIdAndResultIdList.add(new BatchIdAndResultId(bi.getId(), result));
            }
        }
        log.info("QueryResultList: " + batchIdAndResultIdList);
        return batchIdAndResultIdList;
    } catch (RuntimeException | AsyncApiException | InterruptedException e) {
        throw new RuntimeException("Failed to get query result ids from salesforce using bulk api; error - " + e.getMessage(), e);
    }
}
Also used: com.sforce.async.QueryResultList, com.sforce.async.AsyncApiException, org.apache.gobblin.source.extractor.watermark.Predicate, java.io.ByteArrayInputStream, com.sforce.async.BatchInfoList, com.sforce.async.BatchInfo
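The snippet above returns batch and result id pairs but does not show how they are consumed downstream. Below is a minimal sketch, not taken from the Gobblin source, of how a caller might stream each result set as CSV. It assumes an authenticated com.sforce.async.BulkConnection and the bulk job id are in scope, that BatchIdAndResultId exposes getBatchId()/getResultId() accessors, and that the helper name downloadResults is hypothetical; getQueryResultStream itself is part of the Salesforce Bulk (Async) API.

// Hypothetical consumption sketch; connection, jobId, and the results list
// returned by getQueryResultIds are assumed to be available to the caller.
private void downloadResults(BulkConnection connection, String jobId, List<BatchIdAndResultId> results) throws AsyncApiException, IOException {
    for (BatchIdAndResultId pair : results) {
        // Each result id maps to one CSV stream of records for its batch
        try (InputStream csv = connection.getQueryResultStream(jobId, pair.getBatchId(), pair.getResultId());
                BufferedReader reader = new BufferedReader(new InputStreamReader(csv, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Parse each CSV line into a record here
            }
        }
    }
}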

Example 2 with BatchInfoList

Use of com.sforce.async.BatchInfoList in the project components by Talend.

From the class SalesforceBulkRuntime, the method retrieveResultsOfQuery:

/**
 * Retrieves the result id(s) from the job's batch info.
 * Results are retrieved only from completed batches.
 *
 * When PK chunking is enabled, we need to go through all batches in the job.
 * More information in the Salesforce documentation:
 * https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/asynch_api_code_curl_walkthrough_pk_chunking.htm
 *
 * If some batches are still queued or in progress, we must wait until they complete, fail, or end up NotProcessed.
 * A quick walkthrough of the primary key chunking flow is available here:
 * https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/asynch_api_bulk_query_processing.htm
 *
 * @param info batch info from the created job.
 * @throws AsyncApiException if a Bulk API call fails
 * @throws ConnectionException if the Salesforce connection fails
 * @throws InterruptedException if the wait between polls is interrupted
 */
private void retrieveResultsOfQuery(BatchInfo info) throws AsyncApiException, ConnectionException, InterruptedException {
    // Fast path: the single batch already completed (no PK chunking), so its
    // result ids are all we need
    if (BatchStateEnum.Completed == info.getState()) {
        QueryResultList list = getQueryResultList(job.getId(), info.getId());
        queryResultIDs = new HashSet<>(Arrays.asList(list.getResult())).iterator();
        this.batchInfoList = Collections.singletonList(info);
        return;
    }
    BatchInfoList batchInfoList = null;
    Set<String> resultSet = new HashSet<>();
    boolean isInProgress = true;
    while (isInProgress) {
        batchInfoList = getBatchInfoList(job.getId());
        isInProgress = isJobBatchesInProgress(batchInfoList, info);
        if (isInProgress) {
            Thread.sleep(chunkSleepTime);
            long processingTime = System.currentTimeMillis() - job.getCreatedDate().getTimeInMillis();
            if (processingTime > MAX_BATCH_EXECUTION_TIME) {
                // Break processing and return processed data if any batch was processed.
                LOGGER.warn(MESSAGES.getMessage("warn.batch.timeout"));
                break;
            }
        }
    }
    for (BatchInfo batch : batchInfoList.getBatchInfo()) {
        if (batch.getId().equals(info.getId())) {
            // Skip the original batch: under PK chunking it stays NotProcessed
            // and carries no results of its own
            continue;
        }
        resultSet.addAll(Arrays.asList(getQueryResultList(job.getId(), batch.getId()).getResult()));
    }
    queryResultIDs = resultSet.iterator();
    this.batchInfoList = Arrays.asList(batchInfoList.getBatchInfo());
}
Also used: com.sforce.async.QueryResultList, com.sforce.async.BatchInfoList, com.sforce.async.BatchInfo, java.util.HashSet
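The helper isJobBatchesInProgress called in the polling loop above is not included in the snippet. The following is a hedged reconstruction, not the actual Talend implementation: it assumes Queued and InProgress are the only "still running" states, and that the original batch, which stays NotProcessed when PK chunking is on, should be ignored.

// Hypothetical reconstruction: reports whether any batch in the job, other
// than the original one, is still queued or running
private boolean isJobBatchesInProgress(BatchInfoList batchInfoList, BatchInfo info) {
    for (BatchInfo batch : batchInfoList.getBatchInfo()) {
        // Skip the original batch; it never leaves NotProcessed under PK chunking
        if (batch.getId().equals(info.getId())) {
            continue;
        }
        BatchStateEnum state = batch.getState();
        if (state == BatchStateEnum.Queued || state == BatchStateEnum.InProgress) {
            return true;
        }
    }
    return false;
}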

Aggregations

BatchInfo (com.sforce.async.BatchInfo): 2 uses
BatchInfoList (com.sforce.async.BatchInfoList): 2 uses
QueryResultList (com.sforce.async.QueryResultList): 2 uses
AsyncApiException (com.sforce.async.AsyncApiException): 1 use
ByteArrayInputStream (java.io.ByteArrayInputStream): 1 use
HashSet (java.util.HashSet): 1 use
Predicate (org.apache.gobblin.source.extractor.watermark.Predicate): 1 use