
Example 1 with BatchInfo

use of com.sforce.async.BatchInfo in project tdi-studio-se by Talend.

the class SalesforceBulkAPI method createBatch.

private void createBatch(FileOutputStream tmpOut, File tmpFile, List<BatchInfo> batchInfos) throws IOException, AsyncApiException, ConnectionException {
    // Flush and close the temp stream so the file's full contents are visible to the reader below
    tmpOut.flush();
    tmpOut.close();
    FileInputStream tmpInputStream = new FileInputStream(tmpFile);
    try {
        // Upload the temp CSV as a new batch of the current job and record its BatchInfo
        BatchInfo batchInfo = connection.createBatchFromStream(job, tmpInputStream);
        batchInfos.add(batchInfo);
    } finally {
        tmpInputStream.close();
    }
}
Also used : BatchInfo(com.sforce.async.BatchInfo) FileInputStream(java.io.FileInputStream)
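
For context, here is a minimal sketch of how a helper like createBatch might be driven end to end. The login details, object name, and CSV content are assumptions for illustration; only the com.sforce.async calls mirror the standard Bulk API.

import com.sforce.async.BatchInfo;
import com.sforce.async.BulkConnection;
import com.sforce.async.ContentType;
import com.sforce.async.JobInfo;
import com.sforce.async.OperationEnum;
import com.sforce.ws.ConnectorConfig;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.List;

public class CreateBatchSketch {
    public static void main(String[] args) throws Exception {
        // Assumption: sessionId and restEndpoint come from a prior SOAP login
        ConnectorConfig config = new ConnectorConfig();
        config.setSessionId(args[0]);
        config.setRestEndpoint(args[1]);
        BulkConnection connection = new BulkConnection(config);
        // Create a CSV insert job, the kind of job createBatch expects
        JobInfo job = new JobInfo();
        job.setObject("Account");
        job.setOperation(OperationEnum.insert);
        job.setContentType(ContentType.CSV);
        job = connection.createJob(job);
        // Write one small CSV chunk to a temp file and upload it as a batch,
        // mirroring what createBatch(tmpOut, tmpFile, batchInfos) does above
        List<BatchInfo> batchInfos = new ArrayList<BatchInfo>();
        File tmpFile = File.createTempFile("sforceBulkAPI", ".csv");
        FileOutputStream tmpOut = new FileOutputStream(tmpFile);
        tmpOut.write("Name\nAcme Corp\n".getBytes("UTF-8"));
        tmpOut.flush();
        tmpOut.close();
        FileInputStream tmpIn = new FileInputStream(tmpFile);
        try {
            batchInfos.add(connection.createBatchFromStream(job, tmpIn));
        } finally {
            tmpIn.close();
            tmpFile.delete();
        }
        connection.closeJob(job.getId());
        System.out.println("Submitted " + batchInfos.size() + " batch(es)");
    }
}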

Example 2 with BatchInfo

use of com.sforce.async.BatchInfo in project incubator-gobblin by apache.

the class SalesforceExtractor method getQueryResultIds.

/**
 * Gets the result set ids for a query using the Salesforce-specific Bulk API.
 *
 * @param entity the object (table) name to query
 * @param predicateList list of all predicate conditions to apply
 * @return list of batch id / result set id pairs, one per result set
 */
private List<BatchIdAndResultId> getQueryResultIds(String entity, List<Predicate> predicateList) throws Exception {
    if (!bulkApiLogin()) {
        throw new IllegalArgumentException("Invalid Login");
    }
    try {
        boolean usingPkChunking = false;
        // Set bulk job attributes
        this.bulkJob.setObject(entity);
        this.bulkJob.setOperation(OperationEnum.query);
        this.bulkJob.setConcurrencyMode(ConcurrencyMode.Parallel);
        // use pk chunking if pk chunking is configured and the expected record count is larger than the pk chunking size
        if (this.pkChunking && getExpectedRecordCount() > this.pkChunkingSize) {
            log.info("Enabling pk chunking with size {}", this.pkChunkingSize);
            this.bulkConnection.addHeader("Sforce-Enable-PKChunking", "chunkSize=" + this.pkChunkingSize);
            usingPkChunking = true;
        }
        // Result type as CSV
        this.bulkJob.setContentType(ContentType.CSV);
        this.bulkJob = this.bulkConnection.createJob(this.bulkJob);
        this.bulkJob = this.bulkConnection.getJobStatus(this.bulkJob.getId());
        // Construct query with the predicates
        String query = this.updatedQuery;
        if (!isNullPredicate(predicateList)) {
            String limitString = getLimitFromInputQuery(query);
            query = query.replace(limitString, "");
            Iterator<Predicate> i = predicateList.listIterator();
            while (i.hasNext()) {
                Predicate predicate = i.next();
                query = SqlQueryUtils.addPredicate(query, predicate.getCondition());
            }
            query = query + limitString;
        }
        log.info("QUERY:" + query);
        ByteArrayInputStream queryStream = new ByteArrayInputStream(query.getBytes(ConfigurationKeys.DEFAULT_CHARSET_ENCODING));
        BatchInfo bulkBatchInfo = this.bulkConnection.createBatchFromStream(this.bulkJob, queryStream);
        long expectedSizePerBatch = usingPkChunking ? this.pkChunkingSize : this.getExpectedRecordCount();
        int retryInterval = Math.min(MAX_RETRY_INTERVAL_SECS, 30 + (int) Math.ceil((float) expectedSizePerBatch / 10000) * 2);
        log.info("Salesforce bulk api retry interval in seconds:" + retryInterval);
        // Get batch info with complete resultset (info id - refers to the resultset id corresponding to entire resultset)
        bulkBatchInfo = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), bulkBatchInfo.getId());
        // wait for completion, failure, or formation of PK chunking batches
        while ((bulkBatchInfo.getState() != BatchStateEnum.Completed) && (bulkBatchInfo.getState() != BatchStateEnum.Failed) && (!usingPkChunking || bulkBatchInfo.getState() != BatchStateEnum.NotProcessed)) {
            Thread.sleep(retryInterval * 1000);
            bulkBatchInfo = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), bulkBatchInfo.getId());
            log.debug("Bulk Api Batch Info:" + bulkBatchInfo);
            log.info("Waiting for bulk resultSetIds");
        }
        // Wait for pk chunking batches
        BatchInfoList batchInfoList = this.bulkConnection.getBatchInfoList(this.bulkJob.getId());
        if (usingPkChunking && bulkBatchInfo.getState() == BatchStateEnum.NotProcessed) {
            bulkBatchInfo = waitForPkBatches(batchInfoList, retryInterval);
        }
        if (bulkBatchInfo.getState() == BatchStateEnum.Failed) {
            log.error("Bulk batch failed: " + bulkBatchInfo.toString());
            throw new RuntimeException("Failed to get bulk batch info for jobId " + bulkBatchInfo.getJobId() + " error - " + bulkBatchInfo.getStateMessage());
        }
        // Get resultset ids of all the batches from the batch info list
        List<BatchIdAndResultId> batchIdAndResultIdList = Lists.newArrayList();
        for (BatchInfo bi : batchInfoList.getBatchInfo()) {
            QueryResultList list = this.bulkConnection.getQueryResultList(this.bulkJob.getId(), bi.getId());
            for (String result : list.getResult()) {
                batchIdAndResultIdList.add(new BatchIdAndResultId(bi.getId(), result));
            }
        }
        log.info("QueryResultList: " + batchIdAndResultIdList);
        return batchIdAndResultIdList;
    } catch (RuntimeException | AsyncApiException | InterruptedException e) {
        throw new RuntimeException("Failed to get query result ids from salesforce using bulk api; error - " + e.getMessage(), e);
    }
}
Also used : QueryResultList(com.sforce.async.QueryResultList) AsyncApiException(com.sforce.async.AsyncApiException) Predicate(org.apache.gobblin.source.extractor.watermark.Predicate) ByteArrayInputStream(java.io.ByteArrayInputStream) BatchInfoList(com.sforce.async.BatchInfoList) BatchInfo(com.sforce.async.BatchInfo)
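
As a follow-up, a sketch of how the returned batch id / result id pairs could be turned into CSV streams. BulkConnection.getQueryResultStream is part of the standard Bulk API client; the pair class here is a hypothetical stand-in for Gobblin's BatchIdAndResultId.

import com.sforce.async.AsyncApiException;
import com.sforce.async.BulkConnection;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;

public class ResultStreamSketch {

    // Hypothetical mirror of Gobblin's BatchIdAndResultId pair
    static class BatchIdAndResultId {
        final String batchId;
        final String resultId;
        BatchIdAndResultId(String batchId, String resultId) {
            this.batchId = batchId;
            this.resultId = resultId;
        }
    }

    // Open every result set returned by getQueryResultIds and print its CSV header row
    static void printResultHeaders(BulkConnection connection, String jobId,
            List<BatchIdAndResultId> ids) throws AsyncApiException, IOException {
        for (BatchIdAndResultId id : ids) {
            InputStream in = connection.getQueryResultStream(jobId, id.batchId, id.resultId);
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
                // The first line of each result set is the CSV header
                System.out.println(reader.readLine());
            }
        }
    }
}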

Example 3 with BatchInfo

use of com.sforce.async.BatchInfo in project components by Talend.

the class SalesforceBulkRuntime method awaitCompletion.

/**
 * Wait for a job to complete by polling the Bulk API.
 *
 * @throws AsyncApiException
 * @throws ConnectionException
 */
private void awaitCompletion() throws AsyncApiException, ConnectionException {
    long sleepTime = 0L;
    Set<String> incomplete = new HashSet<String>();
    for (BatchInfo bi : batchInfoList) {
        incomplete.add(bi.getId());
    }
    while (!incomplete.isEmpty()) {
        try {
            Thread.sleep(sleepTime);
        } catch (InterruptedException e) {
            // Restore the interrupt flag instead of swallowing the interruption
            Thread.currentThread().interrupt();
            return;
        }
        sleepTime = awaitTime;
        BatchInfo[] statusList = getBatchInfoList(job.getId()).getBatchInfo();
        for (BatchInfo b : statusList) {
            if (b.getState() == BatchStateEnum.Completed || b.getState() == BatchStateEnum.Failed) {
                incomplete.remove(b.getId());
            }
        }
    }
}
Also used : BatchInfo(com.sforce.async.BatchInfo) HashSet(java.util.HashSet)
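
A common refinement is to back off between polls instead of sleeping a fixed awaitTime. A minimal sketch under that assumption, using only standard BulkConnection calls; it is not the Talend runtime's implementation.

import com.sforce.async.AsyncApiException;
import com.sforce.async.BatchInfo;
import com.sforce.async.BatchStateEnum;
import com.sforce.async.BulkConnection;
import java.util.HashSet;
import java.util.Set;

public class AwaitWithBackoffSketch {

    // Poll until every batch of the job is Completed or Failed, doubling the
    // sleep interval up to a cap; jobId comes from the job created earlier
    static void awaitCompletion(BulkConnection connection, String jobId)
            throws AsyncApiException, InterruptedException {
        long sleepMillis = 1000L;
        final long maxSleepMillis = 30000L;
        Set<String> incomplete = new HashSet<String>();
        for (BatchInfo bi : connection.getBatchInfoList(jobId).getBatchInfo()) {
            incomplete.add(bi.getId());
        }
        while (!incomplete.isEmpty()) {
            Thread.sleep(sleepMillis);
            sleepMillis = Math.min(sleepMillis * 2, maxSleepMillis);
            for (BatchInfo b : connection.getBatchInfoList(jobId).getBatchInfo()) {
                if (b.getState() == BatchStateEnum.Completed || b.getState() == BatchStateEnum.Failed) {
                    incomplete.remove(b.getId());
                }
            }
        }
    }
}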

Example 4 with BatchInfo

use of com.sforce.async.BatchInfo in project components by Talend.

the class SalesforceBulkRuntime method getBatchLog.

/**
 * Gets the results of the operation and checks for errors.
 *
 * @param batchNum index of the batch in batchInfoList
 * @return the parsed result rows for the given batch
 * @throws AsyncApiException
 * @throws IOException
 * @throws ConnectionException
 */
public List<BulkResult> getBatchLog(int batchNum) throws AsyncApiException, IOException, ConnectionException {
    // batchInfoList was populated when batches were created and submitted
    List<BulkResult> resultInfoList = new ArrayList<BulkResult>();
    BulkResult resultInfo;
    BatchInfo b = batchInfoList.get(batchNum);
    CSVReader rdr = new CSVReader(getBatchResultStream(job.getId(), b.getId()));
    List<String> resultHeader = rdr.nextRecord();
    int resultCols = resultHeader.size();
    List<String> row;
    while ((row = rdr.nextRecord()) != null) {
        resultInfo = new BulkResult();
        resultInfo.copyValues(getBaseFileRow());
        for (int i = 0; i < resultCols; i++) {
            String header = resultHeader.get(i);
            resultInfo.setValue(header, row.get(i));
            if ("Created".equals(header)) {
                resultInfo.setValue("salesforce_created", row.get(i));
            } else if ("Id".equals(header)) {
                resultInfo.setValue("salesforce_id", row.get(i));
            }
        }
        resultInfoList.add(resultInfo);
    }
    return resultInfoList;
}
Also used : CSVReader(com.sforce.async.CSVReader) ArrayList(java.util.ArrayList) BatchInfo(com.sforce.async.BatchInfo)
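
For reference, the batch result CSV served by the Bulk API carries Id, Success, Created, and Error columns, which is why the method above maps Created and Id into dedicated fields. A sketch that checks those columns directly with the same CSVReader, assuming a plain BulkConnection rather than Talend's wrapper.

import com.sforce.async.AsyncApiException;
import com.sforce.async.BulkConnection;
import com.sforce.async.CSVReader;
import java.io.IOException;
import java.util.List;

public class BatchResultCheckSketch {

    // Print the row-level outcome of one batch: Id, Success, and Error
    static void printBatchOutcome(BulkConnection connection, String jobId, String batchId)
            throws AsyncApiException, IOException {
        CSVReader rdr = new CSVReader(connection.getBatchResultStream(jobId, batchId));
        List<String> header = rdr.nextRecord();
        int idCol = header.indexOf("Id");
        int successCol = header.indexOf("Success");
        int errorCol = header.indexOf("Error");
        List<String> row;
        while ((row = rdr.nextRecord()) != null) {
            boolean success = Boolean.parseBoolean(row.get(successCol));
            System.out.println(row.get(idCol) + " -> "
                    + (success ? "OK" : "FAILED: " + row.get(errorCol)));
        }
    }
}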

Example 5 with BatchInfo

use of com.sforce.async.BatchInfo in project tdi-studio-se by Talend.

the class SalesforceBulkAPI method createBatchesFromCSVFile.

private List<BatchInfo> createBatchesFromCSVFile() throws IOException, AsyncApiException, ConnectionException {
    List<BatchInfo> batchInfos = new ArrayList<BatchInfo>();
    BufferedReader rdr = new BufferedReader(new InputStreamReader(new FileInputStream(bulkFileName), FILE_ENCODING));
    // read the CSV header row
    byte[] headerBytes = (rdr.readLine() + "\n").getBytes("UTF-8");
    int headerBytesLength = headerBytes.length;
    File tmpFile = File.createTempFile("sforceBulkAPI", ".csv");
    // Split the CSV file into multiple batches
    try {
        FileOutputStream tmpOut = new FileOutputStream(tmpFile);
        int currentBytes = 0;
        int currentLines = 0;
        String nextLine;
        boolean needStart = true;
        boolean needEnds = true;
        while ((nextLine = rdr.readLine()) != null) {
            int num = countQuotes(nextLine);
            // An odd quote count means this line opens or closes a quoted field
            // that spans multiple lines
            if (num % 2 == 1) {
                if (!needStart) {
                    needEnds = false;
                } else {
                    needStart = false;
                }
            } else {
                // An even quote count means this line is a complete record, or sits
                // entirely inside a multi-line quoted field
                if (needEnds && needStart) {
                    needEnds = false;
                    needStart = false;
                }
            }
            byte[] bytes = (nextLine + "\n").getBytes("UTF-8");
            // Create a new batch when our batch size limit is reached
            if (currentBytes + bytes.length > maxBytesPerBatch || currentLines > maxRowsPerBatch) {
                createBatch(tmpOut, tmpFile, batchInfos);
                currentBytes = 0;
                currentLines = 0;
            }
            if (currentBytes == 0) {
                tmpOut = new FileOutputStream(tmpFile);
                tmpOut.write(headerBytes);
                currentBytes = headerBytesLength;
                currentLines = 1;
            }
            tmpOut.write(bytes);
            currentBytes += bytes.length;
            if (!needStart && !needEnds) {
                currentLines++;
                needStart = true;
                needEnds = true;
            }
        }
        // Finished processing all rows; create a final batch for any remaining data
        if (currentLines > 1) {
            createBatch(tmpOut, tmpFile, batchInfos);
        }
    } finally {
        // Close the reader and remove the temp file even if batch creation failed
        rdr.close();
        tmpFile.delete();
    }
    return batchInfos;
}
Also used : InputStreamReader(java.io.InputStreamReader) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) BufferedReader(java.io.BufferedReader) BatchInfo(com.sforce.async.BatchInfo) File(java.io.File) FileInputStream(java.io.FileInputStream)
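
The countQuotes helper is referenced above but not shown; presumably it counts double-quote characters so the splitter can tell when a CSV record spans multiple lines. A hypothetical stand-in under that assumption:

// Hypothetical stand-in for the countQuotes helper referenced above:
// counts '"' characters so multi-line quoted records can be detected
private int countQuotes(String line) {
    int count = 0;
    for (int i = 0; i < line.length(); i++) {
        if (line.charAt(i) == '"') {
            count++;
        }
    }
    return count;
}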

Aggregations

BatchInfo (com.sforce.async.BatchInfo) 13
FileInputStream (java.io.FileInputStream) 4
ArrayList (java.util.ArrayList) 4
QueryResultList (com.sforce.async.QueryResultList) 3
ByteArrayInputStream (java.io.ByteArrayInputStream) 3
HashSet (java.util.HashSet) 3
BatchInfoList (com.sforce.async.BatchInfoList) 2
CSVReader (com.sforce.async.CSVReader) 2
JobInfo (com.sforce.async.JobInfo) 2
BufferedReader (java.io.BufferedReader) 2
File (java.io.File) 2
FileOutputStream (java.io.FileOutputStream) 2
InputStreamReader (java.io.InputStreamReader) 2
AsyncApiException (com.sforce.async.AsyncApiException) 1
HashMap (java.util.HashMap) 1
Map (java.util.Map) 1
Predicate (org.apache.gobblin.source.extractor.watermark.Predicate) 1
ComponentException (org.talend.components.api.exception.ComponentException) 1
DefaultErrorCode (org.talend.daikon.exception.error.DefaultErrorCode) 1