use of com.sforce.async.BatchInfo in project tdi-studio-se by Talend.
the class SalesforceBulkAPI method createBatch.
private void createBatch(FileOutputStream tmpOut, File tmpFile, List<BatchInfo> batchInfos)
        throws IOException, AsyncApiException, ConnectionException {
    tmpOut.flush();
    tmpOut.close();
    FileInputStream tmpInputStream = new FileInputStream(tmpFile);
    try {
        BatchInfo batchInfo = connection.createBatchFromStream(job, tmpInputStream);
        // System.out.println(batchInfo);
        batchInfos.add(batchInfo);
    } finally {
        tmpInputStream.close();
    }
}
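For context, a minimal sketch of how the connection and job fields used by createBatch might be prepared; the endpoint, API version, object name, and operation below are illustrative assumptions, not values taken from the Talend source.

    // Minimal setup sketch (assumptions: sessionId/instanceUrl come from a prior login; API version 47.0 is arbitrary)
    ConnectorConfig config = new ConnectorConfig();
    config.setSessionId(sessionId);
    config.setRestEndpoint(instanceUrl + "/services/async/47.0");
    BulkConnection connection = new BulkConnection(config);

    JobInfo job = new JobInfo();
    job.setObject("Account");                  // assumed object name
    job.setOperation(OperationEnum.insert);    // assumed operation
    job.setContentType(ContentType.CSV);
    job = connection.createJob(job);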
use of com.sforce.async.BatchInfo in project incubator-gobblin by apache.
the class SalesforceExtractor method getQueryResultIds.
/**
 * Gets the record set using the Salesforce-specific Bulk API.
 *
 * @param entity the entity/table name to query
 * @param predicateList list of all predicate conditions
 * @return list of batch id / result id pairs identifying the query result sets
 */
private List<BatchIdAndResultId> getQueryResultIds(String entity, List<Predicate> predicateList) throws Exception {
    if (!bulkApiLogin()) {
        throw new IllegalArgumentException("Invalid Login");
    }
    try {
        boolean usingPkChunking = false;

        // Set bulk job attributes
        this.bulkJob.setObject(entity);
        this.bulkJob.setOperation(OperationEnum.query);
        this.bulkJob.setConcurrencyMode(ConcurrencyMode.Parallel);

        // Use pk chunking if pk chunking is configured and the expected record count is larger than the pk chunking size
        if (this.pkChunking && getExpectedRecordCount() > this.pkChunkingSize) {
            log.info("Enabling pk chunking with size {}", this.pkChunkingSize);
            this.bulkConnection.addHeader("Sforce-Enable-PKChunking", "chunkSize=" + this.pkChunkingSize);
            usingPkChunking = true;
        }

        // Result type as CSV
        this.bulkJob.setContentType(ContentType.CSV);

        this.bulkJob = this.bulkConnection.createJob(this.bulkJob);
        this.bulkJob = this.bulkConnection.getJobStatus(this.bulkJob.getId());

        // Construct query with the predicates
        String query = this.updatedQuery;
        if (!isNullPredicate(predicateList)) {
            String limitString = getLimitFromInputQuery(query);
            query = query.replace(limitString, "");
            Iterator<Predicate> i = predicateList.listIterator();
            while (i.hasNext()) {
                Predicate predicate = i.next();
                query = SqlQueryUtils.addPredicate(query, predicate.getCondition());
            }
            query = query + limitString;
        }
        log.info("QUERY:" + query);

        ByteArrayInputStream bout = new ByteArrayInputStream(query.getBytes(ConfigurationKeys.DEFAULT_CHARSET_ENCODING));
        BatchInfo bulkBatchInfo = this.bulkConnection.createBatchFromStream(this.bulkJob, bout);

        long expectedSizePerBatch = usingPkChunking ? this.pkChunkingSize : this.getExpectedRecordCount();
        int retryInterval = Math.min(MAX_RETRY_INTERVAL_SECS,
                30 + (int) Math.ceil((float) expectedSizePerBatch / 10000) * 2);
        log.info("Salesforce bulk api retry interval in seconds:" + retryInterval);

        // Get batch info with complete resultset (info id - refers to the resultset id corresponding to entire resultset)
        bulkBatchInfo = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), bulkBatchInfo.getId());

        // Wait for completion, failure, or formation of PK chunking batches
        while ((bulkBatchInfo.getState() != BatchStateEnum.Completed)
                && (bulkBatchInfo.getState() != BatchStateEnum.Failed)
                && (!usingPkChunking || bulkBatchInfo.getState() != BatchStateEnum.NotProcessed)) {
            Thread.sleep(retryInterval * 1000);
            bulkBatchInfo = this.bulkConnection.getBatchInfo(this.bulkJob.getId(), bulkBatchInfo.getId());
            log.debug("Bulk Api Batch Info:" + bulkBatchInfo);
            log.info("Waiting for bulk resultSetIds");
        }

        // Wait for pk chunking batches
        BatchInfoList batchInfoList = this.bulkConnection.getBatchInfoList(this.bulkJob.getId());
        if (usingPkChunking && bulkBatchInfo.getState() == BatchStateEnum.NotProcessed) {
            bulkBatchInfo = waitForPkBatches(batchInfoList, retryInterval);
        }

        if (bulkBatchInfo.getState() == BatchStateEnum.Failed) {
            log.error("Bulk batch failed: " + bulkBatchInfo.toString());
            throw new RuntimeException("Failed to get bulk batch info for jobId " + bulkBatchInfo.getJobId()
                    + " error - " + bulkBatchInfo.getStateMessage());
        }

        // Get resultset ids of all the batches from the batch info list
        List<BatchIdAndResultId> batchIdAndResultIdList = Lists.newArrayList();
        for (BatchInfo bi : batchInfoList.getBatchInfo()) {
            QueryResultList list = this.bulkConnection.getQueryResultList(this.bulkJob.getId(), bi.getId());
            for (String result : list.getResult()) {
                batchIdAndResultIdList.add(new BatchIdAndResultId(bi.getId(), result));
            }
        }
        log.info("QueryResultList: " + batchIdAndResultIdList);
        return batchIdAndResultIdList;
    } catch (RuntimeException | AsyncApiException | InterruptedException e) {
        throw new RuntimeException(
                "Failed to get query result ids from salesforce using bulk api; error - " + e.getMessage(), e);
    }
}
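As a follow-up, one plausible way to consume the returned pairs is to stream each result set with BulkConnection.getQueryResultStream; this is only a sketch, and it assumes BatchIdAndResultId exposes getBatchId() and getResultId() accessors.

    // Hypothetical consumer of getQueryResultIds (getBatchId()/getResultId() accessors are assumed)
    for (BatchIdAndResultId ids : getQueryResultIds(entity, predicateList)) {
        InputStream csvStream = this.bulkConnection.getQueryResultStream(
                this.bulkJob.getId(), ids.getBatchId(), ids.getResultId());
        // parse the CSV stream into records here
        csvStream.close();
    }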
use of com.sforce.async.BatchInfo in project components by Talend.
the class SalesforceBulkRuntime method awaitCompletion.
/**
* Wait for a job to complete by polling the Bulk API.
*
* @throws AsyncApiException
* @throws ConnectionException
*/
private void awaitCompletion() throws AsyncApiException, ConnectionException {
    long sleepTime = 0L;
    Set<String> incomplete = new HashSet<String>();
    for (BatchInfo bi : batchInfoList) {
        incomplete.add(bi.getId());
    }
    while (!incomplete.isEmpty()) {
        try {
            Thread.sleep(sleepTime);
        } catch (InterruptedException e) {
        }
        sleepTime = awaitTime;
        BatchInfo[] statusList = getBatchInfoList(job.getId()).getBatchInfo();
        for (BatchInfo b : statusList) {
            if (b.getState() == BatchStateEnum.Completed || b.getState() == BatchStateEnum.Failed) {
                incomplete.remove(b.getId());
            }
        }
    }
}
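A hedged sketch of how this polling loop is typically driven: the job is closed first so Salesforce starts processing all submitted batches. closeJob is standard Bulk API, while the connection field name and the trailing error check are illustrative assumptions rather than the Talend code.

    // Hypothetical caller (job and getBatchInfoList are used above; "connection" is an assumed field name)
    connection.closeJob(job.getId());   // no more batches may be added; processing begins
    awaitCompletion();                  // the polling loop shown above
    for (BatchInfo b : getBatchInfoList(job.getId()).getBatchInfo()) {
        if (b.getState() == BatchStateEnum.Failed) {
            // assumed error handling; getStateMessage() carries the Salesforce failure reason
            throw new IOException("Batch " + b.getId() + " failed: " + b.getStateMessage());
        }
    }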
use of com.sforce.async.BatchInfo in project components by Talend.
the class SalesforceBulkRuntime method getBatchLog.
/**
 * Gets the results of the operation for one batch and checks for errors.
 *
 * @param batchNum index of the batch in the previously populated batchInfoList
 * @return list of result rows for the batch, including the Salesforce-generated id and created flag
 * @throws AsyncApiException
 * @throws IOException
 * @throws ConnectionException
 */
public List<BulkResult> getBatchLog(int batchNum) throws AsyncApiException, IOException, ConnectionException {
    // batchInfoList was populated when batches were created and submitted
    List<BulkResult> resultInfoList = new ArrayList<BulkResult>();
    BulkResult resultInfo;
    BatchInfo b = batchInfoList.get(batchNum);
    CSVReader rdr = new CSVReader(getBatchResultStream(job.getId(), b.getId()));
    List<String> resultHeader = rdr.nextRecord();
    int resultCols = resultHeader.size();
    List<String> row;
    while ((row = rdr.nextRecord()) != null) {
        resultInfo = new BulkResult();
        resultInfo.copyValues(getBaseFileRow());
        for (int i = 0; i < resultCols; i++) {
            String header = resultHeader.get(i);
            resultInfo.setValue(header, row.get(i));
            if ("Created".equals(header)) {
                resultInfo.setValue("salesforce_created", row.get(i));
            } else if ("Id".equals(header)) {
                resultInfo.setValue("salesforce_id", row.get(i));
            }
        }
        resultInfoList.add(resultInfo);
    }
    return resultInfoList;
}
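A short usage sketch: collecting the result rows of every submitted batch by index. It assumes getBatchLog is called after awaitCompletion has returned, so that all batches are in a terminal state.

    // Hypothetical usage of getBatchLog for all batches
    List<BulkResult> allResults = new ArrayList<BulkResult>();
    for (int batchNum = 0; batchNum < batchInfoList.size(); batchNum++) {
        allResults.addAll(getBatchLog(batchNum));
    }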
use of com.sforce.async.BatchInfo in project tdi-studio-se by Talend.
the class SalesforceBulkAPI method createBatchesFromCSVFile.
private List<BatchInfo> createBatchesFromCSVFile() throws IOException, AsyncApiException, ConnectionException {
    List<BatchInfo> batchInfos = new ArrayList<BatchInfo>();
    BufferedReader rdr = new BufferedReader(new InputStreamReader(new FileInputStream(bulkFileName), FILE_ENCODING));
    // Read the CSV header row; it is prepended to every batch file
    byte[] headerBytes = (rdr.readLine() + "\n").getBytes("UTF-8");
    int headerBytesLength = headerBytes.length;
    File tmpFile = File.createTempFile("sforceBulkAPI", ".csv");

    // Split the CSV file into multiple batches
    try {
        FileOutputStream tmpOut = new FileOutputStream(tmpFile);
        int currentBytes = 0;
        int currentLines = 0;
        String nextLine;
        boolean needStart = true;
        boolean needEnds = true;
        while ((nextLine = rdr.readLine()) != null) {
            int num = countQuotes(nextLine);
            if (num % 2 == 1) {
                // Odd number of quotes: nextLine opens or closes a record that spans several lines
                if (!needStart) {
                    needEnds = false;
                } else {
                    needStart = false;
                }
            } else {
                // Even number of quotes: nextLine is a complete record or the middle of a multi-line record
                if (needEnds && needStart) {
                    needEnds = false;
                    needStart = false;
                }
            }
            byte[] bytes = (nextLine + "\n").getBytes("UTF-8");
            // Create a new batch when our batch size limit is reached
            if (currentBytes + bytes.length > maxBytesPerBatch || currentLines > maxRowsPerBatch) {
                createBatch(tmpOut, tmpFile, batchInfos);
                currentBytes = 0;
                currentLines = 0;
            }
            if (currentBytes == 0) {
                tmpOut = new FileOutputStream(tmpFile);
                tmpOut.write(headerBytes);
                currentBytes = headerBytesLength;
                currentLines = 1;
            }
            tmpOut.write(bytes);
            currentBytes += bytes.length;
            if (!needStart && !needEnds) {
                currentLines++;
                needStart = true;
                needEnds = true;
            }
        }
        // Finished processing all rows; create a final batch for any remaining data
        rdr.close();
        if (currentLines > 1) {
            createBatch(tmpOut, tmpFile, batchInfos);
        }
    } finally {
        tmpFile.delete();
    }
    return batchInfos;
}
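Continuing the connection/job setup sketch shown after createBatch above, a hedged driver for this method; the field names (connection, job) are assumptions for illustration, and the status read is a one-shot check rather than the class's own polling logic.

    // Hypothetical driver around createBatchesFromCSVFile (field names assumed)
    List<BatchInfo> batches = createBatchesFromCSVFile();   // split the CSV and submit one batch per chunk
    connection.closeJob(job.getId());                        // Salesforce now processes all submitted batches
    for (BatchInfo submitted : batches) {
        BatchInfo latest = connection.getBatchInfo(job.getId(), submitted.getId());
        System.out.println(submitted.getId() + " -> " + latest.getState());
    }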