Search in sources :

Example 6 with DataRecordException

use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by apache.

In class SalesforceExtractor, method getBulkData:

/**
 * Reads the next batch of records from the Salesforce bulk API result streams.
 *
 * <p>When the current reader is absent or reports that it is not ready, the method
 * advances to the next entry of {@code bulkResultIdList}: it flags a new bulk result
 * set, closes the previous reader (if any), opens a reader for the next result id and
 * bumps the result-id counter. When no unprocessed result id remains, the bulk job is
 * flagged as finished and an empty record set is returned.
 *
 * @return the record set filled by {@code fetchResultBatchWithRetry}; empty when the
 *         bulk job has just been marked finished
 * @throws DataRecordException wrapping any exception raised while switching streams or
 *         fetching the batch
 */
private RecordSet<JsonElement> getBulkData() throws DataRecordException {
    log.debug("Processing bulk api batch...");
    RecordSetList<JsonElement> batch = new RecordSetList<>();
    try {
        // A missing reader, or one that is not ready, means we must move on to the next result id.
        boolean readerExhausted = this.bulkBufferedReader == null || !this.bulkBufferedReader.ready();
        if (readerExhausted) {
            // Report the size of the result set that was just completed (none before the first one).
            if (this.bulkResultIdCount > 0) {
                log.info("Result set {} had {} records", this.bulkResultIdCount, this.bulkRecordCount - this.prevBulkRecordCount);
            }

            // Every result id has been consumed: mark the job finished and hand back the empty batch.
            if (this.bulkResultIdCount >= this.bulkResultIdList.size()) {
                log.info("Bulk job is finished");
                this.setBulkJobFinished(true);
                return batch;
            }

            // Otherwise switch over to the stream of the next unprocessed result id.
            log.info("Stream resultset for resultId:" + this.bulkResultIdList.get(this.bulkResultIdCount));
            this.setNewBulkResultSet(true);
            if (this.bulkBufferedReader != null) {
                this.bulkBufferedReader.close();
            }
            this.bulkBufferedReader = getBulkBufferedReader(this.bulkResultIdCount);
            this.bulkResultIdCount++;
            // Snapshot the running total so the next per-result-set log line can compute a delta.
            this.prevBulkRecordCount = bulkRecordCount;
        }

        // Populate the batch from the current reader (the helper handles retries).
        fetchResultBatchWithRetry(batch);
        return batch;
    } catch (Exception e) {
        throw new DataRecordException("Failed to get records from salesforce; error - " + e.getMessage(), e);
    }
}
Also used : RecordSetList(org.apache.gobblin.source.extractor.resultset.RecordSetList) JsonElement(com.google.gson.JsonElement) ParseException(java.text.ParseException) RecordCountException(org.apache.gobblin.source.extractor.exception.RecordCountException) AsyncApiException(com.sforce.async.AsyncApiException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) RestApiClientException(org.apache.gobblin.source.extractor.exception.RestApiClientException) IOException(java.io.IOException) HighWatermarkException(org.apache.gobblin.source.extractor.exception.HighWatermarkException) RestApiConnectionException(org.apache.gobblin.source.extractor.exception.RestApiConnectionException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException)

Example 7 with DataRecordException

use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by apache.

In class SalesforceExtractor, method getDataMetadata:

/**
 * Builds the GET command(s) used to retrieve Salesforce data records.
 *
 * <p>If a next-page url is available and {@code pullStatus} is set, that url is used
 * as-is. Otherwise the command is built from {@code updatedQuery}: unchanged when
 * {@code isNullPredicate(predicateList)} holds, or else with the limit clause removed,
 * every predicate condition added, an optional {@code IsDeleted = true} condition
 * (controlled by {@code SOURCE_QUERYBASED_IS_SPECIFIC_API_ACTIVE}) and the limit
 * clause appended again.
 *
 * @param schema not read by this method
 * @param entity not read by this method
 * @param workUnit not read by this method
 * @param predicateList predicates whose conditions are added to the query
 * @return the result of {@code constructGetCommand} for the chosen url
 * @throws DataRecordException wrapping any exception raised while building the url
 */
@Override
public List<Command> getDataMetadata(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws DataRecordException {
    log.debug("Build url to retrieve data records");
    String baseQuery = this.updatedQuery;
    try {
        // Continue an in-progress pull from the next-page url.
        if (this.getNextUrl() != null && this.pullStatus) {
            return constructGetCommand(this.getNextUrl());
        }

        // No predicates to apply: run the query exactly as configured.
        if (isNullPredicate(predicateList)) {
            log.info("QUERY:" + baseQuery);
            return constructGetCommand(this.sfConnector.getFullUri(getSoqlUrl(baseQuery)));
        }

        // Strip the limit clause so predicates can be added, then put it back at the end.
        String limitClause = getLimitFromInputQuery(baseQuery);
        String soql = baseQuery.replace(limitClause, "");
        for (Predicate predicate : predicateList) {
            soql = SqlQueryUtils.addPredicate(soql, predicate.getCondition());
        }

        boolean specificApiActive =
            Boolean.valueOf(this.workUnitState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_SPECIFIC_API_ACTIVE));
        if (specificApiActive) {
            soql = SqlQueryUtils.addPredicate(soql, "IsDeleted = true");
        }

        soql = soql + limitClause;
        log.info("QUERY: " + soql);
        return constructGetCommand(this.sfConnector.getFullUri(getSoqlUrl(soql)));
    } catch (Exception e) {
        throw new DataRecordException("Failed to get salesforce url for data records; error - " + e.getMessage(), e);
    }
}
Also used : ParseException(java.text.ParseException) RecordCountException(org.apache.gobblin.source.extractor.exception.RecordCountException) AsyncApiException(com.sforce.async.AsyncApiException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) RestApiClientException(org.apache.gobblin.source.extractor.exception.RestApiClientException) IOException(java.io.IOException) HighWatermarkException(org.apache.gobblin.source.extractor.exception.HighWatermarkException) RestApiConnectionException(org.apache.gobblin.source.extractor.exception.RestApiConnectionException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) Predicate(org.apache.gobblin.source.extractor.watermark.Predicate)

Example 8 with DataRecordException

use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by apache.

In class RestApiExtractor, method getRecordSet:

/**
 * Fetches data records through the REST API connector.
 *
 * <p>Connects first if the connector reports its connection as closed. When the pull
 * status is {@code false} the method returns {@code null}. Otherwise it builds the
 * request commands, from {@code getDataMetadata} when there is no next url or from the
 * next url itself, sends them through the connector and converts the response with
 * {@code getData}.
 *
 * @param schema passed through to {@code getDataMetadata}
 * @param entity passed through to {@code getDataMetadata}
 * @param workUnit passed through to {@code getDataMetadata}
 * @param predicateList passed through to {@code getDataMetadata}
 * @return the records produced by {@code getData}, or {@code null} when the pull status is false
 * @throws DataRecordException wrapping any exception raised inside the method, including
 *         the connection failure raised here
 */
@Override
public Iterator<JsonElement> getRecordSet(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws DataRecordException {
    log.debug("Get data records using Rest Api");
    try {
        // Only attempt to connect when the connection is closed; a failed attempt aborts the pull.
        if (this.connector.isConnectionClosed() && !this.connector.connect()) {
            throw new DataRecordException("Failed to connect.");
        }
        log.debug("Connected successfully.");

        // Nothing to pull: callers receive null rather than an empty iterator.
        if (!this.getPullStatus()) {
            return null;
        }

        List<Command> commands;
        if (this.getNextUrl() != null) {
            commands = RestApiConnector.constructGetCommand(this.getNextUrl());
        } else {
            commands = this.getDataMetadata(schema, entity, workUnit, predicateList);
        }

        CommandOutput<?, ?> response = this.connector.getResponse(commands);
        return this.getData(response);
    } catch (Exception e) {
        throw new DataRecordException("Failed to get records using rest api; error - " + e.getMessage(), e);
    }
}
Also used : Command(org.apache.gobblin.source.extractor.extract.Command) JsonElement(com.google.gson.JsonElement) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) RestApiProcessingException(org.apache.gobblin.source.extractor.exception.RestApiProcessingException) IOException(java.io.IOException) HighWatermarkException(org.apache.gobblin.source.extractor.exception.HighWatermarkException) RecordCountException(org.apache.gobblin.source.extractor.exception.RecordCountException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) RestApiConnectionException(org.apache.gobblin.source.extractor.exception.RestApiConnectionException)

Aggregations

IOException (java.io.IOException)8 DataRecordException (org.apache.gobblin.source.extractor.DataRecordException)8 HighWatermarkException (org.apache.gobblin.source.extractor.exception.HighWatermarkException)7 RecordCountException (org.apache.gobblin.source.extractor.exception.RecordCountException)7 SchemaException (org.apache.gobblin.source.extractor.exception.SchemaException)7 JsonElement (com.google.gson.JsonElement)5 ParseException (java.text.ParseException)5 RestApiConnectionException (org.apache.gobblin.source.extractor.exception.RestApiConnectionException)4 AsyncApiException (com.sforce.async.AsyncApiException)3 RestApiClientException (org.apache.gobblin.source.extractor.exception.RestApiClientException)3 JsonObject (com.google.gson.JsonObject)2 SQLException (java.sql.SQLException)2 Iterator (java.util.Iterator)2 SqlParseException (org.apache.calcite.sql.parser.SqlParseException)2 Command (org.apache.gobblin.source.extractor.extract.Command)2 JsonArray (com.google.gson.JsonArray)1 ResultSet (java.sql.ResultSet)1 ResultSetMetaData (java.sql.ResultSetMetaData)1 ListIterator (java.util.ListIterator)1 ByteArrayBasedKafkaRecord (org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord)1