Search in sources :

Example 1 with DataRecordException

use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by apache.

From the class QueryBasedExtractor, method readRecord.

/**
 * Returns the next record of the current pull, fetching the following pull as soon as the
 * current iterator is exhausted.
 *
 * <p>When a pull yields no iterator at all ({@code getIterator()} returns {@code null}),
 * the fetch status is switched off and {@code null} is returned, instead of failing with a
 * {@link NullPointerException} that would be re-wrapped as a misleading
 * {@link DataRecordException}.
 *
 * @param reuse not used by this implementation
 * @return the next record, or {@code null} when no pull is required or no record is available
 * @throws DataRecordException if obtaining the iterator or reading from it fails; the original
 *         exception is preserved as the cause
 * @throws IOException declared by the signature; every exception raised inside the
 *         {@code try} block is wrapped in a {@link DataRecordException}
 */
@Override
public D readRecord(@Deprecated D reuse) throws DataRecordException, IOException {
    if (!this.isPullRequired()) {
        log.info("No more records to read");
        return null;
    }
    D nextElement = null;
    try {
        if (isInitialPull()) {
            log.info("Initial pull");
            if (shouldRemoveDataPullUpperBounds()) {
                this.removeDataPullUpperBounds();
            }
            this.iterator = this.getIterator();
        }
        // getIterator() may return null (the look-ahead below already accounts for that).
        // Treat a missing iterator the same way here: stop fetching and report "no record".
        if (this.iterator == null) {
            this.setFetchStatus(false);
            return null;
        }
        if (this.iterator.hasNext()) {
            nextElement = this.iterator.next();
            // Look ahead: once the current pull is drained, fetch the next one immediately so
            // that the following call either has data or sees the fetch status switched off.
            if (!this.iterator.hasNext()) {
                log.debug("Getting next pull");
                this.iterator = this.getIterator();
                if (this.iterator == null) {
                    this.setFetchStatus(false);
                }
            }
        }
    } catch (Exception e) {
        throw new DataRecordException("Failed to get records using rest api; error - " + e.getMessage(), e);
    }
    return nextElement;
}
Also used : SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) IOException(java.io.IOException) HighWatermarkException(org.apache.gobblin.source.extractor.exception.HighWatermarkException) RecordCountException(org.apache.gobblin.source.extractor.exception.RecordCountException) ExtractPrepareException(org.apache.gobblin.source.extractor.exception.ExtractPrepareException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException)

Example 2 with DataRecordException

use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by apache.

From the class KafkaExtractor, method readRecordImpl.

/**
 * Return the next decodable record from the current partition. If the current partition has no more
 * decodable record, move on to the next partition. If all partitions have been processed, return null.
 *
 * <p>Failures are handled per partition / per message rather than propagated:
 * <ul>
 *   <li>if fetching the next message buffer throws, the error is logged and the partition is skipped;</li>
 *   <li>if decoding or converting a message throws anything (the catch is on {@link Throwable}),
 *       the partition index is added to {@code errorPartitions}, {@code undecodableMessageCount}
 *       is incremented, and the loop continues with the next message.</li>
 * </ul>
 *
 * @param reuse not referenced by this method
 * @return the next decoded/converted record, or {@code null} once all partitions are finished
 * @throws DataRecordException declared by the signature; the one thrown inside this method for a
 *         null-valued {@code DecodeableKafkaRecord} is caught by the surrounding catch block
 * @throws IOException declared by the signature; not thrown directly by the code visible here
 */
@SuppressWarnings("unchecked")
@Override
public D readRecordImpl(D reuse) throws DataRecordException, IOException {
    // Start of this call; used to accumulate currentPartitionReadRecordTime on both return paths.
    long readStartTime = System.nanoTime();
    while (!allPartitionsFinished()) {
        if (currentPartitionFinished()) {
            moveToNextPartition();
            continue;
        }
        // Refill the message buffer when there is none or it has been drained.
        if (this.messageIterator == null || !this.messageIterator.hasNext()) {
            try {
                long fetchStartTime = System.nanoTime();
                this.messageIterator = fetchNextMessageBuffer();
                this.currentPartitionFetchMessageBufferTime += System.nanoTime() - fetchStartTime;
            } catch (Exception e) {
                // Best effort: a fetch failure abandons this partition only, not the whole read.
                LOG.error(String.format("Failed to fetch next message buffer for partition %s. Will skip this partition.", getCurrentPartition()), e);
                moveToNextPartition();
                continue;
            }
            // A fresh fetch that still yields nothing means there is nothing left to read here.
            if (this.messageIterator == null || !this.messageIterator.hasNext()) {
                moveToNextPartition();
                continue;
            }
        }
        while (!currentPartitionFinished()) {
            if (!this.messageIterator.hasNext()) {
                // Buffer drained; fall back to the outer loop, which fetches the next buffer.
                break;
            }
            KafkaConsumerRecord nextValidMessage = this.messageIterator.next();
            // Skip messages whose offset is below the next watermark of the current partition,
            // until we get to that watermark.
            // NOTE(review): presumably the fetched buffer can start before the requested offset — confirm.
            if (nextValidMessage.getOffset() < this.nextWatermark.get(this.currentPartitionIdx)) {
                continue;
            }
            // The watermark is advanced before decoding, so a message that fails to decode is
            // still moved past.
            this.nextWatermark.set(this.currentPartitionIdx, nextValidMessage.getNextOffset());
            try {
                D record = null;
                // track time for decode/convert depending on the record type
                long decodeStartTime = System.nanoTime();
                if (nextValidMessage instanceof ByteArrayBasedKafkaRecord) {
                    record = decodeRecord((ByteArrayBasedKafkaRecord) nextValidMessage);
                } else if (nextValidMessage instanceof DecodeableKafkaRecord) {
                    // if value is null then this is a bad record that is returned for further error handling, so raise an error
                    if (((DecodeableKafkaRecord) nextValidMessage).getValue() == null) {
                        throw new DataRecordException("Could not decode Kafka record");
                    }
                    // get value from decodeable record and convert to the output schema if necessary
                    record = convertRecord(((DecodeableKafkaRecord<?, D>) nextValidMessage).getValue());
                } else {
                    throw new IllegalStateException("Unsupported KafkaConsumerRecord type. The returned record can either be ByteArrayBasedKafkaRecord" + " or DecodeableKafkaRecord");
                }
                // Per-partition statistics are only updated for successfully decoded records.
                this.currentPartitionDecodeRecordTime += System.nanoTime() - decodeStartTime;
                this.currentPartitionRecordCount++;
                this.currentPartitionTotalSize += nextValidMessage.getValueSizeInBytes();
                this.currentPartitionReadRecordTime += System.nanoTime() - readStartTime;
                return record;
            } catch (Throwable t) {
                // Catches everything thrown above, including the DataRecordException and
                // IllegalStateException raised in the try block, so none of them propagate.
                // NOTE(review): Throwable also covers java.lang.Error (e.g. OutOfMemoryError);
                // confirm that swallowing those is intended.
                this.errorPartitions.add(this.currentPartitionIdx);
                this.undecodableMessageCount++;
                if (shouldLogError()) {
                    LOG.error(String.format("A record from partition %s cannot be decoded.", getCurrentPartition()), t);
                    incrementErrorCount();
                }
            }
        }
    }
    LOG.info("Finished pulling topic " + this.topicName);
    this.currentPartitionReadRecordTime += System.nanoTime() - readStartTime;
    return null;
}
Also used : DecodeableKafkaRecord(org.apache.gobblin.kafka.client.DecodeableKafkaRecord) KafkaConsumerRecord(org.apache.gobblin.kafka.client.KafkaConsumerRecord) ByteArrayBasedKafkaRecord(org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord) IOException(java.io.IOException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException)

Example 3 with DataRecordException

use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by apache.

From the class JdbcExtractor, method getData.

/**
 * Reads up to one batch of rows from the first result set in {@code response} and returns
 * them as JSON objects.
 *
 * <p>Each row becomes a {@link JsonObject} whose property names come from the header record
 * (by column position) and whose values come from {@code parseColumnAsString}. The batch size
 * is read from {@code ConfigurationKeys.SOURCE_QUERYBASED_FETCH_SIZE}; a value of {@code 0}
 * falls back to {@code ConfigurationKeys.DEFAULT_SOURCE_FETCH_SIZE}. When the result set is
 * exhausted before the batch fills up, the next-record flag is cleared.
 *
 * @param response command output holding the result set; may be {@code null}
 * @return iterator over the rows read by this call; empty when {@code response} is
 *         {@code null} or no further record is expected
 * @throws DataRecordException if the response holds no result set, or if reading rows fails
 *         (the original exception is kept as the cause)
 * @throws IOException declared by the signature
 */
@Override
public Iterator<JsonElement> getData(CommandOutput<?, ?> response) throws DataRecordException, IOException {
    this.log.debug("Extract data records from resultset");
    RecordSetList<JsonElement> batch = this.getNewRecordSetList();
    if (response == null || !this.hasNextRecord()) {
        return batch.iterator();
    }

    Iterator<ResultSet> resultSets = (Iterator<ResultSet>) response.getResults().values().iterator();
    if (!resultSets.hasNext()) {
        throw new DataRecordException("Failed to get source record count from database - Resultset has no records");
    }
    ResultSet rows = resultSets.next();

    try {
        final ResultSetMetaData metadata = rows.getMetaData();
        int batchSize = this.workUnitState.getPropAsInt(ConfigurationKeys.SOURCE_QUERYBASED_FETCH_SIZE, 0);
        if (batchSize == 0) {
            batchSize = ConfigurationKeys.DEFAULT_SOURCE_FETCH_SIZE;
        }

        int rowsInBatch = 0;
        boolean batchFull = false;
        // Stop before advancing the cursor again once the batch is full, so that no row is
        // consumed without being returned.
        while (!batchFull && rows.next()) {
            final int columnCount = metadata.getColumnCount();
            JsonObject row = new JsonObject();
            // JDBC columns are 1-based, header entries are 0-based.
            for (int headerIdx = 0; headerIdx < columnCount; headerIdx++) {
                final String columnName = this.getHeaderRecord().get(headerIdx);
                row.addProperty(columnName, parseColumnAsString(rows, metadata, headerIdx + 1));
            }
            batch.add(row);
            rowsInBatch++;
            this.totalRecordCount++;
            batchFull = rowsInBatch >= batchSize;
        }

        if (!batchFull) {
            // Cursor ran out before the batch filled up: nothing more to read.
            this.setNextRecord(false);
        }
        this.log.info("Total number of records processed so far: " + this.totalRecordCount);
        return batch.iterator();
    } catch (Exception e) {
        throw new DataRecordException("Failed to get records from database; error - " + e.getMessage(), e);
    }
}
Also used : ResultSetMetaData(java.sql.ResultSetMetaData) JsonElement(com.google.gson.JsonElement) ResultSet(java.sql.ResultSet) Iterator(java.util.Iterator) JsonObject(com.google.gson.JsonObject) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) ParseException(java.text.ParseException) RecordCountException(org.apache.gobblin.source.extractor.exception.RecordCountException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) SqlParseException(org.apache.calcite.sql.parser.SqlParseException) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) SQLException(java.sql.SQLException) IOException(java.io.IOException) HighWatermarkException(org.apache.gobblin.source.extractor.exception.HighWatermarkException)

Example 4 with DataRecordException

use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by apache.

From the class JdbcExtractor, method getRecordSet.

/**
 * Returns the next batch of records for the given entity over JDBC.
 *
 * <p>On the first pull the data commands are built and executed, and the response is kept in
 * {@code dataResponse}; every call then reads a batch from that stored response via
 * {@code getData}.
 *
 * @param schema source schema name, passed through to {@code getDataMetadata}
 * @param entity source entity name, passed through to {@code getDataMetadata}
 * @param workUnit work unit, passed through to {@code getDataMetadata}
 * @param predicateList predicates, passed through to {@code getDataMetadata}
 * @return iterator over the records read by this call
 * @throws DataRecordException if building, executing or reading the query fails; the original
 *         exception is kept as the cause
 * @throws IOException declared by the signature
 */
@Override
public Iterator<JsonElement> getRecordSet(String schema, String entity, WorkUnit workUnit, List<Predicate> predicateList) throws DataRecordException, IOException {
    try {
        if (isFirstPull()) {
            this.log.info("Get data recordset using JDBC");
            final List<Command> commands = this.getDataMetadata(schema, entity, workUnit, predicateList);
            this.dataResponse = this.executePreparedSql(commands);
            this.setFirstPull(false);
        }
        return this.getData(this.dataResponse);
    } catch (Exception e) {
        throw new DataRecordException("Failed to get record set using JDBC; error - " + e.getMessage(), e);
    }
}
Also used : Command(org.apache.gobblin.source.extractor.extract.Command) JsonElement(com.google.gson.JsonElement) ParseException(java.text.ParseException) RecordCountException(org.apache.gobblin.source.extractor.exception.RecordCountException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) SqlParseException(org.apache.calcite.sql.parser.SqlParseException) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) SQLException(java.sql.SQLException) IOException(java.io.IOException) HighWatermarkException(org.apache.gobblin.source.extractor.exception.HighWatermarkException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException)

Example 5 with DataRecordException

use of org.apache.gobblin.source.extractor.DataRecordException in project incubator-gobblin by apache.

From the class SalesforceExtractor, method getData.

/**
 * Converts the first output of a Salesforce REST response into an iterator over its records.
 *
 * <p>The output is parsed as a JSON object. Its {@code "records"} array is returned with the
 * {@code "attributes"} element removed from the records. If {@code "done"} is true the pull
 * status is switched off; otherwise the next URL is set from {@code "nextRecordsUrl"} with
 * the services-data path stripped.
 *
 * <p>JSON parsing happens inside the guarded section, so malformed output is reported as the
 * declared {@link DataRecordException} rather than escaping as an unchecked parser exception.
 *
 * @param response command output whose first result value is the REST response body
 * @return iterator over the record elements of this response page
 * @throws DataRecordException if the response has no output, or if parsing or processing the
 *         output fails (the original exception is kept as the cause)
 */
@Override
public Iterator<JsonElement> getData(CommandOutput<?, ?> response) throws DataRecordException {
    log.debug("Get data records from response");
    Iterator<String> itr = (Iterator<String>) response.getResults().values().iterator();
    if (!itr.hasNext()) {
        throw new DataRecordException("Failed to get data from salesforce; REST response has no output");
    }
    String output = itr.next();
    List<JsonElement> rs = Lists.newArrayList();
    try {
        JsonObject jsonObject = GSON.fromJson(output, JsonObject.class);
        JsonArray partRecords = jsonObject.getAsJsonArray("records");
        if (jsonObject.get("done").getAsBoolean()) {
            setPullStatus(false);
        } else {
            // Literal replace: the services path is plain text, not a regular expression
            // (with replaceAll a '.' in the path would match any character).
            String nextRecordsPath = jsonObject.get("nextRecordsUrl").getAsString().replace(this.sfConnector.getServicesDataEnvPath(), "");
            setNextUrl(this.sfConnector.getFullUri(nextRecordsPath));
        }
        JsonArray array = Utils.removeElementFromJsonArray(partRecords, "attributes");
        for (JsonElement recordElement : array) {
            rs.add(recordElement);
        }
        return rs.iterator();
    } catch (Exception e) {
        throw new DataRecordException("Failed to get records from salesforce; error - " + e.getMessage(), e);
    }
}
Also used : JsonArray(com.google.gson.JsonArray) JsonElement(com.google.gson.JsonElement) ListIterator(java.util.ListIterator) Iterator(java.util.Iterator) JsonObject(com.google.gson.JsonObject) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) ParseException(java.text.ParseException) RecordCountException(org.apache.gobblin.source.extractor.exception.RecordCountException) AsyncApiException(com.sforce.async.AsyncApiException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) SchemaException(org.apache.gobblin.source.extractor.exception.SchemaException) RestApiClientException(org.apache.gobblin.source.extractor.exception.RestApiClientException) IOException(java.io.IOException) HighWatermarkException(org.apache.gobblin.source.extractor.exception.HighWatermarkException) RestApiConnectionException(org.apache.gobblin.source.extractor.exception.RestApiConnectionException)

Aggregations

IOException (java.io.IOException)8 DataRecordException (org.apache.gobblin.source.extractor.DataRecordException)8 HighWatermarkException (org.apache.gobblin.source.extractor.exception.HighWatermarkException)7 RecordCountException (org.apache.gobblin.source.extractor.exception.RecordCountException)7 SchemaException (org.apache.gobblin.source.extractor.exception.SchemaException)7 JsonElement (com.google.gson.JsonElement)5 ParseException (java.text.ParseException)5 RestApiConnectionException (org.apache.gobblin.source.extractor.exception.RestApiConnectionException)4 AsyncApiException (com.sforce.async.AsyncApiException)3 RestApiClientException (org.apache.gobblin.source.extractor.exception.RestApiClientException)3 JsonObject (com.google.gson.JsonObject)2 SQLException (java.sql.SQLException)2 Iterator (java.util.Iterator)2 SqlParseException (org.apache.calcite.sql.parser.SqlParseException)2 Command (org.apache.gobblin.source.extractor.extract.Command)2 JsonArray (com.google.gson.JsonArray)1 ResultSet (java.sql.ResultSet)1 ResultSetMetaData (java.sql.ResultSetMetaData)1 ListIterator (java.util.ListIterator)1 ByteArrayBasedKafkaRecord (org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord)1