Search in sources :

Example 1 with KafkaConsumerRecord

use of org.apache.gobblin.kafka.client.KafkaConsumerRecord in project incubator-gobblin by apache.

the class KafkaExtractor method readRecordImpl.

/**
 * Reads the next record that can be decoded, starting with the partition currently being consumed.
 * When that partition has nothing more to offer, reading advances to the following partition; once
 * every partition has been processed, {@code null} is returned.
 *
 * @param reuse not referenced by this implementation
 * @return the next decoded record, or {@code null} when all partitions are finished
 */
@SuppressWarnings("unchecked")
@Override
public D readRecordImpl(D reuse) throws DataRecordException, IOException {
    final long readBeginNanos = System.nanoTime();

    while (!allPartitionsFinished()) {
        if (currentPartitionFinished()) {
            moveToNextPartition();
            continue;
        }

        // Refill the buffer when there is no iterator yet or the current one is drained.
        final boolean bufferDrained = this.messageIterator == null || !this.messageIterator.hasNext();
        if (bufferDrained) {
            try {
                final long fetchBeginNanos = System.nanoTime();
                this.messageIterator = fetchNextMessageBuffer();
                this.currentPartitionFetchMessageBufferTime += System.nanoTime() - fetchBeginNanos;
            } catch (Exception fetchFailure) {
                LOG.error(String.format("Failed to fetch next message buffer for partition %s. Will skip this partition.", getCurrentPartition()), fetchFailure);
                moveToNextPartition();
                continue;
            }
            // A fetch that produced nothing means this partition has no more data to give.
            if (!(this.messageIterator != null && this.messageIterator.hasNext())) {
                moveToNextPartition();
                continue;
            }
        }

        // Drain the buffer until a record decodes successfully or the partition/buffer ends.
        while (!currentPartitionFinished() && this.messageIterator.hasNext()) {
            final KafkaConsumerRecord message = this.messageIterator.next();

            // The buffer may begin at an offset smaller than the one expected next, so skip
            // messages until we get to the expected offset.
            if (message.getOffset() < this.nextWatermark.get(this.currentPartitionIdx)) {
                continue;
            }
            // Advance the watermark before decoding so a bad record is not revisited.
            this.nextWatermark.set(this.currentPartitionIdx, message.getNextOffset());

            try {
                // Time the decode/convert step, whichever applies to this record type.
                final long decodeBeginNanos = System.nanoTime();
                final D decoded;
                if (message instanceof ByteArrayBasedKafkaRecord) {
                    decoded = decodeRecord((ByteArrayBasedKafkaRecord) message);
                } else if (message instanceof DecodeableKafkaRecord) {
                    // A null value marks a bad record handed back for error handling; raise an error.
                    if (((DecodeableKafkaRecord) message).getValue() == null) {
                        throw new DataRecordException("Could not decode Kafka record");
                    }
                    // Take the already-decoded value and convert it to the output schema if necessary.
                    decoded = convertRecord(((DecodeableKafkaRecord<?, D>) message).getValue());
                } else {
                    throw new IllegalStateException("Unsupported KafkaConsumerRecord type. The returned record can either be ByteArrayBasedKafkaRecord" + " or DecodeableKafkaRecord");
                }

                final long nowNanos = System.nanoTime();
                this.currentPartitionDecodeRecordTime += nowNanos - decodeBeginNanos;
                this.currentPartitionRecordCount++;
                this.currentPartitionTotalSize += message.getValueSizeInBytes();
                this.currentPartitionReadRecordTime += System.nanoTime() - readBeginNanos;
                return decoded;
            } catch (Throwable decodeFailure) {
                // Best effort: note the failure and carry on with the next message.
                this.errorPartitions.add(this.currentPartitionIdx);
                this.undecodableMessageCount++;
                if (shouldLogError()) {
                    LOG.error(String.format("A record from partition %s cannot be decoded.", getCurrentPartition()), decodeFailure);
                    incrementErrorCount();
                }
            }
        }
    }

    LOG.info("Finished pulling topic " + this.topicName);
    this.currentPartitionReadRecordTime += System.nanoTime() - readBeginNanos;
    return null;
}
Also used : DecodeableKafkaRecord(org.apache.gobblin.kafka.client.DecodeableKafkaRecord) KafkaConsumerRecord(org.apache.gobblin.kafka.client.KafkaConsumerRecord) ByteArrayBasedKafkaRecord(org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord) IOException(java.io.IOException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException) DataRecordException(org.apache.gobblin.source.extractor.DataRecordException)

Example 2 with KafkaConsumerRecord

use of org.apache.gobblin.kafka.client.KafkaConsumerRecord in project incubator-gobblin by apache.

the class SimpleKafkaSpecConsumer method changedSpecs.

/**
 * Walks every partition from the beginning of the partition list, decodes each available message
 * into an {@code AvroJobSpec}, and converts it into a (verb, {@code JobSpec}) pair.
 *
 * <p>Watermarks are (re)initialized and the partition index is reset on every call. A message that
 * cannot be decoded or converted is logged (with its cause) and skipped; because the watermark is
 * advanced before decoding, the failing message is not revisited within this call.
 *
 * @return a {@code CompletedFuture} wrapping the list of pairs collected during this call
 */
@Override
public Future<? extends List<Pair<SpecExecutor.Verb, Spec>>> changedSpecs() {
    List<Pair<SpecExecutor.Verb, Spec>> changesSpecs = new ArrayList<>();
    initializeWatermarks();
    // Start before the first partition; moveToNextPartition() advances from here.
    this.currentPartitionIdx = -1;
    while (!allPartitionsFinished()) {
        if (currentPartitionFinished()) {
            moveToNextPartition();
            continue;
        }
        // Refill the buffer when there is no iterator yet or the current one is drained.
        if (this.messageIterator == null || !this.messageIterator.hasNext()) {
            try {
                this.messageIterator = fetchNextMessageBuffer();
            } catch (Exception e) {
                log.error(String.format("Failed to fetch next message buffer for partition %s. Will skip this partition.", getCurrentPartition()), e);
                moveToNextPartition();
                continue;
            }
            // A fetch that produced nothing means this partition has no more data to give.
            if (this.messageIterator == null || !this.messageIterator.hasNext()) {
                moveToNextPartition();
                continue;
            }
        }
        while (!currentPartitionFinished()) {
            if (!this.messageIterator.hasNext()) {
                break;
            }
            KafkaConsumerRecord nextValidMessage = this.messageIterator.next();
            // The buffer may begin at an offset smaller than the one expected next, so skip
            // messages until we get to the expected offset.
            if (nextValidMessage.getOffset() < _nextWatermark.get(this.currentPartitionIdx)) {
                continue;
            }
            // Advance the watermark before decoding so a bad record is not revisited.
            _nextWatermark.set(this.currentPartitionIdx, nextValidMessage.getNextOffset());
            try {
                final AvroJobSpec record;
                if (nextValidMessage instanceof ByteArrayBasedKafkaRecord) {
                    record = decodeRecord((ByteArrayBasedKafkaRecord) nextValidMessage);
                } else if (nextValidMessage instanceof DecodeableKafkaRecord) {
                    record = ((DecodeableKafkaRecord<?, AvroJobSpec>) nextValidMessage).getValue();
                } else {
                    throw new IllegalStateException("Unsupported KafkaConsumerRecord type. The returned record can either be ByteArrayBasedKafkaRecord" + " or DecodeableKafkaRecord");
                }
                JobSpec.Builder jobSpecBuilder = JobSpec.builder(record.getUri());
                Properties props = new Properties();
                props.putAll(record.getProperties());
                jobSpecBuilder.withJobCatalogURI(record.getUri()).withVersion(record.getVersion()).withDescription(record.getDescription()).withConfigAsProperties(props);
                // The template is optional: only set it when a non-empty URI was supplied.
                if (!record.getTemplateUri().isEmpty()) {
                    jobSpecBuilder.withTemplate(new URI(record.getTemplateUri()));
                }
                // The verb travels in the record metadata under VERB_KEY.
                String verbName = record.getMetadata().get(VERB_KEY);
                SpecExecutor.Verb verb = SpecExecutor.Verb.valueOf(verbName);
                changesSpecs.add(new ImmutablePair<SpecExecutor.Verb, Spec>(verb, jobSpecBuilder.build()));
            } catch (Throwable t) {
                // Best effort: skip the bad record, but keep the cause so the failure can be diagnosed.
                log.error("Could not decode record at partition " + this.currentPartitionIdx + " offset " + nextValidMessage.getOffset(), t);
            }
        }
    }
    return new CompletedFuture(changesSpecs, null);
}
Also used : DecodeableKafkaRecord(org.apache.gobblin.kafka.client.DecodeableKafkaRecord) ArrayList(java.util.ArrayList) KafkaConsumerRecord(org.apache.gobblin.kafka.client.KafkaConsumerRecord) AvroJobSpec(org.apache.gobblin.runtime.job_spec.AvroJobSpec) Properties(java.util.Properties) ByteArrayBasedKafkaRecord(org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord) URI(java.net.URI) KafkaOffsetRetrievalFailureException(org.apache.gobblin.source.extractor.extract.kafka.KafkaOffsetRetrievalFailureException) IOException(java.io.IOException) InvocationTargetException(java.lang.reflect.InvocationTargetException) SpecExecutor(org.apache.gobblin.runtime.api.SpecExecutor) JobSpec(org.apache.gobblin.runtime.api.JobSpec) AvroJobSpec(org.apache.gobblin.runtime.job_spec.AvroJobSpec) JobSpec(org.apache.gobblin.runtime.api.JobSpec) Spec(org.apache.gobblin.runtime.api.Spec) AvroJobSpec(org.apache.gobblin.runtime.job_spec.AvroJobSpec) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) CompletedFuture(org.apache.gobblin.util.CompletedFuture)

Aggregations

IOException (java.io.IOException)2 ByteArrayBasedKafkaRecord (org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord)2 DecodeableKafkaRecord (org.apache.gobblin.kafka.client.DecodeableKafkaRecord)2 KafkaConsumerRecord (org.apache.gobblin.kafka.client.KafkaConsumerRecord)2 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 URI (java.net.URI)1 ArrayList (java.util.ArrayList)1 Properties (java.util.Properties)1 ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair)1 Pair (org.apache.commons.lang3.tuple.Pair)1 JobSpec (org.apache.gobblin.runtime.api.JobSpec)1 Spec (org.apache.gobblin.runtime.api.Spec)1 SpecExecutor (org.apache.gobblin.runtime.api.SpecExecutor)1 AvroJobSpec (org.apache.gobblin.runtime.job_spec.AvroJobSpec)1 DataRecordException (org.apache.gobblin.source.extractor.DataRecordException)1 KafkaOffsetRetrievalFailureException (org.apache.gobblin.source.extractor.extract.kafka.KafkaOffsetRetrievalFailureException)1 CompletedFuture (org.apache.gobblin.util.CompletedFuture)1