Use of org.apache.gobblin.kafka.client.DecodeableKafkaRecord in project incubator-gobblin by apache.
From the class KafkaExtractor, method readRecordImpl:
/**
 * Return the next decodable record from the current partition. If the current partition has no more
 * decodable records, move on to the next partition. If all partitions have been processed, return null.
 */
@SuppressWarnings("unchecked")
@Override
public D readRecordImpl(D reuse) throws DataRecordException, IOException {
  long readStartTime = System.nanoTime();

  while (!allPartitionsFinished()) {
    if (currentPartitionFinished()) {
      moveToNextPartition();
      continue;
    }
    if (this.messageIterator == null || !this.messageIterator.hasNext()) {
      try {
        long fetchStartTime = System.nanoTime();
        this.messageIterator = fetchNextMessageBuffer();
        this.currentPartitionFetchMessageBufferTime += System.nanoTime() - fetchStartTime;
      } catch (Exception e) {
        LOG.error(String.format("Failed to fetch next message buffer for partition %s. Will skip this partition.",
            getCurrentPartition()), e);
        moveToNextPartition();
        continue;
      }
      if (this.messageIterator == null || !this.messageIterator.hasNext()) {
        moveToNextPartition();
        continue;
      }
    }
    while (!currentPartitionFinished()) {
      if (!this.messageIterator.hasNext()) {
        break;
      }

      KafkaConsumerRecord nextValidMessage = this.messageIterator.next();

      // The fetched buffer may start at an offset smaller than the one requested,
      // so skip messages until we reach the expected offset (the next watermark).
      if (nextValidMessage.getOffset() < this.nextWatermark.get(this.currentPartitionIdx)) {
        continue;
      }

      this.nextWatermark.set(this.currentPartitionIdx, nextValidMessage.getNextOffset());
      try {
        D record = null;
        // track time for decode/convert depending on the record type
        long decodeStartTime = System.nanoTime();
        if (nextValidMessage instanceof ByteArrayBasedKafkaRecord) {
          record = decodeRecord((ByteArrayBasedKafkaRecord) nextValidMessage);
        } else if (nextValidMessage instanceof DecodeableKafkaRecord) {
          // if the value is null then this is a bad record that is returned for further error handling,
          // so raise an error
          if (((DecodeableKafkaRecord) nextValidMessage).getValue() == null) {
            throw new DataRecordException("Could not decode Kafka record");
          }
          // get the value from the decodeable record and convert it to the output schema if necessary
          record = convertRecord(((DecodeableKafkaRecord<?, D>) nextValidMessage).getValue());
        } else {
          throw new IllegalStateException("Unsupported KafkaConsumerRecord type. The returned record can either be"
              + " ByteArrayBasedKafkaRecord or DecodeableKafkaRecord");
        }
        this.currentPartitionDecodeRecordTime += System.nanoTime() - decodeStartTime;
        this.currentPartitionRecordCount++;
        this.currentPartitionTotalSize += nextValidMessage.getValueSizeInBytes();
        this.currentPartitionReadRecordTime += System.nanoTime() - readStartTime;
        return record;
      } catch (Throwable t) {
        this.errorPartitions.add(this.currentPartitionIdx);
        this.undecodableMessageCount++;
        if (shouldLogError()) {
          LOG.error(String.format("A record from partition %s cannot be decoded.", getCurrentPartition()), t);
          incrementErrorCount();
        }
      }
    }
  }

  LOG.info("Finished pulling topic " + this.topicName);
  this.currentPartitionReadRecordTime += System.nanoTime() - readStartTime;
  return null;
}
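The type dispatch above is the heart of the method: a ByteArrayBasedKafkaRecord carries raw bytes that the extractor must deserialize itself via decodeRecord, while a DecodeableKafkaRecord arrives with a value the consumer client has already decoded and only needs convertRecord. Below is a minimal standalone sketch of the same dispatch; the KafkaRecordValues helper and its method names are illustrative and not part of incubator-gobblin, and getMessageBytes is assumed from the gobblin-kafka client API.

import java.nio.charset.StandardCharsets;
import java.util.function.Function;

import org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord;
import org.apache.gobblin.kafka.client.DecodeableKafkaRecord;
import org.apache.gobblin.kafka.client.KafkaConsumerRecord;

// Illustrative helper, not part of incubator-gobblin: mirrors the dispatch in
// readRecordImpl above. Raw-byte records are decoded by a caller-supplied function;
// DecodeableKafkaRecord values arrive already decoded from the consumer client.
public final class KafkaRecordValues {

  private KafkaRecordValues() {
  }

  @SuppressWarnings("unchecked")
  public static <D> D extractValue(KafkaConsumerRecord record, Function<byte[], D> byteDecoder) {
    if (record instanceof ByteArrayBasedKafkaRecord) {
      // The caller owns deserialization of the raw message bytes.
      return byteDecoder.apply(((ByteArrayBasedKafkaRecord) record).getMessageBytes());
    } else if (record instanceof DecodeableKafkaRecord) {
      Object value = ((DecodeableKafkaRecord<?, ?>) record).getValue();
      if (value == null) {
        // Same contract as in readRecordImpl: a null value marks an undecodable record.
        throw new IllegalStateException("Could not decode Kafka record");
      }
      return (D) value;
    }
    throw new IllegalStateException(
        "Unsupported KafkaConsumerRecord type: " + record.getClass().getName());
  }

  // Example usage: decode a UTF-8 string payload.
  public static String asUtf8String(KafkaConsumerRecord record) {
    return extractValue(record, bytes -> new String(bytes, StandardCharsets.UTF_8));
  }
}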
Use of org.apache.gobblin.kafka.client.DecodeableKafkaRecord in project incubator-gobblin by apache.
From the class SimpleKafkaSpecConsumer, method changedSpecs:
@Override
public Future<? extends List<Pair<SpecExecutor.Verb, Spec>>> changedSpecs() {
  List<Pair<SpecExecutor.Verb, Spec>> changesSpecs = new ArrayList<>();
  initializeWatermarks();
  this.currentPartitionIdx = -1;

  while (!allPartitionsFinished()) {
    if (currentPartitionFinished()) {
      moveToNextPartition();
      continue;
    }
    if (this.messageIterator == null || !this.messageIterator.hasNext()) {
      try {
        this.messageIterator = fetchNextMessageBuffer();
      } catch (Exception e) {
        log.error(String.format("Failed to fetch next message buffer for partition %s. Will skip this partition.",
            getCurrentPartition()), e);
        moveToNextPartition();
        continue;
      }
      if (this.messageIterator == null || !this.messageIterator.hasNext()) {
        moveToNextPartition();
        continue;
      }
    }
    while (!currentPartitionFinished()) {
      if (!this.messageIterator.hasNext()) {
        break;
      }

      KafkaConsumerRecord nextValidMessage = this.messageIterator.next();

      // The fetched buffer may start at an offset smaller than the one requested,
      // so skip messages until we reach the expected offset (the next watermark).
      if (nextValidMessage.getOffset() < _nextWatermark.get(this.currentPartitionIdx)) {
        continue;
      }

      _nextWatermark.set(this.currentPartitionIdx, nextValidMessage.getNextOffset());
      try {
        final AvroJobSpec record;
        if (nextValidMessage instanceof ByteArrayBasedKafkaRecord) {
          record = decodeRecord((ByteArrayBasedKafkaRecord) nextValidMessage);
        } else if (nextValidMessage instanceof DecodeableKafkaRecord) {
          record = ((DecodeableKafkaRecord<?, AvroJobSpec>) nextValidMessage).getValue();
        } else {
          throw new IllegalStateException("Unsupported KafkaConsumerRecord type. The returned record can either be"
              + " ByteArrayBasedKafkaRecord or DecodeableKafkaRecord");
        }

        JobSpec.Builder jobSpecBuilder = JobSpec.builder(record.getUri());
        Properties props = new Properties();
        props.putAll(record.getProperties());
        jobSpecBuilder.withJobCatalogURI(record.getUri())
            .withVersion(record.getVersion())
            .withDescription(record.getDescription())
            .withConfigAsProperties(props);
        if (!record.getTemplateUri().isEmpty()) {
          jobSpecBuilder.withTemplate(new URI(record.getTemplateUri()));
        }

        String verbName = record.getMetadata().get(VERB_KEY);
        SpecExecutor.Verb verb = SpecExecutor.Verb.valueOf(verbName);
        changesSpecs.add(new ImmutablePair<SpecExecutor.Verb, Spec>(verb, jobSpecBuilder.build()));
      } catch (Throwable t) {
        log.error("Could not decode record at partition " + this.currentPartitionIdx
            + " offset " + nextValidMessage.getOffset(), t);
      }
    }
  }

  return new CompletedFuture(changesSpecs, null);
}
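Since changedSpecs swallows undecodable records and returns an already-completed future, callers can unwrap the result synchronously. A minimal consumption sketch under those assumptions follows; the SpecChangeHandler class is illustrative and not part of incubator-gobblin, and the specific Verb constants used (ADD, UPDATE, DELETE) are assumed from the SpecExecutor API.

import java.util.List;
import java.util.concurrent.Future;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.gobblin.runtime.api.Spec;
import org.apache.gobblin.runtime.api.SpecExecutor;

// Illustrative caller, not part of incubator-gobblin. Because changedSpecs() returns
// a CompletedFuture, get() yields immediately with whatever was pulled from Kafka.
public final class SpecChangeHandler {

  private SpecChangeHandler() {
  }

  public static void handleChanges(Future<? extends List<Pair<SpecExecutor.Verb, Spec>>> changes)
      throws Exception {
    for (Pair<SpecExecutor.Verb, Spec> change : changes.get()) {
      Spec spec = change.getValue();
      switch (change.getKey()) {
        case ADD:
          System.out.println("Added spec: " + spec.getUri());
          break;
        case UPDATE:
          System.out.println("Updated spec: " + spec.getUri());
          break;
        case DELETE:
          System.out.println("Deleted spec: " + spec.getUri());
          break;
        default:
          System.out.println("Unhandled verb " + change.getKey() + " for " + spec.getUri());
          break;
      }
    }
  }
}

This would be invoked as handleChanges(specConsumer.changedSpecs()) once the consumer is wired to its topic; note that decode failures never surface here, since changedSpecs logs and drops them internally.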