Use of org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator in project beam by apache: the processElement method of the class ReadFromKafkaDoFn. The method claims Kafka offsets through a RestrictionTracker and, when a TimestampPolicy is configured, advances the watermark through the ManualWatermarkEstimator.
@ProcessElement
public ProcessContinuation processElement(
    @Element KafkaSourceDescriptor kafkaSourceDescriptor,
    RestrictionTracker<OffsetRange, Long> tracker,
    WatermarkEstimator watermarkEstimator,
    OutputReceiver<KV<KafkaSourceDescriptor, KafkaRecord<K, V>>> receiver) {
  // Stop processing current TopicPartition when it's time to stop.
  if (checkStopReadingFn != null
      && checkStopReadingFn.apply(kafkaSourceDescriptor.getTopicPartition())) {
    return ProcessContinuation.stop();
  }
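  // If the source descriptor carries its own bootstrap servers, they override the ones in
  // the base consumer config (inferred from the helper's name; its body is not shown here).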
  Map<String, Object> updatedConsumerConfig =
      overrideBootstrapServersConfig(consumerConfig, kafkaSourceDescriptor);
  // If there is a timestampPolicyFactory, create the TimestampPolicy for current
  // TopicPartition.
  TimestampPolicy timestampPolicy = null;
  if (timestampPolicyFactory != null) {
    timestampPolicy =
        timestampPolicyFactory.createTimestampPolicy(
            kafkaSourceDescriptor.getTopicPartition(),
            Optional.ofNullable(watermarkEstimator.currentWatermark()));
  }
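  // The consumer is created per invocation and closed by try-with-resources on every
  // return path below, whether the continuation is stop() or resume().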
  try (Consumer<byte[], byte[]> consumer = consumerFactoryFn.apply(updatedConsumerConfig)) {
    // Check whether current TopicPartition is still available to read.
    Set<TopicPartition> existingTopicPartitions = new HashSet<>();
    for (List<PartitionInfo> topicPartitionList : consumer.listTopics().values()) {
      topicPartitionList.forEach(partitionInfo -> {
        existingTopicPartitions.add(
            new TopicPartition(partitionInfo.topic(), partitionInfo.partition()));
      });
    }
    if (!existingTopicPartitions.contains(kafkaSourceDescriptor.getTopicPartition())) {
      return ProcessContinuation.stop();
    }
    ConsumerSpEL.evaluateAssign(
        consumer, ImmutableList.of(kafkaSourceDescriptor.getTopicPartition()));
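    // Resume from the start of the claimed restriction; every offset emitted below must
    // first be claimed from the tracker.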
    long startOffset = tracker.currentRestriction().getFrom();
    long expectedOffset = startOffset;
    consumer.seek(kafkaSourceDescriptor.getTopicPartition(), startOffset);
    ConsumerRecords<byte[], byte[]> rawRecords = ConsumerRecords.empty();
    while (true) {
      rawRecords = consumer.poll(KAFKA_POLL_TIMEOUT);
      // When there are no records available for the current TopicPartition, self-checkpoint
      // and move to process the next element.
      if (rawRecords.isEmpty()) {
        return ProcessContinuation.resume();
      }
      for (ConsumerRecord<byte[], byte[]> rawRecord : rawRecords) {
        if (!tracker.tryClaim(rawRecord.offset())) {
          return ProcessContinuation.stop();
        }
        KafkaRecord<K, V> kafkaRecord =
            new KafkaRecord<>(
                rawRecord.topic(),
                rawRecord.partition(),
                rawRecord.offset(),
                ConsumerSpEL.getRecordTimestamp(rawRecord),
                ConsumerSpEL.getRecordTimestampType(rawRecord),
                ConsumerSpEL.hasHeaders() ? rawRecord.headers() : null,
                ConsumerSpEL.deserializeKey(keyDeserializerInstance, rawRecord),
                ConsumerSpEL.deserializeValue(valueDeserializerInstance, rawRecord));
        int recordSize =
            (rawRecord.key() == null ? 0 : rawRecord.key().length)
                + (rawRecord.value() == null ? 0 : rawRecord.value().length);
        avgRecordSize
            .getUnchecked(kafkaSourceDescriptor.getTopicPartition())
            .update(recordSize, rawRecord.offset() - expectedOffset);
        expectedOffset = rawRecord.offset() + 1;
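        // Note: rawRecord.offset() - expectedOffset above is the gap of offsets skipped
        // since the last record (e.g. after compaction or transaction markers); it feeds the
        // per-partition moving average of record size used when estimating restriction size.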
        Instant outputTimestamp;
        // When a TimestampPolicy is in use, the WatermarkEstimator must be a manual one.
        if (timestampPolicy != null) {
          checkState(watermarkEstimator instanceof ManualWatermarkEstimator);
          TimestampPolicyContext context =
              new TimestampPolicyContext(
                  (long) ((HasProgress) tracker).getProgress().getWorkRemaining(), Instant.now());
          outputTimestamp = timestampPolicy.getTimestampForRecord(context, kafkaRecord);
          ((ManualWatermarkEstimator) watermarkEstimator)
              .setWatermark(ensureTimestampWithinBounds(timestampPolicy.getWatermark(context)));
        } else {
          outputTimestamp = extractOutputTimestampFn.apply(kafkaRecord);
        }
        receiver.outputWithTimestamp(KV.of(kafkaSourceDescriptor, kafkaRecord), outputTimestamp);
      }
    }
  }
}
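For context, here is a minimal sketch of how the ManualWatermarkEstimator consumed above is typically wired into the same splittable DoFn. The method bodies and the choice of WatermarkEstimators.Manual (from org.apache.beam.sdk.transforms.splittabledofn, with org.joda.time.Instant as the estimator state) are illustrative assumptions rather than an excerpt from ReadFromKafkaDoFn:

@GetInitialWatermarkEstimatorState
public Instant getInitialWatermarkEstimatorState(@Timestamp Instant currentElementTimestamp) {
  // Illustrative choice: start the watermark at the current element's timestamp.
  return currentElementTimestamp;
}

@NewWatermarkEstimator
public WatermarkEstimator<Instant> newWatermarkEstimator(
    @WatermarkEstimatorState Instant watermarkEstimatorState) {
  // WatermarkEstimators.Manual implements ManualWatermarkEstimator<Instant>, which is what
  // makes the instanceof check and the setWatermark(...) call in processElement succeed.
  return new WatermarkEstimators.Manual(watermarkEstimatorState);
}

Because the estimator is manual, the watermark only advances when processElement calls setWatermark, i.e. on each record admitted under the TimestampPolicy.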