Search in sources :

Example 1 with ManualWatermarkEstimator

Use of org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator in the Apache Beam project.

The method processElement of the class ReadFromKafkaDoFn.

/**
 * Polls Kafka for the topic partition named by {@code kafkaSourceDescriptor}, starting at the
 * lower bound of the tracker's current restriction, and emits every claimed record.
 *
 * <p>Outcomes:
 * <ul>
 *   <li>{@code ProcessContinuation.stop()} when {@code checkStopReadingFn} is set and returns
 *       true for the partition, when the partition is absent from
 *       {@code consumer.listTopics()}, or when {@code tracker.tryClaim} rejects a record
 *       offset.</li>
 *   <li>{@code ProcessContinuation.resume()} when a poll returns no records.</li>
 * </ul>
 *
 * <p>If a {@code timestampPolicyFactory} is configured, the record timestamp and the watermark
 * come from the created policy, and {@code watermarkEstimator} must be a
 * {@link ManualWatermarkEstimator} (enforced with {@code checkState} per record). Otherwise
 * the timestamp comes from {@code extractOutputTimestampFn} and the watermark estimator is not
 * touched by this method.
 *
 * @param kafkaSourceDescriptor descriptor of the topic partition to read
 * @param tracker restriction tracker; supplies the start offset and claims each record offset
 * @param watermarkEstimator estimator whose current watermark seeds the timestamp policy
 * @param receiver receives {@code KV(descriptor, record)} with the chosen output timestamp
 * @return whether processing of this element should stop or be resumed later
 */
@ProcessElement
public ProcessContinuation processElement(@Element KafkaSourceDescriptor kafkaSourceDescriptor, RestrictionTracker<OffsetRange, Long> tracker, WatermarkEstimator watermarkEstimator, OutputReceiver<KV<KafkaSourceDescriptor, KafkaRecord<K, V>>> receiver) {
    // Guard: an optional user-supplied predicate may ask us to stop reading this partition.
    if (checkStopReadingFn != null && checkStopReadingFn.apply(kafkaSourceDescriptor.getTopicPartition())) {
        return ProcessContinuation.stop();
    }
    Map<String, Object> effectiveConfig = overrideBootstrapServersConfig(consumerConfig, kafkaSourceDescriptor);
    // The policy is only built when a factory is configured; null means "use
    // extractOutputTimestampFn and leave the watermark estimator alone".
    TimestampPolicy timestampPolicy =
        timestampPolicyFactory == null
            ? null
            : timestampPolicyFactory.createTimestampPolicy(
                kafkaSourceDescriptor.getTopicPartition(),
                Optional.ofNullable(watermarkEstimator.currentWatermark()));
    try (Consumer<byte[], byte[]> consumer = consumerFactoryFn.apply(effectiveConfig)) {
        // Collect every partition the consumer currently reports, across all topics.
        Set<TopicPartition> knownPartitions = new HashSet<>();
        for (List<PartitionInfo> partitionInfos : consumer.listTopics().values()) {
            for (PartitionInfo info : partitionInfos) {
                knownPartitions.add(new TopicPartition(info.topic(), info.partition()));
            }
        }
        // The partition is no longer listed: nothing more to read for this element.
        if (!knownPartitions.contains(kafkaSourceDescriptor.getTopicPartition())) {
            return ProcessContinuation.stop();
        }
        ConsumerSpEL.evaluateAssign(consumer, ImmutableList.of(kafkaSourceDescriptor.getTopicPartition()));
        long initialOffset = tracker.currentRestriction().getFrom();
        // Offset we expect the next record to carry; the difference from the actual offset is
        // passed to the average-record-size tracker below.
        long nextExpectedOffset = initialOffset;
        consumer.seek(kafkaSourceDescriptor.getTopicPartition(), initialOffset);
        // Keep polling while polls return data; the first empty poll falls through to resume().
        for (ConsumerRecords<byte[], byte[]> batch = consumer.poll(KAFKA_POLL_TIMEOUT);
            !batch.isEmpty();
            batch = consumer.poll(KAFKA_POLL_TIMEOUT)) {
            for (ConsumerRecord<byte[], byte[]> raw : batch) {
                // The offset must be claimed before anything is emitted for it.
                if (!tracker.tryClaim(raw.offset())) {
                    return ProcessContinuation.stop();
                }
                KafkaRecord<K, V> decoded =
                    new KafkaRecord<>(
                        raw.topic(),
                        raw.partition(),
                        raw.offset(),
                        ConsumerSpEL.getRecordTimestamp(raw),
                        ConsumerSpEL.getRecordTimestampType(raw),
                        ConsumerSpEL.hasHeaders() ? raw.headers() : null,
                        ConsumerSpEL.deserializeKey(keyDeserializerInstance, raw),
                        ConsumerSpEL.deserializeValue(valueDeserializerInstance, raw));
                // Payload size counts raw key and value bytes; a null side contributes zero.
                int payloadBytes = 0;
                if (raw.key() != null) {
                    payloadBytes += raw.key().length;
                }
                if (raw.value() != null) {
                    payloadBytes += raw.value().length;
                }
                avgRecordSize
                    .getUnchecked(kafkaSourceDescriptor.getTopicPartition())
                    .update(payloadBytes, raw.offset() - nextExpectedOffset);
                nextExpectedOffset = raw.offset() + 1;
                final Instant emitTimestamp;
                if (timestampPolicy == null) {
                    emitTimestamp = extractOutputTimestampFn.apply(decoded);
                } else {
                    // A timestamp policy drives the watermark, so the estimator must be manual.
                    checkState(watermarkEstimator instanceof ManualWatermarkEstimator);
                    TimestampPolicyContext policyContext =
                        new TimestampPolicyContext(
                            (long) ((HasProgress) tracker).getProgress().getWorkRemaining(),
                            Instant.now());
                    emitTimestamp = timestampPolicy.getTimestampForRecord(policyContext, decoded);
                    ((ManualWatermarkEstimator) watermarkEstimator)
                        .setWatermark(ensureTimestampWithinBounds(timestampPolicy.getWatermark(policyContext)));
                }
                receiver.outputWithTimestamp(KV.of(kafkaSourceDescriptor, decoded), emitTimestamp);
            }
        }
        // The last poll returned no records: ask to be resumed later.
        return ProcessContinuation.resume();
    }
}
Also used : TimestampPolicyContext(org.apache.beam.sdk.io.kafka.KafkaUnboundedReader.TimestampPolicyContext) Instant(org.joda.time.Instant) TopicPartition(org.apache.kafka.common.TopicPartition) KV(org.apache.beam.sdk.values.KV) PartitionInfo(org.apache.kafka.common.PartitionInfo) HashSet(java.util.HashSet) ManualWatermarkEstimator(org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator)

Aggregations

HashSet (java.util.HashSet): 1, TimestampPolicyContext (org.apache.beam.sdk.io.kafka.KafkaUnboundedReader.TimestampPolicyContext): 1, ManualWatermarkEstimator (org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator): 1, KV (org.apache.beam.sdk.values.KV): 1, PartitionInfo (org.apache.kafka.common.PartitionInfo): 1, TopicPartition (org.apache.kafka.common.TopicPartition): 1, Instant (org.joda.time.Instant): 1