Example 1 with OffsetManager

Use of org.apache.storm.kafka.spout.internal.OffsetManager in project storm by apache: class KafkaSpout, method commitOffsetsForAckedTuples.

private void commitOffsetsForAckedTuples() {
    final Map<TopicPartition, OffsetAndMetadata> nextCommitOffsets = new HashMap<>();
    for (Map.Entry<TopicPartition, OffsetManager> tpOffset : offsetManagers.entrySet()) {
        final OffsetAndMetadata nextCommitOffset = tpOffset.getValue().findNextCommitOffset(commitMetadataManager.getCommitMetadata());
        if (nextCommitOffset != null) {
            nextCommitOffsets.put(tpOffset.getKey(), nextCommitOffset);
        }
    }
    // Commit offsets that are ready to be committed for every topic partition
    if (!nextCommitOffsets.isEmpty()) {
        consumer.commitSync(nextCommitOffsets);
        LOG.debug("Offsets successfully committed to Kafka [{}]", nextCommitOffsets);
        // Instead of iterating again, it would be possible to commit and update the state for each TopicPartition
        // in the prior loop, but the multiple network calls should be more expensive than iterating twice over a small loop
        for (Map.Entry<TopicPartition, OffsetAndMetadata> tpOffset : nextCommitOffsets.entrySet()) {
            // Update the OffsetManager for each committed partition, and update numUncommittedOffsets
            final TopicPartition tp = tpOffset.getKey();
            long position = consumer.position(tp);
            long committedOffset = tpOffset.getValue().offset();
            if (position < committedOffset) {
                /*
                 * The position is behind the committed offset. This can happen in some cases, e.g. if a message failed, lots of (more
                 * than max.poll.records) later messages were acked, and the failed message then gets acked. The consumer may only be
                 * part way through "catching up" to where it was when it went back to retry the failed tuple. Skip the consumer forward
                 * to the committed offset.
                 */
                LOG.debug("Consumer fell behind committed offset. Catching up. Position was [{}], skipping to [{}]", position, committedOffset);
                consumer.seek(tp, committedOffset);
            }
            /*
             * In some cases the waitingToEmit list may contain tuples that have just been committed. Drop these.
             */
            List<ConsumerRecord<K, V>> waitingToEmitForTp = waitingToEmit.get(tp);
            if (waitingToEmitForTp != null) {
                // Discard the pending records that are already committed
                waitingToEmit.put(tp, waitingToEmitForTp.stream().filter(record -> record.offset() >= committedOffset).collect(Collectors.toCollection(LinkedList::new)));
            }
            final OffsetManager offsetManager = offsetManagers.get(tp);
            offsetManager.commit(tpOffset.getValue());
            LOG.debug("[{}] uncommitted offsets for partition [{}] after commit", offsetManager.getNumUncommittedOffsets(), tp);
        }
    } else {
        LOG.trace("No offsets to commit. {}", this);
    }
}
Also used : HashMap(java.util.HashMap) TopicPartition(org.apache.kafka.common.TopicPartition) OffsetManager(org.apache.storm.kafka.spout.internal.OffsetManager) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) Map(java.util.Map) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) LinkedList(java.util.LinkedList)
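
The key call above is findNextCommitOffset: an offset is only safe to commit once every offset below it has been acked, so the OffsetManager must find where the contiguous run of acked offsets ends. Below is a minimal sketch of that idea, assuming a plain sorted-set tracker; the class shape and method bodies are illustrative, not Storm's actual OffsetManager internals.

import java.util.TreeSet;

// Illustrative sketch only: tracks acked offsets and finds the end of the
// contiguous acked run, the idea behind OffsetManager.findNextCommitOffset.
class ContiguousOffsetTracker {

    private final TreeSet<Long> ackedOffsets = new TreeSet<>();
    // Next offset expected to be committed (one past the last committed offset).
    private long committedOffset;

    ContiguousOffsetTracker(long initialFetchOffset) {
        this.committedOffset = initialFetchOffset;
    }

    void ack(long offset) {
        ackedOffsets.add(offset);
    }

    // Returns the offset to commit (one past the highest contiguously acked
    // offset), or -1 if there is a gap right at the committed offset.
    long findNextCommitOffset() {
        long next = committedOffset;
        while (ackedOffsets.contains(next)) {
            next++;
        }
        return next > committedOffset ? next : -1;
    }

    // Discard tracked offsets below the newly committed offset.
    void commit(long nextCommitOffset) {
        ackedOffsets.headSet(nextCommitOffset).clear();
        committedOffset = nextCommitOffset;
    }
}

With committedOffset at 5 and acks for offsets 5, 6, and 8, findNextCommitOffset() returns 7; offset 8 stays pending until 7 is acked. This gap check is why commitOffsetsForAckedTuples may find nothing to commit even when many tuples have been acked.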

Example 2 with OffsetManager

Use of org.apache.storm.kafka.spout.internal.OffsetManager in project storm by apache: class KafkaOffsetMetric, method getValueAndReset.

@Override
public Object getValueAndReset() {
    Map<TopicPartition, OffsetManager> offsetManagers = offsetManagerSupplier.get();
    Consumer<K, V> consumer = consumerSupplier.get();
    if (offsetManagers == null || offsetManagers.isEmpty() || consumer == null) {
        LOG.debug("Metrics Tick: offsetManagers or kafkaConsumer is null.");
        return null;
    }
    Map<String, TopicMetrics> topicMetricsMap = new HashMap<>();
    Set<TopicPartition> topicPartitions = offsetManagers.keySet();
    Map<TopicPartition, Long> beginningOffsets;
    Map<TopicPartition, Long> endOffsets;
    try {
        beginningOffsets = consumer.beginningOffsets(topicPartitions);
        endOffsets = consumer.endOffsets(topicPartitions);
    } catch (RetriableException e) {
        LOG.warn("Failed to get offsets from Kafka! Will retry on next metrics tick.", e);
        return null;
    }
    // map to hold partition level and topic level metrics
    Map<String, Long> result = new HashMap<>();
    for (Map.Entry<TopicPartition, OffsetManager> entry : offsetManagers.entrySet()) {
        TopicPartition topicPartition = entry.getKey();
        OffsetManager offsetManager = entry.getValue();
        long latestTimeOffset = endOffsets.get(topicPartition);
        long earliestTimeOffset = beginningOffsets.get(topicPartition);
        long latestEmittedOffset = offsetManager.getLatestEmittedOffset();
        long latestCompletedOffset = offsetManager.getCommittedOffset();
        long spoutLag = latestTimeOffset - latestCompletedOffset;
        long recordsInPartition = latestTimeOffset - earliestTimeOffset;
        String metricPath = topicPartition.topic() + "/partition_" + topicPartition.partition();
        result.put(metricPath + "/" + "spoutLag", spoutLag);
        result.put(metricPath + "/" + "earliestTimeOffset", earliestTimeOffset);
        result.put(metricPath + "/" + "latestTimeOffset", latestTimeOffset);
        result.put(metricPath + "/" + "latestEmittedOffset", latestEmittedOffset);
        result.put(metricPath + "/" + "latestCompletedOffset", latestCompletedOffset);
        result.put(metricPath + "/" + "recordsInPartition", recordsInPartition);
        TopicMetrics topicMetrics = topicMetricsMap.get(topicPartition.topic());
        if (topicMetrics == null) {
            topicMetrics = new TopicMetrics();
            topicMetricsMap.put(topicPartition.topic(), topicMetrics);
        }
        topicMetrics.totalSpoutLag += spoutLag;
        topicMetrics.totalEarliestTimeOffset += earliestTimeOffset;
        topicMetrics.totalLatestTimeOffset += latestTimeOffset;
        topicMetrics.totalLatestEmittedOffset += latestEmittedOffset;
        topicMetrics.totalLatestCompletedOffset += latestCompletedOffset;
        topicMetrics.totalRecordsInPartitions += recordsInPartition;
    }
    for (Map.Entry<String, TopicMetrics> e : topicMetricsMap.entrySet()) {
        String topic = e.getKey();
        TopicMetrics topicMetrics = e.getValue();
        result.put(topic + "/" + "totalSpoutLag", topicMetrics.totalSpoutLag);
        result.put(topic + "/" + "totalEarliestTimeOffset", topicMetrics.totalEarliestTimeOffset);
        result.put(topic + "/" + "totalLatestTimeOffset", topicMetrics.totalLatestTimeOffset);
        result.put(topic + "/" + "totalLatestEmittedOffset", topicMetrics.totalLatestEmittedOffset);
        result.put(topic + "/" + "totalLatestCompletedOffset", topicMetrics.totalLatestCompletedOffset);
        result.put(topic + "/" + "totalRecordsInPartitions", topicMetrics.totalRecordsInPartitions);
    }
    LOG.debug("Metrics Tick: value : {}", result);
    return result;
}
Also used : HashMap(java.util.HashMap) OffsetManager(org.apache.storm.kafka.spout.internal.OffsetManager) TopicPartition(org.apache.kafka.common.TopicPartition) Map(java.util.Map) RetriableException(org.apache.kafka.common.errors.RetriableException)
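
The per-partition metrics above reduce to two subtractions: spout lag is the distance from the last completed (committed) offset to the head of the log, and recordsInPartition is the width of the retained log. A self-contained worked example with made-up offsets:

// Worked example of the metric arithmetic above; all offset values are made up.
public class SpoutLagExample {

    public static void main(String[] args) {
        long earliestTimeOffset = 100;    // from consumer.beginningOffsets(...)
        long latestTimeOffset = 250;      // from consumer.endOffsets(...)
        long latestCompletedOffset = 210; // from offsetManager.getCommittedOffset()

        long spoutLag = latestTimeOffset - latestCompletedOffset;        // 250 - 210 = 40
        long recordsInPartition = latestTimeOffset - earliestTimeOffset; // 250 - 100 = 150

        System.out.println("spoutLag=" + spoutLag
                + " recordsInPartition=" + recordsInPartition);
    }
}

Note that lag is measured against the committed offset, not the emitted one, so tuples that are in flight (emitted but not yet acked and committed) still count toward spoutLag.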

Example 3 with OffsetManager

Use of org.apache.storm.kafka.spout.internal.OffsetManager in project storm by apache: class KafkaSpout, method getPollablePartitionsInfo.

private PollablePartitionsInfo getPollablePartitionsInfo() {
    if (isWaitingToEmit()) {
        LOG.debug("Not polling. Tuples waiting to be emitted.");
        return new PollablePartitionsInfo(Collections.emptySet(), Collections.emptyMap());
    }
    Set<TopicPartition> assignment = consumer.assignment();
    if (!isAtLeastOnceProcessing()) {
        return new PollablePartitionsInfo(assignment, Collections.emptyMap());
    }
    Map<TopicPartition, Long> earliestRetriableOffsets = retryService.earliestRetriableOffsets();
    Set<TopicPartition> pollablePartitions = new HashSet<>();
    final int maxUncommittedOffsets = kafkaSpoutConfig.getMaxUncommittedOffsets();
    for (TopicPartition tp : assignment) {
        OffsetManager offsetManager = offsetManagers.get(tp);
        int numUncommittedOffsets = offsetManager.getNumUncommittedOffsets();
        if (numUncommittedOffsets < maxUncommittedOffsets) {
            // Allow poll if the partition is not at the maxUncommittedOffsets limit
            pollablePartitions.add(tp);
        } else {
            long offsetAtLimit = offsetManager.getNthUncommittedOffsetAfterCommittedOffset(maxUncommittedOffsets);
            Long earliestRetriableOffset = earliestRetriableOffsets.get(tp);
            if (earliestRetriableOffset != null && earliestRetriableOffset <= offsetAtLimit) {
                // Allow poll if there are retriable tuples within the maxUncommittedOffsets limit
                pollablePartitions.add(tp);
            } else {
                LOG.debug("Not polling on partition [{}]. It has [{}] uncommitted offsets, which exceeds the limit of [{}]. ", tp, numUncommittedOffsets, maxUncommittedOffsets);
            }
        }
    }
    return new PollablePartitionsInfo(pollablePartitions, earliestRetriableOffsets);
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) OffsetManager(org.apache.storm.kafka.spout.internal.OffsetManager) HashSet(java.util.HashSet)
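
Stripped of the surrounding iteration, the gate above is a single predicate: a partition is pollable if it is under the maxUncommittedOffsets cap, or if a failed tuple is waiting for retry within the capped offset range (retries must get through even at the cap, or the spout could never make progress). A standalone restatement of that predicate follows; the method and parameter names are illustrative, not Storm's API.

// Illustrative restatement of the poll gate in getPollablePartitionsInfo.
// Parameter names are hypothetical; only the logic mirrors the method above.
static boolean isPollable(int numUncommittedOffsets,
                          int maxUncommittedOffsets,
                          long offsetAtLimit,
                          Long earliestRetriableOffset) { // null if nothing to retry
    if (numUncommittedOffsets < maxUncommittedOffsets) {
        // Under the cap: new tuples may still be emitted from this partition.
        return true;
    }
    // At or over the cap: poll only to re-fetch a failed tuple that lies
    // within the first maxUncommittedOffsets offsets past the committed one.
    return earliestRetriableOffset != null && earliestRetriableOffset <= offsetAtLimit;
}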

Aggregations

TopicPartition (org.apache.kafka.common.TopicPartition): 3 uses
OffsetManager (org.apache.storm.kafka.spout.internal.OffsetManager): 3 uses
HashMap (java.util.HashMap): 2 uses
Map (java.util.Map): 2 uses
HashSet (java.util.HashSet): 1 use
LinkedList (java.util.LinkedList): 1 use
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 1 use
OffsetAndMetadata (org.apache.kafka.clients.consumer.OffsetAndMetadata): 1 use
RetriableException (org.apache.kafka.common.errors.RetriableException): 1 use