Use of org.apache.storm.kafka.spout.internal.OffsetManager in project storm by apache.
The class KafkaSpout, method commitOffsetsForAckedTuples.
private void commitOffsetsForAckedTuples() {
    final Map<TopicPartition, OffsetAndMetadata> nextCommitOffsets = new HashMap<>();
    for (Map.Entry<TopicPartition, OffsetManager> tpOffset : offsetManagers.entrySet()) {
        final OffsetAndMetadata nextCommitOffset = tpOffset.getValue().findNextCommitOffset(commitMetadataManager.getCommitMetadata());
        if (nextCommitOffset != null) {
            nextCommitOffsets.put(tpOffset.getKey(), nextCommitOffset);
        }
    }

    // Commit offsets that are ready to be committed for every topic partition
    if (!nextCommitOffsets.isEmpty()) {
        consumer.commitSync(nextCommitOffsets);
        LOG.debug("Offsets successfully committed to Kafka [{}]", nextCommitOffsets);
        // Instead of iterating again, it would be possible to commit and update the state for each TopicPartition
        // in the prior loop, but the multiple network calls should be more expensive than iterating twice over a small loop
        for (Map.Entry<TopicPartition, OffsetAndMetadata> tpOffset : nextCommitOffsets.entrySet()) {
            // Update the OffsetManager for each committed partition, and update numUncommittedOffsets
            final TopicPartition tp = tpOffset.getKey();
            long position = consumer.position(tp);
            long committedOffset = tpOffset.getValue().offset();
            if (position < committedOffset) {
                /*
                 * The position is behind the committed offset. This can happen in some cases, e.g. if a message failed,
                 * lots of (more than max.poll.records) later messages were acked, and the failed message then gets acked.
                 * The consumer may only be part way through "catching up" to where it was when it went back to retry the
                 * failed tuple. Skip the consumer forward to the committed offset.
                 */
                LOG.debug("Consumer fell behind committed offset. Catching up. Position was [{}], skipping to [{}]",
                    position, committedOffset);
                consumer.seek(tp, committedOffset);
            }
            /*
             * In some cases the waitingToEmit list may contain tuples that have just been committed. Drop these.
             */
            List<ConsumerRecord<K, V>> waitingToEmitForTp = waitingToEmit.get(tp);
            if (waitingToEmitForTp != null) {
                // Discard the pending records that are already committed
                waitingToEmit.put(tp, waitingToEmitForTp.stream()
                    .filter(record -> record.offset() >= committedOffset)
                    .collect(Collectors.toCollection(LinkedList::new)));
            }
            final OffsetManager offsetManager = offsetManagers.get(tp);
            offsetManager.commit(tpOffset.getValue());
            LOG.debug("[{}] uncommitted offsets for partition [{}] after commit", offsetManager.getNumUncommittedOffsets(), tp);
        }
    } else {
        LOG.trace("No offsets to commit. {}", this);
    }
}
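The catch-up branch is the subtle part of this method: a commit can land ahead of the consumer's current position when a failed tuple is finally acked. The standalone sketch below is not part of the Storm codebase; the class name, topic, and offsets are invented. It reproduces the seek-forward step using Kafka's MockConsumer:

import java.util.Collections;
import org.apache.kafka.clients.consumer.MockConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.kafka.common.TopicPartition;

public class SeekToCommittedSketch {
    public static void main(String[] args) {
        TopicPartition tp = new TopicPartition("topic", 0);  // hypothetical topic/partition
        MockConsumer<String, String> consumer = new MockConsumer<>(OffsetResetStrategy.EARLIEST);
        consumer.assign(Collections.singleton(tp));
        consumer.updateBeginningOffsets(Collections.singletonMap(tp, 0L));

        consumer.seek(tp, 5L);  // consumer is still replaying records behind a retried tuple
        consumer.commitSync(Collections.singletonMap(tp, new OffsetAndMetadata(10L)));  // later offsets were acked

        long position = consumer.position(tp);
        long committedOffset = 10L;
        if (position < committedOffset) {
            consumer.seek(tp, committedOffset);  // skip forward past records that are already committed
        }
        System.out.println(consumer.position(tp));  // prints 10
    }
}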
Use of org.apache.storm.kafka.spout.internal.OffsetManager in project storm by apache.
The class KafkaOffsetMetric, method getValueAndReset.
@Override
public Object getValueAndReset() {
    Map<TopicPartition, OffsetManager> offsetManagers = offsetManagerSupplier.get();
    Consumer<K, V> consumer = consumerSupplier.get();
    if (offsetManagers == null || offsetManagers.isEmpty() || consumer == null) {
        LOG.debug("Metrics Tick: offsetManagers or kafkaConsumer is null.");
        return null;
    }

    Map<String, TopicMetrics> topicMetricsMap = new HashMap<>();
    Set<TopicPartition> topicPartitions = offsetManagers.keySet();

    Map<TopicPartition, Long> beginningOffsets;
    Map<TopicPartition, Long> endOffsets;
    try {
        beginningOffsets = consumer.beginningOffsets(topicPartitions);
        endOffsets = consumer.endOffsets(topicPartitions);
    } catch (RetriableException e) {
        LOG.warn("Failed to get offsets from Kafka! Will retry on next metrics tick.", e);
        return null;
    }

    // map to hold partition level and topic level metrics
    Map<String, Long> result = new HashMap<>();
    for (Map.Entry<TopicPartition, OffsetManager> entry : offsetManagers.entrySet()) {
        TopicPartition topicPartition = entry.getKey();
        OffsetManager offsetManager = entry.getValue();

        long latestTimeOffset = endOffsets.get(topicPartition);
        long earliestTimeOffset = beginningOffsets.get(topicPartition);
        long latestEmittedOffset = offsetManager.getLatestEmittedOffset();
        long latestCompletedOffset = offsetManager.getCommittedOffset();
        long spoutLag = latestTimeOffset - latestCompletedOffset;
        long recordsInPartition = latestTimeOffset - earliestTimeOffset;

        String metricPath = topicPartition.topic() + "/partition_" + topicPartition.partition();
        result.put(metricPath + "/" + "spoutLag", spoutLag);
        result.put(metricPath + "/" + "earliestTimeOffset", earliestTimeOffset);
        result.put(metricPath + "/" + "latestTimeOffset", latestTimeOffset);
        result.put(metricPath + "/" + "latestEmittedOffset", latestEmittedOffset);
        result.put(metricPath + "/" + "latestCompletedOffset", latestCompletedOffset);
        result.put(metricPath + "/" + "recordsInPartition", recordsInPartition);

        TopicMetrics topicMetrics = topicMetricsMap.get(topicPartition.topic());
        if (topicMetrics == null) {
            topicMetrics = new TopicMetrics();
            topicMetricsMap.put(topicPartition.topic(), topicMetrics);
        }
        topicMetrics.totalSpoutLag += spoutLag;
        topicMetrics.totalEarliestTimeOffset += earliestTimeOffset;
        topicMetrics.totalLatestTimeOffset += latestTimeOffset;
        topicMetrics.totalLatestEmittedOffset += latestEmittedOffset;
        topicMetrics.totalLatestCompletedOffset += latestCompletedOffset;
        topicMetrics.totalRecordsInPartitions += recordsInPartition;
    }

    for (Map.Entry<String, TopicMetrics> e : topicMetricsMap.entrySet()) {
        String topic = e.getKey();
        TopicMetrics topicMetrics = e.getValue();
        result.put(topic + "/" + "totalSpoutLag", topicMetrics.totalSpoutLag);
        result.put(topic + "/" + "totalEarliestTimeOffset", topicMetrics.totalEarliestTimeOffset);
        result.put(topic + "/" + "totalLatestTimeOffset", topicMetrics.totalLatestTimeOffset);
        result.put(topic + "/" + "totalLatestEmittedOffset", topicMetrics.totalLatestEmittedOffset);
        result.put(topic + "/" + "totalLatestCompletedOffset", topicMetrics.totalLatestCompletedOffset);
        result.put(topic + "/" + "totalRecordsInPartitions", topicMetrics.totalRecordsInPartitions);
    }

    LOG.debug("Metrics Tick: value : {}", result);
    return result;
}
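Reading the metric names is easier with concrete numbers. The worked example below is hypothetical (the class name and all offsets are invented) and simply replays the per-partition arithmetic from the loop above:

public class SpoutLagSketch {
    public static void main(String[] args) {
        long earliestTimeOffset = 100L;     // would come from consumer.beginningOffsets(...)
        long latestTimeOffset = 250L;       // would come from consumer.endOffsets(...)
        long latestCompletedOffset = 230L;  // would come from offsetManager.getCommittedOffset()

        long spoutLag = latestTimeOffset - latestCompletedOffset;         // 250 - 230 = 20 records not yet completed
        long recordsInPartition = latestTimeOffset - earliestTimeOffset;  // 250 - 100 = 150 records retained

        System.out.println("myTopic/partition_0/spoutLag = " + spoutLag);
        System.out.println("myTopic/partition_0/recordsInPartition = " + recordsInPartition);
    }
}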
Use of org.apache.storm.kafka.spout.internal.OffsetManager in project storm by apache.
The class KafkaSpout, method getPollablePartitionsInfo.
private PollablePartitionsInfo getPollablePartitionsInfo() {
    if (isWaitingToEmit()) {
        LOG.debug("Not polling. Tuples waiting to be emitted.");
        return new PollablePartitionsInfo(Collections.emptySet(), Collections.emptyMap());
    }

    Set<TopicPartition> assignment = consumer.assignment();
    if (!isAtLeastOnceProcessing()) {
        return new PollablePartitionsInfo(assignment, Collections.emptyMap());
    }

    Map<TopicPartition, Long> earliestRetriableOffsets = retryService.earliestRetriableOffsets();
    Set<TopicPartition> pollablePartitions = new HashSet<>();
    final int maxUncommittedOffsets = kafkaSpoutConfig.getMaxUncommittedOffsets();
    for (TopicPartition tp : assignment) {
        OffsetManager offsetManager = offsetManagers.get(tp);
        int numUncommittedOffsets = offsetManager.getNumUncommittedOffsets();
        if (numUncommittedOffsets < maxUncommittedOffsets) {
            // Allow poll if the partition is not at the maxUncommittedOffsets limit
            pollablePartitions.add(tp);
        } else {
            long offsetAtLimit = offsetManager.getNthUncommittedOffsetAfterCommittedOffset(maxUncommittedOffsets);
            Long earliestRetriableOffset = earliestRetriableOffsets.get(tp);
            if (earliestRetriableOffset != null && earliestRetriableOffset <= offsetAtLimit) {
                // Allow poll if there are retriable tuples within the maxUncommittedOffsets limit
                pollablePartitions.add(tp);
            } else {
                LOG.debug("Not polling on partition [{}]. It has [{}] uncommitted offsets, which exceeds the limit of [{}].",
                    tp, numUncommittedOffsets, maxUncommittedOffsets);
            }
        }
    }
    return new PollablePartitionsInfo(pollablePartitions, earliestRetriableOffsets);
}
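The else branch condenses to a single predicate: a partition already at the cap stays pollable only if a retriable tuple falls at or below the Nth uncommitted offset past the commit point. A minimal sketch of that decision; the class name, method, and numbers are invented for illustration:

public class PollThrottleSketch {
    // Mirrors the branch above: under the cap, always poll; at the cap, poll only
    // if the earliest retriable offset is within the maxUncommittedOffsets window.
    static boolean isPollable(int numUncommitted, int maxUncommitted, long offsetAtLimit, Long earliestRetriableOffset) {
        if (numUncommitted < maxUncommitted) {
            return true;
        }
        return earliestRetriableOffset != null && earliestRetriableOffset <= offsetAtLimit;
    }

    public static void main(String[] args) {
        int maxUncommittedOffsets = 50;
        int numUncommittedOffsets = 50;  // partition is at the cap
        long offsetAtLimit = 150L;       // the 50th uncommitted offset after the committed offset

        System.out.println(isPollable(numUncommittedOffsets, maxUncommittedOffsets, offsetAtLimit, 120L));  // true: retry within the window
        System.out.println(isPollable(numUncommittedOffsets, maxUncommittedOffsets, offsetAtLimit, null));  // false: nothing to retry
    }
}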