Use of org.apache.kafka.clients.consumer.ConsumerRecord in project storm by apache.
In class KafkaSpoutEmitTest, method testSpoutWillSkipPartitionsAtTheMaxUncommittedOffsetsLimit.
@Test
public void testSpoutWillSkipPartitionsAtTheMaxUncommittedOffsetsLimit() {
    // This verifies that partitions can't prevent each other from retrying tuples due to the maxUncommittedOffsets limit.
    try (SimulatedTime simulatedTime = new SimulatedTime()) {
        TopicPartition partitionTwo = new TopicPartition(SingleTopicKafkaSpoutConfiguration.TOPIC, 2);
        KafkaSpout<String, String> spout = SpoutWithMockedConsumerSetupHelper.setupSpout(
            spoutConfig, conf, contextMock, collectorMock, consumerMock, partition, partitionTwo);
        Map<TopicPartition, List<ConsumerRecord<String, String>>> records = new HashMap<>();
        // This is cheating a bit since maxPollRecords would normally spread this across multiple polls
        records.put(partition, SpoutWithMockedConsumerSetupHelper.createRecords(partition, 0, spoutConfig.getMaxUncommittedOffsets()));
        records.put(partitionTwo, SpoutWithMockedConsumerSetupHelper.createRecords(partitionTwo, 0, spoutConfig.getMaxUncommittedOffsets() + 1));
        int numMessages = spoutConfig.getMaxUncommittedOffsets() * 2 + 1;
        when(consumerMock.poll(anyLong())).thenReturn(new ConsumerRecords<>(records));

        for (int i = 0; i < numMessages; i++) {
            spout.nextTuple();
        }

        ArgumentCaptor<KafkaSpoutMessageId> messageIds = ArgumentCaptor.forClass(KafkaSpoutMessageId.class);
        verify(collectorMock, times(numMessages)).emit(anyString(), anyList(), messageIds.capture());

        // Now fail a tuple on partition one and verify that it is allowed to retry,
        // because the failed tuple is below the maxUncommittedOffsets limit
        Optional<KafkaSpoutMessageId> failedMessageIdPartitionOne = messageIds.getAllValues().stream()
            .filter(messageId -> messageId.partition() == partition.partition())
            .findAny();
        spout.fail(failedMessageIdPartitionOne.get());

        // Also fail the last tuple from partition two. Since the failed tuple is beyond the maxUncommittedOffsets limit,
        // it should not be retried until earlier messages are acked.
        Optional<KafkaSpoutMessageId> failedMessagePartitionTwo = messageIds.getAllValues().stream()
            .filter(messageId -> messageId.partition() == partitionTwo.partition())
            .max((msgId, msgId2) -> (int) (msgId.offset() - msgId2.offset()));
        spout.fail(failedMessagePartitionTwo.get());

        reset(collectorMock);
        Time.advanceTime(50);
        when(consumerMock.poll(anyLong())).thenReturn(new ConsumerRecords<>(Collections.singletonMap(
            partition, SpoutWithMockedConsumerSetupHelper.createRecords(partition, failedMessageIdPartitionOne.get().offset(), 1))));
        spout.nextTuple();
        verify(collectorMock, times(1)).emit(anyObject(), anyObject(), anyObject());

        InOrder inOrder = inOrder(consumerMock);
        inOrder.verify(consumerMock).seek(partition, failedMessageIdPartitionOne.get().offset());
        // Should not seek on the paused partition
        inOrder.verify(consumerMock, never()).seek(eq(partitionTwo), anyLong());
        inOrder.verify(consumerMock).pause(Collections.singleton(partitionTwo));
        inOrder.verify(consumerMock).poll(anyLong());
        inOrder.verify(consumerMock).resume(Collections.singleton(partitionTwo));

        reset(collectorMock);
        // Now also check that no more tuples are polled for, since both partitions are at their limits
        spout.nextTuple();
        verify(collectorMock, never()).emit(anyObject(), anyObject(), anyObject());
    }
}
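The maxUncommittedOffsets limit exercised by this test is configured on the spout. Below is a minimal sketch of such a configuration, assuming a local broker, an illustrative topic name, and the usual storm-kafka-client imports; none of the values are taken from the test.

KafkaSpoutConfig<String, String> exampleConfig = KafkaSpoutConfig
    .builder("localhost:9092", "example-topic")   // broker address and topic name are assumptions
    .setMaxUncommittedOffsets(250)                // cap on offsets that may be emitted but not yet committed
    .setOffsetCommitPeriodMs(10_000)              // commit acked offsets every 10 seconds
    .build();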
Use of org.apache.kafka.clients.consumer.ConsumerRecord in project storm by apache.
In class SimpleRecordTranslatorTest, method testBasic.
@Test
public void testBasic() {
    SimpleRecordTranslator<String, String> trans =
        new SimpleRecordTranslator<>((r) -> new Values(r.value()), new Fields("value"));
    assertEquals(Arrays.asList("default"), trans.streams());
    ConsumerRecord<String, String> cr = new ConsumerRecord<>("TOPIC", 100, 100, "THE KEY", "THE VALUE");
    assertEquals(Arrays.asList("THE VALUE"), trans.apply(cr));
}
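For context, such a translator is normally handed to the spout configuration so the spout knows how to turn each ConsumerRecord into a tuple. A rough sketch under the assumption of a local broker and illustrative topic and field names; builder methods may vary slightly between storm-kafka-client versions.

RecordTranslator<String, String> translator =
    new SimpleRecordTranslator<>(r -> new Values(r.key(), r.value()), new Fields("key", "value"));
KafkaSpoutConfig<String, String> translatorConfig = KafkaSpoutConfig
    .builder("localhost:9092", "example-topic")   // broker address and topic name are assumptions
    .setRecordTranslator(translator)
    .build();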
Use of org.apache.kafka.clients.consumer.ConsumerRecord in project storm by apache.
In class KafkaSpout, method commitOffsetsForAckedTuples.
private void commitOffsetsForAckedTuples() {
    final Map<TopicPartition, OffsetAndMetadata> nextCommitOffsets = new HashMap<>();
    for (Map.Entry<TopicPartition, OffsetManager> tpOffset : offsetManagers.entrySet()) {
        final OffsetAndMetadata nextCommitOffset = tpOffset.getValue().findNextCommitOffset(commitMetadataManager.getCommitMetadata());
        if (nextCommitOffset != null) {
            nextCommitOffsets.put(tpOffset.getKey(), nextCommitOffset);
        }
    }

    // Commit offsets that are ready to be committed for every topic partition
    if (!nextCommitOffsets.isEmpty()) {
        consumer.commitSync(nextCommitOffsets);
        LOG.debug("Offsets successfully committed to Kafka [{}]", nextCommitOffsets);
        // It would be possible to commit and update the state for each TopicPartition
        // in the prior loop, but the multiple network calls should be more expensive than iterating twice over a small loop
        for (Map.Entry<TopicPartition, OffsetAndMetadata> tpOffset : nextCommitOffsets.entrySet()) {
            // Update the OffsetManager for each committed partition, and update numUncommittedOffsets
            final TopicPartition tp = tpOffset.getKey();
            long position = consumer.position(tp);
            long committedOffset = tpOffset.getValue().offset();
            if (position < committedOffset) {
                /*
                 * The position is behind the committed offset. This can happen in some cases, e.g. if a message failed, lots of (more
                 * than max.poll.records) later messages were acked, and the failed message then gets acked. The consumer may only be
                 * part way through "catching up" to where it was when it went back to retry the failed tuple. Skip the consumer forward
                 * to the committed offset.
                 */
                LOG.debug("Consumer fell behind committed offset. Catching up. Position was [{}], skipping to [{}]", position, committedOffset);
                consumer.seek(tp, committedOffset);
            }
            /*
             * In some cases the waitingToEmit list may contain tuples that have just been committed. Drop these.
             */
            List<ConsumerRecord<K, V>> waitingToEmitForTp = waitingToEmit.get(tp);
            if (waitingToEmitForTp != null) {
                // Discard the pending records that are already committed
                waitingToEmit.put(tp, waitingToEmitForTp.stream()
                    .filter(record -> record.offset() >= committedOffset)
                    .collect(Collectors.toCollection(LinkedList::new)));
            }
            final OffsetManager offsetManager = offsetManagers.get(tp);
            offsetManager.commit(tpOffset.getValue());
            LOG.debug("[{}] uncommitted offsets for partition [{}] after commit", offsetManager.getNumUncommittedOffsets(), tp);
        }
    } else {
        LOG.trace("No offsets to commit. {}", this);
    }
}
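The commit above only advances as far as OffsetManager.findNextCommitOffset allows, i.e. a contiguous run of acked offsets starting at the current committed position. The following is a simplified, standalone sketch of that idea; it is an illustration, not the project's OffsetManager.

import java.util.TreeSet;

// Hedged sketch of "find next commit offset": only a contiguous run of acked
// offsets starting at the committed position is safe to commit.
class ContiguousAckTracker {
    private final TreeSet<Long> acked = new TreeSet<>();
    private long committed; // first offset that has not yet been committed

    ContiguousAckTracker(long initialCommitted) {
        this.committed = initialCommitted;
    }

    void ack(long offset) {
        acked.add(offset);
    }

    // Returns the new committed position, or -1 if no contiguous progress was made.
    long findNextCommitOffset() {
        long next = committed;
        while (acked.contains(next)) {
            acked.remove(next);
            next++;
        }
        if (next == committed) {
            return -1;
        }
        committed = next;
        return committed;
    }
}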
Use of org.apache.kafka.clients.consumer.ConsumerRecord in project storm by apache.
In class KafkaSpout, method ackRetriableOffsetsIfCompactedAway.
private void ackRetriableOffsetsIfCompactedAway(Map<TopicPartition, Long> earliestRetriableOffsets, ConsumerRecords<K, V> consumerRecords) {
    for (Entry<TopicPartition, Long> entry : earliestRetriableOffsets.entrySet()) {
        TopicPartition tp = entry.getKey();
        List<ConsumerRecord<K, V>> records = consumerRecords.records(tp);
        if (!records.isEmpty()) {
            ConsumerRecord<K, V> record = records.get(0);
            long seekOffset = entry.getValue();
            long earliestReceivedOffset = record.offset();
            if (seekOffset < earliestReceivedOffset) {
                // Ack up to the first offset received if the record is not already acked or currently in the topology
                for (long i = seekOffset; i < earliestReceivedOffset; i++) {
                    KafkaSpoutMessageId msgId = retryService.getMessageId(tp, i);
                    if (!offsetManagers.get(tp).contains(msgId) && !emitted.contains(msgId)) {
                        LOG.debug("Record at offset [{}] appears to have been compacted away from topic [{}], marking as acked", i, tp);
                        retryService.remove(msgId);
                        emitted.add(msgId);
                        ack(msgId);
                    }
                }
            }
        }
    }
}
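This situation only arises on topics where previously emitted records can disappear, typically because log compaction removed them. As background, a compacted topic can be created with the Kafka AdminClient roughly as follows; this is a hedged sketch, and the broker address, topic name, partition count, and replication factor are assumptions.

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.NewTopic;
import org.apache.kafka.common.config.TopicConfig;
import java.util.Collections;
import java.util.Properties;

public class CompactedTopicSetup {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumption
        try (AdminClient admin = AdminClient.create(props)) {
            NewTopic topic = new NewTopic("compacted-example-topic", 1, (short) 1)
                .configs(Collections.singletonMap(TopicConfig.CLEANUP_POLICY_CONFIG, TopicConfig.CLEANUP_POLICY_COMPACT));
            // Records with superseded keys on this topic may later be compacted away,
            // which is the scenario the spout method above has to tolerate.
            admin.createTopics(Collections.singleton(topic)).all().get();
        }
    }
}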
Use of org.apache.kafka.clients.consumer.ConsumerRecord in project storm by apache.
In class KafkaTridentSpoutEmitter, method emitPartitionBatchNew.
/**
 * Emit a new batch.
 */
public Map<String, Object> emitPartitionBatchNew(TransactionAttempt tx, TridentCollector collector,
        KafkaTridentSpoutTopicPartition currBatchPartition, Map<String, Object> lastBatch) {
    LOG.debug("Processing batch: [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], [collector = {}]",
        tx, currBatchPartition, lastBatch, collector);
    final TopicPartition currBatchTp = currBatchPartition.getTopicPartition();
    throwIfEmittingForUnassignedPartition(currBatchTp);
    KafkaTridentSpoutBatchMetadata lastBatchMeta = lastBatch == null ? null : KafkaTridentSpoutBatchMetadata.fromMap(lastBatch);
    KafkaTridentSpoutBatchMetadata currentBatch = lastBatchMeta;
    Collection<TopicPartition> pausedTopicPartitions = Collections.emptySet();
    try {
        // pause other topic-partitions to only poll from current topic-partition
        pausedTopicPartitions = pauseTopicPartitions(currBatchTp);
        seek(currBatchTp, lastBatchMeta);
        final List<ConsumerRecord<K, V>> records = consumer.poll(pollTimeoutMs).records(currBatchTp);
        LOG.debug("Polled [{}] records from Kafka.", records.size());
        if (!records.isEmpty()) {
            for (ConsumerRecord<K, V> record : records) {
                emitTuple(collector, record);
            }
            // build new metadata based on emitted records
            currentBatch = new KafkaTridentSpoutBatchMetadata(records.get(0).offset(),
                records.get(records.size() - 1).offset(), topologyContext.getStormId());
        } else {
            // Build new metadata based on the consumer position.
            // We want the next emit to start at the current consumer position,
            // so make a meta that indicates that position - 1 is the last emitted offset.
            // This helps us avoid cases like STORM-3279, and simplifies the seek logic.
            long lastEmittedOffset = consumer.position(currBatchTp) - 1;
            currentBatch = new KafkaTridentSpoutBatchMetadata(lastEmittedOffset, lastEmittedOffset, topologyContext.getStormId());
        }
    } finally {
        consumer.resume(pausedTopicPartitions);
        LOG.trace("Resumed topic-partitions {}", pausedTopicPartitions);
    }
    LOG.debug("Emitted batch: [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], [currBatchMetadata = {}], [collector = {}]",
        tx, currBatchPartition, lastBatch, currentBatch, collector);
    return currentBatch.toMap();
}
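The seek(currBatchTp, lastBatchMeta) call above is what stitches consecutive batches together: with previous batch metadata the emitter continues right after the last emitted offset, otherwise it falls back to the configured first-poll offset strategy. The following is a simplified sketch of that logic, not the exact code of KafkaTridentSpoutEmitter.seek; the EARLIEST fallback is an assumption for illustration.

private long seekSketch(Consumer<K, V> consumer, TopicPartition tp, KafkaTridentSpoutBatchMetadata lastBatchMeta) {
    if (lastBatchMeta != null) {
        // Continue right after the last offset emitted in the previous batch
        consumer.seek(tp, lastBatchMeta.getLastOffset() + 1);
    } else {
        // No previous batch for this partition: assumed EARLIEST strategy for illustration
        consumer.seekToBeginning(Collections.singleton(tp));
    }
    return consumer.position(tp);
}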